
annotate xen/arch/x86/mm.c @ 20938:d311d1efc25e

x86: make max_mfn returned from XENMEM_machphys_mapping dynamic

This helps debugging in guest kernels, as MFNs there can then be
range-checked based on the reported value.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Feb 04 08:53:49 2010 +0000 (2010-02-04)
parents 91358472d8c4
children a3fa6d444b25
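
The max_mfn field returned by XENMEM_machphys_mapping is what a guest kernel can use for the range checks mentioned in the changeset description. Below is a minimal guest-side sketch; it assumes the standard Xen public headers and a HYPERVISOR_memory_op() hypercall wrapper, and the helper name mfn_is_valid_machine_frame is made up for illustration.

    #include <xen/interface/xen.h>
    #include <xen/interface/memory.h>

    static unsigned long machphys_max_mfn;

    static void cache_machphys_limits(void)
    {
        struct xen_machphys_mapping mapping;

        /* Xen reports the M2P virtual range and, after this change, a
         * max_mfn value derived from the actual amount of memory. */
        if ( HYPERVISOR_memory_op(XENMEM_machphys_mapping, &mapping) == 0 )
            machphys_max_mfn = mapping.max_mfn;
    }

    /* Hypothetical helper: reject MFNs beyond what Xen reported. */
    static int mfn_is_valid_machine_frame(unsigned long mfn)
    {
        return machphys_max_mfn && (mfn <= machphys_max_mfn);
    }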
rev   line source
kaf24@3757 1 /******************************************************************************
kaf24@3757 2 * arch/x86/mm.c
kaf24@3757 3 *
kaf24@3757 4 * Copyright (c) 2002-2005 K A Fraser
kaf24@3757 5 * Copyright (c) 2004 Christian Limpach
kaf24@3757 6 *
kaf24@3757 7 * This program is free software; you can redistribute it and/or modify
kaf24@3757 8 * it under the terms of the GNU General Public License as published by
kaf24@3757 9 * the Free Software Foundation; either version 2 of the License, or
kaf24@3757 10 * (at your option) any later version.
kaf24@3757 11 *
kaf24@3757 12 * This program is distributed in the hope that it will be useful,
kaf24@3757 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
kaf24@3757 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
kaf24@3757 15 * GNU General Public License for more details.
kaf24@3757 16 *
kaf24@3757 17 * You should have received a copy of the GNU General Public License
kaf24@3757 18 * along with this program; if not, write to the Free Software
kaf24@3757 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
kaf24@3757 20 */
kaf24@3757 21
kaf24@3757 22 /*
kaf24@3757 23 * A description of the x86 page table API:
kaf24@3757 24 *
kaf24@3757 25 * Domains trap to do_mmu_update with a list of update requests.
kaf24@3757 26 * This is a list of (ptr, val) pairs, where the requested operation
kaf24@3757 27 * is *ptr = val.
kaf24@3757 28 *
kaf24@3757 29 * Reference counting of pages:
kaf24@3757 30 * ----------------------------
kaf24@3757 31 * Each page has two refcounts: tot_count and type_count.
kaf24@3757 32 *
kaf24@3757 33 * TOT_COUNT is the obvious reference count. It counts all uses of a
kaf24@3757 34 * physical page frame by a domain, including uses as a page directory,
kaf24@3757 35 * a page table, or simple mappings via a PTE. This count prevents a
kaf24@3757 36 * domain from releasing a frame back to the free pool when it still holds
kaf24@3757 37 * a reference to it.
kaf24@3757 38 *
kaf24@3757 39 * TYPE_COUNT is more subtle. A frame can be put to one of three
kaf24@3757 40 * mutually-exclusive uses: it might be used as a page directory, or a
kaf24@3757 41 * page table, or it may be mapped writable by the domain [of course, a
kaf24@3757 42 * frame may not be used in any of these three ways!].
kaf24@3757 43 * So, type_count is a count of the number of times a frame is being
kaf24@3757 44 * referred to in its current incarnation. Therefore, a page can only
kaf24@3757 45 * change its type when its type count is zero.
kaf24@3757 46 *
kaf24@3757 47 * Pinning the page type:
kaf24@3757 48 * ----------------------
kaf24@3757 49 * The type of a page can be pinned/unpinned with the commands
kaf24@3757 50 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is,
kaf24@3757 51 * pinning is not reference counted, so it can't be nested).
kaf24@3757 52 * This is useful to prevent a page's type count falling to zero, at which
kaf24@3757 53 * point safety checks would need to be carried out next time the count
kaf24@3757 54 * is increased again.
kaf24@3757 55 *
kaf24@3757 56 * A further note on writable page mappings:
kaf24@3757 57 * -----------------------------------------
kaf24@3757 58 * For simplicity, the count of writable mappings for a page may not
kaf24@3757 59 * correspond to reality. The 'writable count' is incremented for every
kaf24@3757 60 * PTE which maps the page with the _PAGE_RW flag set. However, for
kaf24@3757 61 * write access to be possible the page directory entry must also have
kaf24@3757 62 * its _PAGE_RW bit set. We do not check this as it complicates the
kaf24@3757 63 * reference counting considerably [consider the case of multiple
kaf24@3757 64 * directory entries referencing a single page table, some with the RW
kaf24@3757 65 * bit set, others not -- it starts getting a bit messy].
kaf24@3757 66 * In normal use, this simplification shouldn't be a problem.
kaf24@3757 67 * However, the logic can be added if required.
kaf24@3757 68 *
kaf24@3757 69 * One more note on read-only page mappings:
kaf24@3757 70 * -----------------------------------------
kaf24@3757 71 * We want domains to be able to map pages for read-only access. The
kaf24@3757 72 * main reason is that page tables and directories should be readable
kaf24@3757 73 * by a domain, but it would not be safe for them to be writable.
kaf24@3757 74 * However, domains have free access to rings 1 & 2 of the Intel
kaf24@3757 75 * privilege model. In terms of page protection, these are considered
kaf24@3757 76 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether
kaf24@3757 77 * read-only restrictions are respected in supervisor mode -- if the
kaf24@3757 78 * bit is clear then any mapped page is writable.
kaf24@3757 79 *
kaf24@3757 80 * We get round this by always setting the WP bit and disallowing
kaf24@3757 81 * updates to it. This is very unlikely to cause a problem for guest
kaf24@3757 82 * OS's, which will generally use the WP bit to simplify copy-on-write
kaf24@3757 83 * implementation (in that case, OS wants a fault when it writes to
kaf24@3757 84 * an application-supplied buffer).
kaf24@3757 85 */
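/*
 * Illustrative guest-side sketch, not part of this file: the API described
 * above as seen from a PV guest. It assumes the standard Xen public headers
 * and the usual HYPERVISOR_mmu_update()/HYPERVISOR_mmuext_op() hypercall
 * wrappers; 'pte_machine_addr', 'new_val' and 'l1_mfn' are placeholders.
 */
static int example_update_and_pin(uint64_t pte_machine_addr, uint64_t new_val,
                                  unsigned long l1_mfn)
{
    struct mmu_update req;
    struct mmuext_op op;
    int rc;

    /* One (ptr, val) pair: ask Xen to perform *ptr = val after validating
     * the new entry and adjusting the reference counts described above. */
    req.ptr = pte_machine_addr | MMU_NORMAL_PT_UPDATE;
    req.val = new_val;
    rc = HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
    if ( rc )
        return rc;

    /* Pin the L1 table so its type count cannot drop to zero, avoiding
     * revalidation next time it is used (see "Pinning the page type"). */
    op.cmd = MMUEXT_PIN_L1_TABLE;
    op.arg1.mfn = l1_mfn;
    return HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
}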
kaf24@3757 86
kaf24@3757 87 #include <xen/config.h>
kaf24@3757 88 #include <xen/init.h>
kaf24@3757 89 #include <xen/kernel.h>
kaf24@3757 90 #include <xen/lib.h>
kaf24@3757 91 #include <xen/mm.h>
kaf24@10281 92 #include <xen/domain.h>
kaf24@3757 93 #include <xen/sched.h>
kaf24@3757 94 #include <xen/errno.h>
kaf24@3757 95 #include <xen/perfc.h>
kaf24@3757 96 #include <xen/irq.h>
kaf24@3757 97 #include <xen/softirq.h>
kaf24@5394 98 #include <xen/domain_page.h>
kaf24@6133 99 #include <xen/event.h>
kaf24@8498 100 #include <xen/iocap.h>
kaf24@9054 101 #include <xen/guest_access.h>
Tim@13938 102 #include <asm/paging.h>
kaf24@3757 103 #include <asm/shadow.h>
kaf24@3757 104 #include <asm/page.h>
kaf24@3757 105 #include <asm/flushtlb.h>
kaf24@3757 106 #include <asm/io.h>
kaf24@3757 107 #include <asm/ldt.h>
kaf24@4198 108 #include <asm/x86_emulate.h>
kaf24@11109 109 #include <asm/e820.h>
ack@13298 110 #include <asm/hypercall.h>
kfraser@14478 111 #include <asm/shared.h>
kaf24@8733 112 #include <public/memory.h>
keir@19946 113 #include <public/sched.h>
kfraser@15846 114 #include <xsm/xsm.h>
keir@16142 115 #include <xen/trace.h>
keir@20323 116 #include <asm/setup.h>
keir@20728 117 #include <asm/mem_sharing.h>
kaf24@3757 118
keir@16926 119 /*
keir@16926 120 * Mapping of first 2 or 4 megabytes of memory. This is mapped with 4kB
keir@16926 121 * mappings to avoid type conflicts with fixed-range MTRRs covering the
keir@16926 122 * lowest megabyte of physical memory. In any case the VGA hole should be
keir@16926 123 * mapped with type UC.
keir@16926 124 */
keir@16926 125 l1_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
keir@16926 126 l1_identmap[L1_PAGETABLE_ENTRIES];
keir@16926 127
kaf24@12063 128 #define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING , _f "\n" , ## _a)
kaf24@3757 129
kaf24@4426 130 /*
kfraser@10487 131 * PTE updates can be done with ordinary writes except:
kfraser@10487 132 * 1. Debug builds get extra checking by using CMPXCHG[8B].
kfraser@10487 133 * 2. PAE builds perform an atomic 8-byte store with CMPXCHG8B.
kfraser@10487 134 */
keir@17638 135 #if !defined(NDEBUG) || defined(__i386__)
kfraser@10487 136 #define PTE_UPDATE_WITH_CMPXCHG
kfraser@10487 137 #endif
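/*
 * Illustrative sketch, not part of this file: what the CMPXCHG-based update
 * above buys over a plain store, shown for a 64-bit entry using a GCC
 * builtin rather than Xen's own cmpxchg()/UPDATE_ENTRY machinery.
 */
static inline int example_update_pte(uint64_t *p, uint64_t old, uint64_t new)
{
#ifdef PTE_UPDATE_WITH_CMPXCHG
    /* Atomic compare-and-exchange: detects concurrent modification of the
     * entry, and on PAE performs the 8-byte store as a single operation. */
    return __sync_bool_compare_and_swap(p, old, new);
#else
    /* Ordinary write: sufficient for non-debug 64-bit builds. */
    *p = new;
    return 1;
#endif
}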
kfraser@10487 138
keir@20645 139 int mem_hotplug = 0;
keir@20645 140
kaf24@3757 141 /* Private domain structs for DOMID_XEN and DOMID_IO. */
keir@20723 142 struct domain *dom_xen, *dom_io, *dom_cow;
kaf24@3757 143
keir@20201 144 /* Frame table size in pages. */
kaf24@3757 145 unsigned long max_page;
kaf24@7249 146 unsigned long total_pages;
kaf24@3757 147
keir@20275 148 unsigned long __read_mostly pdx_group_valid[BITS_TO_LONGS(
keir@20275 149 (FRAMETABLE_SIZE / sizeof(*frame_table) + PDX_GROUP_COUNT - 1)
keir@20275 150 / PDX_GROUP_COUNT)] = { [0] = 1 };
keir@20275 151
keir@16164 152 #define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
keir@16164 153
keir@18794 154 int opt_allow_hugepage;
keir@18794 155 boolean_param("allowhugepage", opt_allow_hugepage);
keir@18794 156
keir@16164 157 #define l1_disallow_mask(d) \
keir@16179 158 ((d != dom_io) && \
keir@16179 159 (rangeset_is_empty((d)->iomem_caps) && \
keir@18393 160 rangeset_is_empty((d)->arch.ioport_caps) && \
keir@18393 161 !has_arch_pdevs(d)) ? \
keir@16164 162 L1_DISALLOW_MASK : (L1_DISALLOW_MASK & ~PAGE_CACHE_ATTRS))
keir@16164 163
keir@20341 164 #ifdef __x86_64__
ack@13295 165 l2_pgentry_t *compat_idle_pg_table_l2 = NULL;
kfraser@15012 166 #define l3_disallow_mask(d) (!is_pv_32on64_domain(d) ? \
kfraser@15012 167 L3_DISALLOW_MASK : \
ack@13295 168 COMPAT_L3_DISALLOW_MASK)
ack@13295 169 #else
ack@13295 170 #define l3_disallow_mask(d) L3_DISALLOW_MASK
ack@13295 171 #endif
ack@13295 172
keir@20275 173 static void __init init_frametable_chunk(void *start, void *end)
kaf24@3757 174 {
keir@20275 175 unsigned long s = (unsigned long)start;
keir@20275 176 unsigned long e = (unsigned long)end;
keir@20275 177 unsigned long step, mfn;
keir@20275 178
keir@20275 179 ASSERT(!(s & ((1 << L2_PAGETABLE_SHIFT) - 1)));
keir@20275 180 for ( ; s < e; s += step << PAGE_SHIFT )
kaf24@3757 181 {
keir@20275 182 step = 1UL << (cpu_has_page1gb &&
keir@20275 183 !(s & ((1UL << L3_PAGETABLE_SHIFT) - 1)) ?
keir@20275 184 L3_PAGETABLE_SHIFT - PAGE_SHIFT :
keir@20275 185 L2_PAGETABLE_SHIFT - PAGE_SHIFT);
keir@19669 186 /*
keir@19669 187 * The hardcoded 4 below is arbitrary - just pick whatever you think
keir@19669 188 * is reasonable to waste as a trade-off for using a large page.
keir@19669 189 */
keir@20275 190 while ( step && s + (step << PAGE_SHIFT) > e + (4 << PAGE_SHIFT) )
keir@20275 191 step >>= PAGETABLE_ORDER;
keir@20275 192 do {
keir@20275 193 mfn = alloc_boot_pages(step, step);
keir@20275 194 } while ( !mfn && (step >>= PAGETABLE_ORDER) );
keir@20275 195 if ( !mfn )
keir@20275 196 panic("Not enough memory for frame table");
keir@20275 197 map_pages_to_xen(s, mfn, step, PAGE_HYPERVISOR);
kaf24@3757 198 }
kaf24@3757 199
keir@20275 200 memset(start, 0, end - start);
keir@20275 201 memset(end, -1, s - (unsigned long)end);
keir@20275 202 }
keir@20275 203
keir@20275 204 void __init init_frametable(void)
keir@20275 205 {
keir@20275 206 unsigned int sidx, eidx, nidx;
keir@20275 207 unsigned int max_idx = (max_pdx + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT;
keir@20275 208
keir@20275 209 #ifdef __x86_64__
keir@20275 210 BUILD_BUG_ON(XEN_VIRT_END > FRAMETABLE_VIRT_END);
keir@20275 211 #endif
keir@20275 212 BUILD_BUG_ON(FRAMETABLE_VIRT_START & ((1UL << L2_PAGETABLE_SHIFT) - 1));
keir@20275 213
keir@20275 214 for ( sidx = 0; ; sidx = nidx )
keir@20275 215 {
keir@20275 216 eidx = find_next_zero_bit(pdx_group_valid, max_idx, sidx);
keir@20275 217 nidx = find_next_bit(pdx_group_valid, max_idx, eidx);
keir@20275 218 if ( nidx >= max_idx )
keir@20275 219 break;
keir@20275 220 init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT),
keir@20275 221 pdx_to_page(eidx * PDX_GROUP_COUNT));
keir@20275 222 }
keir@20663 223 if ( !mem_hotplug )
keir@20663 224 init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT),
keir@20663 225 pdx_to_page(max_pdx - 1) + 1);
keir@20663 226 else
keir@20663 227 {
keir@20937 228 init_frametable_chunk(pdx_to_page(sidx * PDX_GROUP_COUNT),
keir@20937 229 pdx_to_page(max_idx * PDX_GROUP_COUNT - 1) + 1);
keir@20937 230 memset(pdx_to_page(max_pdx), -1,
keir@20937 231 (unsigned long)pdx_to_page(max_idx * PDX_GROUP_COUNT) -
keir@20937 232 (unsigned long)pdx_to_page(max_pdx));
keir@20663 233 }
kaf24@3757 234 }
kaf24@3757 235
keir@15081 236 void __init arch_init_memory(void)
kaf24@3757 237 {
keir@16964 238 unsigned long i, pfn, rstart_pfn, rend_pfn, iostart_pfn, ioend_pfn;
kaf24@4570 239
kaf24@3757 240 /*
kaf24@3757 241 * Initialise our DOMID_XEN domain.
kaf24@3757 242 * Any Xen-heap pages that we will allow to be mapped will have
kaf24@3757 243 * their domain field set to dom_xen.
kaf24@3757 244 */
keir@17922 245 dom_xen = domain_create(DOMID_XEN, DOMCRF_dummy, 0);
kfraser@10280 246 BUG_ON(dom_xen == NULL);
kaf24@3757 247
kaf24@3757 248 /*
kaf24@3757 249 * Initialise our DOMID_IO domain.
kaf24@8764 250 * This domain owns I/O pages that are within the range of the page_info
kaf24@4570 251 * array. Mappings occur at the privilege level of the caller.
kaf24@3757 252 */
keir@17922 253 dom_io = domain_create(DOMID_IO, DOMCRF_dummy, 0);
kfraser@10280 254 BUG_ON(dom_io == NULL);
keir@20723 255
keir@20723 256 /*
keir@20723 257 * Initialise our DOMID_COW domain.
keir@20723 258 * This domain owns sharable pages.
keir@20723 259 */
keir@20723 260 dom_cow = domain_create(DOMID_COW, DOMCRF_dummy, 0);
keir@20723 261 BUG_ON(dom_cow == NULL);
kaf24@3757 262
kaf24@4570 263 /* First 1MB of RAM is historically marked as I/O. */
kaf24@4570 264 for ( i = 0; i < 0x100; i++ )
kaf24@9214 265 share_xen_page_with_guest(mfn_to_page(i), dom_io, XENSHARE_writable);
kaf24@4570 266
kaf24@5043 267 /* Any areas not specified as RAM by the e820 map are considered I/O. */
kfraser@15497 268 for ( i = 0, pfn = 0; pfn < max_page; i++ )
kaf24@4570 269 {
kfraser@15830 270 while ( (i < e820.nr_map) &&
kfraser@15830 271 (e820.map[i].type != E820_RAM) &&
kfraser@15830 272 (e820.map[i].type != E820_UNUSABLE) )
kfraser@15495 273 i++;
kfraser@15495 274
kfraser@15497 275 if ( i >= e820.nr_map )
kfraser@15495 276 {
kfraser@15495 277 /* No more RAM regions: mark as I/O right to end of memory map. */
kfraser@15495 278 rstart_pfn = rend_pfn = max_page;
kfraser@15495 279 }
kfraser@15495 280 else
kfraser@15495 281 {
kfraser@15495 282 /* Mark as I/O just up as far as next RAM region. */
kfraser@15495 283 rstart_pfn = min_t(unsigned long, max_page,
kfraser@15495 284 PFN_UP(e820.map[i].addr));
kfraser@15495 285 rend_pfn = max_t(unsigned long, rstart_pfn,
kfraser@15495 286 PFN_DOWN(e820.map[i].addr + e820.map[i].size));
kfraser@15495 287 }
kfraser@15495 288
keir@16927 289 /*
keir@16964 290 * Make sure any Xen mappings of RAM holes above 1MB are blown away.
keir@16927 291 * In particular this ensures that RAM holes are respected even in
keir@16964 292 * the statically-initialised 1-16MB mapping area.
keir@16927 293 */
keir@16964 294 iostart_pfn = max_t(unsigned long, pfn, 1UL << (20 - PAGE_SHIFT));
keir@16927 295 #if defined(CONFIG_X86_32)
keir@20274 296 ioend_pfn = min_t(unsigned long, rstart_pfn,
keir@16927 297 DIRECTMAP_MBYTES << (20 - PAGE_SHIFT));
keir@20274 298 #else
keir@20274 299 ioend_pfn = min(rstart_pfn, 16UL << (20 - PAGE_SHIFT));
keir@16927 300 #endif
keir@16964 301 if ( iostart_pfn < ioend_pfn )
keir@16964 302 destroy_xen_mappings((unsigned long)mfn_to_virt(iostart_pfn),
keir@16927 303 (unsigned long)mfn_to_virt(ioend_pfn));
keir@16927 304
kfraser@15495 305 /* Mark as I/O up to next RAM region. */
kaf24@8434 306 for ( ; pfn < rstart_pfn; pfn++ )
kaf24@4570 307 {
keir@20274 308 if ( !mfn_valid(pfn) )
keir@20274 309 continue;
kaf24@9214 310 share_xen_page_with_guest(
kaf24@9214 311 mfn_to_page(pfn), dom_io, XENSHARE_writable);
kaf24@4570 312 }
kfraser@15495 313
kaf24@5043 314 /* Skip the RAM region. */
kaf24@5043 315 pfn = rend_pfn;
kaf24@4570 316 }
kaf24@4570 317
kaf24@9214 318 subarch_init_memory();
keir@20728 319
keir@20728 320 mem_sharing_init();
kaf24@9214 321 }
kaf24@9214 322
keir@19295 323 int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
kfraser@11194 324 {
keir@19123 325 uint64_t maddr = pfn_to_paddr(mfn);
kfraser@11194 326 int i;
kfraser@11194 327
kfraser@11194 328 for ( i = 0; i < e820.nr_map; i++ )
kfraser@11194 329 {
keir@19295 330 switch ( e820.map[i].type )
keir@19295 331 {
keir@19295 332 case E820_RAM:
keir@19295 333 if ( mem_type & RAM_TYPE_CONVENTIONAL )
keir@19295 334 break;
keir@19295 335 continue;
keir@19295 336 case E820_RESERVED:
keir@19295 337 if ( mem_type & RAM_TYPE_RESERVED )
keir@19295 338 break;
keir@19295 339 continue;
keir@19295 340 case E820_UNUSABLE:
keir@19295 341 if ( mem_type & RAM_TYPE_UNUSABLE )
keir@19295 342 break;
keir@19295 343 continue;
keir@19295 344 case E820_ACPI:
keir@19295 345 case E820_NVS:
keir@19295 346 if ( mem_type & RAM_TYPE_ACPI )
keir@19295 347 break;
keir@19295 348 continue;
keir@19295 349 default:
keir@19295 350 /* unknown */
keir@19295 351 continue;
keir@19295 352 }
keir@19295 353
keir@19295 354 /* Test the range. */
keir@19295 355 if ( (e820.map[i].addr <= maddr) &&
keir@19123 356 ((e820.map[i].addr + e820.map[i].size) >= (maddr + PAGE_SIZE)) )
kfraser@11194 357 return 1;
kfraser@11194 358 }
kfraser@11194 359
kfraser@11194 360 return 0;
kfraser@11194 361 }
kfraser@11194 362
kfraser@14478 363 unsigned long domain_get_maximum_gpfn(struct domain *d)
kfraser@14478 364 {
keir@14982 365 if ( is_hvm_domain(d) )
keir@17442 366 return d->arch.p2m->max_mapped_pfn;
keir@14982 367 /* NB. PV guests specify nr_pfns rather than max_pfn so we adjust here. */
keir@14982 368 return arch_get_max_pfn(d) - 1;
kfraser@14478 369 }
kfraser@14478 370
kaf24@9214 371 void share_xen_page_with_guest(
kaf24@9214 372 struct page_info *page, struct domain *d, int readonly)
kaf24@9214 373 {
kaf24@9214 374 if ( page_get_owner(page) == d )
kaf24@9214 375 return;
kaf24@9214 376
kaf24@9237 377 set_gpfn_from_mfn(page_to_mfn(page), INVALID_M2P_ENTRY);
kaf24@9237 378
kaf24@9214 379 spin_lock(&d->page_alloc_lock);
kaf24@9214 380
kaf24@9214 381 /* The incremented type count pins as writable or read-only. */
kaf24@9214 382 page->u.inuse.type_info = (readonly ? PGT_none : PGT_writable_page);
kaf24@9214 383 page->u.inuse.type_info |= PGT_validated | 1;
kaf24@9214 384
kaf24@9214 385 page_set_owner(page, d);
kaf24@9214 386 wmb(); /* install valid domain ptr before updating refcnt. */
keir@19132 387 ASSERT((page->count_info & ~PGC_xen_heap) == 0);
kfraser@14224 388
kfraser@14224 389 /* Only add to the allocation list if the domain isn't dying. */
kfraser@14677 390 if ( !d->is_dying )
kfraser@14224 391 {
kfraser@14224 392 page->count_info |= PGC_allocated | 1;
kfraser@14224 393 if ( unlikely(d->xenheap_pages++ == 0) )
kfraser@14224 394 get_knownalive_domain(d);
keir@19170 395 page_list_add_tail(page, &d->xenpage_list);
kfraser@14224 396 }
kaf24@9214 397
kaf24@9214 398 spin_unlock(&d->page_alloc_lock);
kaf24@9214 399 }
kaf24@9214 400
kaf24@9214 401 void share_xen_page_with_privileged_guests(
kaf24@9214 402 struct page_info *page, int readonly)
kaf24@9214 403 {
kaf24@9214 404 share_xen_page_with_guest(page, dom_xen, readonly);
kaf24@3757 405 }
kaf24@3757 406
keir@17638 407 #if defined(__i386__)
kaf24@10215 408
kaf24@10505 409 #ifdef NDEBUG
kaf24@10215 410 /* Only PDPTs above 4GB boundary need to be shadowed in low memory. */
kfraser@10490 411 #define l3tab_needs_shadow(mfn) ((mfn) >= 0x100000)
kaf24@10215 412 #else
kaf24@10222 413 /*
kaf24@10505 414 * In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
keir@19306 415 * We cannot safely shadow the idle page table, nor shadow page tables
keir@19306 416 * (detected by zero reference count). As required for correctness, we
tdeegan@11189 417 * always shadow PDPTs above 4GB.
kaf24@10222 418 */
keir@19306 419 #define l3tab_needs_shadow(mfn) \
keir@19306 420 (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
keir@19306 421 (mfn_to_page(mfn)->count_info & PGC_count_mask) && \
keir@19306 422 ((mfn) & 1)) || /* odd MFNs are shadowed */ \
kfraser@10490 423 ((mfn) >= 0x100000))
kaf24@10215 424 #endif
kaf24@10215 425
kaf24@10215 426 static l1_pgentry_t *fix_pae_highmem_pl1e;
kaf24@10215 427
kaf24@10215 428 /* Cache the address of PAE high-memory fixmap page tables. */
kaf24@10215 429 static int __init cache_pae_fixmap_address(void)
kaf24@10215 430 {
kaf24@10215 431 unsigned long fixmap_base = fix_to_virt(FIX_PAE_HIGHMEM_0);
kaf24@10215 432 l2_pgentry_t *pl2e = virt_to_xen_l2e(fixmap_base);
kaf24@10215 433 fix_pae_highmem_pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(fixmap_base);
kaf24@10215 434 return 0;
kaf24@10215 435 }
kaf24@10215 436 __initcall(cache_pae_fixmap_address);
kaf24@10215 437
tdeegan@11189 438 static DEFINE_PER_CPU(u32, make_cr3_timestamp);
tdeegan@11189 439
tdeegan@11189 440 void make_cr3(struct vcpu *v, unsigned long mfn)
tdeegan@11189 441 /* Takes the MFN of a PAE l3 table, copies the contents to below 4GB if
tdeegan@11189 442 * necessary, and sets v->arch.cr3 to the value to load in CR3. */
kaf24@10211 443 {
kaf24@10215 444 l3_pgentry_t *highmem_l3tab, *lowmem_l3tab;
tdeegan@11189 445 struct pae_l3_cache *cache = &v->arch.pae_l3_cache;
kaf24@10215 446 unsigned int cpu = smp_processor_id();
kaf24@10215 447
tdeegan@11189 448 /* Fast path: does this mfn need a shadow at all? */
kaf24@10215 449 if ( !l3tab_needs_shadow(mfn) )
kaf24@10211 450 {
tdeegan@11189 451 v->arch.cr3 = mfn << PAGE_SHIFT;
tdeegan@11189 452 /* Cache is no longer in use or valid */
kaf24@10505 453 cache->high_mfn = 0;
kaf24@10215 454 return;
kaf24@10215 455 }
kaf24@10215 456
kaf24@10215 457 /* Caching logic is not interrupt safe. */
kaf24@10215 458 ASSERT(!in_irq());
kaf24@10215 459
kaf24@10215 460 /* Protects against pae_flush_pgd(). */
kaf24@10215 461 spin_lock(&cache->lock);
kaf24@10215 462
kaf24@10215 463 cache->inuse_idx ^= 1;
kaf24@10215 464 cache->high_mfn = mfn;
kaf24@10215 465
kaf24@10215 466 /* Map the guest L3 table and copy to the chosen low-memory cache. */
kfraser@12825 467 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
tdeegan@11189 468 /* First check the previous high mapping can't be in the TLB.
tdeegan@11189 469 * (i.e. have we loaded CR3 since we last did this?) */
tdeegan@11189 470 if ( unlikely(this_cpu(make_cr3_timestamp) == this_cpu(tlbflush_time)) )
keir@16155 471 flush_tlb_one_local(fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu));
kaf24@10215 472 highmem_l3tab = (l3_pgentry_t *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
kaf24@10215 473 lowmem_l3tab = cache->table[cache->inuse_idx];
kaf24@10215 474 memcpy(lowmem_l3tab, highmem_l3tab, sizeof(cache->table[0]));
kfraser@12825 475 l1e_write(fix_pae_highmem_pl1e-cpu, l1e_empty());
tdeegan@11189 476 this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
tdeegan@11189 477
tdeegan@11189 478 v->arch.cr3 = __pa(lowmem_l3tab);
kaf24@10215 479
kaf24@10215 480 spin_unlock(&cache->lock);
kaf24@10215 481 }
kaf24@10215 482
keir@17638 483 #else /* !defined(__i386__) */
kaf24@10215 484
tdeegan@11189 485 void make_cr3(struct vcpu *v, unsigned long mfn)
kaf24@10215 486 {
tdeegan@11189 487 v->arch.cr3 = mfn << PAGE_SHIFT;
kaf24@10211 488 }
kaf24@10211 489
keir@17638 490 #endif /* !defined(__i386__) */
kaf24@10215 491
kaf24@5327 492 void write_ptbase(struct vcpu *v)
kaf24@3757 493 {
tdeegan@11189 494 write_cr3(v->arch.cr3);
kaf24@3757 495 }
kaf24@3757 496
kfraser@15738 497 /*
kfraser@15738 498 * Should be called after CR3 is updated.
Tim@13143 499 *
Tim@13143 500 * Uses values found in vcpu->arch.(guest_table and guest_table_user), and
Tim@13143 501 * for HVM guests, arch.monitor_table and hvm's guest CR3.
Tim@13143 502 *
Tim@13143 503 * Update ref counts to shadow tables appropriately.
Tim@13143 504 */
Tim@13143 505 void update_cr3(struct vcpu *v)
Tim@13143 506 {
Tim@13143 507 unsigned long cr3_mfn=0;
Tim@13143 508
Tim@13938 509 if ( paging_mode_enabled(v->domain) )
Tim@13143 510 {
Tim@13938 511 paging_update_cr3(v);
Tim@13143 512 return;
Tim@13143 513 }
Tim@13143 514
Tim@13143 515 #if CONFIG_PAGING_LEVELS == 4
Tim@13143 516 if ( !(v->arch.flags & TF_kernel_mode) )
Tim@13143 517 cr3_mfn = pagetable_get_pfn(v->arch.guest_table_user);
Tim@13143 518 else
Tim@13143 519 #endif
Tim@13143 520 cr3_mfn = pagetable_get_pfn(v->arch.guest_table);
Tim@13143 521
Tim@13143 522 make_cr3(v, cr3_mfn);
Tim@13143 523 }
Tim@13143 524
Tim@13143 525
keir@19199 526 static void invalidate_shadow_ldt(struct vcpu *v, int flush)
kaf24@3757 527 {
kaf24@3757 528 int i;
kaf24@3757 529 unsigned long pfn;
kaf24@8764 530 struct page_info *page;
keir@19198 531
keir@19199 532 BUG_ON(unlikely(in_irq()));
keir@19199 533
keir@19199 534 spin_lock(&v->arch.shadow_ldt_lock);
keir@19199 535
kaf24@5327 536 if ( v->arch.shadow_ldt_mapcnt == 0 )
keir@19199 537 goto out;
kaf24@4426 538
kaf24@5327 539 v->arch.shadow_ldt_mapcnt = 0;
kaf24@3757 540
kaf24@3757 541 for ( i = 16; i < 32; i++ )
kaf24@3757 542 {
kaf24@5327 543 pfn = l1e_get_pfn(v->arch.perdomain_ptes[i]);
kaf24@3757 544 if ( pfn == 0 ) continue;
kfraser@12825 545 l1e_write(&v->arch.perdomain_ptes[i], l1e_empty());
kaf24@8764 546 page = mfn_to_page(pfn);
keir@17425 547 ASSERT_PAGE_IS_TYPE(page, PGT_seg_desc_page);
kaf24@5327 548 ASSERT_PAGE_IS_DOMAIN(page, v->domain);
kaf24@3757 549 put_page_and_type(page);
kaf24@3757 550 }
kaf24@3757 551
keir@19199 552 /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */
keir@19199 553 if ( flush )
keir@19689 554 flush_tlb_mask(&v->vcpu_dirty_cpumask);
keir@19199 555
keir@19199 556 out:
keir@19199 557 spin_unlock(&v->arch.shadow_ldt_lock);
kaf24@3757 558 }
kaf24@3757 559
kaf24@3757 560
kaf24@8764 561 static int alloc_segdesc_page(struct page_info *page)
kaf24@3757 562 {
kaf24@3757 563 struct desc_struct *descs;
kaf24@3757 564 int i;
kaf24@3757 565
keir@20277 566 descs = __map_domain_page(page);
kaf24@3757 567
kaf24@3757 568 for ( i = 0; i < 512; i++ )
ack@13290 569 if ( unlikely(!check_descriptor(page_get_owner(page), &descs[i])) )
kaf24@3757 570 goto fail;
kaf24@3757 571
kaf24@5394 572 unmap_domain_page(descs);
keir@18450 573 return 0;
kaf24@3757 574
kaf24@3757 575 fail:
kaf24@5394 576 unmap_domain_page(descs);
keir@18450 577 return -EINVAL;
kaf24@3757 578 }
kaf24@3757 579
kaf24@3757 580
kaf24@3757 581 /* Map shadow page at offset @off. */
kaf24@3757 582 int map_ldt_shadow_page(unsigned int off)
kaf24@3757 583 {
kaf24@5327 584 struct vcpu *v = current;
kaf24@5327 585 struct domain *d = v->domain;
kaf24@8764 586 unsigned long gmfn, mfn;
mafetter@4629 587 l1_pgentry_t l1e, nl1e;
kaf24@5589 588 unsigned long gva = v->arch.guest_context.ldt_base + (off << PAGE_SHIFT);
tim@11687 589 int okay;
kaf24@4176 590
kaf24@4198 591 BUG_ON(unlikely(in_irq()));
kaf24@3757 592
tim@11687 593 guest_get_eff_kern_l1e(v, gva, &l1e);
mafetter@4629 594 if ( unlikely(!(l1e_get_flags(l1e) & _PAGE_PRESENT)) )
mafetter@4179 595 return 0;
mafetter@4179 596
kaf24@8764 597 gmfn = l1e_get_pfn(l1e);
kaf24@8764 598 mfn = gmfn_to_mfn(d, gmfn);
kfraser@12606 599 if ( unlikely(!mfn_valid(mfn)) )
kaf24@3757 600 return 0;
kaf24@3757 601
keir@17425 602 okay = get_page_and_type(mfn_to_page(mfn), d, PGT_seg_desc_page);
tim@11687 603 if ( unlikely(!okay) )
mafetter@4179 604 return 0;
mafetter@4179 605
kaf24@8764 606 nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
mafetter@4629 607
keir@19199 608 spin_lock(&v->arch.shadow_ldt_lock);
kfraser@12825 609 l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
kaf24@5327 610 v->arch.shadow_ldt_mapcnt++;
keir@19199 611 spin_unlock(&v->arch.shadow_ldt_lock);
kaf24@3757 612
kaf24@3757 613 return 1;
kaf24@3757 614 }
kaf24@3757 615
kaf24@3757 616
kaf24@3757 617 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d)
kaf24@3757 618 {
kaf24@8764 619 struct page_info *page = mfn_to_page(page_nr);
kaf24@8764 620
kaf24@8764 621 if ( unlikely(!mfn_valid(page_nr)) || unlikely(!get_page(page, d)) )
kaf24@3757 622 {
kaf24@4692 623 MEM_LOG("Could not get page ref for pfn %lx", page_nr);
kaf24@3757 624 return 0;
kaf24@3757 625 }
kaf24@3757 626
kaf24@3757 627 return 1;
kaf24@3757 628 }
kaf24@3757 629
kaf24@3757 630
kaf24@3757 631 static int get_page_and_type_from_pagenr(unsigned long page_nr,
kaf24@6077 632 unsigned long type,
keir@18450 633 struct domain *d,
keir@18780 634 int partial,
keir@18450 635 int preemptible)
kaf24@3757 636 {
kaf24@8764 637 struct page_info *page = mfn_to_page(page_nr);
keir@18450 638 int rc;
kaf24@3757 639
keir@18780 640 if ( likely(partial >= 0) &&
keir@18780 641 unlikely(!get_page_from_pagenr(page_nr, d)) )
keir@18450 642 return -EINVAL;
keir@18450 643
keir@18450 644 rc = (preemptible ?
keir@18450 645 get_page_type_preemptible(page, type) :
keir@18450 646 (get_page_type(page, type) ? 0 : -EINVAL));
keir@18450 647
keir@18780 648 if ( unlikely(rc) && partial >= 0 )
kaf24@3757 649 put_page(page);
keir@18450 650
keir@18450 651 return rc;
kaf24@3757 652 }
kaf24@3757 653
keir@18794 654 static int get_data_page(
keir@18794 655 struct page_info *page, struct domain *d, int writeable)
keir@18794 656 {
keir@18794 657 int rc;
keir@18794 658
keir@18794 659 if ( writeable )
keir@18794 660 rc = get_page_and_type(page, d, PGT_writable_page);
keir@18794 661 else
keir@18794 662 rc = get_page(page, d);
keir@18794 663
keir@18794 664 return rc;
keir@18794 665 }
keir@18794 666
keir@18794 667 static void put_data_page(
keir@18794 668 struct page_info *page, int writeable)
keir@18794 669 {
keir@18794 670 if ( writeable )
keir@18794 671 put_page_and_type(page);
keir@18794 672 else
keir@18794 673 put_page(page);
keir@18794 674 }
keir@18794 675
kaf24@3757 676 /*
kaf24@3782 677 * We allow root tables to map each other (a.k.a. linear page tables). It
kaf24@3782 678 * needs some special care with reference counts and access permissions:
kaf24@3757 679 * 1. The mapping entry must be read-only, or the guest may get write access
kaf24@3757 680 * to its own PTEs.
kaf24@3757 681 * 2. We must only bump the reference counts for an *already validated*
kaf24@3757 682 * L2 table, or we can end up in a deadlock in get_page_type() by waiting
kaf24@3757 683 * on a validation that is required to complete that validation.
kaf24@3757 684 * 3. We only need to increment the reference counts for the mapped page
kaf24@3782 685 * frame if it is mapped by a different root table. This is sufficient and
kaf24@3782 686 * also necessary to allow validation of a root table mapping itself.
kaf24@3757 687 */
kfraser@14392 688 #define define_get_linear_pagetable(level) \
kfraser@14391 689 static int \
kfraser@14392 690 get_##level##_linear_pagetable( \
kfraser@14392 691 level##_pgentry_t pde, unsigned long pde_pfn, struct domain *d) \
kfraser@14391 692 { \
kfraser@14391 693 unsigned long x, y; \
kfraser@14391 694 struct page_info *page; \
kfraser@14391 695 unsigned long pfn; \
kfraser@14391 696 \
kfraser@14392 697 if ( (level##e_get_flags(pde) & _PAGE_RW) ) \
kfraser@14391 698 { \
kfraser@14391 699 MEM_LOG("Attempt to create linear p.t. with write perms"); \
kfraser@14391 700 return 0; \
kfraser@14391 701 } \
kfraser@14391 702 \
kfraser@14392 703 if ( (pfn = level##e_get_pfn(pde)) != pde_pfn ) \
kfraser@14391 704 { \
kfraser@14391 705 /* Make sure the mapped frame belongs to the correct domain. */ \
kfraser@14391 706 if ( unlikely(!get_page_from_pagenr(pfn, d)) ) \
kfraser@14391 707 return 0; \
kfraser@14391 708 \
kfraser@14391 709 /* \
kfraser@14391 710 * Ensure that the mapped frame is an already-validated page table. \
kfraser@14391 711 * If so, atomically increment the count (checking for overflow). \
kfraser@14391 712 */ \
kfraser@14391 713 page = mfn_to_page(pfn); \
kfraser@14391 714 y = page->u.inuse.type_info; \
kfraser@14391 715 do { \
kfraser@14391 716 x = y; \
kfraser@14391 717 if ( unlikely((x & PGT_count_mask) == PGT_count_mask) || \
kfraser@14391 718 unlikely((x & (PGT_type_mask|PGT_validated)) != \
kfraser@14392 719 (PGT_##level##_page_table|PGT_validated)) ) \
kfraser@14391 720 { \
kfraser@14391 721 put_page(page); \
kfraser@14391 722 return 0; \
kfraser@14391 723 } \
kfraser@14391 724 } \
kfraser@14391 725 while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x ); \
kfraser@14391 726 } \
kfraser@14391 727 \
kfraser@14391 728 return 1; \
kaf24@3757 729 }
kaf24@3757 730
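/*
 * Illustrative guest-side sketch, not part of this file: installing a linear
 * ("self-referencing") page-table mapping within the rules stated above the
 * macro -- note the absence of _PAGE_RW (rule 1). Shown for a 64-bit PV
 * guest; 'l4_mfn' and 'self_slot' are placeholders.
 */
static int example_install_linear_l4(unsigned long l4_mfn,
                                     unsigned int self_slot)
{
    struct mmu_update req;

    /* ptr: machine address of the L4 entry being rewritten. */
    req.ptr = (((uint64_t)l4_mfn << PAGE_SHIFT) +
               self_slot * sizeof(uint64_t)) | MMU_NORMAL_PT_UPDATE;
    /* val: the L4 table's own machine address, present but read-only. */
    req.val = ((uint64_t)l4_mfn << PAGE_SHIFT) |
              _PAGE_PRESENT | _PAGE_ACCESSED;
    return HYPERVISOR_mmu_update(&req, 1, NULL, DOMID_SELF);
}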
keir@16105 731
keir@16369 732 int is_iomem_page(unsigned long mfn)
keir@16105 733 {
keir@19306 734 struct page_info *page;
keir@19306 735
keir@19306 736 if ( !mfn_valid(mfn) )
keir@19306 737 return 1;
keir@19306 738
keir@19306 739 /* Caller must know that it is an iomem page, or a reference is held. */
keir@19306 740 page = mfn_to_page(mfn);
keir@19306 741 ASSERT((page->count_info & PGC_count_mask) != 0);
keir@19306 742
keir@19306 743 return (page_get_owner(page) == dom_io);
keir@16105 744 }
keir@16105 745
keir@19670 746 static void update_xen_mappings(unsigned long mfn, unsigned long cacheattr)
keir@19670 747 {
keir@19670 748 #ifdef __x86_64__
keir@19670 749 bool_t alias = mfn >= PFN_DOWN(xen_phys_start) &&
keir@19670 750 mfn < PFN_UP(xen_phys_start + (unsigned long)_end - XEN_VIRT_START);
keir@19670 751 unsigned long xen_va =
keir@19670 752 XEN_VIRT_START + ((mfn - PFN_DOWN(xen_phys_start)) << PAGE_SHIFT);
keir@19670 753
keir@19670 754 if ( unlikely(alias) && cacheattr )
keir@19670 755 map_pages_to_xen(xen_va, mfn, 1, 0);
keir@19670 756 map_pages_to_xen((unsigned long)mfn_to_virt(mfn), mfn, 1,
keir@19670 757 PAGE_HYPERVISOR | cacheattr_to_pte_flags(cacheattr));
keir@19670 758 if ( unlikely(alias) && !cacheattr )
keir@19670 759 map_pages_to_xen(xen_va, mfn, 1, PAGE_HYPERVISOR);
keir@19670 760 #endif
keir@19670 761 }
keir@19670 762
mafetter@4179 763 int
kaf24@3757 764 get_page_from_l1e(
keir@19746 765 l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner)
kaf24@3757 766 {
mafetter@4629 767 unsigned long mfn = l1e_get_pfn(l1e);
kaf24@8764 768 struct page_info *page = mfn_to_page(mfn);
keir@16369 769 uint32_t l1f = l1e_get_flags(l1e);
keir@16543 770 struct vcpu *curr = current;
keir@19746 771 struct domain *real_pg_owner;
kaf24@3757 772
keir@16369 773 if ( !(l1f & _PAGE_PRESENT) )
kaf24@3757 774 return 1;
kaf24@3757 775
keir@19746 776 if ( unlikely(l1f & l1_disallow_mask(l1e_owner)) )
kaf24@3757 777 {
keir@19746 778 MEM_LOG("Bad L1 flags %x", l1f & l1_disallow_mask(l1e_owner));
kaf24@3757 779 return 0;
kaf24@3757 780 }
kaf24@3757 781
keir@19306 782 if ( !mfn_valid(mfn) ||
keir@19746 783 (real_pg_owner = page_get_owner_and_reference(page)) == dom_io )
kaf24@3757 784 {
keir@19306 785 /* Only needed the reference to confirm dom_io ownership. */
keir@19306 786 if ( mfn_valid(mfn) )
keir@19306 787 put_page(page);
keir@19306 788
kaf24@4570 789 /* DOMID_IO reverts to caller for privilege checks. */
keir@19746 790 if ( pg_owner == dom_io )
keir@19746 791 pg_owner = curr->domain;
keir@19746 792
keir@19746 793 if ( !iomem_access_permitted(pg_owner, mfn, mfn) )
kaf24@4570 794 {
kaf24@12281 795 if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
kfraser@12258 796 MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx",
keir@19746 797 pg_owner->domain_id, mfn);
kaf24@4570 798 return 0;
kaf24@4570 799 }
kaf24@4570 800
keir@16440 801 return 1;
kaf24@3757 802 }
kaf24@3757 803
keir@19746 804 if ( unlikely(real_pg_owner != pg_owner) )
keir@19746 805 {
keir@19746 806 /*
keir@19746 807 * Let privileged domains transfer the right to map their target
keir@19746 808 * domain's pages. This is used to allow stub-domain pvfb export to
keir@19746 809 * dom0, until pvfb supports granted mappings. At that time this
keir@19746 810 * minor hack can go away.
keir@19746 811 */
keir@20415 812 if ( (real_pg_owner == NULL) || (pg_owner == l1e_owner) ||
keir@20415 813 !IS_PRIV_FOR(pg_owner, real_pg_owner) )
keir@19746 814 goto could_not_pin;
keir@19746 815 pg_owner = real_pg_owner;
keir@19746 816 }
keir@17277 817
kaf24@11298 818 /* Foreign mappings into guests in shadow external mode don't
tdeegan@11189 819 * contribute to writeable mapping refcounts. (This allows the
tdeegan@11189 820 * qemu-dm helper process in dom0 to map the domain's memory without
tdeegan@11189 821 * messing up the count of "real" writable mappings.) */
keir@19306 822 if ( (l1f & _PAGE_RW) &&
keir@19746 823 ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) &&
keir@19306 824 !get_page_type(page, PGT_writable_page) )
keir@19306 825 goto could_not_pin;
keir@19306 826
keir@19306 827 if ( pte_flags_to_cacheattr(l1f) !=
keir@19642 828 ((page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base) )
keir@16369 829 {
keir@19127 830 unsigned long x, nx, y = page->count_info;
keir@19127 831 unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
keir@16369 832
keir@16376 833 if ( is_xen_heap_page(page) )
keir@16369 834 {
keir@16369 835 if ( (l1f & _PAGE_RW) &&
keir@19746 836 ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) )
keir@16369 837 put_page_type(page);
keir@16369 838 put_page(page);
keir@16369 839 MEM_LOG("Attempt to change cache attributes of Xen heap page");
keir@16369 840 return 0;
keir@16369 841 }
keir@16369 842
keir@19642 843 while ( ((y & PGC_cacheattr_mask) >> PGC_cacheattr_base) != cacheattr )
keir@16369 844 {
keir@16369 845 x = y;
keir@16369 846 nx = (x & ~PGC_cacheattr_mask) | (cacheattr << PGC_cacheattr_base);
keir@16369 847 y = cmpxchg(&page->count_info, x, nx);
keir@16369 848 }
keir@16369 849
keir@19670 850 update_xen_mappings(mfn, cacheattr);
keir@16369 851 }
kaf24@7434 852
keir@19306 853 return 1;
keir@19306 854
keir@19306 855 could_not_pin:
keir@19306 856 MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
keir@19746 857 " for l1e_owner=%d, pg_owner=%d",
keir@19306 858 mfn, get_gpfn_from_mfn(mfn),
keir@19746 859 l1e_get_intpte(l1e), l1e_owner->domain_id, pg_owner->domain_id);
keir@19746 860 if ( real_pg_owner != NULL )
keir@19306 861 put_page(page);
keir@19306 862 return 0;
kaf24@3757 863 }
kaf24@3757 864
kaf24@3757 865
kaf24@3757 866 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */
kfraser@14392 867 define_get_linear_pagetable(l2);
kfraser@14392 868 static int
kaf24@3757 869 get_page_from_l2e(
kfraser@11522 870 l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
kaf24@3757 871 {
keir@18794 872 unsigned long mfn = l2e_get_pfn(l2e);
kaf24@3757 873 int rc;
kaf24@3757 874
mafetter@4629 875 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
kaf24@3757 876 return 1;
kaf24@3757 877
mafetter@4629 878 if ( unlikely((l2e_get_flags(l2e) & L2_DISALLOW_MASK)) )
kaf24@3757 879 {
kaf24@6368 880 MEM_LOG("Bad L2 flags %x", l2e_get_flags(l2e) & L2_DISALLOW_MASK);
keir@18450 881 return -EINVAL;
kaf24@3757 882 }
kaf24@3757 883
keir@18794 884 if ( !(l2e_get_flags(l2e) & _PAGE_PSE) )
keir@18794 885 {
keir@18794 886 rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0);
keir@18794 887 if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
keir@18794 888 rc = 0;
keir@18794 889 }
keir@18794 890 else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) )
keir@18794 891 {
keir@18794 892 rc = -EINVAL;
keir@18794 893 }
keir@18794 894 else
keir@18794 895 {
keir@18794 896 unsigned long m = mfn;
keir@18794 897 int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
keir@18794 898
keir@18794 899 do {
keir@20227 900 if ( !mfn_valid(m) ||
keir@20227 901 !get_data_page(mfn_to_page(m), d, writeable) )
keir@18794 902 {
keir@18794 903 while ( m-- > mfn )
keir@18794 904 put_data_page(mfn_to_page(m), writeable);
keir@18794 905 return -EINVAL;
keir@18794 906 }
keir@18794 907 } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
keir@20227 908
keir@20227 909 rc = 1;
keir@18794 910 }
kfraser@14391 911
kaf24@3782 912 return rc;
kaf24@3782 913 }
kaf24@3782 914
kaf24@3782 915
kfraser@14392 916 define_get_linear_pagetable(l3);
kfraser@14392 917 static int
kaf24@3782 918 get_page_from_l3e(
keir@18780 919 l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int preemptible)
kaf24@3782 920 {
kaf24@5275 921 int rc;
kaf24@5275 922
mafetter@4629 923 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
kaf24@3782 924 return 1;
kaf24@3782 925
ack@13295 926 if ( unlikely((l3e_get_flags(l3e) & l3_disallow_mask(d))) )
kaf24@3782 927 {
ack@13295 928 MEM_LOG("Bad L3 flags %x", l3e_get_flags(l3e) & l3_disallow_mask(d));
keir@18450 929 return -EINVAL;
kaf24@3782 930 }
kaf24@3782 931
keir@18450 932 rc = get_page_and_type_from_pagenr(
keir@18780 933 l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible);
keir@18458 934 if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
keir@18458 935 rc = 0;
kfraser@14392 936
kaf24@5275 937 return rc;
kaf24@3782 938 }
kaf24@5275 939
kaf24@5275 940 #if CONFIG_PAGING_LEVELS >= 4
kfraser@14392 941 define_get_linear_pagetable(l4);
kfraser@14392 942 static int
kaf24@3782 943 get_page_from_l4e(
keir@18780 944 l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int preemptible)
kaf24@3782 945 {
kaf24@3782 946 int rc;
kaf24@3782 947
mafetter@4629 948 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
kaf24@3782 949 return 1;
kaf24@3782 950
mafetter@4629 951 if ( unlikely((l4e_get_flags(l4e) & L4_DISALLOW_MASK)) )
kaf24@3782 952 {
kaf24@6368 953 MEM_LOG("Bad L4 flags %x", l4e_get_flags(l4e) & L4_DISALLOW_MASK);
keir@18450 954 return -EINVAL;
kaf24@3782 955 }
kaf24@3782 956
keir@18450 957 rc = get_page_and_type_from_pagenr(
keir@18780 958 l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible);
keir@18458 959 if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
keir@18458 960 rc = 0;
mafetter@7730 961
mafetter@7730 962 return rc;
kaf24@3757 963 }
kaf24@5275 964 #endif /* 4 level */
kaf24@3782 965
kfraser@11264 966 #ifdef __x86_64__
kfraser@11567 967
kfraser@11567 968 #ifdef USER_MAPPINGS_ARE_GLOBAL
ack@13295 969 #define adjust_guest_l1e(pl1e, d) \
kfraser@11567 970 do { \
ack@13295 971 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
kfraser@15012 972 likely(!is_pv_32on64_domain(d)) ) \
kfraser@11567 973 { \
kfraser@11567 974 /* _PAGE_GUEST_KERNEL page cannot have the Global bit set. */ \
kfraser@11567 975 if ( (l1e_get_flags((pl1e)) & (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL)) \
kfraser@11567 976 == (_PAGE_GUEST_KERNEL|_PAGE_GLOBAL) ) \
kfraser@11567 977 MEM_LOG("Global bit is set to kernel page %lx", \
kfraser@11567 978 l1e_get_pfn((pl1e))); \
kfraser@11567 979 if ( !(l1e_get_flags((pl1e)) & _PAGE_USER) ) \
kfraser@11567 980 l1e_add_flags((pl1e), (_PAGE_GUEST_KERNEL|_PAGE_USER)); \
kfraser@11567 981 if ( !(l1e_get_flags((pl1e)) & _PAGE_GUEST_KERNEL) ) \
kfraser@11567 982 l1e_add_flags((pl1e), (_PAGE_GLOBAL|_PAGE_USER)); \
kfraser@11567 983 } \
kfraser@11567 984 } while ( 0 )
kfraser@11567 985 #else
ack@13295 986 #define adjust_guest_l1e(pl1e, d) \
kfraser@11567 987 do { \
ack@13295 988 if ( likely(l1e_get_flags((pl1e)) & _PAGE_PRESENT) && \
kfraser@15012 989 likely(!is_pv_32on64_domain(d)) ) \
kfraser@11264 990 l1e_add_flags((pl1e), _PAGE_USER); \
kfraser@11264 991 } while ( 0 )
kfraser@11567 992 #endif
kfraser@11264 993
ack@13295 994 #define adjust_guest_l2e(pl2e, d) \
kfraser@11264 995 do { \
ack@13295 996 if ( likely(l2e_get_flags((pl2e)) & _PAGE_PRESENT) && \
kfraser@15012 997 likely(!is_pv_32on64_domain(d)) ) \
kfraser@11264 998 l2e_add_flags((pl2e), _PAGE_USER); \
kfraser@11264 999 } while ( 0 )
kfraser@11264 1000
kfraser@15012 1001 #define adjust_guest_l3e(pl3e, d) \
kfraser@15012 1002 do { \
kfraser@15012 1003 if ( likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
kfraser@15012 1004 l3e_add_flags((pl3e), likely(!is_pv_32on64_domain(d)) ? \
kfraser@15012 1005 _PAGE_USER : \
kfraser@15012 1006 _PAGE_USER|_PAGE_RW); \
kfraser@11264 1007 } while ( 0 )
kfraser@11264 1008
ack@13295 1009 #define adjust_guest_l4e(pl4e, d) \
kfraser@11264 1010 do { \
ack@13295 1011 if ( likely(l4e_get_flags((pl4e)) & _PAGE_PRESENT) && \
kfraser@15012 1012 likely(!is_pv_32on64_domain(d)) ) \
kfraser@11264 1013 l4e_add_flags((pl4e), _PAGE_USER); \
kfraser@11264 1014 } while ( 0 )
kfraser@11567 1015
kfraser@11567 1016 #else /* !defined(__x86_64__) */
kfraser@11567 1017
ack@13295 1018 #define adjust_guest_l1e(_p, _d) ((void)(_d))
ack@13295 1019 #define adjust_guest_l2e(_p, _d) ((void)(_d))
ack@13295 1020 #define adjust_guest_l3e(_p, _d) ((void)(_d))
ack@13295 1021
ack@13295 1022 #endif
ack@13295 1023
keir@20341 1024 #ifdef __x86_64__
kfraser@15012 1025 #define unadjust_guest_l3e(pl3e, d) \
kfraser@15012 1026 do { \
kfraser@15012 1027 if ( unlikely(is_pv_32on64_domain(d)) && \
kfraser@15012 1028 likely(l3e_get_flags((pl3e)) & _PAGE_PRESENT) ) \
kfraser@15012 1029 l3e_remove_flags((pl3e), _PAGE_USER|_PAGE_RW|_PAGE_ACCESSED); \
ack@13295 1030 } while ( 0 )
ack@13295 1031 #else
ack@13295 1032 #define unadjust_guest_l3e(_p, _d) ((void)(_d))
kfraser@11264 1033 #endif
kaf24@3757 1034
keir@19746 1035 void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
kaf24@3757 1036 {
keir@16440 1037 unsigned long pfn = l1e_get_pfn(l1e);
keir@16440 1038 struct page_info *page;
keir@19746 1039 struct domain *pg_owner;
keir@16440 1040 struct vcpu *v;
keir@16440 1041
keir@16440 1042 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || is_iomem_page(pfn) )
kaf24@3757 1043 return;
kaf24@3757 1044
keir@16440 1045 page = mfn_to_page(pfn);
keir@19746 1046 pg_owner = page_get_owner(page);
kaf24@8006 1047
kaf24@8006 1048 /*
kaf24@8006 1049 * Check if this is a mapping that was established via a grant reference.
kaf24@8006 1050 * If it was then we should not be here: we require that such mappings are
kaf24@8006 1051 * explicitly destroyed via the grant-table interface.
kaf24@8006 1052 *
kaf24@8006 1053 * The upshot of this is that the guest can end up with active grants that
kaf24@8006 1054 * it cannot destroy (because it no longer has a PTE to present to the
kaf24@8006 1055 * grant-table interface). This can lead to subtle hard-to-catch bugs,
kaf24@8006 1056 * hence a special grant PTE flag can be enabled to catch the bug early.
kaf24@8006 1057 *
kaf24@8006 1058 * (Note that the undestroyable active grants are not a security hole in
kaf24@8006 1059 * Xen. All active grants can safely be cleaned up when the domain dies.)
kaf24@8006 1060 */
kfraser@14739 1061 if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
keir@19746 1062 !l1e_owner->is_shutting_down && !l1e_owner->is_dying )
kaf24@3757 1063 {
kaf24@8006 1064 MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
kaf24@8006 1065 l1e_get_intpte(l1e));
keir@19746 1066 domain_crash(l1e_owner);
kaf24@3757 1067 }
kaf24@3757 1068
tdeegan@11189 1069 /* Remember we didn't take a type-count of foreign writable mappings
Tim@13938 1070 * to paging-external domains */
tdeegan@11189 1071 if ( (l1e_get_flags(l1e) & _PAGE_RW) &&
keir@19746 1072 ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) )
kaf24@3757 1073 {
kaf24@3757 1074 put_page_and_type(page);
kaf24@3757 1075 }
kaf24@3757 1076 else
kaf24@3757 1077 {
kaf24@3757 1078 /* We expect this is rare so we blow the entire shadow LDT. */
kaf24@3757 1079 if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
keir@17425 1080 PGT_seg_desc_page)) &&
kaf24@7430 1081 unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
keir@19746 1082 (l1e_owner == pg_owner) )
kaf24@7430 1083 {
keir@19746 1084 for_each_vcpu ( pg_owner, v )
keir@19199 1085 invalidate_shadow_ldt(v, 1);
kaf24@7430 1086 }
kaf24@3757 1087 put_page(page);
kaf24@3757 1088 }
kaf24@3757 1089 }
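/*
 * Illustrative guest-side sketch, not part of this file, relating to the
 * grant-PTE warning in put_page_from_l1e() above: a granted mapping must be
 * torn down through the grant-table interface, not by overwriting the PTE.
 * 'vaddr' and 'handle' are placeholders for values obtained when the grant
 * was mapped.
 */
static int example_unmap_grant(uint64_t vaddr, grant_handle_t handle)
{
    struct gnttab_unmap_grant_ref op;

    op.host_addr    = vaddr;   /* address the grant was mapped at */
    op.dev_bus_addr = 0;       /* no device (IOMMU) mapping to undo */
    op.handle       = handle;  /* handle from GNTTABOP_map_grant_ref */

    if ( HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1) )
        return -1;
    return op.status;          /* GNTST_okay (0) on success */
}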
kaf24@3757 1090
kaf24@3757 1091
kaf24@3757 1092 /*
kaf24@3757 1093 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'.
kaf24@3757 1094 * Note also that this automatically deals correctly with linear p.t.'s.
kaf24@3757 1095 */
keir@18450 1096 static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
kaf24@3757 1097 {
keir@18794 1098 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) )
keir@18794 1099 return 1;
keir@18794 1100
keir@18794 1101 if ( l2e_get_flags(l2e) & _PAGE_PSE )
keir@18794 1102 {
keir@18794 1103 unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
keir@18794 1104 int writeable = l2e_get_flags(l2e) & _PAGE_RW;
keir@19047 1105
keir@19047 1106 ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
keir@18794 1107 do {
keir@18794 1108 put_data_page(mfn_to_page(m), writeable);
keir@18794 1109 } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
keir@18794 1110 }
keir@18794 1111 else
keir@18450 1112 {
kfraser@13380 1113 put_page_and_type(l2e_get_page(l2e));
keir@18450 1114 }
keir@18794 1115
keir@18794 1116 return 0;
kaf24@3757 1117 }
kaf24@3757 1118
keir@18780 1119 static int __put_page_type(struct page_info *, int preemptible);
kaf24@3757 1120
keir@18450 1121 static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
keir@18780 1122 int partial, int preemptible)
kaf24@3757 1123 {
keir@19047 1124 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
keir@19047 1125 return 1;
keir@19047 1126
keir@19047 1127 #ifdef __x86_64__
keir@19047 1128 if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) )
keir@18780 1129 {
keir@19047 1130 unsigned long mfn = l3e_get_pfn(l3e);
keir@19047 1131 int writeable = l3e_get_flags(l3e) & _PAGE_RW;
keir@19047 1132
keir@19047 1133 ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)));
keir@19047 1134 do {
keir@19047 1135 put_data_page(mfn_to_page(mfn), writeable);
keir@19047 1136 } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) );
keir@19047 1137
keir@19047 1138 return 0;
keir@18780 1139 }
keir@19047 1140 #endif
keir@19047 1141
keir@19047 1142 if ( unlikely(partial > 0) )
keir@19047 1143 return __put_page_type(l3e_get_page(l3e), preemptible);
keir@19047 1144
keir@19047 1145 return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
kaf24@3757 1146 }
kaf24@5275 1147
kaf24@5275 1148 #if CONFIG_PAGING_LEVELS >= 4
keir@18450 1149 static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
keir@18780 1150 int partial, int preemptible)
kaf24@3782 1151 {
mafetter@4629 1152 if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
mafetter@4629 1153 (l4e_get_pfn(l4e) != pfn) )
keir@18780 1154 {
keir@18780 1155 if ( unlikely(partial > 0) )
keir@18780 1156 return __put_page_type(l4e_get_page(l4e), preemptible);
keir@18450 1157 return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
keir@18780 1158 }
keir@18450 1159 return 1;
kaf24@3782 1160 }
kaf24@5275 1161 #endif
kaf24@3782 1162
kaf24@8764 1163 static int alloc_l1_table(struct page_info *page)
kaf24@3757 1164 {
kaf24@3757 1165 struct domain *d = page_get_owner(page);
kaf24@8764 1166 unsigned long pfn = page_to_mfn(page);
kaf24@3757 1167 l1_pgentry_t *pl1e;
keir@18450 1168 unsigned int i;
kaf24@3757 1169
kaf24@5394 1170 pl1e = map_domain_page(pfn);
kaf24@3757 1171
kaf24@3775 1172 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
kfraser@11264 1173 {
kaf24@3791 1174 if ( is_guest_l1_slot(i) &&
keir@19746 1175 unlikely(!get_page_from_l1e(pl1e[i], d, d)) )
kaf24@3757 1176 goto fail;
kaf24@3757 1177
ack@13295 1178 adjust_guest_l1e(pl1e[i], d);
kfraser@11264 1179 }
kfraser@11264 1180
kaf24@5394 1181 unmap_domain_page(pl1e);
keir@18450 1182 return 0;
kaf24@3757 1183
kaf24@3757 1184 fail:
kaf24@7434 1185 MEM_LOG("Failure in alloc_l1_table: entry %d", i);
kaf24@3757 1186 while ( i-- > 0 )
kaf24@3791 1187 if ( is_guest_l1_slot(i) )
kaf24@3791 1188 put_page_from_l1e(pl1e[i], d);
kaf24@3757 1189
kaf24@5394 1190 unmap_domain_page(pl1e);
keir@18450 1191 return -EINVAL;
kaf24@3757 1192 }
kaf24@3757 1193
ack@13295 1194 static int create_pae_xen_mappings(struct domain *d, l3_pgentry_t *pl3e)
kaf24@5275 1195 {
kaf24@8764 1196 struct page_info *page;
kaf24@5399 1197 l3_pgentry_t l3e3;
keir@19983 1198 #ifdef __i386__
keir@19983 1199 l2_pgentry_t *pl2e, l2e;
kaf24@5399 1200 int i;
kfraser@15012 1201 #endif
kfraser@15012 1202
kfraser@15012 1203 if ( !is_pv_32bit_domain(d) )
ack@13295 1204 return 1;
kaf24@5399 1205
kaf24@5399 1206 pl3e = (l3_pgentry_t *)((unsigned long)pl3e & PAGE_MASK);
kaf24@5399 1207
kaf24@5399 1208 /* 3rd L3 slot contains L2 with Xen-private mappings. It *must* exist. */
kaf24@5399 1209 l3e3 = pl3e[3];
kaf24@5399 1210 if ( !(l3e_get_flags(l3e3) & _PAGE_PRESENT) )
kaf24@5399 1211 {
kaf24@5399 1212 MEM_LOG("PAE L3 3rd slot is empty");
kaf24@5275 1213 return 0;
kaf24@5275 1214 }
kaf24@5275 1215
kaf24@5399 1216 /*
kaf24@5399 1217 * The Xen-private mappings include linear mappings. The L2 thus cannot
kaf24@5399 1218 * be shared by multiple L3 tables. The test here is adequate because:
kfraser@11522 1219 * 1. Cannot appear in slots != 3 because get_page_type() checks the
kfraser@11522 1220 * PGT_pae_xen_l2 flag, which is asserted iff the L2 appears in slot 3
kaf24@5399 1221 * 2. Cannot appear in another page table's L3:
kaf24@5399 1222 * a. alloc_l3_table() calls this function and this check will fail
kaf24@5399 1223 * b. mod_l3_entry() disallows updates to slot 3 in an existing table
kaf24@5399 1224 */
kaf24@5399 1225 page = l3e_get_page(l3e3);
kaf24@5399 1226 BUG_ON(page->u.inuse.type_info & PGT_pinned);
kaf24@5399 1227 BUG_ON((page->u.inuse.type_info & PGT_count_mask) == 0);
kfraser@11522 1228 BUG_ON(!(page->u.inuse.type_info & PGT_pae_xen_l2));
kaf24@5399 1229 if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
kaf24@5399 1230 {
kaf24@5399 1231 MEM_LOG("PAE L3 3rd slot is shared");
kaf24@5399 1232 return 0;
kaf24@5275 1233 }
kaf24@5399 1234
keir@19983 1235 #ifdef __i386__
keir@19983 1236 /* Xen linear pagetable mappings. */
kaf24@5399 1237 pl2e = map_domain_page(l3e_get_pfn(l3e3));
kaf24@5399 1238 for ( i = 0; i < (LINEARPT_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
kfraser@12825 1239 {
kfraser@12825 1240 l2e = l2e_empty();
kfraser@12825 1241 if ( l3e_get_flags(pl3e[i]) & _PAGE_PRESENT )
kfraser@12825 1242 l2e = l2e_from_pfn(l3e_get_pfn(pl3e[i]), __PAGE_HYPERVISOR);
kfraser@12825 1243 l2e_write(&pl2e[l2_table_offset(LINEAR_PT_VIRT_START) + i], l2e);
kfraser@12825 1244 }
keir@19983 1245 unmap_domain_page(pl2e);
ack@13295 1246 #endif
kaf24@5275 1247
kaf24@5275 1248 return 1;
kaf24@5275 1249 }
keir@17638 1250
keir@17638 1251 #ifdef __i386__
kaf24@10211 1252 /* Flush a pgdir update into low-memory caches. */
kaf24@10211 1253 static void pae_flush_pgd(
kaf24@10211 1254 unsigned long mfn, unsigned int idx, l3_pgentry_t nl3e)
kaf24@10211 1255 {
kaf24@10211 1256 struct domain *d = page_get_owner(mfn_to_page(mfn));
kaf24@10215 1257 struct vcpu *v;
kaf24@10215 1258 intpte_t _ol3e, _nl3e, _pl3e;
kaf24@10215 1259 l3_pgentry_t *l3tab_ptr;
kaf24@10215 1260 struct pae_l3_cache *cache;
kaf24@10211 1261
Tim@13943 1262 if ( unlikely(shadow_mode_enabled(d)) )
Tim@13943 1263 {
Tim@13943 1264 cpumask_t m = CPU_MASK_NONE;
Tim@13943 1265 /* Re-shadow this l3 table on any vcpus that are using it */
Tim@13943 1266 for_each_vcpu ( d, v )
Tim@13943 1267 if ( pagetable_get_pfn(v->arch.guest_table) == mfn )
Tim@13943 1268 {
Tim@13943 1269 paging_update_cr3(v);
Tim@13943 1270 cpus_or(m, m, v->vcpu_dirty_cpumask);
Tim@13943 1271 }
keir@19689 1272 flush_tlb_mask(&m);
Tim@13943 1273 }
Tim@13943 1274
kaf24@10211 1275 /* If below 4GB then the pgdir is not shadowed in low memory. */
kaf24@10215 1276 if ( !l3tab_needs_shadow(mfn) )
kaf24@10211 1277 return;
kaf24@10211 1278
kaf24@10215 1279 for_each_vcpu ( d, v )
kaf24@10215 1280 {
kaf24@10215 1281 cache = &v->arch.pae_l3_cache;
kaf24@10215 1282
kaf24@10215 1283 spin_lock(&cache->lock);
kaf24@10215 1284
kaf24@10215 1285 if ( cache->high_mfn == mfn )
kaf24@10215 1286 {
kaf24@10215 1287 l3tab_ptr = &cache->table[cache->inuse_idx][idx];
kaf24@10215 1288 _ol3e = l3e_get_intpte(*l3tab_ptr);
kaf24@10215 1289 _nl3e = l3e_get_intpte(nl3e);
kfraser@15100 1290 _pl3e = cmpxchg(&l3e_get_intpte(*l3tab_ptr), _ol3e, _nl3e);
kaf24@10215 1291 BUG_ON(_pl3e != _ol3e);
kaf24@10215 1292 }
kaf24@10215 1293
kaf24@10215 1294 spin_unlock(&cache->lock);
kaf24@10215 1295 }
kaf24@10215 1296
keir@19689 1297 flush_tlb_mask(&d->domain_dirty_cpumask);
kaf24@10211 1298 }
ack@13295 1299 #else
kaf24@10211 1300 # define pae_flush_pgd(mfn, idx, nl3e) ((void)0)
kaf24@5275 1301 #endif
kaf24@5275 1302
keir@18450 1303 static int alloc_l2_table(struct page_info *page, unsigned long type,
keir@18450 1304 int preemptible)
kaf24@3757 1305 {
kaf24@3782 1306 struct domain *d = page_get_owner(page);
kaf24@8764 1307 unsigned long pfn = page_to_mfn(page);
kaf24@3782 1308 l2_pgentry_t *pl2e;
keir@18450 1309 unsigned int i;
keir@18450 1310 int rc = 0;
mafetter@4179 1311
kaf24@5394 1312 pl2e = map_domain_page(pfn);
kaf24@3757 1313
keir@18450 1314 for ( i = page->nr_validated_ptes; i < L2_PAGETABLE_ENTRIES; i++ )
kaf24@5399 1315 {
keir@18450 1316 if ( preemptible && i && hypercall_preempt_check() )
keir@18450 1317 {
keir@18450 1318 page->nr_validated_ptes = i;
keir@18450 1319 rc = -EAGAIN;
keir@18450 1320 break;
keir@18450 1321 }
keir@18450 1322
keir@18450 1323 if ( !is_guest_l2_slot(d, type, i) ||
keir@18450 1324 (rc = get_page_from_l2e(pl2e[i], pfn, d)) > 0 )
keir@18197 1325 continue;
keir@18197 1326
keir@18450 1327 if ( rc < 0 )
keir@18450 1328 {
keir@18450 1329 MEM_LOG("Failure in alloc_l2_table: entry %d", i);
keir@18450 1330 while ( i-- > 0 )
keir@18450 1331 if ( is_guest_l2_slot(d, type, i) )
keir@18450 1332 put_page_from_l2e(pl2e[i], pfn);
keir@18450 1333 break;
keir@18450 1334 }
keir@18450 1335
ack@13295 1336 adjust_guest_l2e(pl2e[i], d);
kaf24@5275 1337 }
kaf24@5275 1338
keir@19983 1339 if ( rc >= 0 && (type & PGT_pae_xen_l2) )
keir@19983 1340 {
keir@19983 1341 /* Xen private mappings. */
keir@19983 1342 #if defined(__i386__)
keir@19983 1343 memcpy(&pl2e[L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1)],
keir@19983 1344 &idle_pg_table_l2[L2_PAGETABLE_FIRST_XEN_SLOT],
keir@19983 1345 L2_PAGETABLE_XEN_SLOTS * sizeof(l2_pgentry_t));
keir@19983 1346 for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
keir@19983 1347 l2e_write(&pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i],
keir@19983 1348 l2e_from_page(perdomain_pt_page(d, i),
keir@19983 1349 __PAGE_HYPERVISOR));
keir@19983 1350 pl2e[l2_table_offset(LINEAR_PT_VIRT_START)] =
keir@19983 1351 l2e_from_pfn(pfn, __PAGE_HYPERVISOR);
keir@20341 1352 #else
keir@19983 1353 memcpy(&pl2e[COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d)],
keir@19983 1354 &compat_idle_pg_table_l2[
keir@19983 1355 l2_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
keir@19983 1356 COMPAT_L2_PAGETABLE_XEN_SLOTS(d) * sizeof(*pl2e));
keir@19983 1357 #endif
keir@19983 1358 }
keir@19983 1359
kaf24@5394 1360 unmap_domain_page(pl2e);
keir@18450 1361 return rc > 0 ? 0 : rc;
kaf24@3757 1362 }
kaf24@3757 1363
keir@18450 1364 static int alloc_l3_table(struct page_info *page, int preemptible)
kaf24@3782 1365 {
kaf24@3782 1366 struct domain *d = page_get_owner(page);
kaf24@8764 1367 unsigned long pfn = page_to_mfn(page);
kaf24@5275 1368 l3_pgentry_t *pl3e;
keir@18450 1369 unsigned int i;
keir@18780 1370 int rc = 0, partial = page->partial_pte;
kaf24@3782 1371
keir@17638 1372 #if CONFIG_PAGING_LEVELS == 3
kaf24@10304 1373 /*
kaf24@10304 1374 * PAE pgdirs above 4GB are unacceptable if the guest does not understand
kaf24@10304 1375 * the weird 'extended cr3' format for dealing with high-order address
kaf24@10304 1376 * bits. We cut some slack for control tools (before vcpu0 is initialised).
kaf24@10304 1377 */
kaf24@10304 1378 if ( (pfn >= 0x100000) &&
kaf24@10304 1379 unlikely(!VM_ASSIST(d, VMASST_TYPE_pae_extended_cr3)) &&
keir@19826 1380 d->vcpu && d->vcpu[0] && d->vcpu[0]->is_initialised )
kaf24@10304 1381 {
kaf24@10304 1382 MEM_LOG("PAE pgd must be below 4GB (0x%lx >= 0x100000)", pfn);
keir@18450 1383 return -EINVAL;
kaf24@10304 1384 }
kaf24@10304 1385 #endif
kaf24@10304 1386
kaf24@5394 1387 pl3e = map_domain_page(pfn);
ack@13295 1388
ack@13295 1389 /*
ack@13295 1390 * PAE guests allocate full pages, but aren't required to initialize
ack@13295 1391 * more than the first four entries; when running in compatibility
ack@13295 1392 * mode, however, the full page is visible to the MMU, and hence all
ack@13295 1393 * 512 entries must be valid/verified, which is most easily achieved
ack@13295 1394 * by clearing them out.
ack@13295 1395 */
kfraser@15012 1396 if ( is_pv_32on64_domain(d) )
ack@13295 1397 memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
ack@13295 1398
keir@18780 1399 for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
keir@18780 1400 i++, partial = 0 )
kaf24@5399 1401 {
kfraser@15012 1402 if ( is_pv_32bit_domain(d) && (i == 3) )
kfraser@11522 1403 {
kfraser@11522 1404 if ( !(l3e_get_flags(pl3e[i]) & _PAGE_PRESENT) ||
keir@18450 1405 (l3e_get_flags(pl3e[i]) & l3_disallow_mask(d)) )
keir@18450 1406 rc = -EINVAL;
keir@18450 1407 else
keir@18450 1408 rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
keir@18450 1409 PGT_l2_page_table |
keir@18450 1410 PGT_pae_xen_l2,
keir@18780 1411 d, partial, preemptible);
kfraser@11522 1412 }
keir@18450 1413 else if ( !is_guest_l3_slot(i) ||
keir@18780 1414 (rc = get_page_from_l3e(pl3e[i], pfn, d,
keir@18780 1415 partial, preemptible)) > 0 )
keir@18197 1416 continue;
keir@18450 1417
keir@18450 1418 if ( rc == -EAGAIN )
keir@18450 1419 {
keir@18450 1420 page->nr_validated_ptes = i;
keir@18780 1421 page->partial_pte = partial ?: 1;
keir@18450 1422 }
keir@18450 1423 else if ( rc == -EINTR && i )
keir@18450 1424 {
keir@18450 1425 page->nr_validated_ptes = i;
keir@18450 1426 page->partial_pte = 0;
keir@18450 1427 rc = -EAGAIN;
keir@18450 1428 }
keir@18450 1429 if ( rc < 0 )
keir@18450 1430 break;
keir@17638 1431
ack@13295 1432 adjust_guest_l3e(pl3e[i], d);
kaf24@5275 1433 }
kaf24@5275 1434
keir@18450 1435 if ( rc >= 0 && !create_pae_xen_mappings(d, pl3e) )
keir@18450 1436 rc = -EINVAL;
keir@18450 1437 if ( rc < 0 && rc != -EAGAIN && rc != -EINTR )
keir@18197 1438 {
keir@18450 1439 MEM_LOG("Failure in alloc_l3_table: entry %d", i);
keir@18450 1440 while ( i-- > 0 )
keir@18450 1441 {
keir@18450 1442 if ( !is_guest_l3_slot(i) )
keir@18450 1443 continue;
keir@18450 1444 unadjust_guest_l3e(pl3e[i], d);
keir@18780 1445 put_page_from_l3e(pl3e[i], pfn, 0, 0);
keir@18450 1446 }
keir@18197 1447 }
kaf24@3782 1448
kaf24@5394 1449 unmap_domain_page(pl3e);
keir@18450 1450 return rc > 0 ? 0 : rc;
kaf24@3782 1451 }
kaf24@5275 1452
kaf24@5275 1453 #if CONFIG_PAGING_LEVELS >= 4
keir@18450 1454 static int alloc_l4_table(struct page_info *page, int preemptible)
kaf24@3782 1455 {
kaf24@3782 1456 struct domain *d = page_get_owner(page);
kaf24@8764 1457 unsigned long pfn = page_to_mfn(page);
kaf24@3782 1458 l4_pgentry_t *pl4e = page_to_virt(page);
keir@18450 1459 unsigned int i;
keir@18780 1460 int rc = 0, partial = page->partial_pte;
keir@18780 1461
keir@18780 1462 for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
keir@18780 1463 i++, partial = 0 )
kaf24@6094 1464 {
keir@18450 1465 if ( !is_guest_l4_slot(d, i) ||
keir@18780 1466 (rc = get_page_from_l4e(pl4e[i], pfn, d,
keir@18780 1467 partial, preemptible)) > 0 )
keir@18197 1468 continue;
keir@18197 1469
keir@18450 1470 if ( rc == -EAGAIN )
keir@18450 1471 {
keir@18450 1472 page->nr_validated_ptes = i;
keir@18780 1473 page->partial_pte = partial ?: 1;
keir@18450 1474 }
keir@18450 1475 else if ( rc == -EINTR )
keir@18450 1476 {
keir@18450 1477 if ( i )
keir@18450 1478 {
keir@18450 1479 page->nr_validated_ptes = i;
keir@18450 1480 page->partial_pte = 0;
keir@18450 1481 rc = -EAGAIN;
keir@18450 1482 }
keir@18450 1483 }
keir@18450 1484 else if ( rc < 0 )
keir@18450 1485 {
keir@18450 1486 MEM_LOG("Failure in alloc_l4_table: entry %d", i);
keir@18450 1487 while ( i-- > 0 )
keir@18450 1488 if ( is_guest_l4_slot(d, i) )
keir@18780 1489 put_page_from_l4e(pl4e[i], pfn, 0, 0);
keir@18450 1490 }
keir@18450 1491 if ( rc < 0 )
keir@18450 1492 return rc;
kfraser@11264 1493
ack@13295 1494 adjust_guest_l4e(pl4e[i], d);
kaf24@6077 1495 }
kaf24@3782 1496
kaf24@3791 1497 /* Xen private mappings. */
kaf24@3791 1498 memcpy(&pl4e[ROOT_PAGETABLE_FIRST_XEN_SLOT],
kaf24@3791 1499 &idle_pg_table[ROOT_PAGETABLE_FIRST_XEN_SLOT],
kaf24@3791 1500 ROOT_PAGETABLE_XEN_SLOTS * sizeof(l4_pgentry_t));
kaf24@3791 1501 pl4e[l4_table_offset(LINEAR_PT_VIRT_START)] =
kaf24@5288 1502 l4e_from_pfn(pfn, __PAGE_HYPERVISOR);
kaf24@3791 1503 pl4e[l4_table_offset(PERDOMAIN_VIRT_START)] =
ack@13297 1504 l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
ack@13297 1505 __PAGE_HYPERVISOR);
kaf24@3791 1506
keir@18450 1507 return rc > 0 ? 0 : rc;
kaf24@3782 1508 }
kaf24@5399 1509 #else
keir@18450 1510 #define alloc_l4_table(page, preemptible) (-EINVAL)
kaf24@5399 1511 #endif
kaf24@3782 1512
kaf24@3782 1513
kaf24@8764 1514 static void free_l1_table(struct page_info *page)
kaf24@3757 1515 {
kaf24@3757 1516 struct domain *d = page_get_owner(page);
kaf24@8764 1517 unsigned long pfn = page_to_mfn(page);
kaf24@3757 1518 l1_pgentry_t *pl1e;
keir@18450 1519 unsigned int i;
kaf24@3757 1520
kaf24@5394 1521 pl1e = map_domain_page(pfn);
kaf24@3757 1522
kaf24@3775 1523 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
kaf24@3791 1524 if ( is_guest_l1_slot(i) )
kaf24@3791 1525 put_page_from_l1e(pl1e[i], d);
kaf24@3757 1526
kaf24@5394 1527 unmap_domain_page(pl1e);
kaf24@3757 1528 }
kaf24@3757 1529
kaf24@3757 1530
keir@18450 1531 static int free_l2_table(struct page_info *page, int preemptible)
kaf24@3782 1532 {
keir@20341 1533 #ifdef __x86_64__
ack@13295 1534 struct domain *d = page_get_owner(page);
ack@13295 1535 #endif
kaf24@8764 1536 unsigned long pfn = page_to_mfn(page);
kaf24@3782 1537 l2_pgentry_t *pl2e;
keir@18450 1538 unsigned int i = page->nr_validated_ptes - 1;
keir@18450 1539 int err = 0;
kaf24@3782 1540
kaf24@5394 1541 pl2e = map_domain_page(pfn);
kaf24@3782 1542
keir@18450 1543 ASSERT(page->nr_validated_ptes);
keir@18450 1544 do {
keir@18450 1545 if ( is_guest_l2_slot(d, page->u.inuse.type_info, i) &&
keir@18450 1546 put_page_from_l2e(pl2e[i], pfn) == 0 &&
keir@18450 1547 preemptible && i && hypercall_preempt_check() )
keir@18450 1548 {
keir@18450 1549 page->nr_validated_ptes = i;
keir@18450 1550 err = -EAGAIN;
keir@18450 1551 }
keir@18450 1552 } while ( !err && i-- );
kaf24@3782 1553
kaf24@5394 1554 unmap_domain_page(pl2e);
kfraser@11522 1555
keir@18450 1556 if ( !err )
keir@18450 1557 page->u.inuse.type_info &= ~PGT_pae_xen_l2;
keir@18450 1558
keir@18450 1559 return err;
kaf24@3782 1560 }
kaf24@3782 1561
keir@18450 1562 static int free_l3_table(struct page_info *page, int preemptible)
kaf24@3782 1563 {
ack@13295 1564 struct domain *d = page_get_owner(page);
kaf24@8764 1565 unsigned long pfn = page_to_mfn(page);
kaf24@5275 1566 l3_pgentry_t *pl3e;
keir@18780 1567 int rc = 0, partial = page->partial_pte;
keir@18780 1568 unsigned int i = page->nr_validated_ptes - !partial;
kaf24@3782 1569
kaf24@5394 1570 pl3e = map_domain_page(pfn);
kaf24@5275 1571
keir@18450 1572 do {
kaf24@3791 1573 if ( is_guest_l3_slot(i) )
ack@13295 1574 {
keir@18780 1575 rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible);
keir@18780 1576 if ( rc < 0 )
keir@18780 1577 break;
keir@18780 1578 partial = 0;
keir@18450 1579 if ( rc > 0 )
keir@18450 1580 continue;
ack@13295 1581 unadjust_guest_l3e(pl3e[i], d);
ack@13295 1582 }
keir@18450 1583 } while ( i-- );
kaf24@5275 1584
kaf24@5394 1585 unmap_domain_page(pl3e);
keir@18450 1586
keir@18450 1587 if ( rc == -EAGAIN )
keir@18450 1588 {
keir@18450 1589 page->nr_validated_ptes = i;
keir@18780 1590 page->partial_pte = partial ?: -1;
keir@18450 1591 }
keir@18450 1592 else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
keir@18450 1593 {
keir@18450 1594 page->nr_validated_ptes = i + 1;
keir@18450 1595 page->partial_pte = 0;
keir@18450 1596 rc = -EAGAIN;
keir@18450 1597 }
keir@18450 1598 return rc > 0 ? 0 : rc;
kaf24@3782 1599 }
kaf24@3782 1600
kaf24@5275 1601 #if CONFIG_PAGING_LEVELS >= 4
keir@18450 1602 static int free_l4_table(struct page_info *page, int preemptible)
kaf24@3782 1603 {
ack@14033 1604 struct domain *d = page_get_owner(page);
kaf24@8764 1605 unsigned long pfn = page_to_mfn(page);
kaf24@3782 1606 l4_pgentry_t *pl4e = page_to_virt(page);
keir@18780 1607 int rc = 0, partial = page->partial_pte;
keir@18780 1608 unsigned int i = page->nr_validated_ptes - !partial;
kaf24@3782 1609
keir@18450 1610 do {
ack@14033 1611 if ( is_guest_l4_slot(d, i) )
keir@18780 1612 rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible);
keir@18780 1613 if ( rc < 0 )
keir@18780 1614 break;
keir@18780 1615 partial = 0;
keir@18780 1616 } while ( i-- );
keir@18450 1617
keir@18450 1618 if ( rc == -EAGAIN )
keir@18450 1619 {
keir@18450 1620 page->nr_validated_ptes = i;
keir@18780 1621 page->partial_pte = partial ?: -1;
keir@18450 1622 }
keir@18450 1623 else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
keir@18450 1624 {
keir@18450 1625 page->nr_validated_ptes = i + 1;
keir@18450 1626 page->partial_pte = 0;
keir@18450 1627 rc = -EAGAIN;
keir@18450 1628 }
keir@18450 1629 return rc > 0 ? 0 : rc;
kaf24@3782 1630 }
keir@18450 1631 #else
keir@18450 1632 #define free_l4_table(page, preemptible) (-EINVAL)
kaf24@5275 1633 #endif
kaf24@3782 1634
keir@19141 1635 static int page_lock(struct page_info *page)
keir@17884 1636 {
keir@19141 1637 unsigned long x, nx;
keir@19141 1638
keir@19141 1639 do {
keir@19141 1640 while ( (x = page->u.inuse.type_info) & PGT_locked )
keir@17884 1641 cpu_relax();
keir@19141 1642 nx = x + (1 | PGT_locked);
keir@19141 1643 if ( !(x & PGT_validated) ||
keir@19141 1644 !(x & PGT_count_mask) ||
keir@19141 1645 !(nx & PGT_count_mask) )
keir@19141 1646 return 0;
keir@19141 1647 } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
keir@19141 1648
keir@19141 1649 return 1;
keir@17884 1650 }
keir@17884 1651
keir@17884 1652 static void page_unlock(struct page_info *page)
keir@17884 1653 {
keir@19141 1654 unsigned long x, nx, y = page->u.inuse.type_info;
keir@19141 1655
keir@19141 1656 do {
keir@19141 1657 x = y;
keir@19141 1658 nx = x - (1 | PGT_locked);
keir@19141 1659 } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
keir@17884 1660 }
Tim@13143 1661
Tim@13143 1662 /* How to write an entry to the guest pagetables.
Tim@13143 1663 * Returns 0 for failure (pointer not valid), 1 for success. */
Tim@13143 1664 static inline int update_intpte(intpte_t *p,
Tim@13143 1665 intpte_t old,
Tim@13143 1666 intpte_t new,
Tim@13143 1667 unsigned long mfn,
keir@16756 1668 struct vcpu *v,
keir@16756 1669 int preserve_ad)
kaf24@3757 1670 {
tdeegan@11189 1671 int rv = 1;
Tim@13143 1672 #ifndef PTE_UPDATE_WITH_CMPXCHG
keir@16756 1673 if ( !preserve_ad )
keir@16756 1674 {
keir@16756 1675 rv = paging_write_guest_entry(v, p, new, _mfn(mfn));
keir@16756 1676 }
keir@16756 1677 else
keir@16756 1678 #endif
kaf24@3757 1679 {
Tim@13143 1680 intpte_t t = old;
tdeegan@11189 1681 for ( ; ; )
kfraser@10487 1682 {
keir@16756 1683 intpte_t _new = new;
keir@16756 1684 if ( preserve_ad )
keir@16756 1685 _new |= old & (_PAGE_ACCESSED | _PAGE_DIRTY);
keir@16756 1686
keir@16756 1687 rv = paging_cmpxchg_guest_entry(v, p, &t, _new, _mfn(mfn));
Tim@13143 1688 if ( unlikely(rv == 0) )
tdeegan@11189 1689 {
tdeegan@11189 1690 MEM_LOG("Failed to update %" PRIpte " -> %" PRIpte
keir@16756 1691 ": saw %" PRIpte, old, _new, t);
tdeegan@11189 1692 break;
tdeegan@11189 1693 }
tdeegan@11189 1694
Tim@13143 1695 if ( t == old )
tdeegan@11189 1696 break;
tdeegan@11189 1697
tdeegan@11189 1698 /* Only the Accessed/Dirty flags are allowed to change. */
Tim@13143 1699 BUG_ON((t ^ old) & ~(intpte_t)(_PAGE_ACCESSED|_PAGE_DIRTY));
Tim@13143 1700
Tim@13143 1701 old = t;
kfraser@10487 1702 }
kaf24@3757 1703 }
tdeegan@11189 1704 return rv;
kaf24@3757 1705 }
kaf24@3757 1706
Tim@13143 1707 /* Macro that wraps the appropriate type-changes around update_intpte().
Tim@13143 1708 * Arguments are: type, ptr, old, new, mfn, vcpu, preserve_ad */
keir@16756 1709 #define UPDATE_ENTRY(_t,_p,_o,_n,_m,_v,_ad) \
kfraser@15100 1710 update_intpte(&_t ## e_get_intpte(*(_p)), \
Tim@13143 1711 _t ## e_get_intpte(_o), _t ## e_get_intpte(_n), \
keir@16756 1712 (_m), (_v), (_ad))
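The token pasting above is easier to read once expanded; purely as an illustration, an L1 invocation such as

    UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, preserve_ad)

turns into

    update_intpte(&l1e_get_intpte(*(pl1e)),
                  l1e_get_intpte(ol1e), l1e_get_intpte(nl1e),
                  (gl1mfn), (v), (preserve_ad));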
kaf24@3757 1713
kaf24@3757 1714 /* Update the L1 entry at pl1e to new value nl1e. */
keir@19421 1715 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
keir@19421 1716 unsigned long gl1mfn, int preserve_ad,
keir@20132 1717 struct vcpu *pt_vcpu, struct domain *pg_dom)
kaf24@3757 1718 {
kaf24@3757 1719 l1_pgentry_t ol1e;
keir@20132 1720 struct domain *pt_dom = pt_vcpu->domain;
kfraser@15212 1721 unsigned long mfn;
keir@18826 1722 p2m_type_t p2mt;
keir@17884 1723 int rc = 1;
keir@17884 1724
mafetter@4629 1725 if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
keir@19141 1726 return 0;
mafetter@4629 1727
keir@20132 1728 if ( unlikely(paging_mode_refcounts(pt_dom)) )
keir@17884 1729 {
keir@20132 1730 rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu, preserve_ad);
keir@17884 1731 return rc;
keir@17884 1732 }
tim@11687 1733
mafetter@4629 1734 if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
kaf24@3757 1735 {
kfraser@12601 1736 /* Translate foreign guest addresses. */
keir@20132 1737 mfn = mfn_x(gfn_to_mfn(pg_dom, l1e_get_pfn(nl1e), &p2mt));
keir@18826 1738 if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
keir@19141 1739 return 0;
kfraser@15212 1740 ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
kfraser@15212 1741 nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
kfraser@12601 1742
keir@20132 1743 if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom)) )
kaf24@3791 1744 {
kaf24@6368 1745 MEM_LOG("Bad L1 flags %x",
keir@20132 1746 l1e_get_flags(nl1e) & l1_disallow_mask(pt_dom));
kaf24@3791 1747 return 0;
kaf24@3791 1748 }
kaf24@3791 1749
kaf24@3791 1750 /* Fast path for identical mapping, r/w and presence. */
tdeegan@11189 1751 if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
keir@17099 1752 {
keir@20132 1753 adjust_guest_l1e(nl1e, pt_dom);
keir@20132 1754 rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
keir@17884 1755 preserve_ad);
keir@17884 1756 return rc;
keir@17099 1757 }
kaf24@3757 1758
keir@20132 1759 if ( unlikely(!get_page_from_l1e(nl1e, pt_dom, pg_dom)) )
keir@19141 1760 return 0;
kaf24@3757 1761
keir@20132 1762 adjust_guest_l1e(nl1e, pt_dom);
keir@20132 1763 if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
keir@16756 1764 preserve_ad)) )
kaf24@3757 1765 {
keir@17884 1766 ol1e = nl1e;
keir@17884 1767 rc = 0;
kaf24@3757 1768 }
kaf24@3757 1769 }
keir@20132 1770 else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, pt_vcpu,
keir@17884 1771 preserve_ad)) )
mafetter@4179 1772 {
keir@17884 1773 return 0;
mafetter@4179 1774 }
mafetter@4837 1775
keir@20132 1776 put_page_from_l1e(ol1e, pt_dom);
keir@17884 1777 return rc;
kaf24@3757 1778 }
kaf24@3757 1779
kaf24@3791 1780
kaf24@3791 1781 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */
kaf24@3791 1782 static int mod_l2_entry(l2_pgentry_t *pl2e,
kaf24@3791 1783 l2_pgentry_t nl2e,
kaf24@5275 1784 unsigned long pfn,
keir@19421 1785 int preserve_ad,
keir@19421 1786 struct vcpu *vcpu)
kaf24@3791 1787 {
kaf24@3791 1788 l2_pgentry_t ol2e;
keir@19421 1789 struct domain *d = vcpu->domain;
keir@17884 1790 struct page_info *l2pg = mfn_to_page(pfn);
keir@19141 1791 unsigned long type = l2pg->u.inuse.type_info;
keir@17884 1792 int rc = 1;
ack@13295 1793
ack@13295 1794 if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
kaf24@3791 1795 {
kaf24@3791 1796 MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e);
kaf24@3791 1797 return 0;
kaf24@3791 1798 }
kaf24@3791 1799
mafetter@4629 1800 if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
keir@19141 1801 return 0;
mafetter@4629 1802
mafetter@4629 1803 if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
kaf24@3791 1804 {
mafetter@4629 1805 if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
kaf24@3791 1806 {
kaf24@6368 1807 MEM_LOG("Bad L2 flags %x",
kaf24@5283 1808 l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
kaf24@3791 1809 return 0;
kaf24@3791 1810 }
kaf24@3791 1811
kaf24@3791 1812 /* Fast path for identical mapping and presence. */
keir@17099 1813 if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) )
keir@17099 1814 {
keir@17099 1815 adjust_guest_l2e(nl2e, d);
keir@19421 1816 rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad);
keir@17884 1817 return rc;
keir@17099 1818 }
kaf24@3791 1819
keir@18450 1820 if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
keir@19141 1821 return 0;
kaf24@3791 1822
keir@17099 1823 adjust_guest_l2e(nl2e, d);
keir@19421 1824 if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
keir@16756 1825 preserve_ad)) )
kaf24@3791 1826 {
keir@17884 1827 ol2e = nl2e;
keir@17884 1828 rc = 0;
kaf24@3791 1829 }
kaf24@3791 1830 }
keir@19421 1831 else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
keir@16756 1832 preserve_ad)) )
mafetter@4179 1833 {
kaf24@6094 1834 return 0;
mafetter@4179 1835 }
kaf24@3791 1836
kaf24@3791 1837 put_page_from_l2e(ol2e, pfn);
keir@17884 1838 return rc;
kaf24@3791 1839 }
kaf24@3791 1840
kaf24@3791 1841 /* Update the L3 entry at pl3e to new value nl3e. pl3e is within frame pfn. */
kaf24@3791 1842 static int mod_l3_entry(l3_pgentry_t *pl3e,
kaf24@3791 1843 l3_pgentry_t nl3e,
keir@16756 1844 unsigned long pfn,
keir@18450 1845 int preserve_ad,
keir@19421 1846 int preemptible,
keir@19421 1847 struct vcpu *vcpu)
kaf24@3791 1848 {
kaf24@3791 1849 l3_pgentry_t ol3e;
keir@19421 1850 struct domain *d = vcpu->domain;
keir@18450 1851 int rc = 0;
kaf24@3791 1852
kaf24@3791 1853 if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
kaf24@3791 1854 {
kaf24@3791 1855 MEM_LOG("Illegal L3 update attempt in Xen-private area %p", pl3e);
keir@18450 1856 return -EINVAL;
kaf24@3791 1857 }
kaf24@3791 1858
kaf24@5399 1859 /*
kaf24@5399 1860 * Disallow updates to final L3 slot. It contains Xen mappings, and it
kaf24@5399 1861 * would be a pain to ensure they remain continuously valid throughout.
kaf24@5399 1862 */
kfraser@15012 1863 if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
keir@18450 1864 return -EINVAL;
kaf24@5399 1865
mafetter@4629 1866 if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
keir@19141 1867 return -EFAULT;
mafetter@4629 1868
mafetter@4629 1869 if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
kaf24@3791 1870 {
ack@13295 1871 if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
kaf24@3791 1872 {
kaf24@6368 1873 MEM_LOG("Bad L3 flags %x",
ack@13295 1874 l3e_get_flags(nl3e) & l3_disallow_mask(d));
keir@18450 1875 return -EINVAL;
kaf24@3791 1876 }
kaf24@3791 1877
kaf24@3791 1878 /* Fast path for identical mapping and presence. */
keir@17099 1879 if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) )
keir@17099 1880 {
keir@17099 1881 adjust_guest_l3e(nl3e, d);
keir@19421 1882 rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad);
keir@18450 1883 return rc ? 0 : -EFAULT;
keir@17099 1884 }
kaf24@3791 1885
keir@18780 1886 rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
keir@18450 1887 if ( unlikely(rc < 0) )
keir@19141 1888 return rc;
keir@18450 1889 rc = 0;
tdeegan@11189 1890
keir@17099 1891 adjust_guest_l3e(nl3e, d);
keir@19421 1892 if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu,
keir@16756 1893 preserve_ad)) )
kaf24@3791 1894 {
keir@17884 1895 ol3e = nl3e;
keir@18450 1896 rc = -EFAULT;
kaf24@3791 1897 }
kaf24@3791 1898 }
keir@19421 1899 else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu,
keir@16756 1900 preserve_ad)) )
kaf24@6094 1901 {
keir@18450 1902 return -EFAULT;
kaf24@6094 1903 }
kaf24@6094 1904
keir@18450 1905 if ( likely(rc == 0) )
keir@18197 1906 {
keir@18197 1907 if ( !create_pae_xen_mappings(d, pl3e) )
keir@18197 1908 BUG();
keir@18197 1909
keir@18197 1910 pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
keir@18197 1911 }
kaf24@10211 1912
keir@18780 1913 put_page_from_l3e(ol3e, pfn, 0, 0);
keir@17884 1914 return rc;
kaf24@3791 1915 }
kaf24@3791 1916
kaf24@5275 1917 #if CONFIG_PAGING_LEVELS >= 4
kaf24@3791 1918
kaf24@3791 1919 /* Update the L4 entry at pl4e to new value nl4e. pl4e is within frame pfn. */
keir@16543 1920 static int mod_l4_entry(l4_pgentry_t *pl4e,
kaf24@3791 1921 l4_pgentry_t nl4e,
keir@16756 1922 unsigned long pfn,
keir@18450 1923 int preserve_ad,
keir@19421 1924 int preemptible,
keir@19421 1925 struct vcpu *vcpu)
kaf24@3791 1926 {
keir@19421 1927 struct domain *d = vcpu->domain;
kaf24@3791 1928 l4_pgentry_t ol4e;
keir@18450 1929 int rc = 0;
kaf24@3791 1930
ack@14033 1931 if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
kaf24@3791 1932 {
kaf24@3791 1933 MEM_LOG("Illegal L4 update attempt in Xen-private area %p", pl4e);
keir@18450 1934 return -EINVAL;
kaf24@3791 1935 }
kaf24@3791 1936
mafetter@4629 1937 if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
keir@19141 1938 return -EFAULT;
mafetter@4629 1939
mafetter@4629 1940 if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
kaf24@3791 1941 {
mafetter@4629 1942 if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
kaf24@3791 1943 {
kaf24@6368 1944 MEM_LOG("Bad L4 flags %x",
kaf24@5283 1945 l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
keir@18450 1946 return -EINVAL;
kaf24@3791 1947 }
kaf24@3791 1948
kaf24@3791 1949 /* Fast path for identical mapping and presence. */
keir@17099 1950 if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) )
keir@17099 1951 {
keir@17099 1952 adjust_guest_l4e(nl4e, d);
keir@19421 1953 rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad);
keir@18450 1954 return rc ? 0 : -EFAULT;
keir@17099 1955 }
keir@16543 1956
keir@18780 1957 rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
keir@18450 1958 if ( unlikely(rc < 0) )
keir@19141 1959 return rc;
keir@18450 1960 rc = 0;
kaf24@3791 1961
keir@17099 1962 adjust_guest_l4e(nl4e, d);
keir@19421 1963 if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu,
keir@16756 1964 preserve_ad)) )
kaf24@3791 1965 {
keir@17884 1966 ol4e = nl4e;
keir@18450 1967 rc = -EFAULT;
kaf24@3791 1968 }
kaf24@3791 1969 }
keir@19421 1970 else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu,
keir@16756 1971 preserve_ad)) )
kaf24@6077 1972 {
keir@18450 1973 return -EFAULT;
kaf24@6094 1974 }
kaf24@3791 1975
keir@18780 1976 put_page_from_l4e(ol4e, pfn, 0, 0);
keir@17884 1977 return rc;
kaf24@3791 1978 }
kaf24@3791 1979
kaf24@5275 1980 #endif
kaf24@3791 1981
keir@16530 1982 void put_page(struct page_info *page)
keir@16530 1983 {
keir@19127 1984 unsigned long nx, x, y = page->count_info;
keir@16530 1985
keir@16530 1986 do {
keir@19414 1987 ASSERT((y & PGC_count_mask) != 0);
keir@16530 1988 x = y;
keir@16530 1989 nx = x - 1;
keir@16530 1990 }
keir@16530 1991 while ( unlikely((y = cmpxchg(&page->count_info, x, nx)) != x) );
keir@16530 1992
keir@16530 1993 if ( unlikely((nx & PGC_count_mask) == 0) )
keir@16530 1994 {
keir@16530 1995 cleanup_page_cacheattr(page);
keir@16530 1996 free_domheap_page(page);
keir@16530 1997 }
keir@16530 1998 }
keir@16530 1999
keir@16530 2000
keir@19306 2001 struct domain *page_get_owner_and_reference(struct page_info *page)
keir@16530 2002 {
keir@19127 2003 unsigned long x, y = page->count_info;
keir@16530 2004
keir@16530 2005 do {
keir@19126 2006 x = y;
keir@19412 2007 /*
keir@19412 2008 * Count == 0: Page is not allocated, so we cannot take a reference.
keir@19412 2009 * Count == -1: Reference count would wrap, which is invalid.
keir@19412 2010 * Count == -2: Remaining unused ref is reserved for get_page_light().
keir@19412 2011 */
keir@19412 2012 if ( unlikely(((x + 2) & PGC_count_mask) <= 2) )
keir@19306 2013 return NULL;
keir@16530 2014 }
keir@19126 2015 while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x );
keir@19126 2016
keir@19306 2017 return page_get_owner(page);
keir@19306 2018 }
keir@19306 2019
keir@19306 2020
keir@19306 2021 int get_page(struct page_info *page, struct domain *domain)
keir@19306 2022 {
keir@19306 2023 struct domain *owner = page_get_owner_and_reference(page);
keir@19306 2024
keir@19306 2025 if ( likely(owner == domain) )
keir@19126 2026 return 1;
keir@19126 2027
keir@19412 2028 if ( owner != NULL )
keir@19412 2029 put_page(page);
keir@19126 2030
keir@19126 2031 if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
keir@19126 2032 gdprintk(XENLOG_INFO,
keir@19132 2033 "Error pfn %lx: rd=%p, od=%p, caf=%08lx, taf=%"
keir@19132 2034 PRtype_info "\n",
keir@19306 2035 page_to_mfn(page), domain, owner,
keir@19306 2036 page->count_info, page->u.inuse.type_info);
keir@19126 2037 return 0;
keir@16530 2038 }
keir@16530 2039
keir@18785 2040 /*
keir@18785 2041 * Special version of get_page() to be used exclusively when
keir@18785 2042 * - a page is known to already have a non-zero reference count
keir@18785 2043 * - the page does not need its owner to be checked
keir@18785 2044 * - it will not be called more than once without dropping the thus
keir@18785 2045 * acquired reference again.
keir@18785 2046 * Due to get_page() reserving one reference, this call cannot fail.
keir@18785 2047 */
keir@18785 2048 static void get_page_light(struct page_info *page)
keir@18785 2049 {
keir@19127 2050 unsigned long x, nx, y = page->count_info;
keir@18785 2051
keir@18785 2052 do {
keir@18785 2053 x = y;
keir@18785 2054 nx = x + 1;
keir@18785 2055 BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */
keir@18785 2056 BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */
keir@18785 2057 y = cmpxchg(&page->count_info, x, nx);
keir@18785 2058 }
keir@18785 2059 while ( unlikely(y != x) );
keir@18785 2060 }
keir@18785 2061
keir@18450 2062 static int alloc_page_type(struct page_info *page, unsigned long type,
keir@18450 2063 int preemptible)
kaf24@3757 2064 {
kfraser@11588 2065 struct domain *owner = page_get_owner(page);
keir@18450 2066 int rc;
kfraser@11588 2067
kfraser@11588 2068 /* A page table is dirtied when its type count becomes non-zero. */
kfraser@11588 2069 if ( likely(owner != NULL) )
Tim@15293 2070 paging_mark_dirty(owner, page_to_mfn(page));
kfraser@11588 2071
kaf24@5275 2072 switch ( type & PGT_type_mask )
kaf24@3757 2073 {
kaf24@3757 2074 case PGT_l1_page_table:
keir@18658 2075 rc = alloc_l1_table(page);
keir@18450 2076 break;
kaf24@3757 2077 case PGT_l2_page_table:
keir@18450 2078 rc = alloc_l2_table(page, type, preemptible);
keir@18450 2079 break;
kaf24@3782 2080 case PGT_l3_page_table:
keir@18450 2081 rc = alloc_l3_table(page, preemptible);
keir@18450 2082 break;
kaf24@3782 2083 case PGT_l4_page_table:
keir@18450 2084 rc = alloc_l4_table(page, preemptible);
keir@18450 2085 break;
keir@17425 2086 case PGT_seg_desc_page:
keir@18450 2087 rc = alloc_segdesc_page(page);
keir@18450 2088 break;
kaf24@3757 2089 default:
keir@19127 2090 printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%lx\n",
kaf24@3757 2091 type, page->u.inuse.type_info,
kaf24@3757 2092 page->count_info);
keir@18450 2093 rc = -EINVAL;
kaf24@3757 2094 BUG();
kaf24@3757 2095 }
kaf24@3757 2096
keir@18450 2097 /* No need for atomic update of type_info here: no one else updates it. */
keir@18450 2098 wmb();
keir@18450 2099 if ( rc == -EAGAIN )
keir@18450 2100 {
keir@18785 2101 get_page_light(page);
keir@18450 2102 page->u.inuse.type_info |= PGT_partial;
keir@18450 2103 }
keir@18785 2104 else if ( rc == -EINTR )
keir@18450 2105 {
keir@18450 2106 ASSERT((page->u.inuse.type_info &
keir@18450 2107 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
keir@18450 2108 page->u.inuse.type_info &= ~PGT_count_mask;
keir@18450 2109 }
keir@18450 2110 else if ( rc )
keir@18450 2111 {
keir@18450 2112 ASSERT(rc < 0);
keir@18450 2113 MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
keir@19127 2114 PRtype_info ": caf=%08lx taf=%" PRtype_info,
keir@18450 2115 page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
keir@18450 2116 type, page->count_info, page->u.inuse.type_info);
keir@18450 2117 page->u.inuse.type_info = 0;
keir@18450 2118 }
keir@18450 2119 else
keir@18450 2120 {
keir@18450 2121 page->u.inuse.type_info |= PGT_validated;
keir@18450 2122 }
keir@18450 2123
keir@18450 2124 return rc;
kaf24@3757 2125 }
kaf24@3757 2126
kaf24@3757 2127
keir@18450 2128 int free_page_type(struct page_info *page, unsigned long type,
keir@18450 2129 int preemptible)
kaf24@3757 2130 {
mafetter@4179 2131 struct domain *owner = page_get_owner(page);
kaf24@8764 2132 unsigned long gmfn;
keir@18450 2133 int rc;
mafetter@4837 2134
keir@19197 2135 if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) )
mafetter@4837 2136 {
keir@19197 2137 /* A page table is dirtied when its type count becomes zero. */
keir@19197 2138 paging_mark_dirty(owner, page_to_mfn(page));
keir@19197 2139
keir@19197 2140 if ( shadow_mode_refcounts(owner) )
keir@19197 2141 return 0;
keir@19197 2142
keir@19197 2143 gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
keir@19197 2144 ASSERT(VALID_M2P(gmfn));
keir@20726 2145 /* Page sharing not supported for shadowed domains */
keir@20726 2146 if ( !SHARED_M2P(gmfn) )
keir@20726 2147 shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
mafetter@4837 2148 }
kaf24@3757 2149
keir@18450 2150 if ( !(type & PGT_partial) )
keir@18450 2151 {
keir@18450 2152 page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
keir@18450 2153 page->partial_pte = 0;
keir@18450 2154 }
keir@18769 2155
kaf24@7430 2156 switch ( type & PGT_type_mask )
kaf24@3757 2157 {
kaf24@3757 2158 case PGT_l1_page_table:
kaf24@3757 2159 free_l1_table(page);
keir@18450 2160 rc = 0;
kaf24@3757 2161 break;
kaf24@3757 2162 case PGT_l2_page_table:
keir@18450 2163 rc = free_l2_table(page, preemptible);
kaf24@3757 2164 break;
kaf24@3782 2165 case PGT_l3_page_table:
keir@18450 2166 #if CONFIG_PAGING_LEVELS == 3
keir@18450 2167 if ( !(type & PGT_partial) )
keir@18450 2168 page->nr_validated_ptes = L3_PAGETABLE_ENTRIES;
kaf24@5275 2169 #endif
keir@18450 2170 rc = free_l3_table(page, preemptible);
kaf24@3782 2171 break;
keir@18450 2172 case PGT_l4_page_table:
keir@18450 2173 rc = free_l4_table(page, preemptible);
keir@18450 2174 break;
kaf24@3757 2175 default:
keir@18450 2176 MEM_LOG("type %lx pfn %lx\n", type, page_to_mfn(page));
keir@18450 2177 rc = -EINVAL;
kaf24@3757 2178 BUG();
kaf24@3757 2179 }
keir@18450 2180
keir@18769 2181 return rc;
keir@18769 2182 }
keir@18769 2183
keir@18769 2184
keir@18769 2185 static int __put_final_page_type(
keir@18769 2186 struct page_info *page, unsigned long type, int preemptible)
keir@18769 2187 {
keir@18769 2188 int rc = free_page_type(page, type, preemptible);
keir@18769 2189
keir@18450 2190 /* No need for atomic update of type_info here: no one else updates it. */
keir@18450 2191 if ( rc == 0 )
keir@18450 2192 {
keir@18450 2193 /*
keir@18450 2194 * Record TLB information for flush later. We do not stamp page tables
keir@18450 2195 * when running in shadow mode:
keir@18450 2196 * 1. Pointless, since it's the shadow PTs that must be tracked.
keir@18450 2197 * 2. Shadow mode reuses this field for shadowed page tables to
keir@18450 2198 * store flags info -- we don't want to conflict with that.
keir@18450 2199 */
keir@18450 2200 if ( !(shadow_mode_enabled(page_get_owner(page)) &&
keir@18450 2201 (page->count_info & PGC_page_table)) )
keir@18450 2202 page->tlbflush_timestamp = tlbflush_current_time();
keir@18450 2203 wmb();
keir@18450 2204 page->u.inuse.type_info--;
keir@18450 2205 }
keir@18450 2206 else if ( rc == -EINTR )
keir@18450 2207 {
keir@18785 2208 ASSERT((page->u.inuse.type_info &
keir@18785 2209 (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
keir@18450 2210 if ( !(shadow_mode_enabled(page_get_owner(page)) &&
keir@18450 2211 (page->count_info & PGC_page_table)) )
keir@18450 2212 page->tlbflush_timestamp = tlbflush_current_time();
keir@18450 2213 wmb();
keir@18450 2214 page->u.inuse.type_info |= PGT_validated;
keir@18450 2215 }
keir@18450 2216 else
keir@18450 2217 {
keir@18450 2218 BUG_ON(rc != -EAGAIN);
keir@18450 2219 wmb();
keir@18785 2220 get_page_light(page);
keir@18450 2221 page->u.inuse.type_info |= PGT_partial;
keir@18450 2222 }
keir@18450 2223
keir@18450 2224 return rc;
kaf24@3757 2225 }
kaf24@3757 2226
kaf24@3757 2227
keir@18450 2228 static int __put_page_type(struct page_info *page,
keir@18450 2229 int preemptible)
kaf24@3757 2230 {
kaf24@6077 2231 unsigned long nx, x, y = page->u.inuse.type_info;
keir@18785 2232 int rc = 0;
keir@18780 2233
keir@18450 2234 for ( ; ; )
keir@18450 2235 {
kaf24@3757 2236 x = y;
cwc22@4061 2237 nx = x - 1;
kaf24@3757 2238
kaf24@3757 2239 ASSERT((x & PGT_count_mask) != 0);
kaf24@3757 2240
kaf24@3757 2241 if ( unlikely((nx & PGT_count_mask) == 0) )
kaf24@3757 2242 {
kaf24@3757 2243 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) &&
keir@18450 2244 likely(nx & (PGT_validated|PGT_partial)) )
kaf24@3757 2245 {
kaf24@3757 2246 /*
kaf24@3757 2247 * Page-table pages must be unvalidated when count is zero. The
kaf24@3757 2248 * 'free' is safe because the refcnt is non-zero and validated
kaf24@3757 2249 * bit is clear => other ops will spin or fail.
kaf24@3757 2250 */
keir@18450 2251 nx = x & ~(PGT_validated|PGT_partial);
keir@18450 2252 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info,
keir@18450 2253 x, nx)) != x) )
keir@18450 2254 continue;
keir@18785 2255 /* We cleared the 'valid bit', so we do the cleanup. */
keir@18785 2256 rc = __put_final_page_type(page, x, preemptible);
keir@18780 2257 if ( x & PGT_partial )
keir@18780 2258 put_page(page);
keir@18785 2259 break;
kaf24@3757 2260 }
tdeegan@11189 2261
kfraser@11588 2262 /*
kfraser@11588 2263 * Record TLB information for flush later. We do not stamp page
kfraser@11588 2264 * tables when running in shadow mode:
kfraser@11588 2265 * 1. Pointless, since it's the shadow PTs that must be tracked.
kfraser@11588 2266 * 2. Shadow mode reuses this field for shadowed page tables to
kfraser@11588 2267 * store flags info -- we don't want to conflict with that.
kfraser@11588 2268 */
tim@11686 2269 if ( !(shadow_mode_enabled(page_get_owner(page)) &&
tim@11686 2270 (page->count_info & PGC_page_table)) )
kfraser@11588 2271 page->tlbflush_timestamp = tlbflush_current_time();
kaf24@3757 2272 }
keir@18450 2273
keir@18450 2274 if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
keir@18450 2275 break;
keir@18450 2276
keir@18450 2277 if ( preemptible && hypercall_preempt_check() )
keir@18450 2278 return -EINTR;
kaf24@3757 2279 }
keir@18450 2280
keir@18785 2281 return rc;
kaf24@3757 2282 }
kaf24@3757 2283
kaf24@3757 2284
keir@18450 2285 static int __get_page_type(struct page_info *page, unsigned long type,
keir@18450 2286 int preemptible)
kaf24@3757 2287 {
kaf24@6077 2288 unsigned long nx, x, y = page->u.inuse.type_info;
keir@18785 2289 int rc = 0;
kaf24@3757 2290
kfraser@11522 2291 ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
kfraser@11522 2292
keir@18450 2293 for ( ; ; )
keir@18450 2294 {
kaf24@3757 2295 x = y;
kaf24@3757 2296 nx = x + 1;
kaf24@3757 2297 if ( unlikely((nx & PGT_count_mask) == 0) )
kaf24@3757 2298 {
kaf24@8764 2299 MEM_LOG("Type count overflow on pfn %lx", page_to_mfn(page));
keir@18450 2300 return -EINVAL;
kaf24@3757 2301 }
kaf24@3757 2302 else if ( unlikely((x & PGT_count_mask) == 0) )
kaf24@3757 2303 {
tim@11686 2304 struct domain *d = page_get_owner(page);
tim@11686 2305
keir@17927 2306 /* Normally we should never let a page go from type count 0
keir@17927 2307 * to type count 1 when it is shadowed. One exception:
keir@17927 2308 * out-of-sync shadowed pages are allowed to become
keir@17927 2309 * writable. */
keir@17927 2310 if ( d && shadow_mode_enabled(d)
keir@17927 2311 && (page->count_info & PGC_page_table)
keir@17927 2312 && !((page->shadow_flags & (1u<<29))
keir@17927 2313 && type == PGT_writable_page) )
keir@17927 2314 shadow_remove_all_shadows(d->vcpu[0], _mfn(page_to_mfn(page)));
tim@11686 2315
kfraser@11522 2316 ASSERT(!(x & PGT_pae_xen_l2));
kfraser@11522 2317 if ( (x & PGT_type_mask) != type )
kaf24@3757 2318 {
kfraser@11522 2319 /*
kfraser@11522 2320 * On type change we check to flush stale TLB entries. This
kfraser@11522 2321 * may be unnecessary (e.g., page was GDT/LDT) but those
kfraser@11522 2322 * circumstances should be very rare.
kfraser@11522 2323 */
tim@11686 2324 cpumask_t mask = d->domain_dirty_cpumask;
tim@11686 2325
tim@11686 2326 /* Don't flush if the timestamp is old enough */
kfraser@11522 2327 tlbflush_filter(mask, page->tlbflush_timestamp);
kfraser@11522 2328
kfraser@11588 2329 if ( unlikely(!cpus_empty(mask)) &&
kfraser@11588 2330 /* Shadow mode: track only writable pages. */
kfraser@11588 2331 (!shadow_mode_enabled(page_get_owner(page)) ||
kfraser@11588 2332 ((nx & PGT_type_mask) == PGT_writable_page)) )
kaf24@3757 2333 {
kfraser@14625 2334 perfc_incr(need_flush_tlb_flush);
keir@19689 2335 flush_tlb_mask(&mask);
kaf24@3757 2336 }
kaf24@3757 2337
keir@16732 2338 /* We lose existing type and validity. */
kfraser@11522 2339 nx &= ~(PGT_type_mask | PGT_validated);
kaf24@3757 2340 nx |= type;
kaf24@3757 2341
kaf24@3757 2342 /* No special validation needed for writable pages. */
kaf24@3757 2343 /* Page tables and GDT/LDT need to be scanned for validity. */
kaf24@3757 2344 if ( type == PGT_writable_page )
kaf24@3757 2345 nx |= PGT_validated;
kaf24@3757 2346 }
kaf24@3757 2347 }
kfraser@11522 2348 else if ( unlikely((x & (PGT_type_mask|PGT_pae_xen_l2)) != type) )
mafetter@4179 2349 {
keir@16162 2350 /* Don't log failure if it could be a recursive-mapping attempt. */
keir@16162 2351 if ( ((x & PGT_type_mask) == PGT_l2_page_table) &&
keir@16162 2352 (type == PGT_l1_page_table) )
keir@18450 2353 return -EINVAL;
keir@16162 2354 if ( ((x & PGT_type_mask) == PGT_l3_page_table) &&
keir@16162 2355 (type == PGT_l2_page_table) )
keir@18450 2356 return -EINVAL;
keir@16162 2357 if ( ((x & PGT_type_mask) == PGT_l4_page_table) &&
keir@16162 2358 (type == PGT_l3_page_table) )
keir@18450 2359 return -EINVAL;
keir@16162 2360 MEM_LOG("Bad type (saw %" PRtype_info " != exp %" PRtype_info ") "
keir@16162 2361 "for mfn %lx (pfn %lx)",
keir@16162 2362 x, type, page_to_mfn(page),
keir@16162 2363 get_gpfn_from_mfn(page_to_mfn(page)));
keir@18450 2364 return -EINVAL;
kfraser@11522 2365 }
kfraser@11522 2366 else if ( unlikely(!(x & PGT_validated)) )
kfraser@11522 2367 {
keir@18450 2368 if ( !(x & PGT_partial) )
keir@18450 2369 {
keir@18450 2370 /* Someone else is updating validation of this page. Wait... */
keir@18450 2371 while ( (y = page->u.inuse.type_info) == x )
keir@18450 2372 {
keir@18450 2373 if ( preemptible && hypercall_preempt_check() )
keir@18450 2374 return -EINTR;
keir@18450 2375 cpu_relax();
keir@18450 2376 }
keir@18450 2377 continue;
keir@18450 2378 }
keir@18450 2379 /* Type ref count was left at 1 when PGT_partial got set. */
keir@18450 2380 ASSERT((x & PGT_count_mask) == 1);
keir@18450 2381 nx = x & ~PGT_partial;
kaf24@3757 2382 }
keir@18450 2383
keir@18450 2384 if ( likely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) == x) )
keir@18450 2385 break;
keir@18450 2386
keir@18450 2387 if ( preemptible && hypercall_preempt_check() )
keir@18450 2388 return -EINTR;
kaf24@3757 2389 }
kaf24@3757 2390
keir@17761 2391 if ( unlikely((x & PGT_type_mask) != type) )
keir@17761 2392 {
keir@17761 2393 /* Special pages should not be accessible from devices. */
keir@17761 2394 struct domain *d = page_get_owner(page);
keir@20793 2395 if ( d && !is_hvm_domain(d) && unlikely(need_iommu(d)) )
keir@17761 2396 {
keir@17761 2397 if ( (x & PGT_type_mask) == PGT_writable_page )
keir@17761 2398 iommu_unmap_page(d, mfn_to_gmfn(d, page_to_mfn(page)));
keir@17761 2399 else if ( type == PGT_writable_page )
keir@17761 2400 iommu_map_page(d, mfn_to_gmfn(d, page_to_mfn(page)),
keir@17761 2401 page_to_mfn(page));
keir@17761 2402 }
keir@17761 2403 }
keir@17761 2404
kaf24@3757 2405 if ( unlikely(!(nx & PGT_validated)) )
kaf24@3757 2406 {
keir@18450 2407 if ( !(x & PGT_partial) )
kaf24@3757 2408 {
keir@18450 2409 page->nr_validated_ptes = 0;
keir@18450 2410 page->partial_pte = 0;
kaf24@3757 2411 }
keir@18785 2412 rc = alloc_page_type(page, type, preemptible);
kaf24@3757 2413 }
kaf24@3757 2414
keir@18785 2415 if ( (x & PGT_partial) && !(nx & PGT_partial) )
keir@18785 2416 put_page(page);
keir@18785 2417
keir@18785 2418 return rc;
keir@18450 2419 }
keir@18450 2420
keir@18450 2421 void put_page_type(struct page_info *page)
keir@18450 2422 {
keir@18450 2423 int rc = __put_page_type(page, 0);
keir@18450 2424 ASSERT(rc == 0);
keir@18450 2425 (void)rc;
kaf24@3757 2426 }
kaf24@3757 2427
keir@18450 2428 int get_page_type(struct page_info *page, unsigned long type)
keir@18450 2429 {
keir@18450 2430 int rc = __get_page_type(page, type, 0);
keir@18450 2431 if ( likely(rc == 0) )
keir@18450 2432 return 1;
keir@18450 2433 ASSERT(rc == -EINVAL);
keir@18450 2434 return 0;
keir@18450 2435 }
keir@18450 2436
keir@18450 2437 int put_page_type_preemptible(struct page_info *page)
keir@18450 2438 {
keir@18450 2439 return __put_page_type(page, 1);
keir@18450 2440 }
keir@18450 2441
keir@18450 2442 int get_page_type_preemptible(struct page_info *page, unsigned long type)
keir@18450 2443 {
keir@18450 2444 return __get_page_type(page, type, 1);
keir@18450 2445 }
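For callers, the non-preemptible wrappers keep the old boolean convention: get_page_type() returns 1 on success and 0 on failure, and every successful call must be balanced by a put_page_type(). A minimal usage sketch (assuming 'page' is already known to be valid):

    if ( get_page_type(page, PGT_writable_page) )
    {
        /* ... the frame is guaranteed to stay a writable page here ... */
        put_page_type(page);
    }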
kaf24@3757 2446
keir@16369 2447 void cleanup_page_cacheattr(struct page_info *page)
keir@16369 2448 {
keir@19642 2449 uint32_t cacheattr =
keir@19642 2450 (page->count_info & PGC_cacheattr_mask) >> PGC_cacheattr_base;
keir@16369 2451
keir@16369 2452 if ( likely(cacheattr == 0) )
keir@16369 2453 return;
keir@16369 2454
keir@16369 2455 page->count_info &= ~PGC_cacheattr_mask;
keir@16369 2456
keir@16376 2457 BUG_ON(is_xen_heap_page(page));
keir@16369 2458
keir@19670 2459 update_xen_mappings(page_to_mfn(page), 0);
keir@16369 2460 }
keir@16369 2461
keir@16369 2462
mafetter@4179 2463 int new_guest_cr3(unsigned long mfn)
kaf24@3757 2464 {
keir@19198 2465 struct vcpu *curr = current;
keir@19198 2466 struct domain *d = curr->domain;
mafetter@4179 2467 int okay;
mafetter@4179 2468 unsigned long old_base_mfn;
mafetter@4179 2469
keir@20341 2470 #ifdef __x86_64__
kfraser@15012 2471 if ( is_pv_32on64_domain(d) )
ack@13296 2472 {
Tim@13938 2473 okay = paging_mode_refcounts(d)
kaf24@13441 2474 ? 0 /* Old code was broken, but what should it be? */
ack@14033 2475 : mod_l4_entry(
keir@19198 2476 __va(pagetable_get_paddr(curr->arch.guest_table)),
ack@14033 2477 l4e_from_pfn(
ack@14033 2478 mfn,
ack@14033 2479 (_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
keir@19421 2480 pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0;
ack@13296 2481 if ( unlikely(!okay) )
ack@13296 2482 {
ack@13296 2483 MEM_LOG("Error while installing new compat baseptr %lx", mfn);
ack@13296 2484 return 0;
ack@13296 2485 }
ack@13296 2486
keir@19199 2487 invalidate_shadow_ldt(curr, 0);
keir@19198 2488 write_ptbase(curr);
ack@13296 2489
ack@13296 2490 return 1;
ack@13296 2491 }
ack@13296 2492 #endif
Tim@13938 2493 okay = paging_mode_refcounts(d)
kaf24@13441 2494 ? get_page_from_pagenr(mfn, d)
keir@18780 2495 : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
kaf24@13441 2496 if ( unlikely(!okay) )
kaf24@3757 2497 {
kaf24@13441 2498 MEM_LOG("Error while installing new baseptr %lx", mfn);
kaf24@13441 2499 return 0;
kaf24@9037 2500 }
kaf24@9037 2501
keir@19199 2502 invalidate_shadow_ldt(curr, 0);
keir@19198 2503
keir@19198 2504 old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
keir@19198 2505
keir@19198 2506 curr->arch.guest_table = pagetable_from_pfn(mfn);
keir@19198 2507 update_cr3(curr);
keir@19198 2508
keir@19198 2509 write_ptbase(curr);
kaf24@9037 2510
kaf24@9037 2511 if ( likely(old_base_mfn != 0) )
kaf24@9037 2512 {
Tim@13938 2513 if ( paging_mode_refcounts(d) )
kaf24@8764 2514 put_page(mfn_to_page(old_base_mfn));
mafetter@4179 2515 else
kaf24@8764 2516 put_page_and_type(mfn_to_page(old_base_mfn));
kaf24@3757 2517 }
kaf24@9037 2518
kaf24@9037 2519 return 1;
kaf24@3757 2520 }
kaf24@3757 2521
keir@20132 2522 static struct domain *get_pg_owner(domid_t domid)
kaf24@3757 2523 {
keir@20132 2524 struct domain *pg_owner = NULL, *curr = current->domain;
kaf24@8830 2525
kaf24@8830 2526 if ( likely(domid == DOMID_SELF) )
keir@20132 2527 {
keir@20132 2528 pg_owner = rcu_lock_domain(curr);
kaf24@4426 2529 goto out;
keir@20132 2530 }
keir@20132 2531
keir@20132 2532 if ( unlikely(domid == curr->domain_id) )
kaf24@8830 2533 {
keir@16612 2534 MEM_LOG("Cannot specify itself as foreign domain");
keir@20132 2535 goto out;
kaf24@8830 2536 }
keir@20132 2537
keir@20132 2538 if ( unlikely(paging_mode_translate(curr)) )
kfraser@12601 2539 {
kfraser@12601 2540 MEM_LOG("Cannot mix foreign mappings with translated domains");
keir@20132 2541 goto out;
kfraser@12601 2542 }
keir@20132 2543
keir@20132 2544 switch ( domid )
kaf24@4426 2545 {
keir@16894 2546 case DOMID_IO:
keir@20132 2547 pg_owner = rcu_lock_domain(dom_io);
keir@16894 2548 break;
keir@16894 2549 case DOMID_XEN:
keir@20132 2550 if ( !IS_PRIV(curr) )
keir@20132 2551 {
keir@16612 2552 MEM_LOG("Cannot set foreign dom");
kaf24@3757 2553 break;
kaf24@3757 2554 }
keir@20132 2555 pg_owner = rcu_lock_domain(dom_xen);
keir@16894 2556 break;
keir@16894 2557 default:
keir@20132 2558 if ( (pg_owner = rcu_lock_domain_by_id(domid)) == NULL )
kaf24@3757 2559 {
keir@16894 2560 MEM_LOG("Unknown domain '%u'", domid);
keir@16894 2561 break;
kaf24@3757 2562 }
keir@20132 2563 if ( !IS_PRIV_FOR(curr, pg_owner) )
keir@17357 2564 {
keir@16894 2565 MEM_LOG("Cannot set foreign dom");
keir@20132 2566 rcu_unlock_domain(pg_owner);
keir@20132 2567 pg_owner = NULL;
keir@16894 2568 }
keir@16894 2569 break;
kaf24@4426 2570 }
kaf24@4426 2571
kaf24@4426 2572 out:
keir@20132 2573 return pg_owner;
keir@20132 2574 }
keir@20132 2575
keir@20132 2576 static void put_pg_owner(struct domain *pg_owner)
keir@20132 2577 {
keir@20132 2578 rcu_unlock_domain(pg_owner);
kaf24@4426 2579 }
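get_pg_owner() hands back the RCU-locked target domain (or NULL on failure) and put_pg_owner() releases it; callers bracket any work on foreign pages between the two, as do_mmuext_op() does below. A condensed sketch of that bracket:

    if ( (pg_owner = get_pg_owner(foreigndom)) == NULL )
        return -ESRCH;
    /* ... operate on pages owned by pg_owner ... */
    put_pg_owner(pg_owner);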
kaf24@4426 2580
keir@19824 2581 static inline int vcpumask_to_pcpumask(
keir@19824 2582 struct domain *d, XEN_GUEST_HANDLE(const_void) bmap, cpumask_t *pmask)
kaf24@4459 2583 {
keir@19824 2584 unsigned int vcpu_id, vcpu_bias, offs;
keir@19824 2585 unsigned long vmask;
kaf24@5327 2586 struct vcpu *v;
keir@19824 2587 bool_t is_native = !is_pv_32on64_domain(d);
keir@19824 2588
keir@19824 2589 cpus_clear(*pmask);
keir@19824 2590 for ( vmask = 0, offs = 0; ; ++offs)
kaf24@4459 2591 {
keir@19824 2592 vcpu_bias = offs * (is_native ? BITS_PER_LONG : 32);
keir@19826 2593 if ( vcpu_bias >= d->max_vcpus )
keir@19824 2594 return 0;
keir@19824 2595
keir@19824 2596 if ( unlikely(is_native ?
keir@19824 2597 copy_from_guest_offset(&vmask, bmap, offs, 1) :
keir@19824 2598 copy_from_guest_offset((unsigned int *)&vmask, bmap,
keir@19824 2599 offs, 1)) )
keir@19824 2600 {
keir@19824 2601 cpus_clear(*pmask);
keir@19824 2602 return -EFAULT;
keir@19824 2603 }
keir@19824 2604
keir@19824 2605 while ( vmask )
keir@19824 2606 {
keir@19824 2607 vcpu_id = find_first_set_bit(vmask);
keir@19824 2608 vmask &= ~(1UL << vcpu_id);
keir@19824 2609 vcpu_id += vcpu_bias;
keir@19826 2610 if ( (vcpu_id >= d->max_vcpus) )
keir@19824 2611 return 0;
keir@19824 2612 if ( ((v = d->vcpu[vcpu_id]) != NULL) )
keir@19824 2613 cpus_or(*pmask, *pmask, v->vcpu_dirty_cpumask);
keir@19824 2614 }
kaf24@4459 2615 }
kaf24@4459 2616 }
kaf24@4459 2617
keir@18762 2618 #ifdef __i386__
keir@18762 2619 static inline void *fixmap_domain_page(unsigned long mfn)
keir@18762 2620 {
keir@18762 2621 unsigned int cpu = smp_processor_id();
keir@18762 2622 void *ptr = (void *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
keir@18762 2623
keir@18762 2624 l1e_write(fix_pae_highmem_pl1e - cpu,
keir@18762 2625 l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
keir@18762 2626 flush_tlb_one_local(ptr);
keir@18762 2627 return ptr;
keir@18762 2628 }
keir@18762 2629 static inline void fixunmap_domain_page(const void *ptr)
keir@18762 2630 {
keir@18762 2631 unsigned int cpu = virt_to_fix((unsigned long)ptr) - FIX_PAE_HIGHMEM_0;
keir@18762 2632
keir@18762 2633 l1e_write(fix_pae_highmem_pl1e - cpu, l1e_empty());
keir@18762 2634 this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
keir@18762 2635 }
keir@18762 2636 #else
keir@18762 2637 #define fixmap_domain_page(mfn) mfn_to_virt(mfn)
keir@18762 2638 #define fixunmap_domain_page(ptr) ((void)(ptr))
keir@18762 2639 #endif
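On i386 these helpers give the current CPU a temporary virtual mapping of an arbitrary MFN (on x86-64 the direct map makes them trivial). A rough usage sketch of the pattern the MMUEXT_CLEAR_PAGE handler below relies on:

    void *ptr = fixmap_domain_page(mfn);
    clear_page(ptr);                     /* or memcpy() for a page copy */
    fixunmap_domain_page(ptr);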
keir@18762 2640
kaf24@4426 2641 int do_mmuext_op(
kaf24@9904 2642 XEN_GUEST_HANDLE(mmuext_op_t) uops,
kaf24@4426 2643 unsigned int count,
kaf24@9904 2644 XEN_GUEST_HANDLE(uint) pdone,
kaf24@4426 2645 unsigned int foreigndom)
kaf24@4426 2646 {
kaf24@4426 2647 struct mmuext_op op;
kaf24@11019 2648 int rc = 0, i = 0, okay;
keir@20769 2649 unsigned long type;
kaf24@9197 2650 unsigned int done = 0;
keir@19198 2651 struct vcpu *curr = current;
keir@19198 2652 struct domain *d = curr->domain;
keir@20132 2653 struct domain *pg_owner;
kaf24@4426 2654
kaf24@4426 2655 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
kaf24@4426 2656 {
kaf24@4426 2657 count &= ~MMU_UPDATE_PREEMPTED;
kaf24@9197 2658 if ( unlikely(!guest_handle_is_null(pdone)) )
kaf24@9197 2659 (void)copy_from_guest(&done, pdone, 1);
kaf24@4426 2660 }
kfraser@14624 2661 else
kfraser@14624 2662 perfc_incr(calls_to_mmuext_op);
kaf24@4426 2663
kfraser@14181 2664 if ( unlikely(!guest_handle_okay(uops, count)) )
kfraser@14181 2665 {
kfraser@14181 2666 rc = -EFAULT;
kfraser@14181 2667 goto out;
kfraser@14181 2668 }
kfraser@14181 2669
keir@20132 2670 if ( (pg_owner = get_pg_owner(foreigndom)) == NULL )
kaf24@4426 2671 {
kaf24@8830 2672 rc = -ESRCH;
kaf24@4426 2673 goto out;
kaf24@4426 2674 }
kaf24@4426 2675
kaf24@4426 2676 for ( i = 0; i < count; i++ )
kaf24@4426 2677 {
kaf24@4426 2678 if ( hypercall_preempt_check() )
kaf24@3757 2679 {
keir@18450 2680 rc = -EAGAIN;
kaf24@4426 2681 break;
kaf24@3757 2682 }
kaf24@4426 2683
kaf24@9197 2684 if ( unlikely(__copy_from_guest(&op, uops, 1) != 0) )
kaf24@3757 2685 {
kaf24@9197 2686 MEM_LOG("Bad __copy_from_guest");
kaf24@4426 2687 rc = -EFAULT;
kaf24@3757 2688 break;
kaf24@3757 2689 }
kaf24@3757 2690
keir@19970 2691 okay = 1;
kaf24@4426 2692
kaf24@4426 2693 switch ( op.cmd )
kaf24@4426 2694 {
kaf24@4426 2695 case MMUEXT_PIN_L1_TABLE:
kfraser@11522 2696 type = PGT_l1_page_table;
kaf24@9263 2697 goto pin_page;
kaf24@9263 2698
kaf24@9255 2699 case MMUEXT_PIN_L2_TABLE:
kfraser@11522 2700 type = PGT_l2_page_table;
kfraser@11522 2701 goto pin_page;
kfraser@11522 2702
kaf24@9255 2703 case MMUEXT_PIN_L3_TABLE:
kfraser@11522 2704 type = PGT_l3_page_table;
kfraser@11522 2705 goto pin_page;
kfraser@11522 2706
kaf24@9255 2707 case MMUEXT_PIN_L4_TABLE:
keir@20132 2708 if ( is_pv_32bit_domain(pg_owner) )
ack@13298 2709 break;
kfraser@11522 2710 type = PGT_l4_page_table;
kaf24@9255 2711
keir@20769 2712 pin_page: {
keir@20769 2713 unsigned long mfn;
keir@20769 2714 struct page_info *page;
kfraser@15846 2715
kfraser@11522 2716 /* Ignore pinning of invalid paging levels. */
kfraser@11522 2717 if ( (op.cmd - MMUEXT_PIN_L1_TABLE) > (CONFIG_PAGING_LEVELS - 1) )
kfraser@11522 2718 break;
kfraser@11522 2719
keir@20132 2720 if ( paging_mode_refcounts(pg_owner) )
kaf24@9263 2721 break;
kaf24@9263 2722
keir@20769 2723 mfn = gmfn_to_mfn(pg_owner, op.arg1.mfn);
keir@20132 2724 rc = get_page_and_type_from_pagenr(mfn, type, pg_owner, 0, 1);
keir@18450 2725 okay = !rc;
kaf24@4426 2726 if ( unlikely(!okay) )
kaf24@4426 2727 {
keir@18450 2728 if ( rc == -EINTR )
keir@18450 2729 rc = -EAGAIN;
keir@18450 2730 else if ( rc != -EAGAIN )
keir@18450 2731 MEM_LOG("Error while pinning mfn %lx", mfn);
kaf24@4426 2732 break;
kaf24@4426 2733 }
keir@14888 2734
keir@20769 2735 page = mfn_to_page(mfn);
keir@20769 2736
keir@20769 2737 if ( (rc = xsm_memory_pin_page(d, page)) != 0 )
keir@20769 2738 {
keir@20769 2739 put_page_and_type(page);
keir@20769 2740 okay = 0;
keir@20769 2741 break;
keir@20769 2742 }
keir@20769 2743
kaf24@4426 2744 if ( unlikely(test_and_set_bit(_PGT_pinned,
kaf24@4426 2745 &page->u.inuse.type_info)) )
kaf24@4426 2746 {
kaf24@6841 2747 MEM_LOG("Mfn %lx already pinned", mfn);
kaf24@4426 2748 put_page_and_type(page);
kaf24@4426 2749 okay = 0;
kaf24@4426 2750 break;
kaf24@4426 2751 }
steven@11579 2752
kfraser@11581 2753 /* A page is dirtied when its pin status is set. */
keir@20790 2754 paging_mark_dirty(pg_owner, mfn);
steven@11579 2755
keir@14888 2756 /* We can race domain destruction (domain_relinquish_resources). */
keir@20132 2757 if ( unlikely(pg_owner != d) )
keir@14888 2758 {
keir@14888 2759 int drop_ref;
keir@20132 2760 spin_lock(&pg_owner->page_alloc_lock);
keir@20132 2761 drop_ref = (pg_owner->is_dying &&
keir@14888 2762 test_and_clear_bit(_PGT_pinned,
keir@14888 2763 &page->u.inuse.type_info));
keir@20132 2764 spin_unlock(&pg_owner->page_alloc_lock);
keir@14888 2765 if ( drop_ref )
keir@14888 2766 put_page_and_type(page);
keir@14888 2767 }
kfraser@14225 2768
kaf24@4426 2769 break;
keir@20769 2770 }
keir@20769 2771
keir@20769 2772 case MMUEXT_UNPIN_TABLE: {
keir@20769 2773 unsigned long mfn;
keir@20769 2774 struct page_info *page;
keir@20769 2775
keir@20790 2776 if ( paging_mode_refcounts(pg_owner) )
kaf24@8981 2777 break;
kaf24@8981 2778
keir@20769 2779 mfn = gmfn_to_mfn(pg_owner, op.arg1.mfn);
keir@20790 2780 if ( unlikely(!(okay = get_page_from_pagenr(mfn, pg_owner))) )
kaf24@4426 2781 {
keir@20769 2782 MEM_LOG("Mfn %lx bad domain", mfn);
keir@20769 2783 break;
kaf24@4426 2784 }
keir@20769 2785
keir@20769 2786 page = mfn_to_page(mfn);
keir@20769 2787
keir@20769 2788 if ( !test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
kaf24@4426 2789 {
kaf24@4426 2790 okay = 0;
kaf24@4426 2791 put_page(page);
kaf24@6841 2792 MEM_LOG("Mfn %lx not pinned", mfn);
keir@20769 2793 break;
kaf24@4426 2794 }
keir@20769 2795
keir@20769 2796 put_page_and_type(page);
keir@20769 2797 put_page(page);
keir@20769 2798
keir@20769 2799 /* A page is dirtied when its pin status is cleared. */
keir@20790 2800 paging_mark_dirty(pg_owner, mfn);
keir@20769 2801
kaf24@4426 2802 break;
keir@20769 2803 }
kaf24@4426 2804
kaf24@4426 2805 case MMUEXT_NEW_BASEPTR:
keir@20769 2806 okay = new_guest_cr3(gmfn_to_mfn(d, op.arg1.mfn));
kaf24@4426 2807 break;
kaf24@4426 2808
kaf24@4426 2809 #ifdef __x86_64__
kfraser@14181 2810 case MMUEXT_NEW_USER_BASEPTR: {
keir@20769 2811 unsigned long old_mfn, mfn;
keir@20769 2812
keir@20769 2813 mfn = gmfn_to_mfn(d, op.arg1.mfn);
kfraser@14181 2814 if ( mfn != 0 )
tim@11687 2815 {
Tim@13938 2816 if ( paging_mode_refcounts(d) )
tim@11687 2817 okay = get_page_from_pagenr(mfn, d);
tim@11687 2818 else
keir@18450 2819 okay = !get_page_and_type_from_pagenr(
keir@18780 2820 mfn, PGT_root_page_table, d, 0, 0);
kfraser@14181 2821 if ( unlikely(!okay) )
tim@11687 2822 {
kfraser@14181 2823 MEM_LOG("Error while installing new mfn %lx", mfn);
kfraser@14181 2824 break;
tim@11687 2825 }
kaf24@4426 2826 }
kfraser@14181 2827
keir@19198 2828 old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
keir@19198 2829 curr->arch.guest_table_user = pagetable_from_pfn(mfn);
kfraser@14181 2830
kfraser@14181 2831 if ( old_mfn != 0 )
kfraser@14181 2832 {
kfraser@14181 2833 if ( paging_mode_refcounts(d) )
kfraser@14181 2834 put_page(mfn_to_page(old_mfn));
kfraser@14181 2835 else
kfraser@14181 2836 put_page_and_type(mfn_to_page(old_mfn));
kfraser@14181 2837 }
kfraser@14181 2838
kaf24@4426 2839 break;
kfraser@14181 2840 }
kaf24@4426 2841 #endif
kaf24@4426 2842
kaf24@4426 2843 case MMUEXT_TLB_FLUSH_LOCAL:
keir@20132 2844 flush_tlb_local();
kaf24@4426 2845 break;
kaf24@4426 2846
kaf24@4426 2847 case MMUEXT_INVLPG_LOCAL:
Tim@13938 2848 if ( !paging_mode_enabled(d)
keir@19198 2849 || paging_invlpg(curr, op.arg1.linear_addr) != 0 )
keir@16155 2850 flush_tlb_one_local(op.arg1.linear_addr);
kaf24@4426 2851 break;
kaf24@4426 2852
kaf24@4426 2853 case MMUEXT_TLB_FLUSH_MULTI:
kaf24@4441 2854 case MMUEXT_INVLPG_MULTI:
kaf24@4441 2855 {
keir@20132 2856 cpumask_t pmask;
keir@19824 2857
keir@19824 2858 if ( unlikely(vcpumask_to_pcpumask(d, op.arg2.vcpumask, &pmask)) )
kaf24@4441 2859 {
kaf24@4459 2860 okay = 0;
kaf24@4459 2861 break;
kaf24@4441 2862 }
kaf24@4441 2863 if ( op.cmd == MMUEXT_TLB_FLUSH_MULTI )
keir@19689 2864 flush_tlb_mask(&pmask);
kaf24@4441 2865 else
keir@19689 2866 flush_tlb_one_mask(&pmask, op.arg1.linear_addr);
kaf24@4426 2867 break;
kaf24@4441 2868 }
kaf24@4426 2869
kaf24@4426 2870 case MMUEXT_TLB_FLUSH_ALL:
keir@20132 2871 flush_tlb_mask(&d->domain_dirty_cpumask);
kaf24@4426 2872 break;
kaf24@4426 2873
kaf24@4426 2874 case MMUEXT_INVLPG_ALL:
keir@19689 2875 flush_tlb_one_mask(&d->domain_dirty_cpumask, op.arg1.linear_addr);
kaf24@4426 2876 break;
kaf24@4426 2877
kaf24@4426 2878 case MMUEXT_FLUSH_CACHE:
kaf24@8498 2879 if ( unlikely(!cache_flush_permitted(d)) )
kaf24@4426 2880 {
kaf24@6368 2881 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.");
kaf24@4426 2882 okay = 0;
kaf24@3757 2883 }
kaf24@4426 2884 else
kaf24@4426 2885 {
kaf24@4426 2886 wbinvd();
kaf24@4426 2887 }
kaf24@4426 2888 break;
kaf24@4426 2889
kaf24@4426 2890 case MMUEXT_SET_LDT:
kaf24@3757 2891 {
kaf24@8011 2892 unsigned long ptr = op.arg1.linear_addr;
kaf24@8011 2893 unsigned long ents = op.arg2.nr_ents;
kaf24@8011 2894
Tim@13938 2895 if ( paging_mode_external(d) )
mafetter@4502 2896 {
keir@16612 2897 MEM_LOG("ignoring SET_LDT hypercall from external domain");
mafetter@4502 2898 okay = 0;
mafetter@4502 2899 }
kaf24@8011 2900 else if ( ((ptr & (PAGE_SIZE-1)) != 0) ||
kaf24@8011 2901 (ents > 8192) ||
kaf24@8011 2902 !array_access_ok(ptr, ents, LDT_ENTRY_SIZE) )
kaf24@4426 2903 {
kaf24@4426 2904 okay = 0;
kaf24@4692 2905 MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
kaf24@4426 2906 }
keir@19198 2907 else if ( (curr->arch.guest_context.ldt_ents != ents) ||
keir@19198 2908 (curr->arch.guest_context.ldt_base != ptr) )
kaf24@4426 2909 {
keir@19199 2910 invalidate_shadow_ldt(curr, 0);
keir@20132 2911 flush_tlb_local();
keir@19198 2912 curr->arch.guest_context.ldt_base = ptr;
keir@19198 2913 curr->arch.guest_context.ldt_ents = ents;
keir@19198 2914 load_LDT(curr);
kaf24@4426 2915 if ( ents != 0 )
keir@20132 2916 (void)map_ldt_shadow_page(0);
kaf24@4426 2917 }
kaf24@4426 2918 break;
kaf24@4426 2919 }
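        /*
         * Guest-side sketch of the corresponding SET_LDT request, again
         * assuming a Linux-style HYPERVISOR_mmuext_op() wrapper; "ldt_va" and
         * "nr_ldt_ents" are hypothetical. Per the checks above, the base must
         * be page aligned and the table limited to 8192 entries:
         *
         *     struct mmuext_op op;
         *     op.cmd              = MMUEXT_SET_LDT;
         *     op.arg1.linear_addr = ldt_va;
         *     op.arg2.nr_ents     = nr_ldt_ents;
         *     rc = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
         */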
sos22@8724 2920
keir@20769 2921 case MMUEXT_CLEAR_PAGE: {
keir@20769 2922 unsigned long mfn;
keir@18762 2923 unsigned char *ptr;
keir@18762 2924
keir@20769 2925 mfn = gmfn_to_mfn(d, op.arg1.mfn);
keir@20790 2926 okay = !get_page_and_type_from_pagenr(
keir@20790 2927 mfn, PGT_writable_page, d, 0, 0);
keir@18762 2928 if ( unlikely(!okay) )
keir@18762 2929 {
keir@18762 2930 MEM_LOG("Error while clearing mfn %lx", mfn);
keir@18762 2931 break;
keir@18762 2932 }
keir@18762 2933
keir@18762 2934 /* A page is dirtied when it's being cleared. */
keir@18762 2935 paging_mark_dirty(d, mfn);
keir@18762 2936
keir@18762 2937 ptr = fixmap_domain_page(mfn);
keir@18762 2938 clear_page(ptr);
keir@18762 2939 fixunmap_domain_page(ptr);
keir@18762 2940
keir@20769 2941 put_page_and_type(mfn_to_page(mfn));
keir@18762 2942 break;
keir@18762 2943 }
keir@18762 2944
keir@18762 2945 case MMUEXT_COPY_PAGE:
keir@18762 2946 {
keir@18762 2947 const unsigned char *src;
keir@18762 2948 unsigned char *dst;
keir@20769 2949 unsigned long src_mfn, mfn;
keir@18762 2950
keir@20790 2951 src_mfn = gmfn_to_mfn(d, op.arg2.src_mfn);
keir@20790 2952 okay = get_page_from_pagenr(src_mfn, d);
keir@18762 2953 if ( unlikely(!okay) )
keir@18762 2954 {
keir@18762 2955 MEM_LOG("Error while copying from mfn %lx", src_mfn);
keir@18762 2956 break;
keir@18762 2957 }
keir@18762 2958
keir@20769 2959 mfn = gmfn_to_mfn(d, op.arg1.mfn);
keir@20790 2960 okay = !get_page_and_type_from_pagenr(
keir@20790 2961 mfn, PGT_writable_page, d, 0, 0);
keir@18762 2962 if ( unlikely(!okay) )
keir@18762 2963 {
keir@18762 2964 put_page(mfn_to_page(src_mfn));
keir@18762 2965 MEM_LOG("Error while copying to mfn %lx", mfn);
keir@18762 2966 break;
keir@18762 2967 }
keir@18762 2968
keir@18762 2969 /* A page is dirtied when it's being copied to. */
keir@18762 2970 paging_mark_dirty(d, mfn);
keir@18762 2971
keir@18762 2972 src = map_domain_page(src_mfn);
keir@18762 2973 dst = fixmap_domain_page(mfn);
keir@18762 2974 copy_page(dst, src);
keir@18762 2975 fixunmap_domain_page(dst);
keir@18762 2976 unmap_domain_page(src);
keir@18762 2977
keir@20769 2978 put_page_and_type(mfn_to_page(mfn));
keir@18762 2979 put_page(mfn_to_page(src_mfn));
keir@18762 2980 break;
keir@18762 2981 }
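        /*
         * For both MMUEXT_CLEAR_PAGE and MMUEXT_COPY_PAGE above, op.arg1.mfn
         * names the destination frame, which must belong to the caller and is
         * held as PGT_writable_page for the duration of the operation;
         * MMUEXT_COPY_PAGE additionally takes the source frame in
         * op.arg2.src_mfn, on which only a plain reference is held.
         */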
keir@18762 2982
kaf24@4426 2983 default:
kaf24@4692 2984 MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
kfraser@12423 2985 rc = -ENOSYS;
kaf24@3757 2986 okay = 0;
kaf24@3757 2987 break;
kaf24@3757 2988 }
kaf24@3757 2989
kaf24@4426 2990 if ( unlikely(!okay) )
kaf24@3757 2991 {
kfraser@12423 2992 rc = rc ? rc : -EINVAL;
kaf24@3757 2993 break;
kaf24@3757 2994 }
kaf24@3757 2995
kaf24@9197 2996 guest_handle_add_offset(uops, 1);
kaf24@3757 2997 }
kaf24@3757 2998
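    /*
     * If we were preempted, the unfinished tail of the batch is re-issued as
     * a hypercall continuation: the remaining count is passed with
     * MMU_UPDATE_PREEMPTED ORed in, which tells the re-entered hypercall to
     * re-read the running total from @pdone rather than start from zero
     * (see the matching decode at the top of do_mmu_update() below).
     */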
keir@18450 2999 if ( rc == -EAGAIN )
keir@18450 3000 rc = hypercall_create_continuation(
keir@18450 3001 __HYPERVISOR_mmuext_op, "hihi",
keir@18450 3002 uops, (count - i) | MMU_UPDATE_PREEMPTED, pdone, foreigndom);
keir@18450 3003
keir@20132 3004 put_pg_owner(pg_owner);
kfraser@14181 3005
kfraser@14624 3006 perfc_add(num_mmuext_ops, i);
kfraser@14624 3007
kaf24@4426 3008 out:
kaf24@4426 3009 /* Add incremental work we have done to the @done output parameter. */
kaf24@9197 3010 if ( unlikely(!guest_handle_is_null(pdone)) )
kfraser@12423 3011 {
kfraser@12423 3012 done += i;
kaf24@9197 3013 copy_to_guest(pdone, &done, 1);
kfraser@12423 3014 }
kaf24@4426 3015
kaf24@4426 3016 return rc;
kaf24@3757 3017 }
kaf24@3757 3018
kaf24@3757 3019 int do_mmu_update(
kaf24@9904 3020 XEN_GUEST_HANDLE(mmu_update_t) ureqs,
kaf24@4426 3021 unsigned int count,
kaf24@9904 3022 XEN_GUEST_HANDLE(uint) pdone,
kaf24@4426 3023 unsigned int foreigndom)
kaf24@3757 3024 {
kaf24@8717 3025 struct mmu_update req;
mafetter@4837 3026 void *va;
kaf24@8764 3027 unsigned long gpfn, gmfn, mfn;
kaf24@8764 3028 struct page_info *page;
kaf24@11019 3029 int rc = 0, okay = 1, i = 0;
keir@19946 3030 unsigned int cmd, done = 0, pt_dom;
keir@20132 3031 struct domain *d = current->domain, *pt_owner = d, *pg_owner;
keir@19946 3032 struct vcpu *v = current;
kfraser@15099 3033 struct domain_mmap_cache mapcache;
kaf24@3757 3034
kaf24@4426 3035 if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
kaf24@3757 3036 {
kaf24@3757 3037 count &= ~MMU_UPDATE_PREEMPTED;
kaf24@9197 3038 if ( unlikely(!guest_handle_is_null(pdone)) )
kaf24@9197 3039 (void)copy_from_guest(&done, pdone, 1);
kaf24@4426 3040 }
kfraser@14624 3041 else
kfraser@14624 3042 perfc_incr(calls_to_mmu_update);
kaf24@4426 3043
kfraser@14181 3044 if ( unlikely(!guest_handle_okay(ureqs, count)) )
kfraser@14181 3045 {
kfraser@14181 3046 rc = -EFAULT;
kfraser@14181 3047 goto out;
kfraser@14181 3048 }
kaf24@4847 3049
keir@19946 3050 if ( (pt_dom = foreigndom >> 16) != 0 )
keir@19946 3051 {
keir@19946 3052 /* Pagetables belong to a foreign domain (PFD). */
keir@19946 3053 if ( (pt_owner = rcu_lock_domain_by_id(pt_dom - 1)) == NULL )
keir@19946 3054 {
keir@19946 3055 rc = -EINVAL;
keir@19946 3056 goto out;
keir@19946 3057 }
keir@19946 3058 if ( pt_owner == d )
keir@19946 3059 rcu_unlock_domain(pt_owner);
keir@19946 3060 if ( (v = pt_owner->vcpu ? pt_owner->vcpu[0] : NULL) == NULL )
keir@19946 3061 {
keir@19946 3062 rc = -EINVAL;
keir@19946 3063 goto out;
keir@19946 3064 }
keir@19946 3065 if ( !IS_PRIV_FOR(d, pt_owner) )
keir@19946 3066 {
keir@19946 3067 rc = -ESRCH;
keir@19946 3068 goto out;
keir@19946 3069 }
keir@19946 3070 }
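    /*
     * So foreigndom packs two domain references: the low 16 bits name the
     * owner of the page frames being referenced (the classic FD, resolved by
     * get_pg_owner() below), while the high 16 bits, if non-zero, name the
     * owner of the page tables being written, plus one. For example, a
     * privileged domain editing domain 5's page tables with entries that
     * reference its own pages would pass
     * foreigndom = ((5 + 1) << 16) | DOMID_SELF.
     */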
keir@19946 3071
keir@20132 3072 if ( (pg_owner = get_pg_owner((uint16_t)foreigndom)) == NULL )
kaf24@4426 3073 {
kaf24@8830 3074 rc = -ESRCH;
kaf24@4426 3075 goto out;
kaf24@3757 3076 }
kaf24@3757 3077
kfraser@14181 3078 domain_mmap_cache_init(&mapcache);
kfraser@14181 3079
kaf24@3757 3080 for ( i = 0; i < count; i++ )
kaf24@3757 3081 {
kaf24@3757 3082 if ( hypercall_preempt_check() )
kaf24@3757 3083 {
keir@18450 3084 rc = -EAGAIN;
kaf24@3757 3085 break;
kaf24@3757 3086 }
kaf24@3757 3087
kaf24@9197 3088 if ( unlikely(__copy_from_guest(&req, ureqs, 1) != 0) )
kaf24@3757 3089 {
kaf24@9197 3090 MEM_LOG("Bad __copy_from_guest");
kaf24@3757 3091 rc = -EFAULT;
kaf24@3757 3092 break;
kaf24@3757 3093 }
kaf24@3757 3094
kaf24@3757 3095 cmd = req.ptr & (sizeof(l1_pgentry_t)-1);
kaf24@3757 3096 okay = 0;
kaf24@3757 3097
kaf24@3757 3098 switch ( cmd )
kaf24@3757 3099 {
kaf24@3757 3100 /*
kaf24@3757 3101 * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table.
keir@16756 3102          * MMU_PT_UPDATE_PRESERVE_AD: As above but also preserve (OR)
keir@16756 3103 * current A/D bits.
kaf24@3757 3104 */
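        /*
         * Worked example (guest side, illustrative only): the command is
         * carried in the low bits of req.ptr, so a request that updates one
         * PTE while preserving its A/D bits would be encoded as
         *
         *     struct mmu_update u;
         *     u.ptr = pte_addr | MMU_PT_UPDATE_PRESERVE_AD;
         *     u.val = new_pte_val;
         *
         * where pte_addr (the address of the PTE, resolved via gfn_to_mfn()
         * below) and new_pte_val are hypothetical names. The command is
         * masked off and subtracted before the containing frame is looked up
         * and locked.
         */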
kaf24@3757 3105 case MMU_NORMAL_PT_UPDATE:
keir@16756 3106 case MMU_PT_UPDATE_PRESERVE_AD:
keir@20718 3107 {
keir@20718 3108 p2m_type_t p2mt;
keir@20718 3109
keir@20132 3110 rc = xsm_mmu_normal_update(d, pg_owner, req.val);
kfraser@15846 3111 if ( rc )
kfraser@15846 3112 break;
kfraser@15846 3113
keir@16756 3114 req.ptr -= cmd;
kaf24@8764 3115 gmfn = req.ptr >> PAGE_SHIFT;
keir@20718 3116 mfn = mfn_x(gfn_to_mfn(pt_owner, gmfn, &p2mt));
keir@20750 3117 if ( !p2m_is_valid(p2mt) )
keir@20750 3118 mfn = INVALID_MFN;
keir@20718 3119
keir@20718 3120 if ( p2m_is_paged(p2mt) )
keir@20718 3121 {
keir@20718 3122 p2m_mem_paging_populate(pg_owner, gmfn);
keir@20718 3123
keir@20718 3124 rc = -ENOENT;
keir@20718 3125 break;
keir@20718 3126 }
keir@19946 3127
keir@19946 3128 if ( unlikely(!get_page_from_pagenr(mfn, pt_owner)) )
kaf24@3757 3129 {
kaf24@3757 3130 MEM_LOG("Could not get page for normal update");
kaf24@3757 3131 break;
kaf24@3757 3132 }
kaf24@3757 3133
kaf24@5394 3134 va = map_domain_page_with_cache(mfn, &mapcache);
kaf24@5709 3135 va = (void *)((unsigned long)va +
kaf24@5709 3136 (unsigned long)(req.ptr & ~PAGE_MASK));
kaf24@8764 3137 page = mfn_to_page(mfn);
mafetter@4837 3138
keir@19141 3139 if ( page_lock(page) )
kfraser@10274 3140 {
keir@19141 3141 switch ( page->u.inuse.type_info & PGT_type_mask )
kaf24@3757 3142 {
kfraser@10274 3143 case PGT_l1_page_table:
kfraser@10274 3144 {
kfraser@10274 3145 l1_pgentry_t l1e = l1e_from_intpte(req.val);
keir@20718 3146 p2m_type_t l1e_p2mt;
keir@20718 3147 gfn_to_mfn(pg_owner, l1e_get_pfn(l1e), &l1e_p2mt);
keir@20718 3148
keir@20718 3149 if ( p2m_is_paged(l1e_p2mt) )
keir@20718 3150 {
keir@20718 3151 p2m_mem_paging_populate(pg_owner, l1e_get_pfn(l1e));
keir@20718 3152
keir@20718 3153 rc = -ENOENT;
keir@20718 3154 break;
keir@20718 3155 }
keir@20718 3156 else if ( p2m_ram_paging_in_start == l1e_p2mt )
keir@20718 3157 {
keir@20718 3158 rc = -ENOENT;
keir@20718 3159 break;
keir@20718 3160 }
keir@20729 3161                 /* XXX: Ugly: these checks should be pulled out into a
keir@20729 3162                  * separate function. We don't do that yet, so as not to
keir@20729 3163                  * interfere with the mem_paging patches. */
keir@20729 3164 else if ( p2m_ram_shared == l1e_p2mt )
keir@20729 3165 {
keir@20729 3166 /* Unshare the page for RW foreign mappings */
keir@20729 3167                     if ( l1e_get_flags(l1e) & _PAGE_RW )
keir@20729 3168 {
keir@20729 3169 rc = mem_sharing_unshare_page(pg_owner,
keir@20729 3170 l1e_get_pfn(l1e),
keir@20729 3171 0);
keir@20729 3172                         if ( rc ) break;
keir@20729 3173 }
keir@20729 3174 }
keir@20718 3175
keir@16756 3176 okay = mod_l1_entry(va, l1e, mfn,
keir@20132 3177 cmd == MMU_PT_UPDATE_PRESERVE_AD, v,
keir@20132 3178 pg_owner);
kaf24@3757 3179 }
kaf24@3757 3180 break;
kfraser@10274 3181 case PGT_l2_page_table:
kaf24@3757 3182 {
kfraser@10274 3183 l2_pgentry_t l2e = l2e_from_intpte(req.val);
keir@20718 3184 p2m_type_t l2e_p2mt;
keir@20718 3185 gfn_to_mfn(pg_owner, l2e_get_pfn(l2e), &l2e_p2mt);
keir@20718 3186
keir@20718 3187 if ( p2m_is_paged(l2e_p2mt) )
keir@20718 3188 {
keir@20718 3189 p2m_mem_paging_populate(pg_owner, l2e_get_pfn(l2e));
keir@20718 3190
keir@20718 3191 rc = -ENOENT;
keir@20718 3192 break;
keir@20718 3193 }
keir@20718 3194 else if ( p2m_ram_paging_in_start == l2e_p2mt )
keir@20718 3195 {
keir@20718 3196 rc = -ENOENT;
keir@20718 3197 break;
keir@20718 3198 }
keir@20729 3199 else if ( p2m_ram_shared == l2e_p2mt )
keir@20729 3200 {
keir@20729 3201                     MEM_LOG("Unexpected attempt to map shared page.");
keir@20729 3202 rc = -EINVAL;
keir@20729 3203 break;
keir@20729 3204 }
keir@20718 3206 
keir@19141 3207 okay = mod_l2_entry(va, l2e, mfn,
keir@19946 3208 cmd == MMU_PT_UPDATE_PRESERVE_AD, v);
kaf24@3757 3209 }
kaf24@3757 3210 break;
kfraser@10274 3211 case PGT_l3_page_table:
kaf24@3791 3212 {
kfraser@10274 3213 l3_pgentry_t l3e = l3e_from_intpte(req.val);
keir@20718 3214 p2m_type_t l3e_p2mt;
keir@20718 3215 gfn_to_mfn(pg_owner, l3e_get_pfn(l3e), &l3e_p2mt);
keir@20718 3216
keir@20718 3217 if ( p2m_is_paged(l3e_p2mt) )
keir@20718 3218 {
keir@20718 3219 p2m_mem_paging_populate(pg_owner, l3e_get_pfn(l3e));
keir@20718 3220
keir@20718 3221 rc = -ENOENT;
keir@20718 3222 break;
keir@20718 3223 }
keir@20718 3224 else if ( p2m_ram_paging_in_start == l3e_p2mt )
keir@20718 3225 {
keir@20718 3226 rc = -ENOENT;
keir@20718 3227 break;
keir@20718 3228 }
keir@20729 3229 else if ( p2m_ram_shared == l3e_p2mt )
keir@20729 3230 {
keir@20729 3231                     MEM_LOG("Unexpected attempt to map shared page.");
keir@20729 3232 rc = -EINVAL;
keir@20729 3233 break;
keir@20729 3234 }
keir@20718 3235
keir@18450 3236 rc = mod_l3_entry(va, l3e, mfn,
keir@19946 3237 cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
keir@18450 3238 okay = !rc;
kaf24@3791 3239 }
kaf24@3791 3240 break;
kaf24@5275 3241 #if CONFIG_PAGING_LEVELS >= 4
kfraser@10274 3242 case PGT_l4_page_table:
kfraser@14181 3243 {
kfraser@14181 3244 l4_pgentry_t l4e = l4e_from_intpte(req.val);
keir@20718 3245 p2m_type_t l4e_p2mt;
keir@20718 3246 gfn_to_mfn(pg_owner, l4e_get_pfn(l4e), &l4e_p2mt);
keir@20718 3247
keir@20718 3248 if ( p2m_is_paged(l4e_p2mt) )
keir@20718 3249 {
keir@20718 3250 p2m_mem_paging_populate(pg_owner, l4e_get_pfn(l4e));
keir@20718 3251
keir@20718 3252 rc = -ENOENT;
keir@20718 3253 break;
keir@20718 3254 }
keir@20718 3255 else if ( p2m_ram_paging_in_start == l4e_p2mt )
keir@20718 3256 {
keir@20718 3257 rc = -ENOENT;
keir@20718 3258 break;
keir@20718 3259 }
keir@20729 3260 else if ( p2m_ram_shared == l4e_p2mt )
keir@20729 3261 {
keir@20729 3262                     MEM_LOG("Unexpected attempt to map shared page.");
keir@20729 3263 rc = -EINVAL;
keir@20729 3264 break;
keir@20729 3265 }
keir@20718 3266
keir@18450 3267 rc = mod_l4_entry(va, l4e, mfn,
keir@19946 3268 cmd == MMU_PT_UPDATE_PRESERVE_AD, 1, v);
keir@18450 3269 okay = !rc;
kfraser@14181 3270 }
kfraser@14181 3271 break;
kaf24@5275 3272 #endif
keir@19141 3273 case PGT_writable_page:
keir@19141 3274 perfc_incr(writable_mmu_updates);
keir@19198 3275 okay = paging_write_guest_entry(
keir@19946 3276 v, va, req.val, _mfn(mfn));
keir@19141 3277 break;
kfraser@10274 3278 }
keir@19141 3279 page_unlock(page);
keir@18450 3280 if ( rc == -EINTR )
keir@18450 3281 rc = -EAGAIN;
kfraser@10274 3282 }
keir@19141 3283 else if ( get_page_type(page, PGT_writable_page) )
kfraser@10274 3284 {
keir@17876 3285 perfc_incr(writable_mmu_updates);
keir@19198 3286 okay = paging_write_guest_entry(
keir@19946 3287 v, va, req.val, _mfn(mfn));
kfraser@10274 3288 put_page_type(page);
kfraser@10274 3289 }