debuggers.hg
annotate xen/arch/x86/memory.c @ 3645:fd1dd0663b09
bitkeeper revision 1.1159.212.68 (42001e4d1AQiGV2pdPTNrs2AU2LjsQ)
Merge pb001.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk
into pb001.cl.cam.ac.uk:/auto/groups/xeno/users/iap10/xeno-clone/xen-unstable.bk
author | iap10@pb001.cl.cam.ac.uk
date | Wed Feb 02 00:26:53 2005 +0000 (2005-02-02)
parents | fec8b1778268 e6af5d8f8b39
children | 060c1ea52343
rev | line source
djm@1749 | 1 /****************************************************************************** |
djm@1749 | 2 * arch/x86/memory.c |
djm@1749 | 3 * |
djm@1749 | 4 * Copyright (c) 2002-2004 K A Fraser |
cl349@2093 | 5 * Copyright (c) 2004 Christian Limpach |
djm@1749 | 6 * |
djm@1749 | 7 * This program is free software; you can redistribute it and/or modify |
djm@1749 | 8 * it under the terms of the GNU General Public License as published by |
djm@1749 | 9 * the Free Software Foundation; either version 2 of the License, or |
djm@1749 | 10 * (at your option) any later version. |
djm@1749 | 11 * |
djm@1749 | 12 * This program is distributed in the hope that it will be useful, |
djm@1749 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
djm@1749 | 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
djm@1749 | 15 * GNU General Public License for more details. |
djm@1749 | 16 * |
djm@1749 | 17 * You should have received a copy of the GNU General Public License |
djm@1749 | 18 * along with this program; if not, write to the Free Software |
djm@1749 | 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
djm@1749 | 20 */ |
djm@1749 | 21 |
djm@1749 | 22 /* |
djm@1749 | 23 * A description of the x86 page table API: |
djm@1749 | 24 * |
djm@1749 | 25 * Domains trap to do_mmu_update with a list of update requests. |
djm@1749 | 26 * This is a list of (ptr, val) pairs, where the requested operation |
djm@1749 | 27 * is *ptr = val. |
djm@1749 | 28 * |
djm@1749 | 29 * Reference counting of pages: |
djm@1749 | 30 * ---------------------------- |
djm@1749 | 31 * Each page has two refcounts: tot_count and type_count. |
djm@1749 | 32 * |
djm@1749 | 33 * TOT_COUNT is the obvious reference count. It counts all uses of a |
djm@1749 | 34 * physical page frame by a domain, including uses as a page directory, |
djm@1749 | 35 * a page table, or simple mappings via a PTE. This count prevents a |
djm@1749 | 36 * domain from releasing a frame back to the free pool when it still holds |
djm@1749 | 37 * a reference to it. |
djm@1749 | 38 * |
djm@1749 | 39 * TYPE_COUNT is more subtle. A frame can be put to one of three |
djm@1749 | 40 * mutually-exclusive uses: it might be used as a page directory, or a |
kaf24@2375 | 41 * page table, or it may be mapped writable by the domain [of course, a |
djm@1749 | 42 * frame might be used in none of these three ways!]. |
djm@1749 | 43 * So, type_count is a count of the number of times a frame is being |
djm@1749 | 44 * referred to in its current incarnation. Therefore, a page can only |
djm@1749 | 45 * change its type when its type count is zero. |
djm@1749 | 46 * |
djm@1749 | 47 * Pinning the page type: |
djm@1749 | 48 * ---------------------- |
djm@1749 | 49 * The type of a page can be pinned/unpinned with the commands |
djm@1749 | 50 * MMUEXT_[UN]PIN_L?_TABLE. Each page can be pinned exactly once (that is, |
djm@1749 | 51 * pinning is not reference counted, so it can't be nested). |
djm@1749 | 52 * This is useful to prevent a page's type count falling to zero, at which |
djm@1749 | 53 * point safety checks would need to be carried out next time the count |
djm@1749 | 54 * is increased again. |
djm@1749 | 55 * |
kaf24@2375 | 56 * A further note on writable page mappings: |
kaf24@2375 | 57 * ----------------------------------------- |
kaf24@2375 | 58 * For simplicity, the count of writable mappings for a page may not |
kaf24@2375 | 59 * correspond to reality. The 'writable count' is incremented for every |
djm@1749 | 60 * PTE which maps the page with the _PAGE_RW flag set. However, for |
djm@1749 | 61 * write access to be possible the page directory entry must also have |
djm@1749 | 62 * its _PAGE_RW bit set. We do not check this as it complicates the |
djm@1749 | 63 * reference counting considerably [consider the case of multiple |
djm@1749 | 64 * directory entries referencing a single page table, some with the RW |
djm@1749 | 65 * bit set, others not -- it starts getting a bit messy]. |
djm@1749 | 66 * In normal use, this simplification shouldn't be a problem. |
djm@1749 | 67 * However, the logic can be added if required. |
djm@1749 | 68 * |
djm@1749 | 69 * One more note on read-only page mappings: |
djm@1749 | 70 * ----------------------------------------- |
djm@1749 | 71 * We want domains to be able to map pages for read-only access. The |
djm@1749 | 72 * main reason is that page tables and directories should be readable |
kaf24@2375 | 73 * by a domain, but it would not be safe for them to be writable. |
djm@1749 | 74 * However, domains have free access to rings 1 & 2 of the Intel |
djm@1749 | 75 * privilege model. In terms of page protection, these are considered |
djm@1749 | 76 * to be part of 'supervisor mode'. The WP bit in CR0 controls whether |
djm@1749 | 77 * read-only restrictions are respected in supervisor mode -- if the |
kaf24@2375 | 78 * bit is clear then any mapped page is writable. |
djm@1749 | 79 * |
djm@1749 | 80 * We get round this by always setting the WP bit and disallowing |
djm@1749 | 81 * updates to it. This is very unlikely to cause a problem for guest |
djm@1749 | 82 * OS's, which will generally use the WP bit to simplify copy-on-write |
djm@1749 | 83 * implementation (in that case, OS wants a fault when it writes to |
djm@1749 | 84 * an application-supplied buffer). |
djm@1749 | 85 */ |
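/*
 * [Editor's illustrative sketch -- not part of the original file.]
 * A minimal guest-side example of the (ptr, val) request batch described
 * above. It assumes this era's public mmu_update_t layout and a
 * three-argument HYPERVISOR_mmu_update() wrapper matching do_mmu_update()
 * below; the MMU_* / MMUEXT_* constant names should be checked against the
 * public headers.
 */
#if 0 /* illustration only */
static void example_set_pte(unsigned long pte_maddr, unsigned long new_pte)
{
    mmu_update_t req[2];
    unsigned int done = 0;

    /* Normal update: request that Xen perform *pte_maddr = new_pte. */
    req[0].ptr = pte_maddr | MMU_NORMAL_PT_UPDATE;
    req[0].val = new_pte;

    /* Extended command: queue a TLB flush (handled by do_extended_command). */
    req[1].ptr = MMU_EXTENDED_COMMAND;
    req[1].val = MMUEXT_TLB_FLUSH;

    (void)HYPERVISOR_mmu_update(req, 2, &done);
}
#endif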
djm@1749 | 86 |
djm@1749 | 87 #include <xen/config.h> |
djm@1749 | 88 #include <xen/init.h> |
kaf24@3392 | 89 #include <xen/kernel.h> |
djm@1749 | 90 #include <xen/lib.h> |
djm@1749 | 91 #include <xen/mm.h> |
djm@1749 | 92 #include <xen/sched.h> |
djm@1749 | 93 #include <xen/errno.h> |
djm@1749 | 94 #include <xen/perfc.h> |
djm@1749 | 95 #include <xen/irq.h> |
iap10@2479 | 96 #include <xen/softirq.h> |
kaf24@1787 | 97 #include <asm/shadow.h> |
djm@1749 | 98 #include <asm/page.h> |
djm@1749 | 99 #include <asm/flushtlb.h> |
djm@1749 | 100 #include <asm/io.h> |
djm@1749 | 101 #include <asm/uaccess.h> |
djm@1749 | 102 #include <asm/domain_page.h> |
djm@1749 | 103 #include <asm/ldt.h> |
djm@1749 | 104 |
kaf24@2097 | 105 #ifdef VERBOSE |
djm@1749 | 106 #define MEM_LOG(_f, _a...) \ |
djm@1749 | 107 printk("DOM%u: (file=memory.c, line=%d) " _f "\n", \ |
cl349@2957 | 108 current->domain->id , __LINE__ , ## _a ) |
djm@1749 | 109 #else |
djm@1749 | 110 #define MEM_LOG(_f, _a...) ((void)0) |
djm@1749 | 111 #endif |
djm@1749 | 112 |
djm@1749 | 113 static int alloc_l2_table(struct pfn_info *page); |
djm@1749 | 114 static int alloc_l1_table(struct pfn_info *page); |
djm@1749 | 115 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d); |
djm@1749 | 116 static int get_page_and_type_from_pagenr(unsigned long page_nr, |
djm@1749 | 117 u32 type, |
djm@1749 | 118 struct domain *d); |
djm@1749 | 119 |
djm@1749 | 120 static void free_l2_table(struct pfn_info *page); |
djm@1749 | 121 static void free_l1_table(struct pfn_info *page); |
djm@1749 | 122 |
djm@1749 | 123 static int mod_l2_entry(l2_pgentry_t *, l2_pgentry_t, unsigned long); |
djm@1749 | 124 static int mod_l1_entry(l1_pgentry_t *, l1_pgentry_t); |
djm@1749 | 125 |
djm@1749 | 126 /* Used to defer flushing of memory structures. */ |
djm@1749 | 127 static struct { |
djm@1749 | 128 #define DOP_FLUSH_TLB (1<<0) /* Flush the TLB. */ |
djm@1749 | 129 #define DOP_RELOAD_LDT (1<<1) /* Reload the LDT shadow mapping. */ |
kaf24@3187 | 130 unsigned long deferred_ops; |
kaf24@2314 | 131 /* If non-NULL, specifies a foreign subject domain for some operations. */ |
kaf24@3187 | 132 struct domain *foreign; |
kaf24@3113 | 133 } __cacheline_aligned percpu_info[NR_CPUS]; |
djm@1749 | 134 |
kaf24@2314 | 135 /* |
kaf24@2314 | 136 * Returns the current foreign domain; defaults to the currently-executing |
kaf24@2314 | 137 * domain if a foreign override hasn't been specified. |
kaf24@2314 | 138 */ |
cl349@2957 | 139 #define FOREIGNDOM (percpu_info[smp_processor_id()].foreign ? : current->domain) |
djm@1749 | 140 |
kaf24@2336 | 141 /* Private domain structs for DOMID_XEN and DOMID_IO. */ |
kaf24@2336 | 142 static struct domain *dom_xen, *dom_io; |
cl349@2227 | 143 |
kaf24@3392 | 144 /* Frame table and its size in pages. */ |
kaf24@3392 | 145 struct pfn_info *frame_table; |
kaf24@3392 | 146 unsigned long frame_table_size; |
kaf24@3392 | 147 unsigned long max_page; |
kaf24@3392 | 148 |
kaf24@3392 | 149 void __init init_frametable(void) |
kaf24@3392 | 150 { |
kaf24@3392 | 151 unsigned long i, p; |
kaf24@3392 | 152 |
kaf24@3632 | 153 frame_table = (struct pfn_info *)FRAMETABLE_VIRT_START; |
kaf24@3392 | 154 frame_table_size = max_page * sizeof(struct pfn_info); |
kaf24@3392 | 155 frame_table_size = (frame_table_size + PAGE_SIZE - 1) & PAGE_MASK; |
kaf24@3392 | 156 |
kaf24@3392 | 157 for ( i = 0; i < frame_table_size; i += (4UL << 20) ) |
kaf24@3392 | 158 { |
kaf24@3392 | 159 p = alloc_boot_pages(min(frame_table_size - i, 4UL << 20), 4UL << 20); |
kaf24@3392 | 160 if ( p == 0 ) |
kaf24@3392 | 161 panic("Not enough memory for frame table\n"); |
kaf24@3632 | 162 map_pages(idle_pg_table, FRAMETABLE_VIRT_START + i, p, |
kaf24@3632 | 163 4UL << 20, PAGE_HYPERVISOR); |
kaf24@3392 | 164 } |
kaf24@3392 | 165 |
kaf24@3392 | 166 memset(frame_table, 0, frame_table_size); |
kaf24@3392 | 167 } |
kaf24@3392 | 168 |
cl349@2227 | 169 void arch_init_memory(void) |
djm@1749 | 170 { |
kaf24@3640 | 171 #ifdef __i386__ |
sos22@3478 | 172 unsigned long i; |
kaf24@2336 | 173 |
kaf24@2384 | 174 /* |
kaf24@2384 | 175 * We are rather picky about the layout of 'struct pfn_info'. The |
kaf24@2384 | 176 * count_info and domain fields must be adjacent, as we perform atomic |
kaf24@2384 | 177 * 64-bit operations on them. Also, just for sanity, we assert the size |
kaf24@2384 | 178 * of the structure here. |
kaf24@2384 | 179 */ |
kaf24@2384 | 180 if ( (offsetof(struct pfn_info, u.inuse.domain) != |
kaf24@2384 | 181 (offsetof(struct pfn_info, count_info) + sizeof(u32))) || |
kaf24@2384 | 182 (sizeof(struct pfn_info) != 24) ) |
kaf24@2384 | 183 { |
kaf24@2384 | 184 printk("Weird pfn_info layout (%ld,%ld,%d)\n", |
kaf24@2384 | 185 offsetof(struct pfn_info, count_info), |
kaf24@2384 | 186 offsetof(struct pfn_info, u.inuse.domain), |
kaf24@2384 | 187 sizeof(struct pfn_info)); |
kaf24@2384 | 188 for ( ; ; ) ; |
kaf24@2384 | 189 } |
kaf24@2384 | 190 |
djm@1749 | 191 memset(percpu_info, 0, sizeof(percpu_info)); |
cl349@2227 | 192 |
kaf24@2336 | 193 /* Initialise to a magic of 0x55555555 so easier to spot bugs later. */ |
kaf24@2336 | 194 memset(machine_to_phys_mapping, 0x55, 4<<20); |
kaf24@2336 | 195 |
kaf24@2336 | 196 /* |
kaf24@2336 | 197 * Initialise our DOMID_XEN domain. |
kaf24@2336 | 198 * Any Xen-heap pages that we will allow to be mapped will have |
kaf24@2336 | 199 * their domain field set to dom_xen. |
kaf24@2336 | 200 */ |
kaf24@2336 | 201 dom_xen = alloc_domain_struct(); |
kaf24@2336 | 202 atomic_set(&dom_xen->refcnt, 1); |
kaf24@2748 | 203 dom_xen->id = DOMID_XEN; |
kaf24@2336 | 204 |
kaf24@2336 | 205 /* |
kaf24@2336 | 206 * Initialise our DOMID_IO domain. |
kaf24@2336 | 207 * This domain owns no pages but is considered a special case when |
kaf24@2336 | 208 * mapping I/O pages, as the mappings occur with the privileges of the caller. |
kaf24@2336 | 209 */ |
kaf24@2336 | 210 dom_io = alloc_domain_struct(); |
kaf24@2336 | 211 atomic_set(&dom_io->refcnt, 1); |
kaf24@2748 | 212 dom_io->id = DOMID_IO; |
kaf24@2336 | 213 |
kaf24@2336 | 214 /* M2P table is mappable read-only by privileged domains. */ |
kaf24@3392 | 215 for ( i = 0; i < 1024; i++ ) |
kaf24@2336 | 216 { |
sos22@3478 | 217 frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1; |
sos22@3478 | 218 /* Use the gdt page type so that non-privileged domains may only |
sos22@3478 | 219 map it read-only. */ |
sos22@3478 | 220 frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1; |
sos22@3478 | 221 frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen; |
kaf24@2336 | 222 } |
kaf24@3640 | 223 #endif |
djm@1749 | 224 } |
djm@1749 | 225 |
cl349@2957 | 226 static void __invalidate_shadow_ldt(struct exec_domain *d) |
djm@1749 | 227 { |
djm@1749 | 228 int i; |
djm@1749 | 229 unsigned long pfn; |
djm@1749 | 230 struct pfn_info *page; |
djm@1749 | 231 |
djm@1749 | 232 d->mm.shadow_ldt_mapcnt = 0; |
djm@1749 | 233 |
djm@1749 | 234 for ( i = 16; i < 32; i++ ) |
djm@1749 | 235 { |
cl349@3036 | 236 pfn = l1_pgentry_to_pagenr(d->mm.perdomain_ptes[i]); |
djm@1749 | 237 if ( pfn == 0 ) continue; |
cl349@3036 | 238 d->mm.perdomain_ptes[i] = mk_l1_pgentry(0); |
djm@1749 | 239 page = &frame_table[pfn]; |
djm@1749 | 240 ASSERT_PAGE_IS_TYPE(page, PGT_ldt_page); |
cl349@3036 | 241 ASSERT_PAGE_IS_DOMAIN(page, d->domain); |
djm@1749 | 242 put_page_and_type(page); |
djm@1749 | 243 } |
djm@1749 | 244 |
djm@1749 | 245 /* Dispose of the (now possibly invalid) mappings from the TLB. */ |
djm@1749 | 246 percpu_info[d->processor].deferred_ops |= DOP_FLUSH_TLB | DOP_RELOAD_LDT; |
djm@1749 | 247 } |
djm@1749 | 248 |
djm@1749 | 249 |
cl349@2957 | 250 static inline void invalidate_shadow_ldt(struct exec_domain *d) |
djm@1749 | 251 { |
djm@1749 | 252 if ( d->mm.shadow_ldt_mapcnt != 0 ) |
djm@1749 | 253 __invalidate_shadow_ldt(d); |
djm@1749 | 254 } |
djm@1749 | 255 |
djm@1749 | 256 |
kaf24@2336 | 257 static int alloc_segdesc_page(struct pfn_info *page) |
djm@1749 | 258 { |
djm@1749 | 259 unsigned long *descs = map_domain_mem((page-frame_table) << PAGE_SHIFT); |
djm@1749 | 260 int i; |
djm@1749 | 261 |
djm@1749 | 262 for ( i = 0; i < 512; i++ ) |
kaf24@1854 | 263 if ( unlikely(!check_descriptor(&descs[i*2])) ) |
djm@1749 | 264 goto fail; |
djm@1749 | 265 |
djm@1749 | 266 unmap_domain_mem(descs); |
djm@1749 | 267 return 1; |
djm@1749 | 268 |
djm@1749 | 269 fail: |
djm@1749 | 270 unmap_domain_mem(descs); |
djm@1749 | 271 return 0; |
djm@1749 | 272 } |
djm@1749 | 273 |
djm@1749 | 274 |
djm@1749 | 275 /* Map shadow page at offset @off. */ |
djm@1749 | 276 int map_ldt_shadow_page(unsigned int off) |
djm@1749 | 277 { |
cl349@2957 | 278 struct exec_domain *ed = current; |
cl349@2957 | 279 struct domain *d = ed->domain; |
djm@1749 | 280 unsigned long l1e; |
djm@1749 | 281 |
djm@1749 | 282 if ( unlikely(in_irq()) ) |
djm@1749 | 283 BUG(); |
djm@1749 | 284 |
cl349@2957 | 285 __get_user(l1e, (unsigned long *)&linear_pg_table[(ed->mm.ldt_base >> |
djm@1749 | 286 PAGE_SHIFT) + off]); |
djm@1749 | 287 |
djm@1749 | 288 if ( unlikely(!(l1e & _PAGE_PRESENT)) || |
djm@1749 | 289 unlikely(!get_page_and_type(&frame_table[l1e >> PAGE_SHIFT], |
djm@1749 | 290 d, PGT_ldt_page)) ) |
djm@1749 | 291 return 0; |
djm@1749 | 292 |
cl349@3036 | 293 ed->mm.perdomain_ptes[off + 16] = mk_l1_pgentry(l1e | _PAGE_RW); |
cl349@2957 | 294 ed->mm.shadow_ldt_mapcnt++; |
djm@1749 | 295 |
djm@1749 | 296 return 1; |
djm@1749 | 297 } |
djm@1749 | 298 |
djm@1749 | 299 |
djm@1749 | 300 static int get_page_from_pagenr(unsigned long page_nr, struct domain *d) |
djm@1749 | 301 { |
djm@1749 | 302 struct pfn_info *page = &frame_table[page_nr]; |
djm@1749 | 303 |
djm@1749 | 304 if ( unlikely(!pfn_is_ram(page_nr)) ) |
djm@1749 | 305 { |
djm@1749 | 306 MEM_LOG("Pfn %08lx is not RAM", page_nr); |
djm@1749 | 307 return 0; |
djm@1749 | 308 } |
djm@1749 | 309 |
djm@1749 | 310 if ( unlikely(!get_page(page, d)) ) |
djm@1749 | 311 { |
djm@1749 | 312 MEM_LOG("Could not get page ref for pfn %08lx", page_nr); |
djm@1749 | 313 return 0; |
djm@1749 | 314 } |
djm@1749 | 315 |
djm@1749 | 316 return 1; |
djm@1749 | 317 } |
djm@1749 | 318 |
djm@1749 | 319 |
djm@1749 | 320 static int get_page_and_type_from_pagenr(unsigned long page_nr, |
djm@1749 | 321 u32 type, |
djm@1749 | 322 struct domain *d) |
djm@1749 | 323 { |
djm@1749 | 324 struct pfn_info *page = &frame_table[page_nr]; |
djm@1749 | 325 |
djm@1749 | 326 if ( unlikely(!get_page_from_pagenr(page_nr, d)) ) |
djm@1749 | 327 return 0; |
djm@1749 | 328 |
djm@1749 | 329 if ( unlikely(!get_page_type(page, type)) ) |
djm@1749 | 330 { |
cl349@2450 | 331 #ifdef VERBOSE |
cl349@2491 | 332 if ( (type & PGT_type_mask) != PGT_l1_page_table ) |
cl349@2491 | 333 MEM_LOG("Bad page type for pfn %08lx (%08x)", |
cl349@2491 | 334 page_nr, page->u.inuse.type_info); |
cl349@2450 | 335 #endif |
djm@1749 | 336 put_page(page); |
djm@1749 | 337 return 0; |
djm@1749 | 338 } |
djm@1749 | 339 |
djm@1749 | 340 return 1; |
djm@1749 | 341 } |
djm@1749 | 342 |
djm@1749 | 343 |
djm@1749 | 344 /* |
djm@1749 | 345 * We allow L2 tables to map each other (a.k.a. linear page tables). It |
djm@1749 | 346 * needs some special care with reference counts and access permissions: |
djm@1749 | 347 * 1. The mapping entry must be read-only, or the guest may get write access |
djm@1749 | 348 * to its own PTEs. |
djm@1749 | 349 * 2. We must only bump the reference counts for an *already validated* |
djm@1749 | 350 * L2 table, or we can end up in a deadlock in get_page_type() by waiting |
djm@1749 | 351 * on a validation that is required to complete that validation. |
djm@1749 | 352 * 3. We only need to increment the reference counts for the mapped page |
djm@1749 | 353 * frame if it is mapped by a different L2 table. This is sufficient and |
djm@1749 | 354 * also necessary to allow validation of an L2 table mapping itself. |
djm@1749 | 355 */ |
kaf24@2314 | 356 static int |
kaf24@2314 | 357 get_linear_pagetable( |
kaf24@2314 | 358 l2_pgentry_t l2e, unsigned long pfn, struct domain *d) |
djm@1749 | 359 { |
djm@1749 | 360 u32 x, y; |
djm@1749 | 361 struct pfn_info *page; |
djm@1749 | 362 |
djm@1749 | 363 if ( (l2_pgentry_val(l2e) & _PAGE_RW) ) |
djm@1749 | 364 { |
djm@1749 | 365 MEM_LOG("Attempt to create linear p.t. with write perms"); |
djm@1749 | 366 return 0; |
djm@1749 | 367 } |
djm@1749 | 368 |
djm@1749 | 369 if ( (l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn ) |
djm@1749 | 370 { |
djm@1749 | 371 /* Make sure the mapped frame belongs to the correct domain. */ |
kaf24@2314 | 372 if ( unlikely(!get_page_from_pagenr(l2_pgentry_to_pagenr(l2e), d)) ) |
djm@1749 | 373 return 0; |
djm@1749 | 374 |
djm@1749 | 375 /* |
djm@1749 | 376 * Make sure that the mapped frame is an already-validated L2 table. |
djm@1749 | 377 * If so, atomically increment the count (checking for overflow). |
djm@1749 | 378 */ |
djm@1749 | 379 page = &frame_table[l2_pgentry_to_pagenr(l2e)]; |
kaf24@1970 | 380 y = page->u.inuse.type_info; |
djm@1749 | 381 do { |
djm@1749 | 382 x = y; |
djm@1749 | 383 if ( unlikely((x & PGT_count_mask) == PGT_count_mask) || |
djm@1749 | 384 unlikely((x & (PGT_type_mask|PGT_validated)) != |
djm@1749 | 385 (PGT_l2_page_table|PGT_validated)) ) |
djm@1749 | 386 { |
djm@1749 | 387 put_page(page); |
djm@1749 | 388 return 0; |
djm@1749 | 389 } |
djm@1749 | 390 } |
kaf24@1970 | 391 while ( (y = cmpxchg(&page->u.inuse.type_info, x, x + 1)) != x ); |
djm@1749 | 392 } |
djm@1749 | 393 |
djm@1749 | 394 return 1; |
djm@1749 | 395 } |
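/*
 * [Editor's illustrative sketch -- not part of the original file.]
 * What a guest-supplied linear ("self-referencing") L2 entry looks like
 * under rule 1 of the comment above: the entry points back at the L2
 * frame itself and must not carry _PAGE_RW. 'l2_mfn' is a hypothetical
 * variable holding the machine frame number of the L2 table.
 */
#if 0 /* illustration only */
    l2_pgentry_t linear_e =
        mk_l2_pgentry((l2_mfn << PAGE_SHIFT) | _PAGE_PRESENT | _PAGE_ACCESSED);
#endif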
djm@1749 | 396 |
djm@1749 | 397 |
kaf24@2314 | 398 static int |
kaf24@2314 | 399 get_page_from_l1e( |
kaf24@2314 | 400 l1_pgentry_t l1e, struct domain *d) |
djm@1749 | 401 { |
djm@1749 | 402 unsigned long l1v = l1_pgentry_val(l1e); |
djm@1749 | 403 unsigned long pfn = l1_pgentry_to_pagenr(l1e); |
kaf24@2382 | 404 struct pfn_info *page = &frame_table[pfn]; |
djm@1749 | 405 extern int domain_iomem_in_pfn(struct domain *d, unsigned long pfn); |
djm@1749 | 406 |
djm@1749 | 407 if ( !(l1v & _PAGE_PRESENT) ) |
djm@1749 | 408 return 1; |
djm@1749 | 409 |
djm@1749 | 410 if ( unlikely(l1v & (_PAGE_GLOBAL|_PAGE_PAT)) ) |
djm@1749 | 411 { |
djm@1749 | 412 MEM_LOG("Bad L1 type settings %04lx", l1v & (_PAGE_GLOBAL|_PAGE_PAT)); |
djm@1749 | 413 return 0; |
djm@1749 | 414 } |
djm@1749 | 415 |
djm@1749 | 416 if ( unlikely(!pfn_is_ram(pfn)) ) |
djm@1749 | 417 { |
kaf24@2336 | 418 /* Revert to caller privileges if FD == DOMID_IO. */ |
kaf24@2336 | 419 if ( d == dom_io ) |
cl349@2957 | 420 d = current->domain; |
kaf24@2336 | 421 |
kaf24@2336 | 422 if ( IS_PRIV(d) ) |
djm@1749 | 423 return 1; |
djm@1749 | 424 |
kaf24@2336 | 425 if ( IS_CAPABLE_PHYSDEV(d) ) |
kaf24@2336 | 426 return domain_iomem_in_pfn(d, pfn); |
djm@1749 | 427 |
djm@1749 | 428 MEM_LOG("Non-privileged attempt to map I/O space %08lx", pfn); |
djm@1749 | 429 return 0; |
djm@1749 | 430 } |
djm@1749 | 431 |
kaf24@2756 | 432 return ((l1v & _PAGE_RW) ? |
kaf24@2756 | 433 get_page_and_type(page, d, PGT_writable_page) : |
kaf24@2757 | 434 get_page(page, d)); |
djm@1749 | 435 } |
djm@1749 | 436 |
djm@1749 | 437 |
djm@1749 | 438 /* NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. */ |
kaf24@2314 | 439 static int |
kaf24@2314 | 440 get_page_from_l2e( |
kaf24@2466 | 441 l2_pgentry_t l2e, unsigned long pfn, |
kaf24@2466 | 442 struct domain *d, unsigned long va_idx) |
djm@1749 | 443 { |
iap10@2458 | 444 int rc; |
iap10@2458 | 445 |
djm@1749 | 446 if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) ) |
djm@1749 | 447 return 1; |
djm@1749 | 448 |
djm@1749 | 449 if ( unlikely((l2_pgentry_val(l2e) & (_PAGE_GLOBAL|_PAGE_PSE))) ) |
djm@1749 | 450 { |
djm@1749 | 451 MEM_LOG("Bad L2 page type settings %04lx", |
djm@1749 | 452 l2_pgentry_val(l2e) & (_PAGE_GLOBAL|_PAGE_PSE)); |
djm@1749 | 453 return 0; |
djm@1749 | 454 } |
djm@1749 | 455 |
iap10@2458 | 456 rc = get_page_and_type_from_pagenr( |
iap10@2458 | 457 l2_pgentry_to_pagenr(l2e), |
kaf24@2466 | 458 PGT_l1_page_table | (va_idx<<PGT_va_shift), d); |
iap10@2458 | 459 |
iap10@2458 | 460 if ( unlikely(!rc) ) |
kaf24@2314 | 461 return get_linear_pagetable(l2e, pfn, d); |
djm@1749 | 462 |
djm@1749 | 463 return 1; |
djm@1749 | 464 } |
djm@1749 | 465 |
djm@1749 | 466 |
kaf24@2382 | 467 static void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d) |
djm@1749 | 468 { |
djm@1749 | 469 unsigned long l1v = l1_pgentry_val(l1e); |
kaf24@2385 | 470 unsigned long pfn = l1_pgentry_to_pagenr(l1e); |
kaf24@2385 | 471 struct pfn_info *page = &frame_table[pfn]; |
iap10@3424 | 472 struct domain *e; |
djm@1749 | 473 |
kaf24@2385 | 474 if ( !(l1v & _PAGE_PRESENT) || !pfn_is_ram(pfn) ) |
djm@1749 | 475 return; |
djm@1749 | 476 |
iap10@3424 | 477 e = page->u.inuse.domain; |
kaf24@2382 | 478 if ( unlikely(e != d) ) |
kaf24@2382 | 479 { |
kaf24@2382 | 480 /* |
kaf24@2382 | 481 * Unmap a foreign page that may have been mapped via a grant table. |
kaf24@2382 | 482 * Note that this can fail for a privileged domain that can map foreign |
kaf24@2382 | 483 * pages via MMUEXT_SET_FOREIGNDOM. Such domains can have some mappings |
kaf24@2382 | 484 * counted via a grant entry and some counted directly in the page |
kaf24@2382 | 485 * structure's reference count. Note that reference counts won't get |
kaf24@2382 | 486 * dangerously confused as long as we always try to decrement the |
kaf24@2382 | 487 * grant entry first. We may end up with a mismatch between which |
kaf24@2382 | 488 * mappings and which unmappings are counted via the grant entry, but |
kaf24@2382 | 489 * really it doesn't matter as privileged domains have carte blanche. |
kaf24@2382 | 490 */ |
kaf24@2655 | 491 if ( likely(gnttab_check_unmap(e, d, pfn, !(l1v & _PAGE_RW))) ) |
kaf24@2382 | 492 return; |
kaf24@2382 | 493 /* Assume this mapping was made via MMUEXT_SET_FOREIGNDOM... */ |
kaf24@2382 | 494 } |
kaf24@2382 | 495 |
djm@1749 | 496 if ( l1v & _PAGE_RW ) |
djm@1749 | 497 { |
djm@1749 | 498 put_page_and_type(page); |
djm@1749 | 499 } |
djm@1749 | 500 else |
djm@1749 | 501 { |
djm@1749 | 502 /* We expect this to be rare, so we blow away the entire shadow LDT. */ |
kaf24@1970 | 503 if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) == |
djm@1749 | 504 PGT_ldt_page)) && |
kaf24@1970 | 505 unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) ) |
cl349@2957 | 506 invalidate_shadow_ldt(e->exec_domain[0]); |
djm@1749 | 507 put_page(page); |
djm@1749 | 508 } |
djm@1749 | 509 } |
djm@1749 | 510 |
djm@1749 | 511 |
djm@1749 | 512 /* |
djm@1749 | 513 * NB. Virtual address 'l2e' maps to a machine address within frame 'pfn'. |
djm@1749 | 514 * Note also that this automatically deals correctly with linear p.t.'s. |
djm@1749 | 515 */ |
djm@1749 | 516 static void put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn) |
djm@1749 | 517 { |
djm@1749 | 518 if ( (l2_pgentry_val(l2e) & _PAGE_PRESENT) && |
djm@1749 | 519 ((l2_pgentry_val(l2e) >> PAGE_SHIFT) != pfn) ) |
djm@1749 | 520 put_page_and_type(&frame_table[l2_pgentry_to_pagenr(l2e)]); |
djm@1749 | 521 } |
djm@1749 | 522 |
djm@1749 | 523 |
djm@1749 | 524 static int alloc_l2_table(struct pfn_info *page) |
djm@1749 | 525 { |
kaf24@2314 | 526 struct domain *d = page->u.inuse.domain; |
kaf24@2314 | 527 unsigned long page_nr = page_to_pfn(page); |
kaf24@2314 | 528 l2_pgentry_t *pl2e; |
kaf24@2314 | 529 int i; |
djm@1749 | 530 |
djm@1749 | 531 pl2e = map_domain_mem(page_nr << PAGE_SHIFT); |
djm@1749 | 532 |
kaf24@3392 | 533 for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) |
iap10@2458 | 534 if ( unlikely(!get_page_from_l2e(pl2e[i], page_nr, d, i)) ) |
djm@1749 | 535 goto fail; |
kaf24@3392 | 536 |
djm@1749 | 537 #if defined(__i386__) |
djm@1749 | 538 /* Now we add our private high mappings. */ |
djm@1749 | 539 memcpy(&pl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE], |
djm@1749 | 540 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE], |
djm@1749 | 541 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t)); |
djm@1749 | 542 pl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] = |
djm@1749 | 543 mk_l2_pgentry((page_nr << PAGE_SHIFT) | __PAGE_HYPERVISOR); |
djm@1749 | 544 pl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] = |
cl349@3036 | 545 mk_l2_pgentry(__pa(page->u.inuse.domain->mm_perdomain_pt) | |
djm@1749 | 546 __PAGE_HYPERVISOR); |
djm@1749 | 547 #endif |
djm@1749 | 548 |
djm@1749 | 549 unmap_domain_mem(pl2e); |
djm@1749 | 550 return 1; |
djm@1749 | 551 |
djm@1749 | 552 fail: |
djm@1749 | 553 while ( i-- > 0 ) |
djm@1749 | 554 put_page_from_l2e(pl2e[i], page_nr); |
djm@1749 | 555 |
djm@1749 | 556 unmap_domain_mem(pl2e); |
djm@1749 | 557 return 0; |
djm@1749 | 558 } |
djm@1749 | 559 |
djm@1749 | 560 |
djm@1749 | 561 static int alloc_l1_table(struct pfn_info *page) |
djm@1749 | 562 { |
kaf24@2314 | 563 struct domain *d = page->u.inuse.domain; |
kaf24@2314 | 564 unsigned long page_nr = page_to_pfn(page); |
kaf24@2314 | 565 l1_pgentry_t *pl1e; |
kaf24@2314 | 566 int i; |
djm@1749 | 567 |
djm@1749 | 568 pl1e = map_domain_mem(page_nr << PAGE_SHIFT); |
djm@1749 | 569 |
djm@1749 | 570 for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) |
kaf24@2314 | 571 if ( unlikely(!get_page_from_l1e(pl1e[i], d)) ) |
djm@1749 | 572 goto fail; |
djm@1749 | 573 |
djm@1749 | 574 unmap_domain_mem(pl1e); |
djm@1749 | 575 return 1; |
djm@1749 | 576 |
djm@1749 | 577 fail: |
djm@1749 | 578 while ( i-- > 0 ) |
kaf24@2382 | 579 put_page_from_l1e(pl1e[i], d); |
djm@1749 | 580 |
djm@1749 | 581 unmap_domain_mem(pl1e); |
djm@1749 | 582 return 0; |
djm@1749 | 583 } |
djm@1749 | 584 |
djm@1749 | 585 |
djm@1749 | 586 static void free_l2_table(struct pfn_info *page) |
djm@1749 | 587 { |
djm@1749 | 588 unsigned long page_nr = page - frame_table; |
djm@1749 | 589 l2_pgentry_t *pl2e; |
djm@1749 | 590 int i; |
djm@1749 | 591 |
djm@1749 | 592 pl2e = map_domain_mem(page_nr << PAGE_SHIFT); |
djm@1749 | 593 |
djm@1749 | 594 for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) |
djm@1749 | 595 put_page_from_l2e(pl2e[i], page_nr); |
djm@1749 | 596 |
djm@1749 | 597 unmap_domain_mem(pl2e); |
djm@1749 | 598 } |
djm@1749 | 599 |
djm@1749 | 600 |
djm@1749 | 601 static void free_l1_table(struct pfn_info *page) |
djm@1749 | 602 { |
kaf24@2382 | 603 struct domain *d = page->u.inuse.domain; |
djm@1749 | 604 unsigned long page_nr = page - frame_table; |
djm@1749 | 605 l1_pgentry_t *pl1e; |
djm@1749 | 606 int i; |
djm@1749 | 607 |
djm@1749 | 608 pl1e = map_domain_mem(page_nr << PAGE_SHIFT); |
djm@1749 | 609 |
djm@1749 | 610 for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) |
kaf24@2382 | 611 put_page_from_l1e(pl1e[i], d); |
djm@1749 | 612 |
djm@1749 | 613 unmap_domain_mem(pl1e); |
djm@1749 | 614 } |
djm@1749 | 615 |
djm@1749 | 616 |
djm@1749 | 617 static inline int update_l2e(l2_pgentry_t *pl2e, |
djm@1749 | 618 l2_pgentry_t ol2e, |
djm@1749 | 619 l2_pgentry_t nl2e) |
djm@1749 | 620 { |
djm@1749 | 621 unsigned long o = cmpxchg((unsigned long *)pl2e, |
djm@1749 | 622 l2_pgentry_val(ol2e), |
djm@1749 | 623 l2_pgentry_val(nl2e)); |
djm@1749 | 624 if ( o != l2_pgentry_val(ol2e) ) |
djm@1749 | 625 MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n", |
djm@1749 | 626 l2_pgentry_val(ol2e), l2_pgentry_val(nl2e), o); |
djm@1749 | 627 return (o == l2_pgentry_val(ol2e)); |
djm@1749 | 628 } |
djm@1749 | 629 |
djm@1749 | 630 |
djm@1749 | 631 /* Update the L2 entry at pl2e to new value nl2e. pl2e is within frame pfn. */ |
djm@1749 | 632 static int mod_l2_entry(l2_pgentry_t *pl2e, |
djm@1749 | 633 l2_pgentry_t nl2e, |
djm@1749 | 634 unsigned long pfn) |
djm@1749 | 635 { |
djm@1749 | 636 l2_pgentry_t ol2e; |
djm@1749 | 637 unsigned long _ol2e; |
djm@1749 | 638 |
djm@1749 | 639 if ( unlikely((((unsigned long)pl2e & (PAGE_SIZE-1)) >> 2) >= |
djm@1749 | 640 DOMAIN_ENTRIES_PER_L2_PAGETABLE) ) |
djm@1749 | 641 { |
djm@1749 | 642 MEM_LOG("Illegal L2 update attempt in Xen-private area %p", pl2e); |
djm@1749 | 643 return 0; |
djm@1749 | 644 } |
djm@1749 | 645 |
djm@1749 | 646 if ( unlikely(__get_user(_ol2e, (unsigned long *)pl2e) != 0) ) |
djm@1749 | 647 return 0; |
djm@1749 | 648 ol2e = mk_l2_pgentry(_ol2e); |
djm@1749 | 649 |
djm@1749 | 650 if ( l2_pgentry_val(nl2e) & _PAGE_PRESENT ) |
djm@1749 | 651 { |
djm@1749 | 652 /* Differ in mapping (bits 12-31) or presence (bit 0)? */ |
djm@1749 | 653 if ( ((l2_pgentry_val(ol2e) ^ l2_pgentry_val(nl2e)) & ~0xffe) == 0 ) |
djm@1749 | 654 return update_l2e(pl2e, ol2e, nl2e); |
djm@1749 | 655 |
cl349@2957 | 656 if ( unlikely(!get_page_from_l2e(nl2e, pfn, current->domain, |
cl349@2491 | 657 ((unsigned long)pl2e & |
kaf24@2466 | 658 ~PAGE_MASK) >> 2)) ) |
djm@1749 | 659 return 0; |
cl349@1860 | 660 |
djm@1749 | 661 if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) ) |
djm@1749 | 662 { |
djm@1749 | 663 put_page_from_l2e(nl2e, pfn); |
djm@1749 | 664 return 0; |
djm@1749 | 665 } |
djm@1749 | 666 |
djm@1749 | 667 put_page_from_l2e(ol2e, pfn); |
djm@1749 | 668 return 1; |
djm@1749 | 669 } |
djm@1749 | 670 |
djm@1749 | 671 if ( unlikely(!update_l2e(pl2e, ol2e, nl2e)) ) |
djm@1749 | 672 return 0; |
djm@1749 | 673 |
djm@1749 | 674 put_page_from_l2e(ol2e, pfn); |
djm@1749 | 675 return 1; |
djm@1749 | 676 } |
djm@1749 | 677 |
djm@1749 | 678 |
djm@1749 | 679 static inline int update_l1e(l1_pgentry_t *pl1e, |
djm@1749 | 680 l1_pgentry_t ol1e, |
djm@1749 | 681 l1_pgentry_t nl1e) |
djm@1749 | 682 { |
djm@1749 | 683 unsigned long o = l1_pgentry_val(ol1e); |
djm@1749 | 684 unsigned long n = l1_pgentry_val(nl1e); |
djm@1749 | 685 |
djm@1749 | 686 if ( unlikely(cmpxchg_user(pl1e, o, n) != 0) || |
djm@1749 | 687 unlikely(o != l1_pgentry_val(ol1e)) ) |
djm@1749 | 688 { |
djm@1749 | 689 MEM_LOG("Failed to update %08lx -> %08lx: saw %08lx\n", |
djm@1749 | 690 l1_pgentry_val(ol1e), l1_pgentry_val(nl1e), o); |
djm@1749 | 691 return 0; |
djm@1749 | 692 } |
djm@1749 | 693 |
djm@1749 | 694 return 1; |
djm@1749 | 695 } |
djm@1749 | 696 |
djm@1749 | 697 |
djm@1749 | 698 /* Update the L1 entry at pl1e to new value nl1e. */ |
djm@1749 | 699 static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e) |
djm@1749 | 700 { |
djm@1749 | 701 l1_pgentry_t ol1e; |
djm@1749 | 702 unsigned long _ol1e; |
cl349@2957 | 703 struct domain *d = current->domain; |
djm@1749 | 704 |
djm@1749 | 705 if ( unlikely(__get_user(_ol1e, (unsigned long *)pl1e) != 0) ) |
djm@1749 | 706 { |
djm@1749 | 707 MEM_LOG("Bad get_user\n"); |
djm@1749 | 708 return 0; |
djm@1749 | 709 } |
djm@1749 | 710 |
djm@1749 | 711 ol1e = mk_l1_pgentry(_ol1e); |
djm@1749 | 712 |
djm@1749 | 713 if ( l1_pgentry_val(nl1e) & _PAGE_PRESENT ) |
djm@1749 | 714 { |
djm@1749 | 715 /* Differ in mapping (bits 12-31), r/w (bit 1), or presence (bit 0)? */ |
djm@1749 | 716 if ( ((l1_pgentry_val(ol1e) ^ l1_pgentry_val(nl1e)) & ~0xffc) == 0 ) |
djm@1749 | 717 return update_l1e(pl1e, ol1e, nl1e); |
djm@1749 | 718 |
kaf24@2314 | 719 if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) ) |
djm@1749 | 720 return 0; |
djm@1749 | 721 |
djm@1749 | 722 if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) ) |
djm@1749 | 723 { |
kaf24@2382 | 724 put_page_from_l1e(nl1e, d); |
djm@1749 | 725 return 0; |
djm@1749 | 726 } |
djm@1749 | 727 |
kaf24@2382 | 728 put_page_from_l1e(ol1e, d); |
djm@1749 | 729 return 1; |
djm@1749 | 730 } |
djm@1749 | 731 |
djm@1749 | 732 if ( unlikely(!update_l1e(pl1e, ol1e, nl1e)) ) |
djm@1749 | 733 return 0; |
djm@1749 | 734 |
kaf24@2382 | 735 put_page_from_l1e(ol1e, d); |
djm@1749 | 736 return 1; |
djm@1749 | 737 } |
djm@1749 | 738 |
djm@1749 | 739 |
djm@1749 | 740 int alloc_page_type(struct pfn_info *page, unsigned int type) |
djm@1749 | 741 { |
djm@1749 | 742 switch ( type ) |
djm@1749 | 743 { |
djm@1749 | 744 case PGT_l1_page_table: |
djm@1749 | 745 return alloc_l1_table(page); |
djm@1749 | 746 case PGT_l2_page_table: |
djm@1749 | 747 return alloc_l2_table(page); |
djm@1749 | 748 case PGT_gdt_page: |
djm@1749 | 749 case PGT_ldt_page: |
djm@1749 | 750 return alloc_segdesc_page(page); |
djm@1749 | 751 default: |
cl349@2491 | 752 printk("Bad type in alloc_page_type %x t=%x c=%x\n", |
cl349@2491 | 753 type, page->u.inuse.type_info, |
cl349@2491 | 754 page->count_info); |
djm@1749 | 755 BUG(); |
djm@1749 | 756 } |
djm@1749 | 757 |
djm@1749 | 758 return 0; |
djm@1749 | 759 } |
djm@1749 | 760 |
djm@1749 | 761 |
djm@1749 | 762 void free_page_type(struct pfn_info *page, unsigned int type) |
djm@1749 | 763 { |
kaf24@2314 | 764 struct domain *d = page->u.inuse.domain; |
kaf24@2314 | 765 |
djm@1749 | 766 switch ( type ) |
djm@1749 | 767 { |
djm@1749 | 768 case PGT_l1_page_table: |
djm@1749 | 769 free_l1_table(page); |
djm@1749 | 770 break; |
djm@1749 | 771 |
djm@1749 | 772 case PGT_l2_page_table: |
djm@1749 | 773 free_l2_table(page); |
djm@1749 | 774 break; |
djm@1749 | 775 |
djm@1749 | 776 default: |
djm@1749 | 777 BUG(); |
djm@1749 | 778 } |
kaf24@2314 | 779 |
cl349@2957 | 780 if ( unlikely(d->exec_domain[0]->mm.shadow_mode) && |
cl349@2957 | 781 (get_shadow_status(&d->exec_domain[0]->mm, page_to_pfn(page)) & PSH_shadowed) ) |
kaf24@2314 | 782 { |
kaf24@2314 | 783 unshadow_table(page_to_pfn(page), type); |
cl349@2957 | 784 put_shadow_status(&d->exec_domain[0]->mm); |
kaf24@2314 | 785 } |
djm@1749 | 786 } |
djm@1749 | 787 |
djm@1749 | 788 |
kaf24@2498 | 789 void put_page_type(struct pfn_info *page) |
kaf24@2498 | 790 { |
kaf24@2498 | 791 u32 nx, x, y = page->u.inuse.type_info; |
kaf24@2498 | 792 |
kaf24@2498 | 793 again: |
kaf24@2498 | 794 do { |
kaf24@2498 | 795 x = y; |
kaf24@2498 | 796 nx = x - 1; |
kaf24@2498 | 797 |
kaf24@2498 | 798 ASSERT((x & PGT_count_mask) != 0); |
kaf24@2588 | 799 |
kaf24@2588 | 800 /* |
kaf24@2588 | 801 * The page should always be validated while a reference is held. The |
kaf24@2588 | 802 * exception is during domain destruction, when we forcibly invalidate |
kaf24@2588 | 803 * page-table pages if we detect a referential loop. |
kaf24@2588 | 804 * See domain.c:relinquish_list(). |
kaf24@2588 | 805 */ |
kaf24@2588 | 806 ASSERT((x & PGT_validated) || |
cl349@3036 | 807 test_bit(DF_DYING, &page->u.inuse.domain->d_flags)); |
kaf24@2498 | 808 |
kaf24@2498 | 809 if ( unlikely((nx & PGT_count_mask) == 0) ) |
kaf24@2498 | 810 { |
kaf24@2498 | 811 /* Record TLB information for flush later. Races are harmless. */ |
kaf24@2790 | 812 page->tlbflush_timestamp = tlbflush_current_time(); |
kaf24@2498 | 813 |
kaf24@2588 | 814 if ( unlikely((nx & PGT_type_mask) <= PGT_l4_page_table) && |
kaf24@2588 | 815 likely(nx & PGT_validated) ) |
kaf24@2498 | 816 { |
kaf24@2498 | 817 /* |
kaf24@2498 | 818 * Page-table pages must be unvalidated when count is zero. The |
kaf24@2498 | 819 * 'free' is safe because the refcnt is non-zero and validated |
kaf24@2498 | 820 * bit is clear => other ops will spin or fail. |
kaf24@2498 | 821 */ |
kaf24@2498 | 822 if ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, |
kaf24@2498 | 823 x & ~PGT_validated)) != x) ) |
kaf24@2498 | 824 goto again; |
kaf24@2498 | 825 /* We cleared the 'valid' bit, so we do the cleanup. */ |
kaf24@2498 | 826 free_page_type(page, x & PGT_type_mask); |
kaf24@2498 | 827 /* Carry on, but with the 'valid bit' now clear. */ |
kaf24@2498 | 828 x &= ~PGT_validated; |
kaf24@2498 | 829 nx &= ~PGT_validated; |
kaf24@2498 | 830 } |
kaf24@2498 | 831 } |
cl349@2644 | 832 else if ( unlikely((nx & (PGT_pinned | PGT_count_mask)) == |
kaf24@2498 | 833 (PGT_pinned | 1)) ) |
cl349@2644 | 834 { |
kaf24@2498 | 835 /* Page is now only pinned. Make the back pointer mutable again. */ |
cl349@2644 | 836 nx |= PGT_va_mutable; |
cl349@2644 | 837 } |
kaf24@2498 | 838 } |
kaf24@2498 | 839 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); |
kaf24@2498 | 840 } |
kaf24@2498 | 841 |
kaf24@2498 | 842 |
kaf24@2498 | 843 int get_page_type(struct pfn_info *page, u32 type) |
kaf24@2498 | 844 { |
kaf24@2498 | 845 u32 nx, x, y = page->u.inuse.type_info; |
kaf24@2498 | 846 |
kaf24@2498 | 847 again: |
kaf24@2498 | 848 do { |
kaf24@2498 | 849 x = y; |
kaf24@2498 | 850 nx = x + 1; |
kaf24@2498 | 851 if ( unlikely((nx & PGT_count_mask) == 0) ) |
kaf24@2498 | 852 { |
kaf24@2498 | 853 MEM_LOG("Type count overflow on pfn %08lx\n", page_to_pfn(page)); |
kaf24@2498 | 854 return 0; |
kaf24@2498 | 855 } |
kaf24@2498 | 856 else if ( unlikely((x & PGT_count_mask) == 0) ) |
kaf24@2498 | 857 { |
kaf24@2498 | 858 if ( (x & (PGT_type_mask|PGT_va_mask)) != type ) |
kaf24@2498 | 859 { |
kaf24@2498 | 860 /* |
kaf24@2498 | 861 * On a type change we check whether to flush stale TLB entries. This |
kaf24@2498 | 862 * may be unnecessary (e.g., page was GDT/LDT) but those |
kaf24@2498 | 863 * circumstances should be very rare. |
kaf24@2498 | 864 */ |
kaf24@2498 | 865 struct domain *d = page->u.inuse.domain; |
cl349@2957 | 866 if ( unlikely(NEED_FLUSH(tlbflush_time[d->exec_domain[0]->processor], |
kaf24@2498 | 867 page->tlbflush_timestamp)) ) |
kaf24@2498 | 868 { |
kaf24@2498 | 869 perfc_incr(need_flush_tlb_flush); |
cl349@2957 | 870 flush_tlb_cpu(d->exec_domain[0]->processor); |
kaf24@2498 | 871 } |
kaf24@2498 | 872 |
kaf24@2498 | 873 /* We lose existing type, back pointer, and validity. */ |
kaf24@2498 | 874 nx &= ~(PGT_type_mask | PGT_va_mask | PGT_validated); |
kaf24@2498 | 875 nx |= type; |
kaf24@2498 | 876 |
kaf24@2498 | 877 /* No special validation needed for writable pages. */ |
kaf24@2498 | 878 /* Page tables and GDT/LDT need to be scanned for validity. */ |
kaf24@2498 | 879 if ( type == PGT_writable_page ) |
kaf24@2498 | 880 nx |= PGT_validated; |
kaf24@2498 | 881 } |
kaf24@2498 | 882 } |
kaf24@2498 | 883 else if ( unlikely((x & (PGT_type_mask|PGT_va_mask)) != type) ) |
kaf24@2498 | 884 { |
kaf24@2498 | 885 if ( unlikely((x & PGT_type_mask) != (type & PGT_type_mask) ) ) |
kaf24@2498 | 886 { |
kaf24@2498 | 887 if ( ((x & PGT_type_mask) != PGT_l2_page_table) || |
kaf24@2498 | 888 ((type & PGT_type_mask) != PGT_l1_page_table) ) |
kaf24@2498 | 889 MEM_LOG("Bad type (saw %08x != exp %08x) for pfn %08lx\n", |
kaf24@2498 | 890 x & PGT_type_mask, type, page_to_pfn(page)); |
kaf24@2498 | 891 return 0; |
kaf24@2498 | 892 } |
kaf24@2498 | 893 else if ( (x & PGT_va_mask) == PGT_va_mutable ) |
kaf24@2498 | 894 { |
kaf24@2498 | 895 /* The va backpointer is mutable, hence we update it. */ |
kaf24@2498 | 896 nx &= ~PGT_va_mask; |
kaf24@2498 | 897 nx |= type; /* we know the actual type is correct */ |
kaf24@2498 | 898 } |
kaf24@2498 | 899 else if ( unlikely((x & PGT_va_mask) != (type & PGT_va_mask)) ) |
kaf24@2498 | 900 { |
kaf24@2506 | 901 /* This table is potentially mapped at multiple locations. */ |
kaf24@2506 | 902 nx &= ~PGT_va_mask; |
kaf24@2506 | 903 nx |= PGT_va_unknown; |
kaf24@2498 | 904 } |
kaf24@2498 | 905 } |
cl349@2644 | 906 else if ( unlikely(!(x & PGT_validated)) ) |
kaf24@2498 | 907 { |
kaf24@2498 | 908 /* Someone else is updating validation of this page. Wait... */ |
kaf24@2498 | 909 while ( (y = page->u.inuse.type_info) == x ) |
kaf24@2498 | 910 { |
kaf24@2498 | 911 rep_nop(); |
kaf24@2498 | 912 barrier(); |
kaf24@2498 | 913 } |
kaf24@2498 | 914 goto again; |
kaf24@2498 | 915 } |
kaf24@2498 | 916 } |
kaf24@2498 | 917 while ( unlikely((y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x) ); |
kaf24@2498 | 918 |
kaf24@2498 | 919 if ( unlikely(!(nx & PGT_validated)) ) |
kaf24@2498 | 920 { |
kaf24@2498 | 921 /* Try to validate page type; drop the new reference on failure. */ |
kaf24@2498 | 922 if ( unlikely(!alloc_page_type(page, type & PGT_type_mask)) ) |
kaf24@2498 | 923 { |
kaf24@2498 | 924 MEM_LOG("Error while validating pfn %08lx for type %08x." |
kaf24@2498 | 925 " caf=%08x taf=%08x\n", |
kaf24@2498 | 926 page_to_pfn(page), type, |
cl349@2644 | 927 page->count_info, |
cl349@2644 | 928 page->u.inuse.type_info); |
kaf24@2498 | 929 /* No one else can get a reference. We hold the only ref. */ |
kaf24@2498 | 930 page->u.inuse.type_info = 0; |
kaf24@2498 | 931 return 0; |
kaf24@2498 | 932 } |
kaf24@2498 | 933 |
kaf24@2498 | 934 /* No one else is updating simultaneously. */ |
kaf24@2498 | 935 __set_bit(_PGT_validated, &page->u.inuse.type_info); |
kaf24@2498 | 936 } |
kaf24@2498 | 937 |
kaf24@2498 | 938 return 1; |
kaf24@2498 | 939 } |
kaf24@2498 | 940 |
kaf24@2498 | 941 |
kaf24@3443 | 942 int new_guest_cr3(unsigned long pfn) |
kaf24@3443 | 943 { |
kaf24@3443 | 944 struct exec_domain *ed = current; |
kaf24@3443 | 945 struct domain *d = ed->domain; |
kaf24@3443 | 946 int okay, cpu = smp_processor_id(); |
kaf24@3443 | 947 unsigned long old_base_pfn; |
kaf24@3443 | 948 |
kaf24@3443 | 949 okay = get_page_and_type_from_pagenr(pfn, PGT_l2_page_table, d); |
kaf24@3443 | 950 if ( likely(okay) ) |
kaf24@3443 | 951 { |
kaf24@3443 | 952 invalidate_shadow_ldt(ed); |
kaf24@3443 | 953 |
kaf24@3443 | 954 percpu_info[cpu].deferred_ops &= ~DOP_FLUSH_TLB; |
kaf24@3443 | 955 old_base_pfn = pagetable_val(ed->mm.pagetable) >> PAGE_SHIFT; |
kaf24@3443 | 956 ed->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT); |
kaf24@3443 | 957 |
kaf24@3443 | 958 shadow_mk_pagetable(&ed->mm); |
kaf24@3443 | 959 |
kaf24@3443 | 960 write_ptbase(&ed->mm); |
kaf24@3443 | 961 |
kaf24@3443 | 962 put_page_and_type(&frame_table[old_base_pfn]); |
kaf24@3443 | 963 } |
kaf24@3443 | 964 else |
kaf24@3443 | 965 { |
kaf24@3517 | 966 MEM_LOG("Error while installing new baseptr %08lx", pfn); |
kaf24@3443 | 967 } |
kaf24@3443 | 968 |
kaf24@3443 | 969 return okay; |
kaf24@3443 | 970 } |
kaf24@3443 | 971 |
djm@1749 | 972 static int do_extended_command(unsigned long ptr, unsigned long val) |
djm@1749 | 973 { |
djm@1749 | 974 int okay = 1, cpu = smp_processor_id(); |
djm@1749 | 975 unsigned int cmd = val & MMUEXT_CMD_MASK; |
djm@1749 | 976 unsigned long pfn = ptr >> PAGE_SHIFT; |
djm@1749 | 977 struct pfn_info *page = &frame_table[pfn]; |
cl349@2957 | 978 struct exec_domain *ed = current; |
cl349@2957 | 979 struct domain *d = ed->domain, *nd, *e; |
djm@1749 | 980 u32 x, y; |
djm@1749 | 981 domid_t domid; |
kaf24@2385 | 982 grant_ref_t gntref; |
djm@1749 | 983 |
djm@1749 | 984 switch ( cmd ) |
djm@1749 | 985 { |
kaf24@2465 | 986 case MMUEXT_PIN_L1_TABLE: |
kaf24@2465 | 987 case MMUEXT_PIN_L2_TABLE: |
kaf24@2466 | 988 /* |
kaf24@2466 | 989 * We insist that, if you pin an L1 page, it's the first thing that |
kaf24@2466 | 990 * you do to it. This is because we require the backptr to still be |
kaf24@2466 | 991 * mutable. This assumption seems safe. |
kaf24@2466 | 992 */ |
djm@1749 | 993 okay = get_page_and_type_from_pagenr( |
kaf24@2465 | 994 pfn, |
kaf24@2465 | 995 ((cmd==MMUEXT_PIN_L2_TABLE) ? |
cl349@2491 | 996 PGT_l2_page_table : (PGT_l1_page_table|PGT_va_mutable)), |
kaf24@2465 | 997 FOREIGNDOM); |
iap10@2458 | 998 |
djm@1749 | 999 if ( unlikely(!okay) ) |
djm@1749 | 1000 { |
djm@1749 | 1001 MEM_LOG("Error while pinning pfn %08lx", pfn); |
djm@1749 | 1002 break; |
djm@1749 | 1003 } |
djm@1749 | 1004 |
kaf24@2466 | 1005 if ( unlikely(test_and_set_bit(_PGT_pinned, |
kaf24@2466 | 1006 &page->u.inuse.type_info)) ) |
djm@1749 | 1007 { |
djm@1749 | 1008 MEM_LOG("Pfn %08lx already pinned", pfn); |
djm@1749 | 1009 put_page_and_type(page); |
djm@1749 | 1010 okay = 0; |
djm@1749 | 1011 break; |
djm@1749 | 1012 } |
djm@1749 | 1013 |
djm@1749 | 1014 break; |
djm@1749 | 1015 |
djm@1749 | 1016 case MMUEXT_UNPIN_TABLE: |
kaf24@2314 | 1017 if ( unlikely(!(okay = get_page_from_pagenr(pfn, FOREIGNDOM))) ) |
djm@1749 | 1018 { |
djm@1749 | 1019 MEM_LOG("Page %08lx bad domain (dom=%p)", |
kaf24@1970 | 1020 ptr, page->u.inuse.domain); |
djm@1749 | 1021 } |
kaf24@2466 | 1022 else if ( likely(test_and_clear_bit(_PGT_pinned, |
kaf24@2466 | 1023 &page->u.inuse.type_info)) ) |
djm@1749 | 1024 { |
djm@1749 | 1025 put_page_and_type(page); |
djm@1749 | 1026 put_page(page); |
djm@1749 | 1027 } |
djm@1749 | 1028 else |
djm@1749 | 1029 { |
djm@1749 | 1030 okay = 0; |
djm@1749 | 1031 put_page(page); |
djm@1749 | 1032 MEM_LOG("Pfn %08lx not pinned", pfn); |
djm@1749 | 1033 } |
djm@1749 | 1034 break; |
djm@1749 | 1035 |
djm@1749 | 1036 case MMUEXT_NEW_BASEPTR: |
kaf24@3443 | 1037 okay = new_guest_cr3(pfn); |
djm@1749 | 1038 break; |
djm@1749 | 1039 |
djm@1749 | 1040 case MMUEXT_TLB_FLUSH: |
djm@1749 | 1041 percpu_info[cpu].deferred_ops |= DOP_FLUSH_TLB; |
djm@1749 | 1042 break; |
djm@1749 | 1043 |
djm@1749 | 1044 case MMUEXT_INVLPG: |
djm@1749 | 1045 __flush_tlb_one(ptr); |
djm@1749 | 1046 break; |
djm@1749 | 1047 |
kaf24@2463 | 1048 case MMUEXT_FLUSH_CACHE: |
kaf24@2463 | 1049 if ( unlikely(!IS_CAPABLE_PHYSDEV(d)) ) |
kaf24@2463 | 1050 { |
kaf24@2463 | 1051 MEM_LOG("Non-physdev domain tried to FLUSH_CACHE.\n"); |
kaf24@2463 | 1052 okay = 0; |
kaf24@2463 | 1053 } |
kaf24@2463 | 1054 else |
kaf24@2463 | 1055 { |
kaf24@2463 | 1056 wbinvd(); |
kaf24@2463 | 1057 } |
kaf24@2463 | 1058 break; |
kaf24@2463 | 1059 |
djm@1749 | 1060 case MMUEXT_SET_LDT: |
djm@1749 | 1061 { |
djm@1749 | 1062 unsigned long ents = val >> MMUEXT_CMD_SHIFT; |
djm@1749 | 1063 if ( ((ptr & (PAGE_SIZE-1)) != 0) || |
djm@1749 | 1064 (ents > 8192) || |
djm@1749 | 1065 ((ptr+ents*LDT_ENTRY_SIZE) < ptr) || |
djm@1749 | 1066 ((ptr+ents*LDT_ENTRY_SIZE) > PAGE_OFFSET) ) |
djm@1749 | 1067 { |
djm@1749 | 1068 okay = 0; |
djm@1749 | 1069 MEM_LOG("Bad args to SET_LDT: ptr=%08lx, ents=%08lx", ptr, ents); |
djm@1749 | 1070 } |
cl349@2957 | 1071 else if ( (ed->mm.ldt_ents != ents) || |
cl349@2957 | 1072 (ed->mm.ldt_base != ptr) ) |
djm@1749 | 1073 { |
cl349@2957 | 1074 invalidate_shadow_ldt(ed); |
cl349@2957 | 1075 ed->mm.ldt_base = ptr; |
cl349@2957 | 1076 ed->mm.ldt_ents = ents; |
cl349@2957 | 1077 load_LDT(ed); |
djm@1749 | 1078 percpu_info[cpu].deferred_ops &= ~DOP_RELOAD_LDT; |
djm@1749 | 1079 if ( ents != 0 ) |
djm@1749 | 1080 percpu_info[cpu].deferred_ops |= DOP_RELOAD_LDT; |
djm@1749 | 1081 } |
djm@1749 | 1082 break; |
djm@1749 | 1083 } |
djm@1749 | 1084 |
kaf24@2314 | 1085 case MMUEXT_SET_FOREIGNDOM: |
kaf24@2314 | 1086 domid = (domid_t)(val >> 16); |
djm@1749 | 1087 |
kaf24@2362 | 1088 if ( (e = percpu_info[cpu].foreign) != NULL ) |
kaf24@2362 | 1089 put_domain(e); |
kaf24@2362 | 1090 percpu_info[cpu].foreign = NULL; |
kaf24@2362 | 1091 |
djm@1749 | 1092 if ( !IS_PRIV(d) ) |
djm@1749 | 1093 { |
kaf24@2336 | 1094 switch ( domid ) |
kaf24@2336 | 1095 { |
kaf24@2336 | 1096 case DOMID_IO: |
kaf24@2362 | 1097 get_knownalive_domain(dom_io); |
kaf24@2362 | 1098 percpu_info[cpu].foreign = dom_io; |
kaf24@2336 | 1099 break; |
kaf24@2336 | 1100 default: |
kaf24@2748 | 1101 MEM_LOG("Dom %u cannot set foreign dom\n", d->id); |
kaf24@2336 | 1102 okay = 0; |
kaf24@2336 | 1103 break; |
kaf24@2336 | 1104 } |
djm@1749 | 1105 } |
djm@1749 | 1106 else |
djm@1749 | 1107 { |
kaf24@2314 | 1108 percpu_info[cpu].foreign = e = find_domain_by_id(domid); |
kaf24@2314 | 1109 if ( e == NULL ) |
djm@1749 | 1110 { |
kaf24@2336 | 1111 switch ( domid ) |
kaf24@2336 | 1112 { |
kaf24@2336 | 1113 case DOMID_XEN: |
kaf24@2362 | 1114 get_knownalive_domain(dom_xen); |
kaf24@2362 | 1115 percpu_info[cpu].foreign = dom_xen; |
kaf24@2336 | 1116 break; |
kaf24@2336 | 1117 case DOMID_IO: |
kaf24@2362 | 1118 get_knownalive_domain(dom_io); |
kaf24@2362 | 1119 percpu_info[cpu].foreign = dom_io; |
kaf24@2336 | 1120 break; |
kaf24@2336 | 1121 default: |
kaf24@2336 | 1122 MEM_LOG("Unknown domain '%u'", domid); |
kaf24@2336 | 1123 okay = 0; |
kaf24@2336 | 1124 break; |
kaf24@2336 | 1125 } |
djm@1749 | 1126 } |
djm@1749 | 1127 } |
djm@1749 | 1128 break; |
djm@1749 | 1129 |
kaf24@2385 | 1130 case MMUEXT_TRANSFER_PAGE: |
kaf24@2385 | 1131 domid = (domid_t)(val >> 16); |
kaf24@2385 | 1132 gntref = (grant_ref_t)((val & 0xFF00) | ((ptr >> 2) & 0x00FF)); |
kaf24@2385 | 1133 |
kaf24@2385 | 1134 if ( unlikely(IS_XEN_HEAP_FRAME(page)) || |
kaf24@2385 | 1135 unlikely(!pfn_is_ram(pfn)) || |
kaf24@2385 | 1136 unlikely((e = find_domain_by_id(domid)) == NULL) ) |
kaf24@2385 | 1137 { |
kaf24@2385 | 1138 MEM_LOG("Bad frame (%08lx) or bad domid (%d).\n", pfn, domid); |
kaf24@2385 | 1139 okay = 0; |
kaf24@2385 | 1140 break; |
kaf24@2385 | 1141 } |
kaf24@2385 | 1142 |
kaf24@2385 | 1143 spin_lock(&d->page_alloc_lock); |
kaf24@2385 | 1144 |
kaf24@2385 | 1145 /* |
kaf24@2385 | 1146 * The tricky bit: atomically release ownership while there is just one |
kaf24@2385 | 1147 * benign reference to the page (PGC_allocated). If that reference |
kaf24@2385 | 1148 * disappears then the deallocation routine will safely spin. |
kaf24@2385 | 1149 */ |
kaf24@2385 | 1150 nd = page->u.inuse.domain; |
kaf24@2385 | 1151 y = page->count_info; |
kaf24@2385 | 1152 do { |
kaf24@2385 | 1153 x = y; |
kaf24@2385 | 1154 if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != |
kaf24@2385 | 1155 (1|PGC_allocated)) || |
kaf24@2385 | 1156 unlikely(nd != d) ) |
kaf24@2385 | 1157 { |
kaf24@2385 | 1158 MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p," |
kaf24@2385 | 1159 " caf=%08x, taf=%08x\n", page_to_pfn(page), |
kaf24@2748 | 1160 d, d->id, nd, x, page->u.inuse.type_info); |
kaf24@2385 | 1161 spin_unlock(&d->page_alloc_lock); |
kaf24@2385 | 1162 put_domain(e); |
kaf24@2663 | 1163 return 0; |
kaf24@2385 | 1164 } |
kaf24@2385 | 1165 __asm__ __volatile__( |
kaf24@2385 | 1166 LOCK_PREFIX "cmpxchg8b %2" |
kaf24@2385 | 1167 : "=d" (nd), "=a" (y), |
kaf24@2385 | 1168 "=m" (*(volatile u64 *)(&page->count_info)) |
kaf24@2385 | 1169 : "0" (d), "1" (x), "c" (NULL), "b" (x) ); |
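        /*
         * [Editor's note.] The cmpxchg8b above atomically compares the
         * adjacent {count_info, owner} pair against {x, d} and, on match,
         * writes {x, NULL}: ownership is released while the single
         * PGC_allocated reference stays in place. On mismatch the observed
         * values land in (y, nd) and the loop retries.
         */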
kaf24@2385 | 1170 } |
kaf24@2385 | 1171 while ( unlikely(nd != d) || unlikely(y != x) ); |
kaf24@2385 | 1172 |
kaf24@2385 | 1173 /* |
kaf24@2385 | 1174 * Unlink from 'd'. At least one reference remains (now anonymous), so |
kaf24@2385 | 1175 * no one else is spinning to try to delete this page from 'd'. |
kaf24@2385 | 1176 */ |
kaf24@2385 | 1177 d->tot_pages--; |
kaf24@2385 | 1178 list_del(&page->list); |
kaf24@2385 | 1179 |
kaf24@2385 | 1180 spin_unlock(&d->page_alloc_lock); |
kaf24@2385 | 1181 |
kaf24@2385 | 1182 spin_lock(&e->page_alloc_lock); |
kaf24@2385 | 1183 |
kaf24@2466 | 1184 /* |
kaf24@2466 | 1185 * Check that 'e' will accept the page and has reservation headroom. |
kaf24@2466 | 1186 * Also, a domain mustn't have PGC_allocated pages when it is dying. |
kaf24@2466 | 1187 */ |
kaf24@2385 | 1188 ASSERT(e->tot_pages <= e->max_pages); |
cl349@2957 | 1189 if ( unlikely(test_bit(DF_DYING, &e->d_flags)) || |
kaf24@2466 | 1190 unlikely(e->tot_pages == e->max_pages) || |
kaf24@2385 | 1191 unlikely(!gnttab_prepare_for_transfer(e, d, gntref)) ) |
kaf24@2385 | 1192 { |
kaf24@2431 | 1193 MEM_LOG("Transferee has no reservation headroom (%d,%d), or " |
kaf24@2469 | 1194 "provided a bad grant ref, or is dying (%08lx).\n", |
cl349@2957 | 1195 e->tot_pages, e->max_pages, e->d_flags); |
kaf24@2385 | 1196 spin_unlock(&e->page_alloc_lock); |
kaf24@2385 | 1197 put_domain(e); |
kaf24@2385 | 1198 okay = 0; |
kaf24@2385 | 1199 break; |
kaf24@2385 | 1200 } |
kaf24@2385 | 1201 |
kaf24@2385 | 1202 /* Okay, add the page to 'e'. */ |
kaf24@2385 | 1203 if ( unlikely(e->tot_pages++ == 0) ) |
kaf24@2385 | 1204 get_knownalive_domain(e); |
kaf24@2385 | 1205 list_add_tail(&page->list, &e->page_list); |
kaf24@2385 | 1206 page->u.inuse.domain = e; |
kaf24@2385 | 1207 |
kaf24@2385 | 1208 spin_unlock(&e->page_alloc_lock); |
kaf24@2385 | 1209 |
kaf24@2385 | 1210 /* Transfer is all done: tell the guest about its new page frame. */ |
kaf24@2385 | 1211 gnttab_notify_transfer(e, gntref, pfn); |
kaf24@2385 | 1212 |
kaf24@2385 | 1213 put_domain(e); |
kaf24@2385 | 1214 break; |
kaf24@2385 | 1215 |
djm@1749 | 1216 case MMUEXT_REASSIGN_PAGE: |
djm@1749 | 1217 if ( unlikely(!IS_PRIV(d)) ) |
djm@1749 | 1218 { |
kaf24@2748 | 1219 MEM_LOG("Dom %u has no reassignment priv", d->id); |
djm@1749 | 1220 okay = 0; |
djm@1749 | 1221 break; |
djm@1749 | 1222 } |
djm@1749 | 1223 |
kaf24@2314 | 1224 e = percpu_info[cpu].foreign; |
kaf24@2314 | 1225 if ( unlikely(e == NULL) ) |
djm@1749 | 1226 { |
kaf24@2314 | 1227 MEM_LOG("No FOREIGNDOM to reassign pfn %08lx to", pfn); |
djm@1749 | 1228 okay = 0; |
djm@1749 | 1229 break; |
djm@1749 | 1230 } |
djm@1749 | 1231 |
djm@1749 | 1232 /* |
djm@1749 | 1233 * Grab both page_list locks, in order. This prevents the page from |
djm@1749 | 1234 * disappearing elsewhere while we modify the owner, and we'll need |
djm@1749 | 1235 * both locks if we're successful so that we can change lists. |
djm@1749 | 1236 */ |
djm@1749 | 1237 if ( d < e ) |
djm@1749 | 1238 { |
djm@1749 | 1239 spin_lock(&d->page_alloc_lock); |
djm@1749 | 1240 spin_lock(&e->page_alloc_lock); |
djm@1749 | 1241 } |
djm@1749 | 1242 else |
djm@1749 | 1243 { |
djm@1749 | 1244 spin_lock(&e->page_alloc_lock); |
djm@1749 | 1245 spin_lock(&d->page_alloc_lock); |
djm@1749 | 1246 } |
djm@1749 | 1247 |
djm@1749 | 1248 /* A domain shouldn't have PGC_allocated pages when it is dying. */ |
cl349@2957 | 1249 if ( unlikely(test_bit(DF_DYING, &e->d_flags)) || |
djm@1749 | 1250 unlikely(IS_XEN_HEAP_FRAME(page)) ) |
djm@1749 | 1251 { |
kaf24@1871 | 1252 MEM_LOG("Reassignment page is Xen heap, or dest dom is dying."); |
djm@1749 | 1253 okay = 0; |
djm@1749 | 1254 goto reassign_fail; |
djm@1749 | 1255 } |
djm@1749 | 1256 |
djm@1749 | 1257 /* |
djm@1749 | 1258 * The tricky bit: atomically change owner while there is just one |
djm@1749 | 1259 * benign reference to the page (PGC_allocated). If that reference |
djm@1749 | 1260 * disappears then the deallocation routine will safely spin. |
djm@1749 | 1261 */ |
kaf24@1970 | 1262 nd = page->u.inuse.domain; |
kaf24@2384 | 1263 y = page->count_info; |
djm@1749 | 1264 do { |
djm@1749 | 1265 x = y; |
djm@1749 | 1266 if ( unlikely((x & (PGC_count_mask|PGC_allocated)) != |
djm@1749 | 1267 (1|PGC_allocated)) || |
djm@1749 | 1268 unlikely(nd != d) ) |
djm@1749 | 1269 { |
djm@1749 | 1270 MEM_LOG("Bad page values %08lx: ed=%p(%u), sd=%p," |
djm@1749 | 1271 " caf=%08x, taf=%08x\n", page_to_pfn(page), |
kaf24@2748 | 1272 d, d->id, nd, x, page->u.inuse.type_info); |
djm@1749 | 1273 okay = 0; |
djm@1749 | 1274 goto reassign_fail; |
djm@1749 | 1275 } |
djm@1749 | 1276 __asm__ __volatile__( |
djm@1749 | 1277 LOCK_PREFIX "cmpxchg8b %3" |
kaf24@2384 | 1278 : "=d" (nd), "=a" (y), "=c" (e), |
kaf24@2384 | 1279 "=m" (*(volatile u64 *)(&page->count_info)) |
kaf24@2384 | 1280 : "0" (d), "1" (x), "c" (e), "b" (x) ); |
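        /*
         * [Editor's note.] As in MMUEXT_TRANSFER_PAGE above, this cmpxchg8b
         * atomically swaps the {count_info, owner} pair from {x, d} to
         * {x, e}, so ownership moves to the foreign domain only while
         * count_info is still the single-reference value checked above.
         */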
djm@1749 | 1281 } |
djm@1749 | 1282 while ( unlikely(nd != d) || unlikely(y != x) ); |
djm@1749 | 1283 |
djm@1749 | 1284 /* |
djm@1749 | 1285 * Unlink from 'd'. We transferred at least one reference to 'e', so |
djm@1749 | 1286 * noone else is spinning to try to delete this page from 'd'. |
djm@1749 | 1287 */ |
djm@1749 | 1288 d->tot_pages--; |
djm@1749 | 1289 list_del(&page->list); |
djm@1749 | 1290 |
djm@1749 | 1291 /* |
djm@1749 | 1292 * Add the page to 'e'. Someone may already have removed the last |
djm@1749 | 1293 * reference and want to remove the page from 'e'. However, we have |
djm@1749 | 1294 * the lock so they'll spin waiting for us. |
djm@1749 | 1295 */ |
djm@1749 | 1296 if ( unlikely(e->tot_pages++ == 0) ) |
kaf24@2336 | 1297 get_knownalive_domain(e); |
djm@1749 | 1298 list_add_tail(&page->list, &e->page_list); |
djm@1749 | 1299 |
djm@1749 | 1300 reassign_fail: |
djm@1749 | 1301 spin_unlock(&d->page_alloc_lock); |
djm@1749 | 1302 spin_unlock(&e->page_alloc_lock); |
djm@1749 | 1303 break; |
djm@1749 | 1304 |
kaf24@2314 | 1305 case MMUEXT_CLEAR_FOREIGNDOM: |
kaf24@2314 | 1306 if ( (e = percpu_info[cpu].foreign) != NULL ) |
kaf24@2314 | 1307 put_domain(e); |
kaf24@2314 | 1308 percpu_info[cpu].foreign = NULL; |
djm@1749 | 1309 break; |
djm@1749 | 1310 |
djm@1749 | 1311 default: |
djm@1749 | 1312 MEM_LOG("Invalid extended pt command 0x%08lx", val & MMUEXT_CMD_MASK); |
djm@1749 | 1313 okay = 0; |
djm@1749 | 1314 break; |
djm@1749 | 1315 } |
djm@1749 | 1316 |
djm@1749 | 1317 return okay; |
djm@1749 | 1318 } |
djm@1749 | 1319 |
kaf24@3177 | 1320 int do_mmu_update( |
kaf24@3177 | 1321 mmu_update_t *ureqs, unsigned int count, unsigned int *pdone) |
kaf24@3177 | 1322 { |
kaf24@3177 | 1323 /* |
kaf24@3177 | 1324 * We steal the m.s.b. of the @count parameter to indicate whether this |
kaf24@3177 | 1325 * invocation of do_mmu_update() is resuming a previously preempted call. |
kaf24@3187 | 1326 * We steal the next 15 bits to remember the current FOREIGNDOM. |
kaf24@3177 | 1327 */ |
kaf24@3187 | 1328 #define MMU_UPDATE_PREEMPTED (~(~0U>>1)) |
kaf24@3187 | 1329 #define MMU_UPDATE_PREEMPT_FDOM_SHIFT ((sizeof(int)*8)-16) |
kaf24@3187 | 1330 #define MMU_UPDATE_PREEMPT_FDOM_MASK (0x7FFFU<<MMU_UPDATE_PREEMPT_FDOM_SHIFT) |
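/*
 * With a 32-bit 'count' this gives the following encoding on the
 * preemption path:
 *   bit  31     - MMU_UPDATE_PREEMPTED flag
 *   bits 30..16 - saved FOREIGNDOM id
 *   bits 15..0  - number of update requests outstanding
 */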
djm@1749 | 1331 |
djm@1749 | 1332 mmu_update_t req; |
djm@1749 | 1333 unsigned long va = 0, deferred_ops, pfn, prev_pfn = 0; |
djm@1749 | 1334 struct pfn_info *page; |
kaf24@3187 | 1335 int rc = 0, okay = 1, i = 0, cpu = smp_processor_id(); |
kaf24@3177 | 1336 unsigned int cmd, done = 0; |
djm@1749 | 1337 unsigned long prev_spfn = 0; |
djm@1749 | 1338 l1_pgentry_t *prev_spl1e = 0; |
cl349@2957 | 1339 struct exec_domain *ed = current; |
cl349@2957 | 1340 struct domain *d = ed->domain; |
kaf24@2466 | 1341 u32 type_info; |
kaf24@3187 | 1342 domid_t domid; |
djm@1749 | 1343 |
cl349@3036 | 1344 LOCK_BIGLOCK(d); |
cl349@3036 | 1345 |
kaf24@3517 | 1346 cleanup_writable_pagetable(d); |
kaf24@2375 | 1347 |
kaf24@3177 | 1348 /* |
kaf24@3177 | 1349 * If we are resuming after preemption, read how much work we have already |
kaf24@3177 | 1350 * done. This allows us to set the @done output parameter correctly. |
kaf24@3187 | 1351 * We also reset FOREIGNDOM here. |
kaf24@3177 | 1352 */ |
kaf24@3187 | 1353 if ( unlikely(count&(MMU_UPDATE_PREEMPTED|MMU_UPDATE_PREEMPT_FDOM_MASK)) ) |
kaf24@3177 | 1354 { |
kaf24@3187 | 1355 if ( !(count & MMU_UPDATE_PREEMPTED) ) |
kaf24@3187 | 1356 { |
kaf24@3187 | 1357 /* Count overflow into private FOREIGNDOM field. */ |
kaf24@3187 | 1358 MEM_LOG("do_mmu_update count is too large"); |
kaf24@3187 | 1359 rc = -EINVAL; |
kaf24@3187 | 1360 goto out; |
kaf24@3187 | 1361 } |
kaf24@3177 | 1362 count &= ~MMU_UPDATE_PREEMPTED; |
kaf24@3187 | 1363 domid = count >> MMU_UPDATE_PREEMPT_FDOM_SHIFT; |
kaf24@3187 | 1364 count &= ~MMU_UPDATE_PREEMPT_FDOM_MASK; |
kaf24@3177 | 1365 if ( unlikely(pdone != NULL) ) |
kaf24@3177 | 1366 (void)get_user(done, pdone); |
cl349@3193 | 1367 if ( (domid != current->domain->id) && |
kaf24@3187 | 1368 !do_extended_command(0, MMUEXT_SET_FOREIGNDOM | (domid << 16)) ) |
kaf24@3187 | 1369 { |
kaf24@3187 | 1370 rc = -EINVAL; |
kaf24@3187 | 1371 goto out; |
kaf24@3187 | 1372 } |
kaf24@3177 | 1373 } |
kaf24@3177 | 1374 |
kaf24@3269 | 1375 perfc_incrc(calls_to_mmu_update); |
kaf24@3269 | 1376 perfc_addc(num_page_updates, count); |
kaf24@3269 | 1377 |
kaf24@3177 | 1378 if ( unlikely(!array_access_ok(VERIFY_READ, ureqs, count, sizeof(req))) ) |
kaf24@3187 | 1379 { |
kaf24@3187 | 1380 rc = -EFAULT; |
kaf24@3187 | 1381 goto out; |
kaf24@3187 | 1382 } |
cl349@1860 | 1383 |
djm@1749 | 1384 for ( i = 0; i < count; i++ ) |
djm@1749 | 1385 { |
kaf24@3177 | 1386 if ( hypercall_preempt_check() ) |
kaf24@3177 | 1387 { |
kaf24@3187 | 1388 rc = hypercall_create_continuation( |
kaf24@3177 | 1389 __HYPERVISOR_mmu_update, 3, ureqs, |
kaf24@3187 | 1390 (count - i) | |
kaf24@3187 | 1391 (FOREIGNDOM->id << MMU_UPDATE_PREEMPT_FDOM_SHIFT) | |
kaf24@3187 | 1392 MMU_UPDATE_PREEMPTED, pdone); |
kaf24@3177 | 1393 break; |
kaf24@3177 | 1394 } |
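/*
 * The 'count' argument passed to the continuation above re-packs the
 * outstanding request count, the current FOREIGNDOM id and the
 * MMU_UPDATE_PREEMPTED flag using the layout decoded at the top of
 * this function, so the resumed call can unpack them again.
 */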
kaf24@3129 | 1395 |
kaf24@2375 | 1396 if ( unlikely(__copy_from_user(&req, ureqs, sizeof(req)) != 0) ) |
djm@1749 | 1397 { |
kaf24@2375 | 1398 MEM_LOG("Bad __copy_from_user"); |
djm@1749 | 1399 rc = -EFAULT; |
djm@1749 | 1400 break; |
djm@1749 | 1401 } |
djm@1749 | 1402 |
djm@1749 | 1403 cmd = req.ptr & (sizeof(l1_pgentry_t)-1); |
djm@1749 | 1404 pfn = req.ptr >> PAGE_SHIFT; |
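/* Page-table entries are at least word-aligned, so the low bits of
 * 'ptr' are free to carry the update command; the remaining bits give
 * the machine address of the entry being updated, and hence the frame
 * that holds it. */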
djm@1749 | 1405 |
djm@1749 | 1406 okay = 0; |
djm@1749 | 1407 |
djm@1749 | 1408 switch ( cmd ) |
djm@1749 | 1409 { |
djm@1749 | 1410 /* |
djm@1749 | 1411 * MMU_NORMAL_PT_UPDATE: Normal update to any level of page table. |
djm@1749 | 1412 */ |
djm@1749 | 1413 case MMU_NORMAL_PT_UPDATE: |
cl349@2957 | 1414 if ( unlikely(!get_page_from_pagenr(pfn, current->domain)) ) |
djm@1749 | 1415 { |
djm@1749 | 1416 MEM_LOG("Could not get page for normal update"); |
djm@1749 | 1417 break; |
djm@1749 | 1418 } |
djm@1749 | 1419 |
djm@1749 | 1420 if ( likely(prev_pfn == pfn) ) |
djm@1749 | 1421 { |
djm@1749 | 1422 va = (va & PAGE_MASK) | (req.ptr & ~PAGE_MASK); |
djm@1749 | 1423 } |
djm@1749 | 1424 else |
djm@1749 | 1425 { |
djm@1749 | 1426 if ( prev_pfn != 0 ) |
djm@1749 | 1427 unmap_domain_mem((void *)va); |
djm@1749 | 1428 va = (unsigned long)map_domain_mem(req.ptr); |
djm@1749 | 1429 prev_pfn = pfn; |
djm@1749 | 1430 } |
djm@1749 | 1431 |
djm@1749 | 1432 page = &frame_table[pfn]; |
kaf24@2466 | 1433 switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask ) |
djm@1749 | 1434 { |
djm@1749 | 1435 case PGT_l1_page_table: |
kaf24@2466 | 1436 if ( likely(get_page_type( |
kaf24@2466 | 1437 page, type_info & (PGT_type_mask|PGT_va_mask))) ) |
djm@1749 | 1438 { |
djm@1749 | 1439 okay = mod_l1_entry((l1_pgentry_t *)va, |
djm@1749 | 1440 mk_l1_pgentry(req.val)); |
djm@1749 | 1441 |
cl349@2957 | 1442 if ( unlikely(ed->mm.shadow_mode) && okay && |
cl349@2957 | 1443 (get_shadow_status(&ed->mm, page-frame_table) & |
djm@1749 | 1444 PSH_shadowed) ) |
djm@1749 | 1445 { |
kaf24@2375 | 1446 shadow_l1_normal_pt_update( |
kaf24@2375 | 1447 req.ptr, req.val, &prev_spfn, &prev_spl1e); |
cl349@2957 | 1448 put_shadow_status(&ed->mm); |
djm@1749 | 1449 } |
djm@1749 | 1450 |
djm@1749 | 1451 put_page_type(page); |
djm@1749 | 1452 } |
djm@1749 | 1453 break; |
djm@1749 | 1454 case PGT_l2_page_table: |
djm@1749 | 1455 if ( likely(get_page_type(page, PGT_l2_page_table)) ) |
djm@1749 | 1456 { |
djm@1749 | 1457 okay = mod_l2_entry((l2_pgentry_t *)va, |
djm@1749 | 1458 mk_l2_pgentry(req.val), |
djm@1749 | 1459 pfn); |
djm@1749 | 1460 |
cl349@2957 | 1461 if ( unlikely(ed->mm.shadow_mode) && okay && |
cl349@2957 | 1462 (get_shadow_status(&ed->mm, page-frame_table) & |
djm@1749 | 1463 PSH_shadowed) ) |
djm@1749 | 1464 { |
kaf24@2375 | 1465 shadow_l2_normal_pt_update(req.ptr, req.val); |
cl349@2957 | 1466 put_shadow_status(&ed->mm); |
djm@1749 | 1467 } |
djm@1749 | 1468 |
djm@1749 | 1469 put_page_type(page); |
djm@1749 | 1470 } |
djm@1749 | 1471 break; |
djm@1749 | 1472 default: |
kaf24@2375 | 1473 if ( likely(get_page_type(page, PGT_writable_page)) ) |
djm@1749 | 1474 { |
djm@1749 | 1475 *(unsigned long *)va = req.val; |
djm@1749 | 1476 okay = 1; |
djm@1749 | 1477 put_page_type(page); |
djm@1749 | 1478 } |
djm@1749 | 1479 break; |
djm@1749 | 1480 } |
djm@1749 | 1481 |
djm@1749 | 1482 put_page(page); |
djm@1749 | 1483 break; |
djm@1749 | 1484 |
djm@1749 | 1485 case MMU_MACHPHYS_UPDATE: |
kaf24@2314 | 1486 if ( unlikely(!get_page_from_pagenr(pfn, FOREIGNDOM)) ) |
djm@1749 | 1487 { |
djm@1749 | 1488 MEM_LOG("Could not get page for mach->phys update"); |
djm@1749 | 1489 break; |
djm@1749 | 1490 } |
djm@1749 | 1491 |
djm@1749 | 1492 machine_to_phys_mapping[pfn] = req.val; |
djm@1749 | 1493 okay = 1; |
djm@1749 | 1494 |
djm@1749 | 1495 /* |
djm@1749 | 1496 * If in log-dirty mode, mark the corresponding pseudo-physical |
djm@1749 | 1497 * page as dirty. |
djm@1749 | 1498 */ |
cl349@2957 | 1499 if ( unlikely(ed->mm.shadow_mode == SHM_logdirty) && |
cl349@2957 | 1500 mark_dirty(&ed->mm, pfn) ) |
cl349@2957 | 1501 ed->mm.shadow_dirty_block_count++; |
djm@1749 | 1502 |
djm@1749 | 1503 put_page(&frame_table[pfn]); |
djm@1749 | 1504 break; |
djm@1749 | 1505 |
djm@1749 | 1506 /* |
djm@1749 | 1507 * MMU_EXTENDED_COMMAND: Extended command is specified |
djm@1749 | 1508 * in the least-significant bits of the 'value' field. |
djm@1749 | 1509 */ |
djm@1749 | 1510 case MMU_EXTENDED_COMMAND: |
djm@1749 | 1511 req.ptr &= ~(sizeof(l1_pgentry_t) - 1); |
djm@1749 | 1512 okay = do_extended_command(req.ptr, req.val); |
djm@1749 | 1513 break; |
djm@1749 | 1514 |
djm@1749 | 1515 default: |
djm@1749 | 1516 MEM_LOG("Invalid page update command %08lx", req.ptr); |
djm@1749 | 1517 break; |
djm@1749 | 1518 } |
djm@1749 | 1519 |
djm@1749 | 1520 if ( unlikely(!okay) ) |
djm@1749 | 1521 { |
djm@1749 | 1522 rc = -EINVAL; |
djm@1749 | 1523 break; |
djm@1749 | 1524 } |
djm@1749 | 1525 |
djm@1749 | 1526 ureqs++; |
djm@1749 | 1527 } |
djm@1749 | 1528 |
kaf24@3187 | 1529 out: |
djm@1749 | 1530 if ( prev_pfn != 0 ) |
djm@1749 | 1531 unmap_domain_mem((void *)va); |
djm@1749 | 1532 |
kaf24@2375 | 1533 if ( unlikely(prev_spl1e != 0) ) |
djm@1749 | 1534 unmap_domain_mem((void *)prev_spl1e); |
djm@1749 | 1535 |
djm@1749 | 1536 deferred_ops = percpu_info[cpu].deferred_ops; |
djm@1749 | 1537 percpu_info[cpu].deferred_ops = 0; |
djm@1749 | 1538 |
djm@1749 | 1539 if ( deferred_ops & DOP_FLUSH_TLB ) |
djm@1749 | 1540 local_flush_tlb(); |
kaf24@2375 | 1541 |
djm@1749 | 1542 if ( deferred_ops & DOP_RELOAD_LDT ) |
djm@1749 | 1543 (void)map_ldt_shadow_page(0); |
djm@1749 | 1544 |
kaf24@2314 | 1545 if ( unlikely(percpu_info[cpu].foreign != NULL) ) |
djm@1749 | 1546 { |
kaf24@2314 | 1547 put_domain(percpu_info[cpu].foreign); |
kaf24@2314 | 1548 percpu_info[cpu].foreign = NULL; |
djm@1749 | 1549 } |
djm@1749 | 1550 |
kaf24@3177 | 1551 /* Add incremental work we have done to the @done output parameter. */ |
kaf24@3177 | 1552 if ( unlikely(pdone != NULL) ) |
kaf24@3177 | 1553 __put_user(done + i, pdone); |
djm@1749 | 1554 |
cl349@3036 | 1555 UNLOCK_BIGLOCK(d); |
djm@1749 | 1556 return rc; |
djm@1749 | 1557 } |
djm@1749 | 1558 |
djm@1749 | 1559 |
djm@1749 | 1560 int do_update_va_mapping(unsigned long page_nr, |
djm@1749 | 1561 unsigned long val, |
djm@1749 | 1562 unsigned long flags) |
djm@1749 | 1563 { |
cl349@2957 | 1564 struct exec_domain *ed = current; |
cl349@2957 | 1565 struct domain *d = ed->domain; |
djm@1749 | 1566 int err = 0; |
cl349@2957 | 1567 unsigned int cpu = ed->processor; |
djm@1749 | 1568 unsigned long deferred_ops; |
djm@1749 | 1569 |
djm@1749 | 1570 perfc_incrc(calls_to_update_va); |
djm@1749 | 1571 |
djm@1749 | 1572 if ( unlikely(page_nr >= (HYPERVISOR_VIRT_START >> PAGE_SHIFT)) ) |
djm@1749 | 1573 return -EINVAL; |
djm@1749 | 1574 |
cl349@3036 | 1575 LOCK_BIGLOCK(d); |
cl349@3036 | 1576 |
kaf24@3517 | 1577 cleanup_writable_pagetable(d); |
cl349@1879 | 1578 |
djm@1749 | 1579 /* |
djm@1749 | 1580 * XXX When we make this support 4MB superpages we should also deal with |
djm@1749 | 1581 * the case of updating L2 entries. |
djm@1749 | 1582 */ |
djm@1749 | 1583 |
djm@1749 | 1584 if ( unlikely(!mod_l1_entry(&linear_pg_table[page_nr], |
djm@1749 | 1585 mk_l1_pgentry(val))) ) |
djm@1749 | 1586 err = -EINVAL; |
djm@1749 | 1587 |
cl349@2957 | 1588 if ( unlikely(ed->mm.shadow_mode) ) |
djm@1749 | 1589 { |
djm@1749 | 1590 unsigned long sval; |
djm@1749 | 1591 |
cl349@2957 | 1592 l1pte_propagate_from_guest(&ed->mm, &val, &sval); |
djm@1749 | 1593 |
djm@1749 | 1594 if ( unlikely(__put_user(sval, ((unsigned long *)( |
djm@1749 | 1595 &shadow_linear_pg_table[page_nr])))) ) |
djm@1749 | 1596 { |
djm@1749 | 1597 /* |
djm@1749 | 1598 * Since L2's are guaranteed RW, failure indicates the page was not |
djm@1749 | 1599 * shadowed, so ignore. |
djm@1749 | 1600 */ |
djm@1749 | 1601 perfc_incrc(shadow_update_va_fail); |
djm@1749 | 1602 } |
djm@1749 | 1603 |
djm@1749 | 1604 /* |
djm@1749 | 1605 * If we're in log-dirty mode then we need to note that we've updated |
djm@1749 | 1606 * the PTE in the PT-holding page. We need the machine frame number |
djm@1749 | 1607 * for this. |
djm@1749 | 1608 */ |
cl349@2957 | 1609 if ( ed->mm.shadow_mode == SHM_logdirty ) |
kaf24@2673 | 1610 mark_dirty(¤t->mm, va_to_l1mfn(page_nr << PAGE_SHIFT)); |
djm@1749 | 1611 |
cl349@2957 | 1612 check_pagetable(&ed->mm, ed->mm.pagetable, "va"); /* debug */ |
djm@1749 | 1613 } |
djm@1749 | 1614 |
djm@1749 | 1615 deferred_ops = percpu_info[cpu].deferred_ops; |
djm@1749 | 1616 percpu_info[cpu].deferred_ops = 0; |
djm@1749 | 1617 |
djm@1749 | 1618 if ( unlikely(deferred_ops & DOP_FLUSH_TLB) || |
djm@1749 | 1619 unlikely(flags & UVMF_FLUSH_TLB) ) |
djm@1749 | 1620 local_flush_tlb(); |
djm@1749 | 1621 else if ( unlikely(flags & UVMF_INVLPG) ) |
djm@1749 | 1622 __flush_tlb_one(page_nr << PAGE_SHIFT); |
djm@1749 | 1623 |
djm@1749 | 1624 if ( unlikely(deferred_ops & DOP_RELOAD_LDT) ) |
djm@1749 | 1625 (void)map_ldt_shadow_page(0); |
djm@1749 | 1626 |
cl349@3036 | 1627 UNLOCK_BIGLOCK(d); |
cl349@3036 | 1628 |
djm@1749 | 1629 return err; |
djm@1749 | 1630 } |
djm@1749 | 1631 |
djm@1749 | 1632 int do_update_va_mapping_otherdomain(unsigned long page_nr, |
djm@1749 | 1633 unsigned long val, |
djm@1749 | 1634 unsigned long flags, |
djm@1749 | 1635 domid_t domid) |
djm@1749 | 1636 { |
djm@1749 | 1637 unsigned int cpu = smp_processor_id(); |
djm@1749 | 1638 struct domain *d; |
djm@1749 | 1639 int rc; |
djm@1749 | 1640 |
cl349@2957 | 1641 if ( unlikely(!IS_PRIV(current->domain)) ) |
djm@1749 | 1642 return -EPERM; |
djm@1749 | 1643 |
kaf24@2314 | 1644 percpu_info[cpu].foreign = d = find_domain_by_id(domid); |
djm@1749 | 1645 if ( unlikely(d == NULL) ) |
djm@1749 | 1646 { |
djm@1749 | 1647 MEM_LOG("Unknown domain '%u'", domid); |
djm@1749 | 1648 return -ESRCH; |
djm@1749 | 1649 } |
djm@1749 | 1650 |
djm@1749 | 1651 rc = do_update_va_mapping(page_nr, val, flags); |
djm@1749 | 1652 |
djm@1749 | 1653 put_domain(d); |
kaf24@2314 | 1654 percpu_info[cpu].foreign = NULL; |
djm@1749 | 1655 |
djm@1749 | 1656 return rc; |
djm@1749 | 1657 } |
cl349@1879 | 1658 |
cl349@1879 | 1659 |
cl349@1921 | 1660 |
kaf24@2382 | 1661 /************************* |
kaf24@2382 | 1662 * Writable Pagetables |
kaf24@2382 | 1663 */ |
cl349@2093 | 1664 |
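/*
 * In outline: ptwr_do_page_fault() below catches a guest write to one of
 * its own (read-only) L1 page-table pages, unhooks the table from the L2
 * if it is currently in use, takes a snapshot of its contents, and gives
 * the guest a temporarily writable mapping.  ptwr_flush() later restores
 * the write protection, revalidates just the entries that changed against
 * the snapshot, and reconnects the table.
 */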
kaf24@2663 | 1665 ptwr_info_t ptwr_info[NR_CPUS]; |
cl349@1894 | 1666 |
kaf24@2097 | 1667 #ifdef VERBOSE |
cl349@2496 | 1668 int ptwr_debug = 0x0; |
kaf24@2654 | 1669 #define PTWR_PRINTK(_f, _a...) \ |
kaf24@2654 | 1670 do { if ( unlikely(ptwr_debug) ) printk( _f , ## _a ); } while ( 0 ) |
cl349@2652 | 1671 #define PTWR_PRINT_WHICH (which ? 'I' : 'A') |
cl349@2093 | 1672 #else |
kaf24@2654 | 1673 #define PTWR_PRINTK(_f, _a...) ((void)0) |
cl349@2093 | 1674 #endif |
cl349@1879 | 1675 |
kaf24@2663 | 1676 /* Flush the given writable p.t. page and write-protect it again. */ |
cl349@2512 | 1677 void ptwr_flush(const int which) |
cl349@1879 | 1678 { |
kaf24@2663 | 1679 unsigned long sstat, spte, pte, *ptep, l1va; |
kaf24@2663 | 1680 l1_pgentry_t *sl1e = NULL, *pl1e, ol1e, nl1e; |
kaf24@3022 | 1681 l2_pgentry_t *pl2e; |
kaf24@2663 | 1682 int i, cpu = smp_processor_id(); |
cl349@2957 | 1683 struct exec_domain *ed = current; |
cl349@2957 | 1684 struct domain *d = ed->domain; |
cl349@1879 | 1685 |
iap10@2640 | 1686 l1va = ptwr_info[cpu].ptinfo[which].l1va; |
cl349@2644 | 1687 ptep = (unsigned long *)&linear_pg_table[l1va>>PAGE_SHIFT]; |
cl349@1913 | 1688 |
kaf24@2663 | 1689 /* |
kaf24@2663 | 1690 * STEP 1. Write-protect the p.t. page so no more updates can occur. |
kaf24@2663 | 1691 */ |
kaf24@2663 | 1692 |
kaf24@2663 | 1693 if ( unlikely(__get_user(pte, ptep)) ) |
kaf24@2663 | 1694 { |
cl349@2512 | 1695 MEM_LOG("ptwr: Could not read pte at %p\n", ptep); |
kaf24@2707 | 1696 /* |
kaf24@2707 | 1697 * Really a bug. We could read this PTE during the initial fault, |
kaf24@2841 | 1698 * and pagetables can't have changed in the meantime. XXX Multi-CPU guests? |
kaf24@2707 | 1699 */ |
kaf24@2707 | 1700 BUG(); |
cl349@2414 | 1701 } |
kaf24@2654 | 1702 PTWR_PRINTK("[%c] disconnected_l1va at %p is %08lx\n", |
kaf24@2654 | 1703 PTWR_PRINT_WHICH, ptep, pte); |
cl349@2631 | 1704 pte &= ~_PAGE_RW; |
iap10@2640 | 1705 |
cl349@2957 | 1706 if ( unlikely(ed->mm.shadow_mode) ) |
kaf24@2663 | 1707 { |
kaf24@2663 | 1708 /* Write-protect the p.t. page in the shadow page table. */ |
cl349@2957 | 1709 l1pte_propagate_from_guest(&ed->mm, &pte, &spte); |
kaf24@2663 | 1710 __put_user( |
kaf24@2663 | 1711 spte, (unsigned long *)&shadow_linear_pg_table[l1va>>PAGE_SHIFT]); |
kaf24@2663 | 1712 |
kaf24@2663 | 1713 /* Is the p.t. page itself shadowed? Map it into Xen space if so. */ |
cl349@2957 | 1714 sstat = get_shadow_status(&ed->mm, pte >> PAGE_SHIFT); |
kaf24@2663 | 1715 if ( sstat & PSH_shadowed ) |
kaf24@2663 | 1716 sl1e = map_domain_mem((sstat & PSH_pfn_mask) << PAGE_SHIFT); |
iap10@2640 | 1717 } |
iap10@2640 | 1718 |
kaf24@2663 | 1719 /* Write-protect the p.t. page in the guest page table. */ |
kaf24@2663 | 1720 if ( unlikely(__put_user(pte, ptep)) ) |
kaf24@2663 | 1721 { |
cl349@2512 | 1722 MEM_LOG("ptwr: Could not update pte at %p\n", ptep); |
kaf24@2707 | 1723 /* |
kaf24@2707 | 1724 * Really a bug. We could write this PTE during the initial fault, |
kaf24@2841 | 1725 * and pagetables can't have changed in the meantime. XXX Multi-CPU guests? |
kaf24@2707 | 1726 */ |
kaf24@2707 | 1727 BUG(); |
cl349@2414 | 1728 } |
kaf24@2663 | 1729 |
kaf24@2663 | 1730 /* Ensure that there are no stale writable mappings in any TLB. */ |
kaf24@2841 | 1731 /* NB. INVLPG is a serialising instruction: flushes pending updates. */ |
cl349@3325 | 1732 #if 1 |
kaf24@2841 | 1733 __flush_tlb_one(l1va); /* XXX Multi-CPU guests? */ |
cl349@3036 | 1734 #else |
cl349@3036 | 1735 flush_tlb_all(); |
cl349@3036 | 1736 #endif |
kaf24@2654 | 1737 PTWR_PRINTK("[%c] disconnected_l1va at %p now %08lx\n", |
kaf24@2654 | 1738 PTWR_PRINT_WHICH, ptep, pte); |
cl349@2631 | 1739 |
kaf24@2663 | 1740 /* |
kaf24@2663 | 1741 * STEP 2. Validate any modified PTEs. |
kaf24@2663 | 1742 */ |
kaf24@2663 | 1743 |
cl349@2631 | 1744 pl1e = ptwr_info[cpu].ptinfo[which].pl1e; |
kaf24@2663 | 1745 for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) |
kaf24@2663 | 1746 { |
cl349@2631 | 1747 ol1e = ptwr_info[cpu].ptinfo[which].page[i]; |
cl349@2631 | 1748 nl1e = pl1e[i]; |
kaf24@2663 | 1749 |
kaf24@2663 | 1750 if ( likely(l1_pgentry_val(ol1e) == l1_pgentry_val(nl1e)) ) |
cl349@2631 | 1751 continue; |
kaf24@2663 | 1752 |
kaf24@2663 | 1753 /* |
kaf24@2663 | 1754 * Fast path for PTEs that have merely been write-protected |
kaf24@2663 | 1755 * (e.g., during a Unix fork()). A strict reduction in privilege. |
kaf24@2663 | 1756 */ |
kaf24@2663 | 1757 if ( likely(l1_pgentry_val(ol1e) == (l1_pgentry_val(nl1e)|_PAGE_RW)) ) |
kaf24@2663 | 1758 { |
kaf24@2663 | 1759 if ( likely(l1_pgentry_val(nl1e) & _PAGE_PRESENT) ) |
kaf24@2663 | 1760 { |
kaf24@2663 | 1761 if ( unlikely(sl1e != NULL) ) |
kaf24@2673 | 1762 l1pte_propagate_from_guest( |
cl349@2957 | 1763 &ed->mm, &l1_pgentry_val(nl1e), |
kaf24@2663 | 1764 &l1_pgentry_val(sl1e[i])); |
kaf24@2663 | 1765 put_page_type(&frame_table[l1_pgentry_to_pagenr(nl1e)]); |
kaf24@2663 | 1766 } |
cl349@2644 | 1767 continue; |
kaf24@2663 | 1768 } |
kaf24@2663 | 1769 |
kaf24@2663 | 1770 if ( unlikely(!get_page_from_l1e(nl1e, d)) ) |
kaf24@2663 | 1771 { |
cl349@2631 | 1772 MEM_LOG("ptwr: Could not re-validate l1 page\n"); |
kaf24@2707 | 1773 /* |
kaf24@2707 | 1774 * Make the remaining p.t.'s consistent before crashing, so the |
kaf24@2707 | 1775 * reference counts are correct. |
kaf24@2707 | 1776 */ |
kaf24@2707 | 1777 memcpy(&pl1e[i], &ptwr_info[cpu].ptinfo[which].page[i], |
kaf24@2707 | 1778 (ENTRIES_PER_L1_PAGETABLE - i) * sizeof(l1_pgentry_t)); |
cl349@2708 | 1779 unmap_domain_mem(pl1e); |
cl349@2708 | 1780 ptwr_info[cpu].ptinfo[which].l1va = 0; |
cl349@3036 | 1781 UNLOCK_BIGLOCK(d); |
cl349@2631 | 1782 domain_crash(); |
cl349@2631 | 1783 } |
kaf24@2663 | 1784 |
kaf24@2663 | 1785 if ( unlikely(sl1e != NULL) ) |
kaf24@2673 | 1786 l1pte_propagate_from_guest( |
cl349@2957 | 1787 &ed->mm, &l1_pgentry_val(nl1e), &l1_pgentry_val(sl1e[i])); |
kaf24@2663 | 1788 |
kaf24@2663 | 1789 if ( unlikely(l1_pgentry_val(ol1e) & _PAGE_PRESENT) ) |
kaf24@2663 | 1790 put_page_from_l1e(ol1e, d); |
cl349@2631 | 1791 } |
cl349@2631 | 1792 unmap_domain_mem(pl1e); |
cl349@2631 | 1793 |
kaf24@2663 | 1794 /* |
kaf24@2663 | 1795 * STEP 3. Reattach the L1 p.t. page into the current address space. |
kaf24@2663 | 1796 */ |
kaf24@2663 | 1797 |
cl349@2957 | 1798 if ( (which == PTWR_PT_ACTIVE) && likely(!ed->mm.shadow_mode) ) |
kaf24@2663 | 1799 { |
kaf24@2663 | 1800 pl2e = &linear_l2_table[ptwr_info[cpu].ptinfo[which].l2_idx]; |
kaf24@3022 | 1801 *pl2e = mk_l2_pgentry(l2_pgentry_val(*pl2e) | _PAGE_PRESENT); |
cl349@2631 | 1802 } |
iap10@2509 | 1803 |
kaf24@2663 | 1804 /* |
kaf24@2663 | 1805 * STEP 4. Final tidy-up. |
kaf24@2663 | 1806 */ |
iap10@2509 | 1807 |
cl349@2512 | 1808 ptwr_info[cpu].ptinfo[which].l1va = 0; |
kaf24@2663 | 1809 |
kaf24@2663 | 1810 if ( unlikely(sl1e != NULL) ) |
kaf24@2663 | 1811 { |
kaf24@2663 | 1812 unmap_domain_mem(sl1e); |
cl349@2957 | 1813 put_shadow_status(&ed->mm); |
kaf24@2663 | 1814 } |
cl349@1879 | 1815 } |
cl349@1879 | 1816 |
kaf24@2663 | 1817 /* Write page fault handler: check if guest is trying to modify a PTE. */ |
cl349@1879 | 1818 int ptwr_do_page_fault(unsigned long addr) |
cl349@1879 | 1819 { |
kaf24@3022 | 1820 unsigned long pte, pfn, l2e; |
cl349@1879 | 1821 struct pfn_info *page; |
kaf24@3022 | 1822 l2_pgentry_t *pl2e; |
kaf24@2663 | 1823 int which, cpu = smp_processor_id(); |
kaf24@2663 | 1824 u32 l2_idx; |
iap10@2458 | 1825 |
kaf24@2663 | 1826 /* |
kaf24@2663 | 1827 * Attempt to read the PTE that maps the VA being accessed. By checking for |
kaf24@2663 | 1828 * PDE validity in the L2 we avoid many expensive fixups in __get_user(). |
kaf24@2663 | 1829 */ |
kaf24@2663 | 1830 if ( !(l2_pgentry_val(linear_l2_table[addr>>L2_PAGETABLE_SHIFT]) & |
kaf24@2663 | 1831 _PAGE_PRESENT) || |
kaf24@2663 | 1832 __get_user(pte, (unsigned long *)&linear_pg_table[addr>>PAGE_SHIFT]) ) |
cl349@3036 | 1833 { |
kaf24@2663 | 1834 return 0; |
cl349@3036 | 1835 } |
iap10@2509 | 1836 |
kaf24@2663 | 1837 pfn = pte >> PAGE_SHIFT; |
kaf24@2663 | 1838 page = &frame_table[pfn]; |
cl349@1915 | 1839 |
kaf24@2663 | 1840 /* We are looking only for read-only mappings of p.t. pages. */ |
kaf24@2663 | 1841 if ( ((pte & (_PAGE_RW | _PAGE_PRESENT)) != _PAGE_PRESENT) || |
kaf24@2663 | 1842 ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ) |
cl349@3036 | 1843 { |
kaf24@2663 | 1844 return 0; |
cl349@3036 | 1845 } |
kaf24@2663 | 1846 |
kaf24@2663 | 1847 /* Get the L2 index at which this L1 p.t. is always mapped. */ |
kaf24@2663 | 1848 l2_idx = page->u.inuse.type_info & PGT_va_mask; |
kaf24@2663 | 1849 if ( unlikely(l2_idx >= PGT_va_unknown) ) |
cl349@3036 | 1850 { |
kaf24@2663 | 1851 domain_crash(); /* Urk! This L1 is mapped in multiple L2 slots! */ |
cl349@3036 | 1852 } |
kaf24@2663 | 1853 l2_idx >>= PGT_va_shift; |
kaf24@3022 | 1854 |
kaf24@3022 | 1855 if ( l2_idx == (addr >> L2_PAGETABLE_SHIFT) ) |
kaf24@3022 | 1856 { |
kaf24@3022 | 1857 MEM_LOG("PTWR failure! Pagetable maps itself at %08lx\n", addr); |
kaf24@3022 | 1858 domain_crash(); |
kaf24@3022 | 1859 } |
kaf24@3022 | 1860 |
kaf24@2663 | 1861 /* |
kaf24@2663 | 1862 * Is the L1 p.t. mapped into the current address space? If so we call it |
kaf24@2663 | 1863 * an ACTIVE p.t., otherwise it is INACTIVE. |
kaf24@2663 | 1864 */ |
kaf24@2663 | 1865 pl2e = &linear_l2_table[l2_idx]; |
kaf24@3022 | 1866 l2e = l2_pgentry_val(*pl2e); |
kaf24@3022 | 1867 which = PTWR_PT_INACTIVE; |
kaf24@3022 | 1868 if ( (l2e >> PAGE_SHIFT) == pfn ) |
kaf24@3022 | 1869 { |
cl349@3179 | 1870 /* Check the PRESENT bit to set ACTIVE. */ |
kaf24@3022 | 1871 if ( likely(l2e & _PAGE_PRESENT) ) |
kaf24@3022 | 1872 which = PTWR_PT_ACTIVE; |
cl349@3179 | 1873 else { |
cl349@3179 | 1874 /* |
cl349@3179 | 1875 * If the PRESENT bit is clear, we may be conflicting with |
cl349@3179 | 1876 * the current ACTIVE p.t. (it may be the same p.t. mapped |
cl349@3179 | 1877 * at another virt addr). |
cl349@3179 | 1878 * The ptwr_flush call below will restore the PRESENT bit. |
cl349@3179 | 1879 */ |
cl349@3179 | 1880 if ( ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va && |
cl349@3179 | 1881 l2_idx == ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx ) |
cl349@3179 | 1882 which = PTWR_PT_ACTIVE; |
cl349@3179 | 1883 } |
kaf24@3022 | 1884 } |
kaf24@2663 | 1885 |
kaf24@2663 | 1886 PTWR_PRINTK("[%c] page_fault on l1 pt at va %08lx, pt for %08x, " |
kaf24@2663 | 1887 "pfn %08lx\n", PTWR_PRINT_WHICH, |
kaf24@2663 | 1888 addr, l2_idx << L2_PAGETABLE_SHIFT, pfn); |
kaf24@2663 | 1889 |
kaf24@2663 | 1890 /* |
kaf24@2663 | 1891 * We only allow one ACTIVE and one INACTIVE p.t. to be updated at a |
kaf24@2663 | 1892 * time. If there is already one, we must flush it out. |
kaf24@2663 | 1893 */ |
kaf24@2663 | 1894 if ( ptwr_info[cpu].ptinfo[which].l1va ) |
kaf24@2663 | 1895 ptwr_flush(which); |
iap10@2507 | 1896 |
kaf24@2663 | 1897 ptwr_info[cpu].ptinfo[which].l1va = addr | 1; |
kaf24@2663 | 1898 ptwr_info[cpu].ptinfo[which].l2_idx = l2_idx; |
kaf24@2663 | 1899 |
kaf24@2663 | 1900 /* For safety, disconnect the L1 p.t. page from current space. */ |
kaf24@2663 | 1901 if ( (which == PTWR_PT_ACTIVE) && likely(!current->mm.shadow_mode) ) |
kaf24@2663 | 1902 { |
kaf24@3022 | 1903 *pl2e = mk_l2_pgentry(l2e & ~_PAGE_PRESENT); |
cl349@3325 | 1904 #if 1 |
kaf24@2841 | 1905 flush_tlb(); /* XXX Multi-CPU guests? */ |
cl349@3036 | 1906 #else |
cl349@3036 | 1907 flush_tlb_all(); |
cl349@3036 | 1908 #endif |
cl349@1879 | 1909 } |
kaf24@2663 | 1910 |
kaf24@2663 | 1911 /* Temporarily map the L1 page, and make a copy of it. */ |
kaf24@2663 | 1912 ptwr_info[cpu].ptinfo[which].pl1e = map_domain_mem(pfn << PAGE_SHIFT); |
kaf24@2663 | 1913 memcpy(ptwr_info[cpu].ptinfo[which].page, |
kaf24@2663 | 1914 ptwr_info[cpu].ptinfo[which].pl1e, |
kaf24@2663 | 1915 ENTRIES_PER_L1_PAGETABLE * sizeof(l1_pgentry_t)); |
kaf24@2663 | 1916 |
kaf24@2663 | 1917 /* Finally, make the p.t. page writable by the guest OS. */ |
kaf24@2663 | 1918 pte |= _PAGE_RW; |
kaf24@2663 | 1919 PTWR_PRINTK("[%c] update %p pte to %08lx\n", PTWR_PRINT_WHICH, |
kaf24@2663 | 1920 &linear_pg_table[addr>>PAGE_SHIFT], pte); |
kaf24@2663 | 1921 if ( unlikely(__put_user(pte, (unsigned long *) |
kaf24@2663 | 1922 &linear_pg_table[addr>>PAGE_SHIFT])) ) |
kaf24@2663 | 1923 { |
kaf24@2663 | 1924 MEM_LOG("ptwr: Could not update pte at %p\n", (unsigned long *) |
kaf24@2663 | 1925 &linear_pg_table[addr>>PAGE_SHIFT]); |
kaf24@2707 | 1926 /* Toss the writable pagetable state and crash. */ |
kaf24@2707 | 1927 unmap_domain_mem(ptwr_info[cpu].ptinfo[which].pl1e); |
kaf24@2707 | 1928 ptwr_info[cpu].ptinfo[which].l1va = 0; |
kaf24@2663 | 1929 domain_crash(); |
kaf24@2663 | 1930 } |
kaf24@2663 | 1931 |
kaf24@3090 | 1932 return EXCRET_fault_fixed; |
cl349@1879 | 1933 } |
cl349@1894 | 1934 |
kaf24@2504 | 1935 static __init int ptwr_init(void) |
kaf24@2504 | 1936 { |
kaf24@2504 | 1937 int i; |
kaf24@2504 | 1938 |
kaf24@2504 | 1939 for ( i = 0; i < smp_num_cpus; i++ ) |
kaf24@2504 | 1940 { |
cl349@2512 | 1941 ptwr_info[i].ptinfo[PTWR_PT_ACTIVE].page = |
cl349@2512 | 1942 (void *)alloc_xenheap_page(); |
cl349@2512 | 1943 ptwr_info[i].ptinfo[PTWR_PT_INACTIVE].page = |
cl349@2512 | 1944 (void *)alloc_xenheap_page(); |
kaf24@2504 | 1945 } |
kaf24@2504 | 1946 |
kaf24@2504 | 1947 return 0; |
kaf24@2504 | 1948 } |
kaf24@2504 | 1949 __initcall(ptwr_init); |
kaf24@2504 | 1950 |
kaf24@2663 | 1951 |
kaf24@2663 | 1952 |
kaf24@2663 | 1953 |
kaf24@2663 | 1954 /************************************************************************/ |
kaf24@2663 | 1955 /************************************************************************/ |
kaf24@2663 | 1956 /************************************************************************/ |
kaf24@2663 | 1957 |
cl349@2092 | 1958 #ifndef NDEBUG |
kaf24@2663 | 1959 |
cl349@1894 | 1960 void ptwr_status(void) |
cl349@1894 | 1961 { |
cl349@2512 | 1962 unsigned long pte, *ptep, pfn; |
cl349@1894 | 1963 struct pfn_info *page; |
cl349@1894 | 1964 int cpu = smp_processor_id(); |
cl349@1894 | 1965 |
cl349@2512 | 1966 ptep = (unsigned long *)&linear_pg_table |
cl349@2512 | 1967 [ptwr_info[cpu].ptinfo[PTWR_PT_INACTIVE].l1va>>PAGE_SHIFT]; |
kaf24@2237 | 1968 |
cl349@2512 | 1969 if ( __get_user(pte, ptep) ) { |
cl349@2512 | 1970 MEM_LOG("ptwr: Could not read pte at %p\n", ptep); |
cl349@2495 | 1971 domain_crash(); |
cl349@1894 | 1972 } |
cl349@1894 | 1973 |
cl349@2495 | 1974 pfn = pte >> PAGE_SHIFT; |
cl349@2495 | 1975 page = &frame_table[pfn]; |
cl349@2495 | 1976 printk("need to alloc l1 page %p\n", page); |
cl349@2495 | 1977 /* make pt page writable */ |
cl349@2495 | 1978 printk("need to make read-only l1-page at %p is %08lx\n", |
cl349@2512 | 1979 ptep, pte); |
cl349@2495 | 1980 |
cl349@2512 | 1981 if ( ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va == 0 ) |
cl349@1894 | 1982 return; |
cl349@1894 | 1983 |
cl349@2512 | 1984 if ( __get_user(pte, (unsigned long *) |
cl349@2512 | 1985 ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) ) { |
cl349@2491 | 1986 MEM_LOG("ptwr: Could not read pte at %p\n", (unsigned long *) |
cl349@2512 | 1987 ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va); |
cl349@2491 | 1988 domain_crash(); |
cl349@2414 | 1989 } |
cl349@1894 | 1990 pfn = pte >> PAGE_SHIFT; |
cl349@1894 | 1991 page = &frame_table[pfn]; |
cl349@1894 | 1992 } |
iap10@2479 | 1993 |
kaf24@2637 | 1994 void audit_domain(struct domain *d) |
iap10@2479 | 1995 { |
iap10@2595 | 1996 int ttot=0, ctot=0, io_mappings=0, lowmem_mappings=0; |
kaf24@2637 | 1997 |
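/*
 * adjust() is a GCC nested-function helper: it moves the page's general
 * reference count by 'dir' and, when 'adjtype' is set, its type count as
 * well, accumulating running totals in ctot/ttot and complaining if a
 * count drops below zero.
 */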
kaf24@2637 | 1998 void adjust (struct pfn_info *page, int dir, int adjtype) |
iap10@2479 | 1999 { |
cl349@2491 | 2000 int count = page->count_info & PGC_count_mask; |
iap10@2479 | 2001 |
cl349@2491 | 2002 if ( adjtype ) |
cl349@2491 | 2003 { |
cl349@2491 | 2004 int tcount = page->u.inuse.type_info & PGT_count_mask; |
cl349@2491 | 2005 |
cl349@2491 | 2006 ttot++; |
iap10@2479 | 2007 |
cl349@2491 | 2008 tcount += dir; |
iap10@2479 | 2009 |
cl349@2491 | 2010 if ( tcount < 0 ) |
cl349@2491 | 2011 { |
cl349@2644 | 2012 /* This will only come out once. */ |
kaf24@2637 | 2013 printk("Audit %d: type count whent below zero pfn=%x " |
kaf24@2637 | 2014 "taf=%x otaf=%x\n", |
kaf24@2748 | 2015 d->id, page-frame_table, |
cl349@2491 | 2016 page->u.inuse.type_info, |
cl349@2491 | 2017 page->tlbflush_timestamp); |
cl349@2491 | 2018 } |
cl349@2491 | 2019 |
cl349@2491 | 2020 page->u.inuse.type_info = |
iap10@2573 | 2021 (page->u.inuse.type_info & ~PGT_count_mask) | |
cl349@2644 | 2022 (tcount & PGT_count_mask); |
cl349@2491 | 2023 } |
iap10@2479 | 2024 |
cl349@2491 | 2025 ctot++; |
cl349@2491 | 2026 count += dir; |
cl349@2491 | 2027 if ( count < 0 ) |
cl349@2491 | 2028 { |
cl349@2644 | 2029 /* This will only come out once. */ |
kaf24@2637 | 2030 printk("Audit %d: general count whent below zero pfn=%x " |
kaf24@2637 | 2031 "taf=%x otaf=%x\n", |
kaf24@2748 | 2032 d->id, page-frame_table, |
cl349@2491 | 2033 page->u.inuse.type_info, |
cl349@2491 | 2034 page->tlbflush_timestamp); |
cl349@2491 | 2035 } |
cl349@2491 | 2036 |
cl349@2491 | 2037 page->count_info = |
iap10@2573 | 2038 (page->count_info & ~PGC_count_mask) | |
cl349@2644 | 2039 (count & PGC_count_mask); |
iap10@2479 | 2040 |
iap10@2479 | 2041 } |
iap10@2479 | 2042 |
kaf24@2637 | 2043 void scan_for_pfn(struct domain *d, unsigned long xpfn) |
iap10@2479 | 2044 { |
kaf24@2637 | 2045 unsigned long pfn, *pt; |
cl349@2491 | 2046 struct list_head *list_ent; |
kaf24@2637 | 2047 struct pfn_info *page; |
cl349@2491 | 2048 int i; |
iap10@2479 | 2049 |
iap10@2479 | 2050 list_ent = d->page_list.next; |
cl349@2491 | 2051 for ( i = 0; (list_ent != &d->page_list); i++ ) |
cl349@2491 | 2052 { |
cl349@2491 | 2053 pfn = list_entry(list_ent, struct pfn_info, list) - frame_table; |
cl349@2491 | 2054 page = &frame_table[pfn]; |
cl349@2491 | 2055 |
kaf24@2637 | 2056 switch ( page->u.inuse.type_info & PGT_type_mask ) |
cl349@2491 | 2057 { |
kaf24@2637 | 2058 case PGT_l1_page_table: |
kaf24@2637 | 2059 case PGT_l2_page_table: |
kaf24@2637 | 2060 pt = map_domain_mem(pfn<<PAGE_SHIFT); |
cl349@2491 | 2061 for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) |
kaf24@2637 | 2062 if ( (pt[i] & _PAGE_PRESENT) && |
kaf24@2637 | 2063 ((pt[i] >> PAGE_SHIFT) == xpfn) ) |
kaf24@2637 | 2064 printk(" found dom=%d i=%x pfn=%lx t=%x c=%x\n", |
kaf24@2748 | 2065 d->id, i, pfn, page->u.inuse.type_info, |
kaf24@2637 | 2066 page->count_info); |
cl349@2491 | 2067 unmap_domain_mem(pt); |
cl349@2491 | 2068 } |
iap10@2479 | 2069 |
cl349@2491 | 2070 list_ent = frame_table[pfn].list.next; |
cl349@2491 | 2071 } |
iap10@2479 | 2072 |
iap10@2479 | 2073 } |
iap10@2479 | 2074 |
kaf24@2637 | 2075 void scan_for_pfn_remote(unsigned long xpfn) |
iap10@2479 | 2076 { |
cl349@2491 | 2077 struct domain *e; |
cl349@2491 | 2078 for_each_domain ( e ) |
cl349@2491 | 2079 scan_for_pfn( e, xpfn ); |
iap10@2479 | 2080 } |
iap10@2479 | 2081 |
iap10@2479 | 2082 int i; |
iap10@2479 | 2083 unsigned long pfn; |
iap10@2479 | 2084 struct list_head *list_ent; |
kaf24@2637 | 2085 struct pfn_info *page; |
iap10@2479 | 2086 |
cl349@3036 | 2087 if ( d != current->domain ) |
cl349@2491 | 2088 domain_pause(d); |
iap10@2479 | 2089 synchronise_pagetables(~0UL); |
iap10@2479 | 2090 |
iap10@2479 | 2091 printk("pt base=%lx sh_info=%x\n", |
cl349@3036 | 2092 pagetable_val(d->exec_domain[0]->mm.pagetable)>>PAGE_SHIFT, |
cl349@2491 | 2093 virt_to_page(d->shared_info)-frame_table); |
cl349@2491 | 2094 |
iap10@2479 | 2095 spin_lock(&d->page_alloc_lock); |
iap10@2479 | 2096 |
kaf24@2637 | 2097 /* PHASE 0 */ |
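/* (Phase 0 checks that no page's type count exceeds its general count and
 *  saves each page's original type_info in tlbflush_timestamp so later
 *  phases can report against it.) */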
iap10@2479 | 2098 |
iap10@2479 | 2099 list_ent = d->page_list.next; |
iap10@2479 | 2100 for ( i = 0; (list_ent != &d->page_list); i++ ) |
iap10@2479 | 2101 { |
cl349@2491 | 2102 pfn = list_entry(list_ent, struct pfn_info, list) - frame_table; |
cl349@2491 | 2103 page = &frame_table[pfn]; |
iap10@2479 | 2104 |
cl349@2491 | 2105 if ( page->u.inuse.domain != d ) |
cl349@2491 | 2106 BUG(); |
iap10@2479 | 2107 |
cl349@2491 | 2108 if ( (page->u.inuse.type_info & PGT_count_mask) > |
cl349@2491 | 2109 (page->count_info & PGC_count_mask) ) |
cl349@2491 | 2110 printk("taf > caf %x %x pfn=%lx\n", |
cl349@2491 | 2111 page->u.inuse.type_info, page->count_info, pfn ); |
iap10@2479 | 2112 |
kaf24@2637 | 2113 #if 0 /* SYSV shared memory pages plus writeable files. */ |
cl349@2491 | 2114 if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page && |
cl349@2491 | 2115 (page->u.inuse.type_info & PGT_count_mask) > 1 ) |
cl349@2491 | 2116 { |
cl349@2491 | 2117 printk("writeable page with type count >1: pfn=%lx t=%x c=%x\n", |
cl349@2491 | 2118 pfn, |
cl349@2491 | 2119 page->u.inuse.type_info, |
cl349@2491 | 2120 page->count_info ); |
cl349@2491 | 2121 scan_for_pfn_remote(pfn); |
cl349@2491 | 2122 } |
cl349@2092 | 2123 #endif |
cl349@2491 | 2124 if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_none && |
cl349@2491 | 2125 (page->u.inuse.type_info & PGT_count_mask) > 1 ) |
cl349@2491 | 2126 { |
cl349@2491 | 2127 printk("normal page with type count >1: pfn=%lx t=%x c=%x\n", |
cl349@2491 | 2128 pfn, |
cl349@2491 | 2129 page->u.inuse.type_info, |
cl349@2491 | 2130 page->count_info ); |
cl349@2491 | 2131 } |
iap10@2479 | 2132 |
kaf24@2637 | 2133 /* Use tlbflush_timestamp to store original type_info. */ |
cl349@2491 | 2134 page->tlbflush_timestamp = page->u.inuse.type_info; |
iap10@2479 | 2135 |
cl349@2491 | 2136 list_ent = frame_table[pfn].list.next; |
iap10@2479 | 2137 } |
iap10@2479 | 2138 |
iap10@2479 | 2139 |
kaf24@2637 | 2140 /* PHASE 1 */ |
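/* (Phase 1 walks the domain's L2 and L1 tables, decrementing the counts
 *  accounted for by pinning and by each present mapping, and reports
 *  anomalies: unvalidated or unpinned tables, linear page-table mappings,
 *  writable mappings of page tables, and foreign/low-mem/IO frames.) */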
iap10@2479 | 2141 |
cl349@3036 | 2142 adjust(&frame_table[pagetable_val(d->exec_domain[0]->mm.pagetable)>>PAGE_SHIFT], -1, 1); |
iap10@2479 | 2143 |
iap10@2479 | 2144 list_ent = d->page_list.next; |
iap10@2479 | 2145 for ( i = 0; (list_ent != &d->page_list); i++ ) |
iap10@2479 | 2146 { |
kaf24@2637 | 2147 unsigned long *pt; |
cl349@2491 | 2148 pfn = list_entry(list_ent, struct pfn_info, list) - frame_table; |
cl349@2491 | 2149 page = &frame_table[pfn]; |
iap10@2479 | 2150 |
cl349@2491 | 2151 if ( page->u.inuse.domain != d ) |
cl349@2491 | 2152 BUG(); |
iap10@2479 | 2153 |
cl349@2491 | 2154 switch ( page->u.inuse.type_info & PGT_type_mask ) |
cl349@2491 | 2155 { |
cl349@2491 | 2156 case PGT_l2_page_table: |
iap10@2479 | 2157 |
cl349@2491 | 2158 if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated ) |
cl349@2491 | 2159 printk("Audit %d: L2 not validated %x\n", |
kaf24@2748 | 2160 d->id, page->u.inuse.type_info); |
iap10@2479 | 2161 |
cl349@2491 | 2162 if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned ) |
cl349@2491 | 2163 printk("Audit %d: L2 not pinned %x\n", |
kaf24@2748 | 2164 d->id, page->u.inuse.type_info); |
cl349@2491 | 2165 else |
cl349@2491 | 2166 adjust( page, -1, 1 ); |
cl349@2491 | 2167 |
cl349@2491 | 2168 pt = map_domain_mem( pfn<<PAGE_SHIFT ); |
iap10@2479 | 2169 |
cl349@2491 | 2170 for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) |
cl349@2491 | 2171 { |
cl349@2491 | 2172 if ( pt[i] & _PAGE_PRESENT ) |
cl349@2491 | 2173 { |
cl349@2491 | 2174 unsigned long l1pfn = pt[i]>>PAGE_SHIFT; |
cl349@2491 | 2175 struct pfn_info *l1page = &frame_table[l1pfn]; |
iap10@2479 | 2176 |
cl349@2491 | 2177 if ( l1page->u.inuse.domain != d ) |
cl349@2491 | 2178 { |
kaf24@2637 | 2179 printk("L2: Skip bizarre page belonging to other " |
kaf24@2637 | 2180 "dom %p\n", l1page->u.inuse.domain); |
cl349@2491 | 2181 continue; |
cl349@2491 | 2182 } |
kaf24@2637 | 2183 |
kaf24@2637 | 2184 if ( (l1page->u.inuse.type_info & PGT_type_mask) == |
kaf24@2637 | 2185 PGT_l2_page_table ) |
kaf24@2637 | 2186 printk("Audit %d: [%x] Found %s Linear PT " |
kaf24@2748 | 2187 "t=%x pfn=%lx\n", d->id, i, |
kaf24@2637 | 2188 (l1pfn==pfn) ? "Self" : "Other", |
kaf24@2637 | 2189 l1page->u.inuse.type_info, |
kaf24@2637 | 2190 l1pfn); |
kaf24@2637 | 2191 else if ( (l1page->u.inuse.type_info & PGT_type_mask) != |
kaf24@2637 | 2192 PGT_l1_page_table ) |
kaf24@2637 | 2193 printk("Audit %d: [%x] Expected L1 t=%x pfn=%lx\n", |
kaf24@2748 | 2194 d->id, i, |
kaf24@2637 | 2195 l1page->u.inuse.type_info, |
kaf24@2637 | 2196 l1pfn); |
iap10@2479 | 2197 |
kaf24@2637 | 2198 adjust(l1page, -1, 1); |
cl349@2491 | 2199 } |
cl349@2491 | 2200 } |
iap10@2479 | 2201 |
cl349@2491 | 2202 unmap_domain_mem(pt); |
iap10@2479 | 2203 |
cl349@2491 | 2204 break; |
iap10@2479 | 2205 |
iap10@2479 | 2206 |
cl349@2491 | 2207 case PGT_l1_page_table: |
cl349@2491 | 2208 |
cl349@2491 | 2209 if ( (page->u.inuse.type_info & PGT_pinned) == PGT_pinned ) |
cl349@2491 | 2210 adjust( page, -1, 1 ); |
iap10@2479 | 2211 |
cl349@2491 | 2212 if ( (page->u.inuse.type_info & PGT_validated) != PGT_validated ) |
cl349@2491 | 2213 printk("Audit %d: L1 not validated %x\n", |
kaf24@2748 | 2214 d->id, page->u.inuse.type_info); |
iap10@2479 | 2215 #if 0 |
cl349@2491 | 2216 if ( (page->u.inuse.type_info & PGT_pinned) != PGT_pinned ) |
cl349@2491 | 2217 printk("Audit %d: L1 not pinned %x\n", |
kaf24@2748 | 2218 d->id, page->u.inuse.type_info); |
iap10@2479 | 2219 #endif |
cl349@2491 | 2220 pt = map_domain_mem( pfn<<PAGE_SHIFT ); |
iap10@2479 | 2221 |
cl349@2491 | 2222 for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) |
cl349@2491 | 2223 { |
cl349@2491 | 2224 if ( pt[i] & _PAGE_PRESENT ) |
cl349@2491 | 2225 { |
cl349@2491 | 2226 unsigned long l1pfn = pt[i]>>PAGE_SHIFT; |
cl349@2491 | 2227 struct pfn_info *l1page = &frame_table[l1pfn]; |
iap10@2479 | 2228 |
cl349@2644 | 2229 if ( l1pfn < 0x100 ) |
cl349@2644 | 2230 { |
cl349@2644 | 2231 lowmem_mappings++; |
cl349@2644 | 2232 continue; |
cl349@2644 | 2233 } |
iap10@2595 | 2234 |
cl349@2644 | 2235 if ( l1pfn > max_page ) |
cl349@2644 | 2236 { |
cl349@2644 | 2237 io_mappings++; |
cl349@2644 | 2238 continue; |
cl349@2644 | 2239 } |
iap10@2595 | 2240 |
cl349@2491 | 2241 if ( pt[i] & _PAGE_RW ) |
cl349@2491 | 2242 { |
iap10@2479 | 2243 |
cl349@2491 | 2244 if ( (l1page->u.inuse.type_info & PGT_type_mask) == |
cl349@2491 | 2245 PGT_l1_page_table || |
cl349@2491 | 2246 (l1page->u.inuse.type_info & PGT_type_mask) == |
cl349@2491 | 2247 PGT_l2_page_table ) |
cl349@2491 | 2248 printk("Audit %d: [%x] Ilegal RW t=%x pfn=%lx\n", |
kaf24@2748 | 2249 d->id, i, |
cl349@2491 | 2250 l1page->u.inuse.type_info, |
cl349@2491 | 2251 l1pfn); |
iap10@2479 | 2252 |
cl349@2491 | 2253 } |
iap10@2479 | 2254 |
cl349@2491 | 2255 if ( l1page->u.inuse.domain != d ) |
cl349@2491 | 2256 { |
kaf24@2637 | 2257 printk("Audit %d: [%lx,%x] Skip foreign page dom=%lx " |
kaf24@2637 | 2258 "pfn=%lx c=%08x t=%08x m2p=%lx\n", |
kaf24@2748 | 2259 d->id, pfn, i, |
iap10@2595 | 2260 (unsigned long)l1page->u.inuse.domain, |
cl349@2644 | 2261 l1pfn, |
cl349@2644 | 2262 l1page->count_info, |
cl349@2644 | 2263 l1page->u.inuse.type_info, |
cl349@2644 | 2264 machine_to_phys_mapping[l1pfn]); |
cl349@2491 | 2265 continue; |
cl349@2491 | 2266 } |
iap10@2479 | 2267 |
kaf24@2637 | 2268 adjust(l1page, -1, 0); |
cl349@2491 | 2269 } |
cl349@2491 | 2270 } |
iap10@2479 | 2271 |
cl349@2491 | 2272 unmap_domain_mem(pt); |
iap10@2479 | 2273 |
cl349@2491 | 2274 break; |
iap10@2595 | 2275 } |
iap10@2479 | 2276 |
cl349@2491 | 2277 list_ent = frame_table[pfn].list.next; |
iap10@2479 | 2278 } |
iap10@2479 | 2279 |
kaf24@2637 | 2280 if ( (io_mappings > 0) || (lowmem_mappings > 0) ) |
cl349@2644 | 2281 printk("Audit %d: Found %d lowmem mappings and %d io mappings\n", |
kaf24@2748 | 2282 d->id, lowmem_mappings, io_mappings); |
iap10@2595 | 2283 |
kaf24@2637 | 2284 /* PHASE 2 */ |
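/* (Phase 2: after the decrements above, a page-table page with a non-zero
 *  type count, or any page with a general count other than 1, still has
 *  references the audit could not account for, so every domain is scanned
 *  for mappings of that frame.) */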
iap10@2479 | 2285 |
iap10@2479 | 2286 ctot = ttot = 0; |
iap10@2479 | 2287 list_ent = d->page_list.next; |
iap10@2479 | 2288 for ( i = 0; (list_ent != &d->page_list); i++ ) |
iap10@2479 | 2289 { |
cl349@2491 | 2290 pfn = list_entry(list_ent, struct pfn_info, list) - frame_table; |
cl349@2491 | 2291 page = &frame_table[pfn]; |
iap10@2479 | 2292 |
cl349@2491 | 2293 switch ( page->u.inuse.type_info & PGT_type_mask) |
cl349@2491 | 2294 { |
cl349@2491 | 2295 case PGT_l1_page_table: |
cl349@2491 | 2296 case PGT_l2_page_table: |
cl349@2491 | 2297 if ( (page->u.inuse.type_info & PGT_count_mask) != 0 ) |
cl349@2491 | 2298 { |
cl349@2491 | 2299 printk("Audit %d: type count!=0 t=%x ot=%x c=%x pfn=%lx\n", |
kaf24@2748 | 2300 d->id, page->u.inuse.type_info, |
cl349@2491 | 2301 page->tlbflush_timestamp, |
cl349@2491 | 2302 page->count_info, pfn ); |
cl349@2491 | 2303 scan_for_pfn_remote(pfn); |
cl349@2491 | 2304 } |
cl349@2491 | 2305 default: |
cl349@2491 | 2306 if ( (page->count_info & PGC_count_mask) != 1 ) |
cl349@2491 | 2307 { |
kaf24@2637 | 2308 printk("Audit %d: gen count!=1 (c=%x) t=%x ot=%x pfn=%lx\n", |
kaf24@2748 | 2309 d->id, |
cl349@2491 | 2310 page->count_info, |
cl349@2491 | 2311 page->u.inuse.type_info, |
cl349@2491 | 2312 page->tlbflush_timestamp, pfn ); |
cl349@2491 | 2313 scan_for_pfn_remote(pfn); |
cl349@2491 | 2314 } |
cl349@2491 | 2315 break; |
cl349@2491 | 2316 } |
iap10@2479 | 2317 |
cl349@2491 | 2318 list_ent = frame_table[pfn].list.next; |
iap10@2479 | 2319 } |
iap10@2479 | 2320 |
kaf24@2637 | 2321 /* PHASE 3 */ |
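/* (Phase 3 undoes phase 1: the counts removed for each pinned table and
 *  each present mapping are restored, and the borrowed tlbflush_timestamp
 *  fields are cleared.) */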
iap10@2479 | 2322 |
iap10@2479 | 2323 list_ent = d->page_list.next; |
iap10@2479 | 2324 for ( i = 0; (list_ent != &d->page_list); i++ ) |
iap10@2479 | 2325 { |
kaf24@2637 | 2326 unsigned long *pt; |
cl349@2491 | 2327 pfn = list_entry(list_ent, struct pfn_info, list) - frame_table; |
cl349@2491 | 2328 page = &frame_table[pfn]; |
iap10@2479 | 2329 |
cl349@2491 | 2330 switch ( page->u.inuse.type_info & PGT_type_mask ) |
cl349@2491 | 2331 { |
cl349@2491 | 2332 case PGT_l2_page_table: |
cl349@2491 | 2333 if ( (page->u.inuse.type_info & PGT_pinned) == PGT_pinned ) |
cl349@2491 | 2334 adjust( page, 1, 1 ); |
iap10@2479 | 2335 |
cl349@2491 | 2336 pt = map_domain_mem( pfn<<PAGE_SHIFT ); |
iap10@2479 | 2337 |
cl349@2491 | 2338 for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ ) |
cl349@2491 | 2339 { |
cl349@2491 | 2340 if ( pt[i] & _PAGE_PRESENT ) |
cl349@2491 | 2341 { |
cl349@2491 | 2342 unsigned long l1pfn = pt[i]>>PAGE_SHIFT; |
cl349@2491 | 2343 struct pfn_info *l1page = &frame_table[l1pfn]; |
iap10@2479 | 2344 |
cl349@2491 | 2345 if ( l1page->u.inuse.domain == d) |
kaf24@2637 | 2346 adjust(l1page, 1, 1); |
cl349@2491 | 2347 } |
cl349@2491 | 2348 } |
iap10@2479 | 2349 |
cl349@2491 | 2350 unmap_domain_mem(pt); |
cl349@2491 | 2351 break; |
iap10@2479 | 2352 |
cl349@2491 | 2353 case PGT_l1_page_table: |
cl349@2491 | 2354 if ( (page->u.inuse.type_info & PGT_pinned) == PGT_pinned ) |
cl349@2491 | 2355 adjust( page, 1, 1 ); |
iap10@2479 | 2356 |
cl349@2491 | 2357 pt = map_domain_mem( pfn<<PAGE_SHIFT ); |
iap10@2479 | 2358 |
cl349@2491 | 2359 for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ ) |
cl349@2491 | 2360 { |
cl349@2491 | 2361 if ( pt[i] & _PAGE_PRESENT ) |
cl349@2491 | 2362 { |
cl349@2491 | 2363 unsigned long l1pfn = pt[i]>>PAGE_SHIFT; |
cl349@2491 | 2364 struct pfn_info *l1page = &frame_table[l1pfn]; |
iap10@2479 | 2365 |
kaf24@2637 | 2366 if ( (l1page->u.inuse.domain != d) || |
kaf24@2637 | 2367 (l1pfn < 0x100) || (l1pfn > max_page) ) |
kaf24@2637 | 2368 continue; |
iap10@2595 | 2369 |
cl349@2644 | 2370 adjust(l1page, 1, 0); |
cl349@2491 | 2371 } |
cl349@2491 | 2372 } |
iap10@2479 | 2373 |
cl349@2491 | 2374 unmap_domain_mem(pt); |
cl349@2491 | 2375 break; |
cl349@2491 | 2376 } |
iap10@2479 | 2377 |
iap10@2479 | 2378 |
kaf24@2637 | 2379 page->tlbflush_timestamp = 0; |
iap10@2479 | 2380 |
cl349@2491 | 2381 list_ent = frame_table[pfn].list.next; |
iap10@2479 | 2382 } |
iap10@2479 | 2383 |
iap10@2479 | 2384 spin_unlock(&d->page_alloc_lock); |
iap10@2479 | 2385 |
cl349@3036 | 2386 adjust(&frame_table[pagetable_val(d->exec_domain[0]->mm.pagetable)>>PAGE_SHIFT], 1, 1); |
iap10@2479 | 2387 |
kaf24@2748 | 2388 printk("Audit %d: Done. ctot=%d ttot=%d\n", d->id, ctot, ttot ); |
iap10@2479 | 2389 |
cl349@3036 | 2390 if ( d != current->domain ) |
cl349@2491 | 2391 domain_unpause(d); |
iap10@2479 | 2392 } |
iap10@2479 | 2393 |
cl349@2491 | 2394 void audit_domains(void) |
iap10@2479 | 2395 { |
iap10@2479 | 2396 struct domain *d; |
iap10@2479 | 2397 for_each_domain ( d ) |
cl349@2644 | 2398 audit_domain(d); |
iap10@2479 | 2399 } |
iap10@2479 | 2400 |
kaf24@2842 | 2401 void audit_domains_key(unsigned char key) |
iap10@2479 | 2402 { |
kaf24@2842 | 2403 audit_domains(); |
iap10@2479 | 2404 } |
iap10@2479 | 2405 |
iap10@2479 | 2406 #endif |