debuggers.hg

annotate xen/arch/x86/x86_64/mm.c @ 20943:217f6aa87716

mem hotplug: Fix an incorrect sanity check in memory add

Currently, memory hot-add will fail if the newly added memory lies above
the current max_pages. That check is wrong, since the user may well hot-add
the riser card with the highest addresses first.

This patch fixes the issue: it instead checks that the entire newly added
range is unpopulated and, if so, allows the add (as sketched below).

Signed-off-by: Jiang, Yunhong <yunhong.jiang@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Feb 04 13:09:30 2010 +0000 (2010-02-04)
parents 19479955c074
children b8d2a4134a68
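
Rather than comparing the new range against max_pages, the reworked
mem_hotadd_check() consults the pdx_group_valid bitmap to verify (per the
description above) that the newly added range is still unpopulated. Below is
a stand-alone sketch of just that part of the check, using the same helpers
that appear in the listing (pfn_to_pdx, PDX_GROUP_COUNT, pdx_group_valid,
find_next_zero_bit, find_next_bit); the wrapper function name is illustrative
only, as in the file these checks live inline in mem_hotadd_check().

    /* Simplified sketch of the new sanity check in mem_hotadd_check(). */
    static int new_range_check(unsigned long spfn, unsigned long epfn)
    {
        unsigned long sidx, eidx, s, e;

        /* First PDX group boundary at or above the start of the range. */
        sidx = ((pfn_to_pdx(spfn) + PDX_GROUP_COUNT - 1) &
                ~(PDX_GROUP_COUNT - 1)) / PDX_GROUP_COUNT;
        /* Group containing the last new pfn; the scan's upper bound. */
        eidx = (pfn_to_pdx(epfn - 1) & ~(PDX_GROUP_COUNT - 1)) / PDX_GROUP_COUNT;
        if ( sidx >= eidx )
            return 0;

        /* Locate the first unpopulated group in the window... */
        s = find_next_zero_bit(pdx_group_valid, eidx, sidx);
        if ( s > eidx )
            return 0;
        /* ...and fail if a populated group follows it before the bound. */
        e = find_next_bit(pdx_group_valid, eidx, s);
        if ( e < eidx )
            return 0;

        return 1;
    }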
rev   line source
kaf24@3314 1 /******************************************************************************
kaf24@3314 2 * arch/x86/x86_64/mm.c
kaf24@3314 3 *
kaf24@5288 4 * Modifications to Linux original are copyright (c) 2004, K A Fraser. This
kaf24@5288 5 * program is free software; you can redistribute it and/or modify it under
kaf24@5288 6 * the terms of the GNU General Public License as published by the Free
kaf24@5288 7 * Software Foundation; either version 2 of the License, or (at your option)
kaf24@5288 8 * any later version.
kaf24@3314 9 *
kaf24@5288 10 * This program is distributed in the hope that it will be useful, but WITHOUT
kaf24@5288 11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
kaf24@5288 12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
kaf24@5288 13 * more details.
kaf24@3314 14 *
kaf24@5288 15 * You should have received a copy of the GNU General Public License along
kaf24@5288 16 * with this program; if not, write to the Free Software Foundation, Inc., 59
kaf24@5288 17 * Temple Place, Suite 330, Boston, MA 02111-1307 USA
kaf24@3314 18 */
kaf24@3314 19
kaf24@3314 20 #include <xen/config.h>
kaf24@3314 21 #include <xen/lib.h>
kaf24@3314 22 #include <xen/init.h>
kaf24@3314 23 #include <xen/mm.h>
kaf24@4254 24 #include <xen/sched.h>
keir@19934 25 #include <xen/numa.h>
keir@20666 26 #include <xen/nodemask.h>
kaf24@9054 27 #include <xen/guest_access.h>
cl349@5329 28 #include <asm/current.h>
kaf24@4536 29 #include <asm/asm_defns.h>
kaf24@3314 30 #include <asm/page.h>
kaf24@3314 31 #include <asm/flushtlb.h>
kaf24@3314 32 #include <asm/fixmap.h>
ack@13297 33 #include <asm/hypercall.h>
kaf24@3799 34 #include <asm/msr.h>
keir@20323 35 #include <asm/setup.h>
keir@20666 36 #include <asm/numa.h>
kaf24@8081 37 #include <public/memory.h>
kaf24@3314 38
keir@20274 39 /* Parameters for PFN/MADDR compression. */
keir@20274 40 unsigned long __read_mostly max_pdx;
keir@20274 41 unsigned long __read_mostly pfn_pdx_bottom_mask = ~0UL;
keir@20274 42 unsigned long __read_mostly ma_va_bottom_mask = ~0UL;
keir@20274 43 unsigned long __read_mostly pfn_top_mask = 0;
keir@20274 44 unsigned long __read_mostly ma_top_mask = 0;
keir@20274 45 unsigned long __read_mostly pfn_hole_mask = 0;
keir@20274 46 unsigned int __read_mostly pfn_pdx_hole_shift = 0;
keir@20274 47
keir@20420 48 unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
ack@13295 49
keir@19964 50 DEFINE_PER_CPU_READ_MOSTLY(void *, compat_arg_xlat);
keir@17869 51
kfraser@15073 52 /* Top-level master (and idle-domain) page directory. */
kfraser@15073 53 l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
kfraser@15073 54 idle_pg_table[L4_PAGETABLE_ENTRIES];
kfraser@15073 55
kfraser@15073 56 /* Enough page directories to map bottom 4GB of the memory map. */
kfraser@15073 57 l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
kfraser@15073 58 l3_identmap[L3_PAGETABLE_ENTRIES];
kfraser@15073 59 l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
kfraser@15073 60 l2_identmap[4*L2_PAGETABLE_ENTRIES];
kfraser@15073 61
kfraser@15073 62 /* Enough page directories to map the Xen text and static data. */
kfraser@15073 63 l3_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
kfraser@15073 64 l3_xenmap[L3_PAGETABLE_ENTRIES];
kfraser@15073 65 l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
kfraser@15073 66 l2_xenmap[L2_PAGETABLE_ENTRIES];
kfraser@15073 67
keir@20275 68 int __mfn_valid(unsigned long mfn)
keir@20275 69 {
keir@20275 70 return likely(mfn < max_page) &&
keir@20275 71 likely(!(mfn & pfn_hole_mask)) &&
keir@20275 72 likely(test_bit(pfn_to_pdx(mfn) / PDX_GROUP_COUNT,
keir@20275 73 pdx_group_valid));
keir@20275 74 }
keir@20275 75
kfraser@14172 76 void *alloc_xen_pagetable(void)
kaf24@3632 77 {
kfraser@14172 78 unsigned long mfn;
kaf24@5041 79
kaf24@5041 80 if ( !early_boot )
kfraser@14172 81 {
keir@17421 82 struct page_info *pg = alloc_domheap_page(NULL, 0);
kfraser@14172 83 BUG_ON(pg == NULL);
kfraser@14172 84 return page_to_virt(pg);
kfraser@14172 85 }
kaf24@5041 86
keir@15631 87 mfn = alloc_boot_pages(1, 1);
kfraser@14172 88 return mfn_to_virt(mfn);
kaf24@3314 89 }
kaf24@3314 90
keir@16959 91 l3_pgentry_t *virt_to_xen_l3e(unsigned long v)
kaf24@3314 92 {
kaf24@3632 93 l4_pgentry_t *pl4e;
kaf24@3632 94
kaf24@5041 95 pl4e = &idle_pg_table[l4_table_offset(v)];
kaf24@5041 96 if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
kaf24@3632 97 {
keir@16959 98 l3_pgentry_t *pl3e = alloc_xen_pagetable();
kaf24@5041 99 clear_page(pl3e);
kfraser@12825 100 l4e_write(pl4e, l4e_from_paddr(__pa(pl3e), __PAGE_HYPERVISOR));
kaf24@3632 101 }
kaf24@5041 102
keir@16959 103 return l4e_to_l3e(*pl4e) + l3_table_offset(v);
keir@16959 104 }
keir@16959 105
keir@16959 106 l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
keir@16959 107 {
keir@16959 108 l3_pgentry_t *pl3e;
keir@16959 109
keir@16959 110 pl3e = virt_to_xen_l3e(v);
kaf24@5041 111 if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
kaf24@5041 112 {
keir@16959 113 l2_pgentry_t *pl2e = alloc_xen_pagetable();
kaf24@5041 114 clear_page(pl2e);
kfraser@12825 115 l3e_write(pl3e, l3e_from_paddr(__pa(pl2e), __PAGE_HYPERVISOR));
kaf24@5041 116 }
keir@16959 117
keir@16959 118 BUG_ON(l3e_get_flags(*pl3e) & _PAGE_PSE);
keir@16959 119 return l3e_to_l2e(*pl3e) + l2_table_offset(v);
kaf24@3632 120 }
kaf24@3632 121
keir@19806 122 void *do_page_walk(struct vcpu *v, unsigned long addr)
keir@19806 123 {
keir@19806 124 unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
keir@19806 125 l4_pgentry_t l4e, *l4t;
keir@19806 126 l3_pgentry_t l3e, *l3t;
keir@19806 127 l2_pgentry_t l2e, *l2t;
keir@19806 128 l1_pgentry_t l1e, *l1t;
keir@19806 129
keir@19806 130 if ( is_hvm_vcpu(v) )
keir@19806 131 return NULL;
keir@19806 132
keir@19806 133 l4t = mfn_to_virt(mfn);
keir@19806 134 l4e = l4t[l4_table_offset(addr)];
keir@19806 135 mfn = l4e_get_pfn(l4e);
keir@19806 136 if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
keir@19806 137 return NULL;
keir@19806 138
keir@19806 139 l3t = mfn_to_virt(mfn);
keir@19806 140 l3e = l3t[l3_table_offset(addr)];
keir@19806 141 mfn = l3e_get_pfn(l3e);
keir@19806 142 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
keir@19998 143 return NULL;
keir@19806 144 if ( (l3e_get_flags(l3e) & _PAGE_PSE) )
keir@19806 145 return mfn_to_virt(mfn) + (addr & ((1UL << L3_PAGETABLE_SHIFT) - 1));
keir@19806 146
keir@19806 147 l2t = mfn_to_virt(mfn);
keir@19806 148 l2e = l2t[l2_table_offset(addr)];
keir@19806 149 mfn = l2e_get_pfn(l2e);
keir@19806 150 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
keir@19806 151 return NULL;
keir@19806 152 if ( (l2e_get_flags(l2e) & _PAGE_PSE) )
keir@19806 153 return mfn_to_virt(mfn) + (addr & ((1UL << L2_PAGETABLE_SHIFT) - 1));
keir@19806 154
keir@19806 155 l1t = mfn_to_virt(mfn);
keir@19806 156 l1e = l1t[l1_table_offset(addr)];
keir@19806 157 mfn = l1e_get_pfn(l1e);
keir@19806 158 if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(mfn) )
keir@19806 159 return NULL;
keir@19806 160
keir@19806 161 return mfn_to_virt(mfn) + (addr & ~PAGE_MASK);
keir@19806 162 }
keir@19806 163
keir@20274 164 void __init pfn_pdx_hole_setup(unsigned long mask)
keir@20274 165 {
keir@20274 166 unsigned int i, j, bottom_shift, hole_shift;
keir@20274 167
keir@20274 168 for ( hole_shift = bottom_shift = j = 0; ; )
keir@20274 169 {
keir@20274 170 i = find_next_zero_bit(&mask, BITS_PER_LONG, j);
keir@20274 171 j = find_next_bit(&mask, BITS_PER_LONG, i);
keir@20274 172 if ( j >= BITS_PER_LONG )
keir@20274 173 break;
keir@20274 174 if ( j - i > hole_shift )
keir@20274 175 {
keir@20274 176 hole_shift = j - i;
keir@20274 177 bottom_shift = i;
keir@20274 178 }
keir@20274 179 }
keir@20274 180 if ( !hole_shift )
keir@20274 181 return;
keir@20274 182
keir@20274 183 printk(KERN_INFO "PFN compression on bits %u...%u\n",
keir@20274 184 bottom_shift, bottom_shift + hole_shift - 1);
keir@20274 185
keir@20274 186 pfn_pdx_hole_shift = hole_shift;
keir@20274 187 pfn_pdx_bottom_mask = (1UL << bottom_shift) - 1;
keir@20274 188 ma_va_bottom_mask = (PAGE_SIZE << bottom_shift) - 1;
keir@20274 189 pfn_hole_mask = ((1UL << hole_shift) - 1) << bottom_shift;
keir@20274 190 pfn_top_mask = ~(pfn_pdx_bottom_mask | pfn_hole_mask);
keir@20274 191 ma_top_mask = pfn_top_mask << PAGE_SHIFT;
keir@20274 192 }
keir@20274 193
keir@20660 194 /*
keir@20660 195 * Allocate page table pages for m2p table
keir@20660 196 */
keir@20660 197 struct mem_hotadd_info
keir@20660 198 {
keir@20660 199 unsigned long spfn;
keir@20660 200 unsigned long epfn;
keir@20660 201 unsigned long cur;
keir@20660 202 };
keir@20660 203
keir@20660 204 int hotadd_mem_valid(unsigned long pfn, struct mem_hotadd_info *info)
keir@20660 205 {
keir@20660 206 return (pfn < info->epfn && pfn >= info->spfn);
keir@20660 207 }
keir@20660 208
keir@20660 209 static unsigned long alloc_hotadd_mfn(struct mem_hotadd_info *info)
keir@20660 210 {
keir@20660 211 unsigned mfn;
keir@20660 212
keir@20660 213 ASSERT((info->cur + ( 1UL << PAGETABLE_ORDER) < info->epfn) &&
keir@20660 214 info->cur >= info->spfn);
keir@20660 215
keir@20660 216 mfn = info->cur;
keir@20660 217 info->cur += (1UL << PAGETABLE_ORDER);
keir@20660 218 return mfn;
keir@20660 219 }
keir@20660 220
keir@20660 221 #define M2P_NO_MAPPED 0
keir@20660 222 #define M2P_2M_MAPPED 1
keir@20660 223 #define M2P_1G_MAPPED 2
keir@20660 224 static int m2p_mapped(unsigned long spfn)
keir@20660 225 {
keir@20660 226 unsigned long va;
keir@20660 227 l3_pgentry_t *l3_ro_mpt;
keir@20660 228 l2_pgentry_t *l2_ro_mpt;
keir@20660 229
keir@20660 230 va = RO_MPT_VIRT_START + spfn * sizeof(*machine_to_phys_mapping);
keir@20660 231 l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(va)]);
keir@20660 232
keir@20660 233 switch ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
keir@20660 234 (_PAGE_PRESENT |_PAGE_PSE))
keir@20660 235 {
keir@20660 236 case _PAGE_PSE|_PAGE_PRESENT:
keir@20660 237 return M2P_1G_MAPPED;
keir@20660 238 break;
keir@20660 239 /* Check for next level */
keir@20660 240 case _PAGE_PRESENT:
keir@20660 241 break;
keir@20660 242 default:
keir@20660 243 return M2P_NO_MAPPED;
keir@20660 244 break;
keir@20660 245 }
keir@20660 246 l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
keir@20660 247
keir@20660 248 if (l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT)
keir@20660 249 return M2P_2M_MAPPED;
keir@20660 250
keir@20660 251 return M2P_NO_MAPPED;
keir@20660 252 }
keir@20660 253
keir@20662 254 int share_hotadd_m2p_table(struct mem_hotadd_info *info)
keir@20662 255 {
keir@20662 256 unsigned long i, n, v, m2p_start_mfn = 0;
keir@20662 257 l3_pgentry_t l3e;
keir@20662 258 l2_pgentry_t l2e;
keir@20662 259
keir@20662 260 /* M2P table is mappable read-only by privileged domains. */
keir@20662 261 for ( v = RDWR_MPT_VIRT_START;
keir@20662 262 v != RDWR_MPT_VIRT_END;
keir@20662 263 v += n << PAGE_SHIFT )
keir@20662 264 {
keir@20662 265 n = L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES;
keir@20662 266 l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
keir@20662 267 l3_table_offset(v)];
keir@20662 268 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
keir@20662 269 continue;
keir@20662 270 if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
keir@20662 271 {
keir@20662 272 n = L1_PAGETABLE_ENTRIES;
keir@20662 273 l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
keir@20662 274 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
keir@20662 275 continue;
keir@20662 276 m2p_start_mfn = l2e_get_pfn(l2e);
keir@20662 277 }
keir@20662 278 else
keir@20662 279 continue;
keir@20662 280
keir@20662 281 for ( i = 0; i < n; i++ )
keir@20662 282 {
keir@20662 283 struct page_info *page = mfn_to_page(m2p_start_mfn + i);
keir@20662 284 if (hotadd_mem_valid(m2p_start_mfn + i, info))
keir@20662 285 share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
keir@20662 286 }
keir@20662 287 }
keir@20662 288
keir@20662 289 for ( v = RDWR_COMPAT_MPT_VIRT_START;
keir@20662 290 v != RDWR_COMPAT_MPT_VIRT_END;
keir@20662 291 v += 1 << L2_PAGETABLE_SHIFT )
keir@20662 292 {
keir@20662 293 l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
keir@20662 294 l3_table_offset(v)];
keir@20662 295 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
keir@20662 296 continue;
keir@20662 297 l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
keir@20662 298 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
keir@20662 299 continue;
keir@20662 300 m2p_start_mfn = l2e_get_pfn(l2e);
keir@20662 301
keir@20662 302 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
keir@20662 303 {
keir@20662 304 struct page_info *page = mfn_to_page(m2p_start_mfn + i);
keir@20662 305 if (hotadd_mem_valid(m2p_start_mfn + i, info))
keir@20662 306 share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
keir@20662 307 }
keir@20662 308 }
keir@20662 309 return 0;
keir@20662 310 }
keir@20662 311
keir@20661 312 static void destroy_compat_m2p_mapping(struct mem_hotadd_info *info)
keir@20661 313 {
keir@20661 314 unsigned long i, va, rwva, pt_pfn;
keir@20661 315 unsigned long smap = info->spfn, emap = info->epfn;
keir@20661 316
keir@20661 317 l3_pgentry_t *l3_ro_mpt;
keir@20661 318 l2_pgentry_t *l2_ro_mpt;
keir@20661 319
keir@20661 320 if ( smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) )
keir@20661 321 return;
keir@20661 322
keir@20661 323 if ( emap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) )
keir@20661 324 emap = (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2;
keir@20661 325
keir@20661 326 l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
keir@20661 327
keir@20661 328 ASSERT(l3e_get_flags(l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)]) & _PAGE_PRESENT);
keir@20661 329
keir@20661 330 l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
keir@20661 331
keir@20661 332 for ( i = smap; i < emap; )
keir@20661 333 {
keir@20661 334 va = HIRO_COMPAT_MPT_VIRT_START +
keir@20661 335 i * sizeof(*compat_machine_to_phys_mapping);
keir@20661 336 rwva = RDWR_COMPAT_MPT_VIRT_START +
keir@20661 337 i * sizeof(*compat_machine_to_phys_mapping);
keir@20661 338 if ( l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT )
keir@20661 339 {
keir@20661 340 pt_pfn = l2e_get_pfn(l2_ro_mpt[l2_table_offset(va)]);
keir@20661 341 if ( hotadd_mem_valid(pt_pfn, info) )
keir@20661 342 {
keir@20661 343 destroy_xen_mappings(rwva, rwva +
keir@20661 344 (1UL << L2_PAGETABLE_SHIFT));
keir@20661 345 l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_empty());
keir@20661 346 }
keir@20661 347 }
keir@20661 348
keir@20661 349 i += 1UL << (L2_PAGETABLE_SHIFT - 2);
keir@20661 350 }
keir@20661 351
keir@20661 352 return;
keir@20661 353 }
keir@20661 354
keir@20661 355 void destroy_m2p_mapping(struct mem_hotadd_info *info)
keir@20661 356 {
keir@20661 357 l3_pgentry_t *l3_ro_mpt;
keir@20661 358 unsigned long i, va, rwva;
keir@20661 359 unsigned long smap = info->spfn, emap = info->epfn;
keir@20661 360
keir@20661 361 l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]);
keir@20661 362
keir@20661 363 /*
keir@20661 364 * No need to clean up m2p structures that existed before the hotplug.
keir@20661 365 */
keir@20661 366 for (i = smap; i < emap;)
keir@20661 367 {
keir@20661 368 unsigned long pt_pfn;
keir@20661 369 l2_pgentry_t *l2_ro_mpt;
keir@20661 370
keir@20661 371 va = RO_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping);
keir@20661 372 rwva = RDWR_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping);
keir@20661 373
keir@20661 374 /* 1G mappings should not be created by memory hotadd. */
keir@20661 375 if (!(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PRESENT) ||
keir@20661 376 (l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PSE))
keir@20661 377 {
keir@20661 378 i = ( i & ~((1UL << (L3_PAGETABLE_SHIFT - 3)) - 1)) +
keir@20661 379 (1UL << (L3_PAGETABLE_SHIFT - 3) );
keir@20661 380 continue;
keir@20661 381 }
keir@20661 382
keir@20661 383 l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
keir@20661 384 if (!(l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT))
keir@20661 385 {
keir@20661 386 i = ( i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) +
keir@20661 387 (1UL << (L2_PAGETABLE_SHIFT - 3)) ;
keir@20661 388 continue;
keir@20661 389 }
keir@20661 390
keir@20661 391 pt_pfn = l2e_get_pfn(l2_ro_mpt[l2_table_offset(va)]);
keir@20661 392 if ( hotadd_mem_valid(pt_pfn, info) )
keir@20661 393 {
keir@20661 394 destroy_xen_mappings(rwva, rwva + (1UL << L2_PAGETABLE_SHIFT));
keir@20661 395
keir@20661 396 l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
keir@20661 397 l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_empty());
keir@20661 398 }
keir@20661 399 i = ( i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) +
keir@20661 400 (1UL << (L2_PAGETABLE_SHIFT - 3));
keir@20661 401 }
keir@20661 402
keir@20661 403 destroy_compat_m2p_mapping(info);
keir@20661 404
keir@20661 405 /* Brute force: flush all TLBs. */
keir@20661 406 flush_tlb_all();
keir@20661 407 return;
keir@20661 408 }
keir@20661 409
keir@20660 410 /*
keir@20660 411 * Allocate and map the compatibility mode machine-to-phys table.
keir@20660 412 * spfn/epfn: the pfn range to be set up
keir@20660 413 * free_s/free_e: the pfn range that is still free
keir@20660 414 */
keir@20660 415 static int setup_compat_m2p_table(struct mem_hotadd_info *info)
keir@20660 416 {
keir@20660 417 unsigned long i, va, smap, emap, rwva, epfn = info->epfn;
keir@20660 418 unsigned int n, memflags;
keir@20660 419 l3_pgentry_t *l3_ro_mpt = NULL;
keir@20660 420 l2_pgentry_t *l2_ro_mpt = NULL;
keir@20660 421 struct page_info *l1_pg;
keir@20660 422
keir@20660 423 smap = info->spfn & (~((1UL << (L2_PAGETABLE_SHIFT - 2)) -1));
keir@20660 424
keir@20660 425 /*
keir@20660 426 * Note: for hot-added memory, only the range below m2p_compat_vstart
keir@20660 427 * will be filled in (assuming memory is discontiguous at boot).
keir@20660 428 */
keir@20660 429 if ((smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2)) )
keir@20660 430 return 0;
keir@20660 431
keir@20660 432 if (epfn > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2))
keir@20660 433 epfn = (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2;
keir@20660 434
keir@20660 435 emap = ( (epfn + ((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1 )) &
keir@20660 436 ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1) );
keir@20660 437
keir@20660 438 va = HIRO_COMPAT_MPT_VIRT_START +
keir@20660 439 smap * sizeof(*compat_machine_to_phys_mapping);
keir@20660 440 l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(va)]);
keir@20660 441
keir@20660 442 ASSERT(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PRESENT);
keir@20660 443
keir@20660 444 l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
keir@20660 445
keir@20660 446 #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int))
keir@20660 447 #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
keir@20660 448 sizeof(*compat_machine_to_phys_mapping))
keir@20660 449 BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \
keir@20660 450 sizeof(*compat_machine_to_phys_mapping));
keir@20660 451
keir@20660 452 for ( i = smap; i < emap; i += (1UL << (L2_PAGETABLE_SHIFT - 2)) )
keir@20660 453 {
keir@20660 454 va = HIRO_COMPAT_MPT_VIRT_START +
keir@20660 455 i * sizeof(*compat_machine_to_phys_mapping);
keir@20660 456
keir@20660 457 rwva = RDWR_COMPAT_MPT_VIRT_START +
keir@20660 458 i * sizeof(*compat_machine_to_phys_mapping);
keir@20660 459
keir@20660 460 if (l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT)
keir@20660 461 continue;
keir@20660 462
keir@20660 463 for ( n = 0; n < CNT; ++n)
keir@20660 464 if ( mfn_valid(i + n * PDX_GROUP_COUNT) )
keir@20660 465 break;
keir@20660 466 if ( n == CNT )
keir@20660 467 continue;
keir@20660 468
keir@20660 469 memflags = MEMF_node(phys_to_nid(i << PAGE_SHIFT));
keir@20660 470
keir@20660 471 l1_pg = mfn_to_page(alloc_hotadd_mfn(info));
keir@20660 472 map_pages_to_xen(rwva,
keir@20660 473 page_to_mfn(l1_pg),
keir@20660 474 1UL << PAGETABLE_ORDER,
keir@20660 475 PAGE_HYPERVISOR);
keir@20660 476 memset((void *)rwva, 0x55, 1UL << L2_PAGETABLE_SHIFT);
keir@20660 477 /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */
keir@20660 478 l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
keir@20660 479 }
keir@20660 480 #undef CNT
keir@20660 481 #undef MFN
keir@20660 482 return 0;
keir@20660 483 }
keir@20660 484
keir@20660 485 /*
keir@20660 486 * Allocate and map the machine-to-phys table.
keir@20660 487 * The L3 for the RO/RW MPT and the L2 for the compat MPT should already be set up.
keir@20660 488 */
keir@20660 489 int setup_m2p_table(struct mem_hotadd_info *info)
keir@20660 490 {
keir@20660 491 unsigned long i, va, smap, emap;
keir@20660 492 unsigned int n, memflags;
keir@20660 493 l2_pgentry_t *l2_ro_mpt = NULL;
keir@20660 494 l3_pgentry_t *l3_ro_mpt = NULL;
keir@20660 495 struct page_info *l1_pg, *l2_pg;
keir@20660 496 int ret = 0;
keir@20660 497
keir@20660 498 ASSERT(l4e_get_flags(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)])
keir@20660 499 & _PAGE_PRESENT);
keir@20660 500 l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]);
keir@20660 501
keir@20660 502 smap = (info->spfn & (~((1UL << (L2_PAGETABLE_SHIFT - 3)) -1)));
keir@20660 503 emap = ((info->epfn + ((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1 )) &
keir@20660 504 ~((1UL << (L2_PAGETABLE_SHIFT - 3)) -1));
keir@20660 505
keir@20660 506 va = RO_MPT_VIRT_START + smap * sizeof(*machine_to_phys_mapping);
keir@20660 507
keir@20660 508 #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
keir@20660 509 #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
keir@20660 510 sizeof(*machine_to_phys_mapping))
keir@20660 511
keir@20660 512 BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \
keir@20660 513 sizeof(*machine_to_phys_mapping));
keir@20660 514
keir@20660 515 i = smap;
keir@20660 516 while ( i < emap )
keir@20660 517 {
keir@20660 518 switch ( m2p_mapped(i) )
keir@20660 519 {
keir@20660 520 case M2P_1G_MAPPED:
keir@20660 521 i = ( i & ~((1UL << (L3_PAGETABLE_SHIFT - 3)) - 1)) +
keir@20660 522 (1UL << (L3_PAGETABLE_SHIFT - 3));
keir@20660 523 continue;
keir@20660 524 case M2P_2M_MAPPED:
keir@20660 525 i = (i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) +
keir@20660 526 (1UL << (L2_PAGETABLE_SHIFT - 3));
keir@20660 527 continue;
keir@20660 528 default:
keir@20660 529 break;
keir@20660 530 }
keir@20660 531
keir@20660 532 va = RO_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping);
keir@20660 533 memflags = MEMF_node(phys_to_nid(i << PAGE_SHIFT));
keir@20660 534
keir@20660 535 for ( n = 0; n < CNT; ++n)
keir@20660 536 if ( mfn_valid(i + n * PDX_GROUP_COUNT) )
keir@20660 537 break;
keir@20660 538 if ( n == CNT )
keir@20660 539 l1_pg = NULL;
keir@20660 540 else
keir@20660 541 {
keir@20660 542 l1_pg = mfn_to_page(alloc_hotadd_mfn(info));
keir@20660 543 map_pages_to_xen(
keir@20660 544 RDWR_MPT_VIRT_START + i * sizeof(unsigned long),
keir@20660 545 page_to_mfn(l1_pg),
keir@20660 546 1UL << PAGETABLE_ORDER,
keir@20660 547 PAGE_HYPERVISOR);
keir@20660 548 memset((void *)(RDWR_MPT_VIRT_START + i * sizeof(unsigned long)),
keir@20660 549 0x55, 1UL << L2_PAGETABLE_SHIFT);
keir@20660 550
keir@20660 551 ASSERT(!(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
keir@20660 552 _PAGE_PSE));
keir@20660 553 if ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
keir@20660 554 _PAGE_PRESENT )
keir@20660 555 l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]) +
keir@20660 556 l2_table_offset(va);
keir@20660 557 else
keir@20660 558 {
keir@20660 559 l2_pg = alloc_domheap_page(NULL, memflags);
keir@20660 560
keir@20660 561 if (!l2_pg)
keir@20660 562 {
keir@20660 563 ret = -ENOMEM;
keir@20660 564 goto error;
keir@20660 565 }
keir@20660 566
keir@20660 567 l2_ro_mpt = page_to_virt(l2_pg);
keir@20660 568 clear_page(l2_ro_mpt);
keir@20660 569 l3e_write(&l3_ro_mpt[l3_table_offset(va)],
keir@20660 570 l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
keir@20660 571 l2_ro_mpt += l2_table_offset(va);
keir@20660 572 }
keir@20660 573
keir@20660 574 /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
keir@20660 575 l2e_write(l2_ro_mpt, l2e_from_page(l1_pg,
keir@20660 576 /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
keir@20660 577 }
keir@20660 578 if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
keir@20660 579 l2_ro_mpt = NULL;
keir@20660 580 i += ( 1UL << (L2_PAGETABLE_SHIFT - 3));
keir@20660 581 }
keir@20660 582 #undef CNT
keir@20660 583 #undef MFN
keir@20660 584
keir@20660 585 ret = setup_compat_m2p_table(info);
keir@20660 586 error:
keir@20660 587 return ret;
keir@20660 588 }
keir@20660 589
kaf24@3314 590 void __init paging_init(void)
kaf24@3314 591 {
kfraser@12422 592 unsigned long i, mpt_size, va;
keir@20276 593 unsigned int n, memflags;
kaf24@5106 594 l3_pgentry_t *l3_ro_mpt;
kaf24@11501 595 l2_pgentry_t *l2_ro_mpt = NULL;
keir@15811 596 struct page_info *l1_pg, *l2_pg, *l3_pg;
kaf24@3632 597
keir@20665 598 /*
keir@20665 599 * We set up the L3s for the 1:1 mapping if the host supports memory hotplug,
keir@20665 600 * to avoid having to sync the 1:1 mapping in the page fault handler.
keir@20665 601 */
keir@20665 602 if ( mem_hotplug )
keir@20665 603 {
keir@20665 604 unsigned long va;
keir@20665 605
keir@20665 606 for ( va = DIRECTMAP_VIRT_START;
keir@20665 607 va < DIRECTMAP_VIRT_END;
keir@20665 608 va += (1UL << L4_PAGETABLE_SHIFT) )
keir@20665 609 {
keir@20665 610 if ( !(l4e_get_flags(idle_pg_table[l4_table_offset(va)]) &
keir@20665 611 _PAGE_PRESENT) )
keir@20665 612 {
keir@20665 613 l3_pg = alloc_domheap_page(NULL, 0);
keir@20665 614 if ( !l3_pg )
keir@20665 615 goto nomem;
keir@20665 616 l3_ro_mpt = page_to_virt(l3_pg);
keir@20665 617 clear_page(l3_ro_mpt);
keir@20665 618 l4e_write(&idle_pg_table[l4_table_offset(va)],
keir@20665 619 l4e_from_page(l3_pg, __PAGE_HYPERVISOR));
keir@20665 620 }
keir@20665 621 }
keir@20665 622 }
keir@20665 623
kaf24@5106 624 /* Create user-accessible L2 directory to map the MPT for guests. */
keir@17421 625 if ( (l3_pg = alloc_domheap_page(NULL, 0)) == NULL )
kfraser@12422 626 goto nomem;
keir@15811 627 l3_ro_mpt = page_to_virt(l3_pg);
kfraser@15439 628 clear_page(l3_ro_mpt);
kfraser@12825 629 l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
keir@15811 630 l4e_from_page(l3_pg, __PAGE_HYPERVISOR | _PAGE_USER));
kaf24@5106 631
kaf24@3668 632 /*
kaf24@3668 633 * Allocate and map the machine-to-phys table.
kaf24@5106 634 * This also ensures L3 is present for fixmaps.
kaf24@3668 635 */
kaf24@6680 636 mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
kaf24@6440 637 mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
keir@20276 638 #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
keir@20276 639 #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
keir@20276 640 sizeof(*machine_to_phys_mapping))
keir@20276 641 BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \
keir@20276 642 sizeof(*machine_to_phys_mapping));
kaf24@6440 643 for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
kaf24@3668 644 {
keir@19135 645 BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
keir@19135 646 va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
keir@19135 647 memflags = MEMF_node(phys_to_nid(i <<
keir@19135 648 (L2_PAGETABLE_SHIFT - 3 + PAGE_SHIFT)));
keir@19135 649
keir@19135 650 if ( cpu_has_page1gb &&
keir@19135 651 !((unsigned long)l2_ro_mpt & ~PAGE_MASK) &&
keir@20276 652 (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) )
keir@20276 653 {
keir@20276 654 unsigned int k, holes;
keir@20276 655
keir@20276 656 for ( holes = k = 0; k < 1 << PAGETABLE_ORDER; ++k)
keir@20276 657 {
keir@20276 658 for ( n = 0; n < CNT; ++n)
keir@20276 659 if ( mfn_valid(MFN(i + k) + n * PDX_GROUP_COUNT) )
keir@20276 660 break;
keir@20276 661 if ( n == CNT )
keir@20276 662 ++holes;
keir@20276 663 }
keir@20276 664 if ( k == holes )
keir@20276 665 {
keir@20276 666 i += (1UL << PAGETABLE_ORDER) - 1;
keir@20276 667 continue;
keir@20276 668 }
keir@20276 669 if ( holes == 0 &&
keir@20276 670 (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
keir@20276 671 memflags)) != NULL )
keir@20276 672 {
keir@20276 673 map_pages_to_xen(
keir@20276 674 RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
keir@20276 675 page_to_mfn(l1_pg),
keir@20276 676 1UL << (2 * PAGETABLE_ORDER),
keir@20276 677 PAGE_HYPERVISOR);
keir@20276 678 memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
keir@20276 679 0x77, 1UL << L3_PAGETABLE_SHIFT);
keir@20276 680
keir@20276 681 ASSERT(!l2_table_offset(va));
keir@20276 682 /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
keir@20276 683 l3e_write(&l3_ro_mpt[l3_table_offset(va)],
keir@20276 684 l3e_from_page(l1_pg,
keir@20276 685 /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
keir@20276 686 i += (1UL << PAGETABLE_ORDER) - 1;
keir@20276 687 continue;
keir@20276 688 }
keir@20276 689 }
keir@20276 690
keir@20276 691 for ( n = 0; n < CNT; ++n)
keir@20276 692 if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
keir@20276 693 break;
keir@20276 694 if ( n == CNT )
keir@20276 695 l1_pg = NULL;
keir@20276 696 else if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
keir@20276 697 memflags)) == NULL )
keir@20276 698 goto nomem;
keir@20276 699 else
keir@19135 700 {
keir@19135 701 map_pages_to_xen(
keir@19135 702 RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
keir@19135 703 page_to_mfn(l1_pg),
keir@20276 704 1UL << PAGETABLE_ORDER,
keir@19135 705 PAGE_HYPERVISOR);
keir@19135 706 memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
keir@20276 707 0x55, 1UL << L2_PAGETABLE_SHIFT);
keir@19135 708 }
kaf24@11501 709 if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
kaf24@11501 710 {
keir@19135 711 if ( (l2_pg = alloc_domheap_page(NULL, memflags)) == NULL )
kfraser@12422 712 goto nomem;
kfraser@15439 713 l2_ro_mpt = page_to_virt(l2_pg);
kfraser@15439 714 clear_page(l2_ro_mpt);
kfraser@12825 715 l3e_write(&l3_ro_mpt[l3_table_offset(va)],
kfraser@12825 716 l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
keir@19135 717 ASSERT(!l2_table_offset(va));
kaf24@11501 718 }
kfraser@11163 719 /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
keir@20276 720 if ( l1_pg )
keir@20276 721 l2e_write(l2_ro_mpt, l2e_from_page(
keir@20276 722 l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
kfraser@12825 723 l2_ro_mpt++;
kaf24@3668 724 }
keir@20276 725 #undef CNT
keir@20276 726 #undef MFN
kaf24@3632 727
kfraser@15546 728 /* Create user-accessible L2 directory to map the MPT for compat guests. */
kfraser@15546 729 BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) !=
kfraser@15546 730 l4_table_offset(HIRO_COMPAT_MPT_VIRT_START));
kfraser@15546 731 l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(
kfraser@15546 732 HIRO_COMPAT_MPT_VIRT_START)]);
keir@17421 733 if ( (l2_pg = alloc_domheap_page(NULL, 0)) == NULL )
kfraser@15546 734 goto nomem;
kfraser@15546 735 compat_idle_pg_table_l2 = l2_ro_mpt = page_to_virt(l2_pg);
kfraser@15546 736 clear_page(l2_ro_mpt);
kfraser@15546 737 l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
kfraser@15546 738 l3e_from_page(l2_pg, __PAGE_HYPERVISOR));
kfraser@15546 739 l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
kfraser@15546 740 /* Allocate and map the compatibility mode machine-to-phys table. */
kfraser@15546 741 mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1));
kfraser@15546 742 if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START )
kfraser@15546 743 mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START;
kfraser@15546 744 mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
keir@17874 745 if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END )
kfraser@15546 746 m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
keir@20276 747 #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int))
keir@20276 748 #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
keir@20276 749 sizeof(*compat_machine_to_phys_mapping))
keir@20276 750 BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \
keir@20276 751 sizeof(*compat_machine_to_phys_mapping));
keir@20276 752 for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++, l2_ro_mpt++ )
ack@13295 753 {
keir@19135 754 memflags = MEMF_node(phys_to_nid(i <<
keir@19135 755 (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT)));
keir@20276 756 for ( n = 0; n < CNT; ++n)
keir@20276 757 if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
keir@20276 758 break;
keir@20276 759 if ( n == CNT )
keir@20276 760 continue;
keir@19135 761 if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
keir@20276 762 memflags)) == NULL )
ack@13295 763 goto nomem;
kfraser@15546 764 map_pages_to_xen(
kfraser@15546 765 RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
kfraser@15546 766 page_to_mfn(l1_pg),
kfraser@15546 767 1UL << PAGETABLE_ORDER,
kfraser@15546 768 PAGE_HYPERVISOR);
kfraser@15546 769 memset((void *)(RDWR_COMPAT_MPT_VIRT_START +
kfraser@15546 770 (i << L2_PAGETABLE_SHIFT)),
kfraser@15546 771 0x55,
kfraser@15546 772 1UL << L2_PAGETABLE_SHIFT);
kfraser@15546 773 /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */
kfraser@15546 774 l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
ack@13295 775 }
keir@20276 776 #undef CNT
keir@20276 777 #undef MFN
ack@13295 778
kaf24@3314 779 /* Set up linear page table mapping. */
kfraser@12825 780 l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
kfraser@12825 781 l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR));
kfraser@12422 782 return;
kfraser@12422 783
kfraser@12422 784 nomem:
kfraser@12422 785 panic("Not enough memory for m2p table\n");
kfraser@11257 786 }
kaf24@8563 787
kfraser@11257 788 void __init setup_idle_pagetable(void)
kfraser@11257 789 {
kaf24@8563 790 /* Install per-domain mappings for idle domain. */
kfraser@12825 791 l4e_write(&idle_pg_table[l4_table_offset(PERDOMAIN_VIRT_START)],
kfraser@12825 792 l4e_from_page(
kfraser@12825 793 virt_to_page(idle_vcpu[0]->domain->arch.mm_perdomain_l3),
kfraser@12825 794 __PAGE_HYPERVISOR));
kaf24@3314 795 }
kaf24@3314 796
kaf24@3314 797 void __init zap_low_mappings(void)
kaf24@3314 798 {
keir@15082 799 BUG_ON(num_online_cpus() != 1);
keir@15082 800
keir@15082 801 /* Remove aliased mapping of first 1:1 PML4 entry. */
kfraser@12825 802 l4e_write(&idle_pg_table[0], l4e_empty());
keir@16155 803 flush_local(FLUSH_TLB_GLOBAL);
keir@15082 804
keir@15082 805 /* Replace with mapping of the boot trampoline only. */
keir@15082 806 map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
keir@15082 807 0x10, __PAGE_HYPERVISOR);
kaf24@3314 808 }
kaf24@3314 809
keir@19934 810 int __cpuinit setup_compat_arg_xlat(unsigned int cpu, int node)
keir@19934 811 {
keir@19934 812 unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
keir@19934 813 unsigned long sz = PAGE_SIZE << order;
keir@19934 814 unsigned int memflags = node != NUMA_NO_NODE ? MEMF_node(node) : 0;
keir@19934 815 struct page_info *pg;
keir@19934 816
keir@19934 817 pg = alloc_domheap_pages(NULL, order, memflags);
keir@19934 818 if ( !pg )
keir@19934 819 return -ENOMEM;
keir@19934 820
keir@19934 821 for ( ; (sz -= PAGE_SIZE) >= COMPAT_ARG_XLAT_SIZE; ++pg )
keir@19934 822 free_domheap_page(pg);
keir@19934 823
keir@19934 824 per_cpu(compat_arg_xlat, cpu) = page_to_virt(pg);
keir@19934 825
keir@19934 826 return 0;
keir@19934 827 }
keir@19934 828
keir@20663 829 void cleanup_frame_table(struct mem_hotadd_info *info)
keir@20663 830 {
keir@20663 831 unsigned long sva, eva;
keir@20663 832 l3_pgentry_t l3e;
keir@20663 833 l2_pgentry_t l2e;
keir@20663 834 unsigned long spfn, epfn;
keir@20663 835
keir@20663 836 spfn = info->spfn;
keir@20663 837 epfn = info->epfn;
keir@20663 838
keir@20663 839 sva = (unsigned long)pdx_to_page(pfn_to_pdx(spfn));
keir@20663 840 eva = (unsigned long)pdx_to_page(pfn_to_pdx(epfn));
keir@20663 841
keir@20663 842 /* Initialize all page_info structures in the range. */
keir@20753 843 memset(mfn_to_page(spfn), -1,
keir@20753 844 (unsigned long)mfn_to_page(epfn) - (unsigned long)mfn_to_page(spfn));
keir@20663 845
keir@20663 846 while (sva < eva)
keir@20663 847 {
keir@20663 848 l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(sva)])[
keir@20663 849 l3_table_offset(sva)];
keir@20663 850 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ||
keir@20663 851 (l3e_get_flags(l3e) & _PAGE_PSE) )
keir@20663 852 {
keir@20663 853 sva = (sva & ~((1UL << L3_PAGETABLE_SHIFT) - 1)) +
keir@20663 854 (1UL << L3_PAGETABLE_SHIFT);
keir@20663 855 continue;
keir@20663 856 }
keir@20663 857
keir@20663 858 l2e = l3e_to_l2e(l3e)[l2_table_offset(sva)];
keir@20663 859 ASSERT(l2e_get_flags(l2e) & _PAGE_PRESENT);
keir@20663 860
keir@20663 861 if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) ==
keir@20663 862 (_PAGE_PSE | _PAGE_PRESENT) )
keir@20663 863 {
keir@20663 864 if (hotadd_mem_valid(l2e_get_pfn(l2e), info))
keir@20663 865 destroy_xen_mappings(sva & ~((1UL << L2_PAGETABLE_SHIFT) - 1),
keir@20663 866 ((sva & ~((1UL << L2_PAGETABLE_SHIFT) -1 )) +
keir@20663 867 (1UL << L2_PAGETABLE_SHIFT) - 1));
keir@20663 868
keir@20663 869 sva = (sva & ~((1UL << L2_PAGETABLE_SHIFT) -1 )) +
keir@20663 870 (1UL << L2_PAGETABLE_SHIFT);
keir@20663 871 continue;
keir@20663 872 }
keir@20663 873
keir@20663 874 ASSERT(l1e_get_flags(l2e_to_l1e(l2e)[l1_table_offset(sva)]) &
keir@20663 875 _PAGE_PRESENT);
keir@20663 876 sva = (sva & ~((1UL << PAGE_SHIFT) - 1)) +
keir@20663 877 (1UL << PAGE_SHIFT);
keir@20663 878 }
keir@20663 879
keir@20663 880 /* Brute force: flush all TLBs. */
keir@20663 881 flush_tlb_all();
keir@20663 882 }
keir@20663 883
keir@20663 884 /* Should we be paranoid about failure in map_pages_to_xen()? */
keir@20663 885 static int setup_frametable_chunk(void *start, void *end,
keir@20663 886 struct mem_hotadd_info *info)
keir@20663 887 {
keir@20663 888 unsigned long s = (unsigned long)start;
keir@20663 889 unsigned long e = (unsigned long)end;
keir@20663 890 unsigned long mfn;
keir@20663 891
keir@20663 892 ASSERT(!(s & ((1 << L2_PAGETABLE_SHIFT) - 1)));
keir@20663 893 ASSERT(!(e & ((1 << L2_PAGETABLE_SHIFT) - 1)));
keir@20663 894
keir@20663 895 for ( ; s < e; s += (1UL << L2_PAGETABLE_SHIFT))
keir@20663 896 {
keir@20663 897 mfn = alloc_hotadd_mfn(info);
keir@20663 898 map_pages_to_xen(s, mfn, 1UL << PAGETABLE_ORDER, PAGE_HYPERVISOR);
keir@20663 899 }
keir@20663 900 memset(start, -1, s - (unsigned long)start);
keir@20663 901
keir@20663 902 return 0;
keir@20663 903 }
keir@20663 904
keir@20663 905 int extend_frame_table(struct mem_hotadd_info *info)
keir@20663 906 {
keir@20663 907 unsigned long cidx, nidx, eidx, spfn, epfn;
keir@20663 908
keir@20663 909 spfn = info->spfn;
keir@20663 910 epfn = info->epfn;
keir@20663 911
keir@20663 912 eidx = (pfn_to_pdx(epfn) + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT;
keir@20663 913 nidx = cidx = pfn_to_pdx(spfn)/PDX_GROUP_COUNT;
keir@20663 914
keir@20663 915 ASSERT( pfn_to_pdx(epfn) <= (DIRECTMAP_SIZE >> PAGE_SHIFT) &&
keir@20663 916 (pfn_to_pdx(epfn) <= FRAMETABLE_SIZE / sizeof(struct page_info)) );
keir@20663 917
keir@20663 918 if ( test_bit(cidx, pdx_group_valid) )
keir@20663 919 cidx = find_next_zero_bit(pdx_group_valid, eidx, cidx);
keir@20663 920
keir@20663 921 if ( cidx >= eidx )
keir@20663 922 return 0;
keir@20663 923
keir@20663 924 while ( cidx < eidx )
keir@20663 925 {
keir@20663 926 nidx = find_next_bit(pdx_group_valid, eidx, cidx);
keir@20663 927 if ( nidx >= eidx )
keir@20663 928 nidx = eidx;
keir@20663 929 setup_frametable_chunk(pdx_to_page(cidx * PDX_GROUP_COUNT ),
keir@20663 930 pdx_to_page(nidx * PDX_GROUP_COUNT),
keir@20663 931 info);
keir@20663 932
keir@20663 933 cidx = find_next_zero_bit(pdx_group_valid, eidx, nidx);
keir@20663 934 }
keir@20663 935
keir@20753 936 memset(mfn_to_page(spfn), 0,
keir@20753 937 (unsigned long)mfn_to_page(epfn) - (unsigned long)mfn_to_page(spfn));
keir@20663 938 return 0;
keir@20663 939 }
keir@20663 940
keir@15081 941 void __init subarch_init_memory(void)
kaf24@3668 942 {
keir@19135 943 unsigned long i, n, v, m2p_start_mfn;
kaf24@3668 944 l3_pgentry_t l3e;
kaf24@3668 945 l2_pgentry_t l2e;
kaf24@3668 946
keir@19135 947 BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
keir@19135 948 BUILD_BUG_ON(RDWR_MPT_VIRT_END & ((1UL << L3_PAGETABLE_SHIFT) - 1));
kaf24@3668 949 /* M2P table is mappable read-only by privileged domains. */
shand@11173 950 for ( v = RDWR_MPT_VIRT_START;
kaf24@3668 951 v != RDWR_MPT_VIRT_END;
keir@19135 952 v += n << PAGE_SHIFT )
kaf24@3668 953 {
keir@19135 954 n = L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES;
mafetter@4629 955 l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
kaf24@3668 956 l3_table_offset(v)];
mafetter@4629 957 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
kaf24@3668 958 continue;
keir@19135 959 if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
keir@19135 960 {
keir@19135 961 n = L1_PAGETABLE_ENTRIES;
keir@19135 962 l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
keir@19135 963 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
keir@19135 964 continue;
keir@19135 965 m2p_start_mfn = l2e_get_pfn(l2e);
keir@19135 966 }
keir@19135 967 else
keir@19135 968 {
keir@19135 969 m2p_start_mfn = l3e_get_pfn(l3e);
keir@19135 970 }
kaf24@3668 971
keir@19135 972 for ( i = 0; i < n; i++ )
kaf24@3668 973 {
kaf24@8764 974 struct page_info *page = mfn_to_page(m2p_start_mfn + i);
kaf24@9214 975 share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
kaf24@3668 976 }
kaf24@3668 977 }
kfraser@15546 978
kfraser@15546 979 for ( v = RDWR_COMPAT_MPT_VIRT_START;
kfraser@15546 980 v != RDWR_COMPAT_MPT_VIRT_END;
kfraser@15546 981 v += 1 << L2_PAGETABLE_SHIFT )
ack@13295 982 {
kfraser@15546 983 l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
kfraser@15546 984 l3_table_offset(v)];
kfraser@15546 985 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
kfraser@15546 986 continue;
kfraser@15546 987 l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
kfraser@15546 988 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
kfraser@15546 989 continue;
kfraser@15546 990 m2p_start_mfn = l2e_get_pfn(l2e);
kfraser@15546 991
kfraser@15546 992 for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
ack@13295 993 {
kfraser@15546 994 struct page_info *page = mfn_to_page(m2p_start_mfn + i);
kfraser@15546 995 share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
ack@13295 996 }
ack@13295 997 }
keir@19934 998
keir@19934 999 if ( setup_compat_arg_xlat(smp_processor_id(),
keir@20800 1000 cpu_to_node[0]) )
keir@19934 1001 panic("Could not setup argument translation area");
kaf24@3668 1002 }
kaf24@3314 1003
kaf24@9904 1004 long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
kaf24@8081 1005 {
kaf24@8081 1006 struct xen_machphys_mfn_list xmml;
kaf24@8081 1007 l3_pgentry_t l3e;
kaf24@8081 1008 l2_pgentry_t l2e;
ack@13295 1009 unsigned long v;
keir@20276 1010 xen_pfn_t mfn, last_mfn;
kaf24@8804 1011 unsigned int i;
kaf24@8081 1012 long rc = 0;
kaf24@8081 1013
kaf24@8081 1014 switch ( op )
kaf24@8081 1015 {
kaf24@8081 1016 case XENMEM_machphys_mfn_list:
kaf24@9054 1017 if ( copy_from_guest(&xmml, arg, 1) )
kaf24@8081 1018 return -EFAULT;
kaf24@8081 1019
keir@19135 1020 BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
keir@19135 1021 BUILD_BUG_ON(RDWR_MPT_VIRT_END & ((1UL << L3_PAGETABLE_SHIFT) - 1));
keir@20276 1022 for ( i = 0, v = RDWR_MPT_VIRT_START, last_mfn = 0;
keir@20276 1023 (i != xmml.max_extents) &&
keir@20276 1024 (v < (unsigned long)(machine_to_phys_mapping + max_page));
keir@19135 1025 i++, v += 1UL << L2_PAGETABLE_SHIFT )
kaf24@8081 1026 {
kaf24@8081 1027 l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
kaf24@8081 1028 l3_table_offset(v)];
kaf24@8081 1029 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
keir@20276 1030 mfn = last_mfn;
keir@20276 1031 else if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
keir@19135 1032 {
keir@19135 1033 l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
keir@20276 1034 if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
keir@20276 1035 mfn = l2e_get_pfn(l2e);
keir@20276 1036 else
keir@20276 1037 mfn = last_mfn;
keir@19135 1038 }
keir@19135 1039 else
keir@19135 1040 {
keir@19135 1041 mfn = l3e_get_pfn(l3e)
keir@19135 1042 + (l2_table_offset(v) << PAGETABLE_ORDER);
keir@19135 1043 }
keir@20276 1044 ASSERT(mfn);
kaf24@9054 1045 if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
kaf24@8081 1046 return -EFAULT;
keir@20276 1047 last_mfn = mfn;
kaf24@8081 1048 }
kaf24@8081 1049
kaf24@9054 1050 xmml.nr_extents = i;
kaf24@9054 1051 if ( copy_to_guest(arg, &xmml, 1) )
kaf24@8081 1052 return -EFAULT;
kaf24@8081 1053
kaf24@8081 1054 break;
kaf24@8081 1055
kaf24@8081 1056 default:
kaf24@8081 1057 rc = -ENOSYS;
kaf24@8081 1058 break;
kaf24@8081 1059 }
kaf24@8081 1060
kaf24@8081 1061 return rc;
kaf24@8081 1062 }
kaf24@8081 1063
kaf24@3314 1064 long do_stack_switch(unsigned long ss, unsigned long esp)
kaf24@3314 1065 {
ack@13290 1066 fixup_guest_stack_selector(current->domain, ss);
kaf24@4727 1067 current->arch.guest_context.kernel_ss = ss;
kaf24@4727 1068 current->arch.guest_context.kernel_sp = esp;
kaf24@3314 1069 return 0;
kaf24@3314 1070 }
kaf24@3314 1071
kaf24@3799 1072 long do_set_segment_base(unsigned int which, unsigned long base)
kaf24@3799 1073 {
kaf24@5327 1074 struct vcpu *v = current;
kaf24@4536 1075 long ret = 0;
kaf24@4488 1076
kaf24@3799 1077 switch ( which )
kaf24@3799 1078 {
kaf24@3799 1079 case SEGBASE_FS:
kaf24@8869 1080 if ( wrmsr_safe(MSR_FS_BASE, base, base>>32) )
kaf24@4536 1081 ret = -EFAULT;
kaf24@4727 1082 else
kaf24@5327 1083 v->arch.guest_context.fs_base = base;
kaf24@3799 1084 break;
kaf24@3799 1085
kaf24@3799 1086 case SEGBASE_GS_USER:
kaf24@8869 1087 if ( wrmsr_safe(MSR_SHADOW_GS_BASE, base, base>>32) )
kaf24@4536 1088 ret = -EFAULT;
kaf24@4727 1089 else
kaf24@5327 1090 v->arch.guest_context.gs_base_user = base;
kaf24@3799 1091 break;
kaf24@3799 1092
kaf24@3799 1093 case SEGBASE_GS_KERNEL:
kaf24@8869 1094 if ( wrmsr_safe(MSR_GS_BASE, base, base>>32) )
kaf24@4536 1095 ret = -EFAULT;
kaf24@4727 1096 else
kaf24@5327 1097 v->arch.guest_context.gs_base_kernel = base;
kaf24@3799 1098 break;
kaf24@3799 1099
kaf24@4488 1100 case SEGBASE_GS_USER_SEL:
kaf24@4488 1101 __asm__ __volatile__ (
kaf24@4488 1102 " swapgs \n"
kaf24@4488 1103 "1: movl %k0,%%gs \n"
kaf24@4536 1104 " "safe_swapgs" \n"
kaf24@4488 1105 ".section .fixup,\"ax\" \n"
kaf24@4488 1106 "2: xorl %k0,%k0 \n"
kaf24@4488 1107 " jmp 1b \n"
kaf24@4488 1108 ".previous \n"
kaf24@4488 1109 ".section __ex_table,\"a\"\n"
kaf24@4488 1110 " .align 8 \n"
kaf24@4488 1111 " .quad 1b,2b \n"
kaf24@4488 1112 ".previous "
kaf24@4488 1113 : : "r" (base&0xffff) );
kaf24@4488 1114 break;
kaf24@4488 1115
kaf24@3799 1116 default:
kaf24@4536 1117 ret = -EINVAL;
kaf24@4536 1118 break;
kaf24@3799 1119 }
kaf24@3799 1120
kaf24@4536 1121 return ret;
kaf24@3799 1122 }
kaf24@3799 1123
kaf24@3314 1124
kaf24@3314 1125 /* Returns TRUE if given descriptor is valid for GDT or LDT. */
ack@13290 1126 int check_descriptor(const struct domain *dom, struct desc_struct *d)
kaf24@3314 1127 {
kaf24@3754 1128 u32 a = d->a, b = d->b;
kaf24@8980 1129 u16 cs;
keir@16284 1130 unsigned int dpl;
kaf24@3314 1131
kaf24@3314 1132 /* A not-present descriptor will always fault, so is safe. */
kaf24@3314 1133 if ( !(b & _SEGMENT_P) )
kaf24@3314 1134 goto good;
kaf24@3314 1135
kaf24@8990 1136 /* Check and fix up the DPL. */
keir@16284 1137 dpl = (b >> 13) & 3;
keir@16284 1138 __fixup_guest_selector(dom, dpl);
keir@16284 1139 b = (b & ~_SEGMENT_DPL) | (dpl << 13);
kaf24@3314 1140
kaf24@4186 1141 /* All code and data segments are okay. No base/limit checking. */
kaf24@3754 1142 if ( (b & _SEGMENT_S) )
ack@13296 1143 {
keir@17847 1144 if ( is_pv_32bit_domain(dom) )
keir@17847 1145 {
keir@17847 1146 unsigned long base, limit;
keir@17847 1147
keir@17847 1148 if ( b & _SEGMENT_L )
keir@17847 1149 goto bad;
keir@17847 1150
keir@17847 1151 /*
keir@17847 1152 * Older PAE Linux guests use segments which are limited to
keir@17847 1153 * 0xf6800000. Extend these to allow access to the larger read-only
keir@17847 1154 * M2P table available in 32on64 mode.
keir@17847 1155 */
keir@17847 1156 base = (b & (0xff << 24)) | ((b & 0xff) << 16) | (a >> 16);
keir@17847 1157
keir@17847 1158 limit = (b & 0xf0000) | (a & 0xffff);
keir@17847 1159 limit++; /* We add one because limit is inclusive. */
keir@17847 1160
keir@17847 1161 if ( (b & _SEGMENT_G) )
keir@17847 1162 limit <<= 12;
keir@17847 1163
keir@17847 1164 if ( (base == 0) && (limit > HYPERVISOR_COMPAT_VIRT_START(dom)) )
keir@17847 1165 {
keir@17847 1166 a |= 0x0000ffff;
keir@17847 1167 b |= 0x000f0000;
keir@17847 1168 }
keir@17847 1169 }
keir@17847 1170
kfraser@15012 1171 goto good;
ack@13296 1172 }
kaf24@3314 1173
kaf24@3754 1174 /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
kaf24@3754 1175 if ( (b & _SEGMENT_TYPE) == 0x000 )
kaf24@3754 1176 goto good;
kaf24@3314 1177
kaf24@3754 1178 /* Everything but a call gate is discarded here. */
kaf24@3754 1179 if ( (b & _SEGMENT_TYPE) != 0xc00 )
kaf24@3314 1180 goto bad;
kaf24@3314 1181
keir@16284 1182 /* Validate the target code selector. */
kaf24@8980 1183 cs = a >> 16;
ack@13290 1184 if ( !guest_gate_selector_okay(dom, cs) )
kaf24@3754 1185 goto bad;
keir@16284 1186 /*
keir@16284 1187 * Force DPL to zero, causing a GP fault with its error code indicating
keir@16284 1188 * the gate in use, allowing emulation. This is necessary because with
keir@16284 1189 * native guests (kernel in ring 3) call gates cannot be used directly
keir@16284 1190 * to transition from user to kernel mode (and whether a gate is used
keir@16284 1191 * to enter the kernel can only be determined when the gate is being
keir@16284 1192 * used), and with compat guests call gates cannot be used at all as
keir@16284 1193 * there are only 64-bit ones.
keir@16284 1194 * Store the original DPL in the selector's RPL field.
keir@16284 1195 */
keir@16284 1196 b &= ~_SEGMENT_DPL;
keir@16284 1197 cs = (cs & ~3) | dpl;
keir@16284 1198 a = (a & 0xffffU) | (cs << 16);
Ian@8971 1199
kaf24@3754 1200 /* Reserved bits must be zero. */
keir@16284 1201 if ( b & (is_pv_32bit_domain(dom) ? 0xe0 : 0xff) )
kaf24@3754 1202 goto bad;
kaf24@3754 1203
kaf24@3314 1204 good:
keir@16284 1205 d->a = a;
keir@16284 1206 d->b = b;
kaf24@3314 1207 return 1;
kaf24@3314 1208 bad:
kaf24@3314 1209 return 0;
kaf24@3314 1210 }
kaf24@3314 1211
keir@20664 1212 int pagefault_by_memadd(unsigned long addr, struct cpu_user_regs *regs)
keir@20664 1213 {
keir@20664 1214 struct domain *d = current->domain;
keir@20664 1215
keir@20664 1216 if (guest_mode(regs) &&
keir@20664 1217 is_pv_32bit_domain(d) &&
keir@20664 1218 ((addr >= HYPERVISOR_COMPAT_VIRT_START(d)) &&
keir@20664 1219 (addr < MACH2PHYS_COMPAT_VIRT_END)) )
keir@20664 1220 return 1;
keir@20664 1221 return 0;
keir@20664 1222 }
keir@20664 1223
keir@20664 1224 int handle_memadd_fault(unsigned long addr, struct cpu_user_regs *regs)
keir@20664 1225 {
keir@20664 1226 struct domain *d = current->domain;
keir@20664 1227 l4_pgentry_t *pl4e = NULL;
keir@20664 1228 l4_pgentry_t l4e;
keir@20664 1229 l3_pgentry_t *pl3e = NULL;
keir@20664 1230 l3_pgentry_t l3e;
keir@20664 1231 l2_pgentry_t *pl2e = NULL;
keir@20664 1232 l2_pgentry_t l2e, idle_l2e;
keir@20664 1233 unsigned long mfn, idle_index;
keir@20664 1234 int ret = 0;
keir@20664 1235
keir@20664 1236 if (!is_pv_32on64_domain(d))
keir@20664 1237 return 0;
keir@20664 1238
keir@20664 1239 if ((addr < HYPERVISOR_COMPAT_VIRT_START(d)) ||
keir@20664 1240 (addr > MACH2PHYS_COMPAT_VIRT_END) )
keir@20664 1241 return 0;
keir@20664 1242
keir@20664 1243 mfn = (read_cr3()) >> PAGE_SHIFT;
keir@20664 1244
keir@20664 1245 pl4e = map_domain_page(mfn);
keir@20664 1246
keir@20664 1247 l4e = pl4e[l4_table_offset(addr)];
keir@20664 1248
keir@20664 1249 if (!(l4e_get_flags(l4e) & _PAGE_PRESENT))
keir@20664 1250 goto unmap;
keir@20664 1251
keir@20664 1252 mfn = l4e_get_pfn(l4e);
keir@20664 1253 /* No need to get the page type here since this is the current CR3. */
keir@20664 1254 pl3e = map_domain_page(mfn);
keir@20664 1255
keir@20664 1256 l3e = pl3e[3];
keir@20664 1257
keir@20664 1258 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
keir@20664 1259 goto unmap;
keir@20664 1260
keir@20664 1261 mfn = l3e_get_pfn(l3e);
keir@20664 1262 pl2e = map_domain_page(mfn);
keir@20664 1263
keir@20664 1264 l2e = pl2e[l2_table_offset(addr)];
keir@20664 1265
keir@20664 1266 if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT))
keir@20664 1267 goto unmap;
keir@20664 1268
keir@20664 1269 idle_index = (l2_table_offset(addr) -
keir@20664 1270 COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d))/
keir@20664 1271 sizeof(l2_pgentry_t);
keir@20664 1272 idle_l2e = compat_idle_pg_table_l2[idle_index];
keir@20664 1273 if (!(l2e_get_flags(idle_l2e) & _PAGE_PRESENT))
keir@20664 1274 goto unmap;
keir@20664 1275
keir@20664 1276 memcpy(&pl2e[l2_table_offset(addr)],
keir@20664 1277 &compat_idle_pg_table_l2[idle_index],
keir@20664 1278 sizeof(l2_pgentry_t));
keir@20664 1279
keir@20664 1280 ret = EXCRET_fault_fixed;
keir@20664 1281
keir@20664 1282 unmap:
keir@20664 1283 if ( pl4e )
keir@20664 1284 unmap_domain_page(pl4e);
keir@20664 1285 if ( pl3e )
keir@20664 1286 unmap_domain_page(pl3e);
keir@20664 1287 if ( pl2e )
keir@20664 1288 unmap_domain_page(pl2e);
keir@20664 1289
keir@20664 1290 return ret;
keir@20664 1291 }
keir@20664 1292
keir@17874 1293 void domain_set_alloc_bitsize(struct domain *d)
keir@17874 1294 {
keir@17874 1295 if ( !is_pv_32on64_domain(d) ||
keir@18074 1296 (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
keir@18074 1297 d->arch.physaddr_bitsize > 0 )
keir@17874 1298 return;
keir@17874 1299 d->arch.physaddr_bitsize =
keir@17874 1300 /* 2^n entries can be contained in guest's p2m mapping space */
keir@17874 1301 fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1
keir@17874 1302 /* 2^n pages -> 2^(n+PAGE_SHIFT) bits */
keir@17874 1303 + PAGE_SHIFT;
keir@17874 1304 }
keir@17874 1305
keir@14135 1306 unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits)
keir@14135 1307 {
keir@17874 1308 if ( (d == NULL) || (d->arch.physaddr_bitsize == 0) )
keir@14135 1309 return bits;
keir@14135 1310 return min(d->arch.physaddr_bitsize, bits);
keir@14135 1311 }
keir@14135 1312
keir@20666 1313 int transfer_pages_to_heap(struct mem_hotadd_info *info)
keir@20666 1314 {
keir@20666 1315 unsigned long i;
keir@20666 1316 struct page_info *pg;
keir@20666 1317
keir@20666 1318 /*
keir@20666 1319 * Mark the already-allocated pages as in-use before handing the free pages
keir@20666 1320 * to the buddy allocator, so they are not merged in free_heap_pages().
keir@20666 1321 */
keir@20666 1322 for (i = info->spfn; i < info->cur; i++)
keir@20666 1323 {
keir@20666 1324 pg = mfn_to_page(i);
keir@20666 1325 pg->count_info = PGC_state_inuse;
keir@20666 1326 }
keir@20666 1327
keir@20666 1328 init_domheap_pages(pfn_to_paddr(info->cur), pfn_to_paddr(info->epfn));
keir@20666 1329
keir@20666 1330 return 0;
keir@20666 1331 }
keir@20666 1332
keir@20666 1333 int mem_hotadd_check(unsigned long spfn, unsigned long epfn)
keir@20666 1334 {
keir@20943 1335 unsigned long s, e, length, sidx, eidx;
keir@20754 1336
keir@20943 1337 if ( (spfn >= epfn) )
keir@20943 1338 return 0;
keir@20943 1339
keir@20943 1340 if (pfn_to_pdx(epfn) > (FRAMETABLE_SIZE / sizeof(*frame_table)))
keir@20752 1341 return 0;
keir@20752 1342
keir@20752 1343 if ( (spfn | epfn) & ((1UL << PAGETABLE_ORDER) - 1) )
keir@20752 1344 return 0;
keir@20752 1345
keir@20752 1346 if ( (spfn | epfn) & pfn_hole_mask )
keir@20752 1347 return 0;
keir@20752 1348
keir@20943 1349 /* Make sure the new range is not already populated. */
keir@20943 1350 sidx = ((pfn_to_pdx(spfn) + PDX_GROUP_COUNT - 1) & ~(PDX_GROUP_COUNT - 1))
keir@20943 1351 / PDX_GROUP_COUNT;
keir@20943 1352 eidx = (pfn_to_pdx(epfn - 1) & ~(PDX_GROUP_COUNT - 1)) / PDX_GROUP_COUNT;
keir@20943 1353 if (sidx >= eidx)
keir@20943 1354 return 0;
keir@20943 1355
keir@20943 1356 s = find_next_zero_bit(pdx_group_valid, eidx, sidx);
keir@20943 1357 if ( s > eidx )
keir@20943 1358 return 0;
keir@20943 1359 e = find_next_bit(pdx_group_valid, eidx, s);
keir@20943 1360 if ( e < eidx )
keir@20943 1361 return 0;
keir@20943 1362
keir@20754 1363 /* Calculate the maximum number of m2p/compat-m2p/frametable pages required. */
keir@20754 1364 s = (spfn & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1));
keir@20754 1365 e = (epfn + (1UL << (L2_PAGETABLE_SHIFT - 3)) - 1) &
keir@20754 1366 ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1);
keir@20754 1367
keir@20754 1368 length = (e - s) * sizeof(unsigned long);
keir@20754 1369
keir@20754 1370 s = (spfn & ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1));
keir@20754 1371 e = (epfn + (1UL << (L2_PAGETABLE_SHIFT - 2)) - 1) &
keir@20754 1372 ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1);
keir@20754 1373
keir@20754 1374 e = min_t(unsigned long, e,
keir@20754 1375 (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2);
keir@20754 1376
keir@20754 1377 if ( e > s )
keir@20754 1378 length += (e - s) * sizeof(unsigned int);
keir@20754 1379
keir@20754 1380 s = pfn_to_pdx(spfn) & ~(PDX_GROUP_COUNT - 1);
keir@20754 1381 e = ( pfn_to_pdx(epfn) + (PDX_GROUP_COUNT - 1) ) & ~(PDX_GROUP_COUNT - 1);
keir@20754 1382
keir@20754 1383 length += (e - s) * sizeof(struct page_info);
keir@20754 1384
keir@20754 1385 if ((length >> PAGE_SHIFT) > (epfn - spfn))
keir@20754 1386 return 0;
keir@20754 1387
keir@20666 1388 return 1;
keir@20666 1389 }
keir@20666 1390
keir@20752 1391 /*
keir@20752 1392 * Be a bit paranoid about memory allocation failures here, since running
keir@20752 1393 * out of memory may well be the reason for the memory add in the first place.
keir@20752 1394 */
keir@20666 1395 int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm)
keir@20666 1396 {
keir@20666 1397 struct mem_hotadd_info info;
keir@20666 1398 int ret, node;
keir@20666 1399 unsigned long old_max = max_page, old_total = total_pages;
keir@20752 1400 unsigned long old_node_start, old_node_span, orig_online;
keir@20666 1401 unsigned long i;
keir@20666 1402
keir@20752 1403 dprintk(XENLOG_INFO, "memory_add %lx ~ %lx with pxm %x\n", spfn, epfn, pxm);
keir@20666 1404
keir@20752 1405 if ( !mem_hotadd_check(spfn, epfn) )
keir@20666 1406 return -EINVAL;
keir@20666 1407
keir@20666 1408 if ( (node = setup_node(pxm)) == -1 )
keir@20666 1409 return -EINVAL;
keir@20666 1410
keir@20666 1411 if ( !valid_numa_range(spfn << PAGE_SHIFT, epfn << PAGE_SHIFT, node) )
keir@20666 1412 {
keir@20666 1413 dprintk(XENLOG_WARNING, "spfn %lx ~ epfn %lx pxm %x node %x "
keir@20666 1414 "is not NUMA valid\n", spfn, epfn, pxm, node);
keir@20666 1415 return -EINVAL;
keir@20666 1416 }
keir@20666 1417
keir@20752 1418 ret = map_pages_to_xen((unsigned long)mfn_to_virt(spfn), spfn,
keir@20752 1419 epfn - spfn, PAGE_HYPERVISOR);
keir@20752 1420 if ( ret )
keir@20752 1421 return ret;
keir@20666 1422
keir@20752 1423 old_node_start = NODE_DATA(node)->node_start_pfn;
keir@20752 1424 old_node_span = NODE_DATA(node)->node_spanned_pages;
keir@20752 1425 orig_online = node_online(node);
keir@20752 1426
keir@20752 1427 if ( !orig_online )
keir@20666 1428 {
keir@20666 1429 dprintk(XENLOG_WARNING, "node %x pxm %x is not online\n", node, pxm);
keir@20666 1430 NODE_DATA(node)->node_id = node;
keir@20666 1431 NODE_DATA(node)->node_start_pfn = spfn;
keir@20666 1432 NODE_DATA(node)->node_spanned_pages =
keir@20666 1433 epfn - node_start_pfn(node);
keir@20666 1434 node_set_online(node);
keir@20666 1435 }else
keir@20666 1436 {
keir@20666 1437 if (NODE_DATA(node)->node_start_pfn > spfn)
keir@20666 1438 NODE_DATA(node)->node_start_pfn = spfn;
keir@20666 1439 if (node_end_pfn(node) < epfn)
keir@20666 1440 NODE_DATA(node)->node_spanned_pages = epfn - node_start_pfn(node);
keir@20666 1441 }
keir@20666 1442
keir@20666 1443 ret = -EINVAL;
keir@20666 1444 info.spfn = spfn;
keir@20666 1445 info.epfn = epfn;
keir@20666 1446 info.cur = spfn;
keir@20666 1447
keir@20666 1448 ret = extend_frame_table(&info);
keir@20666 1449 if (ret)
keir@20666 1450 goto destroy_frametable;
keir@20752 1451
keir@20666 1452 /* Set max_page, as setup_m2p_table() will use it. */
keir@20943 1453 if (max_page < epfn)
keir@20943 1454 {
keir@20943 1455 max_page = epfn;
keir@20943 1456 max_pdx = pfn_to_pdx(max_page - 1) + 1;
keir@20943 1457 }
keir@20666 1458 total_pages += epfn - spfn;
keir@20666 1459
keir@20666 1460 set_pdx_range(spfn, epfn);
keir@20666 1461 ret = setup_m2p_table(&info);
keir@20666 1462
keir@20666 1463 if ( ret )
keir@20666 1464 goto destroy_m2p;
keir@20666 1465
keir@20943 1466 for ( i = spfn; i < epfn; i++ )
keir@20752 1467 if ( iommu_map_page(dom0, i, i) )
keir@20752 1468 break;
keir@20752 1469
keir@20752 1470 if ( i != epfn )
keir@20752 1471 goto destroy_iommu;
keir@20666 1472
keir@20666 1473 /* We can't revert any more */
keir@20666 1474 transfer_pages_to_heap(&info);
keir@20666 1475
keir@20666 1476 share_hotadd_m2p_table(&info);
keir@20666 1477
keir@20666 1478 return 0;
keir@20666 1479
keir@20752 1480 destroy_iommu:
keir@20752 1481 while (i-- > old_max)
keir@20752 1482 iommu_unmap_page(dom0, i);
keir@20752 1483
keir@20666 1484 destroy_m2p:
keir@20666 1485 destroy_m2p_mapping(&info);
keir@20752 1486 max_page = old_max;
keir@20752 1487 total_pages = old_total;
keir@20752 1488 max_pdx = pfn_to_pdx(max_page - 1) + 1;
keir@20666 1489 destroy_frametable:
keir@20666 1490 cleanup_frame_table(&info);
keir@20666 1491 destroy_xen_mappings((unsigned long)mfn_to_virt(spfn),
keir@20666 1492 (unsigned long)mfn_to_virt(epfn));
keir@20752 1493
keir@20752 1494 if ( !orig_online )
keir@20752 1495 node_set_offline(node);
keir@20752 1496 NODE_DATA(node)->node_start_pfn = old_node_start;
keir@20752 1497 NODE_DATA(node)->node_spanned_pages = old_node_span;
keir@20752 1498
keir@20752 1499 destroy_xen_mappings((unsigned long)mfn_to_virt(spfn),
keir@20752 1500 (unsigned long)mfn_to_virt(epfn));
keir@20666 1501 return ret;
keir@20666 1502 }
keir@20666 1503
ack@13297 1504 #include "compat/mm.c"
ack@13297 1505
kaf24@3952 1506 /*
kaf24@3952 1507 * Local variables:
kaf24@3952 1508 * mode: C
kaf24@3952 1509 * c-set-style: "BSD"
kaf24@3952 1510 * c-basic-offset: 4
kaf24@3952 1511 * tab-width: 4
kaf24@3952 1512 * indent-tabs-mode: nil
kaf24@4026 1513 * End:
kaf24@3952 1514 */