debuggers.hg
annotate tools/libxc/xc_domain_restore.c @ 20983:a948403c8f99
Remus: increase failover timeout from 500ms to 1s
500ms is aggressive enough to trigger split-brain under fairly
ordinary workloads, particularly for HVM. The long-term fix is to
integrate with a real HA monitor like linux HA.
Signed-off-by: Brendan Cully <brendan@cs.ubc.ca>
author   | Keir Fraser <keir.fraser@citrix.com>
date     | Fri Feb 12 09:23:10 2010 +0000 (2010-02-12)
parents  | fbe8f32fa257
children | 779c0ef9682c

rev | line source
mjw@1661 | 1 /****************************************************************************** |
Tim@14782 | 2 * xc_domain_restore.c |
kaf24@9698 | 3 * |
Tim@14782 | 4 * Restore the state of a guest session. |
kaf24@9698 | 5 * |
mjw@1661 | 6 * Copyright (c) 2003, K A Fraser. |
Tim@14782 | 7 * Copyright (c) 2006, Intel Corporation |
Tim@14782 | 8 * Copyright (c) 2007, XenSource Inc. |
Tim@14782 | 9 * |
Tim@14782 | 10 * This program is free software; you can redistribute it and/or modify it |
Tim@14782 | 11 * under the terms and conditions of the GNU General Public License, |
Tim@14782 | 12 * version 2, as published by the Free Software Foundation. |
Tim@14782 | 13 * |
Tim@14782 | 14 * This program is distributed in the hope it will be useful, but WITHOUT |
Tim@14782 | 15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
Tim@14782 | 16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
Tim@14782 | 17 * more details. |
Tim@14782 | 18 * |
Tim@14782 | 19 * You should have received a copy of the GNU General Public License along with |
Tim@14782 | 20 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple |
Tim@14782 | 21 * Place - Suite 330, Boston, MA 02111-1307 USA. |
Tim@14782 | 22 * |
mjw@1661 | 23 */ |
mjw@1661 | 24 |
cl349@6427 | 25 #include <stdlib.h> |
cl349@6427 | 26 #include <unistd.h> |
smh22@7740 | 27 |
cl349@6427 | 28 #include "xg_private.h" |
smh22@7740 | 29 #include "xg_save_restore.h" |
keir@14138 | 30 #include "xc_dom.h" |
smh22@7740 | 31 |
Tim@14782 | 32 #include <xen/hvm/ioreq.h> |
Tim@14782 | 33 #include <xen/hvm/params.h> |
Tim@14782 | 34 |
keir@20587 | 35 struct restore_ctx { |
keir@20587 | 36 unsigned long max_mfn; /* max mfn of the current host machine */ |
keir@20587 | 37 unsigned long hvirt_start; /* virtual starting address of the hypervisor */ |
keir@20587 | 38 unsigned int pt_levels; /* #levels of page tables used by the current guest */ |
keir@20587 | 39 unsigned long nr_pfns; /* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */ |
keir@20587 | 40 xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to its current MFN. */ |
keir@20587 | 41 xen_pfn_t *p2m; /* A table mapping each PFN to its new MFN. */ |
keir@20587 | 42 unsigned no_superpage_mem; /* Set once a superpage allocation has failed (no contiguous memory left) */ |
keir@20589 | 43 struct domain_info_context dinfo; |
keir@20587 | 44 }; |
steven@14732 | 45 |
smh22@7740 | 46 /* |
keir@19677 | 47 ** Superpage (2M) geometry: a superpage covers 512 (1 << 9) contiguous |
keir@19677 | 48 ** 4K pfns, aligned to a 512-pfn boundary. |
keir@19677 | 49 */ |
keir@19677 | 50 #define SUPERPAGE_PFN_SHIFT 9 |
keir@19677 | 51 #define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT) |
keir@19677 | 52 |
keir@20164 | 53 /* |
keir@20164 | 54 * Setting bit 31 forces superpage allocation even when not all pfns |
keir@20164 | 55 * have arrived; bit 30 indicates that no superpage is being tracked. |
keir@20164 | 56 */ |
keir@20164 | 57 #define FORCE_SP_SHIFT 31 |
keir@20164 | 58 #define FORCE_SP_MASK (1UL << FORCE_SP_SHIFT) |
keir@19677 | 59 |
keir@20164 | 60 #define INVALID_SUPER_PAGE ((1UL << 30) + 1) |
keir@20164 | 61 #define SUPER_PAGE_START(pfn) (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 ) |
keir@20164 | 62 #define SUPER_PAGE_TRACKING(pfn) ( (pfn) != INVALID_SUPER_PAGE ) |
keir@20164 | 63 #define SUPER_PAGE_DONE(pfn) ( SUPER_PAGE_START(pfn) ) |
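
The tracking word packs three states into one pfn-sized value: an aligned pfn means a 2M extent is being tracked, INVALID_SUPER_PAGE (bit 30 set) means nothing is being tracked, and FORCE_SP_MASK (bit 31) requests a forced 2M allocation. A minimal sketch of how the macros compose, assuming the definitions above are in scope (the demo function itself is hypothetical, not part of this file):

    #include <assert.h>

    /* Hypothetical demo: exercises the superpage tracking macros. */
    static void superpage_tracking_demo(void)
    {
        unsigned long next_pfn;

        next_pfn = 0x200;                      /* 512: aligned start of a 2M extent */
        assert(SUPER_PAGE_START(next_pfn));    /* low 9 bits are clear */
        assert(SUPER_PAGE_TRACKING(next_pfn)); /* not the INVALID sentinel */

        next_pfn = INVALID_SUPER_PAGE;         /* no extent being tracked */
        assert(!SUPER_PAGE_TRACKING(next_pfn));

        next_pfn = 0x300 | FORCE_SP_MASK;      /* bit 31: force 2M allocation */
        next_pfn &= ~FORCE_SP_MASK;            /* consumers clear the flag first */
        assert(!SUPER_PAGE_DONE(next_pfn));    /* 0x300 is mid-extent: not done */
    }
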
keir@19677 | 64 |
keir@20591 | 65 static int super_page_populated(struct restore_ctx *ctx, unsigned long pfn) |
keir@20164 | 66 { |
keir@20164 | 67 int i; |
keir@20164 | 68 pfn &= ~(SUPERPAGE_NR_PFNS - 1); |
keir@20164 | 69 for ( i = pfn; i < pfn + SUPERPAGE_NR_PFNS; i++ ) |
keir@20164 | 70 { |
keir@20587 | 71 if ( ctx->p2m[i] != INVALID_P2M_ENTRY ) |
keir@20164 | 72 return 1; |
keir@20164 | 73 } |
keir@20164 | 74 return 0; |
keir@20164 | 75 } |
keir@19677 | 76 |
keir@20164 | 77 /* |
keir@20164 | 78 * Break a 2M page and move contents of [extent start, next_pfn-1] to |
keir@20164 | 79 * some new allocated 4K pages |
keir@20164 | 80 */ |
keir@20164 | 81 static int break_super_page(int xc_handle, |
keir@20164 | 82 uint32_t dom, |
keir@20591 | 83 struct restore_ctx *ctx, |
keir@20164 | 84 xen_pfn_t next_pfn) |
keir@20164 | 85 { |
keir@20164 | 86 xen_pfn_t *page_array, start_pfn, mfn; |
keir@20164 | 87 uint8_t *ram_base, *save_buf; |
keir@20164 | 88 unsigned long i; |
keir@20164 | 89 int tot_pfns, rc = 0; |
keir@20164 | 90 |
keir@20164 | 91 tot_pfns = (next_pfn & (SUPERPAGE_NR_PFNS - 1)); |
keir@20164 | 92 |
keir@20164 | 93 start_pfn = next_pfn & ~(SUPERPAGE_NR_PFNS - 1); |
keir@20164 | 94 for ( i = start_pfn; i < start_pfn + SUPERPAGE_NR_PFNS; i++ ) |
keir@20164 | 95 { |
keir@20164 | 96 /* check that the 2M page is fully populated */ |
keir@20587 | 97 if ( ctx->p2m[i] == INVALID_P2M_ENTRY ) { |
keir@20164 | 98 DPRINTF("Previous super page was populated wrongly!\n"); |
keir@19677 | 99 return 1; |
keir@19677 | 100 } |
keir@20164 | 101 } |
keir@20164 | 102 |
keir@20164 | 103 page_array = (xen_pfn_t*)malloc(tot_pfns * sizeof(xen_pfn_t)); |
keir@20164 | 104 save_buf = (uint8_t*)malloc(tot_pfns * PAGE_SIZE); |
keir@20164 | 105 |
keir@20164 | 106 if ( !page_array || !save_buf ) |
keir@20164 | 107 { |
keir@20164 | 108 ERROR("alloc page_array failed\n"); |
keir@20164 | 109 errno = ENOMEM; |
keir@20164 | 110 rc = 1; |
keir@20164 | 111 goto out; |
keir@20164 | 112 } |
keir@20164 | 113 |
keir@20164 | 114 /* save previous super page contents */ |
keir@20164 | 115 for ( i = 0; i < tot_pfns; i++ ) |
keir@20164 | 116 { |
keir@20164 | 117 /* only HVM is supported, as the mfn of the 2M page is missing */ |
keir@20164 | 118 page_array[i] = start_pfn + i; |
keir@20164 | 119 } |
keir@20164 | 120 |
keir@20837 | 121 ram_base = xc_map_foreign_pages(xc_handle, dom, PROT_READ, |
keir@20164 | 122 page_array, tot_pfns); |
keir@20164 | 123 |
keir@20164 | 124 if ( ram_base == NULL ) |
keir@20164 | 125 { |
keir@20164 | 126 ERROR("map batch failed\n"); |
keir@20164 | 127 rc = 1; |
keir@20164 | 128 goto out; |
keir@20164 | 129 } |
keir@20164 | 130 |
keir@20164 | 131 memcpy(save_buf, ram_base, tot_pfns * PAGE_SIZE); |
keir@20164 | 132 munmap(ram_base, tot_pfns * PAGE_SIZE); |
keir@20164 | 133 |
keir@20164 | 134 /* free the super page */ |
keir@20164 | 135 if ( xc_domain_memory_decrease_reservation(xc_handle, dom, 1, |
keir@20164 | 136 SUPERPAGE_PFN_SHIFT, &start_pfn) != 0 ) |
keir@20164 | 137 { |
keir@20164 | 138 ERROR("free 2M page failure @ 0x%ld.\n", next_pfn); |
keir@20164 | 139 rc = 1; |
keir@20164 | 140 goto out; |
keir@20164 | 141 } |
keir@20164 | 142 |
keir@20164 | 143 start_pfn = next_pfn & ~(SUPERPAGE_NR_PFNS - 1); |
keir@20164 | 144 for ( i = start_pfn; i < start_pfn + SUPERPAGE_NR_PFNS; i++ ) |
keir@20164 | 145 { |
keir@20587 | 146 ctx->p2m[i] = INVALID_P2M_ENTRY; |
keir@20164 | 147 } |
keir@20164 | 148 |
keir@20164 | 149 for ( i = start_pfn; i < start_pfn + tot_pfns; i++ ) |
keir@20164 | 150 { |
keir@20164 | 151 mfn = i; |
keir@20164 | 152 if (xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, |
keir@20164 | 153 0, &mfn) != 0) |
keir@19677 | 154 { |
keir@20164 | 155 ERROR("Failed to allocate physical memory.!\n"); |
keir@20164 | 156 errno = ENOMEM; |
keir@20164 | 157 rc = 1; |
keir@20164 | 158 goto out; |
keir@20164 | 159 } |
keir@20587 | 160 ctx->p2m[i] = mfn; |
keir@20164 | 161 } |
keir@20164 | 162 |
keir@20164 | 163 /* restore contents */ |
keir@20164 | 164 for ( i = 0; i < tot_pfns; i++ ) |
keir@20164 | 165 { |
keir@20164 | 166 page_array[i] = start_pfn + i; |
keir@20164 | 167 } |
keir@20164 | 168 |
keir@20837 | 169 ram_base = xc_map_foreign_pages(xc_handle, dom, PROT_WRITE, |
keir@20164 | 170 page_array, tot_pfns); |
keir@20164 | 171 if ( ram_base == NULL ) |
keir@20164 | 172 { |
keir@20164 | 173 ERROR("map batch failed\n"); |
keir@20164 | 174 rc = 1; |
keir@20164 | 175 goto out; |
keir@20164 | 176 } |
keir@20164 | 177 |
keir@20164 | 178 memcpy(ram_base, save_buf, tot_pfns * PAGE_SIZE); |
keir@20164 | 179 munmap(ram_base, tot_pfns * PAGE_SIZE); |
keir@20164 | 180 |
keir@20164 | 181 out: |
keir@20164 | 182 free(page_array); |
keir@20164 | 183 free(save_buf); |
keir@20164 | 184 return rc; |
keir@20164 | 185 } |
keir@20164 | 186 |
keir@20164 | 187 |
keir@20164 | 188 /* |
keir@20164 | 189 * Allocate pages according to the pfn list: either one 2M page or a |
keir@20164 | 190 * series of 4K pages. A 2M page may be allocated optimistically before |
keir@20164 | 191 * all pages in the extent have arrived, then fixed up in the next batch: |
keir@20164 | 192 * if the new pages fill the holes in the 2M extent, do nothing; otherwise |
keir@20164 | 193 * replace the original 2M page with a set of 4K pages. |
keir@20164 | 194 */ |
keir@20164 | 195 static int allocate_mfn_list(int xc_handle, |
keir@20164 | 196 uint32_t dom, |
keir@20591 | 197 struct restore_ctx *ctx, |
keir@20164 | 198 unsigned long nr_extents, |
keir@20164 | 199 xen_pfn_t *batch_buf, |
keir@20164 | 200 xen_pfn_t *next_pfn, |
keir@20164 | 201 int superpages) |
keir@20164 | 202 { |
keir@20164 | 203 unsigned int i; |
keir@20164 | 204 unsigned long mfn, pfn, sp_pfn; |
keir@20164 | 205 |
keir@20164 | 206 /* Check the force-superpage flag, then clear it */ |
keir@20164 | 207 unsigned force_super_page = !!(*next_pfn & FORCE_SP_MASK); |
keir@20164 | 208 *next_pfn &= ~FORCE_SP_MASK; |
keir@20164 | 209 |
keir@20164 | 210 sp_pfn = *next_pfn; |
keir@20164 | 211 |
keir@20164 | 212 if ( !superpages || |
keir@20587 | 213 ctx->no_superpage_mem || |
keir@20164 | 214 !SUPER_PAGE_TRACKING(sp_pfn) ) |
keir@20164 | 215 goto normal_page; |
keir@20164 | 216 |
keir@20164 | 217 if ( !batch_buf ) |
keir@20164 | 218 { |
keir@20164 | 219 /* Break the previous 2M page if its 512 pages were split across a batch boundary */ |
keir@20164 | 220 if ( SUPER_PAGE_TRACKING(sp_pfn) && |
keir@20164 | 221 !SUPER_PAGE_DONE(sp_pfn)) |
keir@20164 | 222 { |
keir@20164 | 223 /* break the previously allocated superpage */ |
keir@20591 | 224 if ( break_super_page(xc_handle, dom, ctx, sp_pfn) != 0 ) |
keir@20164 | 225 { |
keir@20164 | 226 ERROR("Break previous super page fail!\n"); |
keir@20164 | 227 return 1; |
keir@20164 | 228 } |
keir@20164 | 229 } |
keir@20164 | 230 |
keir@20164 | 231 /* following pages fit the order of the 2M extent */ |
keir@20164 | 232 return 0; |
keir@20164 | 233 } |
keir@20164 | 234 |
keir@20164 | 235 /* |
keir@20164 | 236 * We try to allocate a 2M page only when: |
keir@20164 | 237 * the user requested it (superpages), |
keir@20164 | 238 * AND enough contiguous memory is available, |
keir@20164 | 239 * AND we are tracking a superpage, |
keir@20164 | 240 * AND all pages in the 2M extent were tracked, OR a partial extent for speculation, |
keir@20164 | 241 * AND no page in the 2M extent is already populated |
keir@20164 | 242 */ |
keir@20164 | 243 if ( !SUPER_PAGE_DONE(sp_pfn) && !force_super_page ) |
keir@20164 | 244 goto normal_page; |
keir@20164 | 245 |
keir@20164 | 246 pfn = batch_buf[0] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; |
keir@20591 | 247 if ( super_page_populated(ctx, pfn) ) |
keir@20164 | 248 goto normal_page; |
keir@20164 | 249 |
keir@20164 | 250 pfn &= ~(SUPERPAGE_NR_PFNS - 1); |
keir@20164 | 251 mfn = pfn; |
keir@20164 | 252 |
keir@20164 | 253 if ( xc_domain_memory_populate_physmap(xc_handle, dom, 1, |
keir@20164 | 254 SUPERPAGE_PFN_SHIFT, 0, &mfn) == 0) |
keir@20164 | 255 { |
keir@20164 | 256 for ( i = pfn; i < pfn + SUPERPAGE_NR_PFNS; i++, mfn++ ) |
keir@20164 | 257 { |
keir@20587 | 258 ctx->p2m[i] = mfn; |
keir@20164 | 259 } |
keir@20164 | 260 return 0; |
keir@20164 | 261 } |
keir@20164 | 262 DPRINTF("No 2M page available for pfn 0x%lx, fall back to 4K page.\n", |
keir@20164 | 263 pfn); |
keir@20587 | 264 ctx->no_superpage_mem = 1; |
keir@20164 | 265 |
keir@20164 | 266 normal_page: |
keir@20164 | 267 if ( !batch_buf ) |
keir@20164 | 268 return 0; |
keir@20164 | 269 |
keir@20164 | 270 /* End the tracking if we wanted a 2M page but ended up with 4K pages */ |
keir@20164 | 271 *next_pfn = INVALID_SUPER_PAGE; |
keir@20164 | 272 |
keir@20164 | 273 for ( i = 0; i < nr_extents; i++ ) |
keir@20164 | 274 { |
keir@20164 | 275 unsigned long pagetype = batch_buf[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; |
keir@20164 | 276 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) |
keir@20164 | 277 continue; |
keir@20164 | 278 |
keir@20164 | 279 pfn = mfn = batch_buf[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; |
keir@20587 | 280 if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY ) |
keir@20164 | 281 { |
keir@20164 | 282 if (xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, |
keir@20164 | 283 0, &mfn) != 0) |
keir@20164 | 284 { |
keir@20164 | 285 ERROR("Failed to allocate physical memory.! pfn=0x%lx, mfn=0x%lx.\n", |
keir@20164 | 286 pfn, mfn); |
keir@20164 | 287 errno = ENOMEM; |
keir@20164 | 288 return 1; |
keir@20164 | 289 } |
keir@20587 | 290 ctx->p2m[pfn] = mfn; |
keir@19677 | 291 } |
keir@19677 | 292 } |
keir@20164 | 293 |
keir@19677 | 294 return 0; |
keir@19677 | 295 } |
keir@19677 | 296 |
keir@19677 | 297 static int allocate_physmem(int xc_handle, uint32_t dom, |
keir@20591 | 298 struct restore_ctx *ctx, |
keir@19677 | 299 unsigned long *region_pfn_type, int region_size, |
keir@19677 | 300 unsigned int hvm, xen_pfn_t *region_mfn, int superpages) |
keir@19677 | 301 { |
keir@20164 | 302 int i; |
keir@19677 | 303 unsigned long pfn; |
keir@19677 | 304 unsigned long pagetype; |
keir@19677 | 305 |
keir@20164 | 306 /* Next expected pfn in order to track a possible 2M page */ |
keir@20164 | 307 static unsigned long required_pfn = INVALID_SUPER_PAGE; |
keir@20164 | 308 |
keir@20164 | 309 /* Buffer of pfn list for 2M page, or series of 4K pages */ |
keir@20164 | 310 xen_pfn_t *batch_buf; |
keir@20164 | 311 unsigned int batch_buf_len; |
keir@20589 | 312 struct domain_info_context *dinfo = &ctx->dinfo; |
keir@20164 | 313 |
keir@20164 | 314 if ( !superpages ) |
keir@20164 | 315 { |
keir@20164 | 316 batch_buf = ®ion_pfn_type[0]; |
keir@20164 | 317 batch_buf_len = region_size; |
keir@20164 | 318 goto alloc_page; |
keir@20164 | 319 } |
keir@20164 | 320 |
keir@20164 | 321 batch_buf = NULL; |
keir@20164 | 322 batch_buf_len = 0; |
keir@20164 | 323 /* This loop tracks the possible 2M page */ |
keir@20164 | 324 for (i = 0; i < region_size; i++) |
keir@20164 | 325 { |
keir@20164 | 326 pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; |
keir@20164 | 327 pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; |
keir@20164 | 328 |
keir@20164 | 329 if (pagetype == XEN_DOMCTL_PFINFO_XTAB) |
keir@20164 | 330 { |
keir@20164 | 331 /* Do not start collecting pfns until we get a valid pfn */ |
keir@20164 | 332 if ( batch_buf_len != 0 ) |
keir@20164 | 333 batch_buf_len++; |
keir@20164 | 334 continue; |
keir@20164 | 335 } |
keir@20164 | 336 |
keir@20164 | 337 if ( SUPER_PAGE_START(pfn) ) |
keir@20164 | 338 { |
keir@20164 | 339 /* Start of a 2M extent, populate previous buf */ |
keir@20591 | 340 if ( allocate_mfn_list(xc_handle, dom, ctx, |
keir@20164 | 341 batch_buf_len, batch_buf, |
keir@20164 | 342 &required_pfn, superpages) != 0 ) |
keir@20164 | 343 { |
keir@20164 | 344 errno = ENOMEM; |
keir@20164 | 345 return 1; |
keir@20164 | 346 } |
keir@20164 | 347 |
keir@20164 | 348 /* start new tracking for 2M page */ |
keir@20164 | 349 batch_buf = ®ion_pfn_type[i]; |
keir@20164 | 350 batch_buf_len = 1; |
keir@20164 | 351 required_pfn = pfn + 1; |
keir@20164 | 352 } |
keir@20164 | 353 else if ( pfn == required_pfn ) |
keir@20164 | 354 { |
keir@20164 | 355 /* this page fits the 2M extent in order */ |
keir@20164 | 356 batch_buf_len++; |
keir@20164 | 357 required_pfn++; |
keir@20164 | 358 } |
keir@20164 | 359 else if ( SUPER_PAGE_TRACKING(required_pfn) ) |
keir@20164 | 360 { |
keir@20164 | 361 /* break of a 2M extent, populate previous buf */ |
keir@20591 | 362 if ( allocate_mfn_list(xc_handle, dom, ctx, |
keir@20164 | 363 batch_buf_len, batch_buf, |
keir@20164 | 364 &required_pfn, superpages) != 0 ) |
keir@20164 | 365 { |
keir@20164 | 366 errno = ENOMEM; |
keir@20164 | 367 return 1; |
keir@20164 | 368 } |
keir@20164 | 369 /* start new tracking for a series of 4K pages */ |
keir@20164 | 370 batch_buf = ®ion_pfn_type[i]; |
keir@20164 | 371 batch_buf_len = 1; |
keir@20164 | 372 required_pfn = INVALID_SUPER_PAGE; |
keir@20164 | 373 } |
keir@20164 | 374 else |
keir@20164 | 375 { |
keir@20164 | 376 /* this page is 4K */ |
keir@20164 | 377 if ( !batch_buf ) |
keir@20164 | 378 batch_buf = ®ion_pfn_type[i]; |
keir@20164 | 379 batch_buf_len++; |
keir@20164 | 380 } |
keir@20164 | 381 } |
keir@20164 | 382 |
keir@20164 | 383 /* |
keir@20164 | 384 * Populate the remaining batch_buf at the end. |
keir@20164 | 385 * We speculatively allocate a 2M page even before seeing all of its |
keir@20164 | 386 * pages in order (by setting bit 31). If superpage support is not |
keir@20164 | 387 * required, we can skip the tracking loop and come here directly. |
keir@20164 | 388 * Speculative allocation can't be used for PV guests, as we have no mfn |
keir@20164 | 389 * with which to map the previous 2M range should it need breaking. |
keir@20164 | 390 */ |
keir@20164 | 391 if ( SUPER_PAGE_TRACKING(required_pfn) && |
keir@20164 | 392 !SUPER_PAGE_DONE(required_pfn) ) |
keir@20164 | 393 { |
keir@20164 | 394 if (hvm) |
keir@20164 | 395 required_pfn |= FORCE_SP_MASK; |
keir@20164 | 396 else |
keir@20164 | 397 required_pfn = INVALID_SUPER_PAGE; |
keir@20164 | 398 } |
keir@20164 | 399 |
keir@20164 | 400 alloc_page: |
keir@20164 | 401 if ( batch_buf ) |
keir@20164 | 402 { |
keir@20591 | 403 if ( allocate_mfn_list(xc_handle, dom, ctx, |
keir@20164 | 404 batch_buf_len, batch_buf, |
keir@20164 | 405 &required_pfn, |
keir@20164 | 406 superpages) != 0 ) |
keir@20164 | 407 { |
keir@20164 | 408 errno = ENOMEM; |
keir@20164 | 409 return 1; |
keir@20164 | 410 } |
keir@20164 | 411 } |
keir@20164 | 412 |
keir@19677 | 413 for (i = 0; i < region_size; i++) |
keir@19677 | 414 { |
keir@19677 | 415 pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; |
keir@19677 | 416 pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK; |
keir@19677 | 417 |
keir@20585 | 418 if ( pfn > dinfo->p2m_size ) |
keir@19677 | 419 { |
keir@19677 | 420 ERROR("pfn out of range"); |
keir@19677 | 421 return 1; |
keir@19677 | 422 } |
keir@19677 | 423 if (pagetype == XEN_DOMCTL_PFINFO_XTAB) |
keir@19677 | 424 { |
keir@19677 | 425 region_mfn[i] = ~0UL; |
keir@19677 | 426 } |
keir@19677 | 427 else |
keir@19677 | 428 { |
keir@20587 | 429 if (ctx->p2m[pfn] == INVALID_P2M_ENTRY) |
keir@19677 | 430 { |
keir@20164 | 431 DPRINTF("Warning: pfn 0x%lx are not allocated!\n", pfn); |
keir@20164 | 432 /*XXX:allocate this page?*/ |
keir@19677 | 433 } |
keir@19677 | 434 |
keir@19677 | 435 /* setup region_mfn[] for batch map. |
keir@19677 | 436 * For HVM guests, this interface takes PFNs, not MFNs */ |
keir@20587 | 437 region_mfn[i] = hvm ? pfn : ctx->p2m[pfn]; |
keir@19677 | 438 } |
keir@19677 | 439 } |
keir@19677 | 440 return 0; |
keir@19677 | 441 } |
keir@19677 | 442 |
keir@19677 | 443 |
keir@20453 | 444 /* set when a consistent image is available */ |
keir@20453 | 445 static int completed = 0; |
keir@20453 | 446 |
keir@20983 | 447 #define HEARTBEAT_MS 1000 |
keir@20453 | 448 |
keir@20453 | 449 #ifndef __MINIOS__ |
keir@20453 | 450 static ssize_t read_exact_timed(int fd, void* buf, size_t size) |
keir@20453 | 451 { |
keir@20453 | 452 size_t offset = 0; |
keir@20453 | 453 ssize_t len; |
keir@20453 | 454 struct timeval tv; |
keir@20453 | 455 fd_set rfds; |
keir@20453 | 456 |
keir@20453 | 457 while ( offset < size ) |
keir@20453 | 458 { |
keir@20453 | 459 if ( completed ) { |
keir@20453 | 460 /* expect a heartbeat every HEARTBEAT_MS ms maximum */ |
keir@20983 | 461 tv.tv_sec = HEARTBEAT_MS / 1000; |
keir@20983 | 462 tv.tv_usec = (HEARTBEAT_MS % 1000) * 1000; |
keir@20453 | 463 |
keir@20453 | 464 FD_ZERO(&rfds); |
keir@20453 | 465 FD_SET(fd, &rfds); |
keir@20453 | 466 len = select(fd + 1, &rfds, NULL, NULL, &tv); |
keir@20453 | 467 if ( !FD_ISSET(fd, &rfds) ) { |
keir@20453 | 468 fprintf(stderr, "read_exact_timed failed (select returned %zd)\n", len); |
keir@20453 | 469 return -1; |
keir@20453 | 470 } |
keir@20453 | 471 } |
keir@20453 | 472 |
keir@20453 | 473 len = read(fd, buf + offset, size - offset); |
keir@20453 | 474 if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN)) ) |
keir@20453 | 475 continue; |
keir@20453 | 476 if ( len <= 0 ) |
keir@20453 | 477 return -1; |
keir@20453 | 478 offset += len; |
keir@20453 | 479 } |
keir@20453 | 480 |
keir@20453 | 481 return 0; |
keir@20453 | 482 } |
keir@20453 | 483 |
keir@20453 | 484 #define read_exact read_exact_timed |
keir@20453 | 485 |
keir@20453 | 486 #else |
keir@20453 | 487 #define read_exact_timed read_exact |
keir@20453 | 488 #endif |
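
Once `completed` is set, the reader above treats any data arriving within HEARTBEAT_MS as a Remus heartbeat and fails over to the last consistent image on silence; this is the timeout the changeset raises from 500ms to 1s. A standalone sketch of the same select(2) timeout pattern, with a hypothetical helper name:

    #include <sys/select.h>

    /* Hypothetical helper mirroring the pattern above: returns the select()
     * result -- positive if fd becomes readable within ms milliseconds,
     * 0 on timeout, -1 on error. */
    static int wait_readable(int fd, int ms)
    {
        struct timeval tv = { .tv_sec = ms / 1000, .tv_usec = (ms % 1000) * 1000 };
        fd_set rfds;

        FD_ZERO(&rfds);
        FD_SET(fd, &rfds);
        return select(fd + 1, &rfds, NULL, NULL, &tv);
    }
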
keir@19677 | 489 /* |
kaf24@9698 | 490 ** In the state file (or during transfer), all page-table pages are |
kaf24@9698 | 491 ** converted into a 'canonical' form where references to actual mfns |
kaf24@9698 | 492 ** are replaced with references to the corresponding pfns. |
kaf24@9698 | 493 ** This function inverts that operation, replacing the pfn values with |
kaf24@9698 | 494 ** the (now known) appropriate mfn values. |
smh22@7740 | 495 */ |
keir@20591 | 496 static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, struct restore_ctx *ctx, |
keir@20837 | 497 void *page, int superpages) |
kaf24@9698 | 498 { |
kaf24@9698 | 499 int i, pte_last; |
kaf24@9698 | 500 unsigned long pfn; |
kaf24@9698 | 501 uint64_t pte; |
keir@20589 | 502 struct domain_info_context *dinfo = &ctx->dinfo; |
smh22@7740 | 503 |
keir@20587 | 504 pte_last = PAGE_SIZE / ((ctx->pt_levels == 2)? 4 : 8); |
smh22@7740 | 505 |
keir@14809 | 506 for ( i = 0; i < pte_last; i++ ) |
keir@14809 | 507 { |
keir@20587 | 508 if ( ctx->pt_levels == 2 ) |
kaf24@9698 | 509 pte = ((uint32_t *)page)[i]; |
kaf24@9698 | 510 else |
kaf24@9698 | 511 pte = ((uint64_t *)page)[i]; |
steven@13424 | 512 |
steven@13424 | 513 /* XXX SMH: below needs fixing for PROT_NONE etc */ |
keir@14809 | 514 if ( !(pte & _PAGE_PRESENT) ) |
steven@13424 | 515 continue; |
steven@13424 | 516 |
kfraser@14005 | 517 pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; |
kfraser@14819 | 518 |
keir@19677 | 519 /* Allocate mfn if necessary */ |
keir@20587 | 520 if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY ) |
keir@19677 | 521 { |
keir@20164 | 522 unsigned long force_pfn = superpages ? FORCE_SP_MASK : pfn; |
keir@20591 | 523 if (allocate_mfn_list(xc_handle, dom, ctx, |
keir@20164 | 524 1, &pfn, &force_pfn, superpages) != 0) |
keir@19677 | 525 return 0; |
keir@19677 | 526 } |
kfraser@14005 | 527 pte &= ~MADDR_MASK_X86; |
keir@20587 | 528 pte |= (uint64_t)ctx->p2m[pfn] << PAGE_SHIFT; |
smh22@7886 | 529 |
keir@20587 | 530 if ( ctx->pt_levels == 2 ) |
steven@13424 | 531 ((uint32_t *)page)[i] = (uint32_t)pte; |
steven@13424 | 532 else |
steven@13424 | 533 ((uint64_t *)page)[i] = (uint64_t)pte; |
smh22@7740 | 534 } |
kaf24@9698 | 535 |
kaf24@9698 | 536 return 1; |
smh22@7740 | 537 } |
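
For a single 64-bit entry, the uncanonicalization above reduces to three steps: extract the canonical frame number, look up its new machine frame, and splice that back in around the untouched flag bits. A worked sketch, assuming the same headers are in scope; 0x1a2b and 0x9f00 are made-up pfn/mfn values and the function is hypothetical:

    /* Hypothetical demo: uncanonicalize one 64-bit PTE. */
    static uint64_t uncanonicalize_one_pte_demo(void)
    {
        uint64_t pte = ((uint64_t)0x1a2b << PAGE_SHIFT) | _PAGE_PRESENT;
        unsigned long pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; /* -> 0x1a2b */
        uint64_t mfn = 0x9f00;      /* stands in for ctx->p2m[pfn] */

        (void)pfn;                  /* real code would index ctx->p2m[pfn] */
        pte &= ~MADDR_MASK_X86;     /* keep flag bits, drop the frame */
        pte |= mfn << PAGE_SHIFT;   /* splice in the machine frame */
        return pte;
    }
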
smh22@7740 | 538 |
steven@14732 | 539 |
Tim@14782 | 540 /* Load the p2m frame list, plus potential extended info chunk */ |
keir@20591 | 541 static xen_pfn_t *load_p2m_frame_list(struct restore_ctx *ctx, |
keir@16257 | 542 int io_fd, int *pae_extended_cr3, int *ext_vcpucontext) |
Tim@14782 | 543 { |
Tim@14782 | 544 xen_pfn_t *p2m_frame_list; |
keir@17918 | 545 vcpu_guest_context_any_t ctxt; |
Tim@15955 | 546 xen_pfn_t p2m_fl_zero; |
keir@20589 | 547 struct domain_info_context *dinfo = &ctx->dinfo; |
Tim@14782 | 548 |
Tim@14782 | 549 /* Read first entry of P2M list, or extended-info signature (~0UL). */ |
keir@16408 | 550 if ( read_exact(io_fd, &p2m_fl_zero, sizeof(long)) ) |
keir@14809 | 551 { |
keir@14809 | 552 ERROR("read extended-info signature failed"); |
keir@14809 | 553 return NULL; |
keir@14809 | 554 } |
Tim@14782 | 555 |
Tim@15955 | 556 if ( p2m_fl_zero == ~0UL ) |
keir@14809 | 557 { |
Tim@14782 | 558 uint32_t tot_bytes; |
Tim@14782 | 559 |
Tim@14782 | 560 /* Next 4 bytes: total size of following extended info. */ |
keir@16408 | 561 if ( read_exact(io_fd, &tot_bytes, sizeof(tot_bytes)) ) |
keir@14809 | 562 { |
Tim@14782 | 563 ERROR("read extended-info size failed"); |
Tim@14782 | 564 return NULL; |
Tim@14782 | 565 } |
Tim@14782 | 566 |
keir@14809 | 567 while ( tot_bytes ) |
keir@14809 | 568 { |
Tim@14782 | 569 uint32_t chunk_bytes; |
Tim@14782 | 570 char chunk_sig[4]; |
Tim@14782 | 571 |
Tim@14782 | 572 /* 4-character chunk signature + 4-byte remaining chunk size. */ |
keir@16408 | 573 if ( read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) || |
keir@16408 | 574 read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes)) || |
keir@16257 | 575 (tot_bytes < (chunk_bytes + 8)) ) |
keir@14809 | 576 { |
Tim@14782 | 577 ERROR("read extended-info chunk signature failed"); |
Tim@14782 | 578 return NULL; |
Tim@14782 | 579 } |
Tim@14782 | 580 tot_bytes -= 8; |
keir@14809 | 581 |
Tim@14782 | 582 /* VCPU context structure? */ |
keir@14809 | 583 if ( !strncmp(chunk_sig, "vcpu", 4) ) |
keir@14809 | 584 { |
Tim@15955 | 585 /* Pick a guest word-size and PT depth from the ctxt size */ |
Tim@15955 | 586 if ( chunk_bytes == sizeof (ctxt.x32) ) |
Tim@15955 | 587 { |
keir@20585 | 588 dinfo->guest_width = 4; |
keir@20587 | 589 if ( ctx->pt_levels > 2 ) |
keir@20587 | 590 ctx->pt_levels = 3; |
Tim@15955 | 591 } |
Tim@15955 | 592 else if ( chunk_bytes == sizeof (ctxt.x64) ) |
Tim@15955 | 593 { |
keir@20585 | 594 dinfo->guest_width = 8; |
keir@20587 | 595 ctx->pt_levels = 4; |
Tim@15955 | 596 } |
Tim@15955 | 597 else |
Tim@15955 | 598 { |
Tim@15955 | 599 ERROR("bad extended-info context size %d", chunk_bytes); |
Tim@15955 | 600 return NULL; |
Tim@15955 | 601 } |
Tim@15955 | 602 |
keir@16408 | 603 if ( read_exact(io_fd, &ctxt, chunk_bytes) ) |
keir@14809 | 604 { |
Tim@14782 | 605 ERROR("read extended-info vcpu context failed"); |
Tim@14782 | 606 return NULL; |
Tim@14782 | 607 } |
Tim@15955 | 608 tot_bytes -= chunk_bytes; |
Tim@15955 | 609 chunk_bytes = 0; |
Tim@15955 | 610 |
Tim@15955 | 611 if ( GET_FIELD(&ctxt, vm_assist) |
Tim@15955 | 612 & (1UL << VMASST_TYPE_pae_extended_cr3) ) |
Tim@14782 | 613 *pae_extended_cr3 = 1; |
Tim@14782 | 614 } |
keir@16257 | 615 else if ( !strncmp(chunk_sig, "extv", 4) ) |
keir@16257 | 616 { |
keir@16257 | 617 *ext_vcpucontext = 1; |
keir@16257 | 618 } |
Tim@14782 | 619 |
Tim@14782 | 620 /* Any remaining bytes of this chunk: read and discard. */ |
keir@14809 | 621 while ( chunk_bytes ) |
keir@14809 | 622 { |
Tim@15955 | 623 unsigned long sz = MIN(chunk_bytes, sizeof(xen_pfn_t)); |
keir@16408 | 624 if ( read_exact(io_fd, &p2m_fl_zero, sz) ) |
keir@14809 | 625 { |
Tim@14782 | 626 ERROR("read-and-discard extended-info chunk bytes failed"); |
Tim@14782 | 627 return NULL; |
Tim@14782 | 628 } |
Tim@14782 | 629 chunk_bytes -= sz; |
Tim@14782 | 630 tot_bytes -= sz; |
Tim@14782 | 631 } |
Tim@14782 | 632 } |
keir@14809 | 633 |
Tim@14782 | 634 /* Now read the real first entry of P2M list. */ |
keir@16408 | 635 if ( read_exact(io_fd, &p2m_fl_zero, sizeof(xen_pfn_t)) ) |
keir@14809 | 636 { |
Tim@14782 | 637 ERROR("read first entry of p2m_frame_list failed"); |
Tim@14782 | 638 return NULL; |
Tim@14782 | 639 } |
Tim@14782 | 640 } |
keir@14809 | 641 |
Tim@15955 | 642 /* Now that we know the guest's word-size, we can safely allocate |
Tim@15955 | 643 * the p2m frame list */ |
keir@17074 | 644 if ( (p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) == NULL ) |
Tim@15955 | 645 { |
Tim@15955 | 646 ERROR("Couldn't allocate p2m_frame_list array"); |
Tim@15955 | 647 return NULL; |
Tim@15955 | 648 } |
Tim@15955 | 649 |
Tim@15955 | 650 /* First entry has already been read. */ |
Tim@15955 | 651 p2m_frame_list[0] = p2m_fl_zero; |
keir@16408 | 652 if ( read_exact(io_fd, &p2m_frame_list[1], |
keir@16408 | 653 (P2M_FL_ENTRIES - 1) * sizeof(xen_pfn_t)) ) |
keir@14809 | 654 { |
keir@14809 | 655 ERROR("read p2m_frame_list failed"); |
keir@14809 | 656 return NULL; |
Tim@14782 | 657 } |
Tim@14782 | 658 |
Tim@14782 | 659 return p2m_frame_list; |
Tim@14782 | 660 } |
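
For reference, the wire format this parser accepts, reconstructed from the reads above (the field names below are descriptive only and come from no Xen header):

    /*
     * Optional extended-info block, present only when the first word read
     * in place of p2m_frame_list[0] is the ~0UL signature:
     *
     *   unsigned long signature;            -- ~0UL
     *   uint32_t      tot_bytes;            -- total size of all chunks below
     *   repeated until tot_bytes is consumed:
     *     char        chunk_sig[4];         -- "vcpu" (guest context) or "extv"
     *     uint32_t    chunk_bytes;          -- payload length
     *     uint8_t     payload[chunk_bytes]; -- unrecognized chunks discarded
     *   xen_pfn_t     p2m_frame_list[0];    -- the real first entry follows
     */
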
Tim@14782 | 661 |
keir@20452 | 662 typedef struct { |
keir@20457 | 663 int ishvm; |
keir@20457 | 664 union { |
keir@20457 | 665 struct tailbuf_pv { |
keir@20457 | 666 unsigned int pfncount; |
keir@20457 | 667 unsigned long* pfntab; |
keir@20457 | 668 unsigned int vcpucount; |
keir@20457 | 669 unsigned char* vcpubuf; |
keir@20457 | 670 unsigned char shared_info_page[PAGE_SIZE]; |
keir@20457 | 671 } pv; |
keir@20457 | 672 struct tailbuf_hvm { |
keir@20457 | 673 uint64_t magicpfns[3]; |
keir@20457 | 674 uint32_t hvmbufsize, reclen; |
keir@20457 | 675 uint8_t* hvmbuf; |
keir@20457 | 676 struct { |
keir@20457 | 677 uint32_t magic; |
keir@20457 | 678 uint32_t version; |
keir@20457 | 679 uint64_t len; |
keir@20457 | 680 } qemuhdr; |
keir@20457 | 681 uint32_t qemubufsize; |
keir@20457 | 682 uint8_t* qemubuf; |
keir@20457 | 683 } hvm; |
keir@20457 | 684 } u; |
keir@20452 | 685 } tailbuf_t; |
keir@20452 | 686 |
keir@20457 | 687 /* read stream until EOF, growing buffer as necessary */ |
keir@20457 | 688 static int compat_buffer_qemu(int fd, struct tailbuf_hvm *buf) |
keir@20457 | 689 { |
keir@20457 | 690 uint8_t *qbuf, *tmp; |
keir@20457 | 691 int blen = 0, dlen = 0; |
keir@20457 | 692 int rc; |
keir@20457 | 693 |
keir@20457 | 694 /* currently save records tend to be about 7K */ |
keir@20457 | 695 blen = 8192; |
keir@20457 | 696 if ( !(qbuf = malloc(blen)) ) { |
keir@20457 | 697 ERROR("Error allocating QEMU buffer"); |
keir@20457 | 698 return -1; |
keir@20457 | 699 } |
keir@20457 | 700 |
keir@20457 | 701 while( (rc = read(fd, qbuf+dlen, blen-dlen)) > 0 ) { |
keir@20457 | 702 DPRINTF("Read %d bytes of QEMU data\n", rc); |
keir@20457 | 703 dlen += rc; |
keir@20457 | 704 |
keir@20457 | 705 if (dlen == blen) { |
keir@20457 | 706 DPRINTF("%d-byte QEMU buffer full, reallocating...\n", dlen); |
keir@20457 | 707 blen += 4096; |
keir@20457 | 708 tmp = realloc(qbuf, blen); |
keir@20457 | 709 if ( !tmp ) { |
keir@20457 | 710 ERROR("Error growing QEMU buffer to %d bytes", blen); |
keir@20457 | 711 free(qbuf); |
keir@20457 | 712 return -1; |
keir@20457 | 713 } |
keir@20457 | 714 qbuf = tmp; |
keir@20457 | 715 } |
keir@20457 | 716 } |
keir@20457 | 717 |
keir@20457 | 718 if ( rc < 0 ) { |
keir@20457 | 719 ERROR("Error reading QEMU data"); |
keir@20457 | 720 free(qbuf); |
keir@20457 | 721 return -1; |
keir@20457 | 722 } |
keir@20457 | 723 |
keir@20457 | 724 if ( memcmp(qbuf, "QEVM", 4) ) { |
keir@20457 | 725 ERROR("Invalid QEMU magic: 0x%08x", *(unsigned long*)qbuf); |
keir@20457 | 726 free(qbuf); |
keir@20457 | 727 return -1; |
keir@20457 | 728 } |
keir@20457 | 729 |
keir@20457 | 730 buf->qemubuf = qbuf; |
keir@20457 | 731 buf->qemubufsize = dlen; |
keir@20457 | 732 |
keir@20457 | 733 return 0; |
keir@20457 | 734 } |
keir@20457 | 735 |
keir@20457 | 736 static int buffer_qemu(int fd, struct tailbuf_hvm *buf) |
keir@20457 | 737 { |
keir@20457 | 738 uint32_t qlen; |
keir@20457 | 739 uint8_t *tmp; |
keir@20457 | 740 |
keir@20457 | 741 if ( read_exact(fd, &qlen, sizeof(qlen)) ) { |
keir@20457 | 742 ERROR("Error reading QEMU header length"); |
keir@20457 | 743 return -1; |
keir@20457 | 744 } |
keir@20457 | 745 |
keir@20457 | 746 if ( qlen > buf->qemubufsize ) { |
keir@20457 | 747 if ( buf->qemubuf) { |
keir@20457 | 748 tmp = realloc(buf->qemubuf, qlen); |
keir@20457 | 749 if ( tmp ) |
keir@20457 | 750 buf->qemubuf = tmp; |
keir@20457 | 751 else { |
keir@20457 | 752 ERROR("Error reallocating QEMU state buffer"); |
keir@20457 | 753 return -1; |
keir@20457 | 754 } |
keir@20457 | 755 } else { |
keir@20457 | 756 buf->qemubuf = malloc(qlen); |
keir@20457 | 757 if ( !buf->qemubuf ) { |
keir@20457 | 758 ERROR("Error allocating QEMU state buffer"); |
keir@20457 | 759 return -1; |
keir@20457 | 760 } |
keir@20457 | 761 } |
keir@20457 | 762 } |
keir@20457 | 763 buf->qemubufsize = qlen; |
keir@20457 | 764 |
keir@20457 | 765 if ( read_exact(fd, buf->qemubuf, buf->qemubufsize) ) { |
keir@20457 | 766 ERROR("Error reading QEMU state"); |
keir@20457 | 767 return -1; |
keir@20457 | 768 } |
keir@20457 | 769 |
keir@20457 | 770 return 0; |
keir@20457 | 771 } |
keir@20457 | 772 |
keir@20457 | 773 static int dump_qemu(uint32_t dom, struct tailbuf_hvm *buf) |
keir@20457 | 774 { |
keir@20457 | 775 int saved_errno; |
keir@20457 | 776 char path[256]; |
keir@20457 | 777 FILE *fp; |
keir@20457 | 778 |
keir@20457 | 779 sprintf(path, "/var/lib/xen/qemu-save.%u", dom); |
keir@20457 | 780 fp = fopen(path, "wb"); |
keir@20457 | 781 if ( !fp ) |
keir@20457 | 782 return -1; |
keir@20457 | 783 |
keir@20457 | 784 DPRINTF("Writing %d bytes of QEMU data\n", buf->qemubufsize); |
keir@20457 | 785 if ( fwrite(buf->qemubuf, 1, buf->qemubufsize, fp) != buf->qemubufsize) { |
keir@20457 | 786 saved_errno = errno; |
keir@20457 | 787 fclose(fp); |
keir@20457 | 788 errno = saved_errno; |
keir@20457 | 789 return -1; |
keir@20457 | 790 } |
keir@20457 | 791 |
keir@20457 | 792 fclose(fp); |
keir@20457 | 793 |
keir@20457 | 794 return 0; |
keir@20457 | 795 } |
keir@20457 | 796 |
keir@20591 | 797 static int buffer_tail_hvm(struct restore_ctx *ctx, struct tailbuf_hvm *buf, int fd, |
keir@20457 | 798 unsigned int max_vcpu_id, uint64_t vcpumap, |
keir@20457 | 799 int ext_vcpucontext) |
keir@20457 | 800 { |
keir@20457 | 801 uint8_t *tmp; |
keir@20457 | 802 unsigned char qemusig[21]; |
keir@20457 | 803 |
keir@20457 | 804 if ( read_exact(fd, buf->magicpfns, sizeof(buf->magicpfns)) ) { |
keir@20457 | 805 ERROR("Error reading magic PFNs"); |
keir@20457 | 806 return -1; |
keir@20457 | 807 } |
keir@20457 | 808 |
keir@20457 | 809 if ( read_exact(fd, &buf->reclen, sizeof(buf->reclen)) ) { |
keir@20457 | 810 ERROR("Error reading HVM params size"); |
keir@20457 | 811 return -1; |
keir@20457 | 812 } |
keir@20457 | 813 |
keir@20457 | 814 if ( buf->reclen > buf->hvmbufsize ) { |
keir@20457 | 815 if ( buf->hvmbuf) { |
keir@20457 | 816 tmp = realloc(buf->hvmbuf, buf->reclen); |
keir@20457 | 817 if ( tmp ) { |
keir@20457 | 818 buf->hvmbuf = tmp; |
keir@20457 | 819 buf->hvmbufsize = buf->reclen; |
keir@20457 | 820 } else { |
keir@20457 | 821 ERROR("Error reallocating HVM param buffer"); |
keir@20457 | 822 return -1; |
keir@20457 | 823 } |
keir@20457 | 824 } else { |
keir@20457 | 825 buf->hvmbuf = malloc(buf->reclen); |
keir@20457 | 826 if ( !buf->hvmbuf ) { |
keir@20457 | 827 ERROR("Error allocating HVM param buffer"); |
keir@20457 | 828 return -1; |
keir@20457 | 829 } |
keir@20457 | 830 buf->hvmbufsize = buf->reclen; |
keir@20457 | 831 } |
keir@20457 | 832 } |
keir@20457 | 833 |
keir@20457 | 834 if ( read_exact(fd, buf->hvmbuf, buf->reclen) ) { |
keir@20457 | 835 ERROR("Error reading HVM params"); |
keir@20457 | 836 return -1; |
keir@20457 | 837 } |
keir@20457 | 838 |
keir@20457 | 839 if ( read_exact(fd, qemusig, sizeof(qemusig)) ) { |
keir@20457 | 840 ERROR("Error reading QEMU signature"); |
keir@20457 | 841 return -1; |
keir@20457 | 842 } |
keir@20457 | 843 |
keir@20457 | 844 /* The normal live-migration QEMU record has no length information. |
keir@20457 | 845 * Short of reimplementing the QEMU parser, we're forced to just read |
keir@20457 | 846 * until EOF. Remus gets around this by sending a different signature |
keir@20457 | 847 * which includes a length prefix */ |
keir@20457 | 848 if ( !memcmp(qemusig, "QemuDeviceModelRecord", sizeof(qemusig)) ) |
keir@20457 | 849 return compat_buffer_qemu(fd, buf); |
keir@20457 | 850 else if ( !memcmp(qemusig, "RemusDeviceModelState", sizeof(qemusig)) ) |
keir@20457 | 851 return buffer_qemu(fd, buf); |
keir@20457 | 852 |
keir@20457 | 853 qemusig[20] = '\0'; |
keir@20457 | 854 ERROR("Invalid QEMU signature: %s", qemusig); |
keir@20457 | 855 return -1; |
keir@20457 | 856 } |
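
The two accepted signatures imply different framings for the device-model record; a layout sketch reconstructed from the two readers above:

    /*
     * "QemuDeviceModelRecord" (legacy live migration):
     *   uint8_t data[];        -- no length field; compat_buffer_qemu() reads
     *                             until EOF and checks QEMU's "QEVM" magic
     * "RemusDeviceModelState" (Remus checkpoints, one per checkpoint):
     *   uint32_t qlen;         -- length prefix
     *   uint8_t  state[qlen];  -- buffer_qemu() reads exactly qlen bytes
     */
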
keir@20457 | 857 |
keir@20591 | 858 static int buffer_tail_pv(struct restore_ctx *ctx, struct tailbuf_pv *buf, int fd, |
keir@20457 | 859 unsigned int max_vcpu_id, uint64_t vcpumap, |
keir@20457 | 860 int ext_vcpucontext) |
keir@20452 | 861 { |
keir@20452 | 862 unsigned int i; |
keir@20452 | 863 size_t pfnlen, vcpulen; |
keir@20589 | 864 struct domain_info_context *dinfo = &ctx->dinfo; |
keir@20452 | 865 |
keir@20452 | 866 /* TODO: handle changing pfntab and vcpu counts */ |
keir@20452 | 867 /* PFN tab */ |
keir@20452 | 868 if ( read_exact(fd, &buf->pfncount, sizeof(buf->pfncount)) || |
keir@20452 | 869 (buf->pfncount > (1U << 28)) ) /* up to 1TB of address space */ |
keir@20452 | 870 { |
keir@20452 | 871 ERROR("Error when reading pfn count"); |
keir@20452 | 872 return -1; |
keir@20452 | 873 } |
keir@20452 | 874 pfnlen = sizeof(unsigned long) * buf->pfncount; |
keir@20452 | 875 if ( !(buf->pfntab) ) { |
keir@20452 | 876 if ( !(buf->pfntab = malloc(pfnlen)) ) { |
keir@20452 | 877 ERROR("Error allocating PFN tail buffer"); |
keir@20452 | 878 return -1; |
keir@20452 | 879 } |
keir@20452 | 880 } |
keir@20452 | 881 // DPRINTF("Reading PFN tab: %d bytes\n", pfnlen); |
keir@20452 | 882 if ( read_exact(fd, buf->pfntab, pfnlen) ) { |
keir@20452 | 883 ERROR("Error when reading pfntab"); |
keir@20452 | 884 goto free_pfntab; |
keir@20452 | 885 } |
keir@20452 | 886 |
keir@20452 | 887 /* VCPU contexts */ |
keir@20452 | 888 buf->vcpucount = 0; |
keir@20452 | 889 for (i = 0; i <= max_vcpu_id; i++) { |
keir@20452 | 890 // DPRINTF("vcpumap: %llx, cpu: %d, bit: %llu\n", vcpumap, i, (vcpumap % (1ULL << i))); |
keir@20452 | 891 if ( (!(vcpumap & (1ULL << i))) ) |
keir@20452 | 892 continue; |
keir@20452 | 893 buf->vcpucount++; |
keir@20452 | 894 } |
keir@20452 | 895 // DPRINTF("VCPU count: %d\n", buf->vcpucount); |
keir@20585 | 896 vcpulen = ((dinfo->guest_width == 8) ? sizeof(vcpu_guest_context_x86_64_t) |
keir@20452 | 897 : sizeof(vcpu_guest_context_x86_32_t)) * buf->vcpucount; |
keir@20452 | 898 if ( ext_vcpucontext ) |
keir@20452 | 899 vcpulen += 128 * buf->vcpucount; |
keir@20452 | 900 |
keir@20452 | 901 if ( !(buf->vcpubuf) ) { |
keir@20452 | 902 if ( !(buf->vcpubuf = malloc(vcpulen)) ) { |
keir@20452 | 903 ERROR("Error allocating VCPU ctxt tail buffer"); |
keir@20452 | 904 goto free_pfntab; |
keir@20452 | 905 } |
keir@20452 | 906 } |
keir@20452 | 907 // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen); |
keir@20452 | 908 if ( read_exact(fd, buf->vcpubuf, vcpulen) ) { |
keir@20452 | 909 ERROR("Error when reading ctxt"); |
keir@20452 | 910 goto free_vcpus; |
keir@20452 | 911 } |
keir@20452 | 912 |
keir@20452 | 913 /* load shared_info_page */ |
keir@20452 | 914 // DPRINTF("Reading shared info: %lu bytes\n", PAGE_SIZE); |
keir@20452 | 915 if ( read_exact(fd, buf->shared_info_page, PAGE_SIZE) ) { |
keir@20452 | 916 ERROR("Error when reading shared info page"); |
keir@20452 | 917 goto free_vcpus; |
keir@20452 | 918 } |
keir@20452 | 919 |
keir@20452 | 920 return 0; |
keir@20452 | 921 |
keir@20452 | 922 free_vcpus: |
keir@20452 | 923 if (buf->vcpubuf) { |
keir@20452 | 924 free (buf->vcpubuf); |
keir@20452 | 925 buf->vcpubuf = NULL; |
keir@20452 | 926 } |
keir@20452 | 927 free_pfntab: |
keir@20452 | 928 if (buf->pfntab) { |
keir@20452 | 929 free (buf->pfntab); |
keir@20452 | 930 buf->pfntab = NULL; |
keir@20452 | 931 } |
keir@20452 | 932 |
keir@20452 | 933 return -1; |
keir@20452 | 934 } |
keir@20452 | 935 |
keir@20591 | 936 static int buffer_tail(struct restore_ctx *ctx, tailbuf_t *buf, int fd, unsigned int max_vcpu_id, |
keir@20457 | 937 uint64_t vcpumap, int ext_vcpucontext) |
keir@20457 | 938 { |
keir@20457 | 939 if ( buf->ishvm ) |
keir@20591 | 940 return buffer_tail_hvm(ctx, &buf->u.hvm, fd, max_vcpu_id, vcpumap, |
keir@20457 | 941 ext_vcpucontext); |
keir@20457 | 942 else |
keir@20591 | 943 return buffer_tail_pv(ctx, &buf->u.pv, fd, max_vcpu_id, vcpumap, |
keir@20457 | 944 ext_vcpucontext); |
keir@20457 | 945 } |
keir@20457 | 946 |
keir@20457 | 947 static void tailbuf_free_hvm(struct tailbuf_hvm *buf) |
keir@20452 | 948 { |
keir@20457 | 949 if ( buf->hvmbuf ) { |
keir@20457 | 950 free(buf->hvmbuf); |
keir@20457 | 951 buf->hvmbuf = NULL; |
keir@20457 | 952 } |
keir@20457 | 953 if ( buf->qemubuf ) { |
keir@20457 | 954 free(buf->qemubuf); |
keir@20457 | 955 buf->qemubuf = NULL; |
keir@20457 | 956 } |
keir@20457 | 957 } |
keir@20457 | 958 |
keir@20457 | 959 static void tailbuf_free_pv(struct tailbuf_pv *buf) |
keir@20457 | 960 { |
keir@20457 | 961 if ( buf->vcpubuf ) { |
keir@20452 | 962 free(buf->vcpubuf); |
keir@20452 | 963 buf->vcpubuf = NULL; |
keir@20452 | 964 } |
keir@20457 | 965 if ( buf->pfntab ) { |
keir@20452 | 966 free(buf->pfntab); |
keir@20452 | 967 buf->pfntab = NULL; |
keir@20452 | 968 } |
keir@20452 | 969 } |
keir@20452 | 970 |
keir@20457 | 971 static void tailbuf_free(tailbuf_t *buf) |
keir@20457 | 972 { |
keir@20457 | 973 if ( buf->ishvm ) |
keir@20457 | 974 tailbuf_free_hvm(&buf->u.hvm); |
keir@20457 | 975 else |
keir@20457 | 976 tailbuf_free_pv(&buf->u.pv); |
keir@20457 | 977 } |
keir@20457 | 978 |
keir@20452 | 979 typedef struct { |
keir@20452 | 980 void* pages; |
keir@20452 | 981 /* pages is of length nr_physpages, pfn_types is of length nr_pages */ |
keir@20452 | 982 unsigned int nr_physpages, nr_pages; |
keir@20452 | 983 |
keir@20452 | 984 /* Types of the pfns in the current region */ |
keir@20452 | 985 unsigned long* pfn_types; |
keir@20452 | 986 |
keir@20452 | 987 int verify; |
keir@20452 | 988 |
keir@20452 | 989 int new_ctxt_format; |
keir@20452 | 990 int max_vcpu_id; |
keir@20452 | 991 uint64_t vcpumap; |
keir@20452 | 992 uint64_t identpt; |
keir@20452 | 993 uint64_t vm86_tss; |
keir@20452 | 994 } pagebuf_t; |
keir@20452 | 995 |
keir@20452 | 996 static int pagebuf_init(pagebuf_t* buf) |
keir@20452 | 997 { |
keir@20452 | 998 memset(buf, 0, sizeof(*buf)); |
keir@20452 | 999 return 0; |
keir@20452 | 1000 } |
keir@20452 | 1001 |
keir@20452 | 1002 static void pagebuf_free(pagebuf_t* buf) |
keir@20452 | 1003 { |
keir@20452 | 1004 if (buf->pages) { |
keir@20452 | 1005 free(buf->pages); |
keir@20452 | 1006 buf->pages = NULL; |
keir@20452 | 1007 } |
keir@20452 | 1008 if(buf->pfn_types) { |
keir@20452 | 1009 free(buf->pfn_types); |
keir@20452 | 1010 buf->pfn_types = NULL; |
keir@20452 | 1011 } |
keir@20452 | 1012 } |
keir@20452 | 1013 |
keir@20452 | 1014 static int pagebuf_get_one(pagebuf_t* buf, int fd, int xch, uint32_t dom) |
keir@20452 | 1015 { |
keir@20452 | 1016 int count, countpages, oldcount, i; |
keir@20452 | 1017 void* ptmp; |
keir@20452 | 1018 |
keir@20452 | 1019 if ( read_exact(fd, &count, sizeof(count)) ) |
keir@20452 | 1020 { |
keir@20452 | 1021 ERROR("Error when reading batch size"); |
keir@20452 | 1022 return -1; |
keir@20452 | 1023 } |
keir@20452 | 1024 |
keir@20452 | 1025 // DPRINTF("reading batch of %d pages\n", count); |
keir@20452 | 1026 |
keir@20452 | 1027 if (!count) { |
keir@20452 | 1028 // DPRINTF("Last batch read\n"); |
keir@20452 | 1029 return 0; |
keir@20452 | 1030 } else if (count == -1) { |
keir@20452 | 1031 DPRINTF("Entering page verify mode\n"); |
keir@20452 | 1032 buf->verify = 1; |
keir@20452 | 1033 return pagebuf_get_one(buf, fd, xch, dom); |
keir@20452 | 1034 } else if (count == -2) { |
keir@20452 | 1035 buf->new_ctxt_format = 1; |
keir@20452 | 1036 if ( read_exact(fd, &buf->max_vcpu_id, sizeof(buf->max_vcpu_id)) || |
keir@20452 | 1037 buf->max_vcpu_id >= 64 || read_exact(fd, &buf->vcpumap, |
keir@20452 | 1038 sizeof(uint64_t)) ) { |
keir@20452 | 1039 ERROR("Error when reading max_vcpu_id"); |
keir@20452 | 1040 return -1; |
keir@20452 | 1041 } |
keir@20452 | 1042 // DPRINTF("Max VCPU ID: %d, vcpumap: %llx\n", buf->max_vcpu_id, buf->vcpumap); |
keir@20452 | 1043 return pagebuf_get_one(buf, fd, xch, dom); |
keir@20452 | 1044 } else if (count == -3) { |
keir@20452 | 1045 /* Skip 4 bytes of padding, then read the EPT identity PT location. */ |
keir@20452 | 1046 if ( read_exact(fd, &buf->identpt, sizeof(uint32_t)) || |
keir@20452 | 1047 read_exact(fd, &buf->identpt, sizeof(uint64_t)) ) |
keir@20452 | 1048 { |
keir@20452 | 1049 ERROR("error read the address of the EPT identity map"); |
keir@20452 | 1050 return -1; |
keir@20452 | 1051 } |
keir@20452 | 1052 // DPRINTF("EPT identity map address: %llx\n", buf->identpt); |
keir@20452 | 1053 return pagebuf_get_one(buf, fd, xch, dom); |
keir@20452 | 1054 } else if ( count == -4 ) { |
keir@20452 | 1055 /* Skip 4 bytes of padding, then read the vm86 TSS location. */ |
keir@20452 | 1056 if ( read_exact(fd, &buf->vm86_tss, sizeof(uint32_t)) || |
keir@20452 | 1057 read_exact(fd, &buf->vm86_tss, sizeof(uint64_t)) ) |
keir@20452 | 1058 { |
keir@20452 | 1059 ERROR("error read the address of the vm86 TSS"); |
keir@20452 | 1060 return -1; |
keir@20452 | 1061 } |
keir@20452 | 1062 // DPRINTF("VM86 TSS location: %llx\n", buf->vm86_tss); |
keir@20452 | 1063 return pagebuf_get_one(buf, fd, xch, dom); |
keir@20452 | 1064 } else if ( count == -5 ) { |
keir@20452 | 1065 DPRINTF("xc_domain_restore start tmem\n"); |
keir@20452 | 1066 if ( xc_tmem_restore(xch, dom, fd) ) { |
keir@20452 | 1067 ERROR("error reading/restoring tmem"); |
keir@20452 | 1068 return -1; |
keir@20452 | 1069 } |
keir@20452 | 1070 return pagebuf_get_one(buf, fd, xch, dom); |
keir@20452 | 1071 } |
keir@20452 | 1072 else if ( count == -6 ) { |
keir@20452 | 1073 if ( xc_tmem_restore_extra(xch, dom, fd) ) { |
keir@20452 | 1074 ERROR("error reading/restoring tmem extra"); |
keir@20452 | 1075 return -1; |
keir@20452 | 1076 } |
keir@20452 | 1077 return pagebuf_get_one(buf, fd, xch, dom); |
keir@20539 | 1078 } else if ( count == -7 ) { |
keir@20539 | 1079 uint32_t tsc_mode, khz, incarn; |
keir@20539 | 1080 uint64_t nsec; |
keir@20539 | 1081 if ( read_exact(fd, &tsc_mode, sizeof(uint32_t)) || |
keir@20539 | 1082 read_exact(fd, &nsec, sizeof(uint64_t)) || |
keir@20539 | 1083 read_exact(fd, &khz, sizeof(uint32_t)) || |
keir@20539 | 1084 read_exact(fd, &incarn, sizeof(uint32_t)) || |
keir@20539 | 1085 xc_domain_set_tsc_info(xch, dom, tsc_mode, nsec, khz, incarn) ) { |
keir@20539 | 1086 ERROR("error reading/restoring tsc info"); |
keir@20539 | 1087 return -1; |
keir@20539 | 1088 } |
keir@20539 | 1089 return pagebuf_get_one(buf, fd, xch, dom); |
keir@20452 | 1090 } else if ( (count > MAX_BATCH_SIZE) || (count < 0) ) { |
keir@20452 | 1091 ERROR("Max batch size exceeded (%d). Giving up.", count); |
keir@20452 | 1092 return -1; |
keir@20452 | 1093 } |
keir@20452 | 1094 |
keir@20452 | 1095 oldcount = buf->nr_pages; |
keir@20452 | 1096 buf->nr_pages += count; |
keir@20452 | 1097 if (!buf->pfn_types) { |
keir@20452 | 1098 if (!(buf->pfn_types = malloc(buf->nr_pages * sizeof(*(buf->pfn_types))))) { |
keir@20452 | 1099 ERROR("Could not allocate PFN type buffer"); |
keir@20452 | 1100 return -1; |
keir@20452 | 1101 } |
keir@20452 | 1102 } else { |
keir@20452 | 1103 if (!(ptmp = realloc(buf->pfn_types, buf->nr_pages * sizeof(*(buf->pfn_types))))) { |
keir@20452 | 1104 ERROR("Could not reallocate PFN type buffer"); |
keir@20452 | 1105 return -1; |
keir@20452 | 1106 } |
keir@20452 | 1107 buf->pfn_types = ptmp; |
keir@20452 | 1108 } |
keir@20452 | 1109 if ( read_exact(fd, buf->pfn_types + oldcount, count * sizeof(*(buf->pfn_types)))) { |
keir@20452 | 1110 ERROR("Error when reading region pfn types"); |
keir@20452 | 1111 return -1; |
keir@20452 | 1112 } |
keir@20452 | 1113 |
keir@20452 | 1114 countpages = count; |
keir@20452 | 1115 for (i = oldcount; i < buf->nr_pages; ++i) |
keir@20452 | 1116 if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB) |
keir@20452 | 1117 --countpages; |
keir@20452 | 1118 |
keir@20452 | 1119 if (!countpages) |
keir@20452 | 1120 return count; |
keir@20452 | 1121 |
keir@20452 | 1122 oldcount = buf->nr_physpages; |
keir@20452 | 1123 buf->nr_physpages += countpages; |
keir@20452 | 1124 if (!buf->pages) { |
keir@20452 | 1125 if (!(buf->pages = malloc(buf->nr_physpages * PAGE_SIZE))) { |
keir@20452 | 1126 ERROR("Could not allocate page buffer"); |
keir@20452 | 1127 return -1; |
keir@20452 | 1128 } |
keir@20452 | 1129 } else { |
keir@20452 | 1130 if (!(ptmp = realloc(buf->pages, buf->nr_physpages * PAGE_SIZE))) { |
keir@20452 | 1131 ERROR("Could not reallocate page buffer"); |
keir@20452 | 1132 return -1; |
keir@20452 | 1133 } |
keir@20452 | 1134 buf->pages = ptmp; |
keir@20452 | 1135 } |
keir@20452 | 1136 if ( read_exact(fd, buf->pages + oldcount * PAGE_SIZE, countpages * PAGE_SIZE) ) { |
keir@20452 | 1137 ERROR("Error when reading pages"); |
keir@20452 | 1138 return -1; |
keir@20452 | 1139 } |
keir@20452 | 1140 |
keir@20452 | 1141 return count; |
keir@20452 | 1142 } |
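
The negative batch counts handled above are out-of-band markers in the save stream. They are summarized below for reference; the XC_SAVE_ID_* spellings follow the convention of xg_save_restore.h, but treat the exact names as an assumption:

    /* Stream markers decoded by pagebuf_get_one(). */
    enum {
        XC_SAVE_ID_ENABLE_VERIFY_MODE = -1, /* switch to page-verify mode */
        XC_SAVE_ID_VCPU_INFO          = -2, /* max_vcpu_id and vcpumap follow */
        XC_SAVE_ID_HVM_IDENT_PT       = -3, /* EPT identity-map address */
        XC_SAVE_ID_HVM_VM86_TSS       = -4, /* vm86 TSS address */
        XC_SAVE_ID_TMEM               = -5, /* tmem state */
        XC_SAVE_ID_TMEM_EXTRA         = -6, /* extra tmem state */
        XC_SAVE_ID_TSC_INFO           = -7, /* tsc_mode, nsec, khz, incarnation */
    };
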
keir@20452 | 1143 |
keir@20452 | 1144 static int pagebuf_get(pagebuf_t* buf, int fd, int xch, uint32_t dom) |
keir@20452 | 1145 { |
keir@20452 | 1146 int rc; |
keir@20452 | 1147 |
keir@20452 | 1148 buf->nr_physpages = buf->nr_pages = 0; |
keir@20452 | 1149 |
keir@20452 | 1150 do { |
keir@20452 | 1151 rc = pagebuf_get_one(buf, fd, xch, dom); |
keir@20452 | 1152 } while (rc > 0); |
keir@20452 | 1153 |
keir@20452 | 1154 if (rc < 0) |
keir@20452 | 1155 pagebuf_free(buf); |
keir@20452 | 1156 |
keir@20452 | 1157 return rc; |
keir@20452 | 1158 } |
keir@20452 | 1159 |
keir@20591 | 1160 static int apply_batch(int xc_handle, uint32_t dom, struct restore_ctx *ctx, |
keir@20591 | 1161 xen_pfn_t* region_mfn, unsigned long* pfn_type, int pae_extended_cr3, |
keir@20452 | 1162 unsigned int hvm, struct xc_mmu* mmu, |
keir@20452 | 1163 pagebuf_t* pagebuf, int curbatch, int superpages) |
keir@20452 | 1164 { |
keir@20452 | 1165 int i, j, curpage; |
keir@20452 | 1166 /* used by debug verify code */ |
keir@20452 | 1167 unsigned long buf[PAGE_SIZE/sizeof(unsigned long)]; |
keir@20452 | 1168 /* Our mapping of the current region (batch) */ |
keir@20452 | 1169 char *region_base; |
keir@20452 | 1170 /* A temporary mapping, and a copy, of one frame of guest memory. */ |
keir@20452 | 1171 unsigned long *page = NULL; |
keir@20452 | 1172 int nraces = 0; |
keir@20589 | 1173 struct domain_info_context *dinfo = &ctx->dinfo; |
keir@20877 | 1174 int* pfn_err = NULL; |
keir@20877 | 1175 int rc = -1; |
keir@20452 | 1176 |
keir@20452 | 1177 unsigned long mfn, pfn, pagetype; |
keir@20452 | 1178 |
keir@20452 | 1179 j = pagebuf->nr_pages - curbatch; |
keir@20452 | 1180 if (j > MAX_BATCH_SIZE) |
keir@20452 | 1181 j = MAX_BATCH_SIZE; |
keir@20452 | 1182 |
keir@20591 | 1183 if (allocate_physmem(xc_handle, dom, ctx, &pagebuf->pfn_types[curbatch], |
keir@20452 | 1184 j, hvm, region_mfn, superpages) != 0) |
keir@20452 | 1185 { |
keir@20452 | 1186 ERROR("allocate_physmem() failed\n"); |
keir@20452 | 1187 return -1; |
keir@20452 | 1188 } |
keir@20452 | 1189 |
keir@20452 | 1190 /* Map relevant mfns */ |
keir@20877 | 1191 pfn_err = calloc(j, sizeof(*pfn_err)); |
keir@20877 | 1192 region_base = xc_map_foreign_bulk( |
keir@20877 | 1193 xc_handle, dom, PROT_WRITE, region_mfn, pfn_err, j); |
keir@20452 | 1194 |
keir@20452 | 1195 if ( region_base == NULL ) |
keir@20452 | 1196 { |
keir@20452 | 1197 ERROR("map batch failed"); |
keir@20877 | 1198 free(pfn_err); |
keir@20452 | 1199 return -1; |
keir@20452 | 1200 } |
keir@20452 | 1201 |
keir@20452 | 1202 for ( i = 0, curpage = -1; i < j; i++ ) |
keir@20452 | 1203 { |
keir@20452 | 1204 pfn = pagebuf->pfn_types[i + curbatch] & ~XEN_DOMCTL_PFINFO_LTAB_MASK; |
keir@20452 | 1205 pagetype = pagebuf->pfn_types[i + curbatch] & XEN_DOMCTL_PFINFO_LTAB_MASK; |
keir@20452 | 1206 |
keir@20452 | 1207 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB ) |
keir@20452 | 1208 /* a bogus/unmapped page: skip it */ |
keir@20452 | 1209 continue; |
keir@20452 | 1210 |
keir@20877 | 1211 if (pfn_err[i]) |
keir@20877 | 1212 { |
keir@20877 | 1213 ERROR("unexpected PFN mapping failure"); |
keir@20877 | 1214 goto err_mapped; |
keir@20877 | 1215 } |
keir@20877 | 1216 |
keir@20452 | 1217 ++curpage; |
keir@20452 | 1218 |
keir@20585 | 1219 if ( pfn > dinfo->p2m_size ) |
keir@20452 | 1220 { |
keir@20452 | 1221 ERROR("pfn out of range"); |
keir@20877 | 1222 goto err_mapped; |
keir@20452 | 1223 } |
keir@20452 | 1224 |
keir@20452 | 1225 pfn_type[pfn] = pagetype; |
keir@20452 | 1226 |
keir@20587 | 1227 mfn = ctx->p2m[pfn]; |
keir@20452 | 1228 |
keir@20452 | 1229 /* In verify mode, we use a copy; otherwise we work in place */ |
keir@20452 | 1230 page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE); |
keir@20452 | 1231 |
keir@20452 | 1232 memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, PAGE_SIZE); |
keir@20452 | 1233 |
keir@20452 | 1234 pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK; |
keir@20452 | 1235 |
keir@20452 | 1236 if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) && |
keir@20452 | 1237 (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) ) |
keir@20452 | 1238 { |
keir@20452 | 1239 /* |
keir@20452 | 1240 ** A page table page - need to 'uncanonicalize' it, i.e. |
keir@20452 | 1241 ** replace all the references to pfns with the corresponding |
keir@20452 | 1242 ** mfns for the new domain. |
keir@20452 | 1243 ** |
keir@20452 | 1244 ** On PAE we need to ensure that PGDs are in MFNs < 4G, and |
keir@20452 | 1245 ** so we may need to update the p2m after the main loop. |
keir@20452 | 1246 ** Hence we defer canonicalization of L1s until then. |
keir@20452 | 1247 */ |
keir@20587 | 1248 if ((ctx->pt_levels != 3) || |
keir@20452 | 1249 pae_extended_cr3 || |
keir@20452 | 1250 (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) { |
keir@20452 | 1251 |
keir@20591 | 1252 if (!uncanonicalize_pagetable(xc_handle, dom, ctx, |
keir@20837 | 1253 page, superpages)) { |
keir@20452 | 1254 /* |
keir@20452 | 1255 ** Failing to uncanonicalize a page table can be ok |
keir@20452 | 1256 ** under live migration since the pages type may have |
keir@20452 | 1257 ** changed by now (and we'll get an update later). |
keir@20452 | 1258 */ |
keir@20452 | 1259 DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n", |
keir@20452 | 1260 pagetype >> 28, pfn, mfn); |
keir@20452 | 1261 nraces++; |
keir@20452 | 1262 continue; |
keir@20452 | 1263 } |
keir@20452 | 1264 } |
keir@20452 | 1265 } |
keir@20452 | 1266 else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB ) |
keir@20452 | 1267 { |
keir@20452 | 1268 ERROR("Bogus page type %lx page table is out of range: " |
keir@20585 | 1269 "i=%d p2m_size=%lu", pagetype, i, dinfo->p2m_size); |
keir@20877 | 1270 goto err_mapped; |
keir@20452 | 1271 } |
keir@20452 | 1272 |
keir@20452 | 1273 if ( pagebuf->verify ) |
keir@20452 | 1274 { |
keir@20452 | 1275 int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE); |
keir@20452 | 1276 if ( res ) |
keir@20452 | 1277 { |
keir@20452 | 1278 int v; |
keir@20452 | 1279 |
keir@20452 | 1280 DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx " |
keir@20452 | 1281 "actualcs=%08lx\n", pfn, pagebuf->pfn_types[pfn], |
keir@20452 | 1282 csum_page(region_base + (i + curbatch)*PAGE_SIZE), |
keir@20452 | 1283 csum_page(buf)); |
keir@20452 | 1284 |
keir@20452 | 1285 for ( v = 0; v < 4; v++ ) |
keir@20452 | 1286 { |
keir@20452 | 1287 unsigned long *p = (unsigned long *) |
keir@20452 | 1288 (region_base + i*PAGE_SIZE); |
keir@20452 | 1289 if ( buf[v] != p[v] ) |
keir@20452 | 1290 DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]); |
keir@20452 | 1291 } |
keir@20452 | 1292 } |
keir@20452 | 1293 } |
keir@20452 | 1294 |
keir@20452 | 1295 if ( !hvm && |
keir@20452 | 1296 xc_add_mmu_update(xc_handle, mmu, |
keir@20452 | 1297 (((unsigned long long)mfn) << PAGE_SHIFT) |
keir@20452 | 1298 | MMU_MACHPHYS_UPDATE, pfn) ) |
keir@20452 | 1299 { |
keir@20452 | 1300 ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn); |
keir@20877 | 1301 goto err_mapped; |
keir@20452 | 1302 } |
keir@20452 | 1303 } /* end of 'batch' for loop */ |
keir@20452 | 1304 |
keir@20877 | 1305 rc = nraces; |
keir@20452 | 1306 |
keir@20877 | 1307 err_mapped: |
keir@20877 | 1308 munmap(region_base, j*PAGE_SIZE); |
keir@20877 | 1309 free(pfn_err); |
keir@20877 | 1310 |
keir@20877 | 1311 return rc; |
keir@20452 | 1312 } |
keir@20452 | 1313 |
Tim@14782 | 1314 int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom, |
Tim@14782 | 1315 unsigned int store_evtchn, unsigned long *store_mfn, |
Tim@14782 | 1316 unsigned int console_evtchn, unsigned long *console_mfn, |
keir@19677 | 1317 unsigned int hvm, unsigned int pae, int superpages) |
mjw@1661 | 1318 { |
kfraser@11295 | 1319 DECLARE_DOMCTL; |
keir@16257 | 1320 int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0; |
kaf24@9698 | 1321 unsigned long mfn, pfn; |
mjw@1661 | 1322 unsigned int prev_pc, this_pc; |
kaf24@9698 | 1323 int nraces = 0; |
mjw@1661 | 1324 |
mjw@1661 | 1325 /* The new domain's shared-info frame number. */ |
mjw@1661 | 1326 unsigned long shared_info_frame; |
cl349@2964 | 1327 unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */ |
keir@17918 | 1328 shared_info_any_t *old_shared_info = |
keir@17918 | 1329 (shared_info_any_t *)shared_info_page; |
keir@17918 | 1330 shared_info_any_t *new_shared_info; |
kaf24@9698 | 1331 |
mjw@1661 | 1332 /* A copy of the CPU context of the guest. */ |
keir@17918 | 1333 vcpu_guest_context_any_t ctxt; |
mjw@1661 | 1334 |
smh22@7740 | 1335 /* A table containing the type of each PFN (/not/ MFN!). */ |
mjw@1661 | 1336 unsigned long *pfn_type = NULL; |
mjw@1661 | 1337 |
mjw@1661 | 1338 /* A table of MFNs to map in the current region */ |
kaf24@10314 | 1339 xen_pfn_t *region_mfn = NULL; |
mjw@1661 | 1340 |
mjw@1661 | 1341 /* A copy of the pfn-to-mfn table frame list. */ |
kaf24@10314 | 1342 xen_pfn_t *p2m_frame_list = NULL; |
Tim@14782 | 1343 |
cl349@6659 | 1344 /* A temporary mapping of the guest's start_info page. */ |
keir@17918 | 1345 start_info_any_t *start_info; |
mjw@1661 | 1346 |
steven@13424 | 1347 /* Our mapping of the current region (batch) */ |
cl349@5014 | 1348 char *region_base; |
mjw@1661 | 1349 |
kfraser@14808 | 1350 struct xc_mmu *mmu = NULL; |
mjw@1661 | 1351 |
kaf24@4457 | 1352 struct mmuext_op pin[MAX_PIN_BATCH]; |
kaf24@9698 | 1353 unsigned int nr_pins; |
kaf24@4457 | 1354 |
kfraser@14236 | 1355 uint64_t vcpumap = 1ULL; |
kfraser@14236 | 1356 unsigned int max_vcpu_id = 0; |
kfraser@14388 | 1357 int new_ctxt_format = 0; |
smh22@7740 | 1358 |
keir@20452 | 1359 pagebuf_t pagebuf; |
keir@20452 | 1360 tailbuf_t tailbuf, tmptail; |
keir@20452 | 1361 void* vcpup; |
keir@20452 | 1362 |
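    /* Restore-wide state (p2m tables, page-table geometry, host limits)
     * is kept in a single restore_ctx so that helpers such as
     * apply_batch() and buffer_tail() can share it without globals. */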
keir@20591 | 1363 static struct restore_ctx _ctx = { |
keir@20591 | 1364 .live_p2m = NULL, |
keir@20591 | 1365 .p2m = NULL, |
keir@20591 | 1366 .no_superpage_mem = 0, |
keir@20591 | 1367 }; |
keir@20591 | 1368 static struct restore_ctx *ctx = &_ctx; |
keir@20589 | 1369 struct domain_info_context *dinfo = &ctx->dinfo; |
keir@20589 | 1370 |
keir@20452 | 1371 pagebuf_init(&pagebuf); |
keir@20452 | 1372 memset(&tailbuf, 0, sizeof(tailbuf)); |
keir@20457 | 1373 tailbuf.ishvm = hvm; |
keir@20452 | 1374 |
steven@14732 | 1375 /* For info only */ |
keir@20587 | 1376 ctx->nr_pfns = 0; |
steven@14732 | 1377 |
keir@20164 | 1378 /* Always try to allocate 2M pages for HVM */ |
keir@20164 | 1379 if ( hvm ) |
keir@20164 | 1380 superpages = 1; |
keir@20164 | 1381 |
keir@20585 | 1382 if ( read_exact(io_fd, &dinfo->p2m_size, sizeof(unsigned long)) ) |
kfraser@14754 | 1383 { |
kfraser@14754 | 1384 ERROR("read: p2m_size"); |
kfraser@14754 | 1385 goto out; |
kfraser@14754 | 1386 } |
keir@20585 | 1387 DPRINTF("xc_domain_restore start: p2m_size = %lx\n", dinfo->p2m_size); |
smh22@7740 | 1388 |
Tim@15955 | 1389 if ( !get_platform_info(xc_handle, dom, |
keir@20587 | 1390 &ctx->max_mfn, &ctx->hvirt_start, &ctx->pt_levels, &dinfo->guest_width) ) |
Tim@14782 | 1391 { |
Tim@15955 | 1392 ERROR("Unable to get platform info."); |
Tim@15955 | 1393 return 1; |
Tim@15955 | 1394 } |
Tim@15955 | 1395 |
Tim@15955 | 1396 /* The *current* word size of the guest isn't very interesting; for now |
Tim@15955 | 1397 * assume the guest will be the same as we are. We'll fix that later |
Tim@15955 | 1398 * if we discover otherwise. */ |
keir@20585 | 1399 dinfo->guest_width = sizeof(unsigned long); |
keir@20587 | 1400 ctx->pt_levels = (dinfo->guest_width == 8) ? 4 : (ctx->pt_levels == 2) ? 2 : 3; |
Tim@15955 | 1401 |
Tim@15955 | 1402 if ( !hvm ) |
Tim@15955 | 1403 { |
Tim@15955 | 1404 /* Load the p2m frame list, plus potential extended info chunk */ |
keir@20591 | 1405 p2m_frame_list = load_p2m_frame_list(ctx, |
keir@16257 | 1406 io_fd, &pae_extended_cr3, &ext_vcpucontext); |
Tim@15955 | 1407 if ( !p2m_frame_list ) |
Tim@15955 | 1408 goto out; |
Tim@15955 | 1409 |
Tim@15955 | 1410 /* Now that we know the word size, tell Xen about it */ |
Tim@14782 | 1411 memset(&domctl, 0, sizeof(domctl)); |
Tim@14782 | 1412 domctl.domain = dom; |
Tim@14782 | 1413 domctl.cmd = XEN_DOMCTL_set_address_size; |
keir@20585 | 1414 domctl.u.address_size.size = dinfo->guest_width * 8; |
keir@16257 | 1415 frc = do_domctl(xc_handle, &domctl); |
keir@16257 | 1416 if ( frc != 0 ) |
keir@14809 | 1417 { |
Tim@14782 | 1418 ERROR("Unable to set guest address size."); |
Tim@14782 | 1419 goto out; |
Tim@14782 | 1420 } |
Christian@14235 | 1421 } |
smh22@7740 | 1422 |
cl349@5091 | 1423 /* We want zeroed memory so use calloc rather than malloc. */ |
keir@20587 | 1424 ctx->p2m = calloc(dinfo->p2m_size, sizeof(xen_pfn_t)); |
keir@20585 | 1425 pfn_type = calloc(dinfo->p2m_size, sizeof(unsigned long)); |
keir@17462 | 1426 |
keir@20887 | 1427 region_mfn = xc_memalign(PAGE_SIZE, ROUNDUP( |
keir@17462 | 1428 MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT)); |
mjw@1661 | 1429 |
keir@20587 | 1430 if ( (ctx->p2m == NULL) || (pfn_type == NULL) || |
keir@19677 | 1431 (region_mfn == NULL) ) |
keir@14809 | 1432 { |
kfraser@11814 | 1433 ERROR("memory alloc failed"); |
mjw@1661 | 1434 errno = ENOMEM; |
mjw@1661 | 1435 goto out; |
mjw@1661 | 1436 } |
kaf24@9698 | 1437 |
keir@17462 | 1438 memset(region_mfn, 0, |
keir@17462 | 1439 ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT)); |
keir@17462 | 1440 |
keir@14809 | 1441 if ( lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE) ) |
keir@14809 | 1442 { |
kfraser@11895 | 1443 ERROR("Could not lock region_mfn"); |
iap10@2202 | 1444 goto out; |
iap10@2202 | 1445 } |
mjw@1661 | 1446 |
mjw@1661 | 1447 /* Get the domain's shared-info frame. */ |
kfraser@11295 | 1448 domctl.cmd = XEN_DOMCTL_getdomaininfo; |
kfraser@11295 | 1449 domctl.domain = (domid_t)dom; |
keir@14809 | 1450 if ( xc_domctl(xc_handle, &domctl) < 0 ) |
keir@14809 | 1451 { |
kfraser@11814 | 1452 ERROR("Could not get information on new domain"); |
mjw@1661 | 1453 goto out; |
mjw@1661 | 1454 } |
kfraser@11295 | 1455 shared_info_frame = domctl.u.getdomaininfo.shared_info_frame; |
mjw@1661 | 1456 |
steven@13424 | 1457 /* Mark all PFNs as invalid; we allocate on demand */ |
keir@20585 | 1458 for ( pfn = 0; pfn < dinfo->p2m_size; pfn++ ) |
keir@20587 | 1459 ctx->p2m[pfn] = INVALID_P2M_ENTRY; |
smh22@7886 | 1460 |
kfraser@14808 | 1461 mmu = xc_alloc_mmu_updates(xc_handle, dom); |
kfraser@14808 | 1462 if ( mmu == NULL ) |
kfraser@14808 | 1463 { |
kfraser@11814 | 1464 ERROR("Could not initialise for MMU updates"); |
mjw@1661 | 1465 goto out; |
mjw@1661 | 1466 } |
mjw@1661 | 1467 |
smh22@7740 | 1468 DPRINTF("Reloading memory pages: 0%%\n"); |
mjw@1661 | 1469 |
mjw@1661 | 1470 /* |
mjw@1661 | 1471 * Now simply read each saved frame into its new machine frame. |
mjw@1661 | 1472 * We uncanonicalise page tables as we go. |
mjw@1661 | 1473 */ |
mjw@1661 | 1474 prev_pc = 0; |
mjw@1661 | 1475 |
keir@14142 | 1476 n = m = 0; |
keir@20452 | 1477 loadpages: |
keir@14809 | 1478 for ( ; ; ) |
keir@14809 | 1479 { |
keir@20452 | 1480 int j, curbatch; |
mjw@1661 | 1481 |
keir@20585 | 1482 this_pc = (n * 100) / dinfo->p2m_size; |
kaf24@1683 | 1483 if ( (this_pc - prev_pc) >= 5 ) |
kaf24@1683 | 1484 { |
cl349@5091 | 1485 PPRINTF("\b\b\b\b%3d%%", this_pc); |
mjw@1661 | 1486 prev_pc = this_pc; |
mjw@1661 | 1487 } |
mjw@1661 | 1488 |
keir@20452 | 1489 if ( !completed ) { |
keir@20452 | 1490 pagebuf.nr_physpages = pagebuf.nr_pages = 0; |
keir@20452 | 1491 if ( pagebuf_get_one(&pagebuf, io_fd, xc_handle, dom) < 0 ) { |
keir@20452 | 1492 ERROR("Error when reading batch\n"); |
keir@20452 | 1493 goto out; |
keir@20452 | 1494 } |
mjw@1661 | 1495 } |
keir@20452 | 1496 j = pagebuf.nr_pages; |
mjw@1661 | 1497 |
cl349@5091 | 1498 PPRINTF("batch %d\n",j); |
kaf24@9698 | 1499 |
keir@20452 | 1500 if ( j == 0 ) { |
keir@20452 | 1501 /* catch vcpu updates */ |
keir@20452 | 1502 if (pagebuf.new_ctxt_format) { |
keir@20452 | 1503 vcpumap = pagebuf.vcpumap; |
keir@20452 | 1504 max_vcpu_id = pagebuf.max_vcpu_id; |
keir@18929 | 1505 } |
keir@20452 | 1506 /* should this be deferred? does it change? */ |
keir@20452 | 1507 if ( pagebuf.identpt ) |
keir@20452 | 1508 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT, pagebuf.identpt); |
keir@20452 | 1509 if ( pagebuf.vm86_tss ) |
keir@20452 | 1510 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, pagebuf.vm86_tss); |
kaf24@1683 | 1511 break; /* our work here is done */ |
mjw@1661 | 1512 } |
mjw@1661 | 1513 |
keir@20452 | 1514 /* break pagebuf into batches */ |
keir@20452 | 1515 curbatch = 0; |
keir@20452 | 1516 while ( curbatch < j ) { |
keir@20452 | 1517 int brc; |
kaf24@9698 | 1518 |
keir@20591 | 1519 brc = apply_batch(xc_handle, dom, ctx, region_mfn, pfn_type, |
keir@20452 | 1520 pae_extended_cr3, hvm, mmu, &pagebuf, curbatch, superpages); |
keir@20452 | 1521 if ( brc < 0 ) |
mjw@1661 | 1522 goto out; |
mjw@1661 | 1523 |
keir@20452 | 1524 nraces += brc; |
smh22@7740 | 1525 |
keir@20452 | 1526 curbatch += MAX_BATCH_SIZE; |
keir@20452 | 1527 } |
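        /* apply_batch() maps and fixes up at most MAX_BATCH_SIZE pages at
         * a time, so a large buffered checkpoint is applied in several
         * passes; its return value counts benign page-table races, which
         * are accumulated in nraces above. */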
mjw@1661 | 1528 |
keir@20452 | 1529 pagebuf.nr_physpages = pagebuf.nr_pages = 0; |
mjw@1661 | 1530 |
keir@20452 | 1531 n += j; /* crude stats */ |
keir@14142 | 1532 |
keir@14142 | 1533 /* |
keir@14142 | 1534 * Discard cache for portion of file read so far up to last |
keir@14142 | 1535 * page boundary every 16MB or so. |
keir@14142 | 1536 */ |
keir@14142 | 1537 m += j; |
keir@14142 | 1538 if ( m > MAX_PAGECACHE_USAGE ) |
keir@14142 | 1539 { |
keir@14142 | 1540 discard_file_cache(io_fd, 0 /* no flush */); |
keir@14142 | 1541 m = 0; |
keir@14142 | 1542 } |
mjw@1661 | 1543 } |
mjw@1661 | 1544 |
kfraser@10383 | 1545 /* |
kfraser@10383 | 1546 * Ensure we flush all machphys updates before potential PAE-specific |
kfraser@10383 | 1547 * reallocations below. |
kfraser@10383 | 1548 */ |
keir@14809 | 1549 if ( !hvm && xc_flush_mmu_updates(xc_handle, mmu) ) |
keir@14809 | 1550 { |
kfraser@14808 | 1551 ERROR("Error doing flush_mmu_updates()"); |
kfraser@10383 | 1552 goto out; |
kfraser@10383 | 1553 } |
kfraser@10383 | 1554 |
keir@20452 | 1555 // DPRINTF("Received all pages (%d races)\n", nraces); |
mjw@1661 | 1556 |
keir@20452 | 1557 if ( !completed ) { |
keir@20453 | 1558 int flags = 0; |
keir@20453 | 1559 |
keir@20591 | 1560 if ( buffer_tail(ctx, &tailbuf, io_fd, max_vcpu_id, vcpumap, |
keir@20452 | 1561 ext_vcpucontext) < 0 ) { |
keir@20452 | 1562 ERROR ("error buffering image tail"); |
keir@20452 | 1563 goto out; |
keir@20452 | 1564 } |
keir@20452 | 1565 completed = 1; |
keir@20453 | 1566 /* shift into nonblocking mode for the remainder */ |
keir@20453 | 1567 if ( (flags = fcntl(io_fd, F_GETFL,0)) < 0 ) |
keir@20453 | 1568 flags = 0; |
keir@20453 | 1569 fcntl(io_fd, F_SETFL, flags | O_NONBLOCK); |
keir@20452 | 1570 } |
keir@20452 | 1571 |
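    /*
     * Checkpointed (Remus) streams: a complete image has now been
     * restored, but the sender may keep appending whole checkpoints.
     * Each new checkpoint is buffered in full (pages, then image tail)
     * before it replaces the previous one, so any read error below
     * simply falls through to 'finish' and the domain resumes from the
     * last complete checkpoint.
     */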
keir@20452 | 1572 // DPRINTF("Buffered checkpoint\n"); |
keir@20452 | 1573 |
keir@20452 | 1574 if ( pagebuf_get(&pagebuf, io_fd, xc_handle, dom) ) { |
keir@20452 | 1575 ERROR("error when buffering batch, finishing\n"); |
keir@20452 | 1576 goto finish; |
keir@20452 | 1577 } |
keir@20452 | 1578 memset(&tmptail, 0, sizeof(tmptail)); |
keir@20457 | 1579 tmptail.ishvm = hvm; |
keir@20591 | 1580 if ( buffer_tail(ctx, &tmptail, io_fd, max_vcpu_id, vcpumap, |
keir@20452 | 1581 ext_vcpucontext) < 0 ) { |
keir@20452 | 1582 ERROR ("error buffering image tail, finishing"); |
keir@20452 | 1583 goto finish; |
keir@20452 | 1584 } |
keir@20452 | 1585 tailbuf_free(&tailbuf); |
keir@20452 | 1586 memcpy(&tailbuf, &tmptail, sizeof(tailbuf)); |
keir@20452 | 1587 |
keir@20452 | 1588 goto loadpages; |
keir@20452 | 1589 |
keir@20452 | 1590 finish: |
keir@20457 | 1591 if ( hvm ) |
keir@20457 | 1592 goto finish_hvm; |
keir@20452 | 1593 |
keir@20587 | 1594 if ( (ctx->pt_levels == 3) && !pae_extended_cr3 ) |
keir@14809 | 1595 { |
kaf24@10304 | 1596 /* |
kaf24@10304 | 1597 ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This |
kaf24@10304 | 1598 ** is a little awkward and involves (a) finding all such PGDs and |
kaf24@10304 | 1599 ** replacing them with 'lowmem' versions; (b) updating the p2m[] |
kaf24@10304 | 1600 ** with the new info; and (c) canonicalizing all the L1s using the |
kaf24@10304 | 1601 ** (potentially updated) p2m[]. |
kaf24@10304 | 1602 ** |
kaf24@10304 | 1603 ** This is relatively slow (and currently involves two passes through |
kaf24@10304 | 1604 ** the pfn_type[] array), but at least seems to be correct. May wish |
kaf24@10304 | 1605 ** to consider more complex approaches to optimize this later. |
kaf24@10304 | 1606 */ |
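        /* (Background: without the extended-cr3 interface, a PAE guest's
         * CR3 can only name a page-directory-pointer table that lives
         * below 4GB, hence the relocation pass below.) */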
kaf24@10304 | 1607 |
kaf24@10304 | 1608 int j, k; |
steven@13424 | 1609 |
kaf24@10304 | 1610 /* First pass: find all L3TABs currently in > 4G MFNs and get new MFNs */ |
keir@20585 | 1611 for ( i = 0; i < dinfo->p2m_size; i++ ) |
kfraser@11295 | 1612 { |
kfraser@11295 | 1613 if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == |
kfraser@11295 | 1614 XEN_DOMCTL_PFINFO_L3TAB) && |
keir@20587 | 1615 (ctx->p2m[i] > 0xfffffUL) ) |
kfraser@11295 | 1616 { |
kaf24@10304 | 1617 unsigned long new_mfn; |
kaf24@10304 | 1618 uint64_t l3ptes[4]; |
kaf24@10304 | 1619 uint64_t *l3tab; |
kaf24@10304 | 1620 |
kaf24@10304 | 1621 l3tab = (uint64_t *) |
kaf24@10304 | 1622 xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, |
keir@20587 | 1623 PROT_READ, ctx->p2m[i]); |
kaf24@10304 | 1624 |
keir@14809 | 1625 for ( j = 0; j < 4; j++ ) |
kaf24@10304 | 1626 l3ptes[j] = l3tab[j]; |
kaf24@10304 | 1627 |
kaf24@10304 | 1628 munmap(l3tab, PAGE_SIZE); |
kaf24@10304 | 1629 |
keir@20587 | 1630 new_mfn = xc_make_page_below_4G(xc_handle, dom, ctx->p2m[i]); |
keir@14809 | 1631 if ( !new_mfn ) |
keir@14809 | 1632 { |
kfraser@11814 | 1633 ERROR("Couldn't get a page below 4GB :-("); |
kaf24@10304 | 1634 goto out; |
kaf24@10304 | 1635 } |
kaf24@10304 | 1636 |
keir@20587 | 1637 ctx->p2m[i] = new_mfn; |
keir@14809 | 1638 if ( xc_add_mmu_update(xc_handle, mmu, |
keir@14809 | 1639 (((unsigned long long)new_mfn) |
keir@14809 | 1640 << PAGE_SHIFT) | |
keir@14809 | 1641 MMU_MACHPHYS_UPDATE, i) ) |
keir@14809 | 1642 { |
kfraser@11814 | 1643 ERROR("Couldn't m2p on PAE root pgdir"); |
kaf24@10304 | 1644 goto out; |
kaf24@10304 | 1645 } |
kaf24@10304 | 1646 |
kaf24@10304 | 1647 l3tab = (uint64_t *) |
kaf24@10304 | 1648 xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, |
keir@20587 | 1649 PROT_READ | PROT_WRITE, ctx->p2m[i]); |
kaf24@10304 | 1650 |
keir@14809 | 1651 for ( j = 0; j < 4; j++ ) |
kaf24@10304 | 1652 l3tab[j] = l3ptes[j]; |
kaf24@10304 | 1653 |
kaf24@10304 | 1654 munmap(l3tab, PAGE_SIZE); |
kaf24@10304 | 1655 } |
kaf24@10304 | 1656 } |
kaf24@10304 | 1657 |
kaf24@10304 | 1658 /* Second pass: find all L1TABs and uncanonicalize them */ |
kaf24@10304 | 1659 j = 0; |
kaf24@10304 | 1660 |
keir@20585 | 1661 for ( i = 0; i < dinfo->p2m_size; i++ ) |
kfraser@11295 | 1662 { |
kfraser@11295 | 1663 if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == |
kfraser@11295 | 1664 XEN_DOMCTL_PFINFO_L1TAB) ) |
kfraser@11295 | 1665 { |
keir@20587 | 1666 region_mfn[j] = ctx->p2m[i]; |
kaf24@10304 | 1667 j++; |
kaf24@10304 | 1668 } |
kaf24@10304 | 1669 |
keir@20585 | 1670 if ( (i == (dinfo->p2m_size-1)) || (j == MAX_BATCH_SIZE) ) |
keir@14809 | 1671 { |
keir@20837 | 1672 region_base = xc_map_foreign_pages( |
keir@14809 | 1673 xc_handle, dom, PROT_READ | PROT_WRITE, region_mfn, j); |
keir@14809 | 1674 if ( region_base == NULL ) |
keir@14809 | 1675 { |
kfraser@11814 | 1676 ERROR("map batch failed"); |
kaf24@10304 | 1677 goto out; |
kaf24@10304 | 1678 } |
kaf24@10304 | 1679 |
keir@14809 | 1680 for ( k = 0; k < j; k++ ) |
keir@14809 | 1681 { |
keir@14809 | 1682 if ( !uncanonicalize_pagetable( |
keir@20837 | 1683 xc_handle, dom, ctx, |
keir@19677 | 1684 region_base + k*PAGE_SIZE, superpages) ) |
keir@14809 | 1685 { |
kfraser@11814 | 1686 ERROR("failed uncanonicalize pt!"); |
kaf24@10304 | 1687 goto out; |
kaf24@10304 | 1688 } |
kaf24@10304 | 1689 } |
kaf24@10304 | 1690 |
kaf24@10304 | 1691 munmap(region_base, j*PAGE_SIZE); |
kaf24@10304 | 1692 j = 0; |
kaf24@10304 | 1693 } |
kaf24@10304 | 1694 } |
kaf24@10304 | 1695 |
keir@14809 | 1696 if ( xc_flush_mmu_updates(xc_handle, mmu) ) |
keir@14809 | 1697 { |
kfraser@14808 | 1698 ERROR("Error doing xc_flush_mmu_updates()"); |
kfraser@10383 | 1699 goto out; |
kfraser@10383 | 1700 } |
kaf24@10304 | 1701 } |
kaf24@10304 | 1702 |
mjw@1661 | 1703 /* |
mjw@1661 | 1704 * Pin page tables. Do this after writing to them as otherwise Xen |
mjw@1661 | 1705 * will barf when doing the type-checking. |
mjw@1661 | 1706 */ |
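    /* Pages marked XEN_DOMCTL_PFINFO_LPINTAB were pinned in the source
     * domain; MMUEXT_PIN_Ln_TABLE makes Xen validate each one as a
     * level-n page table and take the corresponding type reference. */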
kaf24@9698 | 1707 nr_pins = 0; |
keir@20585 | 1708 for ( i = 0; i < dinfo->p2m_size; i++ ) |
kfraser@11295 | 1709 { |
kfraser@11295 | 1710 if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 ) |
kaf24@4457 | 1711 continue; |
smh22@7886 | 1712 |
kfraser@11295 | 1713 switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) |
kfraser@11295 | 1714 { |
kfraser@11295 | 1715 case XEN_DOMCTL_PFINFO_L1TAB: |
kaf24@4457 | 1716 pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE; |
kaf24@9698 | 1717 break; |
kaf24@9698 | 1718 |
kfraser@11295 | 1719 case XEN_DOMCTL_PFINFO_L2TAB: |
kaf24@4457 | 1720 pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE; |
kaf24@9698 | 1721 break; |
kaf24@9698 | 1722 |
kfraser@11295 | 1723 case XEN_DOMCTL_PFINFO_L3TAB: |
smh22@7740 | 1724 pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE; |
kaf24@9698 | 1725 break; |
smh22@7740 | 1726 |
kfraser@11295 | 1727 case XEN_DOMCTL_PFINFO_L4TAB: |
smh22@7740 | 1728 pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE; |
kaf24@9698 | 1729 break; |
kaf24@9698 | 1730 |
kaf24@9698 | 1731 default: |
kaf24@9698 | 1732 continue; |
smh22@7740 | 1733 } |
smh22@7740 | 1734 |
keir@20587 | 1735 pin[nr_pins].arg1.mfn = ctx->p2m[i]; |
kaf24@9698 | 1736 nr_pins++; |
smh22@7886 | 1737 |
kaf24@10513 | 1738 /* Batch full? Then flush. */ |
keir@14809 | 1739 if ( nr_pins == MAX_PIN_BATCH ) |
keir@14809 | 1740 { |
keir@14809 | 1741 if ( xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0 ) |
keir@14809 | 1742 { |
kfraser@11814 | 1743 ERROR("Failed to pin batch of %d page tables", nr_pins); |
kaf24@10513 | 1744 goto out; |
kaf24@10513 | 1745 } |
kaf24@10513 | 1746 nr_pins = 0; |
kaf24@10513 | 1747 } |
kaf24@10513 | 1748 } |
kaf24@10513 | 1749 |
kaf24@10513 | 1750 /* Flush final partial batch. */ |
keir@14809 | 1751 if ( (nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) ) |
keir@14809 | 1752 { |
kfraser@11814 | 1753 ERROR("Failed to pin batch of %d page tables", nr_pins); |
kaf24@10513 | 1754 goto out; |
iap10@2507 | 1755 } |
mjw@1661 | 1756 |
cl349@5091 | 1757 DPRINTF("\b\b\b\b100%%\n"); |
keir@20587 | 1758 DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns); |
mjw@1661 | 1759 |
iap10@2291 | 1760 /* Get the list of PFNs that are not in the pseudo-phys map */ |
iap10@2291 | 1761 { |
keir@20452 | 1762 int nr_frees = 0; |
kaf24@2624 | 1763 |
keir@20457 | 1764 for ( i = 0; i < tailbuf.u.pv.pfncount; i++ ) |
keir@14809 | 1765 { |
keir@20457 | 1766 unsigned long pfn = tailbuf.u.pv.pfntab[i]; |
smh22@7740 | 1767 |
keir@20587 | 1768 if ( ctx->p2m[pfn] != INVALID_P2M_ENTRY ) |
keir@14809 | 1769 { |
keir@20452 | 1770 /* This pfn must not be in the final physmap, but a page |
steven@13424 | 1771 for it arrived during the save/migration - free it */ |
keir@20587 | 1772 tailbuf.u.pv.pfntab[nr_frees++] = ctx->p2m[pfn]; |
keir@20587 | 1773 ctx->p2m[pfn] = INVALID_P2M_ENTRY; /* not in pseudo-physical map */ |
steven@13424 | 1774 } |
kaf24@6775 | 1775 } |
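        /* The loop above reused pfntab[] in place: its entries now hold
         * the MFNs of the stale pages, ready to be handed back to Xen
         * with XENMEM_decrease_reservation below. */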
kaf24@9698 | 1776 |
keir@14809 | 1777 if ( nr_frees > 0 ) |
keir@14809 | 1778 { |
kaf24@6506 | 1779 struct xen_memory_reservation reservation = { |
steven@13424 | 1780 .nr_extents = nr_frees, |
kaf24@6506 | 1781 .extent_order = 0, |
kaf24@6510 | 1782 .domid = dom |
kaf24@6506 | 1783 }; |
keir@20457 | 1784 set_xen_guest_handle(reservation.extent_start, tailbuf.u.pv.pfntab); |
smh22@7740 | 1785 |
keir@16257 | 1786 if ( (frc = xc_memory_op(xc_handle, XENMEM_decrease_reservation, |
keir@16257 | 1787 &reservation)) != nr_frees ) |
keir@14809 | 1788 { |
keir@16257 | 1789 ERROR("Could not decrease reservation : %d", frc); |
kaf24@6775 | 1790 goto out; |
keir@14809 | 1791 } |
keir@14809 | 1792 else |
keir@20457 | 1793 DPRINTF("Decreased reservation by %d pages\n", tailbuf.u.pv.pfncount); |
kaf24@9698 | 1794 } |
iap10@2291 | 1795 } |
iap10@2291 | 1796 |
keir@17462 | 1797 if ( lock_pages(&ctxt, sizeof(ctxt)) ) |
keir@17462 | 1798 { |
keir@17462 | 1799 ERROR("Unable to lock ctxt"); |
keir@17462 | 1800 goto out; |
keir@17462 | 1801 } |
keir@17462 | 1802 |
keir@20457 | 1803 vcpup = tailbuf.u.pv.vcpubuf; |
keir@14809 | 1804 for ( i = 0; i <= max_vcpu_id; i++ ) |
keir@14809 | 1805 { |
keir@14809 | 1806 if ( !(vcpumap & (1ULL << i)) ) |
kfraser@14236 | 1807 continue; |
kfraser@14236 | 1808 |
keir@20585 | 1809 memcpy(&ctxt, vcpup, ((dinfo->guest_width == 8) ? sizeof(ctxt.x64) |
keir@20452 | 1810 : sizeof(ctxt.x32))); |
keir@20585 | 1811 vcpup += (dinfo->guest_width == 8) ? sizeof(ctxt.x64) : sizeof(ctxt.x32); |
keir@20452 | 1812 |
keir@20452 | 1813 DPRINTF("read VCPU %d\n", i); |
kfraser@14236 | 1814 |
kfraser@14388 | 1815 if ( !new_ctxt_format ) |
Tim@15955 | 1816 SET_FIELD(&ctxt, flags, GET_FIELD(&ctxt, flags) | VGCF_online); |
kfraser@14388 | 1817 |
keir@14809 | 1818 if ( i == 0 ) |
keir@14809 | 1819 { |
kfraser@14236 | 1820 /* |
kfraser@14236 | 1821 * Uncanonicalise the suspend-record frame number and poke |
kfraser@14236 | 1822 * resume record. |
kfraser@14236 | 1823 */ |
Tim@15955 | 1824 pfn = GET_FIELD(&ctxt, user_regs.edx); |
keir@20585 | 1825 if ( (pfn >= dinfo->p2m_size) || |
keir@14809 | 1826 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) |
keir@14809 | 1827 { |
kfraser@14236 | 1828 ERROR("Suspend record frame number is bad"); |
kfraser@14236 | 1829 goto out; |
kfraser@14236 | 1830 } |
keir@20587 | 1831 mfn = ctx->p2m[pfn]; |
Tim@15955 | 1832 SET_FIELD(&ctxt, user_regs.edx, mfn); |
kfraser@14236 | 1833 start_info = xc_map_foreign_range( |
kfraser@14236 | 1834 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn); |
keir@20585 | 1835 SET_FIELD(start_info, nr_pages, dinfo->p2m_size); |
Tim@15955 | 1836 SET_FIELD(start_info, shared_info, shared_info_frame<<PAGE_SHIFT); |
Tim@15955 | 1837 SET_FIELD(start_info, flags, 0); |
keir@20587 | 1838 *store_mfn = ctx->p2m[GET_FIELD(start_info, store_mfn)]; |
Tim@15955 | 1839 SET_FIELD(start_info, store_mfn, *store_mfn); |
Tim@15955 | 1840 SET_FIELD(start_info, store_evtchn, store_evtchn); |
keir@20587 | 1841 *console_mfn = ctx->p2m[GET_FIELD(start_info, console.domU.mfn)]; |
Tim@15955 | 1842 SET_FIELD(start_info, console.domU.mfn, *console_mfn); |
Tim@15955 | 1843 SET_FIELD(start_info, console.domU.evtchn, console_evtchn); |
kfraser@14236 | 1844 munmap(start_info, PAGE_SIZE); |
kfraser@14236 | 1845 } |
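        /* start_info was saved verbatim, so every frame number and event
         * channel in it still refers to the old host; the SET_FIELD()
         * calls above rewrite them with this host's store/console MFNs
         * and the event channels supplied by the caller. */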
kfraser@14236 | 1846 /* Uncanonicalise each GDT frame number. */ |
Tim@15955 | 1847 if ( GET_FIELD(&ctxt, gdt_ents) > 8192 ) |
keir@14809 | 1848 { |
kfraser@14236 | 1849 ERROR("GDT entry count out of range"); |
kfraser@14236 | 1850 goto out; |
kfraser@14236 | 1851 } |
kfraser@14236 | 1852 |
Tim@15955 | 1853 for ( j = 0; (512*j) < GET_FIELD(&ctxt, gdt_ents); j++ ) |
keir@14809 | 1854 { |
Tim@15955 | 1855 pfn = GET_FIELD(&ctxt, gdt_frames[j]); |
keir@20585 | 1856 if ( (pfn >= dinfo->p2m_size) || |
keir@14809 | 1857 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) |
keir@14809 | 1858 { |
Tim@15955 | 1859 ERROR("GDT frame number %i (0x%lx) is bad", |
Tim@15955 | 1860 j, (unsigned long)pfn); |
kfraser@14236 | 1861 goto out; |
kfraser@14236 | 1862 } |
keir@20587 | 1863 SET_FIELD(&ctxt, gdt_frames[j], ctx->p2m[pfn]); |
kfraser@14236 | 1864 } |
kfraser@14236 | 1865 /* Uncanonicalise the page table base pointer. */ |
keir@17074 | 1866 pfn = UNFOLD_CR3(GET_FIELD(&ctxt, ctrlreg[3])); |
kfraser@14236 | 1867 |
keir@20585 | 1868 if ( pfn >= dinfo->p2m_size ) |
keir@14809 | 1869 { |
steven@14732 | 1870 ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx", |
keir@20585 | 1871 pfn, dinfo->p2m_size, pfn_type[pfn]); |
kfraser@14236 | 1872 goto out; |
kfraser@14236 | 1873 } |
kfraser@14236 | 1874 |
kfraser@14236 | 1875 if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != |
keir@20587 | 1876 ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) |
keir@14809 | 1877 { |
kfraser@14236 | 1878 ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", |
keir@20585 | 1879 pfn, dinfo->p2m_size, pfn_type[pfn], |
keir@20587 | 1880 (unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); |
kfraser@14236 | 1881 goto out; |
kfraser@14236 | 1882 } |
keir@20587 | 1883 SET_FIELD(&ctxt, ctrlreg[3], FOLD_CR3(ctx->p2m[pfn])); |
kfraser@14236 | 1884 |
keir@14254 | 1885 /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */ |
keir@20587 | 1886 if ( (ctx->pt_levels == 4) && (ctxt.x64.ctrlreg[1] & 1) ) |
keir@14254 | 1887 { |
keir@17074 | 1888 pfn = UNFOLD_CR3(ctxt.x64.ctrlreg[1] & ~1); |
keir@20585 | 1889 if ( pfn >= dinfo->p2m_size ) |
keir@14809 | 1890 { |
Tim@15955 | 1891 ERROR("User PT base is bad: pfn=%lu p2m_size=%lu", |
keir@20585 | 1892 pfn, dinfo->p2m_size); |
keir@14254 | 1893 goto out; |
keir@14254 | 1894 } |
keir@14254 | 1895 if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) != |
keir@20587 | 1896 ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) ) |
keir@14809 | 1897 { |
keir@14254 | 1898 ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx", |
keir@20585 | 1899 pfn, dinfo->p2m_size, pfn_type[pfn], |
keir@20587 | 1900 (unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT); |
keir@14254 | 1901 goto out; |
keir@14254 | 1902 } |
keir@20587 | 1903 ctxt.x64.ctrlreg[1] = FOLD_CR3(ctx->p2m[pfn]); |
keir@14254 | 1904 } |
kfraser@14236 | 1905 domctl.cmd = XEN_DOMCTL_setvcpucontext; |
kfraser@14236 | 1906 domctl.domain = (domid_t)dom; |
kfraser@14236 | 1907 domctl.u.vcpucontext.vcpu = i; |
Tim@15955 | 1908 set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt.c); |
keir@16257 | 1909 frc = xc_domctl(xc_handle, &domctl); |
keir@16257 | 1910 if ( frc != 0 ) |
keir@14809 | 1911 { |
kfraser@14236 | 1912 ERROR("Couldn't build vcpu%d", i); |
kfraser@14236 | 1913 goto out; |
kfraser@14236 | 1914 } |
keir@16257 | 1915 |
keir@16257 | 1916 if ( !ext_vcpucontext ) |
keir@16257 | 1917 continue; |
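        /* The image tail carries one fixed 128-byte ext_vcpucontext
         * record per vcpu, matching the 128 bytes the save side writes
         * for each. */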
keir@20452 | 1918 memcpy(&domctl.u.ext_vcpucontext, vcpup, 128); |
keir@20452 | 1919 vcpup += 128; |
keir@16257 | 1920 domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext; |
keir@16257 | 1921 domctl.domain = dom; |
keir@16257 | 1922 frc = xc_domctl(xc_handle, &domctl); |
keir@16257 | 1923 if ( frc != 0 ) |
keir@16257 | 1924 { |
keir@16257 | 1925 ERROR("Couldn't set extended vcpu%d info\n", i); |
keir@16257 | 1926 goto out; |
keir@16257 | 1927 } |
mjw@1661 | 1928 } |
mjw@1661 | 1929 |
keir@20457 | 1930 memcpy(shared_info_page, tailbuf.u.pv.shared_info_page, PAGE_SIZE); |
keir@20452 | 1931 |
keir@20452 | 1932 DPRINTF("Completed checkpoint load\n"); |
mjw@1661 | 1933 |
ian@15744 | 1934 /* Restore contents of shared-info page. No checking needed. */ |
ian@15744 | 1935 new_shared_info = xc_map_foreign_range( |
ian@15744 | 1936 xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame); |
ian@15744 | 1937 |
ian@15744 | 1938 /* restore saved vcpu_info and arch specific info */ |
Tim@15955 | 1939 MEMCPY_FIELD(new_shared_info, old_shared_info, vcpu_info); |
Tim@15955 | 1940 MEMCPY_FIELD(new_shared_info, old_shared_info, arch); |
mjw@1661 | 1941 |
ian@15744 | 1942 /* clear any pending events and the selector */ |
Tim@15955 | 1943 MEMSET_ARRAY_FIELD(new_shared_info, evtchn_pending, 0); |
keir@19826 | 1944 for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ ) |
Tim@15955 | 1945 SET_FIELD(new_shared_info, vcpu_info[i].evtchn_pending_sel, 0); |
ian@15744 | 1946 |
ian@15744 | 1947 /* mask event channels */ |
Tim@15955 | 1948 MEMSET_ARRAY_FIELD(new_shared_info, evtchn_mask, 0xff); |
ian@15744 | 1949 |
ian@15744 | 1950 /* leave wallclock time alone; it is set by the hypervisor */ |
ian@15744 | 1951 munmap(new_shared_info, PAGE_SIZE); |
kaf24@9698 | 1952 |
mjw@1661 | 1953 /* Uncanonicalise the pfn-to-mfn table frame-number list. */ |
keir@14809 | 1954 for ( i = 0; i < P2M_FL_ENTRIES; i++ ) |
keir@14809 | 1955 { |
smh22@7740 | 1956 pfn = p2m_frame_list[i]; |
keir@20585 | 1957 if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) ) |
keir@14809 | 1958 { |
Tim@15955 | 1959 ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn); |
mjw@1661 | 1960 goto out; |
mjw@1661 | 1961 } |
keir@20587 | 1962 p2m_frame_list[i] = ctx->p2m[pfn]; |
mjw@1661 | 1963 } |
kaf24@9698 | 1964 |
smh22@7740 | 1965 /* Copy the P2M we've constructed to the 'live' P2M */ |
keir@20837 | 1966 if ( !(ctx->live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_WRITE, |
keir@14809 | 1967 p2m_frame_list, P2M_FL_ENTRIES)) ) |
keir@14809 | 1968 { |
kfraser@11814 | 1969 ERROR("Couldn't map p2m table"); |
mjw@1661 | 1970 goto out; |
mjw@1661 | 1971 } |
mjw@1661 | 1972 |
Tim@15955 | 1973 /* If the domain we're restoring has a different word size to ours, |
keir@18367 | 1974 * we need to adjust the live_p2m assignment appropriately */ |
keir@20585 | 1975 if ( dinfo->guest_width > sizeof (xen_pfn_t) ) |
keir@20585 | 1976 for ( i = dinfo->p2m_size - 1; i >= 0; i-- ) |
keir@20587 | 1977 ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i]; |
keir@20585 | 1978 else if ( dinfo->guest_width < sizeof (xen_pfn_t) ) |
keir@20585 | 1979 for ( i = 0; i < dinfo->p2m_size; i++ ) |
keir@20587 | 1980 ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i]; |
keir@18367 | 1981 else |
keir@20587 | 1982 memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t)); |
keir@20587 | 1983 munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE); |
mjw@1661 | 1984 |
cl349@5091 | 1985 DPRINTF("Domain ready to be built.\n"); |
Tim@14782 | 1986 rc = 0; |
keir@20457 | 1987 goto out; |
keir@20457 | 1988 |
keir@20457 | 1989 finish_hvm: |
keir@20457 | 1990 /* Dump the QEMU state to a state file for QEMU to load */ |
keir@20457 | 1991 if ( dump_qemu(dom, &tailbuf.u.hvm) ) { |
keir@20457 | 1992 ERROR("Error dumping QEMU state to file"); |
keir@20457 | 1993 goto out; |
keir@20457 | 1994 } |
keir@20457 | 1995 |
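    /* tailbuf.u.hvm.magicpfns[] holds, in order, the ioreq, buffered-
     * ioreq and xenstore special pages, matching the HVM_PARAM_* writes
     * below. */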
keir@20457 | 1996 /* These comms pages need to be zeroed at the start of day */ |
keir@20457 | 1997 if ( xc_clear_domain_page(xc_handle, dom, tailbuf.u.hvm.magicpfns[0]) || |
keir@20457 | 1998 xc_clear_domain_page(xc_handle, dom, tailbuf.u.hvm.magicpfns[1]) || |
keir@20457 | 1999 xc_clear_domain_page(xc_handle, dom, tailbuf.u.hvm.magicpfns[2]) ) |
keir@20457 | 2000 { |
keir@20457 | 2001 ERROR("error zeroing magic pages"); |
keir@20457 | 2002 goto out; |
keir@20457 | 2003 } |
keir@20457 | 2004 |
keir@20457 | 2005 if ( (frc = xc_set_hvm_param(xc_handle, dom, |
keir@20457 | 2006 HVM_PARAM_IOREQ_PFN, tailbuf.u.hvm.magicpfns[0])) |
keir@20457 | 2007 || (frc = xc_set_hvm_param(xc_handle, dom, |
keir@20457 | 2008 HVM_PARAM_BUFIOREQ_PFN, tailbuf.u.hvm.magicpfns[1])) |
keir@20457 | 2009 || (frc = xc_set_hvm_param(xc_handle, dom, |
keir@20457 | 2010 HVM_PARAM_STORE_PFN, tailbuf.u.hvm.magicpfns[2])) |
keir@20457 | 2011 || (frc = xc_set_hvm_param(xc_handle, dom, |
keir@20457 | 2012 HVM_PARAM_PAE_ENABLED, pae)) |
keir@20457 | 2013 || (frc = xc_set_hvm_param(xc_handle, dom, |
keir@20457 | 2014 HVM_PARAM_STORE_EVTCHN, |
keir@20457 | 2015 store_evtchn)) ) |
keir@20457 | 2016 { |
keir@20457 | 2017 ERROR("error setting HVM params: %i", frc); |
keir@20457 | 2018 goto out; |
keir@20457 | 2019 } |
keir@20457 | 2020 *store_mfn = tailbuf.u.hvm.magicpfns[2]; |
keir@20457 | 2021 |
keir@20457 | 2022 frc = xc_domain_hvm_setcontext(xc_handle, dom, tailbuf.u.hvm.hvmbuf, |
keir@20457 | 2023 tailbuf.u.hvm.reclen); |
keir@20457 | 2024 if ( frc ) |
keir@20457 | 2025 { |
keir@20457 | 2026 ERROR("error setting the HVM context"); |
keir@20457 | 2027 goto out; |
keir@20457 | 2028 } |
keir@20457 | 2029 |
keir@20457 | 2030 /* HVM success! */ |
keir@20457 | 2031 rc = 0; |
cl349@2791 | 2032 |
mjw@1661 | 2033 out: |
kaf24@1683 | 2034 if ( (rc != 0) && (dom != 0) ) |
mjw@1661 | 2035 xc_domain_destroy(xc_handle, dom); |
vh249@6159 | 2036 free(mmu); |
keir@20587 | 2037 free(ctx->p2m); |
vh249@6159 | 2038 free(pfn_type); |
keir@20457 | 2039 tailbuf_free(&tailbuf); |
mjw@1661 | 2040 |
keir@14142 | 2041 /* discard cache for save file */ |
keir@14142 | 2042 discard_file_cache(io_fd, 1 /*flush*/); |
keir@14142 | 2043 |
cl349@5091 | 2044 DPRINTF("Restore exit with rc=%d\n", rc); |
steven@13424 | 2045 |
mjw@1661 | 2046 return rc; |
mjw@1661 | 2047 } |
keir@19677 | 2048 /* |
keir@19677 | 2049 * Local variables: |
keir@19677 | 2050 * mode: C |
keir@19677 | 2051 * c-set-style: "BSD" |
keir@19677 | 2052 * c-basic-offset: 4 |
keir@19677 | 2053 * tab-width: 4 |
keir@19677 | 2054 * indent-tabs-mode: nil |
keir@19677 | 2055 * End: |
keir@19677 | 2056 */ |