
view tools/libxc/xc_domain_restore.c @ 20877:6a5d8284b6e6

libxc: Unbreak HVM live migration after 0b138a019292.

0b138a019292 was a little too ambitious replacing xc_map_foreign_batch
with xc_map_foreign_pages in xc_domain_restore. With HVM, some of the
mappings are expected to fail (as "XTAB" pages).

Signed-off-by: Brendan Cully <brendan@cs.ubc.ca>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jan 21 09:03:20 2010 +0000 (2010-01-21)
parents 0b138a019292
children fbe8f32fa257
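The changeset message above notes that with HVM guests some foreign mappings are expected to fail (XTAB holes), which is why a whole-batch mapper is the wrong tool here. As a hedged editorial illustration (not part of the file below), this is roughly the pattern apply_batch() uses further down, using the per-page error array returned by xc_map_foreign_bulk(); the helper name is hypothetical.

/* Sketch only: map a batch, tolerating holes at XTAB entries. */
static char *map_batch_tolerating_holes(int xc_handle, uint32_t dom,
                                        xen_pfn_t *region_mfn,
                                        unsigned long *pfn_types, int nr)
{
    int i, *pfn_err = calloc(nr, sizeof(*pfn_err));
    char *base;

    if ( !pfn_err )
        return NULL;
    base = xc_map_foreign_bulk(xc_handle, dom, PROT_WRITE,
                               region_mfn, pfn_err, nr);
    if ( base == NULL )
    {
        free(pfn_err);
        return NULL;                          /* whole batch failed */
    }
    for ( i = 0; i < nr; i++ )
    {
        if ( (pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) ==
             XEN_DOMCTL_PFINFO_XTAB )
            continue;                         /* expected hole: skip it */
        if ( pfn_err[i] )
        {
            munmap(base, nr * PAGE_SIZE);     /* a real page failed */
            free(pfn_err);
            return NULL;
        }
    }
    free(pfn_err);
    return base;
}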
line source
1 /******************************************************************************
2 * xc_domain_restore.c
3 *
4 * Restore the state of a guest session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 * Copyright (c) 2006, Intel Corporation
8 * Copyright (c) 2007, XenSource Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
21 * Place - Suite 330, Boston, MA 02111-1307 USA.
22 *
23 */
25 #include <stdlib.h>
26 #include <unistd.h>
28 #include "xg_private.h"
29 #include "xg_save_restore.h"
30 #include "xc_dom.h"
32 #include <xen/hvm/ioreq.h>
33 #include <xen/hvm/params.h>
35 struct restore_ctx {
36 unsigned long max_mfn; /* max mfn of the current host machine */
37 unsigned long hvirt_start; /* virtual starting address of the hypervisor */
38 unsigned int pt_levels; /* #levels of page tables used by the current guest */
39 unsigned long nr_pfns; /* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
40 xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to its current MFN. */
41 xen_pfn_t *p2m; /* A table mapping each PFN to its new MFN. */
42 unsigned no_superpage_mem; /* Set when the host lacks enough contiguous memory for superpage allocation */
43 struct domain_info_context dinfo;
44 };
46 /*
47 **
48 **
49 */
50 #define SUPERPAGE_PFN_SHIFT 9
51 #define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT)
53 /*
54 * Setting bit 31 forces allocation of a superpage even when not all pfns
55 * have arrived; bit 30 indicates that we are not tracking a superpage.
56 */
57 #define FORCE_SP_SHIFT 31
58 #define FORCE_SP_MASK (1UL << FORCE_SP_SHIFT)
60 #define INVALID_SUPER_PAGE ((1UL << 30) + 1)
61 #define SUPER_PAGE_START(pfn) (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
62 #define SUPER_PAGE_TRACKING(pfn) ( (pfn) != INVALID_SUPER_PAGE )
63 #define SUPER_PAGE_DONE(pfn) ( SUPER_PAGE_START(pfn) )
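An editorial worked example of the constants above (not in the original file): with 4K pages, SUPERPAGE_PFN_SHIFT of 9 means one 2M superpage spans 1 << 9 = 512 pfns, so:

/* SUPERPAGE_NR_PFNS == 512        (512 * 4KiB == 2MiB)              */
/* SUPER_PAGE_START(0)    -> true  (pfn 0 begins the first extent)   */
/* SUPER_PAGE_START(512)  -> true  (the next extent begins)          */
/* SUPER_PAGE_START(513)  -> false (inside an extent)                */
/* SUPER_PAGE_DONE(pfn) is true when the expected pfn has advanced   */
/* to the start of the next extent, i.e. all 512 pfns of the         */
/* current extent have been accounted for.                           */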
65 static int super_page_populated(struct restore_ctx *ctx, unsigned long pfn)
66 {
67 int i;
68 pfn &= ~(SUPERPAGE_NR_PFNS - 1);
69 for ( i = pfn; i < pfn + SUPERPAGE_NR_PFNS; i++ )
70 {
71 if ( ctx->p2m[i] != INVALID_P2M_ENTRY )
72 return 1;
73 }
74 return 0;
75 }
77 /*
78 * Break a 2M page and move contents of [extent start, next_pfn-1] to
79 * newly allocated 4K pages
80 */
81 static int break_super_page(int xc_handle,
82 uint32_t dom,
83 struct restore_ctx *ctx,
84 xen_pfn_t next_pfn)
85 {
86 xen_pfn_t *page_array, start_pfn, mfn;
87 uint8_t *ram_base, *save_buf;
88 unsigned long i;
89 int tot_pfns, rc = 0;
91 tot_pfns = (next_pfn & (SUPERPAGE_NR_PFNS - 1));
93 start_pfn = next_pfn & ~(SUPERPAGE_NR_PFNS - 1);
94 for ( i = start_pfn; i < start_pfn + SUPERPAGE_NR_PFNS; i++ )
95 {
96 /* check that the 2M page is fully populated */
97 if ( ctx->p2m[i] == INVALID_P2M_ENTRY ) {
98 DPRINTF("Previous super page was populated wrongly!\n");
99 return 1;
100 }
101 }
103 page_array = (xen_pfn_t*)malloc(tot_pfns * sizeof(xen_pfn_t));
104 save_buf = (uint8_t*)malloc(tot_pfns * PAGE_SIZE);
106 if ( !page_array || !save_buf )
107 {
108 ERROR("alloc page_array failed\n");
109 errno = ENOMEM;
110 rc = 1;
111 goto out;
112 }
114 /* save previous super page contents */
115 for ( i = 0; i < tot_pfns; i++ )
116 {
117 /* only support HVM, as the mfn of the 2M page is missing */
118 page_array[i] = start_pfn + i;
119 }
121 ram_base = xc_map_foreign_pages(xc_handle, dom, PROT_READ,
122 page_array, tot_pfns);
124 if ( ram_base == NULL )
125 {
126 ERROR("map batch failed\n");
127 rc = 1;
128 goto out;
129 }
131 memcpy(save_buf, ram_base, tot_pfns * PAGE_SIZE);
132 munmap(ram_base, tot_pfns * PAGE_SIZE);
134 /* free the super page */
135 if ( xc_domain_memory_decrease_reservation(xc_handle, dom, 1,
136 SUPERPAGE_PFN_SHIFT, &start_pfn) != 0 )
137 {
138 ERROR("free 2M page failure @ 0x%ld.\n", next_pfn);
139 rc = 1;
140 goto out;
141 }
143 start_pfn = next_pfn & ~(SUPERPAGE_NR_PFNS - 1);
144 for ( i = start_pfn; i < start_pfn + SUPERPAGE_NR_PFNS; i++ )
145 {
146 ctx->p2m[i] = INVALID_P2M_ENTRY;
147 }
149 for ( i = start_pfn; i < start_pfn + tot_pfns; i++ )
150 {
151 mfn = i;
152 if (xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0,
153 0, &mfn) != 0)
154 {
155 ERROR("Failed to allocate physical memory.!\n");
156 errno = ENOMEM;
157 rc = 1;
158 goto out;
159 }
160 ctx->p2m[i] = mfn;
161 }
163 /* restore contents */
164 for ( i = 0; i < tot_pfns; i++ )
165 {
166 page_array[i] = start_pfn + i;
167 }
169 ram_base = xc_map_foreign_pages(xc_handle, dom, PROT_WRITE,
170 page_array, tot_pfns);
171 if ( ram_base == NULL )
172 {
173 ERROR("map batch failed\n");
174 rc = 1;
175 goto out;
176 }
178 memcpy(ram_base, save_buf, tot_pfns * PAGE_SIZE);
179 munmap(ram_base, tot_pfns * PAGE_SIZE);
181 out:
182 free(page_array);
183 free(save_buf);
184 return rc;
185 }
188 /*
189 * Allocate pages according to the pfn list: either one 2M page or a series
190 * of 4K pages. We also optimistically allocate a 2M page even when not all
191 * pages in the 2M extent have arrived, and fix it up in the next batch:
192 * if the new pages fill the missing slots in the 2M extent, do nothing;
193 * otherwise replace the original 2M page with 4K pages.
194 */
195 static int allocate_mfn_list(int xc_handle,
196 uint32_t dom,
197 struct restore_ctx *ctx,
198 unsigned long nr_extents,
199 xen_pfn_t *batch_buf,
200 xen_pfn_t *next_pfn,
201 int superpages)
202 {
203 unsigned int i;
204 unsigned long mfn, pfn, sp_pfn;
206 /* Check whether a superpage allocation is being forced, then clear the flag */
207 unsigned force_super_page = !!(*next_pfn & FORCE_SP_MASK);
208 *next_pfn &= ~FORCE_SP_MASK;
210 sp_pfn = *next_pfn;
212 if ( !superpages ||
213 ctx->no_superpage_mem ||
214 !SUPER_PAGE_TRACKING(sp_pfn) )
215 goto normal_page;
217 if ( !batch_buf )
218 {
219 /* Break the previous 2M page if its 512 pages were split across a batch boundary */
220 if ( SUPER_PAGE_TRACKING(sp_pfn) &&
221 !SUPER_PAGE_DONE(sp_pfn))
222 {
223 /* break the previously allocated super page */
224 if ( break_super_page(xc_handle, dom, ctx, sp_pfn) != 0 )
225 {
226 ERROR("Break previous super page fail!\n");
227 return 1;
228 }
229 }
231 /* following pages fit, in order, within the 2M extent */
232 return 0;
233 }
235 /*
236 * We try to allocate a 2M page only when:
237 * the user requested it (superpages),
238 * AND there is enough memory,
239 * AND we are currently tracking a 2M extent,
240 * AND we have tracked every page in the extent, OR a partial extent for speculation,
241 * AND no page in the 2M extent is already populated.
242 */
243 if ( !SUPER_PAGE_DONE(sp_pfn) && !force_super_page )
244 goto normal_page;
246 pfn = batch_buf[0] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
247 if ( super_page_populated(ctx, pfn) )
248 goto normal_page;
250 pfn &= ~(SUPERPAGE_NR_PFNS - 1);
251 mfn = pfn;
253 if ( xc_domain_memory_populate_physmap(xc_handle, dom, 1,
254 SUPERPAGE_PFN_SHIFT, 0, &mfn) == 0)
255 {
256 for ( i = pfn; i < pfn + SUPERPAGE_NR_PFNS; i++, mfn++ )
257 {
258 ctx->p2m[i] = mfn;
259 }
260 return 0;
261 }
262 DPRINTF("No 2M page available for pfn 0x%lx, fall back to 4K page.\n",
263 pfn);
264 ctx->no_superpage_mem = 1;
266 normal_page:
267 if ( !batch_buf )
268 return 0;
270 /* End the tracking: we wanted a 2M page but are finishing with 4K pages */
271 *next_pfn = INVALID_SUPER_PAGE;
273 for ( i = 0; i < nr_extents; i++ )
274 {
275 unsigned long pagetype = batch_buf[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
276 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
277 continue;
279 pfn = mfn = batch_buf[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
280 if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY )
281 {
282 if (xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0,
283 0, &mfn) != 0)
284 {
285 ERROR("Failed to allocate physical memory.! pfn=0x%lx, mfn=0x%lx.\n",
286 pfn, mfn);
287 errno = ENOMEM;
288 return 1;
289 }
290 ctx->p2m[pfn] = mfn;
291 }
292 }
294 return 0;
295 }
297 static int allocate_physmem(int xc_handle, uint32_t dom,
298 struct restore_ctx *ctx,
299 unsigned long *region_pfn_type, int region_size,
300 unsigned int hvm, xen_pfn_t *region_mfn, int superpages)
301 {
302 int i;
303 unsigned long pfn;
304 unsigned long pagetype;
306 /* Next expected pfn in order to track a possible 2M page */
307 static unsigned long required_pfn = INVALID_SUPER_PAGE;
309 /* Buffer of pfn list for 2M page, or series of 4K pages */
310 xen_pfn_t *batch_buf;
311 unsigned int batch_buf_len;
312 struct domain_info_context *dinfo = &ctx->dinfo;
314 if ( !superpages )
315 {
316 batch_buf = &region_pfn_type[0];
317 batch_buf_len = region_size;
318 goto alloc_page;
319 }
321 batch_buf = NULL;
322 batch_buf_len = 0;
323 /* This loop tracks the possible 2M page */
324 for (i = 0; i < region_size; i++)
325 {
326 pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
327 pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
329 if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
330 {
331 /* Do not start collecting pfns until we get a valid pfn */
332 if ( batch_buf_len != 0 )
333 batch_buf_len++;
334 continue;
335 }
337 if ( SUPER_PAGE_START(pfn) )
338 {
339 /* Start of a 2M extent, populate the previous buf */
340 if ( allocate_mfn_list(xc_handle, dom, ctx,
341 batch_buf_len, batch_buf,
342 &required_pfn, superpages) != 0 )
343 {
344 errno = ENOMEM;
345 return 1;
346 }
348 /* start new tracking for 2M page */
349 batch_buf = &region_pfn_type[i];
350 batch_buf_len = 1;
351 required_pfn = pfn + 1;
352 }
353 else if ( pfn == required_pfn )
354 {
355 /* this page fits the 2M extent in order */
356 batch_buf_len++;
357 required_pfn++;
358 }
359 else if ( SUPER_PAGE_TRACKING(required_pfn) )
360 {
361 /* break in a 2M extent, populate the previous buf */
362 if ( allocate_mfn_list(xc_handle, dom, ctx,
363 batch_buf_len, batch_buf,
364 &required_pfn, superpages) != 0 )
365 {
366 errno = ENOMEM;
367 return 1;
368 }
369 /* start new tracking for a series of 4K pages */
370 batch_buf = &region_pfn_type[i];
371 batch_buf_len = 1;
372 required_pfn = INVALID_SUPER_PAGE;
373 }
374 else
375 {
376 /* this page is 4K */
377 if ( !batch_buf )
378 batch_buf = &region_pfn_type[i];
379 batch_buf_len++;
380 }
381 }
383 /*
384 * Populate the remaining batch_buf at the end.
385 * We speculatively allocate a 2M page even when we have not seen all the
386 * pages in order (set bit 31). If superpage support was not requested,
387 * we skip the tracking loop and come here directly.
388 * Speculative allocation can't be used for PV guests, as we have no mfn
389 * with which to map the previous 2M range should we need to break it up.
390 */
391 if ( SUPER_PAGE_TRACKING(required_pfn) &&
392 !SUPER_PAGE_DONE(required_pfn) )
393 {
394 if (hvm)
395 required_pfn |= FORCE_SP_MASK;
396 else
397 required_pfn = INVALID_SUPER_PAGE;
398 }
400 alloc_page:
401 if ( batch_buf )
402 {
403 if ( allocate_mfn_list(xc_handle, dom, ctx,
404 batch_buf_len, batch_buf,
405 &required_pfn,
406 superpages) != 0 )
407 {
408 errno = ENOMEM;
409 return 1;
410 }
411 }
413 for (i = 0; i < region_size; i++)
414 {
415 pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
416 pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
418 if ( pfn > dinfo->p2m_size )
419 {
420 ERROR("pfn out of range");
421 return 1;
422 }
423 if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
424 {
425 region_mfn[i] = ~0UL;
426 }
427 else
428 {
429 if (ctx->p2m[pfn] == INVALID_P2M_ENTRY)
430 {
431 DPRINTF("Warning: pfn 0x%lx are not allocated!\n", pfn);
432 /*XXX:allocate this page?*/
433 }
435 /* setup region_mfn[] for batch map.
436 * For HVM guests, this interface takes PFNs, not MFNs */
437 region_mfn[i] = hvm ? pfn : ctx->p2m[pfn];
438 }
439 }
440 return 0;
441 }
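A hedged editorial summary of the tracking state that allocate_physmem() carries across batches in the static required_pfn variable, as used in the loop above:

/* required_pfn == INVALID_SUPER_PAGE : not tracking a 2M extent        */
/* required_pfn == <pfn>              : tracking; this is the next pfn  */
/*                                      expected to extend the extent   */
/* required_pfn | FORCE_SP_MASK       : the batch ended mid-extent for  */
/*                                      an HVM guest, so ask            */
/*                                      allocate_mfn_list() to allocate */
/*                                      the 2M page speculatively       */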
444 /* set when a consistent image is available */
445 static int completed = 0;
447 #define HEARTBEAT_MS 500
449 #ifndef __MINIOS__
450 static ssize_t read_exact_timed(int fd, void* buf, size_t size)
451 {
452 size_t offset = 0;
453 ssize_t len;
454 struct timeval tv;
455 fd_set rfds;
457 while ( offset < size )
458 {
459 if ( completed ) {
460 /* expect a heartbeat at most HEARTBEAT_MS ms apart */
461 tv.tv_sec = 0;
462 tv.tv_usec = HEARTBEAT_MS * 1000;
464 FD_ZERO(&rfds);
465 FD_SET(fd, &rfds);
466 len = select(fd + 1, &rfds, NULL, NULL, &tv);
467 if ( !FD_ISSET(fd, &rfds) ) {
468 fprintf(stderr, "read_exact_timed failed (select returned %zd)\n", len);
469 return -1;
470 }
471 }
473 len = read(fd, buf + offset, size - offset);
474 if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN)) )
475 continue;
476 if ( len <= 0 )
477 return -1;
478 offset += len;
479 }
481 return 0;
482 }
484 #define read_exact read_exact_timed
486 #else
487 #define read_exact_timed read_exact
488 #endif
489 /*
490 ** In the state file (or during transfer), all page-table pages are
491 ** converted into a 'canonical' form where references to actual mfns
492 ** are replaced with references to the corresponding pfns.
493 ** This function inverts that operation, replacing the pfn values with
494 ** the (now known) appropriate mfn values.
495 */
496 static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, struct restore_ctx *ctx,
497 void *page, int superpages)
498 {
499 int i, pte_last;
500 unsigned long pfn;
501 uint64_t pte;
502 struct domain_info_context *dinfo = &ctx->dinfo;
504 pte_last = PAGE_SIZE / ((ctx->pt_levels == 2)? 4 : 8);
506 for ( i = 0; i < pte_last; i++ )
507 {
508 if ( ctx->pt_levels == 2 )
509 pte = ((uint32_t *)page)[i];
510 else
511 pte = ((uint64_t *)page)[i];
513 /* XXX SMH: below needs fixing for PROT_NONE etc */
514 if ( !(pte & _PAGE_PRESENT) )
515 continue;
517 pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
519 /* Allocate mfn if necessary */
520 if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY )
521 {
522 unsigned long force_pfn = superpages ? FORCE_SP_MASK : pfn;
523 if (allocate_mfn_list(xc_handle, dom, ctx,
524 1, &pfn, &force_pfn, superpages) != 0)
525 return 0;
526 }
527 pte &= ~MADDR_MASK_X86;
528 pte |= (uint64_t)ctx->p2m[pfn] << PAGE_SHIFT;
530 if ( ctx->pt_levels == 2 )
531 ((uint32_t *)page)[i] = (uint32_t)pte;
532 else
533 ((uint64_t *)page)[i] = (uint64_t)pte;
534 }
536 return 1;
537 }
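To make the per-entry rewrite above concrete, here is a hedged sketch of the transformation applied to each present PTE; uncanon_pte() is a hypothetical helper name, but the masks and shift are the ones used in the function above.

/* Sketch only: canonical PTE (pfn-based, from the image) -> live PTE. */
static uint64_t uncanon_pte(uint64_t pte, const xen_pfn_t *p2m)
{
    unsigned long pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86; /* image pfn    */
    pte &= ~MADDR_MASK_X86;                   /* keep flags, drop old frame */
    pte |= (uint64_t)p2m[pfn] << PAGE_SHIFT;  /* insert the new mfn         */
    return pte;
}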
540 /* Load the p2m frame list, plus potential extended info chunk */
541 static xen_pfn_t *load_p2m_frame_list(struct restore_ctx *ctx,
542 int io_fd, int *pae_extended_cr3, int *ext_vcpucontext)
543 {
544 xen_pfn_t *p2m_frame_list;
545 vcpu_guest_context_any_t ctxt;
546 xen_pfn_t p2m_fl_zero;
547 struct domain_info_context *dinfo = &ctx->dinfo;
549 /* Read first entry of P2M list, or extended-info signature (~0UL). */
550 if ( read_exact(io_fd, &p2m_fl_zero, sizeof(long)) )
551 {
552 ERROR("read extended-info signature failed");
553 return NULL;
554 }
556 if ( p2m_fl_zero == ~0UL )
557 {
558 uint32_t tot_bytes;
560 /* Next 4 bytes: total size of following extended info. */
561 if ( read_exact(io_fd, &tot_bytes, sizeof(tot_bytes)) )
562 {
563 ERROR("read extended-info size failed");
564 return NULL;
565 }
567 while ( tot_bytes )
568 {
569 uint32_t chunk_bytes;
570 char chunk_sig[4];
572 /* 4-character chunk signature + 4-byte remaining chunk size. */
573 if ( read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
574 read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes)) ||
575 (tot_bytes < (chunk_bytes + 8)) )
576 {
577 ERROR("read extended-info chunk signature failed");
578 return NULL;
579 }
580 tot_bytes -= 8;
582 /* VCPU context structure? */
583 if ( !strncmp(chunk_sig, "vcpu", 4) )
584 {
585 /* Pick a guest word-size and PT depth from the ctxt size */
586 if ( chunk_bytes == sizeof (ctxt.x32) )
587 {
588 dinfo->guest_width = 4;
589 if ( ctx->pt_levels > 2 )
590 ctx->pt_levels = 3;
591 }
592 else if ( chunk_bytes == sizeof (ctxt.x64) )
593 {
594 dinfo->guest_width = 8;
595 ctx->pt_levels = 4;
596 }
597 else
598 {
599 ERROR("bad extended-info context size %d", chunk_bytes);
600 return NULL;
601 }
603 if ( read_exact(io_fd, &ctxt, chunk_bytes) )
604 {
605 ERROR("read extended-info vcpu context failed");
606 return NULL;
607 }
608 tot_bytes -= chunk_bytes;
609 chunk_bytes = 0;
611 if ( GET_FIELD(&ctxt, vm_assist)
612 & (1UL << VMASST_TYPE_pae_extended_cr3) )
613 *pae_extended_cr3 = 1;
614 }
615 else if ( !strncmp(chunk_sig, "extv", 4) )
616 {
617 *ext_vcpucontext = 1;
618 }
620 /* Any remaining bytes of this chunk: read and discard. */
621 while ( chunk_bytes )
622 {
623 unsigned long sz = MIN(chunk_bytes, sizeof(xen_pfn_t));
624 if ( read_exact(io_fd, &p2m_fl_zero, sz) )
625 {
626 ERROR("read-and-discard extended-info chunk bytes failed");
627 return NULL;
628 }
629 chunk_bytes -= sz;
630 tot_bytes -= sz;
631 }
632 }
634 /* Now read the real first entry of P2M list. */
635 if ( read_exact(io_fd, &p2m_fl_zero, sizeof(xen_pfn_t)) )
636 {
637 ERROR("read first entry of p2m_frame_list failed");
638 return NULL;
639 }
640 }
642 /* Now that we know the guest's word-size, can safely allocate
643 * the p2m frame list */
644 if ( (p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) == NULL )
645 {
646 ERROR("Couldn't allocate p2m_frame_list array");
647 return NULL;
648 }
650 /* First entry has already been read. */
651 p2m_frame_list[0] = p2m_fl_zero;
652 if ( read_exact(io_fd, &p2m_frame_list[1],
653 (P2M_FL_ENTRIES - 1) * sizeof(xen_pfn_t)) )
654 {
655 ERROR("read p2m_frame_list failed");
656 return NULL;
657 }
659 return p2m_frame_list;
660 }
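For reference, a hedged editorial sketch of the PV stream layout consumed by the function above:

/* either:  xen_pfn_t p2m_frame_list[0]  (no extended info), or:         */
/*   xen_pfn_t  ~0UL          extended-info signature                    */
/*   uint32_t   tot_bytes     total size of the chunks that follow       */
/*   repeated:  char sig[4] ("vcpu", "extv", ...), uint32_t chunk_bytes, */
/*              then chunk_bytes of payload; "vcpu" carries a guest      */
/*              context whose size reveals the guest word width and      */
/*              page-table depth, unknown chunks are read and discarded  */
/*   then the real p2m_frame_list[0]                                     */
/* followed by p2m_frame_list[1 .. P2M_FL_ENTRIES-1].                    */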
662 typedef struct {
663 int ishvm;
664 union {
665 struct tailbuf_pv {
666 unsigned int pfncount;
667 unsigned long* pfntab;
668 unsigned int vcpucount;
669 unsigned char* vcpubuf;
670 unsigned char shared_info_page[PAGE_SIZE];
671 } pv;
672 struct tailbuf_hvm {
673 uint64_t magicpfns[3];
674 uint32_t hvmbufsize, reclen;
675 uint8_t* hvmbuf;
676 struct {
677 uint32_t magic;
678 uint32_t version;
679 uint64_t len;
680 } qemuhdr;
681 uint32_t qemubufsize;
682 uint8_t* qemubuf;
683 } hvm;
684 } u;
685 } tailbuf_t;
687 /* read stream until EOF, growing the buffer as necessary */
688 static int compat_buffer_qemu(int fd, struct tailbuf_hvm *buf)
689 {
690 uint8_t *qbuf, *tmp;
691 int blen = 0, dlen = 0;
692 int rc;
694 /* currently save records tend to be about 7K */
695 blen = 8192;
696 if ( !(qbuf = malloc(blen)) ) {
697 ERROR("Error allocating QEMU buffer");
698 return -1;
699 }
701 while( (rc = read(fd, qbuf+dlen, blen-dlen)) > 0 ) {
702 DPRINTF("Read %d bytes of QEMU data\n", rc);
703 dlen += rc;
705 if (dlen == blen) {
706 DPRINTF("%d-byte QEMU buffer full, reallocating...\n", dlen);
707 blen += 4096;
708 tmp = realloc(qbuf, blen);
709 if ( !tmp ) {
710 ERROR("Error growing QEMU buffer to %d bytes", blen);
711 free(qbuf);
712 return -1;
713 }
714 qbuf = tmp;
715 }
716 }
718 if ( rc < 0 ) {
719 ERROR("Error reading QEMU data");
720 free(qbuf);
721 return -1;
722 }
724 if ( memcmp(qbuf, "QEVM", 4) ) {
725 ERROR("Invalid QEMU magic: 0x%08x", *(unsigned long*)qbuf);
726 free(qbuf);
727 return -1;
728 }
730 buf->qemubuf = qbuf;
731 buf->qemubufsize = dlen;
733 return 0;
734 }
736 static int buffer_qemu(int fd, struct tailbuf_hvm *buf)
737 {
738 uint32_t qlen;
739 uint8_t *tmp;
741 if ( read_exact(fd, &qlen, sizeof(qlen)) ) {
742 ERROR("Error reading QEMU header length");
743 return -1;
744 }
746 if ( qlen > buf->qemubufsize ) {
747 if ( buf->qemubuf) {
748 tmp = realloc(buf->qemubuf, qlen);
749 if ( tmp )
750 buf->qemubuf = tmp;
751 else {
752 ERROR("Error reallocating QEMU state buffer");
753 return -1;
754 }
755 } else {
756 buf->qemubuf = malloc(qlen);
757 if ( !buf->qemubuf ) {
758 ERROR("Error allocating QEMU state buffer");
759 return -1;
760 }
761 }
762 }
763 buf->qemubufsize = qlen;
765 if ( read_exact(fd, buf->qemubuf, buf->qemubufsize) ) {
766 ERROR("Error reading QEMU state");
767 return -1;
768 }
770 return 0;
771 }
773 static int dump_qemu(uint32_t dom, struct tailbuf_hvm *buf)
774 {
775 int saved_errno;
776 char path[256];
777 FILE *fp;
779 sprintf(path, "/var/lib/xen/qemu-save.%u", dom);
780 fp = fopen(path, "wb");
781 if ( !fp )
782 return -1;
784 DPRINTF("Writing %d bytes of QEMU data\n", buf->qemubufsize);
785 if ( fwrite(buf->qemubuf, 1, buf->qemubufsize, fp) != buf->qemubufsize) {
786 saved_errno = errno;
787 fclose(fp);
788 errno = saved_errno;
789 return -1;
790 }
792 fclose(fp);
794 return 0;
795 }
797 static int buffer_tail_hvm(struct restore_ctx *ctx, struct tailbuf_hvm *buf, int fd,
798 unsigned int max_vcpu_id, uint64_t vcpumap,
799 int ext_vcpucontext)
800 {
801 uint8_t *tmp;
802 unsigned char qemusig[21];
804 if ( read_exact(fd, buf->magicpfns, sizeof(buf->magicpfns)) ) {
805 ERROR("Error reading magic PFNs");
806 return -1;
807 }
809 if ( read_exact(fd, &buf->reclen, sizeof(buf->reclen)) ) {
810 ERROR("Error reading HVM params size");
811 return -1;
812 }
814 if ( buf->reclen > buf->hvmbufsize ) {
815 if ( buf->hvmbuf) {
816 tmp = realloc(buf->hvmbuf, buf->reclen);
817 if ( tmp ) {
818 buf->hvmbuf = tmp;
819 buf->hvmbufsize = buf->reclen;
820 } else {
821 ERROR("Error reallocating HVM param buffer");
822 return -1;
823 }
824 } else {
825 buf->hvmbuf = malloc(buf->reclen);
826 if ( !buf->hvmbuf ) {
827 ERROR("Error allocating HVM param buffer");
828 return -1;
829 }
830 buf->hvmbufsize = buf->reclen;
831 }
832 }
834 if ( read_exact(fd, buf->hvmbuf, buf->reclen) ) {
835 ERROR("Error reading HVM params");
836 return -1;
837 }
839 if ( read_exact(fd, qemusig, sizeof(qemusig)) ) {
840 ERROR("Error reading QEMU signature");
841 return -1;
842 }
844 /* The normal live-migration QEMU record has no length information.
845 * Short of reimplementing the QEMU parser, we're forced to just read
846 * until EOF. Remus gets around this by sending a different signature
847 * which includes a length prefix */
848 if ( !memcmp(qemusig, "QemuDeviceModelRecord", sizeof(qemusig)) )
849 return compat_buffer_qemu(fd, buf);
850 else if ( !memcmp(qemusig, "RemusDeviceModelState", sizeof(qemusig)) )
851 return buffer_qemu(fd, buf);
853 qemusig[20] = '\0';
854 ERROR("Invalid QEMU signature: %s", qemusig);
855 return -1;
856 }
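A hedged editorial summary of the HVM image tail parsed above; the meaning of the three magic pfns follows from how the finish_hvm path later in this file wires them to HVM params:

/*   uint64_t magicpfns[3]    ioreq, buffered-ioreq and xenstore pfns        */
/*   uint32_t reclen          length of the HVM context record               */
/*   uint8_t  hvmbuf[reclen]  hypervisor HVM save record                     */
/*   char     qemusig[21]     "QemuDeviceModelRecord" -> QEMU state follows  */
/*                            to EOF (compat_buffer_qemu), or                */
/*                            "RemusDeviceModelState" -> uint32_t length     */
/*                            plus that many bytes (buffer_qemu)             */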
858 static int buffer_tail_pv(struct restore_ctx *ctx, struct tailbuf_pv *buf, int fd,
859 unsigned int max_vcpu_id, uint64_t vcpumap,
860 int ext_vcpucontext)
861 {
862 unsigned int i;
863 size_t pfnlen, vcpulen;
864 struct domain_info_context *dinfo = &ctx->dinfo;
866 /* TODO: handle changing pfntab and vcpu counts */
867 /* PFN tab */
868 if ( read_exact(fd, &buf->pfncount, sizeof(buf->pfncount)) ||
869 (buf->pfncount > (1U << 28)) ) /* up to 1TB of address space */
870 {
871 ERROR("Error when reading pfn count");
872 return -1;
873 }
874 pfnlen = sizeof(unsigned long) * buf->pfncount;
875 if ( !(buf->pfntab) ) {
876 if ( !(buf->pfntab = malloc(pfnlen)) ) {
877 ERROR("Error allocating PFN tail buffer");
878 return -1;
879 }
880 }
881 // DPRINTF("Reading PFN tab: %d bytes\n", pfnlen);
882 if ( read_exact(fd, buf->pfntab, pfnlen) ) {
883 ERROR("Error when reading pfntab");
884 goto free_pfntab;
885 }
887 /* VCPU contexts */
888 buf->vcpucount = 0;
889 for (i = 0; i <= max_vcpu_id; i++) {
890 // DPRINTF("vcpumap: %llx, cpu: %d, bit: %llu\n", vcpumap, i, (vcpumap % (1ULL << i)));
891 if ( (!(vcpumap & (1ULL << i))) )
892 continue;
893 buf->vcpucount++;
894 }
895 // DPRINTF("VCPU count: %d\n", buf->vcpucount);
896 vcpulen = ((dinfo->guest_width == 8) ? sizeof(vcpu_guest_context_x86_64_t)
897 : sizeof(vcpu_guest_context_x86_32_t)) * buf->vcpucount;
898 if ( ext_vcpucontext )
899 vcpulen += 128 * buf->vcpucount;
901 if ( !(buf->vcpubuf) ) {
902 if ( !(buf->vcpubuf = malloc(vcpulen)) ) {
903 ERROR("Error allocating VCPU ctxt tail buffer");
904 goto free_pfntab;
905 }
906 }
907 // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen);
908 if ( read_exact(fd, buf->vcpubuf, vcpulen) ) {
909 ERROR("Error when reading ctxt");
910 goto free_vcpus;
911 }
913 /* load shared_info_page */
914 // DPRINTF("Reading shared info: %lu bytes\n", PAGE_SIZE);
915 if ( read_exact(fd, buf->shared_info_page, PAGE_SIZE) ) {
916 ERROR("Error when reading shared info page");
917 goto free_vcpus;
918 }
920 return 0;
922 free_vcpus:
923 if (buf->vcpubuf) {
924 free (buf->vcpubuf);
925 buf->vcpubuf = NULL;
926 }
927 free_pfntab:
928 if (buf->pfntab) {
929 free (buf->pfntab);
930 buf->pfntab = NULL;
931 }
933 return -1;
934 }
936 static int buffer_tail(struct restore_ctx *ctx, tailbuf_t *buf, int fd, unsigned int max_vcpu_id,
937 uint64_t vcpumap, int ext_vcpucontext)
938 {
939 if ( buf->ishvm )
940 return buffer_tail_hvm(ctx, &buf->u.hvm, fd, max_vcpu_id, vcpumap,
941 ext_vcpucontext);
942 else
943 return buffer_tail_pv(ctx, &buf->u.pv, fd, max_vcpu_id, vcpumap,
944 ext_vcpucontext);
945 }
947 static void tailbuf_free_hvm(struct tailbuf_hvm *buf)
948 {
949 if ( buf->hvmbuf ) {
950 free(buf->hvmbuf);
951 buf->hvmbuf = NULL;
952 }
953 if ( buf->qemubuf ) {
954 free(buf->qemubuf);
955 buf->qemubuf = NULL;
956 }
957 }
959 static void tailbuf_free_pv(struct tailbuf_pv *buf)
960 {
961 if ( buf->vcpubuf ) {
962 free(buf->vcpubuf);
963 buf->vcpubuf = NULL;
964 }
965 if ( buf->pfntab ) {
966 free(buf->pfntab);
967 buf->pfntab = NULL;
968 }
969 }
971 static void tailbuf_free(tailbuf_t *buf)
972 {
973 if ( buf->ishvm )
974 tailbuf_free_hvm(&buf->u.hvm);
975 else
976 tailbuf_free_pv(&buf->u.pv);
977 }
979 typedef struct {
980 void* pages;
981 /* pages is of length nr_physpages, pfn_types is of length nr_pages */
982 unsigned int nr_physpages, nr_pages;
984 /* Types of the pfns in the current region */
985 unsigned long* pfn_types;
987 int verify;
989 int new_ctxt_format;
990 int max_vcpu_id;
991 uint64_t vcpumap;
992 uint64_t identpt;
993 uint64_t vm86_tss;
994 } pagebuf_t;
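pagebuf_get_one() below multiplexes several record types onto the leading batch-count word; a hedged editorial summary of the cases it handles:

/*   count  > 0 : <count> pfn/type words, then data for the non-XTAB pages */
/*   count == 0 : end of this pass over the page stream                    */
/*   count == -1: switch into page-verify mode                             */
/*   count == -2: max_vcpu_id and a 64-bit vcpumap follow                  */
/*   count == -3: 64-bit EPT identity-map address (after 4 bytes padding)  */
/*   count == -4: 64-bit vm86 TSS address (after 4 bytes padding)          */
/*   count == -5: tmem state, handed to xc_tmem_restore()                  */
/*   count == -6: extra tmem state, handed to xc_tmem_restore_extra()      */
/*   count == -7: tsc_mode, elapsed ns, khz and incarnation, handed to     */
/*                xc_domain_set_tsc_info()                                 */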
996 static int pagebuf_init(pagebuf_t* buf)
997 {
998 memset(buf, 0, sizeof(*buf));
999 return 0;
1000 }
1002 static void pagebuf_free(pagebuf_t* buf)
1003 {
1004 if (buf->pages) {
1005 free(buf->pages);
1006 buf->pages = NULL;
1007 }
1008 if(buf->pfn_types) {
1009 free(buf->pfn_types);
1010 buf->pfn_types = NULL;
1011 }
1012 }
1014 static int pagebuf_get_one(pagebuf_t* buf, int fd, int xch, uint32_t dom)
1016 int count, countpages, oldcount, i;
1017 void* ptmp;
1019 if ( read_exact(fd, &count, sizeof(count)) )
1021 ERROR("Error when reading batch size");
1022 return -1;
1025 // DPRINTF("reading batch of %d pages\n", count);
1027 if (!count) {
1028 // DPRINTF("Last batch read\n");
1029 return 0;
1030 } else if (count == -1) {
1031 DPRINTF("Entering page verify mode\n");
1032 buf->verify = 1;
1033 return pagebuf_get_one(buf, fd, xch, dom);
1034 } else if (count == -2) {
1035 buf->new_ctxt_format = 1;
1036 if ( read_exact(fd, &buf->max_vcpu_id, sizeof(buf->max_vcpu_id)) ||
1037 buf->max_vcpu_id >= 64 || read_exact(fd, &buf->vcpumap,
1038 sizeof(uint64_t)) ) {
1039 ERROR("Error when reading max_vcpu_id");
1040 return -1;
1042 // DPRINTF("Max VCPU ID: %d, vcpumap: %llx\n", buf->max_vcpu_id, buf->vcpumap);
1043 return pagebuf_get_one(buf, fd, xch, dom);
1044 } else if (count == -3) {
1045 /* Skip padding 4 bytes then read the EPT identity PT location. */
1046 if ( read_exact(fd, &buf->identpt, sizeof(uint32_t)) ||
1047 read_exact(fd, &buf->identpt, sizeof(uint64_t)) )
1049 ERROR("error read the address of the EPT identity map");
1050 return -1;
1052 // DPRINTF("EPT identity map address: %llx\n", buf->identpt);
1053 return pagebuf_get_one(buf, fd, xch, dom);
1054 } else if ( count == -4 ) {
1055 /* Skip padding 4 bytes then read the vm86 TSS location. */
1056 if ( read_exact(fd, &buf->vm86_tss, sizeof(uint32_t)) ||
1057 read_exact(fd, &buf->vm86_tss, sizeof(uint64_t)) )
1059 ERROR("error read the address of the vm86 TSS");
1060 return -1;
1062 // DPRINTF("VM86 TSS location: %llx\n", buf->vm86_tss);
1063 return pagebuf_get_one(buf, fd, xch, dom);
1064 } else if ( count == -5 ) {
1065 DPRINTF("xc_domain_restore start tmem\n");
1066 if ( xc_tmem_restore(xch, dom, fd) ) {
1067 ERROR("error reading/restoring tmem");
1068 return -1;
1070 return pagebuf_get_one(buf, fd, xch, dom);
1072 else if ( count == -6 ) {
1073 if ( xc_tmem_restore_extra(xch, dom, fd) ) {
1074 ERROR("error reading/restoring tmem extra");
1075 return -1;
1077 return pagebuf_get_one(buf, fd, xch, dom);
1078 } else if ( count == -7 ) {
1079 uint32_t tsc_mode, khz, incarn;
1080 uint64_t nsec;
1081 if ( read_exact(fd, &tsc_mode, sizeof(uint32_t)) ||
1082 read_exact(fd, &nsec, sizeof(uint64_t)) ||
1083 read_exact(fd, &khz, sizeof(uint32_t)) ||
1084 read_exact(fd, &incarn, sizeof(uint32_t)) ||
1085 xc_domain_set_tsc_info(xch, dom, tsc_mode, nsec, khz, incarn) ) {
1086 ERROR("error reading/restoring tsc info");
1087 return -1;
1089 return pagebuf_get_one(buf, fd, xch, dom);
1090 } else if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
1091 ERROR("Max batch size exceeded (%d). Giving up.", count);
1092 return -1;
1095 oldcount = buf->nr_pages;
1096 buf->nr_pages += count;
1097 if (!buf->pfn_types) {
1098 if (!(buf->pfn_types = malloc(buf->nr_pages * sizeof(*(buf->pfn_types))))) {
1099 ERROR("Could not allocate PFN type buffer");
1100 return -1;
1102 } else {
1103 if (!(ptmp = realloc(buf->pfn_types, buf->nr_pages * sizeof(*(buf->pfn_types))))) {
1104 ERROR("Could not reallocate PFN type buffer");
1105 return -1;
1107 buf->pfn_types = ptmp;
1109 if ( read_exact(fd, buf->pfn_types + oldcount, count * sizeof(*(buf->pfn_types)))) {
1110 ERROR("Error when reading region pfn types");
1111 return -1;
1114 countpages = count;
1115 for (i = oldcount; i < buf->nr_pages; ++i)
1116 if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB)
1117 --countpages;
1119 if (!countpages)
1120 return count;
1122 oldcount = buf->nr_physpages;
1123 buf->nr_physpages += countpages;
1124 if (!buf->pages) {
1125 if (!(buf->pages = malloc(buf->nr_physpages * PAGE_SIZE))) {
1126 ERROR("Could not allocate page buffer");
1127 return -1;
1129 } else {
1130 if (!(ptmp = realloc(buf->pages, buf->nr_physpages * PAGE_SIZE))) {
1131 ERROR("Could not reallocate page buffer");
1132 return -1;
1134 buf->pages = ptmp;
1136 if ( read_exact(fd, buf->pages + oldcount * PAGE_SIZE, countpages * PAGE_SIZE) ) {
1137 ERROR("Error when reading pages");
1138 return -1;
1141 return count;
1144 static int pagebuf_get(pagebuf_t* buf, int fd, int xch, uint32_t dom)
1145 {
1146 int rc;
1148 buf->nr_physpages = buf->nr_pages = 0;
1150 do {
1151 rc = pagebuf_get_one(buf, fd, xch, dom);
1152 } while (rc > 0);
1154 if (rc < 0)
1155 pagebuf_free(buf);
1157 return rc;
1158 }
1160 static int apply_batch(int xc_handle, uint32_t dom, struct restore_ctx *ctx,
1161 xen_pfn_t* region_mfn, unsigned long* pfn_type, int pae_extended_cr3,
1162 unsigned int hvm, struct xc_mmu* mmu,
1163 pagebuf_t* pagebuf, int curbatch, int superpages)
1165 int i, j, curpage;
1166 /* used by debug verify code */
1167 unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
1168 /* Our mapping of the current region (batch) */
1169 char *region_base;
1170 /* A temporary mapping, and a copy, of one frame of guest memory. */
1171 unsigned long *page = NULL;
1172 int nraces = 0;
1173 struct domain_info_context *dinfo = &ctx->dinfo;
1174 int* pfn_err = NULL;
1175 int rc = -1;
1177 unsigned long mfn, pfn, pagetype;
1179 j = pagebuf->nr_pages - curbatch;
1180 if (j > MAX_BATCH_SIZE)
1181 j = MAX_BATCH_SIZE;
1183 if (allocate_physmem(xc_handle, dom, ctx, &pagebuf->pfn_types[curbatch],
1184 j, hvm, region_mfn, superpages) != 0)
1186 ERROR("allocate_physmem() failed\n");
1187 return -1;
1190 /* Map relevant mfns */
1191 pfn_err = calloc(j, sizeof(*pfn_err));
1192 region_base = xc_map_foreign_bulk(
1193 xc_handle, dom, PROT_WRITE, region_mfn, pfn_err, j);
1195 if ( region_base == NULL )
1197 ERROR("map batch failed");
1198 free(pfn_err);
1199 return -1;
1202 for ( i = 0, curpage = -1; i < j; i++ )
1204 pfn = pagebuf->pfn_types[i + curbatch] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
1205 pagetype = pagebuf->pfn_types[i + curbatch] & XEN_DOMCTL_PFINFO_LTAB_MASK;
1207 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
1208 /* a bogus/unmapped page: skip it */
1209 continue;
1211 if (pfn_err[i])
1213 ERROR("unexpected PFN mapping failure");
1214 goto err_mapped;
1217 ++curpage;
1219 if ( pfn > dinfo->p2m_size )
1221 ERROR("pfn out of range");
1222 goto err_mapped;
1225 pfn_type[pfn] = pagetype;
1227 mfn = ctx->p2m[pfn];
1229 /* In verify mode, we use a copy; otherwise we work in place */
1230 page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
1232 memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, PAGE_SIZE);
1234 pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
1236 if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
1237 (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
1239 /*
1240 ** A page table page - need to 'uncanonicalize' it, i.e.
1241 ** replace all the references to pfns with the corresponding
1242 ** mfns for the new domain.
1243 **
1244 ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
1245 ** so we may need to update the p2m after the main loop.
1246 ** Hence we defer uncanonicalisation of L1s until then.
1247 */
1248 if ((ctx->pt_levels != 3) ||
1249 pae_extended_cr3 ||
1250 (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
1252 if (!uncanonicalize_pagetable(xc_handle, dom, ctx,
1253 page, superpages)) {
1254 /*
1255 ** Failing to uncanonicalize a page table can be ok
1256 ** under live migration since the page's type may have
1257 ** changed by now (and we'll get an update later).
1258 */
1259 DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
1260 pagetype >> 28, pfn, mfn);
1261 nraces++;
1262 continue;
1266 else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
1268 ERROR("Bogus page type %lx page table is out of range: "
1269 "i=%d p2m_size=%lu", pagetype, i, dinfo->p2m_size);
1270 goto err_mapped;
1273 if ( pagebuf->verify )
1275 int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
1276 if ( res )
1278 int v;
1280 DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
1281 "actualcs=%08lx\n", pfn, pagebuf->pfn_types[pfn],
1282 csum_page(region_base + (i + curbatch)*PAGE_SIZE),
1283 csum_page(buf));
1285 for ( v = 0; v < 4; v++ )
1287 unsigned long *p = (unsigned long *)
1288 (region_base + i*PAGE_SIZE);
1289 if ( buf[v] != p[v] )
1290 DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]);
1295 if ( !hvm &&
1296 xc_add_mmu_update(xc_handle, mmu,
1297 (((unsigned long long)mfn) << PAGE_SHIFT)
1298 | MMU_MACHPHYS_UPDATE, pfn) )
1300 ERROR("failed machpys update mfn=%lx pfn=%lx", mfn, pfn);
1301 goto err_mapped;
1303 } /* end of 'batch' for loop */
1305 rc = nraces;
1307 err_mapped:
1308 munmap(region_base, j*PAGE_SIZE);
1309 free(pfn_err);
1311 return rc;
1314 int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
1315 unsigned int store_evtchn, unsigned long *store_mfn,
1316 unsigned int console_evtchn, unsigned long *console_mfn,
1317 unsigned int hvm, unsigned int pae, int superpages)
1319 DECLARE_DOMCTL;
1320 int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
1321 unsigned long mfn, pfn;
1322 unsigned int prev_pc, this_pc;
1323 int nraces = 0;
1325 /* The new domain's shared-info frame number. */
1326 unsigned long shared_info_frame;
1327 unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
1328 shared_info_any_t *old_shared_info =
1329 (shared_info_any_t *)shared_info_page;
1330 shared_info_any_t *new_shared_info;
1332 /* A copy of the CPU context of the guest. */
1333 vcpu_guest_context_any_t ctxt;
1335 /* A table containing the type of each PFN (/not/ MFN!). */
1336 unsigned long *pfn_type = NULL;
1338 /* A table of MFNs to map in the current region */
1339 xen_pfn_t *region_mfn = NULL;
1341 /* A copy of the pfn-to-mfn table frame list. */
1342 xen_pfn_t *p2m_frame_list = NULL;
1344 /* A temporary mapping of the guest's start_info page. */
1345 start_info_any_t *start_info;
1347 /* Our mapping of the current region (batch) */
1348 char *region_base;
1350 struct xc_mmu *mmu = NULL;
1352 struct mmuext_op pin[MAX_PIN_BATCH];
1353 unsigned int nr_pins;
1355 uint64_t vcpumap = 1ULL;
1356 unsigned int max_vcpu_id = 0;
1357 int new_ctxt_format = 0;
1359 pagebuf_t pagebuf;
1360 tailbuf_t tailbuf, tmptail;
1361 void* vcpup;
1363 static struct restore_ctx _ctx = {
1364 .live_p2m = NULL,
1365 .p2m = NULL,
1366 .no_superpage_mem = 0,
1367 };
1368 static struct restore_ctx *ctx = &_ctx;
1369 struct domain_info_context *dinfo = &ctx->dinfo;
1371 pagebuf_init(&pagebuf);
1372 memset(&tailbuf, 0, sizeof(tailbuf));
1373 tailbuf.ishvm = hvm;
1375 /* For info only */
1376 ctx->nr_pfns = 0;
1378 /* Always try to allocate 2M pages for HVM */
1379 if ( hvm )
1380 superpages = 1;
1382 if ( read_exact(io_fd, &dinfo->p2m_size, sizeof(unsigned long)) )
1384 ERROR("read: p2m_size");
1385 goto out;
1387 DPRINTF("xc_domain_restore start: p2m_size = %lx\n", dinfo->p2m_size);
1389 if ( !get_platform_info(xc_handle, dom,
1390 &ctx->max_mfn, &ctx->hvirt_start, &ctx->pt_levels, &dinfo->guest_width) )
1392 ERROR("Unable to get platform info.");
1393 return 1;
1396 /* The *current* word size of the guest isn't very interesting; for now
1397 * assume the guest will be the same as we are. We'll fix that later
1398 * if we discover otherwise. */
1399 dinfo->guest_width = sizeof(unsigned long);
1400 ctx->pt_levels = (dinfo->guest_width == 8) ? 4 : (ctx->pt_levels == 2) ? 2 : 3;
1402 if ( !hvm )
1404 /* Load the p2m frame list, plus potential extended info chunk */
1405 p2m_frame_list = load_p2m_frame_list(ctx,
1406 io_fd, &pae_extended_cr3, &ext_vcpucontext);
1407 if ( !p2m_frame_list )
1408 goto out;
1410 /* Now that we know the word size, tell Xen about it */
1411 memset(&domctl, 0, sizeof(domctl));
1412 domctl.domain = dom;
1413 domctl.cmd = XEN_DOMCTL_set_address_size;
1414 domctl.u.address_size.size = dinfo->guest_width * 8;
1415 frc = do_domctl(xc_handle, &domctl);
1416 if ( frc != 0 )
1418 ERROR("Unable to set guest address size.");
1419 goto out;
1423 /* We want zeroed memory so use calloc rather than malloc. */
1424 ctx->p2m = calloc(dinfo->p2m_size, sizeof(xen_pfn_t));
1425 pfn_type = calloc(dinfo->p2m_size, sizeof(unsigned long));
1427 region_mfn = xg_memalign(PAGE_SIZE, ROUNDUP(
1428 MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
1430 if ( (ctx->p2m == NULL) || (pfn_type == NULL) ||
1431 (region_mfn == NULL) )
1433 ERROR("memory alloc failed");
1434 errno = ENOMEM;
1435 goto out;
1438 memset(region_mfn, 0,
1439 ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
1441 if ( lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE) )
1443 ERROR("Could not lock region_mfn");
1444 goto out;
1447 /* Get the domain's shared-info frame. */
1448 domctl.cmd = XEN_DOMCTL_getdomaininfo;
1449 domctl.domain = (domid_t)dom;
1450 if ( xc_domctl(xc_handle, &domctl) < 0 )
1452 ERROR("Could not get information on new domain");
1453 goto out;
1455 shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
1457 /* Mark all PFNs as invalid; we allocate on demand */
1458 for ( pfn = 0; pfn < dinfo->p2m_size; pfn++ )
1459 ctx->p2m[pfn] = INVALID_P2M_ENTRY;
1461 mmu = xc_alloc_mmu_updates(xc_handle, dom);
1462 if ( mmu == NULL )
1464 ERROR("Could not initialise for MMU updates");
1465 goto out;
1468 DPRINTF("Reloading memory pages: 0%%\n");
1470 /*
1471 * Now simply read each saved frame into its new machine frame.
1472 * We uncanonicalise page tables as we go.
1473 */
1474 prev_pc = 0;
1476 n = m = 0;
1477 loadpages:
1478 for ( ; ; )
1480 int j, curbatch;
1482 this_pc = (n * 100) / dinfo->p2m_size;
1483 if ( (this_pc - prev_pc) >= 5 )
1485 PPRINTF("\b\b\b\b%3d%%", this_pc);
1486 prev_pc = this_pc;
1489 if ( !completed ) {
1490 pagebuf.nr_physpages = pagebuf.nr_pages = 0;
1491 if ( pagebuf_get_one(&pagebuf, io_fd, xc_handle, dom) < 0 ) {
1492 ERROR("Error when reading batch\n");
1493 goto out;
1496 j = pagebuf.nr_pages;
1498 PPRINTF("batch %d\n",j);
1500 if ( j == 0 ) {
1501 /* catch vcpu updates */
1502 if (pagebuf.new_ctxt_format) {
1503 vcpumap = pagebuf.vcpumap;
1504 max_vcpu_id = pagebuf.max_vcpu_id;
1506 /* should this be deferred? does it change? */
1507 if ( pagebuf.identpt )
1508 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT, pagebuf.identpt);
1509 if ( pagebuf.vm86_tss )
1510 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, pagebuf.vm86_tss);
1511 break; /* our work here is done */
1514 /* break pagebuf into batches */
1515 curbatch = 0;
1516 while ( curbatch < j ) {
1517 int brc;
1519 brc = apply_batch(xc_handle, dom, ctx, region_mfn, pfn_type,
1520 pae_extended_cr3, hvm, mmu, &pagebuf, curbatch, superpages);
1521 if ( brc < 0 )
1522 goto out;
1524 nraces += brc;
1526 curbatch += MAX_BATCH_SIZE;
1529 pagebuf.nr_physpages = pagebuf.nr_pages = 0;
1531 n += j; /* crude stats */
1533 /*
1534 * Discard cache for portion of file read so far up to last
1535 * page boundary every 16MB or so.
1536 */
1537 m += j;
1538 if ( m > MAX_PAGECACHE_USAGE )
1540 discard_file_cache(io_fd, 0 /* no flush */);
1541 m = 0;
1545 /*
1546 * Ensure we flush all machphys updates before potential PAE-specific
1547 * reallocations below.
1548 */
1549 if ( !hvm && xc_flush_mmu_updates(xc_handle, mmu) )
1551 ERROR("Error doing flush_mmu_updates()");
1552 goto out;
1555 // DPRINTF("Received all pages (%d races)\n", nraces);
1557 if ( !completed ) {
1558 int flags = 0;
1560 if ( buffer_tail(ctx, &tailbuf, io_fd, max_vcpu_id, vcpumap,
1561 ext_vcpucontext) < 0 ) {
1562 ERROR ("error buffering image tail");
1563 goto out;
1565 completed = 1;
1566 /* shift into nonblocking mode for the remainder */
1567 if ( (flags = fcntl(io_fd, F_GETFL,0)) < 0 )
1568 flags = 0;
1569 fcntl(io_fd, F_SETFL, flags | O_NONBLOCK);
1572 // DPRINTF("Buffered checkpoint\n");
1574 if ( pagebuf_get(&pagebuf, io_fd, xc_handle, dom) ) {
1575 ERROR("error when buffering batch, finishing\n");
1576 goto finish;
1578 memset(&tmptail, 0, sizeof(tmptail));
1579 tmptail.ishvm = hvm;
1580 if ( buffer_tail(ctx, &tmptail, io_fd, max_vcpu_id, vcpumap,
1581 ext_vcpucontext) < 0 ) {
1582 ERROR ("error buffering image tail, finishing");
1583 goto finish;
1585 tailbuf_free(&tailbuf);
1586 memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
1588 goto loadpages;
1590 finish:
1591 if ( hvm )
1592 goto finish_hvm;
1594 if ( (ctx->pt_levels == 3) && !pae_extended_cr3 )
1596 /*
1597 ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
1598 ** is a little awkward and involves (a) finding all such PGDs and
1599 ** replacing them with 'lowmem' versions; (b) updating the p2m[]
1600 ** with the new info; and (c) canonicalizing all the L1s using the
1601 ** (potentially updated) p2m[].
1602 **
1603 ** This is relatively slow (and currently involves two passes through
1604 ** the pfn_type[] array), but at least seems to be correct. May wish
1605 ** to consider more complex approaches to optimize this later.
1606 */
1608 int j, k;
1610 /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
1611 for ( i = 0; i < dinfo->p2m_size; i++ )
1613 if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
1614 XEN_DOMCTL_PFINFO_L3TAB) &&
1615 (ctx->p2m[i] > 0xfffffUL) )
1617 unsigned long new_mfn;
1618 uint64_t l3ptes[4];
1619 uint64_t *l3tab;
1621 l3tab = (uint64_t *)
1622 xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
1623 PROT_READ, ctx->p2m[i]);
1625 for ( j = 0; j < 4; j++ )
1626 l3ptes[j] = l3tab[j];
1628 munmap(l3tab, PAGE_SIZE);
1630 new_mfn = xc_make_page_below_4G(xc_handle, dom, ctx->p2m[i]);
1631 if ( !new_mfn )
1633 ERROR("Couldn't get a page below 4GB :-(");
1634 goto out;
1637 ctx->p2m[i] = new_mfn;
1638 if ( xc_add_mmu_update(xc_handle, mmu,
1639 (((unsigned long long)new_mfn)
1640 << PAGE_SHIFT) |
1641 MMU_MACHPHYS_UPDATE, i) )
1643 ERROR("Couldn't m2p on PAE root pgdir");
1644 goto out;
1647 l3tab = (uint64_t *)
1648 xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
1649 PROT_READ | PROT_WRITE, ctx->p2m[i]);
1651 for ( j = 0; j < 4; j++ )
1652 l3tab[j] = l3ptes[j];
1654 munmap(l3tab, PAGE_SIZE);
1658 /* Second pass: find all L1TABs and uncanonicalize them */
1659 j = 0;
1661 for ( i = 0; i < dinfo->p2m_size; i++ )
1663 if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
1664 XEN_DOMCTL_PFINFO_L1TAB) )
1666 region_mfn[j] = ctx->p2m[i];
1667 j++;
1670 if ( (i == (dinfo->p2m_size-1)) || (j == MAX_BATCH_SIZE) )
1672 region_base = xc_map_foreign_pages(
1673 xc_handle, dom, PROT_READ | PROT_WRITE, region_mfn, j);
1674 if ( region_base == NULL )
1676 ERROR("map batch failed");
1677 goto out;
1680 for ( k = 0; k < j; k++ )
1682 if ( !uncanonicalize_pagetable(
1683 xc_handle, dom, ctx,
1684 region_base + k*PAGE_SIZE, superpages) )
1686 ERROR("failed uncanonicalize pt!");
1687 goto out;
1691 munmap(region_base, j*PAGE_SIZE);
1692 j = 0;
1696 if ( xc_flush_mmu_updates(xc_handle, mmu) )
1698 ERROR("Error doing xc_flush_mmu_updates()");
1699 goto out;
1703 /*
1704 * Pin page tables. Do this after writing to them as otherwise Xen
1705 * will barf when doing the type-checking.
1706 */
1707 nr_pins = 0;
1708 for ( i = 0; i < dinfo->p2m_size; i++ )
1710 if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
1711 continue;
1713 switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
1715 case XEN_DOMCTL_PFINFO_L1TAB:
1716 pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
1717 break;
1719 case XEN_DOMCTL_PFINFO_L2TAB:
1720 pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
1721 break;
1723 case XEN_DOMCTL_PFINFO_L3TAB:
1724 pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
1725 break;
1727 case XEN_DOMCTL_PFINFO_L4TAB:
1728 pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
1729 break;
1731 default:
1732 continue;
1735 pin[nr_pins].arg1.mfn = ctx->p2m[i];
1736 nr_pins++;
1738 /* Batch full? Then flush. */
1739 if ( nr_pins == MAX_PIN_BATCH )
1741 if ( xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0 )
1743 ERROR("Failed to pin batch of %d page tables", nr_pins);
1744 goto out;
1746 nr_pins = 0;
1750 /* Flush final partial batch. */
1751 if ( (nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) )
1753 ERROR("Failed to pin batch of %d page tables", nr_pins);
1754 goto out;
1757 DPRINTF("\b\b\b\b100%%\n");
1758 DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns);
1760 /* Get the list of PFNs that are not in the pseudo-phys map */
1762 int nr_frees = 0;
1764 for ( i = 0; i < tailbuf.u.pv.pfncount; i++ )
1766 unsigned long pfn = tailbuf.u.pv.pfntab[i];
1768 if ( ctx->p2m[pfn] != INVALID_P2M_ENTRY )
1770 /* pfn is not in physmap now, but was at some point during
1771 the save/migration process - need to free it */
1772 tailbuf.u.pv.pfntab[nr_frees++] = ctx->p2m[pfn];
1773 ctx->p2m[pfn] = INVALID_P2M_ENTRY; /* not in pseudo-physical map */
1777 if ( nr_frees > 0 )
1779 struct xen_memory_reservation reservation = {
1780 .nr_extents = nr_frees,
1781 .extent_order = 0,
1782 .domid = dom
1783 };
1784 set_xen_guest_handle(reservation.extent_start, tailbuf.u.pv.pfntab);
1786 if ( (frc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
1787 &reservation)) != nr_frees )
1789 ERROR("Could not decrease reservation : %d", frc);
1790 goto out;
1792 else
1793 DPRINTF("Decreased reservation by %d pages\n", tailbuf.u.pv.pfncount);
1797 if ( lock_pages(&ctxt, sizeof(ctxt)) )
1799 ERROR("Unable to lock ctxt");
1800 return 1;
1803 vcpup = tailbuf.u.pv.vcpubuf;
1804 for ( i = 0; i <= max_vcpu_id; i++ )
1806 if ( !(vcpumap & (1ULL << i)) )
1807 continue;
1809 memcpy(&ctxt, vcpup, ((dinfo->guest_width == 8) ? sizeof(ctxt.x64)
1810 : sizeof(ctxt.x32)));
1811 vcpup += (dinfo->guest_width == 8) ? sizeof(ctxt.x64) : sizeof(ctxt.x32);
1813 DPRINTF("read VCPU %d\n", i);
1815 if ( !new_ctxt_format )
1816 SET_FIELD(&ctxt, flags, GET_FIELD(&ctxt, flags) | VGCF_online);
1818 if ( i == 0 )
1820 /*
1821 * Uncanonicalise the suspend-record frame number and poke
1822 * resume record.
1823 */
1824 pfn = GET_FIELD(&ctxt, user_regs.edx);
1825 if ( (pfn >= dinfo->p2m_size) ||
1826 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
1828 ERROR("Suspend record frame number is bad");
1829 goto out;
1831 mfn = ctx->p2m[pfn];
1832 SET_FIELD(&ctxt, user_regs.edx, mfn);
1833 start_info = xc_map_foreign_range(
1834 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
1835 SET_FIELD(start_info, nr_pages, dinfo->p2m_size);
1836 SET_FIELD(start_info, shared_info, shared_info_frame<<PAGE_SHIFT);
1837 SET_FIELD(start_info, flags, 0);
1838 *store_mfn = ctx->p2m[GET_FIELD(start_info, store_mfn)];
1839 SET_FIELD(start_info, store_mfn, *store_mfn);
1840 SET_FIELD(start_info, store_evtchn, store_evtchn);
1841 *console_mfn = ctx->p2m[GET_FIELD(start_info, console.domU.mfn)];
1842 SET_FIELD(start_info, console.domU.mfn, *console_mfn);
1843 SET_FIELD(start_info, console.domU.evtchn, console_evtchn);
1844 munmap(start_info, PAGE_SIZE);
1846 /* Uncanonicalise each GDT frame number. */
1847 if ( GET_FIELD(&ctxt, gdt_ents) > 8192 )
1849 ERROR("GDT entry count out of range");
1850 goto out;
1853 for ( j = 0; (512*j) < GET_FIELD(&ctxt, gdt_ents); j++ )
1855 pfn = GET_FIELD(&ctxt, gdt_frames[j]);
1856 if ( (pfn >= dinfo->p2m_size) ||
1857 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
1859 ERROR("GDT frame number %i (0x%lx) is bad",
1860 j, (unsigned long)pfn);
1861 goto out;
1863 SET_FIELD(&ctxt, gdt_frames[j], ctx->p2m[pfn]);
1865 /* Uncanonicalise the page table base pointer. */
1866 pfn = UNFOLD_CR3(GET_FIELD(&ctxt, ctrlreg[3]));
1868 if ( pfn >= dinfo->p2m_size )
1870 ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
1871 pfn, dinfo->p2m_size, pfn_type[pfn]);
1872 goto out;
1875 if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
1876 ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
1878 ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
1879 pfn, dinfo->p2m_size, pfn_type[pfn],
1880 (unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
1881 goto out;
1883 SET_FIELD(&ctxt, ctrlreg[3], FOLD_CR3(ctx->p2m[pfn]));
1885 /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
1886 if ( (ctx->pt_levels == 4) && (ctxt.x64.ctrlreg[1] & 1) )
1888 pfn = UNFOLD_CR3(ctxt.x64.ctrlreg[1] & ~1);
1889 if ( pfn >= dinfo->p2m_size )
1891 ERROR("User PT base is bad: pfn=%lu p2m_size=%lu",
1892 pfn, dinfo->p2m_size);
1893 goto out;
1895 if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
1896 ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
1898 ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
1899 pfn, dinfo->p2m_size, pfn_type[pfn],
1900 (unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
1901 goto out;
1903 ctxt.x64.ctrlreg[1] = FOLD_CR3(ctx->p2m[pfn]);
1905 domctl.cmd = XEN_DOMCTL_setvcpucontext;
1906 domctl.domain = (domid_t)dom;
1907 domctl.u.vcpucontext.vcpu = i;
1908 set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt.c);
1909 frc = xc_domctl(xc_handle, &domctl);
1910 if ( frc != 0 )
1912 ERROR("Couldn't build vcpu%d", i);
1913 goto out;
1916 if ( !ext_vcpucontext )
1917 continue;
1918 memcpy(&domctl.u.ext_vcpucontext, vcpup, 128);
1919 vcpup += 128;
1920 domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
1921 domctl.domain = dom;
1922 frc = xc_domctl(xc_handle, &domctl);
1923 if ( frc != 0 )
1925 ERROR("Couldn't set extended vcpu%d info\n", i);
1926 goto out;
1930 memcpy(shared_info_page, tailbuf.u.pv.shared_info_page, PAGE_SIZE);
1932 DPRINTF("Completed checkpoint load\n");
1934 /* Restore contents of shared-info page. No checking needed. */
1935 new_shared_info = xc_map_foreign_range(
1936 xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
1938 /* restore saved vcpu_info and arch specific info */
1939 MEMCPY_FIELD(new_shared_info, old_shared_info, vcpu_info);
1940 MEMCPY_FIELD(new_shared_info, old_shared_info, arch);
1942 /* clear any pending events and the selector */
1943 MEMSET_ARRAY_FIELD(new_shared_info, evtchn_pending, 0);
1944 for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
1945 SET_FIELD(new_shared_info, vcpu_info[i].evtchn_pending_sel, 0);
1947 /* mask event channels */
1948 MEMSET_ARRAY_FIELD(new_shared_info, evtchn_mask, 0xff);
1950 /* leave wallclock time. set by hypervisor */
1951 munmap(new_shared_info, PAGE_SIZE);
1953 /* Uncanonicalise the pfn-to-mfn table frame-number list. */
1954 for ( i = 0; i < P2M_FL_ENTRIES; i++ )
1956 pfn = p2m_frame_list[i];
1957 if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
1959 ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
1960 goto out;
1962 p2m_frame_list[i] = ctx->p2m[pfn];
1965 /* Copy the P2M we've constructed to the 'live' P2M */
1966 if ( !(ctx->live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_WRITE,
1967 p2m_frame_list, P2M_FL_ENTRIES)) )
1969 ERROR("Couldn't map p2m table");
1970 goto out;
1973 /* If the domain we're restoring has a different word size to ours,
1974 * we need to adjust the live_p2m assignment appropriately */
1975 if ( dinfo->guest_width > sizeof (xen_pfn_t) )
1976 for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
1977 ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
1978 else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
1979 for ( i = 0; i < dinfo->p2m_size; i++ )
1980 ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
1981 else
1982 memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t));
1983 munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
1985 DPRINTF("Domain ready to be built.\n");
1986 rc = 0;
1987 goto out;
1989 finish_hvm:
1990 /* Dump the QEMU state to a state file for QEMU to load */
1991 if ( dump_qemu(dom, &tailbuf.u.hvm) ) {
1992 ERROR("Error dumping QEMU state to file");
1993 goto out;
1996 /* These comms pages need to be zeroed at the start of day */
1997 if ( xc_clear_domain_page(xc_handle, dom, tailbuf.u.hvm.magicpfns[0]) ||
1998 xc_clear_domain_page(xc_handle, dom, tailbuf.u.hvm.magicpfns[1]) ||
1999 xc_clear_domain_page(xc_handle, dom, tailbuf.u.hvm.magicpfns[2]) )
2001 ERROR("error zeroing magic pages");
2002 goto out;
2005 if ( (frc = xc_set_hvm_param(xc_handle, dom,
2006 HVM_PARAM_IOREQ_PFN, tailbuf.u.hvm.magicpfns[0]))
2007 || (frc = xc_set_hvm_param(xc_handle, dom,
2008 HVM_PARAM_BUFIOREQ_PFN, tailbuf.u.hvm.magicpfns[1]))
2009 || (frc = xc_set_hvm_param(xc_handle, dom,
2010 HVM_PARAM_STORE_PFN, tailbuf.u.hvm.magicpfns[2]))
2011 || (frc = xc_set_hvm_param(xc_handle, dom,
2012 HVM_PARAM_PAE_ENABLED, pae))
2013 || (frc = xc_set_hvm_param(xc_handle, dom,
2014 HVM_PARAM_STORE_EVTCHN,
2015 store_evtchn)) )
2017 ERROR("error setting HVM params: %i", frc);
2018 goto out;
2020 *store_mfn = tailbuf.u.hvm.magicpfns[2];
2022 frc = xc_domain_hvm_setcontext(xc_handle, dom, tailbuf.u.hvm.hvmbuf,
2023 tailbuf.u.hvm.reclen);
2024 if ( frc )
2026 ERROR("error setting the HVM context");
2027 goto out;
2030 /* HVM success! */
2031 rc = 0;
2033 out:
2034 if ( (rc != 0) && (dom != 0) )
2035 xc_domain_destroy(xc_handle, dom);
2036 free(mmu);
2037 free(ctx->p2m);
2038 free(pfn_type);
2039 tailbuf_free(&tailbuf);
2041 /* discard cache for save file */
2042 discard_file_cache(io_fd, 1 /*flush*/);
2044 DPRINTF("Restore exit with rc=%d\n", rc);
2046 return rc;
2048 /*
2049 * Local variables:
2050 * mode: C
2051 * c-set-style: "BSD"
2052 * c-basic-offset: 4
2053 * tab-width: 4
2054 * indent-tabs-mode: nil
2055 * End:
2056 */
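Finally, a hedged usage sketch (editorial, not from the Xen tree) of how a caller might drive xc_domain_restore() for an HVM guest, assuming a pre-created empty domain and an already-open migration fd; the event-channel numbers are placeholders supplied by the caller.

#include <xenctrl.h>

int restore_hvm_example(int io_fd, uint32_t dom,
                        unsigned int store_evtchn, unsigned int console_evtchn)
{
    unsigned long store_mfn = 0, console_mfn = 0;
    int xc_handle, rc;

    xc_handle = xc_interface_open();    /* privileged control interface */
    if ( xc_handle < 0 )
        return -1;

    /* hvm=1, pae=1; superpages is forced on internally for HVM guests. */
    rc = xc_domain_restore(xc_handle, io_fd, dom,
                           store_evtchn, &store_mfn,
                           console_evtchn, &console_mfn,
                           1 /* hvm */, 1 /* pae */, 0 /* superpages */);

    xc_interface_close(xc_handle);
    return rc;
}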