
view tools/libxc/xc_domain_restore.c @ 20837:0b138a019292

libxc: use new (replacement) mmap-batch ioctl

Replace all calls to xc_map_foreign_batch() where the caller doesn't
inspect the passed-in array for errors with calls to
xc_map_foreign_pages(). Replace all remaining calls with calls to the
newly introduced xc_map_foreign_bulk().

As a sideband modification (needed while writing the patch to ensure
they're unused) eliminate unused parameters to
uncanonicalize_pagetable() and xc_map_foreign_batch_single(). Also
unmap live_p2m_frame_list earlier in map_and_save_p2m_table(),
reducing the peak amount of virtual address space required.

All supported OSes other than Linux continue to use the old ioctl for
the time being.

Also change libxc's MAJOR to 4.0 to reflect the API change.
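
To illustrate the interface difference, a minimal sketch follows (the exact
prototypes shown here are assumptions about the libxc of this period, not
something defined by this changeset):

    /* old: per-page errors are written back into the pfn array, so callers
     * that cared had to scan it after the call */
    void *old_map = xc_map_foreign_batch(xc_handle, dom, PROT_READ, pfns, n);

    /* new, for callers that never looked at the array: all-or-nothing,
     * NULL on any failure */
    void *all_map = xc_map_foreign_pages(xc_handle, dom, PROT_READ, pfns, n);

    /* new, for callers that do want per-page status: errors are reported
     * in a separate err[] array instead of in the pfn array */
    int *err = malloc(n * sizeof(*err));
    void *bulk_map = xc_map_foreign_bulk(xc_handle, dom, PROT_READ, pfns, err, n);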

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Jan 13 08:12:56 2010 +0000 (2010-01-13)
parents cc7d66ba0dad
children 6a5d8284b6e6
line source
1 /******************************************************************************
2 * xc_domain_restore.c
3 *
4 * Restore the state of a guest session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 * Copyright (c) 2006, Intel Corporation
8 * Copyright (c) 2007, XenSource Inc.
9 *
10 * This program is free software; you can redistribute it and/or modify it
11 * under the terms and conditions of the GNU General Public License,
12 * version 2, as published by the Free Software Foundation.
13 *
14 * This program is distributed in the hope it will be useful, but WITHOUT
15 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
17 * more details.
18 *
19 * You should have received a copy of the GNU General Public License along with
20 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
21 * Place - Suite 330, Boston, MA 02111-1307 USA.
22 *
23 */
25 #include <stdlib.h>
26 #include <unistd.h>
28 #include "xg_private.h"
29 #include "xg_save_restore.h"
30 #include "xc_dom.h"
32 #include <xen/hvm/ioreq.h>
33 #include <xen/hvm/params.h>
35 struct restore_ctx {
36 unsigned long max_mfn; /* max mfn of the current host machine */
37 unsigned long hvirt_start; /* virtual starting address of the hypervisor */
38 unsigned int pt_levels; /* #levels of page tables used by the current guest */
39 unsigned long nr_pfns; /* number of 'in use' pfns in the guest (i.e. #P2M entries with a valid mfn) */
40 xen_pfn_t *live_p2m; /* Live mapping of the table mapping each PFN to its current MFN. */
41 xen_pfn_t *p2m; /* A table mapping each PFN to its new MFN. */
42 unsigned no_superpage_mem; /* Set when there is not enough contiguous memory for superpage allocation */
43 struct domain_info_context dinfo;
44 };
46 /*
47 **
48 **
49 */
50 #define SUPERPAGE_PFN_SHIFT 9
51 #define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT)
53 /*
54 * Setting bit 31 forces allocation of a superpage even when not all pfns have arrived;
55 * bit 30 indicates that we are not currently tracking a superpage.
56 */
57 #define FORCE_SP_SHIFT 31
58 #define FORCE_SP_MASK (1UL << FORCE_SP_SHIFT)
60 #define INVALID_SUPER_PAGE ((1UL << 30) + 1)
61 #define SUPER_PAGE_START(pfn) (((pfn) & (SUPERPAGE_NR_PFNS-1)) == 0 )
62 #define SUPER_PAGE_TRACKING(pfn) ( (pfn) != INVALID_SUPER_PAGE )
63 #define SUPER_PAGE_DONE(pfn) ( SUPER_PAGE_START(pfn) )
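/*
 * Worked example of the constants above: with SUPERPAGE_PFN_SHIFT == 9, a
 * superpage covers SUPERPAGE_NR_PFNS == 512 pfns, i.e. 512 * 4KiB == 2MiB.
 * SUPER_PAGE_START(pfn) is true exactly when pfn is a multiple of 512, so
 * e.g. pfn 0x200 starts a 2M extent while pfn 0x201 lies in the middle of one.
 */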
65 static int super_page_populated(struct restore_ctx *ctx, unsigned long pfn)
66 {
67 int i;
68 pfn &= ~(SUPERPAGE_NR_PFNS - 1);
69 for ( i = pfn; i < pfn + SUPERPAGE_NR_PFNS; i++ )
70 {
71 if ( ctx->p2m[i] != INVALID_P2M_ENTRY )
72 return 1;
73 }
74 return 0;
75 }
77 /*
78 * Break a 2M page and move contents of [extent start, next_pfn-1] to
79 * some new allocated 4K pages
80 */
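/*
 * The sequence below: map and copy out the already-populated part of the
 * extent, free the whole 2M allocation via an order-SUPERPAGE_PFN_SHIFT
 * decrease-reservation, repopulate the previously used part of the range
 * with order-0 (4K) pages, and copy the saved contents back in.
 */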
81 static int break_super_page(int xc_handle,
82 uint32_t dom,
83 struct restore_ctx *ctx,
84 xen_pfn_t next_pfn)
85 {
86 xen_pfn_t *page_array, start_pfn, mfn;
87 uint8_t *ram_base, *save_buf;
88 unsigned long i;
89 int tot_pfns, rc = 0;
91 tot_pfns = (next_pfn & (SUPERPAGE_NR_PFNS - 1));
93 start_pfn = next_pfn & ~(SUPERPAGE_NR_PFNS - 1);
94 for ( i = start_pfn; i < start_pfn + SUPERPAGE_NR_PFNS; i++ )
95 {
96 /* check that the 2M page is fully populated */
97 if ( ctx->p2m[i] == INVALID_P2M_ENTRY ) {
98 DPRINTF("Previous super page was populated wrongly!\n");
99 return 1;
100 }
101 }
103 page_array = (xen_pfn_t*)malloc(tot_pfns * sizeof(xen_pfn_t));
104 save_buf = (uint8_t*)malloc(tot_pfns * PAGE_SIZE);
106 if ( !page_array || !save_buf )
107 {
108 ERROR("alloc page_array failed\n");
109 errno = ENOMEM;
110 rc = 1;
111 goto out;
112 }
114 /* save previous super page contents */
115 for ( i = 0; i < tot_pfns; i++ )
116 {
117 /* only HVM is supported here, as the mfn of the 2M page is not available */
118 page_array[i] = start_pfn + i;
119 }
121 ram_base = xc_map_foreign_pages(xc_handle, dom, PROT_READ,
122 page_array, tot_pfns);
124 if ( ram_base == NULL )
125 {
126 ERROR("map batch failed\n");
127 rc = 1;
128 goto out;
129 }
131 memcpy(save_buf, ram_base, tot_pfns * PAGE_SIZE);
132 munmap(ram_base, tot_pfns * PAGE_SIZE);
134 /* free the super page */
135 if ( xc_domain_memory_decrease_reservation(xc_handle, dom, 1,
136 SUPERPAGE_PFN_SHIFT, &start_pfn) != 0 )
137 {
138 ERROR("failed to free 2M page @ 0x%lx.\n", next_pfn);
139 rc = 1;
140 goto out;
141 }
143 start_pfn = next_pfn & ~(SUPERPAGE_NR_PFNS - 1);
144 for ( i = start_pfn; i < start_pfn + SUPERPAGE_NR_PFNS; i++ )
145 {
146 ctx->p2m[i] = INVALID_P2M_ENTRY;
147 }
149 for ( i = start_pfn; i < start_pfn + tot_pfns; i++ )
150 {
151 mfn = i;
152 if (xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0,
153 0, &mfn) != 0)
154 {
155 ERROR("Failed to allocate physical memory!\n");
156 errno = ENOMEM;
157 rc = 1;
158 goto out;
159 }
160 ctx->p2m[i] = mfn;
161 }
163 /* restore contents */
164 for ( i = 0; i < tot_pfns; i++ )
165 {
166 page_array[i] = start_pfn + i;
167 }
169 ram_base = xc_map_foreign_pages(xc_handle, dom, PROT_WRITE,
170 page_array, tot_pfns);
171 if ( ram_base == NULL )
172 {
173 ERROR("map batch failed\n");
174 rc = 1;
175 goto out;
176 }
178 memcpy(ram_base, save_buf, tot_pfns * PAGE_SIZE);
179 munmap(ram_base, tot_pfns * PAGE_SIZE);
181 out:
182 free(page_array);
183 free(save_buf);
184 return rc;
185 }
188 /*
189 * Allocate pages according to the pfn list: either one 2M page or a series of 4K pages.
190 * Also optimistically allocate a 2M page even when not all pages of the 2M
191 * extent have arrived yet, and fix it up in the next batch:
192 * if the new pages fill the missing part of the 2M extent, do nothing; else
193 * replace the original 2M page with 4K pages.
194 */
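/*
 * *next_pfn doubles as the tracking state: INVALID_SUPER_PAGE means no 2M
 * extent is currently being tracked, any other value is the next pfn expected
 * within the extent, and FORCE_SP_MASK (bit 31) requests a 2M allocation even
 * though the extent is incomplete (the speculative HVM case).
 */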
195 static int allocate_mfn_list(int xc_handle,
196 uint32_t dom,
197 struct restore_ctx *ctx,
198 unsigned long nr_extents,
199 xen_pfn_t *batch_buf,
200 xen_pfn_t *next_pfn,
201 int superpages)
202 {
203 unsigned int i;
204 unsigned long mfn, pfn, sp_pfn;
206 /* Check whether the force-superpage bit is set, then clear it */
207 unsigned force_super_page = !!(*next_pfn & FORCE_SP_MASK);
208 *next_pfn &= ~FORCE_SP_MASK;
210 sp_pfn = *next_pfn;
212 if ( !superpages ||
213 ctx->no_superpage_mem ||
214 !SUPER_PAGE_TRACKING(sp_pfn) )
215 goto normal_page;
217 if ( !batch_buf )
218 {
219 /* Break the previous 2M page if its 512 pages were split across a batch boundary */
220 if ( SUPER_PAGE_TRACKING(sp_pfn) &&
221 !SUPER_PAGE_DONE(sp_pfn))
222 {
223 /* break previously allocated super page*/
224 if ( break_super_page(xc_handle, dom, ctx, sp_pfn) != 0 )
225 {
226 ERROR("Failed to break previous super page!\n");
227 return 1;
228 }
229 }
231 /* following pages fit the order in the 2M extent */
232 return 0;
233 }
235 /*
236 * We try to allocate a 2M page only when:
237 * the user requested superpages,
238 * AND there is enough memory,
239 * AND we are tracking a 2M extent,
240 * AND all pages in the 2M extent have been tracked, OR only a partial extent was seen but speculation is allowed,
241 * AND no page in the 2M extent is populated yet
242 */
243 if ( !SUPER_PAGE_DONE(sp_pfn) && !force_super_page )
244 goto normal_page;
246 pfn = batch_buf[0] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
247 if ( super_page_populated(ctx, pfn) )
248 goto normal_page;
250 pfn &= ~(SUPERPAGE_NR_PFNS - 1);
251 mfn = pfn;
253 if ( xc_domain_memory_populate_physmap(xc_handle, dom, 1,
254 SUPERPAGE_PFN_SHIFT, 0, &mfn) == 0)
255 {
256 for ( i = pfn; i < pfn + SUPERPAGE_NR_PFNS; i++, mfn++ )
257 {
258 ctx->p2m[i] = mfn;
259 }
260 return 0;
261 }
262 DPRINTF("No 2M page available for pfn 0x%lx, falling back to 4K pages.\n",
263 pfn);
264 ctx->no_superpage_mem = 1;
266 normal_page:
267 if ( !batch_buf )
268 return 0;
270 /* End the tracking if we wanted a 2M page but are ending up with 4K pages. */
271 *next_pfn = INVALID_SUPER_PAGE;
273 for ( i = 0; i < nr_extents; i++ )
274 {
275 unsigned long pagetype = batch_buf[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
276 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
277 continue;
279 pfn = mfn = batch_buf[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
280 if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY )
281 {
282 if (xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0,
283 0, &mfn) != 0)
284 {
285 ERROR("Failed to allocate physical memory! pfn=0x%lx, mfn=0x%lx.\n",
286 pfn, mfn);
287 errno = ENOMEM;
288 return 1;
289 }
290 ctx->p2m[pfn] = mfn;
291 }
292 }
294 return 0;
295 }
297 static int allocate_physmem(int xc_handle, uint32_t dom,
298 struct restore_ctx *ctx,
299 unsigned long *region_pfn_type, int region_size,
300 unsigned int hvm, xen_pfn_t *region_mfn, int superpages)
301 {
302 int i;
303 unsigned long pfn;
304 unsigned long pagetype;
306 /* Next expected pfn in order to track a possible 2M page */
307 static unsigned long required_pfn = INVALID_SUPER_PAGE;
309 /* Buffer of pfn list for 2M page, or series of 4K pages */
310 xen_pfn_t *batch_buf;
311 unsigned int batch_buf_len;
312 struct domain_info_context *dinfo = &ctx->dinfo;
314 if ( !superpages )
315 {
316 batch_buf = &region_pfn_type[0];
317 batch_buf_len = region_size;
318 goto alloc_page;
319 }
321 batch_buf = NULL;
322 batch_buf_len = 0;
323 /* This loop tracks the possible 2M page */
324 for (i = 0; i < region_size; i++)
325 {
326 pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
327 pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
329 if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
330 {
331 /* Do not start collecting pfns until we get a valid pfn */
332 if ( batch_buf_len != 0 )
333 batch_buf_len++;
334 continue;
335 }
337 if ( SUPER_PAGE_START(pfn) )
338 {
339 /* Start of a 2M extent, populate the previous buf */
340 if ( allocate_mfn_list(xc_handle, dom, ctx,
341 batch_buf_len, batch_buf,
342 &required_pfn, superpages) != 0 )
343 {
344 errno = ENOMEM;
345 return 1;
346 }
348 /* start new tracking for 2M page */
349 batch_buf = &region_pfn_type[i];
350 batch_buf_len = 1;
351 required_pfn = pfn + 1;
352 }
353 else if ( pfn == required_pfn )
354 {
355 /* this page fits the 2M extent in order */
356 batch_buf_len++;
357 required_pfn++;
358 }
359 else if ( SUPER_PAGE_TRACKING(required_pfn) )
360 {
361 /* break in the 2M extent, populate the previous buf */
362 if ( allocate_mfn_list(xc_handle, dom, ctx,
363 batch_buf_len, batch_buf,
364 &required_pfn, superpages) != 0 )
365 {
366 errno = ENOMEM;
367 return 1;
368 }
369 /* start new tracking for a series of 4K pages */
370 batch_buf = &region_pfn_type[i];
371 batch_buf_len = 1;
372 required_pfn = INVALID_SUPER_PAGE;
373 }
374 else
375 {
376 /* this page is 4K */
377 if ( !batch_buf )
378 batch_buf = &region_pfn_type[i];
379 batch_buf_len++;
380 }
381 }
383 /*
384 * Populate the remaining batch_buf at the end.
385 * Speculatively, we allocate a 2M page even when we have not yet seen all the
386 * pages in order (set bit 31). If superpage support is not required,
387 * the tracking loop can be skipped and we come here directly.
388 * Speculative allocation can't be used for PV guests, as we have no mfn with
389 * which to map the previous 2M range should it need to be broken up.
390 */
391 if ( SUPER_PAGE_TRACKING(required_pfn) &&
392 !SUPER_PAGE_DONE(required_pfn) )
393 {
394 if (hvm)
395 required_pfn |= FORCE_SP_MASK;
396 else
397 required_pfn = INVALID_SUPER_PAGE;
398 }
400 alloc_page:
401 if ( batch_buf )
402 {
403 if ( allocate_mfn_list(xc_handle, dom, ctx,
404 batch_buf_len, batch_buf,
405 &required_pfn,
406 superpages) != 0 )
407 {
408 errno = ENOMEM;
409 return 1;
410 }
411 }
413 for (i = 0; i < region_size; i++)
414 {
415 pfn = region_pfn_type[i] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
416 pagetype = region_pfn_type[i] & XEN_DOMCTL_PFINFO_LTAB_MASK;
418 if ( pfn > dinfo->p2m_size )
419 {
420 ERROR("pfn out of range");
421 return 1;
422 }
423 if (pagetype == XEN_DOMCTL_PFINFO_XTAB)
424 {
425 region_mfn[i] = ~0UL;
426 }
427 else
428 {
429 if (ctx->p2m[pfn] == INVALID_P2M_ENTRY)
430 {
431 DPRINTF("Warning: pfn 0x%lx is not allocated!\n", pfn);
432 /*XXX:allocate this page?*/
433 }
435 /* setup region_mfn[] for batch map.
436 * For HVM guests, this interface takes PFNs, not MFNs */
437 region_mfn[i] = hvm ? pfn : ctx->p2m[pfn];
438 }
439 }
440 return 0;
441 }
444 /* set when a consistent image is available */
445 static int completed = 0;
447 #define HEARTBEAT_MS 500
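/* Once a first consistent image has been received (the 'completed' flag
 * above), further reads are bounded by a select() with this timeout so that
 * a dead sender is noticed while waiting for checkpoint data. */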
449 #ifndef __MINIOS__
450 static ssize_t read_exact_timed(int fd, void* buf, size_t size)
451 {
452 size_t offset = 0;
453 ssize_t len;
454 struct timeval tv;
455 fd_set rfds;
457 while ( offset < size )
458 {
459 if ( completed ) {
460 /* expect a heartbeat every HEARTBEAT_MS ms maximum */
461 tv.tv_sec = 0;
462 tv.tv_usec = HEARTBEAT_MS * 1000;
464 FD_ZERO(&rfds);
465 FD_SET(fd, &rfds);
466 len = select(fd + 1, &rfds, NULL, NULL, &tv);
467 if ( !FD_ISSET(fd, &rfds) ) {
468 fprintf(stderr, "read_exact_timed failed (select returned %zd)\n", len);
469 return -1;
470 }
471 }
473 len = read(fd, buf + offset, size - offset);
474 if ( (len == -1) && ((errno == EINTR) || (errno == EAGAIN)) )
475 continue;
476 if ( len <= 0 )
477 return -1;
478 offset += len;
479 }
481 return 0;
482 }
484 #define read_exact read_exact_timed
486 #else
487 #define read_exact_timed read_exact
488 #endif
489 /*
490 ** In the state file (or during transfer), all page-table pages are
491 ** converted into a 'canonical' form where references to actual mfns
492 ** are replaced with references to the corresponding pfns.
493 ** This function inverts that operation, replacing the pfn values with
494 ** the (now known) appropriate mfn values.
495 */
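/*
 * Concretely: for every present PTE the pfn is extracted from the address
 * bits ((pte >> PAGE_SHIFT) & MFN_MASK_X86), looked up in ctx->p2m[], and the
 * address bits are rewritten to the corresponding mfn while the flag bits are
 * left untouched.
 */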
496 static int uncanonicalize_pagetable(int xc_handle, uint32_t dom, struct restore_ctx *ctx,
497 void *page, int superpages)
498 {
499 int i, pte_last;
500 unsigned long pfn;
501 uint64_t pte;
502 struct domain_info_context *dinfo = &ctx->dinfo;
504 pte_last = PAGE_SIZE / ((ctx->pt_levels == 2)? 4 : 8);
506 for ( i = 0; i < pte_last; i++ )
507 {
508 if ( ctx->pt_levels == 2 )
509 pte = ((uint32_t *)page)[i];
510 else
511 pte = ((uint64_t *)page)[i];
513 /* XXX SMH: below needs fixing for PROT_NONE etc */
514 if ( !(pte & _PAGE_PRESENT) )
515 continue;
517 pfn = (pte >> PAGE_SHIFT) & MFN_MASK_X86;
519 /* Allocate mfn if necessary */
520 if ( ctx->p2m[pfn] == INVALID_P2M_ENTRY )
521 {
522 unsigned long force_pfn = superpages ? FORCE_SP_MASK : pfn;
523 if (allocate_mfn_list(xc_handle, dom, ctx,
524 1, &pfn, &force_pfn, superpages) != 0)
525 return 0;
526 }
527 pte &= ~MADDR_MASK_X86;
528 pte |= (uint64_t)ctx->p2m[pfn] << PAGE_SHIFT;
530 if ( ctx->pt_levels == 2 )
531 ((uint32_t *)page)[i] = (uint32_t)pte;
532 else
533 ((uint64_t *)page)[i] = (uint64_t)pte;
534 }
536 return 1;
537 }
540 /* Load the p2m frame list, plus potential extended info chunk */
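/*
 * Stream layout handled here: the first word is either the first entry of the
 * p2m frame list or ~0UL announcing an extended-info block. That block is a
 * 4-byte total size followed by chunks of [4-char signature][4-byte length]
 * [payload]; "vcpu" carries a guest context (used to deduce word size and
 * page-table levels) and "extv" flags extended vcpu contexts; unknown chunks
 * are read and discarded. The real first frame-list entry follows the block,
 * then the remaining P2M_FL_ENTRIES - 1 entries.
 */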
541 static xen_pfn_t *load_p2m_frame_list(struct restore_ctx *ctx,
542 int io_fd, int *pae_extended_cr3, int *ext_vcpucontext)
543 {
544 xen_pfn_t *p2m_frame_list;
545 vcpu_guest_context_any_t ctxt;
546 xen_pfn_t p2m_fl_zero;
547 struct domain_info_context *dinfo = &ctx->dinfo;
549 /* Read first entry of P2M list, or extended-info signature (~0UL). */
550 if ( read_exact(io_fd, &p2m_fl_zero, sizeof(long)) )
551 {
552 ERROR("read extended-info signature failed");
553 return NULL;
554 }
556 if ( p2m_fl_zero == ~0UL )
557 {
558 uint32_t tot_bytes;
560 /* Next 4 bytes: total size of following extended info. */
561 if ( read_exact(io_fd, &tot_bytes, sizeof(tot_bytes)) )
562 {
563 ERROR("read extended-info size failed");
564 return NULL;
565 }
567 while ( tot_bytes )
568 {
569 uint32_t chunk_bytes;
570 char chunk_sig[4];
572 /* 4-character chunk signature + 4-byte remaining chunk size. */
573 if ( read_exact(io_fd, chunk_sig, sizeof(chunk_sig)) ||
574 read_exact(io_fd, &chunk_bytes, sizeof(chunk_bytes)) ||
575 (tot_bytes < (chunk_bytes + 8)) )
576 {
577 ERROR("read extended-info chunk signature failed");
578 return NULL;
579 }
580 tot_bytes -= 8;
582 /* VCPU context structure? */
583 if ( !strncmp(chunk_sig, "vcpu", 4) )
584 {
585 /* Pick a guest word-size and PT depth from the ctxt size */
586 if ( chunk_bytes == sizeof (ctxt.x32) )
587 {
588 dinfo->guest_width = 4;
589 if ( ctx->pt_levels > 2 )
590 ctx->pt_levels = 3;
591 }
592 else if ( chunk_bytes == sizeof (ctxt.x64) )
593 {
594 dinfo->guest_width = 8;
595 ctx->pt_levels = 4;
596 }
597 else
598 {
599 ERROR("bad extended-info context size %d", chunk_bytes);
600 return NULL;
601 }
603 if ( read_exact(io_fd, &ctxt, chunk_bytes) )
604 {
605 ERROR("read extended-info vcpu context failed");
606 return NULL;
607 }
608 tot_bytes -= chunk_bytes;
609 chunk_bytes = 0;
611 if ( GET_FIELD(&ctxt, vm_assist)
612 & (1UL << VMASST_TYPE_pae_extended_cr3) )
613 *pae_extended_cr3 = 1;
614 }
615 else if ( !strncmp(chunk_sig, "extv", 4) )
616 {
617 *ext_vcpucontext = 1;
618 }
620 /* Any remaining bytes of this chunk: read and discard. */
621 while ( chunk_bytes )
622 {
623 unsigned long sz = MIN(chunk_bytes, sizeof(xen_pfn_t));
624 if ( read_exact(io_fd, &p2m_fl_zero, sz) )
625 {
626 ERROR("read-and-discard extended-info chunk bytes failed");
627 return NULL;
628 }
629 chunk_bytes -= sz;
630 tot_bytes -= sz;
631 }
632 }
634 /* Now read the real first entry of P2M list. */
635 if ( read_exact(io_fd, &p2m_fl_zero, sizeof(xen_pfn_t)) )
636 {
637 ERROR("read first entry of p2m_frame_list failed");
638 return NULL;
639 }
640 }
642 /* Now that we know the guest's word-size, can safely allocate
643 * the p2m frame list */
644 if ( (p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) == NULL )
645 {
646 ERROR("Couldn't allocate p2m_frame_list array");
647 return NULL;
648 }
650 /* First entry has already been read. */
651 p2m_frame_list[0] = p2m_fl_zero;
652 if ( read_exact(io_fd, &p2m_frame_list[1],
653 (P2M_FL_ENTRIES - 1) * sizeof(xen_pfn_t)) )
654 {
655 ERROR("read p2m_frame_list failed");
656 return NULL;
657 }
659 return p2m_frame_list;
660 }
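/*
 * Buffered image tail: for PV guests this is the table of pfns no longer in
 * the pseudo-phys map, the vcpu contexts and the shared-info page; for HVM
 * guests it is the three "magic" pfns (ioreq, buffered ioreq, xenstore), the
 * HVM context record and a trailing QEMU device-model record.
 */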
662 typedef struct {
663 int ishvm;
664 union {
665 struct tailbuf_pv {
666 unsigned int pfncount;
667 unsigned long* pfntab;
668 unsigned int vcpucount;
669 unsigned char* vcpubuf;
670 unsigned char shared_info_page[PAGE_SIZE];
671 } pv;
672 struct tailbuf_hvm {
673 uint64_t magicpfns[3];
674 uint32_t hvmbufsize, reclen;
675 uint8_t* hvmbuf;
676 struct {
677 uint32_t magic;
678 uint32_t version;
679 uint64_t len;
680 } qemuhdr;
681 uint32_t qemubufsize;
682 uint8_t* qemubuf;
683 } hvm;
684 } u;
685 } tailbuf_t;
687 /* read stream until EOF, growing buffer as necessary */
688 static int compat_buffer_qemu(int fd, struct tailbuf_hvm *buf)
689 {
690 uint8_t *qbuf, *tmp;
691 int blen = 0, dlen = 0;
692 int rc;
694 /* currently save records tend to be about 7K */
695 blen = 8192;
696 if ( !(qbuf = malloc(blen)) ) {
697 ERROR("Error allocating QEMU buffer");
698 return -1;
699 }
701 while( (rc = read(fd, qbuf+dlen, blen-dlen)) > 0 ) {
702 DPRINTF("Read %d bytes of QEMU data\n", rc);
703 dlen += rc;
705 if (dlen == blen) {
706 DPRINTF("%d-byte QEMU buffer full, reallocating...\n", dlen);
707 blen += 4096;
708 tmp = realloc(qbuf, blen);
709 if ( !tmp ) {
710 ERROR("Error growing QEMU buffer to %d bytes", blen);
711 free(qbuf);
712 return -1;
713 }
714 qbuf = tmp;
715 }
716 }
718 if ( rc < 0 ) {
719 ERROR("Error reading QEMU data");
720 free(qbuf);
721 return -1;
722 }
724 if ( memcmp(qbuf, "QEVM", 4) ) {
725 ERROR("Invalid QEMU magic: 0x%08x", *(unsigned int*)qbuf);
726 free(qbuf);
727 return -1;
728 }
730 buf->qemubuf = qbuf;
731 buf->qemubufsize = dlen;
733 return 0;
734 }
736 static int buffer_qemu(int fd, struct tailbuf_hvm *buf)
737 {
738 uint32_t qlen;
739 uint8_t *tmp;
741 if ( read_exact(fd, &qlen, sizeof(qlen)) ) {
742 ERROR("Error reading QEMU header length");
743 return -1;
744 }
746 if ( qlen > buf->qemubufsize ) {
747 if ( buf->qemubuf) {
748 tmp = realloc(buf->qemubuf, qlen);
749 if ( tmp )
750 buf->qemubuf = tmp;
751 else {
752 ERROR("Error reallocating QEMU state buffer");
753 return -1;
754 }
755 } else {
756 buf->qemubuf = malloc(qlen);
757 if ( !buf->qemubuf ) {
758 ERROR("Error allocating QEMU state buffer");
759 return -1;
760 }
761 }
762 }
763 buf->qemubufsize = qlen;
765 if ( read_exact(fd, buf->qemubuf, buf->qemubufsize) ) {
766 ERROR("Error reading QEMU state");
767 return -1;
768 }
770 return 0;
771 }
773 static int dump_qemu(uint32_t dom, struct tailbuf_hvm *buf)
774 {
775 int saved_errno;
776 char path[256];
777 FILE *fp;
779 sprintf(path, "/var/lib/xen/qemu-save.%u", dom);
780 fp = fopen(path, "wb");
781 if ( !fp )
782 return -1;
784 DPRINTF("Writing %d bytes of QEMU data\n", buf->qemubufsize);
785 if ( fwrite(buf->qemubuf, 1, buf->qemubufsize, fp) != buf->qemubufsize) {
786 saved_errno = errno;
787 fclose(fp);
788 errno = saved_errno;
789 return -1;
790 }
792 fclose(fp);
794 return 0;
795 }
797 static int buffer_tail_hvm(struct restore_ctx *ctx, struct tailbuf_hvm *buf, int fd,
798 unsigned int max_vcpu_id, uint64_t vcpumap,
799 int ext_vcpucontext)
800 {
801 uint8_t *tmp;
802 unsigned char qemusig[21];
804 if ( read_exact(fd, buf->magicpfns, sizeof(buf->magicpfns)) ) {
805 ERROR("Error reading magic PFNs");
806 return -1;
807 }
809 if ( read_exact(fd, &buf->reclen, sizeof(buf->reclen)) ) {
810 ERROR("Error reading HVM params size");
811 return -1;
812 }
814 if ( buf->reclen > buf->hvmbufsize ) {
815 if ( buf->hvmbuf) {
816 tmp = realloc(buf->hvmbuf, buf->reclen);
817 if ( tmp ) {
818 buf->hvmbuf = tmp;
819 buf->hvmbufsize = buf->reclen;
820 } else {
821 ERROR("Error reallocating HVM param buffer");
822 return -1;
823 }
824 } else {
825 buf->hvmbuf = malloc(buf->reclen);
826 if ( !buf->hvmbuf ) {
827 ERROR("Error allocating HVM param buffer");
828 return -1;
829 }
830 buf->hvmbufsize = buf->reclen;
831 }
832 }
834 if ( read_exact(fd, buf->hvmbuf, buf->reclen) ) {
835 ERROR("Error reading HVM params");
836 return -1;
837 }
839 if ( read_exact(fd, qemusig, sizeof(qemusig)) ) {
840 ERROR("Error reading QEMU signature");
841 return -1;
842 }
844 /* The normal live-migration QEMU record has no length information.
845 * Short of reimplementing the QEMU parser, we're forced to just read
846 * until EOF. Remus gets around this by sending a different signature
847 * which includes a length prefix */
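/* Both signature strings are 21 characters, matching sizeof(qemusig); the
 * error path below overwrites the last byte with a NUL so an unrecognised
 * signature can be printed. */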
848 if ( !memcmp(qemusig, "QemuDeviceModelRecord", sizeof(qemusig)) )
849 return compat_buffer_qemu(fd, buf);
850 else if ( !memcmp(qemusig, "RemusDeviceModelState", sizeof(qemusig)) )
851 return buffer_qemu(fd, buf);
853 qemusig[20] = '\0';
854 ERROR("Invalid QEMU signature: %s", qemusig);
855 return -1;
856 }
858 static int buffer_tail_pv(struct restore_ctx *ctx, struct tailbuf_pv *buf, int fd,
859 unsigned int max_vcpu_id, uint64_t vcpumap,
860 int ext_vcpucontext)
861 {
862 unsigned int i;
863 size_t pfnlen, vcpulen;
864 struct domain_info_context *dinfo = &ctx->dinfo;
866 /* TODO: handle changing pfntab and vcpu counts */
867 /* PFN tab */
868 if ( read_exact(fd, &buf->pfncount, sizeof(buf->pfncount)) ||
869 (buf->pfncount > (1U << 28)) ) /* up to 1TB of address space */
870 {
871 ERROR("Error when reading pfn count");
872 return -1;
873 }
874 pfnlen = sizeof(unsigned long) * buf->pfncount;
875 if ( !(buf->pfntab) ) {
876 if ( !(buf->pfntab = malloc(pfnlen)) ) {
877 ERROR("Error allocating PFN tail buffer");
878 return -1;
879 }
880 }
881 // DPRINTF("Reading PFN tab: %d bytes\n", pfnlen);
882 if ( read_exact(fd, buf->pfntab, pfnlen) ) {
883 ERROR("Error when reading pfntab");
884 goto free_pfntab;
885 }
887 /* VCPU contexts */
888 buf->vcpucount = 0;
889 for (i = 0; i <= max_vcpu_id; i++) {
890 // DPRINTF("vcpumap: %llx, cpu: %d, bit: %llu\n", vcpumap, i, (vcpumap % (1ULL << i)));
891 if ( (!(vcpumap & (1ULL << i))) )
892 continue;
893 buf->vcpucount++;
894 }
895 // DPRINTF("VCPU count: %d\n", buf->vcpucount);
896 vcpulen = ((dinfo->guest_width == 8) ? sizeof(vcpu_guest_context_x86_64_t)
897 : sizeof(vcpu_guest_context_x86_32_t)) * buf->vcpucount;
898 if ( ext_vcpucontext )
899 vcpulen += 128 * buf->vcpucount;
901 if ( !(buf->vcpubuf) ) {
902 if ( !(buf->vcpubuf = malloc(vcpulen)) ) {
903 ERROR("Error allocating VCPU ctxt tail buffer");
904 goto free_pfntab;
905 }
906 }
907 // DPRINTF("Reading VCPUS: %d bytes\n", vcpulen);
908 if ( read_exact(fd, buf->vcpubuf, vcpulen) ) {
909 ERROR("Error when reading ctxt");
910 goto free_vcpus;
911 }
913 /* load shared_info_page */
914 // DPRINTF("Reading shared info: %lu bytes\n", PAGE_SIZE);
915 if ( read_exact(fd, buf->shared_info_page, PAGE_SIZE) ) {
916 ERROR("Error when reading shared info page");
917 goto free_vcpus;
918 }
920 return 0;
922 free_vcpus:
923 if (buf->vcpubuf) {
924 free (buf->vcpubuf);
925 buf->vcpubuf = NULL;
926 }
927 free_pfntab:
928 if (buf->pfntab) {
929 free (buf->pfntab);
930 buf->pfntab = NULL;
931 }
933 return -1;
934 }
936 static int buffer_tail(struct restore_ctx *ctx, tailbuf_t *buf, int fd, unsigned int max_vcpu_id,
937 uint64_t vcpumap, int ext_vcpucontext)
938 {
939 if ( buf->ishvm )
940 return buffer_tail_hvm(ctx, &buf->u.hvm, fd, max_vcpu_id, vcpumap,
941 ext_vcpucontext);
942 else
943 return buffer_tail_pv(ctx, &buf->u.pv, fd, max_vcpu_id, vcpumap,
944 ext_vcpucontext);
945 }
947 static void tailbuf_free_hvm(struct tailbuf_hvm *buf)
948 {
949 if ( buf->hvmbuf ) {
950 free(buf->hvmbuf);
951 buf->hvmbuf = NULL;
952 }
953 if ( buf->qemubuf ) {
954 free(buf->qemubuf);
955 buf->qemubuf = NULL;
956 }
957 }
959 static void tailbuf_free_pv(struct tailbuf_pv *buf)
960 {
961 if ( buf->vcpubuf ) {
962 free(buf->vcpubuf);
963 buf->vcpubuf = NULL;
964 }
965 if ( buf->pfntab ) {
966 free(buf->pfntab);
967 buf->pfntab = NULL;
968 }
969 }
971 static void tailbuf_free(tailbuf_t *buf)
972 {
973 if ( buf->ishvm )
974 tailbuf_free_hvm(&buf->u.hvm);
975 else
976 tailbuf_free_pv(&buf->u.pv);
977 }
979 typedef struct {
980 void* pages;
981 /* pages is of length nr_physpages, pfn_types is of length nr_pages */
982 unsigned int nr_physpages, nr_pages;
984 /* Types of the pfns in the current region */
985 unsigned long* pfn_types;
987 int verify;
989 int new_ctxt_format;
990 int max_vcpu_id;
991 uint64_t vcpumap;
992 uint64_t identpt;
993 uint64_t vm86_tss;
994 } pagebuf_t;
996 static int pagebuf_init(pagebuf_t* buf)
997 {
998 memset(buf, 0, sizeof(*buf));
999 return 0;
1002 static void pagebuf_free(pagebuf_t* buf)
1004 if (buf->pages) {
1005 free(buf->pages);
1006 buf->pages = NULL;
1008 if(buf->pfn_types) {
1009 free(buf->pfn_types);
1010 buf->pfn_types = NULL;
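/*
 * Each batch starts with a signed 32-bit count: a positive value is the
 * number of pfn/type words that follow (with page data for the non-XTAB
 * entries after them), 0 marks the final batch, and negative values are
 * markers: -1 enables verify mode, -2 carries max_vcpu_id and the vcpu map,
 * -3 and -4 carry the EPT identity-map and vm86 TSS addresses (each preceded
 * by 4 bytes of padding), -5/-6 are tmem records, and -7 is the TSC info
 * record.
 */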
1014 static int pagebuf_get_one(pagebuf_t* buf, int fd, int xch, uint32_t dom)
1016 int count, countpages, oldcount, i;
1017 void* ptmp;
1019 if ( read_exact(fd, &count, sizeof(count)) )
1021 ERROR("Error when reading batch size");
1022 return -1;
1025 // DPRINTF("reading batch of %d pages\n", count);
1027 if (!count) {
1028 // DPRINTF("Last batch read\n");
1029 return 0;
1030 } else if (count == -1) {
1031 DPRINTF("Entering page verify mode\n");
1032 buf->verify = 1;
1033 return pagebuf_get_one(buf, fd, xch, dom);
1034 } else if (count == -2) {
1035 buf->new_ctxt_format = 1;
1036 if ( read_exact(fd, &buf->max_vcpu_id, sizeof(buf->max_vcpu_id)) ||
1037 buf->max_vcpu_id >= 64 || read_exact(fd, &buf->vcpumap,
1038 sizeof(uint64_t)) ) {
1039 ERROR("Error when reading max_vcpu_id");
1040 return -1;
1042 // DPRINTF("Max VCPU ID: %d, vcpumap: %llx\n", buf->max_vcpu_id, buf->vcpumap);
1043 return pagebuf_get_one(buf, fd, xch, dom);
1044 } else if (count == -3) {
1045 /* Skip the 4 padding bytes, then read the EPT identity PT location. */
1046 if ( read_exact(fd, &buf->identpt, sizeof(uint32_t)) ||
1047 read_exact(fd, &buf->identpt, sizeof(uint64_t)) )
1049 ERROR("error reading the address of the EPT identity map");
1050 return -1;
1052 // DPRINTF("EPT identity map address: %llx\n", buf->identpt);
1053 return pagebuf_get_one(buf, fd, xch, dom);
1054 } else if ( count == -4 ) {
1055 /* Skip the 4 padding bytes, then read the vm86 TSS location. */
1056 if ( read_exact(fd, &buf->vm86_tss, sizeof(uint32_t)) ||
1057 read_exact(fd, &buf->vm86_tss, sizeof(uint64_t)) )
1059 ERROR("error reading the address of the vm86 TSS");
1060 return -1;
1062 // DPRINTF("VM86 TSS location: %llx\n", buf->vm86_tss);
1063 return pagebuf_get_one(buf, fd, xch, dom);
1064 } else if ( count == -5 ) {
1065 DPRINTF("xc_domain_restore start tmem\n");
1066 if ( xc_tmem_restore(xch, dom, fd) ) {
1067 ERROR("error reading/restoring tmem");
1068 return -1;
1070 return pagebuf_get_one(buf, fd, xch, dom);
1072 else if ( count == -6 ) {
1073 if ( xc_tmem_restore_extra(xch, dom, fd) ) {
1074 ERROR("error reading/restoring tmem extra");
1075 return -1;
1077 return pagebuf_get_one(buf, fd, xch, dom);
1078 } else if ( count == -7 ) {
1079 uint32_t tsc_mode, khz, incarn;
1080 uint64_t nsec;
1081 if ( read_exact(fd, &tsc_mode, sizeof(uint32_t)) ||
1082 read_exact(fd, &nsec, sizeof(uint64_t)) ||
1083 read_exact(fd, &khz, sizeof(uint32_t)) ||
1084 read_exact(fd, &incarn, sizeof(uint32_t)) ||
1085 xc_domain_set_tsc_info(xch, dom, tsc_mode, nsec, khz, incarn) ) {
1086 ERROR("error reading/restoring tsc info");
1087 return -1;
1089 return pagebuf_get_one(buf, fd, xch, dom);
1090 } else if ( (count > MAX_BATCH_SIZE) || (count < 0) ) {
1091 ERROR("Max batch size exceeded (%d). Giving up.", count);
1092 return -1;
1095 oldcount = buf->nr_pages;
1096 buf->nr_pages += count;
1097 if (!buf->pfn_types) {
1098 if (!(buf->pfn_types = malloc(buf->nr_pages * sizeof(*(buf->pfn_types))))) {
1099 ERROR("Could not allocate PFN type buffer");
1100 return -1;
1102 } else {
1103 if (!(ptmp = realloc(buf->pfn_types, buf->nr_pages * sizeof(*(buf->pfn_types))))) {
1104 ERROR("Could not reallocate PFN type buffer");
1105 return -1;
1107 buf->pfn_types = ptmp;
1109 if ( read_exact(fd, buf->pfn_types + oldcount, count * sizeof(*(buf->pfn_types)))) {
1110 ERROR("Error when reading region pfn types");
1111 return -1;
1114 countpages = count;
1115 for (i = oldcount; i < buf->nr_pages; ++i)
1116 if ((buf->pfn_types[i] & XEN_DOMCTL_PFINFO_LTAB_MASK) == XEN_DOMCTL_PFINFO_XTAB)
1117 --countpages;
1119 if (!countpages)
1120 return count;
1122 oldcount = buf->nr_physpages;
1123 buf->nr_physpages += countpages;
1124 if (!buf->pages) {
1125 if (!(buf->pages = malloc(buf->nr_physpages * PAGE_SIZE))) {
1126 ERROR("Could not allocate page buffer");
1127 return -1;
1129 } else {
1130 if (!(ptmp = realloc(buf->pages, buf->nr_physpages * PAGE_SIZE))) {
1131 ERROR("Could not reallocate page buffer");
1132 return -1;
1134 buf->pages = ptmp;
1136 if ( read_exact(fd, buf->pages + oldcount * PAGE_SIZE, countpages * PAGE_SIZE) ) {
1137 ERROR("Error when reading pages");
1138 return -1;
1141 return count;
1144 static int pagebuf_get(pagebuf_t* buf, int fd, int xch, uint32_t dom)
1146 int rc;
1148 buf->nr_physpages = buf->nr_pages = 0;
1150 do {
1151 rc = pagebuf_get_one(buf, fd, xch, dom);
1152 } while (rc > 0);
1154 if (rc < 0)
1155 pagebuf_free(buf);
1157 return rc;
1160 static int apply_batch(int xc_handle, uint32_t dom, struct restore_ctx *ctx,
1161 xen_pfn_t* region_mfn, unsigned long* pfn_type, int pae_extended_cr3,
1162 unsigned int hvm, struct xc_mmu* mmu,
1163 pagebuf_t* pagebuf, int curbatch, int superpages)
1165 int i, j, curpage;
1166 /* used by debug verify code */
1167 unsigned long buf[PAGE_SIZE/sizeof(unsigned long)];
1168 /* Our mapping of the current region (batch) */
1169 char *region_base;
1170 /* A temporary mapping, and a copy, of one frame of guest memory. */
1171 unsigned long *page = NULL;
1172 int nraces = 0;
1173 struct domain_info_context *dinfo = &ctx->dinfo;
1175 unsigned long mfn, pfn, pagetype;
1177 j = pagebuf->nr_pages - curbatch;
1178 if (j > MAX_BATCH_SIZE)
1179 j = MAX_BATCH_SIZE;
1181 if (allocate_physmem(xc_handle, dom, ctx, &pagebuf->pfn_types[curbatch],
1182 j, hvm, region_mfn, superpages) != 0)
1184 ERROR("allocate_physmem() failed\n");
1185 return -1;
1188 /* Map relevant mfns */
1189 region_base = xc_map_foreign_pages(
1190 xc_handle, dom, PROT_WRITE, region_mfn, j);
1192 if ( region_base == NULL )
1194 ERROR("map batch failed");
1195 return -1;
1198 for ( i = 0, curpage = -1; i < j; i++ )
1200 pfn = pagebuf->pfn_types[i + curbatch] & ~XEN_DOMCTL_PFINFO_LTAB_MASK;
1201 pagetype = pagebuf->pfn_types[i + curbatch] & XEN_DOMCTL_PFINFO_LTAB_MASK;
1203 if ( pagetype == XEN_DOMCTL_PFINFO_XTAB )
1204 /* a bogus/unmapped page: skip it */
1205 continue;
1207 ++curpage;
1209 if ( pfn > dinfo->p2m_size )
1211 ERROR("pfn out of range");
1212 return -1;
1215 pfn_type[pfn] = pagetype;
1217 mfn = ctx->p2m[pfn];
1219 /* In verify mode, we use a copy; otherwise we work in place */
1220 page = pagebuf->verify ? (void *)buf : (region_base + i*PAGE_SIZE);
1222 memcpy(page, pagebuf->pages + (curpage + curbatch) * PAGE_SIZE, PAGE_SIZE);
1224 pagetype &= XEN_DOMCTL_PFINFO_LTABTYPE_MASK;
1226 if ( (pagetype >= XEN_DOMCTL_PFINFO_L1TAB) &&
1227 (pagetype <= XEN_DOMCTL_PFINFO_L4TAB) )
1229 /*
1230 ** A page table page - need to 'uncanonicalize' it, i.e.
1231 ** replace all the references to pfns with the corresponding
1232 ** mfns for the new domain.
1233 **
1234 ** On PAE we need to ensure that PGDs are in MFNs < 4G, and
1235 ** so we may need to update the p2m after the main loop.
1236 ** Hence we defer canonicalization of L1s until then.
1237 */
1238 if ((ctx->pt_levels != 3) ||
1239 pae_extended_cr3 ||
1240 (pagetype != XEN_DOMCTL_PFINFO_L1TAB)) {
1242 if (!uncanonicalize_pagetable(xc_handle, dom, ctx,
1243 page, superpages)) {
1244 /*
1245 ** Failing to uncanonicalize a page table can be ok
1246 ** under live migration since the pages type may have
1247 ** changed by now (and we'll get an update later).
1248 */
1249 DPRINTF("PT L%ld race on pfn=%08lx mfn=%08lx\n",
1250 pagetype >> 28, pfn, mfn);
1251 nraces++;
1252 continue;
1256 else if ( pagetype != XEN_DOMCTL_PFINFO_NOTAB )
1258 ERROR("Bogus page type %lx; page table is out of range: "
1259 "i=%d p2m_size=%lu", pagetype, i, dinfo->p2m_size);
1260 return -1;
1263 if ( pagebuf->verify )
1265 int res = memcmp(buf, (region_base + i*PAGE_SIZE), PAGE_SIZE);
1266 if ( res )
1268 int v;
1270 DPRINTF("************** pfn=%lx type=%lx gotcs=%08lx "
1271 "actualcs=%08lx\n", pfn, pagebuf->pfn_types[pfn],
1272 csum_page(region_base + (i + curbatch)*PAGE_SIZE),
1273 csum_page(buf));
1275 for ( v = 0; v < 4; v++ )
1277 unsigned long *p = (unsigned long *)
1278 (region_base + i*PAGE_SIZE);
1279 if ( buf[v] != p[v] )
1280 DPRINTF(" %d: %08lx %08lx\n", v, buf[v], p[v]);
1285 if ( !hvm &&
1286 xc_add_mmu_update(xc_handle, mmu,
1287 (((unsigned long long)mfn) << PAGE_SHIFT)
1288 | MMU_MACHPHYS_UPDATE, pfn) )
1290 ERROR("failed machphys update mfn=%lx pfn=%lx", mfn, pfn);
1291 return -1;
1293 } /* end of 'batch' for loop */
1295 munmap(region_base, j*PAGE_SIZE);
1297 return nraces;
1300 int xc_domain_restore(int xc_handle, int io_fd, uint32_t dom,
1301 unsigned int store_evtchn, unsigned long *store_mfn,
1302 unsigned int console_evtchn, unsigned long *console_mfn,
1303 unsigned int hvm, unsigned int pae, int superpages)
1305 DECLARE_DOMCTL;
1306 int rc = 1, frc, i, j, n, m, pae_extended_cr3 = 0, ext_vcpucontext = 0;
1307 unsigned long mfn, pfn;
1308 unsigned int prev_pc, this_pc;
1309 int nraces = 0;
1311 /* The new domain's shared-info frame number. */
1312 unsigned long shared_info_frame;
1313 unsigned char shared_info_page[PAGE_SIZE]; /* saved contents from file */
1314 shared_info_any_t *old_shared_info =
1315 (shared_info_any_t *)shared_info_page;
1316 shared_info_any_t *new_shared_info;
1318 /* A copy of the CPU context of the guest. */
1319 vcpu_guest_context_any_t ctxt;
1321 /* A table containing the type of each PFN (/not/ MFN!). */
1322 unsigned long *pfn_type = NULL;
1324 /* A table of MFNs to map in the current region */
1325 xen_pfn_t *region_mfn = NULL;
1327 /* A copy of the pfn-to-mfn table frame list. */
1328 xen_pfn_t *p2m_frame_list = NULL;
1330 /* A temporary mapping of the guest's start_info page. */
1331 start_info_any_t *start_info;
1333 /* Our mapping of the current region (batch) */
1334 char *region_base;
1336 struct xc_mmu *mmu = NULL;
1338 struct mmuext_op pin[MAX_PIN_BATCH];
1339 unsigned int nr_pins;
1341 uint64_t vcpumap = 1ULL;
1342 unsigned int max_vcpu_id = 0;
1343 int new_ctxt_format = 0;
1345 pagebuf_t pagebuf;
1346 tailbuf_t tailbuf, tmptail;
1347 void* vcpup;
1349 static struct restore_ctx _ctx = {
1350 .live_p2m = NULL,
1351 .p2m = NULL,
1352 .no_superpage_mem = 0,
1353 };
1354 static struct restore_ctx *ctx = &_ctx;
1355 struct domain_info_context *dinfo = &ctx->dinfo;
1357 pagebuf_init(&pagebuf);
1358 memset(&tailbuf, 0, sizeof(tailbuf));
1359 tailbuf.ishvm = hvm;
1361 /* For info only */
1362 ctx->nr_pfns = 0;
1364 /* Always try to allocate 2M pages for HVM */
1365 if ( hvm )
1366 superpages = 1;
1368 if ( read_exact(io_fd, &dinfo->p2m_size, sizeof(unsigned long)) )
1370 ERROR("read: p2m_size");
1371 goto out;
1373 DPRINTF("xc_domain_restore start: p2m_size = %lx\n", dinfo->p2m_size);
1375 if ( !get_platform_info(xc_handle, dom,
1376 &ctx->max_mfn, &ctx->hvirt_start, &ctx->pt_levels, &dinfo->guest_width) )
1378 ERROR("Unable to get platform info.");
1379 return 1;
1382 /* The *current* word size of the guest isn't very interesting; for now
1383 * assume the guest will be the same as we are. We'll fix that later
1384 * if we discover otherwise. */
1385 dinfo->guest_width = sizeof(unsigned long);
1386 ctx->pt_levels = (dinfo->guest_width == 8) ? 4 : (ctx->pt_levels == 2) ? 2 : 3;
1388 if ( !hvm )
1390 /* Load the p2m frame list, plus potential extended info chunk */
1391 p2m_frame_list = load_p2m_frame_list(ctx,
1392 io_fd, &pae_extended_cr3, &ext_vcpucontext);
1393 if ( !p2m_frame_list )
1394 goto out;
1396 /* Now that we know the word size, tell Xen about it */
1397 memset(&domctl, 0, sizeof(domctl));
1398 domctl.domain = dom;
1399 domctl.cmd = XEN_DOMCTL_set_address_size;
1400 domctl.u.address_size.size = dinfo->guest_width * 8;
1401 frc = do_domctl(xc_handle, &domctl);
1402 if ( frc != 0 )
1404 ERROR("Unable to set guest address size.");
1405 goto out;
1409 /* We want zeroed memory so use calloc rather than malloc. */
1410 ctx->p2m = calloc(dinfo->p2m_size, sizeof(xen_pfn_t));
1411 pfn_type = calloc(dinfo->p2m_size, sizeof(unsigned long));
1413 region_mfn = xg_memalign(PAGE_SIZE, ROUNDUP(
1414 MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
1416 if ( (ctx->p2m == NULL) || (pfn_type == NULL) ||
1417 (region_mfn == NULL) )
1419 ERROR("memory alloc failed");
1420 errno = ENOMEM;
1421 goto out;
1424 memset(region_mfn, 0,
1425 ROUNDUP(MAX_BATCH_SIZE * sizeof(xen_pfn_t), PAGE_SHIFT));
1427 if ( lock_pages(region_mfn, sizeof(xen_pfn_t) * MAX_BATCH_SIZE) )
1429 ERROR("Could not lock region_mfn");
1430 goto out;
1433 /* Get the domain's shared-info frame. */
1434 domctl.cmd = XEN_DOMCTL_getdomaininfo;
1435 domctl.domain = (domid_t)dom;
1436 if ( xc_domctl(xc_handle, &domctl) < 0 )
1438 ERROR("Could not get information on new domain");
1439 goto out;
1441 shared_info_frame = domctl.u.getdomaininfo.shared_info_frame;
1443 /* Mark all PFNs as invalid; we allocate on demand */
1444 for ( pfn = 0; pfn < dinfo->p2m_size; pfn++ )
1445 ctx->p2m[pfn] = INVALID_P2M_ENTRY;
1447 mmu = xc_alloc_mmu_updates(xc_handle, dom);
1448 if ( mmu == NULL )
1450 ERROR("Could not initialise for MMU updates");
1451 goto out;
1454 DPRINTF("Reloading memory pages: 0%%\n");
1456 /*
1457 * Now simply read each saved frame into its new machine frame.
1458 * We uncanonicalise page tables as we go.
1459 */
1460 prev_pc = 0;
1462 n = m = 0;
1463 loadpages:
1464 for ( ; ; )
1466 int j, curbatch;
1468 this_pc = (n * 100) / dinfo->p2m_size;
1469 if ( (this_pc - prev_pc) >= 5 )
1471 PPRINTF("\b\b\b\b%3d%%", this_pc);
1472 prev_pc = this_pc;
1475 if ( !completed ) {
1476 pagebuf.nr_physpages = pagebuf.nr_pages = 0;
1477 if ( pagebuf_get_one(&pagebuf, io_fd, xc_handle, dom) < 0 ) {
1478 ERROR("Error when reading batch\n");
1479 goto out;
1482 j = pagebuf.nr_pages;
1484 PPRINTF("batch %d\n",j);
1486 if ( j == 0 ) {
1487 /* catch vcpu updates */
1488 if (pagebuf.new_ctxt_format) {
1489 vcpumap = pagebuf.vcpumap;
1490 max_vcpu_id = pagebuf.max_vcpu_id;
1492 /* should this be deferred? does it change? */
1493 if ( pagebuf.identpt )
1494 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT, pagebuf.identpt);
1495 if ( pagebuf.vm86_tss )
1496 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, pagebuf.vm86_tss);
1497 break; /* our work here is done */
1500 /* break pagebuf into batches */
1501 curbatch = 0;
1502 while ( curbatch < j ) {
1503 int brc;
1505 brc = apply_batch(xc_handle, dom, ctx, region_mfn, pfn_type,
1506 pae_extended_cr3, hvm, mmu, &pagebuf, curbatch, superpages);
1507 if ( brc < 0 )
1508 goto out;
1510 nraces += brc;
1512 curbatch += MAX_BATCH_SIZE;
1515 pagebuf.nr_physpages = pagebuf.nr_pages = 0;
1517 n += j; /* crude stats */
1519 /*
1520 * Discard cache for portion of file read so far up to last
1521 * page boundary every 16MB or so.
1522 */
1523 m += j;
1524 if ( m > MAX_PAGECACHE_USAGE )
1526 discard_file_cache(io_fd, 0 /* no flush */);
1527 m = 0;
1531 /*
1532 * Ensure we flush all machphys updates before potential PAE-specific
1533 * reallocations below.
1534 */
1535 if ( !hvm && xc_flush_mmu_updates(xc_handle, mmu) )
1537 ERROR("Error doing flush_mmu_updates()");
1538 goto out;
1541 // DPRINTF("Received all pages (%d races)\n", nraces);
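/*
 * After the first complete image the tail (vcpu contexts etc.) is buffered,
 * the fd is switched to non-blocking, and we loop back to loadpages to
 * consume further checkpoints (Remus). Each newly completed checkpoint
 * replaces the buffered tail; on a stream error we fall through to "finish"
 * and restore from the last consistent state.
 */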
1543 if ( !completed ) {
1544 int flags = 0;
1546 if ( buffer_tail(ctx, &tailbuf, io_fd, max_vcpu_id, vcpumap,
1547 ext_vcpucontext) < 0 ) {
1548 ERROR ("error buffering image tail");
1549 goto out;
1551 completed = 1;
1552 /* shift into nonblocking mode for the remainder */
1553 if ( (flags = fcntl(io_fd, F_GETFL,0)) < 0 )
1554 flags = 0;
1555 fcntl(io_fd, F_SETFL, flags | O_NONBLOCK);
1558 // DPRINTF("Buffered checkpoint\n");
1560 if ( pagebuf_get(&pagebuf, io_fd, xc_handle, dom) ) {
1561 ERROR("error when buffering batch, finishing\n");
1562 goto finish;
1564 memset(&tmptail, 0, sizeof(tmptail));
1565 tmptail.ishvm = hvm;
1566 if ( buffer_tail(ctx, &tmptail, io_fd, max_vcpu_id, vcpumap,
1567 ext_vcpucontext) < 0 ) {
1568 ERROR ("error buffering image tail, finishing");
1569 goto finish;
1571 tailbuf_free(&tailbuf);
1572 memcpy(&tailbuf, &tmptail, sizeof(tailbuf));
1574 goto loadpages;
1576 finish:
1577 if ( hvm )
1578 goto finish_hvm;
1580 if ( (ctx->pt_levels == 3) && !pae_extended_cr3 )
1582 /*
1583 ** XXX SMH on PAE we need to ensure PGDs are in MFNs < 4G. This
1584 ** is a little awkward and involves (a) finding all such PGDs and
1585 ** replacing them with 'lowmem' versions; (b) updating the p2m[]
1586 ** with the new info; and (c) canonicalizing all the L1s using the
1587 ** (potentially updated) p2m[].
1588 **
1589 ** This is relatively slow (and currently involves two passes through
1590 ** the pfn_type[] array), but at least seems to be correct. May wish
1591 ** to consider more complex approaches to optimize this later.
1592 */
1594 int j, k;
1596 /* First pass: find all L3TABs current in > 4G mfns and get new mfns */
1597 for ( i = 0; i < dinfo->p2m_size; i++ )
1599 if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
1600 XEN_DOMCTL_PFINFO_L3TAB) &&
1601 (ctx->p2m[i] > 0xfffffUL) )
1603 unsigned long new_mfn;
1604 uint64_t l3ptes[4];
1605 uint64_t *l3tab;
1607 l3tab = (uint64_t *)
1608 xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
1609 PROT_READ, ctx->p2m[i]);
1611 for ( j = 0; j < 4; j++ )
1612 l3ptes[j] = l3tab[j];
1614 munmap(l3tab, PAGE_SIZE);
1616 new_mfn = xc_make_page_below_4G(xc_handle, dom, ctx->p2m[i]);
1617 if ( !new_mfn )
1619 ERROR("Couldn't get a page below 4GB :-(");
1620 goto out;
1623 ctx->p2m[i] = new_mfn;
1624 if ( xc_add_mmu_update(xc_handle, mmu,
1625 (((unsigned long long)new_mfn)
1626 << PAGE_SHIFT) |
1627 MMU_MACHPHYS_UPDATE, i) )
1629 ERROR("Couldn't m2p on PAE root pgdir");
1630 goto out;
1633 l3tab = (uint64_t *)
1634 xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
1635 PROT_READ | PROT_WRITE, ctx->p2m[i]);
1637 for ( j = 0; j < 4; j++ )
1638 l3tab[j] = l3ptes[j];
1640 munmap(l3tab, PAGE_SIZE);
1644 /* Second pass: find all L1TABs and uncanonicalize them */
1645 j = 0;
1647 for ( i = 0; i < dinfo->p2m_size; i++ )
1649 if ( ((pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
1650 XEN_DOMCTL_PFINFO_L1TAB) )
1652 region_mfn[j] = ctx->p2m[i];
1653 j++;
1656 if ( (i == (dinfo->p2m_size-1)) || (j == MAX_BATCH_SIZE) )
1658 region_base = xc_map_foreign_pages(
1659 xc_handle, dom, PROT_READ | PROT_WRITE, region_mfn, j);
1660 if ( region_base == NULL )
1662 ERROR("map batch failed");
1663 goto out;
1666 for ( k = 0; k < j; k++ )
1668 if ( !uncanonicalize_pagetable(
1669 xc_handle, dom, ctx,
1670 region_base + k*PAGE_SIZE, superpages) )
1672 ERROR("failed to uncanonicalize page table!");
1673 goto out;
1677 munmap(region_base, j*PAGE_SIZE);
1678 j = 0;
1682 if ( xc_flush_mmu_updates(xc_handle, mmu) )
1684 ERROR("Error doing xc_flush_mmu_updates()");
1685 goto out;
1689 /*
1690 * Pin page tables. Do this after writing to them as otherwise Xen
1691 * will barf when doing the type-checking.
1692 */
1693 nr_pins = 0;
1694 for ( i = 0; i < dinfo->p2m_size; i++ )
1696 if ( (pfn_type[i] & XEN_DOMCTL_PFINFO_LPINTAB) == 0 )
1697 continue;
1699 switch ( pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
1701 case XEN_DOMCTL_PFINFO_L1TAB:
1702 pin[nr_pins].cmd = MMUEXT_PIN_L1_TABLE;
1703 break;
1705 case XEN_DOMCTL_PFINFO_L2TAB:
1706 pin[nr_pins].cmd = MMUEXT_PIN_L2_TABLE;
1707 break;
1709 case XEN_DOMCTL_PFINFO_L3TAB:
1710 pin[nr_pins].cmd = MMUEXT_PIN_L3_TABLE;
1711 break;
1713 case XEN_DOMCTL_PFINFO_L4TAB:
1714 pin[nr_pins].cmd = MMUEXT_PIN_L4_TABLE;
1715 break;
1717 default:
1718 continue;
1721 pin[nr_pins].arg1.mfn = ctx->p2m[i];
1722 nr_pins++;
1724 /* Batch full? Then flush. */
1725 if ( nr_pins == MAX_PIN_BATCH )
1727 if ( xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0 )
1729 ERROR("Failed to pin batch of %d page tables", nr_pins);
1730 goto out;
1732 nr_pins = 0;
1736 /* Flush final partial batch. */
1737 if ( (nr_pins != 0) && (xc_mmuext_op(xc_handle, pin, nr_pins, dom) < 0) )
1739 ERROR("Failed to pin batch of %d page tables", nr_pins);
1740 goto out;
1743 DPRINTF("\b\b\b\b100%%\n");
1744 DPRINTF("Memory reloaded (%ld pages)\n", ctx->nr_pfns);
1746 /* Get the list of PFNs that are not in the pseudo-phys map */
1748 int nr_frees = 0;
1750 for ( i = 0; i < tailbuf.u.pv.pfncount; i++ )
1752 unsigned long pfn = tailbuf.u.pv.pfntab[i];
1754 if ( ctx->p2m[pfn] != INVALID_P2M_ENTRY )
1756 /* pfn is not in physmap now, but was at some point during
1757 the save/migration process - need to free it */
1758 tailbuf.u.pv.pfntab[nr_frees++] = ctx->p2m[pfn];
1759 ctx->p2m[pfn] = INVALID_P2M_ENTRY; /* not in pseudo-physical map */
1763 if ( nr_frees > 0 )
1765 struct xen_memory_reservation reservation = {
1766 .nr_extents = nr_frees,
1767 .extent_order = 0,
1768 .domid = dom
1769 };
1770 set_xen_guest_handle(reservation.extent_start, tailbuf.u.pv.pfntab);
1772 if ( (frc = xc_memory_op(xc_handle, XENMEM_decrease_reservation,
1773 &reservation)) != nr_frees )
1775 ERROR("Could not decrease reservation : %d", frc);
1776 goto out;
1778 else
1779 DPRINTF("Decreased reservation by %d pages\n", tailbuf.u.pv.pfncount);
1783 if ( lock_pages(&ctxt, sizeof(ctxt)) )
1785 ERROR("Unable to lock ctxt");
1786 return 1;
1789 vcpup = tailbuf.u.pv.vcpubuf;
1790 for ( i = 0; i <= max_vcpu_id; i++ )
1792 if ( !(vcpumap & (1ULL << i)) )
1793 continue;
1795 memcpy(&ctxt, vcpup, ((dinfo->guest_width == 8) ? sizeof(ctxt.x64)
1796 : sizeof(ctxt.x32)));
1797 vcpup += (dinfo->guest_width == 8) ? sizeof(ctxt.x64) : sizeof(ctxt.x32);
1799 DPRINTF("read VCPU %d\n", i);
1801 if ( !new_ctxt_format )
1802 SET_FIELD(&ctxt, flags, GET_FIELD(&ctxt, flags) | VGCF_online);
1804 if ( i == 0 )
1806 /*
1807 * Uncanonicalise the suspend-record frame number and poke
1808 * resume record.
1809 */
1810 pfn = GET_FIELD(&ctxt, user_regs.edx);
1811 if ( (pfn >= dinfo->p2m_size) ||
1812 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
1814 ERROR("Suspend record frame number is bad");
1815 goto out;
1817 mfn = ctx->p2m[pfn];
1818 SET_FIELD(&ctxt, user_regs.edx, mfn);
1819 start_info = xc_map_foreign_range(
1820 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE, mfn);
1821 SET_FIELD(start_info, nr_pages, dinfo->p2m_size);
1822 SET_FIELD(start_info, shared_info, shared_info_frame<<PAGE_SHIFT);
1823 SET_FIELD(start_info, flags, 0);
1824 *store_mfn = ctx->p2m[GET_FIELD(start_info, store_mfn)];
1825 SET_FIELD(start_info, store_mfn, *store_mfn);
1826 SET_FIELD(start_info, store_evtchn, store_evtchn);
1827 *console_mfn = ctx->p2m[GET_FIELD(start_info, console.domU.mfn)];
1828 SET_FIELD(start_info, console.domU.mfn, *console_mfn);
1829 SET_FIELD(start_info, console.domU.evtchn, console_evtchn);
1830 munmap(start_info, PAGE_SIZE);
1832 /* Uncanonicalise each GDT frame number. */
1833 if ( GET_FIELD(&ctxt, gdt_ents) > 8192 )
1835 ERROR("GDT entry count out of range");
1836 goto out;
1839 for ( j = 0; (512*j) < GET_FIELD(&ctxt, gdt_ents); j++ )
1841 pfn = GET_FIELD(&ctxt, gdt_frames[j]);
1842 if ( (pfn >= dinfo->p2m_size) ||
1843 (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
1845 ERROR("GDT frame number %i (0x%lx) is bad",
1846 j, (unsigned long)pfn);
1847 goto out;
1849 SET_FIELD(&ctxt, gdt_frames[j], ctx->p2m[pfn]);
1851 /* Uncanonicalise the page table base pointer. */
1852 pfn = UNFOLD_CR3(GET_FIELD(&ctxt, ctrlreg[3]));
1854 if ( pfn >= dinfo->p2m_size )
1856 ERROR("PT base is bad: pfn=%lu p2m_size=%lu type=%08lx",
1857 pfn, dinfo->p2m_size, pfn_type[pfn]);
1858 goto out;
1861 if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
1862 ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
1864 ERROR("PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
1865 pfn, dinfo->p2m_size, pfn_type[pfn],
1866 (unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
1867 goto out;
1869 SET_FIELD(&ctxt, ctrlreg[3], FOLD_CR3(ctx->p2m[pfn]));
1871 /* Guest pagetable (x86/64) stored in otherwise-unused CR1. */
1872 if ( (ctx->pt_levels == 4) && (ctxt.x64.ctrlreg[1] & 1) )
1874 pfn = UNFOLD_CR3(ctxt.x64.ctrlreg[1] & ~1);
1875 if ( pfn >= dinfo->p2m_size )
1877 ERROR("User PT base is bad: pfn=%lu p2m_size=%lu",
1878 pfn, dinfo->p2m_size);
1879 goto out;
1881 if ( (pfn_type[pfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) !=
1882 ((unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT) )
1884 ERROR("User PT base is bad. pfn=%lu nr=%lu type=%08lx %08lx",
1885 pfn, dinfo->p2m_size, pfn_type[pfn],
1886 (unsigned long)ctx->pt_levels<<XEN_DOMCTL_PFINFO_LTAB_SHIFT);
1887 goto out;
1889 ctxt.x64.ctrlreg[1] = FOLD_CR3(ctx->p2m[pfn]);
1891 domctl.cmd = XEN_DOMCTL_setvcpucontext;
1892 domctl.domain = (domid_t)dom;
1893 domctl.u.vcpucontext.vcpu = i;
1894 set_xen_guest_handle(domctl.u.vcpucontext.ctxt, &ctxt.c);
1895 frc = xc_domctl(xc_handle, &domctl);
1896 if ( frc != 0 )
1898 ERROR("Couldn't build vcpu%d", i);
1899 goto out;
1902 if ( !ext_vcpucontext )
1903 continue;
1904 memcpy(&domctl.u.ext_vcpucontext, vcpup, 128);
1905 vcpup += 128;
1906 domctl.cmd = XEN_DOMCTL_set_ext_vcpucontext;
1907 domctl.domain = dom;
1908 frc = xc_domctl(xc_handle, &domctl);
1909 if ( frc != 0 )
1911 ERROR("Couldn't set extended vcpu%d info\n", i);
1912 goto out;
1916 memcpy(shared_info_page, tailbuf.u.pv.shared_info_page, PAGE_SIZE);
1918 DPRINTF("Completed checkpoint load\n");
1920 /* Restore contents of shared-info page. No checking needed. */
1921 new_shared_info = xc_map_foreign_range(
1922 xc_handle, dom, PAGE_SIZE, PROT_WRITE, shared_info_frame);
1924 /* restore saved vcpu_info and arch specific info */
1925 MEMCPY_FIELD(new_shared_info, old_shared_info, vcpu_info);
1926 MEMCPY_FIELD(new_shared_info, old_shared_info, arch);
1928 /* clear any pending events and the selector */
1929 MEMSET_ARRAY_FIELD(new_shared_info, evtchn_pending, 0);
1930 for ( i = 0; i < XEN_LEGACY_MAX_VCPUS; i++ )
1931 SET_FIELD(new_shared_info, vcpu_info[i].evtchn_pending_sel, 0);
1933 /* mask event channels */
1934 MEMSET_ARRAY_FIELD(new_shared_info, evtchn_mask, 0xff);
1936 /* leave wallclock time alone; it is set by the hypervisor */
1937 munmap(new_shared_info, PAGE_SIZE);
1939 /* Uncanonicalise the pfn-to-mfn table frame-number list. */
1940 for ( i = 0; i < P2M_FL_ENTRIES; i++ )
1942 pfn = p2m_frame_list[i];
1943 if ( (pfn >= dinfo->p2m_size) || (pfn_type[pfn] != XEN_DOMCTL_PFINFO_NOTAB) )
1945 ERROR("PFN-to-MFN frame number %i (%#lx) is bad", i, pfn);
1946 goto out;
1948 p2m_frame_list[i] = ctx->p2m[pfn];
1951 /* Copy the P2M we've constructed to the 'live' P2M */
1952 if ( !(ctx->live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_WRITE,
1953 p2m_frame_list, P2M_FL_ENTRIES)) )
1955 ERROR("Couldn't map p2m table");
1956 goto out;
1959 /* If the domain we're restoring has a different word size to ours,
1960 * we need to adjust the live_p2m assignment appropriately */
1961 if ( dinfo->guest_width > sizeof (xen_pfn_t) )
1962 for ( i = dinfo->p2m_size - 1; i >= 0; i-- )
1963 ((int64_t *)ctx->live_p2m)[i] = (long)ctx->p2m[i];
1964 else if ( dinfo->guest_width < sizeof (xen_pfn_t) )
1965 for ( i = 0; i < dinfo->p2m_size; i++ )
1966 ((uint32_t *)ctx->live_p2m)[i] = ctx->p2m[i];
1967 else
1968 memcpy(ctx->live_p2m, ctx->p2m, dinfo->p2m_size * sizeof(xen_pfn_t));
1969 munmap(ctx->live_p2m, P2M_FL_ENTRIES * PAGE_SIZE);
1971 DPRINTF("Domain ready to be built.\n");
1972 rc = 0;
1973 goto out;
1975 finish_hvm:
1976 /* Dump the QEMU state to a state file for QEMU to load */
1977 if ( dump_qemu(dom, &tailbuf.u.hvm) ) {
1978 ERROR("Error dumping QEMU state to file");
1979 goto out;
1982 /* These comms pages need to be zeroed at the start of day */
1983 if ( xc_clear_domain_page(xc_handle, dom, tailbuf.u.hvm.magicpfns[0]) ||
1984 xc_clear_domain_page(xc_handle, dom, tailbuf.u.hvm.magicpfns[1]) ||
1985 xc_clear_domain_page(xc_handle, dom, tailbuf.u.hvm.magicpfns[2]) )
1987 ERROR("error zeroing magic pages");
1988 goto out;
1991 if ( (frc = xc_set_hvm_param(xc_handle, dom,
1992 HVM_PARAM_IOREQ_PFN, tailbuf.u.hvm.magicpfns[0]))
1993 || (frc = xc_set_hvm_param(xc_handle, dom,
1994 HVM_PARAM_BUFIOREQ_PFN, tailbuf.u.hvm.magicpfns[1]))
1995 || (frc = xc_set_hvm_param(xc_handle, dom,
1996 HVM_PARAM_STORE_PFN, tailbuf.u.hvm.magicpfns[2]))
1997 || (frc = xc_set_hvm_param(xc_handle, dom,
1998 HVM_PARAM_PAE_ENABLED, pae))
1999 || (frc = xc_set_hvm_param(xc_handle, dom,
2000 HVM_PARAM_STORE_EVTCHN,
2001 store_evtchn)) )
2003 ERROR("error setting HVM params: %i", frc);
2004 goto out;
2006 *store_mfn = tailbuf.u.hvm.magicpfns[2];
2008 frc = xc_domain_hvm_setcontext(xc_handle, dom, tailbuf.u.hvm.hvmbuf,
2009 tailbuf.u.hvm.reclen);
2010 if ( frc )
2012 ERROR("error setting the HVM context");
2013 goto out;
2016 /* HVM success! */
2017 rc = 0;
2019 out:
2020 if ( (rc != 0) && (dom != 0) )
2021 xc_domain_destroy(xc_handle, dom);
2022 free(mmu);
2023 free(ctx->p2m);
2024 free(pfn_type);
2025 tailbuf_free(&tailbuf);
2027 /* discard cache for save file */
2028 discard_file_cache(io_fd, 1 /*flush*/);
2030 DPRINTF("Restore exit with rc=%d\n", rc);
2032 return rc;
2034 /*
2035 * Local variables:
2036 * mode: C
2037 * c-set-style: "BSD"
2038 * c-basic-offset: 4
2039 * tab-width: 4
2040 * indent-tabs-mode: nil
2041 * End:
2042 */