debuggers.hg

view tools/libxc/xc_linux_save.c @ 2673:0174982516f6

bitkeeper revision 1.1159.1.229 (416d3ad1BpCS1RVPjkX14HUpsanlGw)

Shadow pagetable walkthrough.
author kaf24@freefall.cl.cam.ac.uk
date Wed Oct 13 14:25:21 2004 +0000 (2004-10-13)
parents 6ceaf7d959a7
children d8e27145f1eb 2584528df9e1 8aa9d487a8dd
line source
1 /******************************************************************************
2 * xc_linux_save.c
3 *
4 * Save the state of a running Linux session.
5 *
6 * Copyright (c) 2003, K A Fraser.
7 */
9 #include <sys/time.h>
10 #include "xc_private.h"
11 #include <asm-xen/suspend.h>
/* Pages are processed in batches of this many: 1024 pages (4MB) at a time. */
#define BATCH_SIZE 1024

/* Compile-time switches for the two levels of debug tracing. */
#define DEBUG  0
#define DDEBUG 0

/* DPRINTF: ordinary tracing; expands to a no-op unless DEBUG is non-zero. */
#if !DEBUG
#define DPRINTF(_f, _a...) ((void)0)
#else
#define DPRINTF(_f, _a...) printf ( _f , ## _a )
#endif

/* DDPRINTF: more verbose tracing; expands to a no-op unless DDEBUG is non-zero. */
#if !DDEBUG
#define DDPRINTF(_f, _a...) ((void)0)
#else
#define DDPRINTF(_f, _a...) printf ( _f , ## _a )
#endif
/*
 * Returns TRUE if the given machine frame number has a unique mapping
 * in the guest's pseudophysical map.
 *
 * This expands in the caller's scope and uses the caller's
 * live_mfn_to_pfn_table, live_pfn_to_mfn_table and nr_pfns.  The argument
 * is evaluated several times, so it must have no side effects.  The
 * (1024*1024) bound is tested first so the machine-to-physical table is
 * never indexed with a frame number at or above that limit.
 */
#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn)                                    \
    (((_mfn) < (1024*1024)) &&                                            \
     ((live_mfn_to_pfn_table[(_mfn)] < nr_pfns) &&                        \
      (live_pfn_to_mfn_table[live_mfn_to_pfn_table[(_mfn)]] == (_mfn))))

/*
 * Returns TRUE if MFN is successfully converted to a PFN.
 *
 * _pmfn points at a machine frame number.  On success the pointed-to value
 * is overwritten with the matching pseudophysical frame number; on failure
 * it is left untouched and the macro evaluates to 0.
 *
 * The temporaries carry an "xlat_" prefix so that they cannot capture a
 * caller variable: with a temporary called plain 'mfn', a call such as
 * translate_mfn_to_pfn(&mfn) would initialise the temporary from itself.
 */
#define translate_mfn_to_pfn(_pmfn)                                       \
({                                                                        \
    unsigned long xlat_mfn_ = *(_pmfn);                                   \
    int xlat_ok_ = 1;                                                     \
    if ( !MFN_IS_IN_PSEUDOPHYS_MAP(xlat_mfn_) )                           \
        xlat_ok_ = 0;                                                     \
    else                                                                  \
        *(_pmfn) = live_mfn_to_pfn_table[xlat_mfn_];                      \
    xlat_ok_;                                                             \
})
/* An entry counts as mapped when bit 31 (0x80000000) is clear. */
#define is_mapped(pfn) (((pfn) & 0x80000000UL) == 0)
/*
 * Return the value (0 or 1) of bit 'nr' in the bitmap at 'addr', which is
 * accessed as an array of unsigned long words.
 */
static inline int test_bit ( int nr, volatile void * addr)
{
    const unsigned long bits_per_word = sizeof(unsigned long) * 8;
    unsigned long *words = (unsigned long *)addr;
    unsigned long word = words[nr / bits_per_word];

    return (word >> (nr % bits_per_word)) & 1;
}
/*
 * Clear bit 'nr' in the bitmap at 'addr', which is accessed as an array of
 * unsigned long words.  Not atomic.
 *
 * The mask is built from 1UL so that the shift is carried out at the full
 * width of the word; an int-typed 1 cannot address the upper bits of a
 * 64-bit unsigned long.
 */
static inline void clear_bit ( int nr, volatile void * addr)
{
    ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] &=
        ~(1UL << (nr % (sizeof(unsigned long)*8) ) );
}
/*
 * Set bit 'nr' in the bitmap at 'addr', which is accessed as an array of
 * unsigned long words.  Not atomic.
 *
 * The mask is built from 1UL: with an int-typed 1, bit 31 yields a negative
 * int that sign-extends into the upper half of a 64-bit word, and higher
 * bit numbers shift past the width of int.
 */
static inline void set_bit ( int nr, volatile void * addr)
{
    ((unsigned long*)addr)[nr/(sizeof(unsigned long)*8)] |=
        (1UL << (nr % (sizeof(unsigned long)*8) ) );
}
/* Returns the hamming weight (i.e. the number of bits set) in a 32-bit word */
static inline unsigned int hweight32(unsigned int w)
{
    unsigned int res = (w & 0x55555555) + ((w >> 1) & 0x55555555);
    res = (res & 0x33333333) + ((res >> 2) & 0x33333333);
    res = (res & 0x0F0F0F0F) + ((res >> 4) & 0x0F0F0F0F);
    res = (res & 0x00FF00FF) + ((res >> 8) & 0x00FF00FF);
    return (res & 0x0000FFFF) + ((res >> 16) & 0x0000FFFF);
}

/*
 * Hamming weight of an unsigned long of any width, consumed 32 bits at a
 * time.  The shift is split in two so it stays defined when unsigned long
 * is itself only 32 bits wide.
 */
static inline unsigned int hweight_long(unsigned long w)
{
    unsigned int res = 0;

    while ( w != 0 )
    {
        res += hweight32((unsigned int)(w & 0xFFFFFFFFUL));
        w = (w >> 16) >> 16;
    }
    return res;
}

/*
 * Count the bits set among the first 'nr' bits of the bitmap at 'addr'.
 *
 * Whole words are counted at their full width, and a trailing partial word
 * is masked so that only its low (nr % word-size) bits contribute.  The
 * partial word is read in full, which relies on the array being padded to
 * a whole number of unsigned longs.  A non-positive 'nr' yields 0.
 */
static inline int count_bits ( int nr, volatile void *addr)
{
    const unsigned long bits_per_word = sizeof(unsigned long) * 8;
    unsigned long *p = (unsigned long *)addr;
    unsigned long nwords, tail, i;
    int count = 0;

    if ( nr <= 0 )
        return 0;

    nwords = (unsigned long)nr / bits_per_word;
    tail   = (unsigned long)nr % bits_per_word;

    for ( i = 0; i < nwords; i++ )
        count += hweight_long( p[i] );

    if ( tail != 0 )
        count += hweight_long( p[nwords] & ((1UL << tail) - 1) );

    return count;
}
/*
 * Map index i (0 <= i < nr) to another index in [0, nr) so that pages are
 * visited in a scrambled order.
 *
 * order_nr is the number of bits needed to represent nr-1.  The mapping is
 * a left rotation of i by 10 bit positions within an order_nr-bit field,
 * repeated until the result falls below nr.  For order_nr below 10 the
 * rotation distance is reduced modulo order_nr, so the shift counts stay
 * non-negative; for order_nr >= 10 the result is the same as a plain
 * rotate-by-10.  order_nr <= 0 means there is a single index, 0.
 *
 * i must already be below nr, otherwise the loop need not terminate.
 * order_nr must be small enough that (1 << order_nr) fits in an int.
 */
static inline int permute( int i, int nr, int order_nr )
{
    /* Need a simple permutation function so that we scan pages in a
       pseudo random order, enabling us to get a better estimate of
       the domain's page dirtying rate as we go (there are often
       contiguous ranges of pfns that have similar behaviour, and we
       want to mix them up). */

    /* e.g. nr->order 15->4 16->4 17->5 */
    /* 512MB domain, 128k pages, order 17 */

    /*
      QPONMLKJIHGFEDCBA
             QPONMLKJIH
      GFEDCBA
     */

    /*
      QPONMLKJIHGFEDCBA
                  EDCBA
             QPONM
      LKJIHGF
      */

    int mask, rot;

    if ( order_nr <= 0 )
        return 0;

    mask = (1 << order_nr) - 1;
    rot  = 10 % order_nr;

    do { i = ((i >> (order_nr - rot)) | (i << rot)) & mask; }
    while ( i >= nr ); /* this won't ever loop if nr is a power of 2 */

    return i;
}
/*
 * Convert a timeval to a microsecond count.
 * The seconds field is widened to long long before scaling so the product
 * cannot overflow where tv_sec is a 32-bit type.
 */
static long long tv_to_us( struct timeval *new )
{
    return ((long long)new->tv_sec * 1000000) + new->tv_usec;
}
/* Current wall-clock time from gettimeofday(), expressed in microseconds. */
static long long llgettimeofday()
{
    struct timeval tv;

    gettimeofday(&tv, NULL);

    return tv_to_us(&tv);
}
/*
 * Microseconds elapsed from *old to *new (negative if *new is earlier).
 * The seconds difference is widened to long long before scaling so the
 * product cannot overflow where tv_sec is a 32-bit type.
 */
static long long tv_delta( struct timeval *new, struct timeval *old )
{
    return ((long long)(new->tv_sec - old->tv_sec) * 1000000) +
        (new->tv_usec - old->tv_usec);
}
141 static int print_stats( int xc_handle, u32 domid,
142 int pages_sent, xc_shadow_control_stats_t *stats,
143 int print )
144 {
145 static struct timeval wall_last;
146 static long long d0_cpu_last;
147 static long long d1_cpu_last;
149 struct timeval wall_now;
150 long long wall_delta;
151 long long d0_cpu_now, d0_cpu_delta;
152 long long d1_cpu_now, d1_cpu_delta;
154 gettimeofday(&wall_now, NULL);
156 d0_cpu_now = xc_domain_get_cpu_usage( xc_handle, 0 )/1000;
157 d1_cpu_now = xc_domain_get_cpu_usage( xc_handle, domid )/1000;
159 if ( (d0_cpu_now == -1) || (d1_cpu_now == -1) )
160 printf("ARRHHH!!\n");
162 wall_delta = tv_delta(&wall_now,&wall_last)/1000;
164 if ( wall_delta == 0 ) wall_delta = 1;
166 d0_cpu_delta = (d0_cpu_now - d0_cpu_last)/1000;
167 d1_cpu_delta = (d1_cpu_now - d1_cpu_last)/1000;
169 if ( print )
170 printf("delta %lldms, dom0 %d%%, target %d%%, sent %dMb/s, "
171 "dirtied %dMb/s\n",
172 wall_delta,
173 (int)((d0_cpu_delta*100)/wall_delta),
174 (int)((d1_cpu_delta*100)/wall_delta),
175 (int)((pages_sent*PAGE_SIZE*8)/(wall_delta*1000)),
176 (int)((stats->dirty_count*PAGE_SIZE*8)/(wall_delta*1000)));
178 d0_cpu_last = d0_cpu_now;
179 d1_cpu_last = d1_cpu_now;
180 wall_last = wall_now;
182 return 0;
183 }
185 /** Write the vmconfig string.
186 * It is stored as a 4-byte count 'n' followed by n bytes.
187 *
188 * @param ioctxt i/o context
189 * @return 0 on success, non-zero on error.
190 */
191 static int write_vmconfig(XcIOContext *ioctxt){
192 int err = -1;
193 if(xcio_write(ioctxt, &ioctxt->vmconfig_n, sizeof(ioctxt->vmconfig_n))) goto exit;
194 if(xcio_write(ioctxt, ioctxt->vmconfig, ioctxt->vmconfig_n)) goto exit;
195 err = 0;
196 exit:
197 return err;
198 }
200 static int analysis_phase( int xc_handle, u32 domid,
201 int nr_pfns, unsigned long *arr )
202 {
203 long long start, now;
204 xc_shadow_control_stats_t stats;
206 start = llgettimeofday();
208 while ( 0 )
209 {
210 int i;
212 xc_shadow_control( xc_handle, domid,
213 DOM0_SHADOW_CONTROL_OP_CLEAN,
214 arr, nr_pfns, NULL);
215 printf("#Flush\n");
216 for ( i = 0; i < 100; i++ )
217 {
218 usleep(10000);
219 now = llgettimeofday();
220 xc_shadow_control( xc_handle, domid,
221 DOM0_SHADOW_CONTROL_OP_PEEK,
222 NULL, 0, &stats);
224 printf("now= %lld faults= %ld dirty= %ld dirty_net= %ld "
225 "dirty_block= %ld\n",
226 ((now-start)+500)/1000,
227 stats.fault_count, stats.dirty_count,
228 stats.dirty_net_count, stats.dirty_block_count);
229 }
230 }
232 return -1;
233 }
236 int suspend_and_state( int xc_handle, XcIOContext *ioctxt,
237 dom0_op_t *op,
238 full_execution_context_t *ctxt )
239 {
240 int i=0;
242 xcio_suspend_domain(ioctxt);
244 retry:
246 if ( xc_domain_getfullinfo( xc_handle, ioctxt->domain, op, ctxt) )
247 {
248 xcio_error(ioctxt, "Could not get full domain info");
249 return -1;
250 }
252 if ( (op->u.getdomaininfo.flags &
253 ( DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT) ))
254 == ( DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<<DOMFLAGS_SHUTDOWNSHIFT) ))
255 {
256 return 0; // success
257 }
259 if ( op->u.getdomaininfo.flags & DOMFLAGS_PAUSED )
260 {
261 // try unpausing domain, wait, and retest
262 xc_domain_unpause( xc_handle, ioctxt->domain );
264 xcio_error(ioctxt, "Domain was paused. Wait and re-test. (%lx)",
265 op->u.getdomaininfo.flags);
266 usleep(10000); // 10ms
268 goto retry;
269 }
272 if( ++i < 100 )
273 {
274 xcio_error(ioctxt, "Retry suspend domain (%lx)",
275 op->u.getdomaininfo.flags);
276 usleep(10000); // 10ms
277 goto retry;
278 }
280 xcio_error(ioctxt, "Unable to suspend domain. (%lx)",
281 op->u.getdomaininfo.flags);
283 return -1;
284 }
286 int xc_linux_save(int xc_handle, XcIOContext *ioctxt)
287 {
288 dom0_op_t op;
289 int rc = 1, i, j, k, last_iter, iter = 0;
290 unsigned long mfn;
291 u32 domid = ioctxt->domain;
292 int live = (ioctxt->flags & XCFLAGS_LIVE);
293 int debug = (ioctxt->flags & XCFLAGS_DEBUG);
294 int sent_last_iter, skip_this_iter;
296 /* Important tuning parameters */
297 int max_iters = 29; /* limit us to 30 times round loop */
298 int max_factor = 3; /* never send more than 3x nr_pfns */
300 /* The new domain's shared-info frame number. */
301 unsigned long shared_info_frame;
303 /* A copy of the CPU context of the guest. */
304 full_execution_context_t ctxt;
306 /* A copy of the domain's name. */
307 char name[MAX_DOMAIN_NAME];
309 /* A table containg the type of each PFN (/not/ MFN!). */
310 unsigned long *pfn_type = NULL;
311 unsigned long *pfn_batch = NULL;
313 /* A temporary mapping, and a copy, of one frame of guest memory. */
314 unsigned long page[1024];
316 /* A copy of the pfn-to-mfn table frame list. */
317 unsigned long *live_pfn_to_mfn_frame_list = NULL;
318 unsigned long pfn_to_mfn_frame_list[1024];
320 /* Live mapping of the table mapping each PFN to its current MFN. */
321 unsigned long *live_pfn_to_mfn_table = NULL;
322 /* Live mapping of system MFN to PFN table. */
323 unsigned long *live_mfn_to_pfn_table = NULL;
324 unsigned long mfn_to_pfn_table_start_mfn;
326 /* Live mapping of shared info structure */
327 shared_info_t *live_shinfo = NULL;
329 /* base of the region in which domain memory is mapped */
330 unsigned char *region_base = NULL;
332 /* A temporary mapping, and a copy, of the guest's suspend record. */
333 suspend_record_t *p_srec = NULL;
335 /* number of pages we're dealing with */
336 unsigned long nr_pfns;
338 /* power of 2 order of nr_pfns */
339 int order_nr;
341 /* bitmap of pages:
342 - that should be sent this iteration (unless later marked as skip);
343 - to skip this iteration because already dirty;
344 - to fixup by sending at the end if not already resent; */
345 unsigned long *to_send, *to_skip, *to_fix;
347 xc_shadow_control_stats_t stats;
349 int needed_to_fix = 0;
350 int total_sent = 0;
352 if (mlock(&ctxt, sizeof(ctxt))) {
353 xcio_perror(ioctxt, "Unable to mlock ctxt");
354 return 1;
355 }
357 if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) )
358 {
359 xcio_error(ioctxt, "Could not get full domain info");
360 goto out;
361 }
362 memcpy(name, op.u.getdomaininfo.name, sizeof(name));
363 shared_info_frame = op.u.getdomaininfo.shared_info_frame;
365 /* A cheesy test to see whether the domain contains valid state. */
366 if ( ctxt.pt_base == 0 ){
367 xcio_error(ioctxt, "Domain is not in a valid Linux guest OS state");
368 goto out;
369 }
371 nr_pfns = op.u.getdomaininfo.max_pages;
373 /* cheesy sanity check */
374 if ( nr_pfns > 1024*1024 ){
375 xcio_error(ioctxt, "Invalid state record -- pfn count out of range: %lu", nr_pfns);
376 goto out;
377 }
380 /* Map the shared info frame */
381 live_shinfo = xc_map_foreign_range(xc_handle, domid,
382 PAGE_SIZE, PROT_READ,
383 shared_info_frame);
385 if (!live_shinfo){
386 xcio_error(ioctxt, "Couldn't map live_shinfo");
387 goto out;
388 }
390 /* the pfn_to_mfn_frame_list fits in a single page */
391 live_pfn_to_mfn_frame_list =
392 xc_map_foreign_range(xc_handle, domid,
393 PAGE_SIZE, PROT_READ,
394 live_shinfo->arch.pfn_to_mfn_frame_list );
396 if (!live_pfn_to_mfn_frame_list){
397 xcio_error(ioctxt, "Couldn't map pfn_to_mfn_frame_list");
398 goto out;
399 }
402 /* Map all the frames of the pfn->mfn table. For migrate to succeed,
403 the guest must not change which frames are used for this purpose.
404 (its not clear why it would want to change them, and we'll be OK
405 from a safety POV anyhow. */
407 live_pfn_to_mfn_table = xc_map_foreign_batch(xc_handle, domid,
408 PROT_READ,
409 live_pfn_to_mfn_frame_list,
410 (nr_pfns+1023)/1024 );
411 if( !live_pfn_to_mfn_table ){
412 xcio_perror(ioctxt, "Couldn't map pfn_to_mfn table");
413 goto out;
414 }
416 /* Setup the mfn_to_pfn table mapping */
417 mfn_to_pfn_table_start_mfn = xc_get_m2p_start_mfn( xc_handle );
419 live_mfn_to_pfn_table =
420 xc_map_foreign_range(xc_handle, DOMID_XEN,
421 PAGE_SIZE*1024, PROT_READ,
422 mfn_to_pfn_table_start_mfn );
424 /* Canonicalise the pfn-to-mfn table frame-number list. */
425 memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE );
427 for ( i = 0; i < nr_pfns; i += 1024 ){
428 if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){
429 xcio_error(ioctxt, "Frame # in pfn-to-mfn frame list is not in pseudophys");
430 goto out;
431 }
432 }
435 /* Domain is still running at this point */
437 if( live )
438 {
439 if ( xc_shadow_control( xc_handle, domid,
440 DOM0_SHADOW_CONTROL_OP_ENABLE_LOGDIRTY,
441 NULL, 0, NULL ) < 0 ) {
442 xcio_error(ioctxt, "Couldn't enable shadow mode");
443 goto out;
444 }
446 last_iter = 0;
447 sent_last_iter = 1<<20; /* 4GB of pages */
448 } else{
449 /* This is a non-live suspend. Issue the call back to get the
450 domain suspended */
452 last_iter = 1;
454 if ( suspend_and_state( xc_handle, ioctxt, &op, &ctxt) )
455 {
456 xcio_error(ioctxt, "Domain appears not to have suspended: %lx",
457 op.u.getdomaininfo.flags);
458 goto out;
459 }
461 }
463 /* calculate the power of 2 order of nr_pfns, e.g.
464 15->4 16->4 17->5 */
465 for( i=nr_pfns-1, order_nr=0; i ; i>>=1, order_nr++ );
467 /* Setup to_send bitmap */
468 {
469 /* size these for a maximal 4GB domain, to make interaction
470 with balloon driver easier. It's only user space memory,
471 ater all... (3x 128KB) */
473 int sz = ( 1<<20 ) / 8;
475 to_send = malloc( sz );
476 to_fix = calloc( 1, sz );
477 to_skip = malloc( sz );
479 if (!to_send || !to_fix || !to_skip){
480 xcio_error(ioctxt, "Couldn't allocate to_send array");
481 goto out;
482 }
484 memset( to_send, 0xff, sz );
486 if ( mlock( to_send, sz ) ){
487 xcio_perror(ioctxt, "Unable to mlock to_send");
488 return 1;
489 }
491 /* (to fix is local only) */
493 if ( mlock( to_skip, sz ) ){
494 xcio_perror(ioctxt, "Unable to mlock to_skip");
495 return 1;
496 }
498 }
500 analysis_phase( xc_handle, domid, nr_pfns, to_skip );
502 /* We want zeroed memory so use calloc rather than malloc. */
503 pfn_type = calloc(BATCH_SIZE, sizeof(unsigned long));
504 pfn_batch = calloc(BATCH_SIZE, sizeof(unsigned long));
506 if ( (pfn_type == NULL) || (pfn_batch == NULL) ){
507 errno = ENOMEM;
508 goto out;
509 }
511 if ( mlock( pfn_type, BATCH_SIZE * sizeof(unsigned long) ) ){
512 xcio_error(ioctxt, "Unable to mlock");
513 goto out;
514 }
517 /*
518 * Quick belt and braces sanity check.
519 */
520 #if DEBUG
521 {
522 int err=0;
523 for ( i = 0; i < nr_pfns; i++ )
524 {
525 mfn = live_pfn_to_mfn_table[i];
527 if( (live_mfn_to_pfn_table[mfn] != i) && (mfn != 0xffffffffUL) )
528 {
529 printf("i=0x%x mfn=%lx live_mfn_to_pfn_table=%lx\n",
530 i,mfn,live_mfn_to_pfn_table[mfn]);
531 err++;
532 }
533 }
534 printf("Had %d unexplained entries in p2m table\n",err);
535 }
536 #endif
539 /* Start writing out the saved-domain record. */
541 if ( xcio_write(ioctxt, "LinuxGuestRecord", 16) ||
542 xcio_write(ioctxt, name, sizeof(name)) ||
543 xcio_write(ioctxt, &nr_pfns, sizeof(unsigned long)) ||
544 xcio_write(ioctxt, pfn_to_mfn_frame_list, PAGE_SIZE) ){
545 xcio_error(ioctxt, "Error writing header");
546 goto out;
547 }
548 if(write_vmconfig(ioctxt)){
549 xcio_error(ioctxt, "Error writing vmconfig");
550 goto out;
551 }
553 print_stats( xc_handle, domid, 0, &stats, 0 );
555 /* Now write out each data page, canonicalising page tables as we go... */
557 while(1){
558 unsigned int prev_pc, sent_this_iter, N, batch;
560 iter++;
561 sent_this_iter = 0;
562 skip_this_iter = 0;
563 prev_pc = 0;
564 N=0;
566 xcio_info(ioctxt, "Saving memory pages: iter %d 0%%", iter);
568 while( N < nr_pfns ){
569 unsigned int this_pc = (N * 100) / nr_pfns;
571 if ( (this_pc - prev_pc) >= 5 ){
572 xcio_info(ioctxt, "\b\b\b\b%3d%%", this_pc);
573 prev_pc = this_pc;
574 }
576 /* slightly wasteful to peek the whole array evey time,
577 but this is fast enough for the moment. */
579 if ( !last_iter &&
580 xc_shadow_control(xc_handle, domid,
581 DOM0_SHADOW_CONTROL_OP_PEEK,
582 to_skip, nr_pfns, NULL) != nr_pfns )
583 {
584 xcio_error(ioctxt, "Error peeking shadow bitmap");
585 goto out;
586 }
589 /* load pfn_type[] with the mfn of all the pages we're doing in
590 this batch. */
592 for ( batch = 0; batch < BATCH_SIZE && N < nr_pfns ; N++ )
593 {
594 int n = permute(N, nr_pfns, order_nr );
596 if ( 0 && debug ) {
597 fprintf(stderr,"%d pfn= %08lx mfn= %08lx %d "
598 " [mfn]= %08lx\n",
599 iter, (unsigned long)n, live_pfn_to_mfn_table[n],
600 test_bit(n,to_send),
601 live_mfn_to_pfn_table[live_pfn_to_mfn_table[n]&
602 0xFFFFF]);
603 }
605 if ( !last_iter &&
606 test_bit(n, to_send) &&
607 test_bit(n, to_skip) ) {
608 skip_this_iter++; /* stats keeping */
609 }
611 if ( !((test_bit(n, to_send) && !test_bit(n, to_skip)) ||
612 (test_bit(n, to_send) && last_iter) ||
613 (test_bit(n, to_fix) && last_iter)) ) {
614 continue;
615 }
617 /* we get here if:
618 1. page is marked to_send & hasn't already been re-dirtied
619 2. (ignore to_skip in last iteration)
620 3. add in pages that still need fixup (net bufs)
621 */
623 pfn_batch[batch] = n;
624 pfn_type[batch] = live_pfn_to_mfn_table[n];
626 if( ! is_mapped(pfn_type[batch]) )
627 {
628 /* not currently in pusedo-physical map -- set bit
629 in to_fix that we must send this page in last_iter
630 unless its sent sooner anyhow */
632 set_bit( n, to_fix );
633 if( iter>1 )
634 DDPRINTF("netbuf race: iter %d, pfn %x. mfn %lx\n",
635 iter,n,pfn_type[batch]);
636 continue;
637 }
639 if ( last_iter &&
640 test_bit(n, to_fix) &&
641 !test_bit(n, to_send) )
642 {
643 needed_to_fix++;
644 DPRINTF("Fix! iter %d, pfn %x. mfn %lx\n",
645 iter,n,pfn_type[batch]);
646 }
648 clear_bit(n, to_fix);
650 batch++;
651 }
653 // DDPRINTF("batch %d:%d (n=%d)\n", iter, batch, n);
655 if ( batch == 0 )
656 goto skip; /* vanishingly unlikely... */
658 if ( (region_base = xc_map_foreign_batch(xc_handle, domid,
659 PROT_READ,
660 pfn_type,
661 batch)) == 0 ){
662 xcio_perror(ioctxt, "map batch failed");
663 goto out;
664 }
666 if ( get_pfn_type_batch(xc_handle, domid, batch, pfn_type) ){
667 xcio_error(ioctxt, "get_pfn_type_batch failed");
668 goto out;
669 }
671 for ( j = 0; j < batch; j++ ){
672 if ( (pfn_type[j] & LTAB_MASK) == XTAB ){
673 DDPRINTF("type fail: page %i mfn %08lx\n",j,pfn_type[j]);
674 continue;
675 }
677 if ( 0 && debug )
678 fprintf(stderr, "%d pfn= %08lx mfn= %08lx [mfn]= %08lx"
679 " sum= %08lx\n",
680 iter,
681 (pfn_type[j] & LTAB_MASK) | pfn_batch[j],
682 pfn_type[j],
683 live_mfn_to_pfn_table[pfn_type[j]&(~LTAB_MASK)],
684 csum_page(region_base + (PAGE_SIZE*j)));
686 /* canonicalise mfn->pfn */
687 pfn_type[j] = (pfn_type[j] & LTAB_MASK) | pfn_batch[j];
688 }
690 if ( xcio_write(ioctxt, &batch, sizeof(int) ) ){
691 xcio_error(ioctxt, "Error when writing to state file (2)");
692 goto out;
693 }
695 if ( xcio_write(ioctxt, pfn_type, sizeof(unsigned long)*j ) ){
696 xcio_error(ioctxt, "Error when writing to state file (3)");
697 goto out;
698 }
700 /* entering this loop, pfn_type is now in pfns (Not mfns) */
701 for( j = 0; j < batch; j++ ){
702 /* write out pages in batch */
703 if( (pfn_type[j] & LTAB_MASK) == XTAB){
704 DDPRINTF("SKIP BOGUS page %i mfn %08lx\n",j,pfn_type[j]);
705 continue;
706 }
708 if ( ((pfn_type[j] & LTABTYPE_MASK) == L1TAB) ||
709 ((pfn_type[j] & LTABTYPE_MASK) == L2TAB) ){
710 memcpy(page, region_base + (PAGE_SIZE*j), PAGE_SIZE);
712 for ( k = 0;
713 k < (((pfn_type[j] & LTABTYPE_MASK) == L2TAB) ?
714 (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT) :
715 1024);
716 k++ ){
717 unsigned long pfn;
719 if ( !(page[k] & _PAGE_PRESENT) )
720 continue;
722 mfn = page[k] >> PAGE_SHIFT;
723 pfn = live_mfn_to_pfn_table[mfn];
725 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(mfn) )
726 {
727 /* I don't think this should ever happen */
728 printf("FNI %d : [%08lx,%d] pte=%08lx, "
729 "mfn=%08lx, pfn=%08lx [mfn]=%08lx\n",
730 j, pfn_type[j], k,
731 page[k], mfn, live_mfn_to_pfn_table[mfn],
732 (live_mfn_to_pfn_table[mfn]<nr_pfns)?
733 live_pfn_to_mfn_table[
734 live_mfn_to_pfn_table[mfn]] :
735 0xdeadbeef);
737 pfn = 0; /* be suspicious */
738 }
740 page[k] &= PAGE_SIZE - 1;
741 page[k] |= pfn << PAGE_SHIFT;
743 #if 0
744 printf("L%d i=%d pfn=%d mfn=%d k=%d pte=%08lx "
745 "xpfn=%d\n",
746 pfn_type[j]>>28,
747 j,i,mfn,k,page[k],page[k]>>PAGE_SHIFT);
748 #endif
750 } /* end of page table rewrite for loop */
752 if ( xcio_write(ioctxt, page, PAGE_SIZE) ){
753 xcio_error(ioctxt, "Error when writing to state file (4)");
754 goto out;
755 }
757 } /* end of it's a PT page */ else { /* normal page */
759 if ( xcio_write(ioctxt, region_base + (PAGE_SIZE*j),
760 PAGE_SIZE) ){
761 xcio_error(ioctxt, "Error when writing to state file (5)");
762 goto out;
763 }
764 }
765 } /* end of the write out for this batch */
767 sent_this_iter += batch;
769 } /* end of this while loop for this iteration */
771 munmap(region_base, batch*PAGE_SIZE);
773 skip:
775 total_sent += sent_this_iter;
777 xcio_info(ioctxt, "\r %d: sent %d, skipped %d, ",
778 iter, sent_this_iter, skip_this_iter );
780 if ( last_iter ) {
781 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
783 xcio_info(ioctxt, "Total pages sent= %d (%.2fx)\n",
784 total_sent, ((float)total_sent)/nr_pfns );
785 xcio_info(ioctxt, "(of which %d were fixups)\n", needed_to_fix );
786 }
788 if (last_iter && debug){
789 int minusone = -1;
790 memset( to_send, 0xff, (nr_pfns+8)/8 );
791 debug = 0;
792 printf("Entering debug resend-all mode\n");
794 /* send "-1" to put receiver into debug mode */
795 if ( xcio_write(ioctxt, &minusone, sizeof(int)) )
796 {
797 xcio_error(ioctxt, "Error when writing to state file (6)");
798 goto out;
799 }
801 continue;
802 }
804 if ( last_iter ) break;
806 if ( live )
807 {
808 if (
809 /* ( sent_this_iter > (sent_last_iter * 0.95) ) || */
810 (iter >= max_iters) ||
811 (sent_this_iter+skip_this_iter < 50) ||
812 (total_sent > nr_pfns*max_factor) )
813 {
814 DPRINTF("Start last iteration\n");
815 last_iter = 1;
817 if ( suspend_and_state( xc_handle, ioctxt, &op, &ctxt) )
818 {
819 xcio_error(ioctxt, "Domain appears not to have suspended: %lx",
820 op.u.getdomaininfo.flags);
821 goto out;
822 }
824 printf("SUSPEND flags %08lx shinfo %08lx eip %08lx esi %08lx\n",
825 op.u.getdomaininfo.flags, op.u.getdomaininfo.shared_info_frame,
826 ctxt.cpu_ctxt.eip, ctxt.cpu_ctxt.esi );
829 }
831 if ( xc_shadow_control( xc_handle, domid,
832 DOM0_SHADOW_CONTROL_OP_CLEAN,
833 to_send, nr_pfns, &stats ) != nr_pfns )
834 {
835 xcio_error(ioctxt, "Error flushing shadow PT");
836 goto out;
837 }
839 sent_last_iter = sent_this_iter;
841 print_stats( xc_handle, domid, sent_this_iter, &stats, 1);
843 }
846 } /* end of while 1 */
848 DPRINTF("All memory is saved\n");
850 /* Success! */
851 rc = 0;
853 /* Zero terminate */
854 if ( xcio_write(ioctxt, &rc, sizeof(int)) )
855 {
856 xcio_error(ioctxt, "Error when writing to state file (6)");
857 goto out;
858 }
860 /* Send through a list of all the PFNs that were not in map at the close */
861 {
862 unsigned int i,j;
863 unsigned int pfntab[1024];
865 for ( i = 0, j = 0; i < nr_pfns; i++ )
866 {
867 if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
868 j++;
869 }
871 if ( xcio_write(ioctxt, &j, sizeof(unsigned int)) )
872 {
873 xcio_error(ioctxt, "Error when writing to state file (6a)");
874 goto out;
875 }
877 for ( i = 0, j = 0; i < nr_pfns; )
878 {
879 if ( ! is_mapped(live_pfn_to_mfn_table[i]) )
880 {
881 pfntab[j++] = i;
882 }
883 i++;
884 if ( j == 1024 || i == nr_pfns )
885 {
886 if ( xcio_write(ioctxt, &pfntab, sizeof(unsigned long)*j) )
887 {
888 xcio_error(ioctxt, "Error when writing to state file (6b)");
889 goto out;
890 }
891 j = 0;
892 }
893 }
894 }
896 /* Map the suspend-record MFN to pin it. The page must be owned by
897 domid for this to succeed. */
898 p_srec = xc_map_foreign_range(xc_handle, domid,
899 sizeof(*p_srec), PROT_READ,
900 ctxt.cpu_ctxt.esi);
901 if (!p_srec){
902 xcio_error(ioctxt, "Couldn't map suspend record");
903 goto out;
904 }
906 if (nr_pfns != p_srec->nr_pfns )
907 {
908 xcio_error(ioctxt, "Suspend record nr_pfns unexpected (%ld != %ld)",
909 p_srec->nr_pfns, nr_pfns);
910 goto out;
911 }
913 /* Canonicalise the suspend-record frame number. */
914 if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ){
915 xcio_error(ioctxt, "Suspend record is not in range of pseudophys map");
916 goto out;
917 }
919 /* Canonicalise each GDT frame number. */
920 for ( i = 0; i < ctxt.gdt_ents; i += 512 ) {
921 if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) {
922 xcio_error(ioctxt, "GDT frame is not in range of pseudophys map");
923 goto out;
924 }
925 }
927 /* Canonicalise the page table base pointer. */
928 if ( !MFN_IS_IN_PSEUDOPHYS_MAP(ctxt.pt_base >> PAGE_SHIFT) ) {
929 xcio_error(ioctxt, "PT base is not in range of pseudophys map");
930 goto out;
931 }
932 ctxt.pt_base = live_mfn_to_pfn_table[ctxt.pt_base >> PAGE_SHIFT] <<
933 PAGE_SHIFT;
935 if ( xcio_write(ioctxt, &ctxt, sizeof(ctxt)) ||
936 xcio_write(ioctxt, live_shinfo, PAGE_SIZE) ) {
937 xcio_error(ioctxt, "Error when writing to state file (1)");
938 goto out;
939 }
941 out:
943 if ( live_shinfo ) munmap(live_shinfo, PAGE_SIZE);
944 if ( p_srec ) munmap(p_srec, sizeof(*p_srec));
945 if ( live_pfn_to_mfn_frame_list ) munmap(live_pfn_to_mfn_frame_list, PAGE_SIZE);
946 if ( live_pfn_to_mfn_table ) munmap(live_pfn_to_mfn_table, nr_pfns*4 );
947 if ( live_mfn_to_pfn_table ) munmap(live_mfn_to_pfn_table, PAGE_SIZE*1024 );
949 if ( pfn_type != NULL ) free(pfn_type);
950 DPRINTF("Save exit rc=%d\n",rc);
951 return !!rc;
953 }