debuggers.hg

view tools/libxc/xc_linux_build.c @ 6709:7d0fb56b4a91

description merge?
author cl349@firebug.cl.cam.ac.uk
date Wed Sep 07 19:01:31 2005 +0000 (2005-09-07)
parents 549f4256ab3c 8db9c5873b9b
children f0d728001aaa 2704a88c3295
/******************************************************************************
 * xc_linux_build.c
 */

#include "xg_private.h"
#include <xenctrl.h>

#if defined(__i386__)
#define ELFSIZE 32
#endif

#if defined(__x86_64__) || defined(__ia64__)
#define ELFSIZE 64
#endif

#include "xc_elf.h"
#include "xc_aout9.h"
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>

#if defined(__i386__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT)
#endif

#if defined(__x86_64__)
#define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_USER)
#define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L3_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif

#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
#define round_pgdown(_p) ((_p)&PAGE_MASK)

#ifdef __ia64__
#define probe_aout9(image,image_size,load_funcs) 1
#endif
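
/*
 * Each probe_* routine returns 0 if it recognizes the image (filling in
 * *load_funcs) and nonzero otherwise, so the chained && below succeeds
 * only when every probe has rejected the image.  On ia64, probe_aout9 is
 * stubbed out above to always fail.
 */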
static int probeimageformat(char *image,
                            unsigned long image_size,
                            struct load_funcs *load_funcs)
{
    if ( probe_elf(image, image_size, load_funcs) &&
         probe_bin(image, image_size, load_funcs) &&
         probe_aout9(image, image_size, load_funcs) )
    {
        ERROR( "Unrecognized image format" );
        return -EINVAL;
    }

    return 0;
}
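
/*
 * Take the next unused guest frame from page_array as a page-table page:
 * record its machine address in ltab, replace the mapping in vltab with a
 * writable mapping of the new frame, and zero it.  Relies on page_array,
 * ppt_alloc, xc_handle and dom being in scope at the expansion site, and
 * jumps to a local error_out label on failure.
 */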
#define alloc_pt(ltab, vltab) \
        ltab = page_array[ppt_alloc++] << PAGE_SHIFT; \
        if (vltab != NULL) { \
            munmap(vltab, PAGE_SIZE); \
        } \
        if ((vltab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, \
                                          PROT_READ|PROT_WRITE, \
                                          ltab >> PAGE_SHIFT)) == NULL) { \
            goto error_out; \
        } \
        memset(vltab, 0, PAGE_SIZE);
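
/*
 * The setup_pg_tables* variants below build the guest's initial page
 * tables in the pseudo-physical frames reserved at vpt_start..vpt_end:
 * two levels for plain i386, three for i386 PAE, four for x86_64.  Each
 * maps [dsi_v_start, v_end) 1:1 onto page_array and strips _PAGE_RW from
 * the mappings of the page-table frames themselves, since Xen requires
 * that a domain hold no writable mapping of its own active page tables.
 */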
#if defined(__i386__)

static int setup_pg_tables(int xc_handle, u32 dom,
                           vcpu_guest_context_t *ctxt,
                           unsigned long dsi_v_start,
                           unsigned long v_end,
                           unsigned long *page_array,
                           unsigned long vpt_start,
                           unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long l1tab = 0;
    unsigned long l2tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l2tab, vl2tab);
    vl2e = &vl2tab[l2_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l2tab;

    for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++ )
    {
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            alloc_pt(l1tab, vl1tab);
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
            *vl1e &= ~_PAGE_RW;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    return -1;
}
static int setup_pg_tables_pae(int xc_handle, u32 dom,
                               vcpu_guest_context_t *ctxt,
                               unsigned long dsi_v_start,
                               unsigned long v_end,
                               unsigned long *page_array,
                               unsigned long vpt_start,
                               unsigned long vpt_end)
{
    l1_pgentry_64_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_64_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_64_t *vl3tab=NULL, *vl3e=NULL;
    unsigned long l1tab = 0;
    unsigned long l2tab = 0;
    unsigned long l3tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l3tab, vl3tab);
    vl3e = &vl3tab[l3_table_offset_pae(dsi_v_start)];
    ctxt->ctrlreg[3] = l3tab;

    for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                vl2e = &vl2tab[l2_table_offset_pae(dsi_v_start + (count<<PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset_pae(dsi_v_start + (count<<PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    if (vl3tab)
        munmap(vl3tab, PAGE_SIZE);
    return -1;
}

#endif
#if defined(__x86_64__)

static int setup_pg_tables_64(int xc_handle, u32 dom,
                              vcpu_guest_context_t *ctxt,
                              unsigned long dsi_v_start,
                              unsigned long v_end,
                              unsigned long *page_array,
                              unsigned long vpt_start,
                              unsigned long vpt_end)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    l3_pgentry_t *vl3tab=NULL, *vl3e=NULL;
    l4_pgentry_t *vl4tab=NULL, *vl4e=NULL;
    unsigned long l2tab = 0;
    unsigned long l1tab = 0;
    unsigned long l3tab = 0;
    unsigned long l4tab = 0;
    unsigned long ppt_alloc;
    unsigned long count;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi_v_start) >> PAGE_SHIFT;
    alloc_pt(l4tab, vl4tab);
    vl4e = &vl4tab[l4_table_offset(dsi_v_start)];
    ctxt->ctrlreg[3] = l4tab;

    for ( count = 0; count < ((v_end-dsi_v_start)>>PAGE_SHIFT); count++)
    {
        if ( !((unsigned long)vl1e & (PAGE_SIZE-1)) )
        {
            alloc_pt(l1tab, vl1tab);

            if ( !((unsigned long)vl2e & (PAGE_SIZE-1)) )
            {
                alloc_pt(l2tab, vl2tab);
                if ( !((unsigned long)vl3e & (PAGE_SIZE-1)) )
                {
                    alloc_pt(l3tab, vl3tab);
                    vl3e = &vl3tab[l3_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
                    *vl4e = l3tab | L4_PROT;
                    vl4e++;
                }
                vl2e = &vl2tab[l2_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
                *vl3e = l2tab | L3_PROT;
                vl3e++;
            }
            vl1e = &vl1tab[l1_table_offset(dsi_v_start + (count<<PAGE_SHIFT))];
            *vl2e = l1tab | L2_PROT;
            vl2e++;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        if ( (count >= ((vpt_start-dsi_v_start)>>PAGE_SHIFT)) &&
             (count <  ((vpt_end  -dsi_v_start)>>PAGE_SHIFT)) )
        {
            *vl1e &= ~_PAGE_RW;
        }
        vl1e++;
    }

    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);
    munmap(vl3tab, PAGE_SIZE);
    munmap(vl4tab, PAGE_SIZE);
    return 0;

 error_out:
    if (vl1tab)
        munmap(vl1tab, PAGE_SIZE);
    if (vl2tab)
        munmap(vl2tab, PAGE_SIZE);
    if (vl3tab)
        munmap(vl3tab, PAGE_SIZE);
    if (vl4tab)
        munmap(vl4tab, PAGE_SIZE);
    return -1;
}
#endif
#ifdef __ia64__
#include <asm/fpu.h> /* for FPSR_DEFAULT */
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn)
{
    unsigned long *page_array = NULL;
    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long start_page;
    int rc;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    dsi.v_start = round_pgdown(dsi.v_start);
    dsi.v_end   = round_pgup(dsi.v_end);

    start_page = dsi.v_start >> PAGE_SHIFT;
    nr_pages = (dsi.v_end - dsi.v_start) >> PAGE_SHIFT;
    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_ia64_get_pfn_list(xc_handle, dom, page_array, start_page, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);

    *pvke = dsi.v_kernentry;
    return 0;

 error_out:
    free(page_array);
    return -1;
}
#else /* x86 */
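
/*
 * The x86 variant does the bulk of domain construction: compute the
 * bootstrap virtual-address layout (kernel, initrd, phys-to-machine map,
 * start info, store and console pages, page tables, boot stack), load
 * the kernel and optional initrd, build the initial page tables,
 * populate the phys-to-machine and machine-to-phys maps, fill in
 * start_info, and initialise the shared_info page with all event
 * upcalls masked.
 */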
static int setup_guest(int xc_handle,
                       u32 dom,
                       char *image, unsigned long image_size,
                       gzFile initrd_gfd, unsigned long initrd_len,
                       unsigned long nr_pages,
                       unsigned long *pvsi, unsigned long *pvke,
                       unsigned long *pvss, vcpu_guest_context_t *ctxt,
                       const char *cmdline,
                       unsigned long shared_info_frame,
                       unsigned long flags,
                       unsigned int vcpus,
                       unsigned int store_evtchn, unsigned long *store_mfn,
                       unsigned int console_evtchn, unsigned long *console_mfn)
{
    unsigned long *page_array = NULL;
    unsigned long count, i;
    start_info_t *start_info;
    shared_info_t *shared_info;
    xc_mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long physmap_pfn;
    unsigned long *physmap, *physmap_e;

    struct load_funcs load_funcs;
    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vphysmap_start;
    unsigned long vphysmap_end;
    unsigned long vstartinfo_start;
    unsigned long vstartinfo_end;
    unsigned long vstoreinfo_start;
    unsigned long vstoreinfo_end;
    unsigned long vconsole_start;
    unsigned long vconsole_end;
    unsigned long vstack_start;
    unsigned long vstack_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    rc = probeimageformat(image, image_size, &load_funcs);
    if ( rc != 0 )
        goto error_out;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = (load_funcs.parseimage)(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }
    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
    vinitrd_start    = round_pgup(dsi.v_end);
    vinitrd_end      = vinitrd_start + initrd_len;
    vphysmap_start   = round_pgup(vinitrd_end);
    vphysmap_end     = vphysmap_start + (nr_pages * sizeof(unsigned long));
    vstartinfo_start = round_pgup(vphysmap_end);
    vstartinfo_end   = vstartinfo_start + PAGE_SIZE;
    vstoreinfo_start = vstartinfo_end;
    vstoreinfo_end   = vstoreinfo_start + PAGE_SIZE;
    vconsole_start   = vstoreinfo_end;
    vconsole_end     = vconsole_start + PAGE_SIZE;
    vpt_start        = vconsole_end;

    for ( nr_pt_pages = 2; ; nr_pt_pages++ )
    {
        vpt_end      = vpt_start + (nr_pt_pages * PAGE_SIZE);
        vstack_start = vpt_end;
        vstack_end   = vstack_start + PAGE_SIZE;
        v_end        = (vstack_end + (1UL<<22)-1) & ~((1UL<<22)-1);
        if ( (v_end - vstack_end) < (512UL << 10) )
            v_end += 1UL << 22; /* Add extra 4MB to get >= 512kB padding. */
#if defined(__i386__)
        if (dsi.pae_kernel) {
            /* FIXME: assumes one L2 pgtable @ 0xc0000000 */
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT_PAE)-1)) >>
                   L2_PAGETABLE_SHIFT_PAE) + 2) <= nr_pt_pages )
                break;
        } else {
            if ( (((v_end - dsi.v_start + ((1<<L2_PAGETABLE_SHIFT)-1)) >>
                   L2_PAGETABLE_SHIFT) + 1) <= nr_pt_pages )
                break;
        }
#endif
#if defined(__x86_64__)
#define NR(_l,_h,_s) \
    (((((_h) + ((1UL<<(_s))-1)) & ~((1UL<<(_s))-1)) - \
       ((_l) & ~((1UL<<(_s))-1))) >> (_s))
        if ( (1 + /* # L4 */
              NR(dsi.v_start, v_end, L4_PAGETABLE_SHIFT) + /* # L3 */
              NR(dsi.v_start, v_end, L3_PAGETABLE_SHIFT) + /* # L2 */
              NR(dsi.v_start, v_end, L2_PAGETABLE_SHIFT))  /* # L1 */
             <= nr_pt_pages )
            break;
#endif
    }
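
    /*
     * Illustrative example (i386, non-PAE): each L1 table maps 4MB
     * (1 << L2_PAGETABLE_SHIFT), and the "+ 1" accounts for the single
     * L2 directory, so a 24MB bootstrap region needs 6 L1 frames plus
     * the L2, i.e. nr_pt_pages = 7.  Because the page-table frames
     * themselves live inside [vpt_start, vpt_end) and so enlarge v_end,
     * the loop retries until a candidate nr_pt_pages is large enough to
     * cover the space it itself occupies.
     */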
#define _p(a) ((void *) (a))

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Loaded kernel: %p->%p\n"
           " Init. ramdisk: %p->%p\n"
           " Phys-Mach map: %p->%p\n"
           " Start info:    %p->%p\n"
           " Store page:    %p->%p\n"
           " Console page:  %p->%p\n"
           " Page tables:   %p->%p\n"
           " Boot stack:    %p->%p\n"
           " TOTAL:         %p->%p\n",
           _p(dsi.v_kernstart), _p(dsi.v_kernend),
           _p(vinitrd_start), _p(vinitrd_end),
           _p(vphysmap_start), _p(vphysmap_end),
           _p(vstartinfo_start), _p(vstartinfo_end),
           _p(vstoreinfo_start), _p(vstoreinfo_end),
           _p(vconsole_start), _p(vconsole_end),
           _p(vpt_start), _p(vpt_end),
           _p(vstack_start), _p(vstack_end),
           _p(dsi.v_start), _p(v_end));
    printf(" ENTRY ADDRESS: %p\n", _p(dsi.v_kernentry));

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }
    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    (load_funcs.loadimage)(image, image_size, xc_handle, dom, page_array,
                           &dsi);
    /* Load the initial ramdisk image. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }
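
    /*
     * Note: because initrd_gfd was opened with gzdopen() by the caller,
     * the gzread() above handles both gzip-compressed and plain ramdisk
     * images; zlib passes uncompressed data through unchanged.
     */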
    if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* setup page tables */
#if defined(__i386__)
    if (dsi.pae_kernel)
        rc = setup_pg_tables_pae(xc_handle, dom, ctxt,
                                 dsi.v_start, v_end,
                                 page_array, vpt_start, vpt_end);
    else {
        rc = setup_pg_tables(xc_handle, dom, ctxt,
                             dsi.v_start, v_end,
                             page_array, vpt_start, vpt_end);
    }
#endif
#if defined(__x86_64__)
    rc = setup_pg_tables_64(xc_handle, dom, ctxt,
                            dsi.v_start, v_end,
                            page_array, vpt_start, vpt_end);
#endif
    if (0 != rc)
        goto error_out;

    /* Write the phys->machine and machine->phys table entries. */
    physmap_pfn = (vphysmap_start - dsi.v_start) >> PAGE_SHIFT;
    physmap = physmap_e = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[physmap_pfn++]);
    for ( count = 0; count < nr_pages; count++ )
    {
        if ( xc_add_mmu_update(xc_handle, mmu,
                               (page_array[count] << PAGE_SHIFT) |
                               MMU_MACHPHYS_UPDATE, count) )
        {
            munmap(physmap, PAGE_SIZE);
            goto error_out;
        }
        *physmap_e++ = page_array[count];
        if ( ((unsigned long)physmap_e & (PAGE_SIZE-1)) == 0 )
        {
            munmap(physmap, PAGE_SIZE);
            physmap = physmap_e = xc_map_foreign_range(
                xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
                page_array[physmap_pfn++]);
        }
    }
    munmap(physmap, PAGE_SIZE);
#if defined(__i386__)
    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if (dsi.pae_kernel) {
        if ( pin_table(xc_handle, MMUEXT_PIN_L3_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    } else {
        if ( pin_table(xc_handle, MMUEXT_PIN_L2_TABLE,
                       ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
            goto error_out;
    }
#endif

#if defined(__x86_64__)
    /*
     * Pin down l4tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( pin_table(xc_handle, MMUEXT_PIN_L4_TABLE,
                   ctxt->ctrlreg[3] >> PAGE_SHIFT, dom) )
        goto error_out;
#endif
    *store_mfn = page_array[(vstoreinfo_start-dsi.v_start) >> PAGE_SHIFT];
    *console_mfn = page_array[(vconsole_start-dsi.v_start) >> PAGE_SHIFT];
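
    /*
     * The start_info page is the boot-time handoff from the builder to
     * the guest kernel: it tells the guest where its phys-to-machine
     * map, page tables, initrd, and xenstore/console pages live.
     */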
    start_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vstartinfo_start-dsi.v_start)>>PAGE_SHIFT]);
    memset(start_info, 0, sizeof(*start_info));
    start_info->nr_pages       = nr_pages;
    start_info->shared_info    = shared_info_frame << PAGE_SHIFT;
    start_info->flags          = flags;
    start_info->pt_base        = vpt_start;
    start_info->nr_pt_frames   = nr_pt_pages;
    start_info->mfn_list       = vphysmap_start;
    start_info->store_mfn      = *store_mfn;
    start_info->store_evtchn   = store_evtchn;
    start_info->console_mfn    = *console_mfn;
    start_info->console_evtchn = console_evtchn;
    if ( initrd_len != 0 )
    {
        start_info->mod_start = vinitrd_start;
        start_info->mod_len   = initrd_len;
    }
    strncpy((char *)start_info->cmd_line, cmdline, MAX_GUEST_CMDLINE);
    start_info->cmd_line[MAX_GUEST_CMDLINE-1] = '\0';
    munmap(start_info, PAGE_SIZE);
    /* shared_info page starts its life empty. */
    shared_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;

    shared_info->n_vcpu = vcpus;
    printf(" VCPUS:         %d\n", shared_info->n_vcpu);

    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( xc_finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    *pvsi = vstartinfo_start;
    *pvss = vstack_start;
    *pvke = dsi.v_kernentry;

    return 0;

 error_out:
    free(mmu);
    free(page_array);
    return -1;
}
#endif
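
/*
 * Public entry point: build a paused, empty domain into a bootable Linux
 * guest.  Reads and probes the kernel image, optionally opens a (possibly
 * gzipped) ramdisk, verifies the domain has not already been constructed,
 * delegates memory layout and loading to setup_guest(), and finally
 * programs VCPU 0's initial register state via DOM0_SETDOMAININFO.
 */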
int xc_linux_build(int xc_handle,
                   u32 domid,
                   const char *image_name,
                   const char *ramdisk_name,
                   const char *cmdline,
                   unsigned long flags,
                   unsigned int vcpus,
                   unsigned int store_evtchn,
                   unsigned long *store_mfn,
                   unsigned int console_evtchn,
                   unsigned long *console_mfn)
{
    dom0_op_t launch_op, op;
    int initrd_fd = -1;
    gzFile initrd_gfd = NULL;
    int rc, i;
    vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
    unsigned long nr_pages;
    char *image = NULL;
    unsigned long image_size, initrd_size=0;
    unsigned long vstartinfo_start, vkern_entry, vstack_start;

    /* nr_pages is unsigned; compare as signed to catch the error return. */
    if ( (long)(nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
    {
        PERROR("Could not find total pages for domain");
        goto error_out;
    }

    if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
        goto error_out;

    if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
    {
        if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
        {
            PERROR("Could not open the initial ramdisk image");
            goto error_out;
        }

        initrd_size = xc_get_filesz(initrd_fd);

        if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
        {
            PERROR("Could not allocate decompression state for initrd");
            goto error_out;
        }
    }
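
    /*
     * The vcpu context structure is passed to Xen by pointer in a dom0
     * op, so it must be locked resident for the hypercall's duration.
     */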
    if ( mlock(&st_ctxt, sizeof(st_ctxt)) )
    {
        PERROR("xc_linux_build: ctxt mlock failed");
        goto error_out;
    }
    op.cmd = DOM0_GETDOMAININFO;
    op.u.getdomaininfo.domain = (domid_t)domid;
    if ( (xc_dom0_op(xc_handle, &op) < 0) ||
         ((u16)op.u.getdomaininfo.domain != domid) )
    {
        PERROR("Could not get info on domain");
        goto error_out;
    }

    if ( xc_domain_get_vcpu_context(xc_handle, domid, 0, ctxt) )
    {
        PERROR("Could not get vcpu context");
        goto error_out;
    }

    if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
#ifdef __ia64__
         0 )
#else
         (ctxt->ctrlreg[3] != 0) )
#endif
    {
        ERROR("Domain is already constructed");
        goto error_out;
    }
    if ( setup_guest(xc_handle, domid, image, image_size,
                     initrd_gfd, initrd_size, nr_pages,
                     &vstartinfo_start, &vkern_entry,
                     &vstack_start, ctxt, cmdline,
                     op.u.getdomaininfo.shared_info_frame,
                     flags, vcpus,
                     store_evtchn, store_mfn,
                     console_evtchn, console_mfn) < 0 )
    {
        ERROR("Error constructing guest OS");
        goto error_out;
    }

    /* gzclose() also closes the underlying fd, so do not close it twice. */
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);
#ifdef __ia64__
    /* based on new_thread in xen/arch/ia64/domain.c */
    ctxt->regs.cr_ipsr = 0; /* all necessary bits filled by hypervisor */
    ctxt->regs.cr_iip  = vkern_entry;
    ctxt->regs.cr_ifs  = 1UL << 63;
    ctxt->regs.ar_fpsr = FPSR_DEFAULT;
    /* ctxt->regs.r28 = dom_fw_setup(); currently done by hypervisor, should move here */
    ctxt->vcpu.privregs = 0;
    ctxt->shared.flags  = flags;
    i = 0; /* silence unused variable warning */
#else /* x86 */
    /*
     * Initial register values:
     *  DS,ES,FS,GS = FLAT_KERNEL_DS
     *       CS:EIP = FLAT_KERNEL_CS:start_pc
     *       SS:ESP = FLAT_KERNEL_DS:start_stack
     *          ESI = start_info
     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     *       EFLAGS = IF | 2 (bit 1 is reserved and should always be 1)
     */
    ctxt->user_regs.ds = FLAT_KERNEL_DS;
    ctxt->user_regs.es = FLAT_KERNEL_DS;
    ctxt->user_regs.fs = FLAT_KERNEL_DS;
    ctxt->user_regs.gs = FLAT_KERNEL_DS;
    ctxt->user_regs.ss = FLAT_KERNEL_SS;
    ctxt->user_regs.cs = FLAT_KERNEL_CS;
    ctxt->user_regs.eip = vkern_entry;
    ctxt->user_regs.esp = vstack_start + PAGE_SIZE;
    ctxt->user_regs.esi = vstartinfo_start;
    ctxt->user_regs.eflags = 1 << 9; /* Interrupt Enable */

    /* FPU is set up to default initial state. */
    memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));

    /* Virtual IDT is empty at start-of-day. */
    for ( i = 0; i < 256; i++ )
    {
        ctxt->trap_ctxt[i].vector = i;
        ctxt->trap_ctxt[i].cs     = FLAT_KERNEL_CS;
    }

    /* No LDT. */
    ctxt->ldt_ents = 0;

    /* Use the default Xen-provided GDT. */
    ctxt->gdt_ents = 0;

    /* Ring 1 stack is the initial stack. */
    ctxt->kernel_ss = FLAT_KERNEL_SS;
    ctxt->kernel_sp = vstack_start + PAGE_SIZE;

    /* No debugging. */
    memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));

    /* No callback handlers. */
#if defined(__i386__)
    ctxt->event_callback_cs     = FLAT_KERNEL_CS;
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_cs  = FLAT_KERNEL_CS;
    ctxt->failsafe_callback_eip = 0;
#elif defined(__x86_64__)
    ctxt->event_callback_eip    = 0;
    ctxt->failsafe_callback_eip = 0;
    ctxt->syscall_callback_eip  = 0;
#endif
#endif /* x86 */
    memset( &launch_op, 0, sizeof(launch_op) );

    launch_op.u.setdomaininfo.domain = (domid_t)domid;
    launch_op.u.setdomaininfo.vcpu   = 0;
    launch_op.u.setdomaininfo.ctxt   = ctxt;

    launch_op.cmd = DOM0_SETDOMAININFO;
    rc = xc_dom0_op(xc_handle, &launch_op);

    return rc;

 error_out:
    if ( initrd_gfd != NULL )
        gzclose(initrd_gfd);
    else if ( initrd_fd >= 0 )
        close(initrd_fd);
    free(image);

    return -1;
}
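
/*
 * Illustrative caller sketch: a minimal toolstack would create and pause
 * a domain, set up the xenstore and console event channels, and then
 * invoke xc_linux_build().  The handle xc, domain id dom, event-channel
 * ports and file paths below are hypothetical placeholders.
 *
 *     unsigned long store_mfn, console_mfn;
 *     rc = xc_linux_build(xc, dom,
 *                         "/path/to/vmlinuz", "/path/to/initrd.gz",
 *                         "root=/dev/sda1 ro", 0, 1,
 *                         store_evtchn, &store_mfn,
 *                         console_evtchn, &console_mfn);
 *     if ( rc != 0 )
 *         ... the domain is left paused and unbuilt ...
 *
 * On success, store_mfn and console_mfn are handed to xenstored and the
 * console daemon so they can map the shared ring pages.
 */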