debuggers.hg

view tools/libxc/xc_hvm_build.c @ 10949:b33c08de3d98

[HVM] Add a concept of HVM parameters to the hypervisor.

Each HVM domain has a space of HVM parameters associated with it,
and these can be manipulated via a new hvm_op hypercall. This means
that the hypervisor no longer needs to parse the hvm_info table, so
remove that code.

Signed-off-by: Steven Smith <ssmith@xensource.com>
Signed-off-by: Keir Fraser <keir@xensource.com>
author kfraser@localhost.localdomain
date Thu Aug 03 13:53:33 2006 +0100 (2006-08-03)
parents 38943ca3c8cc
children 415614d3a1ee
line source
1 /******************************************************************************
2 * xc_hvm_build.c
3 */
5 #define ELFSIZE 32
6 #include <stddef.h>
7 #include <inttypes.h>
8 #include "xg_private.h"
9 #include "xc_private.h"
10 #include "xc_elf.h"
11 #include <stdlib.h>
12 #include <unistd.h>
13 #include <zlib.h>
14 #include <xen/hvm/hvm_info_table.h>
15 #include <xen/hvm/ioreq.h>
16 #include <xen/hvm/params.h>
18 #define HVM_LOADER_ENTR_ADDR 0x00100000
20 #define E820MAX 128
22 #define E820_RAM 1
23 #define E820_RESERVED 2
24 #define E820_ACPI 3
25 #define E820_NVS 4
26 #define E820_IO 16
27 #define E820_SHARED_PAGE 17
28 #define E820_XENSTORE 18
30 #define E820_MAP_PAGE 0x00090000
31 #define E820_MAP_NR_OFFSET 0x000001E8
32 #define E820_MAP_OFFSET 0x000002D0
/* One entry in the BIOS-style E820 memory map handed to the guest. */
struct e820entry {
    uint64_t addr;   /* start of the region (bytes) */
    uint64_t size;   /* length of the region (bytes) */
    uint32_t type;   /* one of the E820_* region types */
} __attribute__((packed));
40 static int
41 parseelfimage(
42 char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
43 static int
44 loadelfimage(
45 char *elfbase, int xch, uint32_t dom, unsigned long *parray,
46 struct domain_setup_info *dsi);
48 static void xc_set_hvm_param(int handle,
49 domid_t dom, int param, unsigned long value)
50 {
51 DECLARE_HYPERCALL;
52 xen_hvm_param_t arg;
53 int rc;
55 hypercall.op = __HYPERVISOR_hvm_op;
56 hypercall.arg[0] = HVMOP_set_param;
57 hypercall.arg[1] = (unsigned long)&arg;
58 arg.domid = dom;
59 arg.index = param;
60 arg.value = value;
61 if ( mlock(&arg, sizeof(arg)) != 0 )
62 {
63 PERROR("Could not lock memory for set parameter");
64 return;
65 }
66 rc = do_xen_hypercall(handle, &hypercall);
67 safe_munlock(&arg, sizeof(arg));
68 if (rc < 0)
69 PERROR("set HVM parameter failed (%d)", rc);
70 }
72 static void build_e820map(void *e820_page, unsigned long long mem_size)
73 {
74 struct e820entry *e820entry =
75 (struct e820entry *)(((unsigned char *)e820_page) + E820_MAP_OFFSET);
76 unsigned char nr_map = 0;
78 /* XXX: Doesn't work for > 4GB yet */
79 e820entry[nr_map].addr = 0x0;
80 e820entry[nr_map].size = 0x9F800;
81 e820entry[nr_map].type = E820_RAM;
82 nr_map++;
84 e820entry[nr_map].addr = 0x9F800;
85 e820entry[nr_map].size = 0x800;
86 e820entry[nr_map].type = E820_RESERVED;
87 nr_map++;
89 e820entry[nr_map].addr = 0xA0000;
90 e820entry[nr_map].size = 0x20000;
91 e820entry[nr_map].type = E820_IO;
92 nr_map++;
94 e820entry[nr_map].addr = 0xF0000;
95 e820entry[nr_map].size = 0x10000;
96 e820entry[nr_map].type = E820_RESERVED;
97 nr_map++;
99 #define STATIC_PAGES 2 /* for ioreq_t and store_mfn */
100 /* Most of the ram goes here */
101 e820entry[nr_map].addr = 0x100000;
102 e820entry[nr_map].size = mem_size - 0x100000 - STATIC_PAGES * PAGE_SIZE;
103 e820entry[nr_map].type = E820_RAM;
104 nr_map++;
106 /* Statically allocated special pages */
108 /* For xenstore */
109 e820entry[nr_map].addr = mem_size - 2 * PAGE_SIZE;
110 e820entry[nr_map].size = PAGE_SIZE;
111 e820entry[nr_map].type = E820_XENSTORE;
112 nr_map++;
114 /* Shared ioreq_t page */
115 e820entry[nr_map].addr = mem_size - PAGE_SIZE;
116 e820entry[nr_map].size = PAGE_SIZE;
117 e820entry[nr_map].type = E820_SHARED_PAGE;
118 nr_map++;
120 e820entry[nr_map].addr = mem_size;
121 e820entry[nr_map].size = 0x3 * PAGE_SIZE;
122 e820entry[nr_map].type = E820_NVS;
123 nr_map++;
125 e820entry[nr_map].addr = mem_size + 0x3 * PAGE_SIZE;
126 e820entry[nr_map].size = 0xA * PAGE_SIZE;
127 e820entry[nr_map].type = E820_ACPI;
128 nr_map++;
130 e820entry[nr_map].addr = 0xFEC00000;
131 e820entry[nr_map].size = 0x1400000;
132 e820entry[nr_map].type = E820_IO;
133 nr_map++;
135 *(((unsigned char *)e820_page) + E820_MAP_NR_OFFSET) = nr_map;
136 }
138 static void set_hvm_info_checksum(struct hvm_info_table *t)
139 {
140 uint8_t *ptr = (uint8_t *)t, sum = 0;
141 unsigned int i;
143 t->checksum = 0;
145 for (i = 0; i < t->length; i++)
146 sum += *ptr++;
148 t->checksum = -sum;
149 }
151 /*
152 * Use E820 reserved memory 0x9F800 to pass HVM info to hvmloader
153 * hvmloader will use this info to set BIOS accordingly
154 */
155 static int set_hvm_info(int xc_handle, uint32_t dom,
156 xen_pfn_t *pfn_list, unsigned int vcpus,
157 unsigned int pae, unsigned int acpi, unsigned int apic)
158 {
159 char *va_map;
160 struct hvm_info_table *va_hvm;
162 va_map = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
163 PROT_READ | PROT_WRITE,
164 pfn_list[HVM_INFO_PFN]);
166 if ( va_map == NULL )
167 return -1;
169 va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
170 memset(va_hvm, 0, sizeof(*va_hvm));
172 strncpy(va_hvm->signature, "HVM INFO", 8);
173 va_hvm->length = sizeof(struct hvm_info_table);
174 va_hvm->acpi_enabled = acpi;
175 va_hvm->apic_enabled = apic;
176 va_hvm->pae_enabled = pae;
177 va_hvm->nr_vcpus = vcpus;
179 set_hvm_info_checksum(va_hvm);
181 munmap(va_map, PAGE_SIZE);
183 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_APIC_ENABLED, apic);
184 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_PAE_ENABLED, pae);
186 return 0;
187 }
189 static int setup_guest(int xc_handle,
190 uint32_t dom, int memsize,
191 char *image, unsigned long image_size,
192 unsigned long nr_pages,
193 vcpu_guest_context_t *ctxt,
194 unsigned long shared_info_frame,
195 unsigned int vcpus,
196 unsigned int pae,
197 unsigned int acpi,
198 unsigned int apic,
199 unsigned int store_evtchn,
200 unsigned long *store_mfn)
201 {
202 xen_pfn_t *page_array = NULL;
203 unsigned long count, i;
204 unsigned long long ptr;
205 xc_mmu_t *mmu = NULL;
207 shared_info_t *shared_info;
208 void *e820_page;
210 struct domain_setup_info dsi;
211 uint64_t v_end;
213 unsigned long shared_page_frame = 0;
214 shared_iopage_t *sp;
216 memset(&dsi, 0, sizeof(struct domain_setup_info));
218 if ( (parseelfimage(image, image_size, &dsi)) != 0 )
219 goto error_out;
221 if ( (dsi.v_kernstart & (PAGE_SIZE - 1)) != 0 )
222 {
223 PERROR("Guest OS must load to a page boundary.\n");
224 goto error_out;
225 }
227 /* memsize is in megabytes */
228 v_end = (unsigned long long)memsize << 20;
230 IPRINTF("VIRTUAL MEMORY ARRANGEMENT:\n"
231 " Loaded HVM loader: %016"PRIx64"->%016"PRIx64"\n"
232 " TOTAL: %016"PRIx64"->%016"PRIx64"\n",
233 dsi.v_kernstart, dsi.v_kernend,
234 dsi.v_start, v_end);
235 IPRINTF(" ENTRY ADDRESS: %016"PRIx64"\n", dsi.v_kernentry);
237 if ( (v_end - dsi.v_start) > ((unsigned long long)nr_pages << PAGE_SHIFT) )
238 {
239 PERROR("Initial guest OS requires too much space: "
240 "(%lluMB is greater than %lluMB limit)\n",
241 (unsigned long long)(v_end - dsi.v_start) >> 20,
242 ((unsigned long long)nr_pages << PAGE_SHIFT) >> 20);
243 goto error_out;
244 }
246 if ( (page_array = malloc(nr_pages * sizeof(xen_pfn_t))) == NULL )
247 {
248 PERROR("Could not allocate memory.\n");
249 goto error_out;
250 }
252 if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
253 {
254 PERROR("Could not get the page frame list.\n");
255 goto error_out;
256 }
258 loadelfimage(image, xc_handle, dom, page_array, &dsi);
260 if ( (mmu = xc_init_mmu_updates(xc_handle, dom)) == NULL )
261 goto error_out;
263 /* Write the machine->phys table entries. */
264 for ( count = 0; count < nr_pages; count++ )
265 {
266 ptr = (unsigned long long)page_array[count] << PAGE_SHIFT;
267 if ( xc_add_mmu_update(xc_handle, mmu,
268 ptr | MMU_MACHPHYS_UPDATE, count) )
269 goto error_out;
270 }
272 if ( set_hvm_info(xc_handle, dom, page_array, vcpus, pae, acpi, apic) )
273 {
274 ERROR("Couldn't set hvm info for HVM guest.\n");
275 goto error_out;
276 }
278 if ( (e820_page = xc_map_foreign_range(
279 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
280 page_array[E820_MAP_PAGE >> PAGE_SHIFT])) == 0 )
281 goto error_out;
282 memset(e820_page, 0, PAGE_SIZE);
283 build_e820map(e820_page, v_end);
284 munmap(e820_page, PAGE_SIZE);
286 /* shared_info page starts its life empty. */
287 if ( (shared_info = xc_map_foreign_range(
288 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
289 shared_info_frame)) == 0 )
290 goto error_out;
291 memset(shared_info, 0, sizeof(shared_info_t));
292 /* Mask all upcalls... */
293 for ( i = 0; i < MAX_VIRT_CPUS; i++ )
294 shared_info->vcpu_info[i].evtchn_upcall_mask = 1;
295 munmap(shared_info, PAGE_SIZE);
297 /* Populate the event channel port in the shared page */
298 shared_page_frame = page_array[(v_end >> PAGE_SHIFT) - 1];
299 if ( (sp = (shared_iopage_t *) xc_map_foreign_range(
300 xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
301 shared_page_frame)) == 0 )
302 goto error_out;
303 memset(sp, 0, PAGE_SIZE);
305 /* FIXME: how about if we overflow the page here? */
306 for ( i = 0; i < vcpus; i++ ) {
307 unsigned int vp_eport;
309 vp_eport = xc_evtchn_alloc_unbound(xc_handle, dom, 0);
310 if ( vp_eport < 0 ) {
311 PERROR("Couldn't get unbound port from VMX guest.\n");
312 goto error_out;
313 }
314 sp->vcpu_iodata[i].vp_eport = vp_eport;
315 }
317 munmap(sp, PAGE_SIZE);
319 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN, (v_end >> PAGE_SHIFT) - 2);
320 xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_EVTCHN, store_evtchn);
322 *store_mfn = page_array[(v_end >> PAGE_SHIFT) - 2];
323 if ( xc_clear_domain_page(xc_handle, dom, *store_mfn) )
324 goto error_out;
326 /* Send the page update requests down to the hypervisor. */
327 if ( xc_finish_mmu_updates(xc_handle, mmu) )
328 goto error_out;
330 free(mmu);
331 free(page_array);
333 /*
334 * Initial register values:
335 */
336 ctxt->user_regs.eip = dsi.v_kernentry;
338 return 0;
340 error_out:
341 free(mmu);
342 free(page_array);
343 return -1;
344 }
346 static int xc_hvm_build_internal(int xc_handle,
347 uint32_t domid,
348 int memsize,
349 char *image,
350 unsigned long image_size,
351 unsigned int vcpus,
352 unsigned int pae,
353 unsigned int acpi,
354 unsigned int apic,
355 unsigned int store_evtchn,
356 unsigned long *store_mfn)
357 {
358 dom0_op_t launch_op, op;
359 int rc, i;
360 vcpu_guest_context_t st_ctxt, *ctxt = &st_ctxt;
361 unsigned long nr_pages;
362 xen_capabilities_info_t xen_caps;
364 if ( (image == NULL) || (image_size == 0) )
365 {
366 ERROR("Image required");
367 goto error_out;
368 }
370 if ( (rc = xc_version(xc_handle, XENVER_capabilities, &xen_caps)) != 0 )
371 {
372 PERROR("Failed to get xen version info");
373 goto error_out;
374 }
376 if ( !strstr(xen_caps, "hvm") )
377 {
378 PERROR("CPU doesn't support HVM extensions or "
379 "the extensions are not enabled");
380 goto error_out;
381 }
383 if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
384 {
385 PERROR("Could not find total pages for domain");
386 goto error_out;
387 }
389 if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
390 {
391 PERROR("%s: ctxt mlock failed", __func__);
392 return 1;
393 }
395 op.cmd = DOM0_GETDOMAININFO;
396 op.u.getdomaininfo.domain = (domid_t)domid;
397 if ( (xc_dom0_op(xc_handle, &op) < 0) ||
398 ((uint16_t)op.u.getdomaininfo.domain != domid) )
399 {
400 PERROR("Could not get info on domain");
401 goto error_out;
402 }
404 memset(ctxt, 0, sizeof(*ctxt));
406 ctxt->flags = VGCF_HVM_GUEST;
407 if ( setup_guest(xc_handle, domid, memsize, image, image_size, nr_pages,
408 ctxt, op.u.getdomaininfo.shared_info_frame,
409 vcpus, pae, acpi, apic, store_evtchn, store_mfn) < 0)
410 {
411 ERROR("Error constructing guest OS");
412 goto error_out;
413 }
415 /* FPU is set up to default initial state. */
416 memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
418 /* Virtual IDT is empty at start-of-day. */
419 for ( i = 0; i < 256; i++ )
420 {
421 ctxt->trap_ctxt[i].vector = i;
422 ctxt->trap_ctxt[i].cs = FLAT_KERNEL_CS;
423 }
425 /* No LDT. */
426 ctxt->ldt_ents = 0;
428 /* Use the default Xen-provided GDT. */
429 ctxt->gdt_ents = 0;
431 /* No debugging. */
432 memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
434 /* No callback handlers. */
435 #if defined(__i386__)
436 ctxt->event_callback_cs = FLAT_KERNEL_CS;
437 ctxt->event_callback_eip = 0;
438 ctxt->failsafe_callback_cs = FLAT_KERNEL_CS;
439 ctxt->failsafe_callback_eip = 0;
440 #elif defined(__x86_64__)
441 ctxt->event_callback_eip = 0;
442 ctxt->failsafe_callback_eip = 0;
443 ctxt->syscall_callback_eip = 0;
444 #endif
446 memset( &launch_op, 0, sizeof(launch_op) );
448 launch_op.u.setvcpucontext.domain = (domid_t)domid;
449 launch_op.u.setvcpucontext.vcpu = 0;
450 set_xen_guest_handle(launch_op.u.setvcpucontext.ctxt, ctxt);
452 launch_op.cmd = DOM0_SETVCPUCONTEXT;
453 rc = xc_dom0_op(xc_handle, &launch_op);
455 return rc;
457 error_out:
458 return -1;
459 }
461 static inline int is_loadable_phdr(Elf32_Phdr *phdr)
462 {
463 return ((phdr->p_type == PT_LOAD) &&
464 ((phdr->p_flags & (PF_W|PF_X)) != 0));
465 }
467 static int parseelfimage(char *elfbase,
468 unsigned long elfsize,
469 struct domain_setup_info *dsi)
470 {
471 Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
472 Elf32_Phdr *phdr;
473 Elf32_Shdr *shdr;
474 unsigned long kernstart = ~0UL, kernend=0UL;
475 char *shstrtab;
476 int h;
478 if ( !IS_ELF(*ehdr) )
479 {
480 ERROR("Kernel image does not have an ELF header.");
481 return -EINVAL;
482 }
484 if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
485 {
486 ERROR("ELF program headers extend beyond end of image.");
487 return -EINVAL;
488 }
490 if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
491 {
492 ERROR("ELF section headers extend beyond end of image.");
493 return -EINVAL;
494 }
496 /* Find the section-header strings table. */
497 if ( ehdr->e_shstrndx == SHN_UNDEF )
498 {
499 ERROR("ELF image has no section-header strings table (shstrtab).");
500 return -EINVAL;
501 }
502 shdr = (Elf32_Shdr *)(elfbase + ehdr->e_shoff +
503 (ehdr->e_shstrndx*ehdr->e_shentsize));
504 shstrtab = elfbase + shdr->sh_offset;
506 for ( h = 0; h < ehdr->e_phnum; h++ )
507 {
508 phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
509 if ( !is_loadable_phdr(phdr) )
510 continue;
511 if ( phdr->p_paddr < kernstart )
512 kernstart = phdr->p_paddr;
513 if ( (phdr->p_paddr + phdr->p_memsz) > kernend )
514 kernend = phdr->p_paddr + phdr->p_memsz;
515 }
517 if ( (kernstart > kernend) ||
518 (ehdr->e_entry < kernstart) ||
519 (ehdr->e_entry > kernend) )
520 {
521 ERROR("Malformed ELF image.");
522 return -EINVAL;
523 }
525 dsi->v_start = 0x00000000;
527 dsi->v_kernstart = kernstart;
528 dsi->v_kernend = kernend;
529 dsi->v_kernentry = HVM_LOADER_ENTR_ADDR;
531 dsi->v_end = dsi->v_kernend;
533 return 0;
534 }
536 static int
537 loadelfimage(
538 char *elfbase, int xch, uint32_t dom, unsigned long *parray,
539 struct domain_setup_info *dsi)
540 {
541 Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfbase;
542 Elf32_Phdr *phdr;
543 int h;
545 char *va;
546 unsigned long pa, done, chunksz;
548 for ( h = 0; h < ehdr->e_phnum; h++ )
549 {
550 phdr = (Elf32_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
551 if ( !is_loadable_phdr(phdr) )
552 continue;
554 for ( done = 0; done < phdr->p_filesz; done += chunksz )
555 {
556 pa = (phdr->p_paddr + done) - dsi->v_start;
557 if ((va = xc_map_foreign_range(
558 xch, dom, PAGE_SIZE, PROT_WRITE,
559 parray[pa >> PAGE_SHIFT])) == 0)
560 return -1;
561 chunksz = phdr->p_filesz - done;
562 if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
563 chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
564 memcpy(va + (pa & (PAGE_SIZE-1)),
565 elfbase + phdr->p_offset + done, chunksz);
566 munmap(va, PAGE_SIZE);
567 }
569 for ( ; done < phdr->p_memsz; done += chunksz )
570 {
571 pa = (phdr->p_paddr + done) - dsi->v_start;
572 if ((va = xc_map_foreign_range(
573 xch, dom, PAGE_SIZE, PROT_WRITE,
574 parray[pa >> PAGE_SHIFT])) == 0)
575 return -1;
576 chunksz = phdr->p_memsz - done;
577 if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
578 chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
579 memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
580 munmap(va, PAGE_SIZE);
581 }
582 }
584 return 0;
585 }
/* xc_hvm_build
 *
 * Create a domain for a virtualized Linux, using files/filenames.
 * Reads (and, if needed, decompresses) the image at @image_name and
 * hands it to the common build path.  Returns its result, or -1 if
 * the image cannot be read.
 */
int xc_hvm_build(int xc_handle,
                 uint32_t domid,
                 int memsize,
                 const char *image_name,
                 unsigned int vcpus,
                 unsigned int pae,
                 unsigned int acpi,
                 unsigned int apic,
                 unsigned int store_evtchn,
                 unsigned long *store_mfn)
{
    unsigned long image_size;
    char *image;
    int sts;

    if ( image_name == NULL )
        return -1;

    image = xc_read_image(image_name, &image_size);
    if ( image == NULL )
        return -1;

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                image, image_size,
                                vcpus, pae, acpi, apic,
                                store_evtchn, store_mfn);

    free(image);

    return sts;
}
/* xc_hvm_build_mem
 *
 * Create a domain for a virtualized Linux, using in-memory buffers.
 * The buffer is inflated if compressed, then handed to the common
 * build path.  Returns its result, or -1 on bad input.
 */
int xc_hvm_build_mem(int xc_handle,
                     uint32_t domid,
                     int memsize,
                     const char *image_buffer,
                     unsigned long image_size,
                     unsigned int vcpus,
                     unsigned int pae,
                     unsigned int acpi,
                     unsigned int apic,
                     unsigned int store_evtchn,
                     unsigned long *store_mfn)
{
    char *inflated;
    unsigned long inflated_len;
    int sts;

    /* Validate that there is a kernel buffer. */
    if ( (image_buffer == NULL) || (image_size == 0) )
    {
        ERROR("kernel image buffer not present");
        return -1;
    }

    inflated = xc_inflate_buffer(image_buffer, image_size, &inflated_len);
    if ( inflated == NULL )
    {
        ERROR("unable to inflate ram disk buffer");
        return -1;
    }

    sts = xc_hvm_build_internal(xc_handle, domid, memsize,
                                inflated, inflated_len,
                                vcpus, pae, acpi, apic,
                                store_evtchn, store_mfn);

    /* xc_inflate_buffer may return the original buffer pointer (for
     * already-inflated buffers), so exercise some care in freeing. */
    if ( (inflated != NULL) && (inflated != image_buffer) )
        free(inflated);

    return sts;
}
673 /*
674 * Local variables:
675 * mode: C
676 * c-set-style: "BSD"
677 * c-basic-offset: 4
678 * tab-width: 4
679 * indent-tabs-mode: nil
680 * End:
681 */