debuggers.hg

view tools/libxc/xc_vmx_build.c @ 3644:281346e5fc97

bitkeeper revision 1.1159.212.67 (42001d370Zk3jsfUjyC3iV9vgEJAlA)

Create the e820 map dynamically. This makes it easy to run VMX guests
of different memory sizes (no need to manually edit the e820 map).

Signed-off-by: Xin B Li <xin.b.li@intel.com>
Signed-off-by: Arun Sharma <arun.sharma@intel.com>
Signed-off-by: ian.pratt@cl.cam.ac.uk
author iap10@labyrinth.cl.cam.ac.uk
date Wed Feb 02 00:22:15 2005 +0000 (2005-02-02)
parents a920f0ced90f
children bbe8541361dd 5612c06cde33 9db7fbdf56b6
line source
1 /******************************************************************************
2 * xc_vmx_build.c
3 */
5 #include "xc_private.h"
6 #define ELFSIZE 32
7 #include "xc_elf.h"
8 #include <stdlib.h>
9 #include <zlib.h>
10 #include "linux_boot_params.h"
12 #define L1_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED)
13 #define L2_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
15 #define round_pgup(_p) (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
16 #define round_pgdown(_p) ((_p)&PAGE_MASK)
18 #define LINUX_BOOT_PARAMS_ADDR 0x00090000
19 #define LINUX_KERNEL_ENTR_ADDR 0x00100000
20 #define LINUX_PAGE_OFFSET 0xC0000000
22 struct domain_setup_info
23 {
24 unsigned long v_start;
25 unsigned long v_end;
26 unsigned long v_kernstart;
27 unsigned long v_kernend;
28 unsigned long v_kernentry;
30 unsigned int use_writable_pagetables;
31 unsigned int load_bsd_symtab;
33 unsigned long symtab_addr;
34 unsigned long symtab_len;
35 };
37 static int
38 parseelfimage(
39 char *elfbase, unsigned long elfsize, struct domain_setup_info *dsi);
40 static int
41 loadelfimage(
42 char *elfbase, int xch, u32 dom, unsigned long *parray,
43 unsigned long vstart);
44 static int
45 loadelfsymtab(
46 char *elfbase, int xch, u32 dom, unsigned long *parray,
47 struct domain_setup_info *dsi);
49 static void build_e820map(struct mem_map *mem_mapp, unsigned long mem_size)
50 {
51 int nr_map = 0;
53 /* XXX: Doesn't work for > 4GB yet */
54 mem_mapp->map[0].addr = 0x0;
55 mem_mapp->map[0].size = 0x9F800;
56 mem_mapp->map[0].type = E820_RAM;
57 mem_mapp->map[0].caching_attr = MEMMAP_WB;
58 nr_map++;
60 mem_mapp->map[1].addr = 0x9F800;
61 mem_mapp->map[1].size = 0x800;
62 mem_mapp->map[1].type = E820_RESERVED;
63 mem_mapp->map[1].caching_attr = MEMMAP_UC;
64 nr_map++;
66 mem_mapp->map[2].addr = 0xA0000;
67 mem_mapp->map[2].size = 0x20000;
68 mem_mapp->map[2].type = E820_IO;
69 mem_mapp->map[2].caching_attr = MEMMAP_UC;
70 nr_map++;
72 mem_mapp->map[3].addr = 0xF0000;
73 mem_mapp->map[3].size = 0x10000;
74 mem_mapp->map[3].type = E820_RESERVED;
75 mem_mapp->map[3].caching_attr = MEMMAP_UC;
76 nr_map++;
78 mem_mapp->map[4].addr = 0x100000;
79 mem_mapp->map[4].size = mem_size - 0x100000 - PAGE_SIZE;
80 mem_mapp->map[4].type = E820_RAM;
81 mem_mapp->map[4].caching_attr = MEMMAP_WB;
82 nr_map++;
84 mem_mapp->map[5].addr = mem_size - PAGE_SIZE;
85 mem_mapp->map[5].size = PAGE_SIZE;
86 mem_mapp->map[5].type = E820_SHARED;
87 mem_mapp->map[5].caching_attr = MEMMAP_WB;
88 nr_map++;
90 mem_mapp->map[6].addr = mem_size;
91 mem_mapp->map[6].size = 0x3 * PAGE_SIZE;
92 mem_mapp->map[6].type = E820_NVS;
93 mem_mapp->map[6].caching_attr = MEMMAP_UC;
94 nr_map++;
96 mem_mapp->map[7].addr = mem_size + 0x3 * PAGE_SIZE;
97 mem_mapp->map[7].size = 0xA * PAGE_SIZE;
98 mem_mapp->map[7].type = E820_ACPI;
99 mem_mapp->map[7].caching_attr = MEMMAP_WB;
100 nr_map++;
102 mem_mapp->map[8].addr = 0xFEC00000;
103 mem_mapp->map[8].size = 0x1400000;
104 mem_mapp->map[8].type = E820_IO;
105 mem_mapp->map[8].caching_attr = MEMMAP_UC;
106 nr_map++;
108 mem_mapp->nr_map = nr_map;
109 }
/*
 * Construct the initial memory image of a VMX (unmodified) guest:
 *  - parse and load the kernel ELF image (and, optionally, its BSD-style
 *    symbol table),
 *  - load the initial ramdisk just below the top of guest memory,
 *  - build the bootstrap page tables covering the whole guest range,
 *  - fill in a Linux boot_params page, a minimal boot GDT and the
 *    dynamically generated e820 map,
 *  - zero the shared_info page and set up the initial register state
 *    in *ctxt.
 *
 * memsize is in megabytes; nr_pages is the number of machine pages already
 * allocated to the domain; page_array maps guest pfn -> machine frame.
 * Returns 0 on success, -1 on failure (partially built state is left for
 * the caller/hypervisor to tear down with the domain).
 */
static int setup_guestos(int xc_handle,
                         u32 dom, int memsize,
                         char *image, unsigned long image_size,
                         gzFile initrd_gfd, unsigned long initrd_len,
                         unsigned long nr_pages,
                         full_execution_context_t *ctxt,
                         const char *cmdline,
                         unsigned long shared_info_frame,
                         unsigned int control_evtchn,
                         unsigned long flags,
                         struct mem_map * mem_mapp)
{
    l1_pgentry_t *vl1tab=NULL, *vl1e=NULL;
    l2_pgentry_t *vl2tab=NULL, *vl2e=NULL;
    unsigned long *page_array = NULL;
    unsigned long l2tab;
    unsigned long l1tab;
    unsigned long count, i;
    shared_info_t *shared_info;
    struct linux_boot_params * boot_paramsp;
    __u16 * boot_gdtp;
    mmu_t *mmu = NULL;
    int rc;

    unsigned long nr_pt_pages;
    unsigned long ppt_alloc;

    struct domain_setup_info dsi;
    unsigned long vinitrd_start;
    unsigned long vinitrd_end;
    unsigned long vboot_params_start;
    unsigned long vboot_params_end;
    unsigned long vboot_gdt_start;
    unsigned long vboot_gdt_end;
    unsigned long vpt_start;
    unsigned long vpt_end;
    unsigned long v_end;

    memset(&dsi, 0, sizeof(struct domain_setup_info));

    rc = parseelfimage(image, image_size, &dsi);
    if ( rc != 0 )
        goto error_out;

    if (dsi.use_writable_pagetables)
        xc_domain_setvmassist(xc_handle, dom, VMASST_CMD_enable,
                              VMASST_TYPE_writable_pagetables);

    /* First pass (parray == NULL): only computes dsi.symtab_addr/len. */
    if (dsi.load_bsd_symtab)
        loadelfsymtab(image, xc_handle, dom, NULL, &dsi);

    if ( (dsi.v_start & (PAGE_SIZE-1)) != 0 )
    {
        PERROR("Guest OS must load to a page boundary.\n");
        goto error_out;
    }

    /*
     * Why do we need this? The number of page-table frames depends on the
     * size of the bootstrap address space. But the size of the address space
     * depends on the number of page-table frames (since each one is mapped
     * read-only). We have a pair of simultaneous equations in two unknowns,
     * which we solve by exhaustive search.
     */
    vboot_params_start = LINUX_BOOT_PARAMS_ADDR;
    vboot_params_end = vboot_params_start + PAGE_SIZE;
    vboot_gdt_start = vboot_params_end;
    vboot_gdt_end = vboot_gdt_start + PAGE_SIZE;

    /* memsize is in megabytes */
    v_end = memsize << 20;
    vinitrd_end = v_end - PAGE_SIZE; /* leaving the top 4k untouched for IO requests page use */
    vinitrd_start = vinitrd_end - initrd_len;
    vinitrd_start = vinitrd_start & (~(PAGE_SIZE - 1)); /* page-align down */

    if(initrd_len == 0)
        vinitrd_start = vinitrd_end = 0;

    /* One L2 page plus one L1 page per 4MB of guest memory. */
    nr_pt_pages = 1 + ((memsize + 3) >> 2);
    /* Bootstrap page tables live immediately above guest memory. */
    vpt_start = v_end;
    vpt_end = vpt_start + (nr_pt_pages * PAGE_SIZE);

    printf("VIRTUAL MEMORY ARRANGEMENT:\n"
           " Boot_params:   %08lx->%08lx\n"
           " boot_gdt:      %08lx->%08lx\n"
           " Loaded kernel: %08lx->%08lx\n"
           " Init. ramdisk: %08lx->%08lx\n"
           " Page tables:   %08lx->%08lx\n"
           " TOTAL:         %08lx->%08lx\n",
           vboot_params_start, vboot_params_end,
           vboot_gdt_start, vboot_gdt_end,
           dsi.v_kernstart, dsi.v_kernend,
           vinitrd_start, vinitrd_end,
           vpt_start, vpt_end,
           dsi.v_start, v_end);
    printf(" ENTRY ADDRESS: %08lx\n", dsi.v_kernentry);
    printf(" INITRD LENGTH: %08lx\n", initrd_len);

    if ( (v_end - dsi.v_start) > (nr_pages * PAGE_SIZE) )
    {
        printf("Initial guest OS requires too much space\n"
               "(%luMB is greater than %luMB limit)\n",
               (v_end-dsi.v_start)>>20, (nr_pages<<PAGE_SHIFT)>>20);
        goto error_out;
    }

    if ( (page_array = malloc(nr_pages * sizeof(unsigned long))) == NULL )
    {
        PERROR("Could not allocate memory");
        goto error_out;
    }

    /* page_array[pfn] = machine frame number backing that guest pfn. */
    if ( xc_get_pfn_list(xc_handle, dom, page_array, nr_pages) != nr_pages )
    {
        PERROR("Could not get the page frame list");
        goto error_out;
    }

    loadelfimage(image, xc_handle, dom, page_array, dsi.v_start);

    /* Second pass: actually copy the symbol table into the guest. */
    if (dsi.load_bsd_symtab)
        loadelfsymtab(image, xc_handle, dom, page_array, &dsi);

    /* Load the initial ramdisk image, decompressing page by page. */
    if ( initrd_len != 0 )
    {
        for ( i = (vinitrd_start - dsi.v_start);
              i < (vinitrd_end - dsi.v_start); i += PAGE_SIZE )
        {
            char page[PAGE_SIZE];
            if ( gzread(initrd_gfd, page, PAGE_SIZE) == -1 )
            {
                PERROR("Error reading initrd image, could not");
                goto error_out;
            }
            xc_copy_to_domain_page(xc_handle, dom,
                                   page_array[i>>PAGE_SHIFT], page);
        }
    }

    if ( (mmu = init_mmu_updates(xc_handle, dom)) == NULL )
        goto error_out;

    /* First allocate page for page dir. */
    ppt_alloc = (vpt_start - dsi.v_start) >> PAGE_SHIFT;
    l2tab = page_array[ppt_alloc++] << PAGE_SHIFT;
    ctxt->pt_base = l2tab;

    /* Initialise the page tables: map dsi.v_start..v_end 1:1 onto the
     * domain's allocated frames. */
    if ( (vl2tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                        PROT_READ|PROT_WRITE,
                                        l2tab >> PAGE_SHIFT)) == NULL )
        goto error_out;
    memset(vl2tab, 0, PAGE_SIZE);
    vl2e = &vl2tab[l2_table_offset(dsi.v_start)];
    for ( count = 0; count < ((v_end-dsi.v_start)>>PAGE_SHIFT); count++ )
    {
        /* Crossed into a new L1 page (vl1e is page-aligned, incl. the
         * initial NULL): allocate and map the next L1 table. */
        if ( ((unsigned long)vl1e & (PAGE_SIZE-1)) == 0 )
        {
            l1tab = page_array[ppt_alloc++] << PAGE_SHIFT;
            if ( vl1tab != NULL )
                munmap(vl1tab, PAGE_SIZE);
            if ( (vl1tab = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE,
                                                PROT_READ|PROT_WRITE,
                                                l1tab >> PAGE_SHIFT)) == NULL )
            {
                munmap(vl2tab, PAGE_SIZE);
                goto error_out;
            }
            memset(vl1tab, 0, PAGE_SIZE);
            vl1e = &vl1tab[l1_table_offset(dsi.v_start + (count<<PAGE_SHIFT))];
            *vl2e++ = l1tab | L2_PROT;
        }

        *vl1e = (page_array[count] << PAGE_SHIFT) | L1_PROT;
        vl1e++;
    }
    munmap(vl1tab, PAGE_SIZE);
    munmap(vl2tab, PAGE_SIZE);

    /*
     * Pin down l2tab addr as page dir page - causes hypervisor to provide
     * correct protection for the page
     */
    if ( add_mmu_update(xc_handle, mmu,
                        l2tab | MMU_EXTENDED_COMMAND, MMUEXT_PIN_L2_TABLE) )
        goto error_out;

    /* Fill in the Linux boot_params ("zero") page. */
    boot_paramsp = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vboot_params_start-dsi.v_start)>>PAGE_SHIFT]);
    memset(boot_paramsp, 0, sizeof(*boot_paramsp));

    strncpy(boot_paramsp->cmd_line, cmdline, 0x800);
    boot_paramsp->cmd_line[0x800-1] = '\0';
    boot_paramsp->cmd_line_ptr = ((unsigned long) vboot_params_start) + offsetof(struct linux_boot_params, cmd_line);

    boot_paramsp->setup_sects = 0;
    boot_paramsp->mount_root_rdonly = 1;
    boot_paramsp->swapdev = 0x0;
    boot_paramsp->ramdisk_flags = 0x0;
    boot_paramsp->root_dev = 0x0; /* We must tell kernel root dev by kernel command line. */

    /* we don't have a ps/2 mouse now.
     * 0xAA means a aux mouse is there.
     * See detect_auxiliary_port() in pc_keyb.c.
     */
    boot_paramsp->aux_device_info = 0x0;

    /* "HdrS" magic: tells the kernel a boot-protocol header is present. */
    boot_paramsp->header_magic[0] = 0x48; /* "H" */
    boot_paramsp->header_magic[1] = 0x64; /* "d" */
    boot_paramsp->header_magic[2] = 0x72; /* "r" */
    boot_paramsp->header_magic[3] = 0x53; /* "S" */

    boot_paramsp->protocol_version = 0x0203; /* 2.03 */
    boot_paramsp->loader_type = 0x71; /* GRUB */
    boot_paramsp->loader_flags = 0x1; /* loaded high */
    boot_paramsp->code32_start = LINUX_KERNEL_ENTR_ADDR; /* 1MB */
    boot_paramsp->initrd_start = vinitrd_start;
    boot_paramsp->initrd_size = initrd_len;

    /* Memory above 1MB, in KB (less 4KB). */
    i = ((memsize - 1) << 10) - 4;
    boot_paramsp->alt_mem_k = i; /* alt_mem_k */
    boot_paramsp->screen.overlap.ext_mem_k = i & 0xFFFF; /* ext_mem_k */

    /*
     * Stuff SCREAN_INFO
     */
    boot_paramsp->screen.info.orig_x = 0;
    boot_paramsp->screen.info.orig_y = 0;
    boot_paramsp->screen.info.orig_video_page = 8;
    boot_paramsp->screen.info.orig_video_mode = 3;
    boot_paramsp->screen.info.orig_video_cols = 80;
    boot_paramsp->screen.info.orig_video_ega_bx = 0;
    boot_paramsp->screen.info.orig_video_lines = 25;
    boot_paramsp->screen.info.orig_video_isVGA = 1;
    boot_paramsp->screen.info.orig_video_points = 0x0010;

    /* seems we may NOT stuff boot_paramsp->apm_bios_info */
    /* seems we may NOT stuff boot_paramsp->drive_info */
    /* seems we may NOT stuff boot_paramsp->sys_desc_table */
    /* NOTE(review): magic dummy values below are unexplained here -
     * presumably fake drive geometry the kernel probes; confirm before
     * changing. */
    *((unsigned short *) &boot_paramsp->drive_info.dummy[0]) = 800;
    boot_paramsp->drive_info.dummy[2] = 4;
    boot_paramsp->drive_info.dummy[14] = 32;

    /* memsize is in megabytes */
    /* Generate the e820 map and copy it into the boot params page. */
    build_e820map(mem_mapp, memsize << 20);
    boot_paramsp->e820_map_nr = mem_mapp->nr_map;
    for (i=0; i<mem_mapp->nr_map; i++) {
        boot_paramsp->e820_map[i].addr = mem_mapp->map[i].addr;
        boot_paramsp->e820_map[i].size = mem_mapp->map[i].size;
        boot_paramsp->e820_map[i].type = mem_mapp->map[i].type;
    }
    munmap(boot_paramsp, PAGE_SIZE);

    /*
     * Minimal boot GDT: descriptors 12 and 13 (selectors 0x60/0x68,
     * matching cs/ds/ss set below) are flat 4GB code and data segments.
     * Each descriptor is four 16-bit words.
     */
    boot_gdtp = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE,
        page_array[(vboot_gdt_start-dsi.v_start)>>PAGE_SHIFT]);
    memset(boot_gdtp, 0, PAGE_SIZE);
    boot_gdtp[12*4 + 0] = boot_gdtp[13*4 + 0] = 0xffff; /* limit */
    boot_gdtp[12*4 + 1] = boot_gdtp[13*4 + 1] = 0x0000; /* base */
    boot_gdtp[12*4 + 2] = 0x9a00; boot_gdtp[13*4 + 2] = 0x9200; /* perms */
    boot_gdtp[12*4 + 3] = boot_gdtp[13*4 + 3] = 0x00cf; /* granu + top of limit */
    munmap(boot_gdtp, PAGE_SIZE);

    /* shared_info page starts its life empty. */
    shared_info = xc_map_foreign_range(
        xc_handle, dom, PAGE_SIZE, PROT_READ|PROT_WRITE, shared_info_frame);
    memset(shared_info, 0, sizeof(shared_info_t));
    /* Mask all upcalls... */
    for ( i = 0; i < MAX_VIRT_CPUS; i++ )
        shared_info->vcpu_data[i].evtchn_upcall_mask = 1;
    munmap(shared_info, PAGE_SIZE);

    /* Send the page update requests down to the hypervisor. */
    if ( finish_mmu_updates(xc_handle, mmu) )
        goto error_out;

    free(mmu);
    free(page_array);

    /*
     * Initial register values:
     */
    ctxt->cpu_ctxt.ds = 0x68;
    ctxt->cpu_ctxt.es = 0x0;
    ctxt->cpu_ctxt.fs = 0x0;
    ctxt->cpu_ctxt.gs = 0x0;
    ctxt->cpu_ctxt.ss = 0x68;
    ctxt->cpu_ctxt.cs = 0x60;
    ctxt->cpu_ctxt.eip = dsi.v_kernentry;
    ctxt->cpu_ctxt.edx = vboot_gdt_start;
    /* NOTE(review): 0x800 in eax and the 0x2d0 offset below (which looks
     * like the e820 table offset within the zero page) are part of the
     * kernel-entry handshake - confirm against the target kernel. */
    ctxt->cpu_ctxt.eax = 0x800;
    ctxt->cpu_ctxt.esp = vboot_gdt_end;
    ctxt->cpu_ctxt.ebx = 0; /* startup_32 expects this to be 0 to signal boot cpu */
    ctxt->cpu_ctxt.ecx = mem_mapp->nr_map;
    ctxt->cpu_ctxt.esi = vboot_params_start;
    ctxt->cpu_ctxt.edi = vboot_params_start + 0x2d0;

    /* Bit 2 of EFLAGS is a reserved, always-set bit. */
    ctxt->cpu_ctxt.eflags = (1<<2);

    return 0;

 error_out:
    if ( mmu != NULL )
        free(mmu);
    if ( page_array != NULL )
        free(page_array);
    return -1;
}
/* Bit 5 of CPUID.1:ECX indicates VMX (Virtual Machine Extensions) support. */
#define VMX_FEATURE_FLAG 0x20

/*
 * Check whether the host CPU advertises VMX support.
 * Returns 0 if VMX is available, -1 otherwise.
 */
int vmx_identify(void)
{
    int eax, ecx;

    /* CPUID leaf 1 (feature information); EBX/EDX are clobbered. */
    __asm__ __volatile__ ("cpuid"
                          : "=a" (eax), "=c" (ecx)
                          : "0" (1)
                          : "bx", "dx");
    if (!(ecx & VMX_FEATURE_FLAG)) {
        return -1;
    }
    return 0;
}
439 int xc_vmx_build(int xc_handle,
440 u32 domid,
441 int memsize,
442 const char *image_name,
443 struct mem_map *mem_mapp,
444 const char *ramdisk_name,
445 const char *cmdline,
446 unsigned int control_evtchn,
447 unsigned long flags)
448 {
449 dom0_op_t launch_op, op;
450 int initrd_fd = -1;
451 gzFile initrd_gfd = NULL;
452 int rc, i;
453 full_execution_context_t st_ctxt, *ctxt = &st_ctxt;
454 unsigned long nr_pages;
455 char *image = NULL;
456 unsigned long image_size, initrd_size=0;
458 if ( vmx_identify() < 0 )
459 {
460 PERROR("CPU doesn't support VMX Extensions");
461 goto error_out;
462 }
464 if ( (nr_pages = xc_get_tot_pages(xc_handle, domid)) < 0 )
465 {
466 PERROR("Could not find total pages for domain");
467 goto error_out;
468 }
470 if ( (image = xc_read_kernel_image(image_name, &image_size)) == NULL )
471 goto error_out;
473 if ( (ramdisk_name != NULL) && (strlen(ramdisk_name) != 0) )
474 {
475 if ( (initrd_fd = open(ramdisk_name, O_RDONLY)) < 0 )
476 {
477 PERROR("Could not open the initial ramdisk image");
478 goto error_out;
479 }
481 initrd_size = xc_get_filesz(initrd_fd);
483 if ( (initrd_gfd = gzdopen(initrd_fd, "rb")) == NULL )
484 {
485 PERROR("Could not allocate decompression state for initrd");
486 goto error_out;
487 }
488 }
490 if ( mlock(&st_ctxt, sizeof(st_ctxt) ) )
491 {
492 PERROR("Unable to mlock ctxt");
493 return 1;
494 }
496 op.cmd = DOM0_GETDOMAININFO;
497 op.u.getdomaininfo.domain = (domid_t)domid;
498 op.u.getdomaininfo.exec_domain = 0;
499 op.u.getdomaininfo.ctxt = ctxt;
500 if ( (do_dom0_op(xc_handle, &op) < 0) ||
501 ((u16)op.u.getdomaininfo.domain != domid) )
502 {
503 PERROR("Could not get info on domain");
504 goto error_out;
505 }
506 if ( !(op.u.getdomaininfo.flags & DOMFLAGS_PAUSED) ||
507 (ctxt->pt_base != 0) )
508 {
509 ERROR("Domain is already constructed");
510 goto error_out;
511 }
513 if ( setup_guestos(xc_handle, domid, memsize, image, image_size,
514 initrd_gfd, initrd_size, nr_pages,
515 ctxt, cmdline,
516 op.u.getdomaininfo.shared_info_frame,
517 control_evtchn, flags, mem_mapp) < 0 )
518 {
519 ERROR("Error constructing guest OS");
520 goto error_out;
521 }
523 if ( initrd_fd >= 0 )
524 close(initrd_fd);
525 if ( initrd_gfd )
526 gzclose(initrd_gfd);
527 if ( image != NULL )
528 free(image);
530 ctxt->flags = ECF_VMX_GUEST;
531 /* FPU is set up to default initial state. */
532 memset(ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt));
534 /* Virtual IDT is empty at start-of-day. */
535 for ( i = 0; i < 256; i++ )
536 {
537 ctxt->trap_ctxt[i].vector = i;
538 ctxt->trap_ctxt[i].cs = FLAT_GUESTOS_CS;
539 }
540 ctxt->fast_trap_idx = 0;
542 /* No LDT. */
543 ctxt->ldt_ents = 0;
545 /* Use the default Xen-provided GDT. */
546 ctxt->gdt_ents = 0;
548 /* Ring 1 stack is the initial stack. */
549 /*
550 ctxt->guestos_ss = FLAT_GUESTOS_DS;
551 ctxt->guestos_esp = vstartinfo_start;
552 */
553 /* No debugging. */
554 memset(ctxt->debugreg, 0, sizeof(ctxt->debugreg));
556 /* No callback handlers. */
557 ctxt->event_callback_cs = FLAT_GUESTOS_CS;
558 ctxt->event_callback_eip = 0;
559 ctxt->failsafe_callback_cs = FLAT_GUESTOS_CS;
560 ctxt->failsafe_callback_eip = 0;
562 memset( &launch_op, 0, sizeof(launch_op) );
564 launch_op.u.builddomain.domain = (domid_t)domid;
565 launch_op.u.builddomain.ctxt = ctxt;
567 launch_op.cmd = DOM0_BUILDDOMAIN;
568 rc = do_dom0_op(xc_handle, &launch_op);
569 return rc;
571 error_out:
572 if ( initrd_gfd != NULL )
573 gzclose(initrd_gfd);
574 else if ( initrd_fd >= 0 )
575 close(initrd_fd);
576 if ( image != NULL )
577 free(image);
579 return -1;
580 }
582 static inline int is_loadable_phdr(Elf_Phdr *phdr)
583 {
584 return ((phdr->p_type == PT_LOAD) &&
585 ((phdr->p_flags & (PF_W|PF_X)) != 0));
586 }
/*
 * Validate the guest kernel ELF image and fill in *dsi with its load
 * layout.  The kernel's virtual addresses are rebased by subtracting
 * LINUX_PAGE_OFFSET (the image links at 0xC0000000 but is loaded at the
 * corresponding physical offset).  Returns 0 on success, -EINVAL on a
 * malformed image.
 */
static int parseelfimage(char *elfbase,
                         unsigned long elfsize,
                         struct domain_setup_info *dsi)
{
    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
    Elf_Phdr *phdr;
    Elf_Shdr *shdr;
    unsigned long kernstart = ~0UL, kernend=0UL;
    char *shstrtab;
    int h;

    if ( !IS_ELF(*ehdr) )
    {
        ERROR("Kernel image does not have an ELF header.");
        return -EINVAL;
    }

    /* Both header tables must lie entirely within the image. */
    if ( (ehdr->e_phoff + (ehdr->e_phnum * ehdr->e_phentsize)) > elfsize )
    {
        ERROR("ELF program headers extend beyond end of image.");
        return -EINVAL;
    }

    if ( (ehdr->e_shoff + (ehdr->e_shnum * ehdr->e_shentsize)) > elfsize )
    {
        ERROR("ELF section headers extend beyond end of image.");
        return -EINVAL;
    }

    /* Find the section-header strings table. */
    if ( ehdr->e_shstrndx == SHN_UNDEF )
    {
        ERROR("ELF image has no section-header strings table (shstrtab).");
        return -EINVAL;
    }
    shdr = (Elf_Shdr *)(elfbase + ehdr->e_shoff +
                        (ehdr->e_shstrndx*ehdr->e_shentsize));
    /* NOTE(review): shstrtab is computed but not referenced below. */
    shstrtab = elfbase + shdr->sh_offset;

    /* Kernel extent = union of all loadable program segments. */
    for ( h = 0; h < ehdr->e_phnum; h++ )
    {
        phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
        if ( !is_loadable_phdr(phdr) )
            continue;
        if ( phdr->p_vaddr < kernstart )
            kernstart = phdr->p_vaddr;
        if ( (phdr->p_vaddr + phdr->p_memsz) > kernend )
            kernend = phdr->p_vaddr + phdr->p_memsz;
    }

    /* No loadable segments, or entry point outside the kernel: bail. */
    if ( (kernstart > kernend) ||
         (ehdr->e_entry < kernstart) ||
         (ehdr->e_entry > kernend) )
    {
        ERROR("Malformed ELF image.");
        return -EINVAL;
    }

    dsi->v_start = 0x00000000;
    dsi->use_writable_pagetables = 0;
    dsi->load_bsd_symtab = 0;

    /* Rebase linked (virtual) addresses to guest-physical. */
    dsi->v_kernstart = kernstart - LINUX_PAGE_OFFSET;
    dsi->v_kernend = kernend - LINUX_PAGE_OFFSET;
    /* Entry is always the fixed 1MB protected-mode entry point. */
    dsi->v_kernentry = LINUX_KERNEL_ENTR_ADDR;

    dsi->v_end = dsi->v_kernend;

    return 0;
}
659 static int
660 loadelfimage(
661 char *elfbase, int xch, u32 dom, unsigned long *parray,
662 unsigned long vstart)
663 {
664 Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase;
665 Elf_Phdr *phdr;
666 int h;
668 char *va;
669 unsigned long pa, done, chunksz;
671 for ( h = 0; h < ehdr->e_phnum; h++ )
672 {
673 phdr = (Elf_Phdr *)(elfbase + ehdr->e_phoff + (h*ehdr->e_phentsize));
674 if ( !is_loadable_phdr(phdr) )
675 continue;
677 for ( done = 0; done < phdr->p_filesz; done += chunksz )
678 {
679 pa = (phdr->p_vaddr + done) - vstart - LINUX_PAGE_OFFSET;
680 va = xc_map_foreign_range(
681 xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
682 chunksz = phdr->p_filesz - done;
683 if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
684 chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
685 memcpy(va + (pa & (PAGE_SIZE-1)),
686 elfbase + phdr->p_offset + done, chunksz);
687 munmap(va, PAGE_SIZE);
688 }
690 for ( ; done < phdr->p_memsz; done += chunksz )
691 {
692 pa = (phdr->p_vaddr + done) - vstart - LINUX_PAGE_OFFSET;
693 va = xc_map_foreign_range(
694 xch, dom, PAGE_SIZE, PROT_WRITE, parray[pa>>PAGE_SHIFT]);
695 chunksz = phdr->p_memsz - done;
696 if ( chunksz > (PAGE_SIZE - (pa & (PAGE_SIZE-1))) )
697 chunksz = PAGE_SIZE - (pa & (PAGE_SIZE-1));
698 memset(va + (pa & (PAGE_SIZE-1)), 0, chunksz);
699 munmap(va, PAGE_SIZE);
700 }
701 }
703 return 0;
704 }
/* Alignment unit for the symbol-table blob (4 bytes for ELFSIZE == 32). */
#define ELFROUND (ELFSIZE / 8)

/*
 * Build (and optionally copy into the guest) a stripped-down ELF blob
 * containing only the kernel's symbol and string tables, placed just
 * above the kernel image.  Blob layout: a total-length int, a crafted
 * ELF header, the (mangled) section-header table, then the SYMTAB and
 * STRTAB section payloads, each ELFROUND-aligned.
 *
 * Called twice by setup_guestos(): first with parray == NULL purely to
 * compute dsi->symtab_addr / symtab_len / v_end, then with the real
 * page array to perform the copy.  Always returns 0: a missing or
 * uncopyable symbol table is not fatal.
 */
static int
loadelfsymtab(
    char *elfbase, int xch, u32 dom, unsigned long *parray,
    struct domain_setup_info *dsi)
{
    Elf_Ehdr *ehdr = (Elf_Ehdr *)elfbase, *sym_ehdr;
    Elf_Shdr *shdr;
    unsigned long maxva, symva;
    char *p;
    int h, i;

    /* Scratch copy of: length word + ELF header + section-header table. */
    p = malloc(sizeof(int) + sizeof(Elf_Ehdr) +
               ehdr->e_shnum * sizeof(Elf_Shdr));
    if (p == NULL)
        return 0;

    /* The blob starts ELFROUND-aligned just past the kernel image. */
    maxva = (dsi->v_kernend + ELFROUND - 1) & ~(ELFROUND - 1);
    symva = maxva;
    maxva += sizeof(int);
    dsi->symtab_addr = maxva;
    dsi->symtab_len = 0;
    maxva += sizeof(Elf_Ehdr) + ehdr->e_shnum * sizeof(Elf_Shdr);
    maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);

    shdr = (Elf_Shdr *)(p + sizeof(int) + sizeof(Elf_Ehdr));
    memcpy(shdr, elfbase + ehdr->e_shoff, ehdr->e_shnum * sizeof(Elf_Shdr));

    for ( h = 0; h < ehdr->e_shnum; h++ )
    {
        if ( shdr[h].sh_type == SHT_STRTAB )
        {
            /* Look for a strtab @i linked to symtab @h. */
            for ( i = 0; i < ehdr->e_shnum; i++ )
                if ( (shdr[i].sh_type == SHT_SYMTAB) &&
                     (shdr[i].sh_link == h) )
                    break;
            /* Skip symtab @h if we found no corresponding strtab @i. */
            if ( i == ehdr->e_shnum )
            {
                shdr[h].sh_offset = 0;
                continue;
            }
        }

        if ( (shdr[h].sh_type == SHT_STRTAB) ||
             (shdr[h].sh_type == SHT_SYMTAB) )
        {
            /* Second pass only: copy the section payload into the guest. */
            if ( parray != NULL )
                xc_map_memcpy(maxva, elfbase + shdr[h].sh_offset, shdr[h].sh_size,
                              xch, dom, parray, dsi->v_start);

            /* Mangled to be based on ELF header location. */
            shdr[h].sh_offset = maxva - dsi->symtab_addr;

            dsi->symtab_len += shdr[h].sh_size;
            maxva += shdr[h].sh_size;
            maxva = (maxva + ELFROUND - 1) & ~(ELFROUND - 1);
        }

        shdr[h].sh_name = 0; /* Name is NULL. */
    }

    /* No symbol table at all: report nothing to copy. */
    if ( dsi->symtab_len == 0 )
    {
        dsi->symtab_addr = 0;
        goto out;
    }

    if ( parray != NULL )
    {
        /* Craft a minimal ELF header: sections only, no program headers. */
        *(int *)p = maxva - dsi->symtab_addr;
        sym_ehdr = (Elf_Ehdr *)(p + sizeof(int));
        memcpy(sym_ehdr, ehdr, sizeof(Elf_Ehdr));
        sym_ehdr->e_phoff = 0;
        sym_ehdr->e_shoff = sizeof(Elf_Ehdr);
        sym_ehdr->e_phentsize = 0;
        sym_ehdr->e_phnum = 0;
        sym_ehdr->e_shstrndx = SHN_UNDEF;

        /* Copy total length, crafted ELF header and section header table */
        xc_map_memcpy(symva, p, sizeof(int) + sizeof(Elf_Ehdr) +
                      ehdr->e_shnum * sizeof(Elf_Shdr), xch, dom, parray,
                      dsi->v_start);
    }

    dsi->symtab_len = maxva - dsi->symtab_addr;
    dsi->v_end = round_pgup(maxva);

 out:
    if ( p != NULL )
        free(p);

    return 0;
}