xen/arch/x86/vmx_vmcs.c @ 3726:88957a238191

BitKeeper revision 1.1159.1.544 (4207248crq3YxiyLWjUehtHv_Yd3tg)

Merge tempest.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into tempest.cl.cam.ac.uk:/local/scratch/smh22/xen-unstable.bk

author    smh22@tempest.cl.cam.ac.uk
date      Mon Feb 07 08:19:24 2005 +0000 (2005-02-07)
parents   a7f99d7a4027 d93748c50893
children  f5f2757b3aa2

/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/*
 * vmx_vmcs.c: VMCS management
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <public/io/ioreq.h>
#include <asm/domain_page.h>
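
/*
 * Allocate a zeroed VMCS region from the Xen heap and stamp it with the
 * revision id this processor expects, derived from its CPUID signature.
 */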
struct vmcs_struct *alloc_vmcs(void)
{
    struct vmcs_struct *vmcs;
    unsigned int cpu_sig = cpuid_eax(0x00000001);

    vmcs = (struct vmcs_struct *) alloc_xenheap_pages(get_order(vmcs_size));
    memset((char *) vmcs, 0, vmcs_size); /* don't remove: region must start zeroed */

    /* Later steppings of this CPU family expect revision id 3. */
    vmcs->vmcs_revision_id = (cpu_sig > 0xf41) ? 3 : 1;
    return vmcs;
}
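
/* Return a VMCS region obtained from alloc_vmcs() to the Xen heap. */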
void free_vmcs(struct vmcs_struct *vmcs)
{
    int order;

    /* Free the same number of pages that alloc_vmcs() allocated. */
    order = get_order(vmcs_size);
    free_xenheap_pages((unsigned long) vmcs, order);
}
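
/*
 * Program the pin-based and CPU-based execution controls and the
 * VM-exit/VM-entry controls with the monitor's defaults.  The VMCS
 * being constructed must already be current (see load_vmcs()).
 */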
static inline int construct_vmcs_controls(void)
{
    int error = 0;

    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
                       MONITOR_PIN_BASED_EXEC_CONTROLS);

    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                       MONITOR_CPU_BASED_EXEC_CONTROLS);

    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);

    return error;
}

#define GUEST_SEGMENT_LIMIT     0xffffffff
#define HOST_SEGMENT_LIMIT      0xffffffff
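
/*
 * Snapshot of the host (Xen) execution environment taken while the
 * VMCS is constructed; much of the guest's initial state is cloned
 * from it.
 */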
struct host_execution_env {
    /* selectors */
    unsigned short ldtr_selector;
    unsigned short tr_selector;
    unsigned short ds_selector;
    unsigned short cs_selector;
    /* limits */
    unsigned short gdtr_limit;
    unsigned short ldtr_limit;
    unsigned short idtr_limit;
    unsigned short tr_limit;
    /* base */
    unsigned long gdtr_base;
    unsigned long ldtr_base;
    unsigned long idtr_base;
    unsigned long tr_base;
    unsigned long ds_base;
    unsigned long cs_base;
    /* control registers */
    unsigned long cr3;
    unsigned long cr0;
    unsigned long cr4;
    unsigned long dr7;
};

#define round_pgdown(_p) ((_p) & PAGE_MASK) /* copied from domain.c */
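
/*
 * Scan the guest's e820 map (context->ecx entries at guest-physical
 * address context->edi) for the E820_SHARED_PAGE entry, and keep that
 * page mapped in Xen: it carries the I/O requests exchanged with the
 * device model.
 */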
int vmx_setup_platform(struct exec_domain *d, execution_context_t *context)
{
    int i;
    unsigned int n;
    unsigned long *p, mpfn, offset, addr;
    struct e820entry *e820p;
    unsigned long gpfn = 0;

    context->ebx = 0;   /* Linux expects ebx to be 0 for the boot processor */

    n = context->ecx;
    if (n > 32) {
        VMX_DBG_LOG(DBG_LEVEL_1, "Too many e820 entries: %d\n", n);
        return -1;
    }

    addr = context->edi;
    offset = (addr & ~PAGE_MASK);
    addr = round_pgdown(addr);

    /* Translate the guest-physical address of the e820 map to a machine
     * frame and map it into Xen's address space. */
    mpfn = phys_to_machine_mapping[addr >> PAGE_SHIFT];
    p = map_domain_mem(mpfn << PAGE_SHIFT);

    e820p = (struct e820entry *) ((unsigned long) p + offset);

    for (i = 0; i < n; i++) {
        if (e820p[i].type == E820_SHARED_PAGE) {
            gpfn = (e820p[i].addr >> PAGE_SHIFT);
            break;
        }
    }
    unmap_domain_mem(p);    /* unmap before the early return below */

    if (gpfn == 0) {
        VMX_DBG_LOG(DBG_LEVEL_1, "No shared page?\n");
        return -1;
    }

    /* Keep the shared page mapped in Xen for the life of the domain. */
    mpfn = phys_to_machine_mapping[gpfn];
    p = map_domain_mem(mpfn << PAGE_SHIFT);
    d->arch.arch_vmx.vmx_platform.shared_page_va = (unsigned long) p;

    return 0;
}

/*
 * Add a <guest pfn, machine pfn> pair to the per-domain mapping.  Full
 * virtualization has no other use for the per-domain mapping area, so
 * it is reused here to back the phys-to-machine table.
 */
static int add_mapping_perdomain(struct exec_domain *d, unsigned long gpfn,
                                 unsigned long mpfn)
{
    struct pfn_info *page;
    unsigned long pfn = 0;

    /*
     * We support up to 4GB memory for a guest at this point.
     */
    if (gpfn > ENTRIES_PER_L2_PAGETABLE * ENTRIES_PER_L1_PAGETABLE)
        return -1;

    if (!(l1_pgentry_val(d->domain->arch.mm_perdomain_pt[
            gpfn >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)]) & _PAGE_PRESENT))
    {
        page = (struct pfn_info *) alloc_domheap_page(NULL);
        if (!page) {
            return -1;
        }

        pfn = (unsigned long) (page - frame_table);
        d->domain->arch.mm_perdomain_pt[
            gpfn >> (L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT)] =
            mk_l1_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    }
    phys_to_machine_mapping[gpfn] = mpfn;

    return 0;
}
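
/*
 * Per-VCPU setup performed just before the first VMLAUNCH: populate the
 * phys-to-machine mapping from the domain's page list, hook a fresh page
 * into the monitor table's linear-page-table slot for guest PTE access,
 * attach the shared platform page, and record the CPU-local host state
 * (GDTR, TR, CR3, stack) in the VMCS.
 */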
void vmx_do_launch(struct exec_domain *ed)
{
    /* Update CR3, GDT, LDT, TR */
    unsigned int tr = 0, cpu, error = 0;
    struct host_execution_env host_env;
    struct Xgt_desc_struct desc;
    struct list_head *list_ent;
    l2_pgentry_t *mpl2e, *guest_pl2e_cache;
    unsigned long i, pfn = 0;
    struct pfn_info *page;
    execution_context_t *ec = get_execution_context();
    struct domain *d = ed->domain;

    cpu = smp_processor_id();
    /* Start min high and max low so the loop below can narrow them. */
    d->arch.min_pfn = ~0UL;
    d->arch.max_pfn = 0;

    spin_lock(&d->page_alloc_lock);
    list_ent = d->page_list.next;

    mpl2e = (l2_pgentry_t *)map_domain_mem(pagetable_val(ed->arch.monitor_table));

    for ( i = 0; list_ent != &d->page_list; i++ )
    {
        pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
        d->arch.min_pfn = min(d->arch.min_pfn, pfn);
        d->arch.max_pfn = max(d->arch.max_pfn, pfn);
        list_ent = frame_table[pfn].list.next;
        add_mapping_perdomain(ed, i, pfn);
    }

    spin_unlock(&d->page_alloc_lock);

    page = (struct pfn_info *) alloc_domheap_page(NULL);
    pfn = (unsigned long) (page - frame_table);

    /*
     * Make the linear page table work for guest ptes.
     */
    mpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    guest_pl2e_cache = map_domain_mem(pfn << PAGE_SHIFT);
    memset(guest_pl2e_cache, 0, PAGE_SIZE); /* start with an empty cache */
    ed->arch.guest_pl2e_cache = guest_pl2e_cache;

    unmap_domain_mem(mpl2e);

    vmx_setup_platform(ed, ec);

    __asm__ __volatile__ ("sgdt (%%eax) \n" :: "a"(&desc) : "memory");
    host_env.gdtr_limit = desc.size;
    host_env.gdtr_base = desc.address;

    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);

    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
    error |= __vmwrite(GUEST_LDTR_BASE, 0);
    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);

    __asm__ __volatile__ ("str (%%eax) \n" :: "a"(&tr) : "memory");
    host_env.tr_selector = tr;
    host_env.tr_limit = sizeof(struct tss_struct);
    host_env.tr_base = (unsigned long) &init_tss[cpu];

    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
    error |= __vmwrite(GUEST_TR_BASE, 0);
    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);

    ed->arch.shadow_table = ed->arch.pagetable;
    __vmwrite(GUEST_CR3, pagetable_val(ed->arch.pagetable));
    __vmwrite(HOST_CR3, pagetable_val(ed->arch.monitor_table));
    __vmwrite(HOST_ESP, (unsigned long) get_stack_top());

    ed->arch.schedule_tail = arch_vmx_do_resume;
}

/*
 * Initially set up the same environment as the host.
 */
static inline int
construct_init_vmcs_guest(execution_context_t *context,
                          full_execution_context_t *full_context,
                          struct host_execution_env *host_env)
{
    int error = 0;
    union vmcs_arbytes arbytes;
    unsigned long dr7;
    unsigned long eflags, shadow_cr;

    /* MSR: no MSRs are saved or loaded on VM exit/entry */
    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);

    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);

    /* interrupt: no event is injected on entry */
    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);

    /* mask: intercept all guest writes to CR0/CR4 bits */
    error |= __vmwrite(CR0_GUEST_HOST_MASK, 0xffffffff);
    error |= __vmwrite(CR4_GUEST_HOST_MASK, 0xffffffff);

    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    /* TSC */
    error |= __vmwrite(TSC_OFFSET, 0);
    error |= __vmwrite(CR3_TARGET_COUNT, 0);

    /* Guest selectors */
    error |= __vmwrite(GUEST_CS_SELECTOR, context->cs);
    error |= __vmwrite(GUEST_ES_SELECTOR, context->es);
    error |= __vmwrite(GUEST_SS_SELECTOR, context->ss);
    error |= __vmwrite(GUEST_DS_SELECTOR, context->ds);
    error |= __vmwrite(GUEST_FS_SELECTOR, context->fs);
    error |= __vmwrite(GUEST_GS_SELECTOR, context->gs);

    /* Guest segment limits */
    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);

    error |= __vmwrite(GUEST_IDTR_LIMIT, host_env->idtr_limit);

    /* AR bytes */
    arbytes.bytes = 0;
    arbytes.fields.seg_type = 0x3;          /* type 3: read/write data, accessed */
    arbytes.fields.s = 1;                   /* code or data, i.e. not system */
    arbytes.fields.dpl = 0;                 /* DPL = 0 */
    arbytes.fields.p = 1;                   /* segment present */
    arbytes.fields.default_ops_size = 1;    /* 32-bit */
    arbytes.fields.g = 1;
    arbytes.fields.null_bit = 0;            /* not null */

    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* type 0xb: execute/read code, accessed */
    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_GDTR_BASE, context->edx);
    context->edx = 0;
    error |= __vmwrite(GUEST_GDTR_LIMIT, context->eax);
    context->eax = 0;

    arbytes.fields.s = 0;                   /* not a code or data segment */
    arbytes.fields.seg_type = 0x2;          /* LDT */
    arbytes.fields.default_ops_size = 0;    /* 16-bit */
    arbytes.fields.g = 0;
    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */

    /* Initially PG and PE are not set in the read shadow. */
    shadow_cr = host_env->cr0;
    shadow_cr &= ~(X86_CR0_PE | X86_CR0_PG);
    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
    /* CR3 is set in vmx_final_setup_guestos */
    error |= __vmwrite(GUEST_CR4, host_env->cr4);
    shadow_cr = host_env->cr4;
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);

    error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
    error |= __vmwrite(GUEST_SS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_DS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_GS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_IDTR_BASE, host_env->idtr_base);

    error |= __vmwrite(GUEST_ESP, context->esp);
    error |= __vmwrite(GUEST_EIP, context->eip);

    eflags = context->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear 0s */
    eflags |= VMCS_EFLAGS_RESERVED_1;                   /* set 1s */

    error |= __vmwrite(GUEST_EFLAGS, eflags);

    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
    error |= __vmwrite(GUEST_DR7, dr7);
    /* VMCS link pointer (GUEST_VMCS0/1) must be set to all 1s */
    error |= __vmwrite(GUEST_VMCS0, 0xffffffff);
    error |= __vmwrite(GUEST_VMCS1, 0xffffffff);

    return error;
}
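
/*
 * Fill in the host-state area of the VMCS.  This is the state the
 * processor reloads on every VM exit, so it must describe Xen itself:
 * hypervisor selectors, the IDT, CR0/CR4 and the VM-exit entry point.
 */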
static inline int construct_vmcs_host(struct host_execution_env *host_env)
{
    int error = 0;
    unsigned long crn;
    struct Xgt_desc_struct desc;

    /* Host selectors */
    host_env->ds_selector = __HYPERVISOR_DS;
    error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);

    host_env->cs_selector = __HYPERVISOR_CS;
    error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);

    host_env->ds_base = 0;
    host_env->cs_base = 0;
    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);

    /* Host IDTR */
    __asm__ __volatile__ ("sidt (%%eax) \n" :: "a"(&desc) : "memory");
    host_env->idtr_limit = desc.size;
    host_env->idtr_base = desc.address;
    error |= __vmwrite(HOST_IDTR_BASE, host_env->idtr_base);

    __asm__ __volatile__ ("movl %%cr0,%0" : "=r" (crn) : );
    host_env->cr0 = crn;
    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */

    /* CR3 is set in vmx_final_setup_hostos */
    __asm__ __volatile__ ("movl %%cr4,%0" : "=r" (crn) : );
    host_env->cr4 = crn;
    error |= __vmwrite(HOST_CR4, crn);

    error |= __vmwrite(HOST_EIP, (unsigned long) vmx_asm_vmexit_handler);

    return error;
}

/*
 * This will need to be extended to support full virtualization.
 * The use_host_env argument indicates whether the new VMCS should use
 * the same setup as the host (xenolinux); only that mode is currently
 * supported.
 */
int construct_vmcs(struct arch_vmx_struct *arch_vmx,
                   execution_context_t *context,
                   full_execution_context_t *full_context,
                   int use_host_env)
{
    int error;
    u64 vmcs_phys_ptr;

    struct host_execution_env host_env;

    if (use_host_env != VMCS_USE_HOST_ENV)
        return -EINVAL;

    memset(&host_env, 0, sizeof(struct host_execution_env));

    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);

    if ((error = __vmpclear(vmcs_phys_ptr))) {
        printk("construct_vmcs: VMCLEAR failed\n");
        return -EINVAL;
    }
    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
               (unsigned long) vmcs_phys_ptr);
        return -EINVAL;
    }
    if ((error = construct_vmcs_controls())) {
        printk("construct_vmcs: construct_vmcs_controls failed\n");
        return -EINVAL;
    }
    /* host selectors */
    if ((error = construct_vmcs_host(&host_env))) {
        printk("construct_vmcs: construct_vmcs_host failed\n");
        return -EINVAL;
    }
    /* guest selectors */
    if ((error = construct_init_vmcs_guest(context, full_context, &host_env))) {
        printk("construct_vmcs: construct_init_vmcs_guest failed\n");
        return -EINVAL;
    }

    if ((error = __vmwrite(EXCEPTION_BITMAP,
                           MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
        printk("construct_vmcs: setting exception bitmap failed\n");
        return -EINVAL;
    }

    return 0;
}
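
/*
 * Make the VMCS at phys_ptr current on this CPU (VMPTRLD), tracking
 * success or failure in the arch_vmx flags.
 */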
int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    int error;

    if ((error = __vmptrld(phys_ptr))) {
        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
        return error;
    }
    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}
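
/*
 * Save the current-VMCS pointer (VMPTRST) and mark the VMCS as no
 * longer loaded.
 */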
int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    /* save the pointer to the current VMCS */
    __vmptrst(phys_ptr);
    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}
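
/*
 * Called if VMLAUNCH/VMRESUME fails; no recovery is attempted.
 */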
void vm_launch_fail(unsigned long eflags)
{
    BUG();
}

void vm_resume_fail(unsigned long eflags)
{
    BUG();
}