debuggers.hg

view xen/arch/x86/vmx_vmcs.c @ 3755:ea98f0bb6510

bitkeeper revision 1.1159.212.127 (4208b02bTdSR4AVYRg8diDkKZmIVUg)

General shadow code cleanup.

Fixed compilation problems when SHADOW_DEBUG is enabled.
Fixed compilation problems when CONFIG_VMX is undefined.

Simplified l1pte_write_fault and l1pte_read_fault.
Name change: spfn => smfn (shadow machine frame numbers).

In general, the terms pfn and gpfn now refer to pages in the
guest's idea of physical frames (which differs for full shadow
guests). mfn always refers to a machine frame number.
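
As an illustrative aside (not part of this changeset), the convention can
be read as an explicit translation step, using the phys_to_machine_mapping()
helper that appears in the source below; the example_gpfn_to_mfn() wrapper
itself is purely hypothetical:

    /* Hypothetical example: a gpfn is the guest's view of a physical
     * frame; the mfn is the machine frame that actually backs it.
     * The two differ for full shadow guests. */
    static unsigned long example_gpfn_to_mfn(unsigned long gpfn)
    {
        return phys_to_machine_mapping(gpfn); /* gpfn -> mfn */
    }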

One bug fix for check_pagetable():
If we're using writable page tables along with shadow mode, don't check
the currently writable page table page -- check its snapshot instead.

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author mafetter@fleming.research
date Tue Feb 08 12:27:23 2005 +0000 (2005-02-08)
parents 9e80fc0dcac5
children f5f2757b3aa2
line source
/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/*
 * vmx_vmcs.c: VMCS management
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/lib.h>
#include <xen/errno.h>

#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/msr.h>
#include <asm/vmx.h>
#include <xen/event.h>
#include <xen/kernel.h>
#include <public/io/ioreq.h>
#include <asm/domain_page.h>

#ifdef CONFIG_VMX

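/*
 * Allocate and zero a VMCS region on the xenheap, stamping it with the
 * revision identifier chosen from the CPU's CPUID signature.
 */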
struct vmcs_struct *alloc_vmcs(void)
{
    struct vmcs_struct *vmcs;
    unsigned int cpu_sig = cpuid_eax(0x00000001);

    vmcs = (struct vmcs_struct *) alloc_xenheap_pages(get_order(vmcs_size));
    memset((char *) vmcs, 0, vmcs_size); /* don't remove this */

    vmcs->vmcs_revision_id = (cpu_sig > 0xf41) ? 3 : 1;
    return vmcs;
}

void free_vmcs(struct vmcs_struct *vmcs)
{
    int order;

    order = (vmcs_size >> PAGE_SHIFT) - 1;
    free_xenheap_pages((unsigned long) vmcs, order);
}

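/*
 * Write the pin-based, processor-based, VM-exit and VM-entry control
 * fields of the current VMCS, using the monitor's default settings.
 */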
static inline int construct_vmcs_controls(void)
{
    int error = 0;

    error |= __vmwrite(PIN_BASED_VM_EXEC_CONTROL,
                       MONITOR_PIN_BASED_EXEC_CONTROLS);

    error |= __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                       MONITOR_CPU_BASED_EXEC_CONTROLS);

    error |= __vmwrite(VM_EXIT_CONTROLS, MONITOR_VM_EXIT_CONTROLS);
    error |= __vmwrite(VM_ENTRY_CONTROLS, MONITOR_VM_ENTRY_CONTROLS);

    return error;
}

#define GUEST_SEGMENT_LIMIT     0xffffffff
#define HOST_SEGMENT_LIMIT      0xffffffff

struct host_execution_env {
    /* selectors */
    unsigned short ldtr_selector;
    unsigned short tr_selector;
    unsigned short ds_selector;
    unsigned short cs_selector;
    /* limits */
    unsigned short gdtr_limit;
    unsigned short ldtr_limit;
    unsigned short idtr_limit;
    unsigned short tr_limit;
    /* base */
    unsigned long gdtr_base;
    unsigned long ldtr_base;
    unsigned long idtr_base;
    unsigned long tr_base;
    unsigned long ds_base;
    unsigned long cs_base;
    /* control registers */
    unsigned long cr3;
    unsigned long cr0;
    unsigned long cr4;
    unsigned long dr7;
};

#define round_pgdown(_p) ((_p) & PAGE_MASK) /* copied from domain.c */

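/*
 * Scan the guest-supplied e820 map (entry count in ecx, map address in
 * edi) for the shared I/O request page, and record its mapping in the
 * per-domain VMX platform state.
 */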
int vmx_setup_platform(struct exec_domain *d, execution_context_t *context)
{
    int i;
    unsigned int n;
    unsigned long *p, mpfn, offset, addr;
    struct e820entry *e820p;
    unsigned long gpfn = 0;

    context->ebx = 0; /* Linux expects ebx to be 0 for boot proc */

    n = context->ecx;
    if (n > 32) {
        VMX_DBG_LOG(DBG_LEVEL_1, "Too many e820 entries: %d\n", n);
        return -1;
    }

    addr = context->edi;
    offset = (addr & ~PAGE_MASK);
    addr = round_pgdown(addr);
    mpfn = phys_to_machine_mapping(addr >> PAGE_SHIFT);
    p = map_domain_mem(mpfn << PAGE_SHIFT);

    e820p = (struct e820entry *) ((unsigned long) p + offset);

    for (i = 0; i < n; i++) {
        if (e820p[i].type == E820_SHARED_PAGE) {
            gpfn = (e820p[i].addr >> PAGE_SHIFT);
            break;
        }
    }

    if (gpfn == 0) {
        printk("No shared page?\n");
        unmap_domain_mem(p);
        return -1;
    }
    unmap_domain_mem(p);

    mpfn = phys_to_machine_mapping(gpfn);
    p = map_domain_mem(mpfn << PAGE_SHIFT);
    ASSERT(p != NULL);
    d->arch.arch_vmx.vmx_platform.shared_page_va = (unsigned long) p;

    return 0;
}

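/*
 * Final per-VCPU launch setup: track the domain's pfn range, wire the
 * linear page-table slot of the monitor table to a fresh guest_pl2e_cache
 * page, and load host/guest GDT, LDT, TR, CR3 and stack state into the VMCS.
 */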
void vmx_do_launch(struct exec_domain *ed)
{
    /* Update CR3, GDT, LDT, TR */
    unsigned int tr, cpu, error = 0;
    struct host_execution_env host_env;
    struct Xgt_desc_struct desc;
    struct list_head *list_ent;
    l2_pgentry_t *mpl2e, *guest_pl2e_cache;
    unsigned long i, pfn = 0;
    struct pfn_info *page;
    execution_context_t *ec = get_execution_context();
    struct domain *d = ed->domain;

    cpu = smp_processor_id();
    d->arch.min_pfn = d->arch.max_pfn = 0;

    spin_lock(&d->page_alloc_lock);
    list_ent = d->page_list.next;

    mpl2e = (l2_pgentry_t *)map_domain_mem(pagetable_val(ed->arch.monitor_table));

    for ( i = 0; list_ent != &d->page_list; i++ )
    {
        pfn = list_entry(list_ent, struct pfn_info, list) - frame_table;
        d->arch.min_pfn = min(d->arch.min_pfn, pfn);
        d->arch.max_pfn = max(d->arch.max_pfn, pfn);
        list_ent = frame_table[pfn].list.next;
    }

    spin_unlock(&d->page_alloc_lock);

    page = (struct pfn_info *) alloc_domheap_page(NULL);
    pfn = (unsigned long) (page - frame_table);

    /*
     * make linear_pt_table work for guest ptes
     */
    mpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    guest_pl2e_cache = map_domain_mem(pfn << PAGE_SHIFT);
    memset(guest_pl2e_cache, 0, PAGE_SIZE); /* clean it up */
    ed->arch.guest_pl2e_cache = guest_pl2e_cache;

    unmap_domain_mem(mpl2e);

    vmx_setup_platform(ed, ec);

    __asm__ __volatile__ ("sgdt (%%eax) \n" :: "a"(&desc) : "memory");
    host_env.gdtr_limit = desc.size;
    host_env.gdtr_base = desc.address;

    error |= __vmwrite(HOST_GDTR_BASE, host_env.gdtr_base);

    error |= __vmwrite(GUEST_LDTR_SELECTOR, 0);
    error |= __vmwrite(GUEST_LDTR_BASE, 0);
    error |= __vmwrite(GUEST_LDTR_LIMIT, 0);

    __asm__ __volatile__ ("str (%%eax) \n" :: "a"(&tr) : "memory");
    host_env.tr_selector = tr;
    host_env.tr_limit = sizeof(struct tss_struct);
    host_env.tr_base = (unsigned long) &init_tss[cpu];

    error |= __vmwrite(HOST_TR_SELECTOR, host_env.tr_selector);
    error |= __vmwrite(HOST_TR_BASE, host_env.tr_base);
    error |= __vmwrite(GUEST_TR_BASE, 0);
    error |= __vmwrite(GUEST_TR_LIMIT, 0xff);

    ed->arch.shadow_table = ed->arch.pagetable;
    __vmwrite(GUEST_CR3, pagetable_val(ed->arch.pagetable));
    __vmwrite(HOST_CR3, pagetable_val(ed->arch.monitor_table));
    __vmwrite(HOST_ESP, (unsigned long) get_stack_top());

    ed->arch.schedule_tail = arch_vmx_do_resume;
}

/*
 * Initially set the same environment as the host.
 */
static inline int
construct_init_vmcs_guest(execution_context_t *context,
                          full_execution_context_t *full_context,
                          struct host_execution_env *host_env)
{
    int error = 0;
    union vmcs_arbytes arbytes;
    unsigned long dr7;
    unsigned long eflags, shadow_cr;

    /* MSR */
    error |= __vmwrite(VM_EXIT_MSR_LOAD_ADDR, 0);
    error |= __vmwrite(VM_EXIT_MSR_STORE_ADDR, 0);

    error |= __vmwrite(VM_EXIT_MSR_STORE_COUNT, 0);
    error |= __vmwrite(VM_EXIT_MSR_LOAD_COUNT, 0);
    error |= __vmwrite(VM_ENTRY_MSR_LOAD_COUNT, 0);
    /* interrupt */
    error |= __vmwrite(VM_ENTRY_INTR_INFO_FIELD, 0);
    /* mask */
    error |= __vmwrite(CR0_GUEST_HOST_MASK, 0xffffffff);
    error |= __vmwrite(CR4_GUEST_HOST_MASK, 0xffffffff);

    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MASK, 0);
    error |= __vmwrite(PAGE_FAULT_ERROR_CODE_MATCH, 0);

    /* TSC */
    error |= __vmwrite(TSC_OFFSET, 0);
    error |= __vmwrite(CR3_TARGET_COUNT, 0);

    /* Guest Selectors */
    error |= __vmwrite(GUEST_CS_SELECTOR, context->cs);
    error |= __vmwrite(GUEST_ES_SELECTOR, context->es);
    error |= __vmwrite(GUEST_SS_SELECTOR, context->ss);
    error |= __vmwrite(GUEST_DS_SELECTOR, context->ds);
    error |= __vmwrite(GUEST_FS_SELECTOR, context->fs);
    error |= __vmwrite(GUEST_GS_SELECTOR, context->gs);

    /* Guest segment limits */
    error |= __vmwrite(GUEST_CS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_ES_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_SS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_DS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_FS_LIMIT, GUEST_SEGMENT_LIMIT);
    error |= __vmwrite(GUEST_GS_LIMIT, GUEST_SEGMENT_LIMIT);

    error |= __vmwrite(GUEST_IDTR_LIMIT, host_env->idtr_limit);

    /* AR bytes */
    arbytes.bytes = 0;
    arbytes.fields.seg_type = 0x3;          /* type = 3 */
    arbytes.fields.s = 1;                   /* code or data, i.e. not system */
    arbytes.fields.dpl = 0;                 /* DPL = 0 */
    arbytes.fields.p = 1;                   /* segment present */
    arbytes.fields.default_ops_size = 1;    /* 32-bit */
    arbytes.fields.g = 1;
    arbytes.fields.null_bit = 0;            /* not null */

    error |= __vmwrite(GUEST_ES_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_SS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_DS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_FS_AR_BYTES, arbytes.bytes);
    error |= __vmwrite(GUEST_GS_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* type = 0xb */
    error |= __vmwrite(GUEST_CS_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_GDTR_BASE, context->edx);
    context->edx = 0;
    error |= __vmwrite(GUEST_GDTR_LIMIT, context->eax);
    context->eax = 0;

    arbytes.fields.s = 0;                   /* not code or data segment */
    arbytes.fields.seg_type = 0x2;          /* LDT */
    arbytes.fields.default_ops_size = 0;    /* 16-bit */
    arbytes.fields.g = 0;
    error |= __vmwrite(GUEST_LDTR_AR_BYTES, arbytes.bytes);

    arbytes.fields.seg_type = 0xb;          /* 32-bit TSS (busy) */
    error |= __vmwrite(GUEST_TR_AR_BYTES, arbytes.bytes);

    error |= __vmwrite(GUEST_CR0, host_env->cr0); /* same CR0 */

    /* Initially PG, PE are not set */
    shadow_cr = host_env->cr0;
    shadow_cr &= ~(X86_CR0_PE | X86_CR0_PG);
    error |= __vmwrite(CR0_READ_SHADOW, shadow_cr);
    /* CR3 is set in vmx_final_setup_guestos */
    error |= __vmwrite(GUEST_CR4, host_env->cr4);
    shadow_cr = host_env->cr4;
    shadow_cr &= ~(X86_CR4_PGE | X86_CR4_VMXE);
    error |= __vmwrite(CR4_READ_SHADOW, shadow_cr);

    error |= __vmwrite(GUEST_ES_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_CS_BASE, host_env->cs_base);
    error |= __vmwrite(GUEST_SS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_DS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_GS_BASE, host_env->ds_base);
    error |= __vmwrite(GUEST_IDTR_BASE, host_env->idtr_base);

    error |= __vmwrite(GUEST_ESP, context->esp);
    error |= __vmwrite(GUEST_EIP, context->eip);

    eflags = context->eflags & ~VMCS_EFLAGS_RESERVED_0; /* clear 0s */
    eflags |= VMCS_EFLAGS_RESERVED_1;                   /* set 1s */

    error |= __vmwrite(GUEST_EFLAGS, eflags);

    error |= __vmwrite(GUEST_INTERRUPTIBILITY_INFO, 0);
    __asm__ __volatile__ ("mov %%dr7, %0\n" : "=r" (dr7));
    error |= __vmwrite(GUEST_DR7, dr7);
    error |= __vmwrite(GUEST_VMCS0, 0xffffffff);
    error |= __vmwrite(GUEST_VMCS1, 0xffffffff);

    return error;
}

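/*
 * Populate the host-state area of the VMCS from the hypervisor's own
 * selectors, descriptor tables and control registers, so that a VM exit
 * returns to Xen's normal execution environment.
 */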
static inline int construct_vmcs_host(struct host_execution_env *host_env)
{
    int error = 0;
    unsigned long crn;
    struct Xgt_desc_struct desc;

    /* Host Selectors */
    host_env->ds_selector = __HYPERVISOR_DS;
    error |= __vmwrite(HOST_ES_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_SS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_DS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_FS_SELECTOR, host_env->ds_selector);
    error |= __vmwrite(HOST_GS_SELECTOR, host_env->ds_selector);

    host_env->cs_selector = __HYPERVISOR_CS;
    error |= __vmwrite(HOST_CS_SELECTOR, host_env->cs_selector);

    host_env->ds_base = 0;
    host_env->cs_base = 0;
    error |= __vmwrite(HOST_FS_BASE, host_env->ds_base);
    error |= __vmwrite(HOST_GS_BASE, host_env->ds_base);

    /* Debug */
    __asm__ __volatile__ ("sidt (%%eax) \n" :: "a"(&desc) : "memory");
    host_env->idtr_limit = desc.size;
    host_env->idtr_base = desc.address;
    error |= __vmwrite(HOST_IDTR_BASE, host_env->idtr_base);

    __asm__ __volatile__ ("movl %%cr0,%0" : "=r" (crn) : );
    host_env->cr0 = crn;
    error |= __vmwrite(HOST_CR0, crn); /* same CR0 */

    /* CR3 is set in vmx_final_setup_hostos */
    __asm__ __volatile__ ("movl %%cr4,%0" : "=r" (crn) : );
    host_env->cr4 = crn;
    error |= __vmwrite(HOST_CR4, crn);
    error |= __vmwrite(HOST_EIP, (unsigned long) vmx_asm_vmexit_handler);

    return error;
}

/*
 * Need to extend to support full virtualization.
 * The variable use_host_env indicates if the new VMCS needs to use
 * the same setups as the host has (xenolinux).
 */

int construct_vmcs(struct arch_vmx_struct *arch_vmx,
                   execution_context_t *context,
                   full_execution_context_t *full_context,
                   int use_host_env)
{
    int error;
    u64 vmcs_phys_ptr;

    struct host_execution_env host_env;

    if (use_host_env != VMCS_USE_HOST_ENV)
        return -EINVAL;

    memset(&host_env, 0, sizeof(struct host_execution_env));

    vmcs_phys_ptr = (u64) virt_to_phys(arch_vmx->vmcs);

    if ((error = __vmpclear(vmcs_phys_ptr))) {
        printk("construct_vmcs: VMCLEAR failed\n");
        return -EINVAL;
    }
    if ((error = load_vmcs(arch_vmx, vmcs_phys_ptr))) {
        printk("construct_vmcs: load_vmcs failed: VMCS = %lx\n",
               (unsigned long) vmcs_phys_ptr);
        return -EINVAL;
    }
    if ((error = construct_vmcs_controls())) {
        printk("construct_vmcs: construct_vmcs_controls failed\n");
        return -EINVAL;
    }
    /* host selectors */
    if ((error = construct_vmcs_host(&host_env))) {
        printk("construct_vmcs: construct_vmcs_host failed\n");
        return -EINVAL;
    }
    /* guest selectors */
    if ((error = construct_init_vmcs_guest(context, full_context, &host_env))) {
        printk("construct_vmcs: construct_init_vmcs_guest failed\n");
        return -EINVAL;
    }

    if ((error |= __vmwrite(EXCEPTION_BITMAP,
                            MONITOR_DEFAULT_EXCEPTION_BITMAP))) {
        printk("construct_vmcs: setting exception bitmap failed\n");
        return -EINVAL;
    }

    return 0;
}

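/* Make the given VMCS current on this CPU (VMPTRLD), tracking success in
 * the ARCH_VMX_VMCS_LOADED flag. */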
int load_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    int error;

    if ((error = __vmptrld(phys_ptr))) {
        clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
        return error;
    }
    set_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}

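/* Store the current-VMCS pointer (VMPTRST) and mark the VMCS as no longer
 * loaded on this CPU. */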
int store_vmcs(struct arch_vmx_struct *arch_vmx, u64 phys_ptr)
{
    /* take the current VMCS */
    __vmptrst(phys_ptr);
    clear_bit(ARCH_VMX_VMCS_LOADED, &arch_vmx->flags);
    return 0;
}

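/* Invoked if VMLAUNCH/VMRESUME fails; no recovery is attempted, so
 * simply crash. */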
void vm_launch_fail(unsigned long eflags)
{
    BUG();
}

void vm_resume_fail(unsigned long eflags)
{
    BUG();
}

#endif /* CONFIG_VMX */