
view xen/arch/x86/vmx.c @ 3770:d21fbb46b9d8

bitkeeper revision 1.1159.253.1 (4208f8a54Zaz-XgC11YTHeLxPHPoZg)

Merge scramble.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into scramble.cl.cam.ac.uk:/local/scratch/kaf24/xen-unstable.bk
author kaf24@scramble.cl.cam.ac.uk
date Tue Feb 08 17:36:37 2005 +0000 (2005-02-08)
parents f5f2757b3aa2 cb87fd290eb0
children 12104922e743
/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/*
 * vmx.c: handling VMX architecture-related VM exits
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/shadow.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/vmx.h>
#include <asm/vmx_vmcs.h>
#include <public/io/ioreq.h>

#ifdef CONFIG_VMX

int vmcs_size;
unsigned int opt_vmx_debug_level;

extern long evtchn_send(int lport);
extern long do_block(void);

#define VECTOR_DB   1
#define VECTOR_BP   3
#define VECTOR_GP   13
#define VECTOR_PG   14
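
/*
 * start_vmx: check CPUID for VMX support, enable CR4.VMXE, allocate a
 * VMCS region and enter VMX root operation with VMXON.  Returns 1 on
 * success, 0 if VMX is unavailable or setup fails.
 */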
int start_vmx()
{
    struct vmcs_struct *vmcs;
    unsigned long ecx;
    u64 phys_vmcs;      /* debugging */

    vmcs_size = VMCS_SIZE;
    /*
     * Xen does not fill x86_capability words except 0.
     */
    ecx = cpuid_ecx(1);
    boot_cpu_data.x86_capability[4] = ecx;

    if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
        return 0;

    set_in_cr4(X86_CR4_VMXE);   /* Enable VMXE */

    if (!(vmcs = alloc_vmcs())) {
        printk("Failed to allocate VMCS\n");
        return 0;
    }

    phys_vmcs = (u64) virt_to_phys(vmcs);

    if (!(__vmxon(phys_vmcs))) {
        printk("VMXON is done\n");
    }

    return 1;
}

void stop_vmx()
{
    if (read_cr4() & X86_CR4_VMXE)
        __vmxoff();
}

/*
 * Not all cases receive a valid value in the VM-exit instruction length field.
 */
#define __get_instruction_length(len) \
    __vmread(INSTRUCTION_LEN, &(len)); \
    if ((len) < 1 || (len) > 15) \
        __vmx_bug(&regs);

static inline void __update_guest_eip(unsigned long inst_len)
{
    unsigned long current_eip;

    __vmread(GUEST_EIP, &current_eip);
    __vmwrite(GUEST_EIP, current_eip + inst_len);
}


#include <asm/domain_page.h>
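
/*
 * vmx_do_page_fault: handle a guest page fault.  Walk the guest page
 * directory, refill the guest_pl2e_cache so linear_pg_table[] works,
 * hand MMIO addresses to handle_mmio(), and otherwise let shadow_fault()
 * resolve the fault.  Returns non-zero if handled, 0 to inject #PG.
 */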
static int vmx_do_page_fault(unsigned long va, unsigned long error_code)
{
    unsigned long eip, pfn;
    unsigned int index;
    unsigned long gpde = 0, gpte, gpa;
    int result;
    struct exec_domain *ed = current;

#if VMX_DEBUG
    {
        __vmread(GUEST_EIP, &eip);
        VMX_DBG_LOG(DBG_LEVEL_VMMU,
                "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx\n",
                va, eip, error_code);
    }
#endif
    /*
     * Set up guest page directory cache to make linear_pg_table[] work.
     */
    __guest_get_l2e(ed, va, &gpde);
    if (!(gpde & _PAGE_PRESENT))
        return 0;

    index = (va >> L2_PAGETABLE_SHIFT);
    if (!l2_pgentry_val(ed->arch.guest_pl2e_cache[index])) {
        pfn = phys_to_machine_mapping(gpde >> PAGE_SHIFT);

        VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_do_page_fault: pagetable = %lx\n",
                pagetable_val(ed->arch.pagetable));

        ed->arch.guest_pl2e_cache[index] =
            mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    }

    if (unlikely(__get_user(gpte, (unsigned long *)
                            &linear_pg_table[va >> PAGE_SHIFT])))
        return 0;

    gpa = (gpte & PAGE_MASK) | (va & (PAGE_SIZE - 1));

    if (mmio_space(gpa))
        handle_mmio(va, gpte, gpa);

    if ((result = shadow_fault(va, error_code)))
        return result;

    return 0;       /* failed to resolve, i.e. raise #PG */
}
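
/*
 * Reflect a general protection fault taken by the guest back into it,
 * together with the original error code.
 */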
static void vmx_do_general_protection_fault(struct xen_regs *regs)
{
    unsigned long eip, error_code;
    unsigned long intr_fields;

    __vmread(GUEST_EIP, &eip);
    __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_general_protection_fault: eip = %lx, error_code = %lx\n",
            eip, error_code);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx\n",
            regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);

    /* Reflect it back into the guest */
    intr_fields = (INTR_INFO_VALID_MASK |
               INTR_TYPE_EXCEPTION |
               INTR_INFO_DELIEVER_CODE_MASK |
               VECTOR_GP);
    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
    __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
}
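
/*
 * Emulate CPUID for the guest: execute the real instruction, then mask
 * out PSE, PAE and PSE36 from leaf 1, since the virtual MMU does not
 * support them yet.
 */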
static void vmx_vmexit_do_cpuid(unsigned long input, struct xen_regs *regs)
{
    int eax, ebx, ecx, edx;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
            " (esi) %lx, (edi) %lx\n",
            regs->eax, regs->ebx, regs->ecx, regs->edx,
            regs->esi, regs->edi);

    cpuid(input, &eax, &ebx, &ecx, &edx);

    if (input == 1) {
        clear_bit(X86_FEATURE_PSE, &edx);
        clear_bit(X86_FEATURE_PAE, &edx);
        clear_bit(X86_FEATURE_PSE36, &edx);
    }

    regs->eax = (unsigned long) eax;
    regs->ebx = (unsigned long) ebx;
    regs->ecx = (unsigned long) ecx;
    regs->edx = (unsigned long) edx;

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x\n",
            eip, input, eax, ebx, ecx, edx);

}

#define CASE_GET_REG_P(REG, reg)  \
    case REG_ ## REG: reg_p = &(regs->reg); break
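
/*
 * Handle a debug-register access exit: copy the value between the guest
 * general-purpose register named in the exit qualification and the saved
 * debugreg[] state (GUEST_ESP is read/written via the VMCS).
 */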
static void vmx_dr_access (unsigned long exit_qualification, struct xen_regs *regs)
{
    unsigned int reg;
    unsigned long *reg_p = 0;
    struct exec_domain *ed = current;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    reg = exit_qualification & DEBUG_REG_ACCESS_NUM;

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx\n",
            eip, reg, exit_qualification);

    switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
    CASE_GET_REG_P(EAX, eax);
    CASE_GET_REG_P(ECX, ecx);
    CASE_GET_REG_P(EDX, edx);
    CASE_GET_REG_P(EBX, ebx);
    CASE_GET_REG_P(EBP, ebp);
    CASE_GET_REG_P(ESI, esi);
    CASE_GET_REG_P(EDI, edi);
    case REG_ESP:
        break;
    default:
        __vmx_bug(regs);
    }

    switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
    case TYPE_MOV_TO_DR:
        /* don't need to check the range */
        if (reg != REG_ESP)
            ed->arch.debugreg[reg] = *reg_p;
        else {
            unsigned long value;
            __vmread(GUEST_ESP, &value);
            ed->arch.debugreg[reg] = value;
        }
        break;
    case TYPE_MOV_FROM_DR:
        if (reg != REG_ESP)
            *reg_p = ed->arch.debugreg[reg];
        else {
            __vmwrite(GUEST_ESP, ed->arch.debugreg[reg]);
        }
        break;
    }
}

/*
 * Invalidate the TLB for va. Invalidate the shadow page corresponding to
 * the address va.
 */
static void vmx_vmexit_do_invlpg(unsigned long va)
{
    unsigned long eip;
    struct exec_domain *ed = current;
    unsigned int index;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg:eip=%p, va=%p\n",
            eip, va);

    /*
     * We do the safest thing first, then try to update the shadow by
     * copying from the guest.
     */
    vmx_shadow_invlpg(ed->domain, va);
    index = (va >> L2_PAGETABLE_SHIFT);
    ed->arch.guest_pl2e_cache[index] =
        mk_l2_pgentry(0); /* invalidate pgd cache */
}

static inline void guest_pl2e_cache_invalidate(struct exec_domain *ed)
{
    /*
     * Need to optimize this
     */
    memset(ed->arch.guest_pl2e_cache, 0, PAGE_SIZE);
}
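
/*
 * Translate a guest virtual address to a guest physical address using the
 * guest page tables (via the linear page table).  Returns 0 on failure.
 */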
inline unsigned long gva_to_gpa(unsigned long gva)
{
    unsigned long gpde, gpte, pfn, index;
    struct exec_domain *ed = current;

    __guest_get_l2e(ed, gva, &gpde);
    index = (gva >> L2_PAGETABLE_SHIFT);

    pfn = phys_to_machine_mapping(gpde >> PAGE_SHIFT);

    ed->arch.guest_pl2e_cache[index] =
        mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[gva >> PAGE_SHIFT])) )
    {
        printk("gva_to_gpa EXIT: read gpte faulted" );
        return 0;
    }

    if ( !(gpte & _PAGE_PRESENT) )
    {
        printk("gva_to_gpa - EXIT: gpte not present (%lx)",gpte );
        return 0;
    }

    return (gpte & PAGE_MASK) + (gva & ~PAGE_MASK);
}
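
/*
 * Decode an I/O instruction exit and build an ioreq for the device model:
 * port, direction, size and, for string I/O, the guest-physical data
 * pointer and repeat count.  Port 0x80 (POST) writes are absorbed here.
 * The request is handed to the device model via the event channel and the
 * domain blocks until it completes.
 */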
static void vmx_io_instruction(struct xen_regs *regs,
                   unsigned long exit_qualification, unsigned long inst_len)
{
    struct exec_domain *d = current;
    vcpu_iodata_t *vio;
    ioreq_t *p;
    unsigned long addr;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_io_instruction: eip=%p, exit_qualification = %lx\n",
            eip, exit_qualification);

    if (test_bit(6, &exit_qualification))
        addr = (exit_qualification >> 16) & (0xffff);
    else
        addr = regs->edx & 0xffff;

    if (addr == 0x80) {
        __update_guest_eip(inst_len);
        return;
    }

    vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
    if (vio == 0) {
        VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx\n", (unsigned long) vio);
        domain_crash();
    }
    p = &vio->vp_ioreq;
    p->dir = test_bit(3, &exit_qualification);
    set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);

    p->pdata_valid = 0;
    p->count = 1;
    p->size = (exit_qualification & 7) + 1;

    if (test_bit(4, &exit_qualification)) {
        unsigned long eflags;

        __vmread(GUEST_EFLAGS, &eflags);
        p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
        p->pdata_valid = 1;
        p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
                       regs->esi
                       : regs->edi);
        p->u.pdata = (void *) gva_to_gpa(p->u.data);
        if (test_bit(5, &exit_qualification))
            p->count = regs->ecx;
        if ((p->u.data & PAGE_MASK) !=
            ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
            printk("stringio crosses page boundary!\n");
            if (p->u.data & (p->size - 1)) {
                printk("Not aligned I/O!\n");
                domain_crash();
            }
            p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
        } else {
            __update_guest_eip(inst_len);
        }
    } else if (p->dir == IOREQ_WRITE) {
        p->u.data = regs->eax;
        __update_guest_eip(inst_len);
    } else
        __update_guest_eip(inst_len);

    p->addr = addr;
    p->port_mm = 0;
    p->state = STATE_IOREQ_READY;
    evtchn_send(IOPACKET_PORT);
    do_block();
}
#define CASE_GET_REG(REG, reg)  \
    case REG_ ## REG: value = regs->reg; break

/*
 * Write to control registers
 */
static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
{
    unsigned long value;
    unsigned long old_cr;
    struct exec_domain *d = current;

    switch (gp) {
    CASE_GET_REG(EAX, eax);
    CASE_GET_REG(ECX, ecx);
    CASE_GET_REG(EDX, edx);
    CASE_GET_REG(EBX, ebx);
    CASE_GET_REG(EBP, ebp);
    CASE_GET_REG(ESI, esi);
    CASE_GET_REG(EDI, edi);
    case REG_ESP:
        __vmread(GUEST_ESP, &value);
        break;
    default:
        printk("invalid gp: %d\n", gp);
        __vmx_bug(regs);
    }

    VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, \n", cr, value);
    VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx, \n", (unsigned long) current);

    switch(cr) {
    case 0:
    {
        unsigned long old_base_pfn = 0, pfn;

        /*
         * CR0:
         * We don't want to lose PE and PG.
         */
        __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
        __vmwrite(CR0_READ_SHADOW, value);

        if (value & (X86_CR0_PE | X86_CR0_PG) &&
            !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
            /*
             * Enable paging
             */
            set_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state);
            /*
             * The guest CR3 must be pointing to the guest physical.
             */
            if (!(pfn = phys_to_machine_mapping(
                      d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)))
            {
                VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx\n",
                        d->arch.arch_vmx.cpu_cr3);
                domain_crash(); /* need to take a clean path */
            }
            old_base_pfn = pagetable_val(d->arch.pagetable) >> PAGE_SHIFT;
            /*
             * Now mm.pagetable points to machine physical.
             */
            d->arch.pagetable = mk_pagetable(pfn << PAGE_SHIFT);

            VMX_DBG_LOG(DBG_LEVEL_VMMU, "New mm.pagetable = %lx\n",
                    (unsigned long) (pfn << PAGE_SHIFT));

            shadow_lock(d->domain);
            shadow_mode_enable(d->domain, SHM_full_32);
            shadow_unlock(d->domain);

            __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
            /*
             * mm->shadow_table should hold the next CR3 for shadow
             */
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, pfn = %lx\n",
                    d->arch.arch_vmx.cpu_cr3, pfn);
            put_page_and_type(&frame_table[old_base_pfn]);

        }
        break;
    }
    case 3:
    {
        unsigned long pfn;

        /*
         * If paging is not enabled yet, simply copy the value to CR3.
         */
        if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
            d->arch.arch_vmx.cpu_cr3 = value;
            return;
        }

        guest_pl2e_cache_invalidate(d);
        /*
         * We make a new one if the shadow does not exist.
         */
        if (value == d->arch.arch_vmx.cpu_cr3) {
            /*
             * This is a simple TLB flush, implying the guest has
             * removed some translation or changed page attributes.
             * We simply invalidate the shadow.
             */
            pfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
            if ((pfn << PAGE_SHIFT) != pagetable_val(d->arch.pagetable))
                __vmx_bug(regs);
            vmx_shadow_clear_state(d->domain);
            shadow_invalidate(d);
        } else {
            /*
             * If different, make a shadow. Check if the PDBR is valid
             * first.
             */
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx\n", value);
            if ((value >> PAGE_SHIFT) > d->domain->max_pages)
            {
                VMX_DBG_LOG(DBG_LEVEL_VMMU,
                        "Invalid CR3 value=%lx\n", value);
                domain_crash(); /* need to take a clean path */
            }
            pfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
            vmx_shadow_clear_state(d->domain);
            d->arch.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
            shadow_mk_pagetable(d);
            /*
             * mm->shadow_table should hold the next CR3 for shadow
             */
            d->arch.arch_vmx.cpu_cr3 = value;
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx\n",
                    value);
            __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
        }
        break;
    }
    case 4:
        /* CR4 */
        if (value & X86_CR4_PAE)
            __vmx_bug(regs); /* not implemented */
        __vmread(CR4_READ_SHADOW, &old_cr);

        __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
        __vmwrite(CR4_READ_SHADOW, value);

        /*
         * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
         * all TLB entries except global entries.
         */
        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
            vmx_shadow_clear_state(d->domain);
            shadow_invalidate(d);
            guest_pl2e_cache_invalidate(d);
        }
        break;
    default:
        printk("invalid cr: %d\n", cr);
        __vmx_bug(regs);
    }
}
#define CASE_SET_REG(REG, reg)  \
    case REG_ ## REG:           \
        regs->reg = value;      \
        break

/*
 * Read from control registers. CR0 and CR4 are read from the shadow.
 */
static void mov_from_cr(int cr, int gp, struct xen_regs *regs)
{
    unsigned long value;
    struct exec_domain *d = current;

    if (cr != 3)
        __vmx_bug(regs);

    value = (unsigned long) d->arch.arch_vmx.cpu_cr3;
    ASSERT(value);

    switch (gp) {
    CASE_SET_REG(EAX, eax);
    CASE_SET_REG(ECX, ecx);
    CASE_SET_REG(EDX, edx);
    CASE_SET_REG(EBX, ebx);
    CASE_SET_REG(EBP, ebp);
    CASE_SET_REG(ESI, esi);
    CASE_SET_REG(EDI, edi);
    case REG_ESP:
        __vmwrite(GUEST_ESP, value);
        regs->esp = value;
        break;
    default:
        printk("invalid gp: %d\n", gp);
        __vmx_bug(regs);
    }

    VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx, \n", cr, value);
}
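
/*
 * Dispatch a control-register access exit: MOV to/from CRn, or CLTS,
 * which clears TS in both GUEST_CR0 and the CR0 read shadow.
 */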
static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs *regs)
{
    unsigned int gp, cr;
    unsigned long value;

    switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
    case TYPE_MOV_TO_CR:
        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
        mov_to_cr(gp, cr, regs);
        break;
    case TYPE_MOV_FROM_CR:
        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
        mov_from_cr(cr, gp, regs);
        break;
    case TYPE_CLTS:
        __vmread(GUEST_CR0, &value);
        value &= ~X86_CR0_TS; /* clear TS */
        __vmwrite(GUEST_CR0, value);

        __vmread(CR0_READ_SHADOW, &value);
        value &= ~X86_CR0_TS; /* clear TS */
        __vmwrite(CR0_READ_SHADOW, value);
        break;
    default:
        __vmx_bug(regs);
        break;
    }
}
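
/*
 * Emulate RDMSR by reading the host MSR named in ECX directly into the
 * guest's EAX:EDX.
 */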
static inline void vmx_do_msr_read(struct xen_regs *regs)
{
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
            regs->ecx, regs->eax, regs->edx);

    rdmsr(regs->ecx, regs->eax, regs->edx);

    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
            "ecx=%lx, eax=%lx, edx=%lx",
            regs->ecx, regs->eax, regs->edx);
}
/*
 * Need to use this exit to reschedule
 */
static inline void vmx_vmexit_do_hlt()
{
#if VMX_DEBUG
    unsigned long eip;
    __vmread(GUEST_EIP, &eip);
#endif
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%p\n", eip);
    __enter_scheduler();
}

static inline void vmx_vmexit_do_mwait()
{
#if VMX_DEBUG
    unsigned long eip;
    __vmread(GUEST_EIP, &eip);
#endif
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%p\n", eip);
    __enter_scheduler();
}

#define BUF_SIZ     256
#define MAX_LINE    80
char print_buf[BUF_SIZ];
static int index;
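
/*
 * Buffer characters passed by the guest through VMCALL and print a
 * complete line, tagged with the domain ID, on '\n' or overflow.
 */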
static void vmx_print_line(const char c, struct exec_domain *d)
{

    if (index == MAX_LINE || c == '\n') {
        if (index == MAX_LINE) {
            print_buf[index++] = c;
        }
        print_buf[index] = '\0';
        printk("(GUEST: %u) %s\n", d->domain->id, (char *) &print_buf);
        index = 0;
    }
    else
        print_buf[index++] = c;
}

#ifdef XEN_DEBUGGER
void save_xen_regs(struct xen_regs *regs)
{
    __vmread(GUEST_SS_SELECTOR, &regs->xss);
    __vmread(GUEST_ESP, &regs->esp);
    __vmread(GUEST_EFLAGS, &regs->eflags);
    __vmread(GUEST_CS_SELECTOR, &regs->xcs);
    __vmread(GUEST_EIP, &regs->eip);

    __vmread(GUEST_GS_SELECTOR, &regs->xgs);
    __vmread(GUEST_FS_SELECTOR, &regs->xfs);
    __vmread(GUEST_ES_SELECTOR, &regs->xes);
    __vmread(GUEST_DS_SELECTOR, &regs->xds);
}

void restore_xen_regs(struct xen_regs *regs)
{
    __vmwrite(GUEST_SS_SELECTOR, regs->xss);
    __vmwrite(GUEST_ESP, regs->esp);
    __vmwrite(GUEST_EFLAGS, regs->eflags);
    __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
    __vmwrite(GUEST_EIP, regs->eip);

    __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
    __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
    __vmwrite(GUEST_ES_SELECTOR, regs->xes);
    __vmwrite(GUEST_DS_SELECTOR, regs->xds);
}
#endif
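
/*
 * Main VM-exit dispatcher.  Reads the exit reason from the VMCS,
 * re-injects any event that was being delivered through the IDT when the
 * exit occurred, and then handles each exit reason (exceptions, external
 * interrupts, CPUID, HLT, INVLPG, VMCALL, CR/DR accesses, I/O, MSR
 * accesses, MWAIT) before returning to the guest.
 */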
asmlinkage void vmx_vmexit_handler(struct xen_regs regs)
{
    unsigned int exit_reason, idtv_info_field;
    unsigned long exit_qualification, eip, inst_len = 0;
    struct exec_domain *d = current;
    int error;

    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
        __vmx_bug(&regs);

    __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
    if (idtv_info_field & INTR_INFO_VALID_MASK) {
        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
        if ((idtv_info_field & 0xff) == 14) {
            unsigned long error_code;

            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
            printk("#PG error code: %lx\n", error_code);
        }
        VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x\n",
                idtv_info_field);
    }

    /* don't bother with H/W interrupts */
    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
        exit_reason != EXIT_REASON_VMCALL &&
        exit_reason != EXIT_REASON_IO_INSTRUCTION)
        VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x\n", exit_reason);

    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        __vmread(GUEST_EIP, &eip);
        domain_crash();
        return;
    }

    switch (exit_reason) {
    case EXIT_REASON_EXCEPTION_NMI:
    {
        /*
         * We don't set the software-interrupt exiting (INT n).
         * (1) We can get an exception (e.g. #PG) in the guest, or
         * (2) NMI
         */
        int error;
        unsigned int vector;
        unsigned long va;
        unsigned long error_code;

        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
            && !(vector & INTR_INFO_VALID_MASK))
            __vmx_bug(&regs);
        vector &= 0xff;

        switch (vector) {
#ifdef XEN_DEBUGGER
        case VECTOR_DB:
        {
            save_xen_regs(&regs);
            pdb_handle_exception(1, &regs, 1);
            restore_xen_regs(&regs);
            break;
        }
        case VECTOR_BP:
        {
            save_xen_regs(&regs);
            pdb_handle_exception(3, &regs, 1);
            restore_xen_regs(&regs);
            break;
        }
#endif
        case VECTOR_GP:
        {
            vmx_do_general_protection_fault(&regs);
            break;
        }
        case VECTOR_PG:
        {
            __vmread(EXIT_QUALIFICATION, &va);
            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
            VMX_DBG_LOG(DBG_LEVEL_VMMU,
                    "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx\n",
                    regs.eax, regs.ebx, regs.ecx, regs.edx, regs.esi,
                    regs.edi);
            d->arch.arch_vmx.vmx_platform.mpci.inst_decoder_regs = &regs;

            if (!(error = vmx_do_page_fault(va, error_code))) {
                /*
                 * Inject #PG using Interruption-Information Fields
                 */
                unsigned long intr_fields;

                intr_fields = (INTR_INFO_VALID_MASK |
                           INTR_TYPE_EXCEPTION |
                           INTR_INFO_DELIEVER_CODE_MASK |
                           VECTOR_PG);
                __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
                d->arch.arch_vmx.cpu_cr2 = va;
            }
            break;
        }
        default:
            __vmx_bug(&regs);
            break;
        }
        break;
    }
    case EXIT_REASON_EXTERNAL_INTERRUPT:
    {
        extern int vector_irq[];
        extern asmlinkage void do_IRQ(struct xen_regs);
        extern void smp_apic_timer_interrupt(struct xen_regs *);
        extern void timer_interrupt(int, void *, struct xen_regs *);
        unsigned int vector;

        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
            && !(vector & INTR_INFO_VALID_MASK))
            __vmx_bug(&regs);

        vector &= 0xff;
        local_irq_disable();

        if (vector == LOCAL_TIMER_VECTOR) {
            smp_apic_timer_interrupt(&regs);
        } else {
            regs.entry_vector = (vector == FIRST_DEVICE_VECTOR?
                         0 : vector_irq[vector]);
            do_IRQ(regs);
        }
        break;
    }
    case EXIT_REASON_PENDING_INTERRUPT:
        __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
              MONITOR_CPU_BASED_EXEC_CONTROLS);
        vmx_intr_assist(d);
        break;
    case EXIT_REASON_TASK_SWITCH:
        __vmx_bug(&regs);
        break;
    case EXIT_REASON_CPUID:
        __get_instruction_length(inst_len);
        vmx_vmexit_do_cpuid(regs.eax, &regs);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_HLT:
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        vmx_vmexit_do_hlt();
        break;
    case EXIT_REASON_INVLPG:
    {
        unsigned long va;

        __vmread(EXIT_QUALIFICATION, &va);
        vmx_vmexit_do_invlpg(va);
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    }
    case EXIT_REASON_VMCALL:
        __get_instruction_length(inst_len);
        __vmread(GUEST_EIP, &eip);
        __vmread(EXIT_QUALIFICATION, &exit_qualification);

        vmx_print_line(regs.eax, d); /* provides the current domain */
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_CR_ACCESS:
    {
        __vmread(GUEST_EIP, &eip);
        __get_instruction_length(inst_len);
        __vmread(EXIT_QUALIFICATION, &exit_qualification);

        VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx\n",
                eip, inst_len, exit_qualification);
        vmx_cr_access(exit_qualification, &regs);
        __update_guest_eip(inst_len);
        break;
    }
    case EXIT_REASON_DR_ACCESS:
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        vmx_dr_access(exit_qualification, &regs);
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_IO_INSTRUCTION:
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        __get_instruction_length(inst_len);
        vmx_io_instruction(&regs, exit_qualification, inst_len);
        break;
    case EXIT_REASON_MSR_READ:
        __get_instruction_length(inst_len);
        vmx_do_msr_read(&regs);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_MSR_WRITE:
        __vmread(GUEST_EIP, &eip);
        VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%p, eax=%p, edx=%p",
                eip, regs.eax, regs.edx);
        /* just ignore the MSR write for now */
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_MWAIT_INSTRUCTION:
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        vmx_vmexit_do_mwait();
        break;
    default:
        __vmx_bug(&regs);       /* should not happen */
    }

    vmx_intr_assist(d);
    return;
}
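
/*
 * Restore the guest's %cr2 just before resuming the guest, with
 * interrupts disabled.
 */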
asmlinkage void load_cr2(void)
{
    struct exec_domain *d = current;

    local_irq_disable();
    asm volatile("movl %0,%%cr2": :"r" (d->arch.arch_vmx.cpu_cr2));
}

#endif /* CONFIG_VMX */