debuggers.hg

view xen/arch/x86/vmx.c @ 4629:6375127fdf23

bitkeeper revision 1.1311.1.1 (426641eeBv97w6sl983zxeR4Dc3Utg)

Cleanup page table handling. Add macros to access page table
entries, fixup plenty of places in the code to use the page
table types instead of "unsigned long".

Signed-off-by: Gerd Knorr <kraxel@bytesex.org>
Signed-off-by: michael.fetterman@cl.cam.ac.uk
author mafetter@fleming.research
date Wed Apr 20 11:50:06 2005 +0000 (2005-04-20)
parents aa21bd6f8677
children 1803018b3b05
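Editor's note: the changeset description above introduces typed page-table accessors in place of raw "unsigned long" arithmetic. As a hedged illustration of that pattern, here is a minimal sketch that only assumes names already used in vmx_do_page_fault() below (l1_pgentry_t, gva_to_gpte, l1e_get_flags, l1e_get_phys); the wrapper function itself is hypothetical:

    /* Sketch: translate a guest virtual address to a guest physical address
     * with the typed accessors rather than by masking an "unsigned long". */
    static unsigned long example_gva_to_gpa(unsigned long va)
    {
        l1_pgentry_t gpte = gva_to_gpte(va);           /* typed guest L1 entry */

        if ( !(l1e_get_flags(gpte) & _PAGE_PRESENT) )  /* flags via accessor   */
            return 0;                                  /* not mapped           */

        /* frame address via accessor, plus the offset within the page */
        return l1e_get_phys(gpte) + (va & ~PAGE_MASK);
    }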
line source
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/softirq.h>
26 #include <asm/current.h>
27 #include <asm/io.h>
28 #include <asm/irq.h>
29 #include <asm/shadow.h>
30 #include <asm/regs.h>
31 #include <asm/cpufeature.h>
32 #include <asm/processor.h>
33 #include <asm/types.h>
34 #include <asm/msr.h>
35 #include <asm/spinlock.h>
36 #include <asm/vmx.h>
37 #include <asm/vmx_vmcs.h>
38 #include <asm/vmx_intercept.h>
39 #include <asm/shadow.h>
40 #include <public/io/ioreq.h>
42 #ifdef CONFIG_VMX
44 int vmcs_size;
45 unsigned int opt_vmx_debug_level = 0;
47 extern long evtchn_send(int lport);
48 extern long do_block(void);
49 void do_nmi(struct xen_regs *, unsigned long);
51 int start_vmx()
52 {
53 struct vmcs_struct *vmcs;
54 unsigned long ecx;
55 u64 phys_vmcs; /* debugging */
57 vmcs_size = VMCS_SIZE;
58 /*
59 * Xen does not fill x86_capability words except 0.
60 */
61 ecx = cpuid_ecx(1);
62 boot_cpu_data.x86_capability[4] = ecx;
64 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
65 return 0;
67 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
69 if (!(vmcs = alloc_vmcs())) {
70 printk("Failed to allocate VMCS\n");
71 return 0;
72 }
74 phys_vmcs = (u64) virt_to_phys(vmcs);
76 if (!(__vmxon(phys_vmcs))) {
77 printk("VMXON is done\n");
78 }
80 return 1;
81 }
83 void stop_vmx()
84 {
85 if (read_cr4() & X86_CR4_VMXE)
86 __vmxoff();
87 }
89 /*
90 * Not all cases receive a valid value in the VM-exit instruction length field.
91 */
92 #define __get_instruction_length(len) \
93 __vmread(INSTRUCTION_LEN, &(len)); \
94 if ((len) < 1 || (len) > 15) \
95 __vmx_bug(&regs);
97 static inline void __update_guest_eip(unsigned long inst_len)
98 {
99 unsigned long current_eip;
101 __vmread(GUEST_EIP, &current_eip);
102 __vmwrite(GUEST_EIP, current_eip + inst_len);
103 }
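Editor's note: taken together, __get_instruction_length() and __update_guest_eip() form the idiom the exit handlers below use to step the guest past the instruction that caused the exit. A minimal sketch of that idiom, assuming it runs inside vmx_vmexit_handler() where the regs variable referenced by the macro is in scope:

    /* Sketch: the per-exit pattern repeated throughout vmx_vmexit_handler(). */
    unsigned long inst_len = 0;

    __get_instruction_length(inst_len);  /* checked to be 1..15, else __vmx_bug */
    /* ... handle the exit (CPUID, HLT, MSR access, ...) ... */
    __update_guest_eip(inst_len);        /* GUEST_EIP += inst_len               */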
106 #include <asm/domain_page.h>
108 static int vmx_do_page_fault(unsigned long va, struct xen_regs *regs)
109 {
110 struct exec_domain *ed = current;
111 unsigned long eip;
112 l1_pgentry_t gpte;
113 unsigned long gpa; /* FIXME: PAE */
114 int result;
116 #if VMX_DEBUG
117 {
118 __vmread(GUEST_EIP, &eip);
119 VMX_DBG_LOG(DBG_LEVEL_VMMU,
120 "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx",
121 va, eip, regs->error_code);
122 }
123 #endif
125 /*
126 * If the guest has not enabled paging yet, we are still running on the
127 * 1:1 page table and should never have gotten here.
128 */
129 if ( !test_bit(VMX_CPU_STATE_PG_ENABLED, &ed->arch.arch_vmx.cpu_state) )
130 {
131 printk("vmx_do_page_fault while running on 1:1 page table\n");
132 return 0;
133 }
135 gpte = gva_to_gpte(va);
136 if (!(l1e_get_flags(gpte) & _PAGE_PRESENT) )
137 return 0;
138 gpa = l1e_get_phys(gpte) + (va & ~PAGE_MASK);
140 /* Use 1:1 page table to identify MMIO address space */
141 if (mmio_space(gpa))
142 handle_mmio(va, gpa);
144 result = shadow_fault(va, regs);
146 #if 0
147 if ( !result )
148 {
149 __vmread(GUEST_EIP, &eip);
150 printk("vmx pgfault to guest va=%p eip=%p\n", va, eip);
151 }
152 #endif
154 return result;
155 }
157 static void vmx_do_general_protection_fault(struct xen_regs *regs)
158 {
159 unsigned long eip, error_code;
160 unsigned long intr_fields;
162 __vmread(GUEST_EIP, &eip);
163 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
165 VMX_DBG_LOG(DBG_LEVEL_1,
166 "vmx_general_protection_fault: eip = %lx, erro_code = %lx",
167 eip, error_code);
169 VMX_DBG_LOG(DBG_LEVEL_1,
170 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
171 regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);
173 /* Reflect it back into the guest */
174 intr_fields = (INTR_INFO_VALID_MASK |
175 INTR_TYPE_EXCEPTION |
176 INTR_INFO_DELIEVER_CODE_MASK |
177 TRAP_gp_fault);
178 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
179 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
180 }
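Editor's note: the interruption-information write-back above is the same pattern later used to inject #PG from the page-fault path in vmx_vmexit_handler(). A hedged sketch of a generic helper built only from fields and masks used in this file (the helper name vmx_inject_exception is hypothetical):

    /* Sketch: reflect an exception that carries an error code back into
     * the guest on the next VM entry. */
    static void vmx_inject_exception(unsigned int trap, unsigned long error_code)
    {
        unsigned long intr_fields = (INTR_INFO_VALID_MASK |
                                     INTR_TYPE_EXCEPTION |
                                     INTR_INFO_DELIEVER_CODE_MASK |
                                     trap);

        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
        __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
    }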
182 static void vmx_vmexit_do_cpuid(unsigned long input, struct xen_regs *regs)
183 {
184 unsigned int eax, ebx, ecx, edx;
185 unsigned long eip;
187 __vmread(GUEST_EIP, &eip);
189 VMX_DBG_LOG(DBG_LEVEL_1,
190 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
191 " (esi) %lx, (edi) %lx",
192 regs->eax, regs->ebx, regs->ecx, regs->edx,
193 regs->esi, regs->edi);
195 cpuid(input, &eax, &ebx, &ecx, &edx);
197 if (input == 1) {
198 clear_bit(X86_FEATURE_PSE, &edx);
199 clear_bit(X86_FEATURE_PAE, &edx);
200 clear_bit(X86_FEATURE_PSE36, &edx);
201 }
203 regs->eax = (unsigned long) eax;
204 regs->ebx = (unsigned long) ebx;
205 regs->ecx = (unsigned long) ecx;
206 regs->edx = (unsigned long) edx;
208 VMX_DBG_LOG(DBG_LEVEL_1,
209 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x",
210 eip, input, eax, ebx, ecx, edx);
212 }
214 #define CASE_GET_REG_P(REG, reg) \
215 case REG_ ## REG: reg_p = (unsigned long *)&(regs->reg); break
217 static void vmx_dr_access (unsigned long exit_qualification, struct xen_regs *regs)
218 {
219 unsigned int reg;
220 unsigned long *reg_p = 0;
221 struct exec_domain *ed = current;
222 unsigned long eip;
224 __vmread(GUEST_EIP, &eip);
226 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
228 VMX_DBG_LOG(DBG_LEVEL_1,
229 "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx",
230 eip, reg, exit_qualification);
232 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
233 CASE_GET_REG_P(EAX, eax);
234 CASE_GET_REG_P(ECX, ecx);
235 CASE_GET_REG_P(EDX, edx);
236 CASE_GET_REG_P(EBX, ebx);
237 CASE_GET_REG_P(EBP, ebp);
238 CASE_GET_REG_P(ESI, esi);
239 CASE_GET_REG_P(EDI, edi);
240 case REG_ESP:
241 break;
242 default:
243 __vmx_bug(regs);
244 }
246 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
247 case TYPE_MOV_TO_DR:
248 /* don't need to check the range */
249 if (reg != REG_ESP)
250 ed->arch.debugreg[reg] = *reg_p;
251 else {
252 unsigned long value;
253 __vmread(GUEST_ESP, &value);
254 ed->arch.debugreg[reg] = value;
255 }
256 break;
257 case TYPE_MOV_FROM_DR:
258 if (reg != REG_ESP)
259 *reg_p = ed->arch.debugreg[reg];
260 else {
261 __vmwrite(GUEST_ESP, ed->arch.debugreg[reg]);
262 }
263 break;
264 }
265 }
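Editor's note: vmx_dr_access() only consumes three fields of the exit qualification; a hedged summary fragment using just the masks named above:

    /* Sketch: fields of the debug-register exit qualification used above. */
    unsigned int dr   = exit_qualification & DEBUG_REG_ACCESS_NUM;   /* DR number    */
    unsigned int gpr  = exit_qualification & DEBUG_REG_ACCESS_REG;   /* GP register  */
    unsigned int type = exit_qualification & DEBUG_REG_ACCESS_TYPE;  /* to / from DR */

    /* TYPE_MOV_TO_DR:   cache the guest value in ed->arch.debugreg[dr].
     * TYPE_MOV_FROM_DR: hand back the cached ed->arch.debugreg[dr] value. */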
267 /*
268 * Invalidate the TLB entry for va. Invalidate the shadow page corresponding
269 * to the address va.
270 */
271 static void vmx_vmexit_do_invlpg(unsigned long va)
272 {
273 unsigned long eip;
274 struct exec_domain *ed = current;
276 __vmread(GUEST_EIP, &eip);
278 VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg: eip=%p, va=%p",
279 eip, va);
281 /*
282 * We do the safest thing first, then try to update the shadow copy
283 * from the guest.
284 */
285 shadow_invlpg(ed, va);
286 }
288 static void vmx_io_instruction(struct xen_regs *regs,
289 unsigned long exit_qualification, unsigned long inst_len)
290 {
291 struct exec_domain *d = current;
292 vcpu_iodata_t *vio;
293 ioreq_t *p;
294 unsigned long addr;
295 unsigned long eip, cs, eflags;
296 int vm86;
298 __vmread(GUEST_EIP, &eip);
299 __vmread(GUEST_CS_SELECTOR, &cs);
300 __vmread(GUEST_EFLAGS, &eflags);
301 vm86 = eflags & X86_EFLAGS_VM ? 1 : 0;
303 VMX_DBG_LOG(DBG_LEVEL_1,
304 "vmx_io_instruction: vm86 %d, eip=%p:%p, exit_qualification = %lx",
305 vm86, cs, eip, exit_qualification);
307 if (test_bit(6, &exit_qualification))
308 addr = (exit_qualification >> 16) & (0xffff);
309 else
310 addr = regs->edx & 0xffff;
312 if (addr == 0x80) {
313 __update_guest_eip(inst_len);
314 return;
315 }
317 vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
318 if (vio == 0) {
319 VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx", (unsigned long) vio);
320 domain_crash_synchronous();
321 }
322 p = &vio->vp_ioreq;
323 p->dir = test_bit(3, &exit_qualification);
325 p->pdata_valid = 0;
326 p->count = 1;
327 p->size = (exit_qualification & 7) + 1;
329 if (test_bit(4, &exit_qualification)) {
330 p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
331 p->pdata_valid = 1;
333 if (vm86) {
334 unsigned long seg;
335 if (p->dir == IOREQ_WRITE) {
336 __vmread(GUEST_DS_SELECTOR, &seg);
337 p->u.pdata = (void *)
338 ((seg << 4) | (regs->esi & 0xFFFF));
339 } else {
340 __vmread(GUEST_ES_SELECTOR, &seg);
341 p->u.pdata = (void *)
342 ((seg << 4) | (regs->edi & 0xFFFF));
343 }
344 } else {
345 p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
346 regs->esi : regs->edi);
347 }
348 p->u.pdata = (void *) gva_to_gpa(p->u.data);
351 if (test_bit(5, &exit_qualification))
352 p->count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
353 if ((p->u.data & PAGE_MASK) !=
354 ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
355 printk("stringio crosses page boundary!\n");
356 if (p->u.data & (p->size - 1)) {
357 printk("Not aligned I/O!\n");
358 domain_crash_synchronous();
359 }
360 p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
361 } else {
362 __update_guest_eip(inst_len);
363 }
364 } else if (p->dir == IOREQ_WRITE) {
365 p->u.data = regs->eax;
366 __update_guest_eip(inst_len);
367 } else
368 __update_guest_eip(inst_len);
370 p->addr = addr;
371 p->port_mm = 0;
373 /* Check if the packet needs to be intercepted */
374 if (vmx_io_intercept(p)) {
375 /* no blocking & no evtchn notification */
376 return;
377 }
379 set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
380 p->state = STATE_IOREQ_READY;
381 evtchn_send(IOPACKET_PORT);
382 do_block();
383 }
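Editor's note: the bit tests above decode the I/O-instruction exit qualification field by field. A hedged decoder sketch collecting exactly the bits this function consumes (the struct and helper name are hypothetical):

    /* Sketch: the exit-qualification bits vmx_io_instruction() relies on. */
    struct io_exit_info {
        int size;            /* bits 2:0, plus one: access size in bytes    */
        int dir;             /* bit 3: 1 = IN (read), 0 = OUT (write)       */
        int string;          /* bit 4: INS/OUTS, operand taken from ESI/EDI */
        int rep;             /* bit 5: REP prefix, count taken from ECX     */
        int imm_port;        /* bit 6: port encoded in the instruction      */
        unsigned long port;  /* bits 31:16 if imm_port, otherwise DX        */
    };

    static void decode_io_exit(unsigned long q, struct xen_regs *regs,
                               struct io_exit_info *io)
    {
        io->size     = (q & 7) + 1;
        io->dir      = test_bit(3, &q);
        io->string   = test_bit(4, &q);
        io->rep      = test_bit(5, &q);
        io->imm_port = test_bit(6, &q);
        io->port     = io->imm_port ? (q >> 16) & 0xffff : regs->edx & 0xffff;
    }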
385 static int
386 vm86assist(struct exec_domain *d)
387 {
388 /* stay tuned ... */
389 return 0;
390 }
392 #define CASE_GET_REG(REG, reg) \
393 case REG_ ## REG: value = regs->reg; break
395 /*
396 * Write to control registers
397 */
398 static int mov_to_cr(int gp, int cr, struct xen_regs *regs)
399 {
400 unsigned long value;
401 unsigned long old_cr;
402 struct exec_domain *d = current;
404 switch (gp) {
405 CASE_GET_REG(EAX, eax);
406 CASE_GET_REG(ECX, ecx);
407 CASE_GET_REG(EDX, edx);
408 CASE_GET_REG(EBX, ebx);
409 CASE_GET_REG(EBP, ebp);
410 CASE_GET_REG(ESI, esi);
411 CASE_GET_REG(EDI, edi);
412 case REG_ESP:
413 __vmread(GUEST_ESP, &value);
414 break;
415 default:
416 printk("invalid gp: %d\n", gp);
417 __vmx_bug(regs);
418 }
420 VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx,", cr, value);
421 VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx,", (unsigned long) current);
423 switch(cr) {
424 case 0:
425 {
426 unsigned long old_base_mfn = 0, mfn;
428 /*
429 * CR0:
430 * We don't want to lose PE and PG.
431 */
432 __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
433 __vmwrite(CR0_READ_SHADOW, value);
435 if (value & (X86_CR0_PE | X86_CR0_PG) &&
436 !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
437 /*
438 * Enable paging
439 */
440 set_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state);
441 /*
442 * The guest CR3 must point to a guest physical address.
443 */
444 if (!VALID_MFN(mfn = phys_to_machine_mapping(
445 d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)))
446 {
447 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx",
448 d->arch.arch_vmx.cpu_cr3);
449 domain_crash_synchronous(); /* need to take a clean path */
450 }
451 old_base_mfn = pagetable_val(d->arch.guest_table) >> PAGE_SHIFT;
453 /*
454 * Now arch.guest_table points to the machine-physical address.
455 */
456 d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
457 update_pagetables(d);
459 VMX_DBG_LOG(DBG_LEVEL_VMMU, "New arch.guest_table = %lx",
460 (unsigned long) (mfn << PAGE_SHIFT));
462 __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
463 /*
464 * arch->shadow_table should hold the next CR3 for shadow
465 */
466 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, mfn = %lx",
467 d->arch.arch_vmx.cpu_cr3, mfn);
468 /* undo the get_page done in the para virt case */
469 put_page_and_type(&frame_table[old_base_mfn]);
470 } else {
471 if ((value & X86_CR0_PE) == 0) {
472 unsigned long eip;
474 __vmread(GUEST_EIP, &eip);
475 VMX_DBG_LOG(DBG_LEVEL_1,
476 "Disabling CR0.PE at %%eip 0x%lx", eip);
477 if (vm86assist(d)) {
478 __vmread(GUEST_EIP, &eip);
479 VMX_DBG_LOG(DBG_LEVEL_1,
480 "Transfering control to vm86assist %%eip 0x%lx", eip);
481 return 0; /* do not update eip! */
482 }
483 }
484 }
485 break;
486 }
487 case 3:
488 {
489 unsigned long mfn;
491 /*
492 * If paging is not enabled yet, simply copy the value to CR3.
493 */
494 if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
495 d->arch.arch_vmx.cpu_cr3 = value;
496 break;
497 }
499 /*
500 * We make a new one if the shadow does not exist.
501 */
502 if (value == d->arch.arch_vmx.cpu_cr3) {
503 /*
504 * This is a simple TLB flush, implying the guest has
505 * removed some translation or changed page attributes.
506 * We simply invalidate the shadow.
507 */
508 mfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
509 if ((mfn << PAGE_SHIFT) != pagetable_val(d->arch.guest_table))
510 __vmx_bug(regs);
511 shadow_sync_all(d->domain);
512 } else {
513 /*
514 * If different, make a shadow. Check if the PDBR is valid
515 * first.
516 */
517 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx", value);
518 if ((value >> PAGE_SHIFT) > d->domain->max_pages)
519 {
520 VMX_DBG_LOG(DBG_LEVEL_VMMU,
521 "Invalid CR3 value=%lx", value);
522 domain_crash_synchronous(); /* need to take a clean path */
523 }
524 mfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
525 d->arch.guest_table = mk_pagetable(mfn << PAGE_SHIFT);
526 update_pagetables(d);
527 /*
528 * arch.shadow_table should now hold the next CR3 for shadow
529 */
530 d->arch.arch_vmx.cpu_cr3 = value;
531 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx",
532 value);
533 __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
534 }
535 break;
536 }
537 case 4:
538 /* CR4 */
539 if (value & X86_CR4_PAE)
540 __vmx_bug(regs); /* not implemented */
541 __vmread(CR4_READ_SHADOW, &old_cr);
543 __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
544 __vmwrite(CR4_READ_SHADOW, value);
546 /*
547 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
548 * all TLB entries except global entries.
549 */
550 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
551 vmx_shadow_clear_state(d->domain);
552 }
553 break;
554 default:
555 printk("invalid cr: %d\n", gp);
556 __vmx_bug(regs);
557 }
559 return 1;
560 }
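Editor's note: a guest CR3 write in mov_to_cr() above falls into one of three cases; a hedged condensation of those branches, using only calls that already appear in the function (d and value assumed in scope as above):

    /* Sketch: classification of a guest CR3 write in mov_to_cr(). */
    if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
        d->arch.arch_vmx.cpu_cr3 = value;      /* paging off: just remember it  */
    } else if (value == d->arch.arch_vmx.cpu_cr3) {
        shadow_sync_all(d->domain);            /* same PDBR: treat as TLB flush */
    } else {
        /* new PDBR: switch guest_table, rebuild the shadow, load it into CR3 */
        d->arch.guest_table = mk_pagetable(
            phys_to_machine_mapping(value >> PAGE_SHIFT) << PAGE_SHIFT);
        update_pagetables(d);
        __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
    }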
562 #define CASE_SET_REG(REG, reg) \
563 case REG_ ## REG: \
564 regs->reg = value; \
565 break
567 /*
568 * Read from control registers. CR0 and CR4 are read from the shadow.
569 */
570 static void mov_from_cr(int cr, int gp, struct xen_regs *regs)
571 {
572 unsigned long value;
573 struct exec_domain *d = current;
575 if (cr != 3)
576 __vmx_bug(regs);
578 value = (unsigned long) d->arch.arch_vmx.cpu_cr3;
579 ASSERT(value);
581 switch (gp) {
582 CASE_SET_REG(EAX, eax);
583 CASE_SET_REG(ECX, ecx);
584 CASE_SET_REG(EDX, edx);
585 CASE_SET_REG(EBX, ebx);
586 CASE_SET_REG(EBP, ebp);
587 CASE_SET_REG(ESI, esi);
588 CASE_SET_REG(EDI, edi);
589 case REG_ESP:
590 __vmwrite(GUEST_ESP, value);
591 regs->esp = value;
592 break;
593 default:
594 printk("invalid gp: %d\n", gp);
595 __vmx_bug(regs);
596 }
598 VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx,", cr, value);
599 }
601 static int vmx_cr_access(unsigned long exit_qualification, struct xen_regs *regs)
602 {
603 unsigned int gp, cr;
604 unsigned long value;
606 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
607 case TYPE_MOV_TO_CR:
608 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
609 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
610 return mov_to_cr(gp, cr, regs);
611 case TYPE_MOV_FROM_CR:
612 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
613 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
614 mov_from_cr(cr, gp, regs);
615 break;
616 case TYPE_CLTS:
617 __vmread(GUEST_CR0, &value);
618 value &= ~X86_CR0_TS; /* clear TS */
619 __vmwrite(GUEST_CR0, value);
621 __vmread(CR0_READ_SHADOW, &value);
622 value &= ~X86_CR0_TS; /* clear TS */
623 __vmwrite(CR0_READ_SHADOW, value);
624 break;
625 default:
626 __vmx_bug(regs);
627 break;
628 }
629 return 1;
630 }
632 static inline void vmx_do_msr_read(struct xen_regs *regs)
633 {
634 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
635 regs->ecx, regs->eax, regs->edx);
637 rdmsr(regs->ecx, regs->eax, regs->edx);
639 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
640 "ecx=%lx, eax=%lx, edx=%lx",
641 regs->ecx, regs->eax, regs->edx);
642 }
644 /*
645 * Need to use this exit to reschedule
646 */
647 static inline void vmx_vmexit_do_hlt(void)
648 {
649 #if VMX_DEBUG
650 unsigned long eip;
651 __vmread(GUEST_EIP, &eip);
652 #endif
653 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%p", eip);
654 raise_softirq(SCHEDULE_SOFTIRQ);
655 }
657 static inline void vmx_vmexit_do_mwait(void)
658 {
659 #if VMX_DEBUG
660 unsigned long eip;
661 __vmread(GUEST_EIP, &eip);
662 #endif
663 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%p", eip);
664 raise_softirq(SCHEDULE_SOFTIRQ);
665 }
667 #define BUF_SIZ 256
668 #define MAX_LINE 80
669 char print_buf[BUF_SIZ];
670 static int index;
672 static void vmx_print_line(const char c, struct exec_domain *d)
673 {
675 if (index == MAX_LINE || c == '\n') {
676 if (index == MAX_LINE) {
677 print_buf[index++] = c;
678 }
679 print_buf[index] = '\0';
680 printk("(GUEST: %u) %s\n", d->domain->id, (char *) &print_buf);
681 index = 0;
682 }
683 else
684 print_buf[index++] = c;
685 }
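Editor's note: this buffer is fed one character at a time from the EXIT_REASON_VMCALL case below, which passes regs.eax. A purely hypothetical guest-side counterpart (not part of Xen) could issue the VMCALL like this:

    /* Hypothetical guest-side sketch: print one character via the
     * hypervisor by executing VMCALL with the character in EAX. */
    static inline void guest_putc(char c)
    {
        unsigned long eax = (unsigned long) c;

        /* VMCALL is encoded as 0f 01 c1 */
        asm volatile ( ".byte 0x0f,0x01,0xc1" : : "a" (eax) : "memory" );
    }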
687 void save_vmx_execution_context(execution_context_t *ctxt)
688 {
689 __vmread(GUEST_SS_SELECTOR, &ctxt->ss);
690 __vmread(GUEST_ESP, &ctxt->esp);
691 __vmread(GUEST_EFLAGS, &ctxt->eflags);
692 __vmread(GUEST_CS_SELECTOR, &ctxt->cs);
693 __vmread(GUEST_EIP, &ctxt->eip);
695 __vmread(GUEST_GS_SELECTOR, &ctxt->gs);
696 __vmread(GUEST_FS_SELECTOR, &ctxt->fs);
697 __vmread(GUEST_ES_SELECTOR, &ctxt->es);
698 __vmread(GUEST_DS_SELECTOR, &ctxt->ds);
699 }
701 #ifdef XEN_DEBUGGER
702 void save_xen_regs(struct xen_regs *regs)
703 {
704 __vmread(GUEST_SS_SELECTOR, &regs->xss);
705 __vmread(GUEST_ESP, &regs->esp);
706 __vmread(GUEST_EFLAGS, &regs->eflags);
707 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
708 __vmread(GUEST_EIP, &regs->eip);
710 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
711 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
712 __vmread(GUEST_ES_SELECTOR, &regs->xes);
713 __vmread(GUEST_DS_SELECTOR, &regs->xds);
714 }
716 void restore_xen_regs(struct xen_regs *regs)
717 {
718 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
719 __vmwrite(GUEST_ESP, regs->esp);
720 __vmwrite(GUEST_EFLAGS, regs->eflags);
721 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
722 __vmwrite(GUEST_EIP, regs->eip);
724 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
725 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
726 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
727 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
728 }
729 #endif
731 asmlinkage void vmx_vmexit_handler(struct xen_regs regs)
732 {
733 unsigned int exit_reason, idtv_info_field;
734 unsigned long exit_qualification, eip, inst_len = 0;
735 struct exec_domain *ed = current;
736 int error;
738 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
739 __vmx_bug(&regs);
741 perfc_incra(vmexits, exit_reason);
743 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
744 if (idtv_info_field & INTR_INFO_VALID_MASK) {
745 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
746 if ((idtv_info_field & 0xff) == 14) {
747 unsigned long error_code;
749 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
750 printk("#PG error code: %lx\n", error_code);
751 }
752 VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x",
753 idtv_info_field);
754 }
756 /* don't bother logging H/W interrupts */
757 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
758 exit_reason != EXIT_REASON_VMCALL &&
759 exit_reason != EXIT_REASON_IO_INSTRUCTION)
760 VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x", exit_reason);
762 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
763 domain_crash_synchronous();
764 return;
765 }
767 __vmread(GUEST_EIP, &eip);
768 TRACE_3D(TRC_VMX_VMEXIT, ed->domain->id, eip, exit_reason);
770 switch (exit_reason) {
771 case EXIT_REASON_EXCEPTION_NMI:
772 {
773 /*
774 * We do not enable software-interrupt exiting (INT n), so this exit
775 * means either (1) an exception (e.g. #PG) in the guest, or
776 * (2) an NMI.
777 */
778 int error;
779 unsigned int vector;
780 unsigned long va;
782 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
783 && !(vector & INTR_INFO_VALID_MASK))
784 __vmx_bug(&regs);
785 vector &= 0xff;
787 perfc_incra(cause_vector, vector);
789 TRACE_3D(TRC_VMX_VECTOR, ed->domain->id, eip, vector);
790 switch (vector) {
791 #ifdef XEN_DEBUGGER
792 case TRAP_debug:
793 {
794 save_xen_regs(&regs);
795 pdb_handle_exception(1, &regs, 1);
796 restore_xen_regs(&regs);
797 break;
798 }
799 case TRAP_int3:
800 {
801 save_xen_regs(&regs);
802 pdb_handle_exception(3, &regs, 1);
803 restore_xen_regs(&regs);
804 break;
805 }
806 #endif
807 case TRAP_gp_fault:
808 {
809 vmx_do_general_protection_fault(&regs);
810 break;
811 }
812 case TRAP_page_fault:
813 {
814 __vmread(EXIT_QUALIFICATION, &va);
815 __vmread(VM_EXIT_INTR_ERROR_CODE, &regs.error_code);
816 VMX_DBG_LOG(DBG_LEVEL_VMMU,
817 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
818 regs.eax, regs.ebx, regs.ecx, regs.edx, regs.esi,
819 regs.edi);
820 ed->arch.arch_vmx.vmx_platform.mpci.inst_decoder_regs = &regs;
822 if (!(error = vmx_do_page_fault(va, &regs))) {
823 /*
824 * Inject #PG using Interruption-Information Fields
825 */
826 unsigned long intr_fields;
828 intr_fields = (INTR_INFO_VALID_MASK |
829 INTR_TYPE_EXCEPTION |
830 INTR_INFO_DELIEVER_CODE_MASK |
831 TRAP_page_fault);
832 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
833 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, regs.error_code);
834 ed->arch.arch_vmx.cpu_cr2 = va;
835 TRACE_3D(TRC_VMX_INT, ed->domain->id, TRAP_page_fault, va);
836 }
837 break;
838 }
839 case TRAP_nmi:
840 do_nmi(&regs, 0);
841 break;
842 default:
843 printk("unexpected VMexit for exception vector 0x%x\n", vector);
844 //__vmx_bug(&regs);
845 break;
846 }
847 break;
848 }
849 case EXIT_REASON_EXTERNAL_INTERRUPT:
850 {
851 extern int vector_irq[];
852 extern asmlinkage void do_IRQ(struct xen_regs *);
853 extern void smp_apic_timer_interrupt(struct xen_regs *);
854 extern void timer_interrupt(int, void *, struct xen_regs *);
855 unsigned int vector;
857 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
858 && !(vector & INTR_INFO_VALID_MASK))
859 __vmx_bug(&regs);
861 vector &= 0xff;
862 local_irq_disable();
864 if (vector == LOCAL_TIMER_VECTOR) {
865 smp_apic_timer_interrupt(&regs);
866 } else {
867 regs.entry_vector = (vector == FIRST_DEVICE_VECTOR?
868 0 : vector_irq[vector]);
869 do_IRQ(&regs);
870 }
871 break;
872 }
873 case EXIT_REASON_PENDING_INTERRUPT:
874 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
875 MONITOR_CPU_BASED_EXEC_CONTROLS);
876 vmx_intr_assist(ed);
877 break;
878 case EXIT_REASON_TASK_SWITCH:
879 __vmx_bug(&regs);
880 break;
881 case EXIT_REASON_CPUID:
882 __get_instruction_length(inst_len);
883 vmx_vmexit_do_cpuid(regs.eax, &regs);
884 __update_guest_eip(inst_len);
885 break;
886 case EXIT_REASON_HLT:
887 __get_instruction_length(inst_len);
888 __update_guest_eip(inst_len);
889 vmx_vmexit_do_hlt();
890 break;
891 case EXIT_REASON_INVLPG:
892 {
893 unsigned long va;
895 __vmread(EXIT_QUALIFICATION, &va);
896 vmx_vmexit_do_invlpg(va);
897 __get_instruction_length(inst_len);
898 __update_guest_eip(inst_len);
899 break;
900 }
901 case EXIT_REASON_VMCALL:
902 __get_instruction_length(inst_len);
903 __vmread(GUEST_EIP, &eip);
904 __vmread(EXIT_QUALIFICATION, &exit_qualification);
906 vmx_print_line(regs.eax, ed); /* provides the current domain */
907 __update_guest_eip(inst_len);
908 break;
909 case EXIT_REASON_CR_ACCESS:
910 {
911 __vmread(GUEST_EIP, &eip);
912 __get_instruction_length(inst_len);
913 __vmread(EXIT_QUALIFICATION, &exit_qualification);
915 VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len = %lx, exit_qualification = %lx",
916 eip, inst_len, exit_qualification);
917 if (vmx_cr_access(exit_qualification, &regs))
918 __update_guest_eip(inst_len);
919 break;
920 }
921 case EXIT_REASON_DR_ACCESS:
922 __vmread(EXIT_QUALIFICATION, &exit_qualification);
923 vmx_dr_access(exit_qualification, &regs);
924 __get_instruction_length(inst_len);
925 __update_guest_eip(inst_len);
926 break;
927 case EXIT_REASON_IO_INSTRUCTION:
928 __vmread(EXIT_QUALIFICATION, &exit_qualification);
929 __get_instruction_length(inst_len);
930 vmx_io_instruction(&regs, exit_qualification, inst_len);
931 break;
932 case EXIT_REASON_MSR_READ:
933 __get_instruction_length(inst_len);
934 vmx_do_msr_read(&regs);
935 __update_guest_eip(inst_len);
936 break;
937 case EXIT_REASON_MSR_WRITE:
938 __vmread(GUEST_EIP, &eip);
939 VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%p, eax=%p, edx=%p",
940 eip, regs.eax, regs.edx);
941 /* the MSR write itself is simply ignored */
942 __get_instruction_length(inst_len);
943 __update_guest_eip(inst_len);
944 break;
945 case EXIT_REASON_MWAIT_INSTRUCTION:
946 __get_instruction_length(inst_len);
947 __update_guest_eip(inst_len);
948 vmx_vmexit_do_mwait();
949 break;
950 default:
951 __vmx_bug(&regs); /* should not happen */
952 }
954 vmx_intr_assist(ed);
955 return;
956 }
958 asmlinkage void load_cr2(void)
959 {
960 struct exec_domain *d = current;
962 local_irq_disable();
963 asm volatile("movl %0,%%cr2": :"r" (d->arch.arch_vmx.cpu_cr2));
964 }
966 #endif /* CONFIG_VMX */
968 /*
969 * Local variables:
970 * mode: C
971 * c-set-style: "BSD"
972 * c-basic-offset: 4
973 * tab-width: 4
974 * indent-tabs-mode: nil
975 * End:
976 */