
xen/arch/x86/vmx.c @ 3607:cd26f113b1b1

bitkeeper revision 1.1159.231.12 (41f97ef6r1c2TDcgR-o8jFV1IWm5dA)

Lean decoder for MMIO instructions.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Chengyuan Li <chengyuan.li@intel.com>
Signed-off-by: ian.pratt@cl.cam.ac.uk
author iap10@labyrinth.cl.cam.ac.uk
date Thu Jan 27 23:53:26 2005 +0000 (2005-01-27)
parents 002034af24e6
children bc0fbb38cb25 d9cdcc864e90
/*
 * vmx.c: handling VMX architecture-related VM exits
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/shadow.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/vmx.h>
#include <asm/vmx_vmcs.h>
#include <public/io/ioreq.h>

int vmcs_size;
unsigned int opt_vmx_debug_level;

extern long evtchn_send(int lport);
extern long do_block(void);

#define VECTOR_DB   1
#define VECTOR_BP   3
#define VECTOR_GP   13
#define VECTOR_PG   14

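/*
 * start_vmx() checks for VMX support in CPUID leaf 1 (ECX), sets
 * CR4.VMXE, allocates a VMXON region and enters VMX root operation
 * with VMXON.  It returns 1 on success, and 0 if VMX is unavailable
 * or the VMCS allocation fails.
 */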
int start_vmx()
{
    struct vmcs_struct *vmcs;
    unsigned long ecx;
    u64 phys_vmcs;      /* debugging */

    vmcs_size = VMCS_SIZE;
    /*
     * Xen does not fill x86_capability words except 0.
     */
    ecx = cpuid_ecx(1);
    boot_cpu_data.x86_capability[4] = ecx;

    if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
        return 0;

    set_in_cr4(X86_CR4_VMXE);   /* Enable VMXE */

    if (!(vmcs = alloc_vmcs())) {
        printk("Failed to allocate VMCS\n");
        return 0;
    }

    phys_vmcs = (u64) virt_to_phys(vmcs);

    if (!(__vmxon(phys_vmcs))) {
        printk("VMXON is done\n");
    }

    return 1;
}

void stop_vmx()
{
    if (read_cr4() & X86_CR4_VMXE)
        __vmxoff();
}

/*
 * Not all cases receive a valid value in the VM-exit instruction length field.
 */
#define __get_instruction_length(len) \
    __vmread(INSTRUCTION_LEN, &(len)); \
    if ((len) < 1 || (len) > 15) \
        __vmx_bug(&regs);

static void inline __update_guest_eip(unsigned long inst_len)
{
    unsigned long current_eip;

    __vmread(GUEST_EIP, &current_eip);
    __vmwrite(GUEST_EIP, current_eip + inst_len);
}

#include <asm/domain_page.h>

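/*
 * vmx_do_page_fault() handles a #PF taken by the guest while paging is
 * enabled.  It primes the per-domain guest_pl2e_cache so that
 * linear_pg_table[] can be used to read the guest PTE, translates the
 * faulting address to a guest physical address, forwards MMIO accesses
 * to handle_mmio(), and otherwise asks the shadow code to resolve the
 * fault.  Returns non-zero if the fault was handled, and zero if #PG
 * should be reflected back to the guest.
 */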
static int vmx_do_page_fault(unsigned long va, unsigned long error_code)
{
    unsigned long eip, pfn;
    unsigned int index;
    unsigned long gpde = 0, gpte, gpa;
    int result;
    struct exec_domain *ed = current;
    struct mm_struct *m = &ed->mm;

#if VMX_DEBUG
    {
        __vmread(GUEST_EIP, &eip);
        VMX_DBG_LOG(DBG_LEVEL_VMMU,
                "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx\n",
                va, eip, error_code);
    }
#endif
    /*
     * Set up guest page directory cache to make linear_pt_table[] work.
     */
    __guest_get_pl2e(m, va, &gpde);
    if (!(gpde & _PAGE_PRESENT))
        return 0;

    index = (va >> L2_PAGETABLE_SHIFT);
    if (!l2_pgentry_val(m->guest_pl2e_cache[index])) {
        pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];

        VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_do_page_fault: pagetable = %lx\n",
                pagetable_val(m->pagetable));

        m->guest_pl2e_cache[index] =
            mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    }

    if (unlikely(__get_user(gpte, (unsigned long *)
                            &linear_pg_table[va >> PAGE_SHIFT])))
        return 0;

    gpa = (gpte & PAGE_MASK) | (va & (PAGE_SIZE - 1));

    if (mmio_space(gpa))
        handle_mmio(va, gpte, gpa);

    if ((result = shadow_fault(va, error_code)))
        return result;

    return 0;       /* failed to resolve, i.e. raise #PG */
}

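/*
 * A #GP raised by the guest is not handled by Xen; it is reflected
 * straight back by programming the VM-entry interruption-information
 * field with an exception of vector 13 and the original error code.
 */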
static void vmx_do_general_protection_fault(struct xen_regs *regs)
{
    unsigned long eip, error_code;
    unsigned long intr_fields;

    __vmread(GUEST_EIP, &eip);
    __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_general_protection_fault: eip = %lx, error_code = %lx\n",
            eip, error_code);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "eax=%x, ebx=%x, ecx=%x, edx=%x, esi=%x, edi=%x\n",
            regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);

    /* Reflect it back into the guest */
    intr_fields = (INTR_INFO_VALID_MASK |
                   INTR_TYPE_EXCEPTION |
                   INTR_INFO_DELIEVER_CODE_MASK |
                   VECTOR_GP);
    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
    __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
}

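/*
 * CPUID is virtualized by executing the instruction on the host and
 * handing the results back through the guest's registers, with the
 * PSE, PAE and PSE36 feature bits masked out of leaf 1 so the guest
 * does not attempt paging modes that the shadow code here does not
 * handle.
 */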
static void vmx_vmexit_do_cpuid(unsigned long input, struct xen_regs *regs)
{
    int eax, ebx, ecx, edx;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "do_cpuid: (eax) %x, (ebx) %x, (ecx) %x, (edx) %x, (esi) %x, (edi) %x\n",
            regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);

    cpuid(input, &eax, &ebx, &ecx, &edx);

    if (input == 1) {
        clear_bit(X86_FEATURE_PSE, &edx);
        clear_bit(X86_FEATURE_PAE, &edx);
        clear_bit(X86_FEATURE_PSE36, &edx);
    }

    regs->eax = (unsigned long) eax;
    regs->ebx = (unsigned long) ebx;
    regs->ecx = (unsigned long) ecx;
    regs->edx = (unsigned long) edx;

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x\n",
            eip, input, eax, ebx, ecx, edx);
}

#define CASE_GET_REG_P(REG, reg)    \
    case REG_ ## REG: reg_p = &(regs->reg); break

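/*
 * Debug-register accesses exit with an exit qualification that encodes
 * the debug register number, the direction of the move and the general
 * purpose register involved.  MOV to/from DRx is emulated against the
 * per-domain copy in ed->thread.debugreg[]; ESP is special-cased
 * because its live value is held in the VMCS rather than in regs.
 */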
static void vmx_dr_access (unsigned long exit_qualification, struct xen_regs *regs)
{
    unsigned int reg;
    u32 *reg_p = 0;
    struct exec_domain *ed = current;
    u32 eip;

    __vmread(GUEST_EIP, &eip);

    reg = exit_qualification & DEBUG_REG_ACCESS_NUM;

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_dr_access : eip=%08x, reg=%d, exit_qualification = %lx\n",
            eip, reg, exit_qualification);

    switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
    CASE_GET_REG_P(EAX, eax);
    CASE_GET_REG_P(ECX, ecx);
    CASE_GET_REG_P(EDX, edx);
    CASE_GET_REG_P(EBX, ebx);
    CASE_GET_REG_P(EBP, ebp);
    CASE_GET_REG_P(ESI, esi);
    CASE_GET_REG_P(EDI, edi);
    case REG_ESP:
        break;
    default:
        __vmx_bug(regs);
    }

    switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
    case TYPE_MOV_TO_DR:
        /* don't need to check the range */
        if (reg != REG_ESP)
            ed->thread.debugreg[reg] = *reg_p;
        else {
            unsigned long value;
            __vmread(GUEST_ESP, &value);
            ed->thread.debugreg[reg] = value;
        }
        break;
    case TYPE_MOV_FROM_DR:
        if (reg != REG_ESP)
            *reg_p = ed->thread.debugreg[reg];
        else {
            __vmwrite(GUEST_ESP, ed->thread.debugreg[reg]);
        }
        break;
    }
}

/*
 * Invalidate the TLB for va. Invalidate the shadow page corresponding
 * to the address va.
 */
static void vmx_vmexit_do_invlpg(unsigned long va)
{
    unsigned long eip;
    struct exec_domain *d = current;
    unsigned int index;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg:eip=%08lx, va=%08lx\n",
            eip, va);

    /*
     * We do the safest things first, then try to update the shadow
     * by copying from the guest.
     */
    vmx_shadow_invlpg(&d->mm, va);
    index = (va >> L2_PAGETABLE_SHIFT);
    d->mm.guest_pl2e_cache[index] = mk_l2_pgentry(0); /* invalidate pgd cache */
}

static inline void guest_pl2e_cache_invalidate(struct mm_struct *m)
{
    /*
     * Need to optimize this
     */
    memset(m->guest_pl2e_cache, 0, PAGE_SIZE);
}

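/*
 * gva_to_gpa() walks the guest page tables of the current domain: the
 * guest PDE gives the guest page-table frame, which is mapped via
 * guest_pl2e_cache/linear_pg_table so the guest PTE can be read, and
 * the PTE frame plus the page offset yields the guest physical
 * address.  A faulting or not-present entry returns 0.
 */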
inline unsigned long gva_to_gpa(unsigned long gva)
{
    unsigned long gpde, gpte, pfn, index;
    struct exec_domain *d = current;
    struct mm_struct *m = &d->mm;

    __guest_get_pl2e(m, gva, &gpde);
    index = (gva >> L2_PAGETABLE_SHIFT);

    pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];

    m->guest_pl2e_cache[index] =
        mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[gva >> PAGE_SHIFT])) )
    {
        printk("gva_to_gpa EXIT: read gpte faulted" );
        return 0;
    }

    if ( !(gpte & _PAGE_PRESENT) )
    {
        printk("gva_to_gpa - EXIT: gpte not present (%lx)",gpte );
        return 0;
    }

    return (gpte & PAGE_MASK) + (gva & ~PAGE_MASK);
}

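/*
 * I/O instruction exits are turned into ioreq_t requests for the
 * device model.  The exit qualification encodes everything needed to
 * build the request: bits 0-2 give the access size minus one, bit 3
 * the direction (IN vs OUT), bit 4 whether this is a string
 * instruction (INS/OUTS), bit 5 whether a REP prefix was present, and
 * bit 6 whether the port came from an immediate operand (bits 16-31)
 * rather than DX.  Writes to port 0x80 (the POST/delay port) are
 * simply discarded.  The request is published on the shared I/O page,
 * the device model is notified through IOPACKET_PORT, and the domain
 * blocks until the emulation completes.
 */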
static void vmx_io_instruction(struct xen_regs *regs,
        unsigned long exit_qualification, unsigned long inst_len)
{
    struct exec_domain *d = current;
    vcpu_iodata_t *vio;
    ioreq_t *p;
    unsigned long addr;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_io_instruction: eip=%08lx, exit_qualification = %lx\n",
            eip, exit_qualification);

    if (test_bit(6, &exit_qualification))
        addr = (exit_qualification >> 16) & (0xffff);
    else
        addr = regs->edx & 0xffff;

    if (addr == 0x80) {
        __update_guest_eip(inst_len);
        return;
    }

    vio = (vcpu_iodata_t *) d->thread.arch_vmx.vmx_platform.shared_page_va;
    if (vio == 0) {
        VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx\n", (unsigned long) vio);
        domain_crash();
    }
    p = &vio->vp_ioreq;
    p->dir = test_bit(3, &exit_qualification);
    set_bit(ARCH_VMX_IO_WAIT, &d->thread.arch_vmx.flags);

    p->pdata_valid = 0;
    p->count = 1;
    p->size = (exit_qualification & 7) + 1;

    if (test_bit(4, &exit_qualification)) {
        unsigned long eflags;

        __vmread(GUEST_EFLAGS, &eflags);
        p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
        p->pdata_valid = 1;
        p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
            regs->esi
            : regs->edi);
        p->u.pdata = (void *) gva_to_gpa(p->u.data);
        if (test_bit(5, &exit_qualification))
            p->count = regs->ecx;
        if ((p->u.data & PAGE_MASK) !=
            ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
            printk("stringio crosses page boundary!\n");
            if (p->u.data & (p->size - 1)) {
                printk("Not aligned I/O!\n");
                domain_crash();
            }
            p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
        } else {
            __update_guest_eip(inst_len);
        }
    } else if (p->dir == IOREQ_WRITE) {
        p->u.data = regs->eax;
        __update_guest_eip(inst_len);
    } else
        __update_guest_eip(inst_len);

    p->addr = addr;
    p->port_mm = 0;
    p->state = STATE_IOREQ_READY;
    evtchn_send(IOPACKET_PORT);
    do_block();
}

#define CASE_GET_REG(REG, reg)  \
    case REG_ ## REG: value = regs->reg; break

/*
 * Write to control registers
 */
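/*
 * MOV to CRx is emulated here.  CR0 writes always keep PE and PG set
 * in the real guest CR0, and the first transition to paging switches
 * the domain to full 32-bit shadow mode.  CR3 writes either flush the
 * current shadow (same value: the guest asked for a TLB flush) or
 * build a new shadow table for the new guest page directory.  CR4
 * writes are shadowed, and changes to PSE/PGE/PAE additionally flush
 * the shadow state to mimic the TLB invalidation the hardware would
 * perform.
 */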
static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
{
    unsigned long value;
    unsigned long old_cr;
    struct exec_domain *d = current;

    switch (gp) {
    CASE_GET_REG(EAX, eax);
    CASE_GET_REG(ECX, ecx);
    CASE_GET_REG(EDX, edx);
    CASE_GET_REG(EBX, ebx);
    CASE_GET_REG(EBP, ebp);
    CASE_GET_REG(ESI, esi);
    CASE_GET_REG(EDI, edi);
    case REG_ESP:
        __vmread(GUEST_ESP, &value);
        break;
    default:
        printk("invalid gp: %d\n", gp);
        __vmx_bug(regs);
    }

    VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, \n", cr, value);
    VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx, \n", (unsigned long) current);

    switch(cr) {
    case 0:
    {
        unsigned long old_base_pfn = 0, pfn;

        /*
         * CR0:
         * We don't want to lose PE and PG.
         */
        __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
        __vmwrite(CR0_READ_SHADOW, value);

        if (value & (X86_CR0_PE | X86_CR0_PG) &&
            !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
            /*
             * Enable paging
             */
            set_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state);
            /*
             * The guest CR3 must be pointing to the guest physical.
             */
            if (!(pfn = phys_to_machine_mapping[
                      d->thread.arch_vmx.cpu_cr3 >> PAGE_SHIFT]))
            {
                VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx\n",
                        d->thread.arch_vmx.cpu_cr3);
                domain_crash(); /* need to take a clean path */
            }
            old_base_pfn = pagetable_val(d->mm.pagetable) >> PAGE_SHIFT;
            /*
             * Now mm.pagetable points to machine physical.
             */
            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);

            VMX_DBG_LOG(DBG_LEVEL_VMMU, "New mm.pagetable = %lx\n",
                    (unsigned long) (pfn << PAGE_SHIFT));

            shadow_lock(&d->mm);
            shadow_mode_enable(d->domain, SHM_full_32);
            shadow_unlock(&d->mm);

            __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
            /*
             * mm->shadow_table should hold the next CR3 for shadow
             */
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, pfn = %lx\n",
                    d->thread.arch_vmx.cpu_cr3, pfn);
            put_page_and_type(&frame_table[old_base_pfn]);
        }
        break;
    }
    case 3:
    {
        unsigned long pfn;

        /*
         * If paging is not enabled yet, simply copy the value to CR3.
         */
        if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
            d->thread.arch_vmx.cpu_cr3 = value;
            return;
        }

        guest_pl2e_cache_invalidate(&d->mm);
        /*
         * We make a new one if the shadow does not exist.
         */
        if (value == d->thread.arch_vmx.cpu_cr3) {
            /*
             * This is simple TLB flush, implying the guest has
             * removed some translation or changed page attributes.
             * We simply invalidate the shadow.
             */
            pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
            if ((pfn << PAGE_SHIFT) != pagetable_val(d->mm.pagetable))
                __vmx_bug(regs);
            vmx_shadow_clear_state(&d->mm);
            shadow_invalidate(&d->mm);
        } else {
            /*
             * If different, make a shadow. Check if the PDBR is valid
             * first.
             */
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx\n", value);
            if ((value >> PAGE_SHIFT) > d->domain->max_pages)
            {
                VMX_DBG_LOG(DBG_LEVEL_VMMU,
                        "Invalid CR3 value=%lx\n", value);
                domain_crash(); /* need to take a clean path */
            }
            pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
            vmx_shadow_clear_state(&d->mm);
            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
            shadow_mk_pagetable(&d->mm);
            /*
             * mm->shadow_table should hold the next CR3 for shadow
             */
            d->thread.arch_vmx.cpu_cr3 = value;
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx\n",
                    value);
            __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
        }
        break;
    }
    case 4:
        /* CR4 */
        if (value & X86_CR4_PAE)
            __vmx_bug(regs);    /* not implemented */
        __vmread(CR4_READ_SHADOW, &old_cr);

        __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
        __vmwrite(CR4_READ_SHADOW, value);

        /*
         * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
         * all TLB entries except global entries.
         */
        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
            vmx_shadow_clear_state(&d->mm);
            shadow_invalidate(&d->mm);
            guest_pl2e_cache_invalidate(&d->mm);
        }
        break;
    default:
        printk("invalid cr: %d\n", cr);
        __vmx_bug(regs);
    }
}

#define CASE_SET_REG(REG, reg)  \
    case REG_ ## REG:           \
        regs->reg = value;      \
        break

/*
 * Read from control registers. CR0 and CR4 are read from the shadow.
 */
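/*
 * Only MOV from CR3 reaches this point: CR0 and CR4 reads are
 * satisfied by the hardware from the read shadows without a VM exit.
 * The value handed back is the guest's own CR3, kept in
 * d->thread.arch_vmx.cpu_cr3, not the machine CR3 of the shadow.
 */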
static void mov_from_cr(int cr, int gp, struct xen_regs *regs)
{
    unsigned long value;
    struct exec_domain *d = current;

    if (cr != 3)
        __vmx_bug(regs);

    value = (unsigned long) d->thread.arch_vmx.cpu_cr3;
    ASSERT(value);

    switch (gp) {
    CASE_SET_REG(EAX, eax);
    CASE_SET_REG(ECX, ecx);
    CASE_SET_REG(EDX, edx);
    CASE_SET_REG(EBX, ebx);
    CASE_SET_REG(EBP, ebp);
    CASE_SET_REG(ESI, esi);
    CASE_SET_REG(EDI, edi);
    case REG_ESP:
        __vmwrite(GUEST_ESP, value);
        regs->esp = value;
        break;
    default:
        printk("invalid gp: %d\n", gp);
        __vmx_bug(regs);
    }

    VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx, \n", cr, value);
}

static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs *regs)
{
    unsigned int gp, cr;
    unsigned long value;

    switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
    case TYPE_MOV_TO_CR:
        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
        mov_to_cr(gp, cr, regs);
        break;
    case TYPE_MOV_FROM_CR:
        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
        mov_from_cr(cr, gp, regs);
        break;
    case TYPE_CLTS:
        __vmread(GUEST_CR0, &value);
        value &= ~X86_CR0_TS; /* clear TS */
        __vmwrite(GUEST_CR0, value);

        __vmread(CR0_READ_SHADOW, &value);
        value &= ~X86_CR0_TS; /* clear TS */
        __vmwrite(CR0_READ_SHADOW, value);
        break;
    default:
        __vmx_bug(regs);
        break;
    }
}

static inline void vmx_do_msr_read(struct xen_regs *regs)
{
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%x, eax=%x, edx=%x",
            regs->ecx, regs->eax, regs->edx);

    rdmsr(regs->ecx, regs->eax, regs->edx);

    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: ecx=%x, eax=%x, edx=%x",
            regs->ecx, regs->eax, regs->edx);
}

/*
 * Need to use this exit to reschedule
 */
static inline void vmx_vmexit_do_hlt()
{
#if VMX_DEBUG
    unsigned long eip;
    __vmread(GUEST_EIP, &eip);
#endif
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%08lx\n", eip);
    __enter_scheduler();
}

static inline void vmx_vmexit_do_mwait()
{
#if VMX_DEBUG
    unsigned long eip;
    __vmread(GUEST_EIP, &eip);
#endif
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%08lx\n", eip);
    __enter_scheduler();
}

#define BUF_SIZ     256
#define MAX_LINE    80
char print_buf[BUF_SIZ];
static int index;

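/*
 * Rudimentary guest console: the VMCALL exit handler passes the
 * character from the guest's EAX here.  Characters are accumulated in
 * print_buf and flushed with printk(), prefixed by the domain id,
 * whenever a newline arrives or the line reaches MAX_LINE characters.
 */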
static void vmx_print_line(const char c, struct exec_domain *d)
{
    if (index == MAX_LINE || c == '\n') {
        if (index == MAX_LINE) {
            print_buf[index++] = c;
        }
        print_buf[index] = '\0';
        printk("(GUEST: %u) %s\n", d->domain->id, (char *) &print_buf);
        index = 0;
    }
    else
        print_buf[index++] = c;
}

#ifdef XEN_DEBUGGER
void save_xen_regs(struct xen_regs *regs)
{
    __vmread(GUEST_SS_SELECTOR, &regs->xss);
    __vmread(GUEST_ESP, &regs->esp);
    __vmread(GUEST_EFLAGS, &regs->eflags);
    __vmread(GUEST_CS_SELECTOR, &regs->xcs);
    __vmread(GUEST_EIP, &regs->eip);

    __vmread(GUEST_GS_SELECTOR, &regs->xgs);
    __vmread(GUEST_FS_SELECTOR, &regs->xfs);
    __vmread(GUEST_ES_SELECTOR, &regs->xes);
    __vmread(GUEST_DS_SELECTOR, &regs->xds);
}

void restore_xen_regs(struct xen_regs *regs)
{
    __vmwrite(GUEST_SS_SELECTOR, regs->xss);
    __vmwrite(GUEST_ESP, regs->esp);
    __vmwrite(GUEST_EFLAGS, regs->eflags);
    __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
    __vmwrite(GUEST_EIP, regs->eip);

    __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
    __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
    __vmwrite(GUEST_ES_SELECTOR, regs->xes);
    __vmwrite(GUEST_DS_SELECTOR, regs->xds);
}
#endif

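/*
 * Main VM-exit dispatcher.  Any event that was being delivered when
 * the exit occurred (IDT-vectoring information) is re-injected on the
 * next VM entry, then the exit reason is decoded and handled: guest
 * exceptions and NMIs, external interrupts (forwarded to Xen's own
 * handlers), interrupt-window exits, CPUID, HLT, INVLPG, VMCALL,
 * CR/DR accesses, I/O instructions, RDMSR/WRMSR and MWAIT.
 */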
asmlinkage void vmx_vmexit_handler(struct xen_regs regs)
{
    unsigned int exit_reason, idtv_info_field;
    unsigned long exit_qualification, eip, inst_len = 0;
    struct exec_domain *d = current;
    int error;

    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
        __vmx_bug(&regs);

    __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
    if (idtv_info_field & INTR_INFO_VALID_MASK) {
        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
        if ((idtv_info_field & 0xff) == 14) {
            unsigned long error_code;

            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
            printk("#PG error code: %lx\n", error_code);
        }
        VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x\n",
                idtv_info_field);
    }

    /* don't bother logging H/W interrupts, VMCALLs or I/O instructions */
    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
        exit_reason != EXIT_REASON_VMCALL &&
        exit_reason != EXIT_REASON_IO_INSTRUCTION)
        VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x\n", exit_reason);

    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        __vmread(GUEST_EIP, &eip);
        domain_crash();
        return;
    }

    switch (exit_reason) {
    case EXIT_REASON_EXCEPTION_NMI:
    {
        /*
         * We don't enable software-interrupt exiting (INT n), so here
         * we can only see (1) an exception (e.g. #PG) raised by the
         * guest, or (2) an NMI.
         */
        int error;
        unsigned int vector;
        unsigned long va;
        unsigned long error_code;

        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
            && !(vector & INTR_INFO_VALID_MASK))
            __vmx_bug(&regs);
        vector &= 0xff;

        switch (vector) {
#ifdef XEN_DEBUGGER
        case VECTOR_DB:
        {
            save_xen_regs(&regs);
            pdb_handle_exception(1, &regs, 1);
            restore_xen_regs(&regs);
            break;
        }
        case VECTOR_BP:
        {
            save_xen_regs(&regs);
            pdb_handle_exception(3, &regs, 1);
            restore_xen_regs(&regs);
            break;
        }
#endif
        case VECTOR_GP:
        {
            vmx_do_general_protection_fault(&regs);
            break;
        }
        case VECTOR_PG:
        {
            __vmread(EXIT_QUALIFICATION, &va);
            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
            VMX_DBG_LOG(DBG_LEVEL_VMMU,
                    "eax=%x, ebx=%x, ecx=%x, edx=%x, esi=%x, edi=%x\n",
                    regs.eax, regs.ebx, regs.ecx, regs.edx, regs.esi, regs.edi);
            d->thread.arch_vmx.vmx_platform.mpci.inst_decoder_regs = &regs;

            if (!(error = vmx_do_page_fault(va, error_code))) {
                /*
                 * Inject #PG using Interruption-Information Fields
                 */
                unsigned long intr_fields;

                intr_fields = (INTR_INFO_VALID_MASK |
                               INTR_TYPE_EXCEPTION |
                               INTR_INFO_DELIEVER_CODE_MASK |
                               VECTOR_PG);
                __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
                d->thread.arch_vmx.cpu_cr2 = va;
            }
            break;
        }
        default:
            __vmx_bug(&regs);
            break;
        }
        break;
    }
    case EXIT_REASON_EXTERNAL_INTERRUPT:
    {
        extern int vector_irq[];
        extern asmlinkage void do_IRQ(struct xen_regs);
        extern void smp_apic_timer_interrupt(struct xen_regs *);
        extern void timer_interrupt(int, void *, struct xen_regs *);
        unsigned int vector;

        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
            && !(vector & INTR_INFO_VALID_MASK))
            __vmx_bug(&regs);

        vector &= 0xff;
        local_irq_disable();

        if (vector == LOCAL_TIMER_VECTOR) {
            smp_apic_timer_interrupt(&regs);
        } else {
            regs.entry_vector = (vector == FIRST_DEVICE_VECTOR?
                                 0 : vector_irq[vector]);
            do_IRQ(regs);
        }
        break;
    }
    case EXIT_REASON_PENDING_INTERRUPT:
        __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                  MONITOR_CPU_BASED_EXEC_CONTROLS);
        vmx_intr_assist(d);
        break;
    case EXIT_REASON_TASK_SWITCH:
        __vmx_bug(&regs);
        break;
    case EXIT_REASON_CPUID:
        __get_instruction_length(inst_len);
        vmx_vmexit_do_cpuid(regs.eax, &regs);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_HLT:
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        vmx_vmexit_do_hlt();
        break;
    case EXIT_REASON_INVLPG:
    {
        unsigned long va;

        __vmread(EXIT_QUALIFICATION, &va);
        vmx_vmexit_do_invlpg(va);
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    }
    case EXIT_REASON_VMCALL:
        __get_instruction_length(inst_len);
        __vmread(GUEST_EIP, &eip);
        __vmread(EXIT_QUALIFICATION, &exit_qualification);

        vmx_print_line(regs.eax, d); /* provides the current domain */
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_CR_ACCESS:
    {
        __vmread(GUEST_EIP, &eip);
        __get_instruction_length(inst_len);
        __vmread(EXIT_QUALIFICATION, &exit_qualification);

        VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx\n",
                eip, inst_len, exit_qualification);
        vmx_cr_access(exit_qualification, &regs);
        __update_guest_eip(inst_len);
        break;
    }
    case EXIT_REASON_DR_ACCESS:
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        vmx_dr_access(exit_qualification, &regs);
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_IO_INSTRUCTION:
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        __get_instruction_length(inst_len);
        vmx_io_instruction(&regs, exit_qualification, inst_len);
        break;
    case EXIT_REASON_MSR_READ:
        __get_instruction_length(inst_len);
        vmx_do_msr_read(&regs);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_MSR_WRITE:
        __vmread(GUEST_EIP, &eip);
        VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%08lx, eax=%08x, edx=%08x",
                eip, regs.eax, regs.edx);
        /* MSR writes are simply ignored for now */
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_MWAIT_INSTRUCTION:
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        vmx_vmexit_do_mwait();
        break;
    default:
        __vmx_bug(&regs);       /* should not happen */
    }
    return;
}

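/*
 * Restores the guest's faulting address (saved in cpu_cr2 by the
 * page-fault handling above) into the hardware CR2 register, with
 * interrupts disabled, before the guest is resumed.
 */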
asmlinkage void load_cr2(void)
{
    struct exec_domain *d = current;

    local_irq_disable();
    asm volatile("movl %0,%%cr2": :"r" (d->thread.arch_vmx.cpu_cr2));
}