debuggers.hg: xen/arch/x86/vmx.c @ 3606:002034af24e6

bitkeeper revision 1.1159.231.11 (41f97e1amKuDHSrW5ZWKsTy4ZIfNRA)

Fix the direction flag for string I/O instructions.

Signed-off-by: Jun Nakajima <jun.nakajima@intel.com>
Signed-off-by: Chengyuan Li <chengyuan.li@intel.com>
Signed-off-by: ian.pratt@cl.cam.ac.uk

author   iap10@labyrinth.cl.cam.ac.uk
date     Thu Jan 27 23:49:46 2005 +0000 (2005-01-27)
parents  a396b30da79d
children cd26f113b1b1
1 /*
2 * vmx.c: handling VMX architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms and conditions of the GNU General Public License,
7 * version 2, as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
12 * more details.
13 *
14 * You should have received a copy of the GNU General Public License along with
15 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
16 * Place - Suite 330, Boston, MA 02111-1307 USA.
17 *
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/sched.h>
24 #include <asm/current.h>
25 #include <asm/io.h>
26 #include <asm/irq.h>
27 #include <asm/shadow.h>
28 #include <asm/regs.h>
29 #include <asm/cpufeature.h>
30 #include <asm/processor.h>
31 #include <asm/types.h>
32 #include <asm/msr.h>
33 #include <asm/spinlock.h>
34 #include <asm/vmx.h>
35 #include <asm/vmx_vmcs.h>
36 #include <public/io/ioreq.h>
38 int vmcs_size;
39 unsigned int opt_vmx_debug_level;
41 extern long evtchn_send(int lport);
42 extern long do_block(void);
44 #define VECTOR_DB 1
45 #define VECTOR_BP 3
46 #define VECTOR_GP 13
47 #define VECTOR_PG 14
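/*
 * Probe and enable VMX on this CPU: check the VMXE feature bit from
 * CPUID leaf 1, set CR4.VMXE, allocate a VMCS-sized region and execute
 * VMXON on its physical address. Returns 1 on success, 0 otherwise.
 */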
49 int start_vmx()
50 {
51 struct vmcs_struct *vmcs;
52 unsigned long ecx;
53 u64 phys_vmcs; /* debugging */
55 vmcs_size = VMCS_SIZE;
56 /*
57 * Xen does not fill x86_capability words except 0.
58 */
59 ecx = cpuid_ecx(1);
60 boot_cpu_data.x86_capability[4] = ecx;
62 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
63 return 0;
65 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
67 if (!(vmcs = alloc_vmcs())) {
68 printk("Failed to allocate VMCS\n");
69 return 0;
70 }
72 phys_vmcs = (u64) virt_to_phys(vmcs);
74 if (!(__vmxon(phys_vmcs))) {
75 printk("VMXON is done\n");
76 }
78 return 1;
79 }
81 void stop_vmx()
82 {
83 if (read_cr4() & X86_CR4_VMXE)
84 __vmxoff();
85 }
87 /*
88 * Not all cases receive a valid value in the VM-exit instruction length field.
89 */
90 #define __get_instruction_length(len) \
91 __vmread(INSTRUCTION_LEN, &(len)); \
92 if ((len) < 1 || (len) > 15) \
93 __vmx_bug(&regs);
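/*
 * Advance the guest EIP past the instruction that caused the VM exit,
 * using the instruction length read from the VMCS.
 */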
95 static void inline __update_guest_eip(unsigned long inst_len)
96 {
97 unsigned long current_eip;
99 __vmread(GUEST_EIP, &current_eip);
100 __vmwrite(GUEST_EIP, current_eip + inst_len);
101 }
104 #include <asm/domain_page.h>
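/*
 * Handle a page fault taken while the guest was running. The guest PDE
 * covering va is cached in guest_pl2e_cache so the shadow code can walk
 * the guest page tables, then the fault is handed to shadow_fault().
 * A zero return means the fault could not be resolved and the caller
 * reflects #PG back into the guest.
 */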
106 static int vmx_do_page_fault(unsigned long va, unsigned long error_code)
107 {
108 unsigned long eip, pfn;
109 unsigned int index;
110 unsigned long gpde = 0;
111 int result;
112 struct exec_domain *ed = current;
113 struct mm_struct *m = &ed->mm;
115 #if VMX_DEBUG
116 {
117 __vmread(GUEST_EIP, &eip);
118 VMX_DBG_LOG(DBG_LEVEL_VMMU,
119 "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx\n",
120 va, eip, error_code);
121 }
122 #endif
123 /*
124 * Set up the guest page directory cache so that linear_pg_table[] works.
125 */
126 __guest_get_pl2e(m, va, &gpde);
127 if (!(gpde & _PAGE_PRESENT))
128 return 0;
130 index = (va >> L2_PAGETABLE_SHIFT);
131 if (!l2_pgentry_val(m->guest_pl2e_cache[index])) {
132 pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];
134 VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_do_page_fault: pagetable = %lx\n",
135 pagetable_val(m->pagetable));
137 m->guest_pl2e_cache[index] =
138 mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
139 }
141 if ((result = shadow_fault(va, error_code)))
142 return result;
144 return 0; /* failed to resolve, i.e. raise #PG */
145 }
147 static void vmx_do_general_protection_fault(struct xen_regs *regs)
148 {
149 unsigned long eip, error_code;
150 unsigned long intr_fields;
152 __vmread(GUEST_EIP, &eip);
153 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
155 VMX_DBG_LOG(DBG_LEVEL_1,
156 "vmx_general_protection_fault: eip = %lx, error_code = %lx\n",
157 eip, error_code);
159 VMX_DBG_LOG(DBG_LEVEL_1,
160 "eax=%x, ebx=%x, ecx=%x, edx=%x, esi=%x, edi=%x\n",
161 regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);
163 /* Reflect it back into the guest */
164 intr_fields = (INTR_INFO_VALID_MASK |
165 INTR_TYPE_EXCEPTION |
166 INTR_INFO_DELIEVER_CODE_MASK |
167 VECTOR_GP);
168 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
169 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
170 }
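/*
 * Emulate CPUID for the guest. For leaf 1, the PSE, PAE and PSE36
 * feature bits are cleared so the guest does not attempt paging modes
 * that the shadow code here does not handle.
 */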
172 static void vmx_vmexit_do_cpuid(unsigned long input, struct xen_regs *regs)
173 {
174 int eax, ebx, ecx, edx;
175 unsigned long eip;
177 __vmread(GUEST_EIP, &eip);
179 VMX_DBG_LOG(DBG_LEVEL_1,
180 "do_cpuid: (eax) %x, (ebx) %x, (ecx) %x, (edx) %x, (esi) %x, (edi) %x\n", regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);
182 cpuid(input, &eax, &ebx, &ecx, &edx);
184 if (input == 1) {
185 clear_bit(X86_FEATURE_PSE, &edx);
186 clear_bit(X86_FEATURE_PAE, &edx);
187 clear_bit(X86_FEATURE_PSE36, &edx);
188 }
190 regs->eax = (unsigned long) eax;
191 regs->ebx = (unsigned long) ebx;
192 regs->ecx = (unsigned long) ecx;
193 regs->edx = (unsigned long) edx;
195 VMX_DBG_LOG(DBG_LEVEL_1,
196 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x\n",
197 eip, input, eax, ebx, ecx, edx);
199 }
201 #define CASE_GET_REG_P(REG, reg) \
202 case REG_ ## REG: reg_p = &(regs->reg); break
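/*
 * Emulate guest accesses to the debug registers. The exit qualification
 * encodes the debug register number, the direction (MOV to or from DR)
 * and the general-purpose register involved.
 */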
204 static void vmx_dr_access (unsigned long exit_qualification, struct xen_regs *regs)
205 {
206 unsigned int reg;
207 u32 *reg_p = 0;
208 struct exec_domain *ed = current;
209 u32 eip;
211 __vmread(GUEST_EIP, &eip);
213 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
215 VMX_DBG_LOG(DBG_LEVEL_1,
216 "vmx_dr_access : eip=%08x, reg=%d, exit_qualification = %lx\n",
217 eip, reg, exit_qualification);
219 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
220 CASE_GET_REG_P(EAX, eax);
221 CASE_GET_REG_P(ECX, ecx);
222 CASE_GET_REG_P(EDX, edx);
223 CASE_GET_REG_P(EBX, ebx);
224 CASE_GET_REG_P(EBP, ebp);
225 CASE_GET_REG_P(ESI, esi);
226 CASE_GET_REG_P(EDI, edi);
227 case REG_ESP:
228 break;
229 default:
230 __vmx_bug(regs);
231 }
233 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
234 case TYPE_MOV_TO_DR:
235 /* don't need to check the range */
236 if (reg != REG_ESP)
237 ed->thread.debugreg[reg] = *reg_p;
238 else {
239 unsigned long value;
240 __vmread(GUEST_ESP, &value);
241 ed->thread.debugreg[reg] = value;
242 }
243 break;
244 case TYPE_MOV_FROM_DR:
245 if (reg != REG_ESP)
246 *reg_p = ed->thread.debugreg[reg];
247 else {
248 __vmwrite(GUEST_ESP, ed->thread.debugreg[reg]);
249 }
250 break;
251 }
252 }
254 /*
255 * Invalidate the TLB for va. Invalidate the shadow page corresponding
256 * to the address va.
257 */
258 static void vmx_vmexit_do_invlpg(unsigned long va)
259 {
260 unsigned long eip;
261 struct exec_domain *d = current;
262 unsigned int index;
264 __vmread(GUEST_EIP, &eip);
266 VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg:eip=%08lx, va=%08lx\n",
267 eip, va);
269 /*
270 * We do the safest thing first: invalidate the shadow entry, then
271 * drop the cached guest PDE so it is re-read from the guest.
272 */
273 vmx_shadow_invlpg(&d->mm, va);
274 index = (va >> L2_PAGETABLE_SHIFT);
275 d->mm.guest_pl2e_cache[index] = mk_l2_pgentry(0); /* invalidate pgd cache */
276 }
278 static inline void guest_pl2e_cache_invalidate(struct mm_struct *m)
279 {
280 /*
281 * Need to optimize this
282 */
283 memset(m->guest_pl2e_cache, 0, PAGE_SIZE);
284 }
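/*
 * Translate a guest virtual address to a guest physical address by
 * walking the guest page tables: cache the guest PDE for gva, then
 * read the guest PTE through linear_pg_table. Returns 0 on failure.
 */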
286 static inline unsigned long gva_to_gpa(unsigned long gva)
287 {
288 unsigned long gpde, gpte, pfn, index;
289 struct exec_domain *d = current;
290 struct mm_struct *m = &d->mm;
292 __guest_get_pl2e(m, gva, &gpde);
293 index = (gva >> L2_PAGETABLE_SHIFT);
295 pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];
297 m->guest_pl2e_cache[index] =
298 mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
300 if ( unlikely(__get_user(gpte, (unsigned long *)
301 &linear_pg_table[gva >> PAGE_SHIFT])) )
302 {
303 printk("gva_to_gpa EXIT: read gpte faulted" );
304 return 0;
305 }
307 if ( !(gpte & _PAGE_PRESENT) )
308 {
309 printk("gva_to_gpa - EXIT: gpte not present (%lx)",gpte );
310 return 0;
311 }
313 return (gpte & PAGE_MASK) + (gva & ~PAGE_MASK);
314 }
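/*
 * Handle an I/O instruction exit by building an ioreq for the device
 * model. In the exit qualification, bits 0-2 give the access size minus
 * one, bit 3 the direction (1 = IN), bit 4 whether this is a string
 * instruction, bit 5 whether it carries a REP prefix, bit 6 whether the
 * port is an immediate operand (otherwise it is in DX), and bits 16-31
 * the immediate port number. Accesses to port 0x80 (the POST diagnostic
 * port) are silently dropped.
 */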
316 static void vmx_io_instruction(struct xen_regs *regs,
317 unsigned long exit_qualification, unsigned long inst_len)
318 {
319 struct exec_domain *d = current;
320 vcpu_iodata_t *vio;
321 ioreq_t *p;
322 unsigned long addr;
323 unsigned long eip;
325 __vmread(GUEST_EIP, &eip);
327 VMX_DBG_LOG(DBG_LEVEL_1,
328 "vmx_io_instruction: eip=%08lx, exit_qualification = %lx\n",
329 eip, exit_qualification);
331 if (test_bit(6, &exit_qualification))
332 addr = (exit_qualification >> 16) & (0xffff);
333 else
334 addr = regs->edx & 0xffff;
336 if (addr == 0x80) {
337 __update_guest_eip(inst_len);
338 return;
339 }
341 vio = (vcpu_iodata_t *) d->thread.arch_vmx.vmx_platform.shared_page_va;
342 if (vio == 0) {
343 VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx\n", (unsigned long) vio);
344 domain_crash();
345 }
346 p = &vio->vp_ioreq;
347 p->dir = test_bit(3, &exit_qualification);
348 set_bit(ARCH_VMX_IO_WAIT, &d->thread.arch_vmx.flags);
350 p->pdata_valid = 0;
351 p->count = 1;
352 p->size = (exit_qualification & 7) + 1;
354 if (test_bit(4, &exit_qualification)) {
355 unsigned long eflags;
357 __vmread(GUEST_EFLAGS, &eflags);
358 p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
359 p->pdata_valid = 1;
360 p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
361 regs->esi
362 : regs->edi);
363 p->u.pdata = (void *) gva_to_gpa(p->u.data);
364 if (test_bit(5, &exit_qualification))
365 p->count = regs->ecx;
366 if ((p->u.data & PAGE_MASK) !=
367 ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
368 printk("stringio crosses page boundary!\n");
369 if (p->u.data & (p->size - 1)) {
370 printk("Not aligned I/O!\n");
371 domain_crash();
372 }
373 p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
374 } else {
375 __update_guest_eip(inst_len);
376 }
377 } else if (p->dir == IOREQ_WRITE) {
378 p->u.data = regs->eax;
379 __update_guest_eip(inst_len);
380 } else
381 __update_guest_eip(inst_len);
383 p->addr = addr;
384 p->port_mm = 0;
385 p->state = STATE_IOREQ_READY;
386 evtchn_send(IOPACKET_PORT);
387 do_block();
388 }
390 #define CASE_GET_REG(REG, reg) \
391 case REG_ ## REG: value = regs->reg; break
393 /*
394 * Write to control registers
395 */
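/*
 * CR0: turning on PE/PG for the first time enables shadow paging and
 *      repoints mm.pagetable at the machine frame backing the guest CR3.
 * CR3: writing the current value is treated as a TLB flush; a new value
 *      causes a fresh shadow page table to be built.
 * CR4: PAE is not implemented; toggling PSE/PGE/PAE flushes the shadow.
 */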
396 static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
397 {
398 unsigned long value;
399 unsigned long old_cr;
400 struct exec_domain *d = current;
402 switch (gp) {
403 CASE_GET_REG(EAX, eax);
404 CASE_GET_REG(ECX, ecx);
405 CASE_GET_REG(EDX, edx);
406 CASE_GET_REG(EBX, ebx);
407 CASE_GET_REG(EBP, ebp);
408 CASE_GET_REG(ESI, esi);
409 CASE_GET_REG(EDI, edi);
410 case REG_ESP:
411 __vmread(GUEST_ESP, &value);
412 break;
413 default:
414 printk("invalid gp: %d\n", gp);
415 __vmx_bug(regs);
416 }
418 VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, \n", cr, value);
419 VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx, \n", (unsigned long) current);
421 switch(cr) {
422 case 0:
423 {
424 unsigned long old_base_pfn = 0, pfn;
426 /*
427 * CR0:
428 * We don't want to lose PE and PG.
429 */
430 __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
431 __vmwrite(CR0_READ_SHADOW, value);
433 if (value & (X86_CR0_PE | X86_CR0_PG) &&
434 !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
435 /*
436 * Enable paging
437 */
438 set_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state);
439 /*
440 * The guest CR3 holds a guest physical address; translate it to a machine frame.
441 */
442 if (!(pfn = phys_to_machine_mapping[
443 d->thread.arch_vmx.cpu_cr3 >> PAGE_SHIFT]))
444 {
445 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx\n",
446 d->thread.arch_vmx.cpu_cr3);
447 domain_crash(); /* need to take a clean path */
448 }
449 old_base_pfn = pagetable_val(d->mm.pagetable) >> PAGE_SHIFT;
450 /*
451 * Now mm.pagetable points to machine physical.
452 */
453 d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
455 VMX_DBG_LOG(DBG_LEVEL_VMMU, "New mm.pagetable = %lx\n",
456 (unsigned long) (pfn << PAGE_SHIFT));
458 shadow_lock(&d->mm);
459 shadow_mode_enable(d->domain, SHM_full_32);
460 shadow_unlock(&d->mm);
462 __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
463 /*
464 * mm->shadow_table should hold the next CR3 for shadow
465 */
466 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, pfn = %lx\n",
467 d->thread.arch_vmx.cpu_cr3, pfn);
468 put_page_and_type(&frame_table[old_base_pfn]);
470 }
471 break;
472 }
473 case 3:
474 {
475 unsigned long pfn;
477 /*
478 * If paging is not enabled yet, simply copy the value to CR3.
479 */
480 if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
481 d->thread.arch_vmx.cpu_cr3 = value;
482 return;
483 }
485 guest_pl2e_cache_invalidate(&d->mm);
486 /*
487 * We make a new one if the shadow does not exist.
488 */
489 if (value == d->thread.arch_vmx.cpu_cr3) {
490 /*
491 * This is a simple TLB flush, implying the guest has
492 * removed some translation or changed page attributes.
493 * We simply invalidate the shadow.
494 */
495 pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
496 if ((pfn << PAGE_SHIFT) != pagetable_val(d->mm.pagetable))
497 __vmx_bug(regs);
498 vmx_shadow_clear_state(&d->mm);
499 shadow_invalidate(&d->mm);
500 } else {
501 /*
502 * If different, make a shadow. Check if the PDBR is valid
503 * first.
504 */
505 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx\n", value);
506 if ((value >> PAGE_SHIFT) > d->domain->max_pages)
507 {
508 VMX_DBG_LOG(DBG_LEVEL_VMMU,
509 "Invalid CR3 value=%lx\n", value);
510 domain_crash(); /* need to take a clean path */
511 }
512 pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
513 vmx_shadow_clear_state(&d->mm);
514 d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
515 shadow_mk_pagetable(&d->mm);
516 /*
517 * mm->shadow_table should hold the next CR3 for shadow
518 */
519 d->thread.arch_vmx.cpu_cr3 = value;
520 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx\n",
521 value);
522 __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
523 }
524 break;
525 }
526 case 4:
527 /* CR4 */
528 if (value & X86_CR4_PAE)
529 __vmx_bug(regs); /* not implemented */
530 __vmread(CR4_READ_SHADOW, &old_cr);
532 __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
533 __vmwrite(CR4_READ_SHADOW, value);
535 /*
536 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
537 * all TLB entries except global entries.
538 */
539 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
540 vmx_shadow_clear_state(&d->mm);
541 shadow_invalidate(&d->mm);
542 guest_pl2e_cache_invalidate(&d->mm);
543 }
544 break;
545 default:
546 printk("invalid cr: %d\n", cr);
547 __vmx_bug(regs);
548 }
549 }
551 #define CASE_SET_REG(REG, reg) \
552 case REG_ ## REG: \
553 regs->reg = value; \
554 break
556 /*
557 * Read from control registers. CR0 and CR4 are read from the shadow.
558 */
559 static void mov_from_cr(int cr, int gp, struct xen_regs *regs)
560 {
561 unsigned long value;
562 struct exec_domain *d = current;
564 if (cr != 3)
565 __vmx_bug(regs);
567 value = (unsigned long) d->thread.arch_vmx.cpu_cr3;
568 ASSERT(value);
570 switch (gp) {
571 CASE_SET_REG(EAX, eax);
572 CASE_SET_REG(ECX, ecx);
573 CASE_SET_REG(EDX, edx);
574 CASE_SET_REG(EBX, ebx);
575 CASE_SET_REG(EBP, ebp);
576 CASE_SET_REG(ESI, esi);
577 CASE_SET_REG(EDI, edi);
578 case REG_ESP:
579 __vmwrite(GUEST_ESP, value);
580 regs->esp = value;
581 break;
582 default:
583 printk("invalid gp: %d\n", gp);
584 __vmx_bug(regs);
585 }
587 VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx, \n", cr, value);
588 }
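/*
 * Dispatch a control-register access exit. The exit qualification
 * encodes the access type (MOV to CR, MOV from CR, or CLTS), the
 * control register number and the general-purpose register used.
 */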
590 static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs *regs)
591 {
592 unsigned int gp, cr;
593 unsigned long value;
595 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
596 case TYPE_MOV_TO_CR:
597 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
598 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
599 mov_to_cr(gp, cr, regs);
600 break;
601 case TYPE_MOV_FROM_CR:
602 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
603 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
604 mov_from_cr(cr, gp, regs);
605 break;
606 case TYPE_CLTS:
607 __vmread(GUEST_CR0, &value);
608 value &= ~X86_CR0_TS; /* clear TS */
609 __vmwrite(GUEST_CR0, value);
611 __vmread(CR0_READ_SHADOW, &value);
612 value &= ~X86_CR0_TS; /* clear TS */
613 __vmwrite(CR0_READ_SHADOW, value);
614 break;
615 default:
616 __vmx_bug(regs);
617 break;
618 }
619 }
621 static inline void vmx_do_msr_read(struct xen_regs *regs)
622 {
623 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%x, eax=%x, edx=%x",
624 regs->ecx, regs->eax, regs->edx);
626 rdmsr(regs->ecx, regs->eax, regs->edx);
628 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: ecx=%x, eax=%x, edx=%x",
629 regs->ecx, regs->eax, regs->edx);
630 }
632 /*
633 * Need to use this exit to reschedule
634 */
635 static inline void vmx_vmexit_do_hlt()
636 {
637 #if VMX_DEBUG
638 unsigned long eip;
639 __vmread(GUEST_EIP, &eip);
640 #endif
641 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%08lx\n", eip);
642 __enter_scheduler();
643 }
645 static inline void vmx_vmexit_do_mwait()
646 {
647 #if VMX_DEBUG
648 unsigned long eip;
649 __vmread(GUEST_EIP, &eip);
650 #endif
651 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%08lx\n", eip);
652 __enter_scheduler();
653 }
655 #define BUF_SIZ 256
656 #define MAX_LINE 80
657 char print_buf[BUF_SIZ];
658 static int index;
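/*
 * Buffer characters passed up by the guest (via VMCALL) and print a
 * full line at a time, prefixed with the guest's domain id.
 */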
660 static void vmx_print_line(const char c, struct exec_domain *d)
661 {
663 if (index == MAX_LINE || c == '\n') {
664 if (index == MAX_LINE) {
665 print_buf[index++] = c;
666 }
667 print_buf[index] = '\0';
668 printk("(GUEST: %u) %s\n", d->domain->id, (char *) &print_buf);
669 index = 0;
670 }
671 else
672 print_buf[index++] = c;
673 }
675 #ifdef XEN_DEBUGGER
676 void save_xen_regs(struct xen_regs *regs)
677 {
678 __vmread(GUEST_SS_SELECTOR, &regs->xss);
679 __vmread(GUEST_ESP, &regs->esp);
680 __vmread(GUEST_EFLAGS, &regs->eflags);
681 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
682 __vmread(GUEST_EIP, &regs->eip);
684 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
685 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
686 __vmread(GUEST_ES_SELECTOR, &regs->xes);
687 __vmread(GUEST_DS_SELECTOR, &regs->xds);
688 }
690 void restore_xen_regs(struct xen_regs *regs)
691 {
692 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
693 __vmwrite(GUEST_ESP, regs->esp);
694 __vmwrite(GUEST_EFLAGS, regs->eflags);
695 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
696 __vmwrite(GUEST_EIP, regs->eip);
698 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
699 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
700 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
701 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
702 }
703 #endif
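/*
 * Main VM-exit dispatcher. Any event that was being delivered when the
 * exit occurred (IDT-vectoring info) is queued for re-injection on the
 * next VM entry; failed VM entries crash the domain; everything else is
 * dispatched on the exit reason (exceptions, external interrupts, CPUID,
 * HLT, INVLPG, VMCALL, CR/DR accesses, I/O, MSR access, MWAIT).
 */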
705 asmlinkage void vmx_vmexit_handler(struct xen_regs regs)
706 {
707 unsigned int exit_reason, idtv_info_field;
708 unsigned long exit_qualification, eip, inst_len = 0;
709 struct exec_domain *d = current;
710 int error;
712 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
713 __vmx_bug(&regs);
715 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
716 if (idtv_info_field & INTR_INFO_VALID_MASK) {
717 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
718 if ((idtv_info_field & 0xff) == 14) {
719 unsigned long error_code;
721 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
722 printk("#PG error code: %lx\n", error_code);
723 }
724 VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x\n",
725 idtv_info_field);
726 }
728 /* don't bother logging H/W interrupts */
729 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
730 exit_reason != EXIT_REASON_VMCALL &&
731 exit_reason != EXIT_REASON_IO_INSTRUCTION)
732 VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x\n", exit_reason);
734 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
735 __vmread(EXIT_QUALIFICATION, &exit_qualification);
736 __vmread(GUEST_EIP, &eip);
737 domain_crash();
738 return;
739 }
741 switch (exit_reason) {
742 case EXIT_REASON_EXCEPTION_NMI:
743 {
744 /*
745 * We don't set the software-interrupt exiting (INT n).
746 * (1) We can get an exception (e.g. #PG) in the guest, or
747 * (2) NMI
748 */
749 int error;
750 unsigned int vector;
751 unsigned long va;
752 unsigned long error_code;
754 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
755 && !(vector & INTR_INFO_VALID_MASK))
756 __vmx_bug(&regs);
757 vector &= 0xff;
759 switch (vector) {
760 #ifdef XEN_DEBUGGER
761 case VECTOR_DB:
762 {
763 save_xen_regs(&regs);
764 pdb_handle_exception(1, &regs, 1);
765 restore_xen_regs(&regs);
766 break;
767 }
768 case VECTOR_BP:
769 {
770 save_xen_regs(&regs);
771 pdb_handle_exception(3, &regs, 1);
772 restore_xen_regs(&regs);
773 break;
774 }
775 #endif
776 case VECTOR_GP:
777 {
778 vmx_do_general_protection_fault(&regs);
779 break;
780 }
781 case VECTOR_PG:
782 {
783 __vmread(EXIT_QUALIFICATION, &va);
784 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
785 VMX_DBG_LOG(DBG_LEVEL_VMMU,
786 "eax=%x, ebx=%x, ecx=%x, edx=%x, esi=%x, edi=%x\n", regs.eax, regs.ebx, regs.ecx, regs.edx, regs.esi, regs.edi);
788 if (!(error = vmx_do_page_fault(va, error_code))) {
789 /*
790 * Inject #PG using Interruption-Information Fields
791 */
792 unsigned long intr_fields;
794 intr_fields = (INTR_INFO_VALID_MASK |
795 INTR_TYPE_EXCEPTION |
796 INTR_INFO_DELIEVER_CODE_MASK |
797 VECTOR_PG);
798 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
799 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
800 d->thread.arch_vmx.cpu_cr2 = va;
801 }
802 break;
803 }
804 default:
805 __vmx_bug(&regs);
806 break;
807 }
808 break;
809 }
810 case EXIT_REASON_EXTERNAL_INTERRUPT:
811 {
812 extern int vector_irq[];
813 extern asmlinkage void do_IRQ(struct xen_regs);
814 extern void smp_apic_timer_interrupt(struct xen_regs *);
815 extern void timer_interrupt(int, void *, struct xen_regs *);
816 unsigned int vector;
818 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
819 && !(vector & INTR_INFO_VALID_MASK))
820 __vmx_bug(&regs);
822 vector &= 0xff;
823 local_irq_disable();
825 if (vector == LOCAL_TIMER_VECTOR) {
826 smp_apic_timer_interrupt(&regs);
827 } else {
828 regs.entry_vector = (vector == FIRST_DEVICE_VECTOR?
829 0 : vector_irq[vector]);
830 do_IRQ(regs);
831 }
832 break;
833 }
834 case EXIT_REASON_PENDING_INTERRUPT:
835 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
836 MONITOR_CPU_BASED_EXEC_CONTROLS);
837 vmx_intr_assist(d);
838 break;
839 case EXIT_REASON_TASK_SWITCH:
840 __vmx_bug(&regs);
841 break;
842 case EXIT_REASON_CPUID:
843 __get_instruction_length(inst_len);
844 vmx_vmexit_do_cpuid(regs.eax, &regs);
845 __update_guest_eip(inst_len);
846 break;
847 case EXIT_REASON_HLT:
848 __get_instruction_length(inst_len);
849 __update_guest_eip(inst_len);
850 vmx_vmexit_do_hlt();
851 break;
852 case EXIT_REASON_INVLPG:
853 {
854 unsigned long va;
856 __vmread(EXIT_QUALIFICATION, &va);
857 vmx_vmexit_do_invlpg(va);
858 __get_instruction_length(inst_len);
859 __update_guest_eip(inst_len);
860 break;
861 }
862 case EXIT_REASON_VMCALL:
863 __get_instruction_length(inst_len);
864 __vmread(GUEST_EIP, &eip);
865 __vmread(EXIT_QUALIFICATION, &exit_qualification);
867 vmx_print_line(regs.eax, d); /* provides the current domain */
868 __update_guest_eip(inst_len);
869 break;
870 case EXIT_REASON_CR_ACCESS:
871 {
872 __vmread(GUEST_EIP, &eip);
873 __get_instruction_length(inst_len);
874 __vmread(EXIT_QUALIFICATION, &exit_qualification);
876 VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx\n",
877 eip, inst_len, exit_qualification);
878 vmx_cr_access(exit_qualification, &regs);
879 __update_guest_eip(inst_len);
880 break;
881 }
882 case EXIT_REASON_DR_ACCESS:
883 __vmread(EXIT_QUALIFICATION, &exit_qualification);
884 vmx_dr_access(exit_qualification, &regs);
885 __get_instruction_length(inst_len);
886 __update_guest_eip(inst_len);
887 break;
888 case EXIT_REASON_IO_INSTRUCTION:
889 __vmread(EXIT_QUALIFICATION, &exit_qualification);
890 __get_instruction_length(inst_len);
891 vmx_io_instruction(&regs, exit_qualification, inst_len);
892 break;
893 case EXIT_REASON_MSR_READ:
894 __get_instruction_length(inst_len);
895 vmx_do_msr_read(&regs);
896 __update_guest_eip(inst_len);
897 break;
898 case EXIT_REASON_MSR_WRITE:
899 __vmread(GUEST_EIP, &eip);
900 VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%08lx, eax=%08x, edx=%08x",
901 eip, regs.eax, regs.edx);
902 /* MSR writes are simply ignored for now */
903 __get_instruction_length(inst_len);
904 __update_guest_eip(inst_len);
905 break;
906 case EXIT_REASON_MWAIT_INSTRUCTION:
907 __get_instruction_length(inst_len);
908 __update_guest_eip(inst_len);
909 vmx_vmexit_do_mwait();
910 break;
911 default:
912 __vmx_bug(&regs); /* should not happen */
913 }
914 return;
915 }
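/*
 * Load the guest's saved %cr2 (set on the #PG path in
 * vmx_vmexit_handler) into the hardware register so the guest sees the
 * faulting address it expects.
 */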
917 asmlinkage void load_cr2(void)
918 {
919 struct exec_domain *d = current;
921 local_irq_disable();
922 asm volatile("movl %0,%%cr2": :"r" (d->thread.arch_vmx.cpu_cr2));
923 }