view xen/arch/x86/vmx.c @ 3685:bbe8541361dd

bitkeeper revision 1.1159.1.542 (42038a42_52IAalMZRKdTn0UbVN5fw)

Merge tempest.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into tempest.cl.cam.ac.uk:/local/scratch/smh22/xen-unstable.bk
author smh22@tempest.cl.cam.ac.uk
date Fri Feb 04 14:44:18 2005 +0000 (2005-02-04)
parents bc0fbb38cb25 ed902e5c4b49
children 88957a238191
/*
 * vmx.c: handling VMX architecture-related VM exits
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/shadow.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/vmx.h>
#include <asm/vmx_vmcs.h>
#include <public/io/ioreq.h>

int vmcs_size;
unsigned int opt_vmx_debug_level;

extern long evtchn_send(int lport);
extern long do_block(void);

#define VECTOR_DB   1
#define VECTOR_BP   3
#define VECTOR_GP   13
#define VECTOR_PG   14
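
/*
 * Boot-time VMX setup: check the VMX CPUID feature bit, set CR4.VMXE,
 * allocate a VMCS region and execute VMXON to enter VMX root operation.
 * Returns 1 on success, 0 if VMX is unavailable or the VMCS allocation
 * fails.
 */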
int start_vmx()
{
    struct vmcs_struct *vmcs;
    unsigned long ecx;
    u64 phys_vmcs;  /* debugging */

    vmcs_size = VMCS_SIZE;
    /*
     * Xen does not fill x86_capability words except 0.
     */
    ecx = cpuid_ecx(1);
    boot_cpu_data.x86_capability[4] = ecx;

    if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
        return 0;

    set_in_cr4(X86_CR4_VMXE);   /* Enable VMXE */

    if (!(vmcs = alloc_vmcs())) {
        printk("Failed to allocate VMCS\n");
        return 0;
    }

    phys_vmcs = (u64) virt_to_phys(vmcs);

    if (!(__vmxon(phys_vmcs))) {
        printk("VMXON is done\n");
    }

    return 1;
}

void stop_vmx()
{
    if (read_cr4() & X86_CR4_VMXE)
        __vmxoff();
}

/*
 * Not all cases receive a valid value in the VM-exit instruction length field.
 */
#define __get_instruction_length(len) \
    __vmread(INSTRUCTION_LEN, &(len)); \
    if ((len) < 1 || (len) > 15) \
        __vmx_bug(&regs);

static inline void __update_guest_eip(unsigned long inst_len)
{
    unsigned long current_eip;

    __vmread(GUEST_EIP, &current_eip);
    __vmwrite(GUEST_EIP, current_eip + inst_len);
}

#include <asm/domain_page.h>
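
/*
 * Handle a guest page fault under shadow paging: refresh the guest PDE
 * cache for the faulting address, read the guest PTE through
 * linear_pg_table, hand MMIO addresses to handle_mmio(), and otherwise
 * let shadow_fault() try to resolve the fault. Returns 0 if the fault
 * could not be resolved (i.e. it must be reflected to the guest).
 */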
static int vmx_do_page_fault(unsigned long va, unsigned long error_code)
{
    unsigned long eip, pfn;
    unsigned int index;
    unsigned long gpde = 0, gpte, gpa;
    int result;
    struct exec_domain *ed = current;
    struct mm_struct *m = &ed->mm;

#if VMX_DEBUG
    {
        __vmread(GUEST_EIP, &eip);
        VMX_DBG_LOG(DBG_LEVEL_VMMU,
                    "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx\n",
                    va, eip, error_code);
    }
#endif
    /*
     * Set up guest page directory cache to make linear_pt_table[] work.
     */
    __guest_get_pl2e(m, va, &gpde);
    if (!(gpde & _PAGE_PRESENT))
        return 0;

    index = (va >> L2_PAGETABLE_SHIFT);
    if (!l2_pgentry_val(m->guest_pl2e_cache[index])) {
        pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];

        VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_do_page_fault: pagetable = %lx\n",
                    pagetable_val(m->pagetable));

        m->guest_pl2e_cache[index] =
            mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    }

    if (unlikely(__get_user(gpte, (unsigned long *)
                            &linear_pg_table[va >> PAGE_SHIFT])))
        return 0;

    gpa = (gpte & PAGE_MASK) | (va & (PAGE_SIZE - 1));

    if (mmio_space(gpa))
        handle_mmio(va, gpte, gpa);

    if ((result = shadow_fault(va, error_code)))
        return result;

    return 0;       /* failed to resolve, i.e. raise #PG */
}
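
/*
 * A #GP taken by the guest is not handled by Xen; log it and inject the
 * same #GP (with its error code) back into the guest via the VM-entry
 * interruption-information field.
 */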
static void vmx_do_general_protection_fault(struct xen_regs *regs)
{
    unsigned long eip, error_code;
    unsigned long intr_fields;

    __vmread(GUEST_EIP, &eip);
    __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);

    VMX_DBG_LOG(DBG_LEVEL_1,
                "vmx_general_protection_fault: eip = %lx, error_code = %lx\n",
                eip, error_code);

    VMX_DBG_LOG(DBG_LEVEL_1,
                "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx\n",
                regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);

    /* Reflect it back into the guest */
    intr_fields = (INTR_INFO_VALID_MASK |
                   INTR_TYPE_EXCEPTION |
                   INTR_INFO_DELIEVER_CODE_MASK |
                   VECTOR_GP);
    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
    __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
}
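
/*
 * Emulate CPUID on behalf of the guest: execute the real CPUID and
 * return the results in the guest registers, clearing the PSE, PAE and
 * PSE36 feature bits in leaf 1 since they are not exposed to the guest
 * here.
 */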
static void vmx_vmexit_do_cpuid(unsigned long input, struct xen_regs *regs)
{
    int eax, ebx, ecx, edx;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_1,
                "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
                " (esi) %lx, (edi) %lx\n",
                regs->eax, regs->ebx, regs->ecx, regs->edx,
                regs->esi, regs->edi);

    cpuid(input, &eax, &ebx, &ecx, &edx);

    if (input == 1) {
        clear_bit(X86_FEATURE_PSE, &edx);
        clear_bit(X86_FEATURE_PAE, &edx);
        clear_bit(X86_FEATURE_PSE36, &edx);
    }

    regs->eax = (unsigned long) eax;
    regs->ebx = (unsigned long) ebx;
    regs->ecx = (unsigned long) ecx;
    regs->edx = (unsigned long) edx;

    VMX_DBG_LOG(DBG_LEVEL_1,
                "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x\n",
                eip, input, eax, ebx, ecx, edx);
}

#define CASE_GET_REG_P(REG, reg)    \
    case REG_ ## REG: reg_p = &(regs->reg); break
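
/*
 * Emulate MOV to/from a debug register: decode the register operand
 * from the exit qualification and copy between the guest's general
 * register (or GUEST_ESP in the VMCS) and the saved debugreg[] array.
 */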
static void vmx_dr_access (unsigned long exit_qualification, struct xen_regs *regs)
{
    unsigned int reg;
    unsigned long *reg_p = 0;
    struct exec_domain *ed = current;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    reg = exit_qualification & DEBUG_REG_ACCESS_NUM;

    VMX_DBG_LOG(DBG_LEVEL_1,
                "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx\n",
                eip, reg, exit_qualification);

    switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
    CASE_GET_REG_P(EAX, eax);
    CASE_GET_REG_P(ECX, ecx);
    CASE_GET_REG_P(EDX, edx);
    CASE_GET_REG_P(EBX, ebx);
    CASE_GET_REG_P(EBP, ebp);
    CASE_GET_REG_P(ESI, esi);
    CASE_GET_REG_P(EDI, edi);
    case REG_ESP:
        break;
    default:
        __vmx_bug(regs);
    }

    switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
    case TYPE_MOV_TO_DR:
        /* don't need to check the range */
        if (reg != REG_ESP)
            ed->thread.debugreg[reg] = *reg_p;
        else {
            unsigned long value;
            __vmread(GUEST_ESP, &value);
            ed->thread.debugreg[reg] = value;
        }
        break;
    case TYPE_MOV_FROM_DR:
        if (reg != REG_ESP)
            *reg_p = ed->thread.debugreg[reg];
        else {
            __vmwrite(GUEST_ESP, ed->thread.debugreg[reg]);
        }
        break;
    }
}

/*
 * Invalidate the TLB for va. Invalidate the shadow page entry
 * corresponding to the address va.
 */
static void vmx_vmexit_do_invlpg(unsigned long va)
{
    unsigned long eip;
    struct exec_domain *d = current;
    unsigned int index;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg:eip=%08lx, va=%08lx\n",
                eip, va);

    /*
     * Do the safest thing first, then try to update the shadow by
     * copying from the guest.
     */
    vmx_shadow_invlpg(&d->mm, va);
    index = (va >> L2_PAGETABLE_SHIFT);
    d->mm.guest_pl2e_cache[index] = mk_l2_pgentry(0); /* invalidate pgd cache */
}

static inline void guest_pl2e_cache_invalidate(struct mm_struct *m)
{
    /*
     * Need to optimize this
     */
    memset(m->guest_pl2e_cache, 0, PAGE_SIZE);
}
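
/*
 * Translate a guest virtual address to a guest physical address by
 * walking the guest page tables: read the guest PDE, refresh the
 * guest_pl2e_cache entry so linear_pg_table works, then read the guest
 * PTE. Returns 0 if the PTE cannot be read or is not present.
 */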
inline unsigned long gva_to_gpa(unsigned long gva)
{
    unsigned long gpde, gpte, pfn, index;
    struct exec_domain *d = current;
    struct mm_struct *m = &d->mm;

    __guest_get_pl2e(m, gva, &gpde);
    index = (gva >> L2_PAGETABLE_SHIFT);

    pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];

    m->guest_pl2e_cache[index] =
        mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[gva >> PAGE_SHIFT])) )
    {
        printk("gva_to_gpa EXIT: read gpte faulted");
        return 0;
    }

    if ( !(gpte & _PAGE_PRESENT) )
    {
        printk("gva_to_gpa - EXIT: gpte not present (%lx)", gpte);
        return 0;
    }

    return (gpte & PAGE_MASK) + (gva & ~PAGE_MASK);
}
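
/*
 * Emulate an I/O instruction (IN/OUT, optionally a string/REP form):
 * decode the port, size and direction from the exit qualification,
 * build an ioreq in the shared page, notify the device model over the
 * I/O packet event channel and block the domain until the request is
 * serviced. Writes to port 0x80 (POST) are simply dropped.
 */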
static void vmx_io_instruction(struct xen_regs *regs,
                               unsigned long exit_qualification,
                               unsigned long inst_len)
{
    struct exec_domain *d = current;
    vcpu_iodata_t *vio;
    ioreq_t *p;
    unsigned long addr;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_1,
                "vmx_io_instruction: eip=%08lx, exit_qualification = %lx\n",
                eip, exit_qualification);

    if (test_bit(6, &exit_qualification))
        addr = (exit_qualification >> 16) & (0xffff);
    else
        addr = regs->edx & 0xffff;

    if (addr == 0x80) {
        __update_guest_eip(inst_len);
        return;
    }

    vio = (vcpu_iodata_t *) d->thread.arch_vmx.vmx_platform.shared_page_va;
    if (vio == 0) {
        VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx\n", (unsigned long) vio);
        domain_crash();
    }
    p = &vio->vp_ioreq;
    p->dir = test_bit(3, &exit_qualification);
    set_bit(ARCH_VMX_IO_WAIT, &d->thread.arch_vmx.flags);

    p->pdata_valid = 0;
    p->count = 1;
    p->size = (exit_qualification & 7) + 1;

    if (test_bit(4, &exit_qualification)) {
        unsigned long eflags;

        __vmread(GUEST_EFLAGS, &eflags);
        p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
        p->pdata_valid = 1;
        p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
                               regs->esi : regs->edi);
        p->u.pdata = (void *) gva_to_gpa(p->u.data);
        if (test_bit(5, &exit_qualification))
            p->count = regs->ecx;
        if ((p->u.data & PAGE_MASK) !=
            ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
            printk("stringio crosses page boundary!\n");
            if (p->u.data & (p->size - 1)) {
                printk("Not aligned I/O!\n");
                domain_crash();
            }
            p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
        } else {
            __update_guest_eip(inst_len);
        }
    } else if (p->dir == IOREQ_WRITE) {
        p->u.data = regs->eax;
        __update_guest_eip(inst_len);
    } else
        __update_guest_eip(inst_len);

    p->addr = addr;
    p->port_mm = 0;
    p->state = STATE_IOREQ_READY;
    evtchn_send(IOPACKET_PORT);
    do_block();
}

#define CASE_GET_REG(REG, reg)  \
    case REG_ ## REG: value = regs->reg; break

/*
 * Write to control registers
 */
static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
{
    unsigned long value;
    unsigned long old_cr;
    struct exec_domain *d = current;

    switch (gp) {
    CASE_GET_REG(EAX, eax);
    CASE_GET_REG(ECX, ecx);
    CASE_GET_REG(EDX, edx);
    CASE_GET_REG(EBX, ebx);
    CASE_GET_REG(EBP, ebp);
    CASE_GET_REG(ESI, esi);
    CASE_GET_REG(EDI, edi);
    case REG_ESP:
        __vmread(GUEST_ESP, &value);
        break;
    default:
        printk("invalid gp: %d\n", gp);
        __vmx_bug(regs);
    }

    VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx\n", cr, value);
    VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx\n", (unsigned long) current);

    switch(cr) {
    case 0:
    {
        unsigned long old_base_pfn = 0, pfn;

        /*
         * CR0:
         * We don't want to lose PE and PG.
         */
        __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
        __vmwrite(CR0_READ_SHADOW, value);

        if (value & (X86_CR0_PE | X86_CR0_PG) &&
            !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
            /*
             * Enable paging
             */
            set_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state);
            /*
             * The guest CR3 must be pointing to the guest physical.
             */
            if (!(pfn = phys_to_machine_mapping[
                      d->thread.arch_vmx.cpu_cr3 >> PAGE_SHIFT]))
            {
                VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx\n",
                            d->thread.arch_vmx.cpu_cr3);
                domain_crash(); /* need to take a clean path */
            }
            old_base_pfn = pagetable_val(d->mm.pagetable) >> PAGE_SHIFT;
            /*
             * Now mm.pagetable points to machine physical.
             */
            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);

            VMX_DBG_LOG(DBG_LEVEL_VMMU, "New mm.pagetable = %lx\n",
                        (unsigned long) (pfn << PAGE_SHIFT));

            shadow_lock(&d->mm);
            shadow_mode_enable(d->domain, SHM_full_32);
            shadow_unlock(&d->mm);

            __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
            /*
             * mm->shadow_table should hold the next CR3 for shadow
             */
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, pfn = %lx\n",
                        d->thread.arch_vmx.cpu_cr3, pfn);
            put_page_and_type(&frame_table[old_base_pfn]);

        }
        break;
    }
    case 3:
    {
        unsigned long pfn;

        /*
         * If paging is not enabled yet, simply copy the value to CR3.
         */
        if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
            d->thread.arch_vmx.cpu_cr3 = value;
            return;
        }

        guest_pl2e_cache_invalidate(&d->mm);
        /*
         * We make a new one if the shadow does not exist.
         */
        if (value == d->thread.arch_vmx.cpu_cr3) {
            /*
             * This is simple TLB flush, implying the guest has
             * removed some translation or changed page attributes.
             * We simply invalidate the shadow.
             */
            pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
            if ((pfn << PAGE_SHIFT) != pagetable_val(d->mm.pagetable))
                __vmx_bug(regs);
            vmx_shadow_clear_state(&d->mm);
            shadow_invalidate(&d->mm);
        } else {
            /*
             * If different, make a shadow. Check if the PDBR is valid
             * first.
             */
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx\n", value);
            if ((value >> PAGE_SHIFT) > d->domain->max_pages)
            {
                VMX_DBG_LOG(DBG_LEVEL_VMMU,
                            "Invalid CR3 value=%lx\n", value);
                domain_crash(); /* need to take a clean path */
            }
            pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
            vmx_shadow_clear_state(&d->mm);
            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
            shadow_mk_pagetable(&d->mm);
            /*
             * mm->shadow_table should hold the next CR3 for shadow
             */
            d->thread.arch_vmx.cpu_cr3 = value;
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx\n", value);
            __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
        }
        break;
    }
    case 4:
        /* CR4 */
        if (value & X86_CR4_PAE)
            __vmx_bug(regs);    /* not implemented */
        __vmread(CR4_READ_SHADOW, &old_cr);

        __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
        __vmwrite(CR4_READ_SHADOW, value);

        /*
         * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
         * all TLB entries except global entries.
         */
        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
            vmx_shadow_clear_state(&d->mm);
            shadow_invalidate(&d->mm);
            guest_pl2e_cache_invalidate(&d->mm);
        }
        break;
    default:
        printk("invalid cr: %d\n", cr);
        __vmx_bug(regs);
    }
}

#define CASE_SET_REG(REG, reg)      \
    case REG_ ## REG:               \
        regs->reg = value;          \
        break

/*
 * Read from control registers. CR0 and CR4 are read from the shadow.
 */
static void mov_from_cr(int cr, int gp, struct xen_regs *regs)
{
    unsigned long value;
    struct exec_domain *d = current;

    if (cr != 3)
        __vmx_bug(regs);

    value = (unsigned long) d->thread.arch_vmx.cpu_cr3;
    ASSERT(value);

    switch (gp) {
    CASE_SET_REG(EAX, eax);
    CASE_SET_REG(ECX, ecx);
    CASE_SET_REG(EDX, edx);
    CASE_SET_REG(EBX, ebx);
    CASE_SET_REG(EBP, ebp);
    CASE_SET_REG(ESI, esi);
    CASE_SET_REG(EDI, edi);
    case REG_ESP:
        __vmwrite(GUEST_ESP, value);
        regs->esp = value;
        break;
    default:
        printk("invalid gp: %d\n", gp);
        __vmx_bug(regs);
    }

    VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx\n", cr, value);
}
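
/*
 * Dispatch a CR-access VM exit: decode the access type from the exit
 * qualification and handle MOV to/from CRn and CLTS (which clears
 * CR0.TS in both the guest CR0 and its read shadow).
 */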
static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs *regs)
{
    unsigned int gp, cr;
    unsigned long value;

    switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
    case TYPE_MOV_TO_CR:
        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
        mov_to_cr(gp, cr, regs);
        break;
    case TYPE_MOV_FROM_CR:
        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
        mov_from_cr(cr, gp, regs);
        break;
    case TYPE_CLTS:
        __vmread(GUEST_CR0, &value);
        value &= ~X86_CR0_TS; /* clear TS */
        __vmwrite(GUEST_CR0, value);

        __vmread(CR0_READ_SHADOW, &value);
        value &= ~X86_CR0_TS; /* clear TS */
        __vmwrite(CR0_READ_SHADOW, value);
        break;
    default:
        __vmx_bug(regs);
        break;
    }
}
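
/*
 * RDMSR is passed straight through to the hardware MSR; the result is
 * returned to the guest in eax/edx.
 */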
static inline void vmx_do_msr_read(struct xen_regs *regs)
{
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
                regs->ecx, regs->eax, regs->edx);

    rdmsr(regs->ecx, regs->eax, regs->edx);

    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
                "ecx=%lx, eax=%lx, edx=%lx",
                regs->ecx, regs->eax, regs->edx);
}

/*
 * Need to use this exit to reschedule
 */
static inline void vmx_vmexit_do_hlt()
{
#if VMX_DEBUG
    unsigned long eip;
    __vmread(GUEST_EIP, &eip);
#endif
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%08lx\n", eip);
    __enter_scheduler();
}

static inline void vmx_vmexit_do_mwait()
{
#if VMX_DEBUG
    unsigned long eip;
    __vmread(GUEST_EIP, &eip);
#endif
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%08lx\n", eip);
    __enter_scheduler();
}

#define BUF_SIZ     256
#define MAX_LINE    80
char print_buf[BUF_SIZ];
static int index;
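
/*
 * Accumulate characters emitted by the guest through VMCALL into
 * print_buf, and flush the buffer to the Xen console (tagged with the
 * domain id) on newline or when MAX_LINE characters have been buffered.
 */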
static void vmx_print_line(const char c, struct exec_domain *d)
{
    if (index == MAX_LINE || c == '\n') {
        if (index == MAX_LINE) {
            print_buf[index++] = c;
        }
        print_buf[index] = '\0';
        printk("(GUEST: %u) %s\n", d->domain->id, (char *) &print_buf);
        index = 0;
    }
    else
        print_buf[index++] = c;
}

#ifdef XEN_DEBUGGER
void save_xen_regs(struct xen_regs *regs)
{
    __vmread(GUEST_SS_SELECTOR, &regs->xss);
    __vmread(GUEST_ESP, &regs->esp);
    __vmread(GUEST_EFLAGS, &regs->eflags);
    __vmread(GUEST_CS_SELECTOR, &regs->xcs);
    __vmread(GUEST_EIP, &regs->eip);

    __vmread(GUEST_GS_SELECTOR, &regs->xgs);
    __vmread(GUEST_FS_SELECTOR, &regs->xfs);
    __vmread(GUEST_ES_SELECTOR, &regs->xes);
    __vmread(GUEST_DS_SELECTOR, &regs->xds);
}

void restore_xen_regs(struct xen_regs *regs)
{
    __vmwrite(GUEST_SS_SELECTOR, regs->xss);
    __vmwrite(GUEST_ESP, regs->esp);
    __vmwrite(GUEST_EFLAGS, regs->eflags);
    __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
    __vmwrite(GUEST_EIP, regs->eip);

    __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
    __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
    __vmwrite(GUEST_ES_SELECTOR, regs->xes);
    __vmwrite(GUEST_DS_SELECTOR, regs->xds);
}
#endif
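
/*
 * Top-level VM-exit dispatcher: re-inject any event recorded in the
 * IDT-vectoring information field, then switch on the exit reason and
 * call the handlers above.
 */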
asmlinkage void vmx_vmexit_handler(struct xen_regs regs)
{
    unsigned int exit_reason, idtv_info_field;
    unsigned long exit_qualification, eip, inst_len = 0;
    struct exec_domain *d = current;
    int error;

    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
        __vmx_bug(&regs);

    __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
    if (idtv_info_field & INTR_INFO_VALID_MASK) {
        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
        if ((idtv_info_field & 0xff) == 14) {
            unsigned long error_code;

            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
            printk("#PG error code: %lx\n", error_code);
        }
        VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x\n", idtv_info_field);
    }

    /* don't bother with H/W interrupts */
    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
        exit_reason != EXIT_REASON_VMCALL &&
        exit_reason != EXIT_REASON_IO_INSTRUCTION)
        VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x\n", exit_reason);

    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        __vmread(GUEST_EIP, &eip);
        domain_crash();
        return;
    }

    switch (exit_reason) {
    case EXIT_REASON_EXCEPTION_NMI:
    {
        /*
         * We don't set software-interrupt exiting (INT n), so we get
         * here only if (1) the guest raised an exception (e.g. #PG),
         * or (2) an NMI was delivered.
         */
        int error;
        unsigned int vector;
        unsigned long va;
        unsigned long error_code;

        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
            && !(vector & INTR_INFO_VALID_MASK))
            __vmx_bug(&regs);
        vector &= 0xff;

        switch (vector) {
#ifdef XEN_DEBUGGER
        case VECTOR_DB:
        {
            save_xen_regs(&regs);
            pdb_handle_exception(1, &regs, 1);
            restore_xen_regs(&regs);
            break;
        }
        case VECTOR_BP:
        {
            save_xen_regs(&regs);
            pdb_handle_exception(3, &regs, 1);
            restore_xen_regs(&regs);
            break;
        }
#endif
        case VECTOR_GP:
        {
            vmx_do_general_protection_fault(&regs);
            break;
        }
        case VECTOR_PG:
        {
            __vmread(EXIT_QUALIFICATION, &va);
            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
            VMX_DBG_LOG(DBG_LEVEL_VMMU,
                        "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx\n",
                        regs.eax, regs.ebx, regs.ecx, regs.edx, regs.esi,
                        regs.edi);
            d->thread.arch_vmx.vmx_platform.mpci.inst_decoder_regs = &regs;

            if (!(error = vmx_do_page_fault(va, error_code))) {
                /*
                 * Inject #PG using Interruption-Information Fields
                 */
                unsigned long intr_fields;

                intr_fields = (INTR_INFO_VALID_MASK |
                               INTR_TYPE_EXCEPTION |
                               INTR_INFO_DELIEVER_CODE_MASK |
                               VECTOR_PG);
                __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
                d->thread.arch_vmx.cpu_cr2 = va;
            }
            break;
        }
        default:
            __vmx_bug(&regs);
            break;
        }
        break;
    }
    case EXIT_REASON_EXTERNAL_INTERRUPT:
    {
        extern int vector_irq[];
        extern asmlinkage void do_IRQ(struct xen_regs);
        extern void smp_apic_timer_interrupt(struct xen_regs *);
        extern void timer_interrupt(int, void *, struct xen_regs *);
        unsigned int vector;

        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
            && !(vector & INTR_INFO_VALID_MASK))
            __vmx_bug(&regs);

        vector &= 0xff;
        local_irq_disable();

        if (vector == LOCAL_TIMER_VECTOR) {
            smp_apic_timer_interrupt(&regs);
        } else {
            regs.entry_vector = (vector == FIRST_DEVICE_VECTOR ?
                                 0 : vector_irq[vector]);
            do_IRQ(regs);
        }
        break;
    }
    case EXIT_REASON_PENDING_INTERRUPT:
        __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                  MONITOR_CPU_BASED_EXEC_CONTROLS);
        vmx_intr_assist(d);
        break;
    case EXIT_REASON_TASK_SWITCH:
        __vmx_bug(&regs);
        break;
    case EXIT_REASON_CPUID:
        __get_instruction_length(inst_len);
        vmx_vmexit_do_cpuid(regs.eax, &regs);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_HLT:
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        vmx_vmexit_do_hlt();
        break;
    case EXIT_REASON_INVLPG:
    {
        unsigned long va;

        __vmread(EXIT_QUALIFICATION, &va);
        vmx_vmexit_do_invlpg(va);
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    }
    case EXIT_REASON_VMCALL:
        __get_instruction_length(inst_len);
        __vmread(GUEST_EIP, &eip);
        __vmread(EXIT_QUALIFICATION, &exit_qualification);

        vmx_print_line(regs.eax, d); /* provides the current domain */
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_CR_ACCESS:
    {
        __vmread(GUEST_EIP, &eip);
        __get_instruction_length(inst_len);
        __vmread(EXIT_QUALIFICATION, &exit_qualification);

        VMX_DBG_LOG(DBG_LEVEL_1,
                    "eip = %lx, inst_len = %lx, exit_qualification = %lx\n",
                    eip, inst_len, exit_qualification);
        vmx_cr_access(exit_qualification, &regs);
        __update_guest_eip(inst_len);
        break;
    }
    case EXIT_REASON_DR_ACCESS:
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        vmx_dr_access(exit_qualification, &regs);
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_IO_INSTRUCTION:
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        __get_instruction_length(inst_len);
        vmx_io_instruction(&regs, exit_qualification, inst_len);
        break;
    case EXIT_REASON_MSR_READ:
        __get_instruction_length(inst_len);
        vmx_do_msr_read(&regs);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_MSR_WRITE:
        __vmread(GUEST_EIP, &eip);
        VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%08lx, eax=%08lx, edx=%08lx",
                    eip, regs.eax, regs.edx);
        /* MSR writes are simply ignored for now; just skip the instruction */
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_MWAIT_INSTRUCTION:
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        vmx_vmexit_do_mwait();
        break;
    default:
        __vmx_bug(&regs);       /* should not happen */
    }
    return;
}
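
/*
 * Reload the guest's saved %cr2 (with interrupts disabled) before the
 * guest is resumed; %cr2 is not held in the VMCS guest state, so it is
 * restored by hand here.
 */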
asmlinkage void load_cr2(void)
{
    struct exec_domain *d = current;

    local_irq_disable();
    asm volatile("movl %0,%%cr2": :"r" (d->thread.arch_vmx.cpu_cr2));
}