debuggers.hg

view xen/arch/x86/vmx.c @ 3604:a396b30da79d

bitkeeper revision 1.1159.231.9 (41f97c50Eg32TPmD9lVOyWCldhbhDA)

Some guests depend on getting a #GP fault in order to boot. This patch
makes sure that the #GP fault is injected into the guest properly.

Signed-off-by: Arun Sharma <arun.sharma@intel.com>
Signed-off-by: ian.pratt@cl.cam.ac.uk
author iap10@labyrinth.cl.cam.ac.uk
date Thu Jan 27 23:42:08 2005 +0000 (2005-01-27)
parents ef529c8bd197
children 002034af24e6
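
For context, the injection path this change exercises is vmx_do_general_protection_fault() in the listing below: the handler reads the faulting EIP and error code from the VMCS and hands the exception back to the guest through the VM-entry interruption-information field. A minimal sketch of that pattern follows; the helper name is illustrative only, while the constants and __vmwrite() calls are the ones used in this file.

/* Illustrative only: reinject a faulting exception into the guest,
 * mirroring what vmx_do_general_protection_fault() does for #GP. */
static void inject_guest_exception(unsigned int vector,
                                   unsigned long error_code)
{
    unsigned long intr_fields = INTR_INFO_VALID_MASK |
                                INTR_TYPE_EXCEPTION |
                                INTR_INFO_DELIEVER_CODE_MASK |
                                vector;

    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
    __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
}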
line source
/*
 * vmx.c: handling VMX architecture-related VM exits
 * Copyright (c) 2004, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <asm/current.h>
#include <asm/io.h>
#include <asm/irq.h>
#include <asm/shadow.h>
#include <asm/regs.h>
#include <asm/cpufeature.h>
#include <asm/processor.h>
#include <asm/types.h>
#include <asm/msr.h>
#include <asm/spinlock.h>
#include <asm/vmx.h>
#include <asm/vmx_vmcs.h>
#include <public/io/ioreq.h>

int vmcs_size;
unsigned int opt_vmx_debug_level;

extern long evtchn_send(int lport);
extern long do_block(void);

#define VECTOR_DB   1
#define VECTOR_BP   3
#define VECTOR_GP   13
#define VECTOR_PG   14
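
/*
 * Bring this CPU into VMX operation: check the CPUID VMX feature bit,
 * set CR4.VMXE, allocate a VMCS region and execute VMXON.
 * Returns 1 on success, 0 if VMX is unavailable or setup fails.
 */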
int start_vmx()
{
    struct vmcs_struct *vmcs;
    unsigned long ecx;
    u64 phys_vmcs;      /* debugging */

    vmcs_size = VMCS_SIZE;
    /*
     * Xen does not fill x86_capability words except 0.
     */
    ecx = cpuid_ecx(1);
    boot_cpu_data.x86_capability[4] = ecx;

    if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
        return 0;

    set_in_cr4(X86_CR4_VMXE);   /* Enable VMXE */

    if (!(vmcs = alloc_vmcs())) {
        printk("Failed to allocate VMCS\n");
        return 0;
    }

    phys_vmcs = (u64) virt_to_phys(vmcs);

    if (!(__vmxon(phys_vmcs))) {
        printk("VMXON is done\n");
    }

    return 1;
}

void stop_vmx()
{
    if (read_cr4() & X86_CR4_VMXE)
        __vmxoff();
}

/*
 * Not all cases receive a valid value in the VM-exit instruction length field.
 */
#define __get_instruction_length(len) \
    __vmread(INSTRUCTION_LEN, &(len)); \
    if ((len) < 1 || (len) > 15) \
        __vmx_bug(&regs);
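
/* Advance the guest EIP past the instruction that caused the VM exit. */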
static void inline __update_guest_eip(unsigned long inst_len)
{
    unsigned long current_eip;

    __vmread(GUEST_EIP, &current_eip);
    __vmwrite(GUEST_EIP, current_eip + inst_len);
}

#include <asm/domain_page.h>
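
/*
 * Handle a page fault taken by the guest: refresh the cached guest PDE so
 * that linear_pg_table[] accesses work, then let the shadow fault handler
 * try to resolve it.  Returns non-zero if the fault was handled; 0 means
 * it must be reflected back into the guest as #PG.
 */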
static int vmx_do_page_fault(unsigned long va, unsigned long error_code)
{
    unsigned long eip, pfn;
    unsigned int index;
    unsigned long gpde = 0;
    int result;
    struct exec_domain *ed = current;
    struct mm_struct *m = &ed->mm;

#if VMX_DEBUG
    {
        __vmread(GUEST_EIP, &eip);
        VMX_DBG_LOG(DBG_LEVEL_VMMU,
                "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx\n",
                va, eip, error_code);
    }
#endif
    /*
     * Set up the guest page directory cache to make linear_pg_table[] work.
     */
    __guest_get_pl2e(m, va, &gpde);
    if (!(gpde & _PAGE_PRESENT))
        return 0;

    index = (va >> L2_PAGETABLE_SHIFT);
    if (!l2_pgentry_val(m->guest_pl2e_cache[index])) {
        pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];

        VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_do_page_fault: pagetable = %lx\n",
                pagetable_val(m->pagetable));

        m->guest_pl2e_cache[index] =
            mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
    }

    if ((result = shadow_fault(va, error_code)))
        return result;

    return 0;       /* failed to resolve, i.e. raise #PG */
}
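
/*
 * Reflect a general protection fault back into the guest via the VM-entry
 * interruption-information field (see the changeset description above).
 */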
static void vmx_do_general_protection_fault(struct xen_regs *regs)
{
    unsigned long eip, error_code;
    unsigned long intr_fields;

    __vmread(GUEST_EIP, &eip);
    __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_general_protection_fault: eip = %lx, error_code = %lx\n",
            eip, error_code);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "eax=%x, ebx=%x, ecx=%x, edx=%x, esi=%x, edi=%x\n",
            regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);

    /* Reflect it back into the guest */
    intr_fields = (INTR_INFO_VALID_MASK |
                   INTR_TYPE_EXCEPTION |
                   INTR_INFO_DELIEVER_CODE_MASK |
                   VECTOR_GP);
    __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
    __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
}
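
/*
 * Execute CPUID on behalf of the guest.  For leaf 1, hide PSE, PAE and
 * PSE36 from the guest: large pages and PAE are not handled here, and
 * setting CR4.PAE is rejected in mov_to_cr() below.
 */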
static void vmx_vmexit_do_cpuid(unsigned long input, struct xen_regs *regs)
{
    int eax, ebx, ecx, edx;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "do_cpuid: (eax) %x, (ebx) %x, (ecx) %x, (edx) %x, (esi) %x, (edi) %x\n",
            regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);

    cpuid(input, &eax, &ebx, &ecx, &edx);

    if (input == 1) {
        clear_bit(X86_FEATURE_PSE, &edx);
        clear_bit(X86_FEATURE_PAE, &edx);
        clear_bit(X86_FEATURE_PSE36, &edx);
    }

    regs->eax = (unsigned long) eax;
    regs->ebx = (unsigned long) ebx;
    regs->ecx = (unsigned long) ecx;
    regs->edx = (unsigned long) edx;

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x\n",
            eip, input, eax, ebx, ecx, edx);
}

#define CASE_GET_REG_P(REG, reg)    \
    case REG_ ## REG: reg_p = &(regs->reg); break
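
/*
 * Emulate guest MOV to/from debug register instructions, keeping the
 * guest's DR values in ed->thread.debugreg[].  ESP is special-cased
 * because its value lives in the VMCS rather than in the register frame.
 */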
static void vmx_dr_access (unsigned long exit_qualification, struct xen_regs *regs)
{
    unsigned int reg;
    u32 *reg_p = 0;
    struct exec_domain *ed = current;
    u32 eip;

    __vmread(GUEST_EIP, &eip);

    reg = exit_qualification & DEBUG_REG_ACCESS_NUM;

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_dr_access : eip=%08x, reg=%d, exit_qualification = %lx\n",
            eip, reg, exit_qualification);

    switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
    CASE_GET_REG_P(EAX, eax);
    CASE_GET_REG_P(ECX, ecx);
    CASE_GET_REG_P(EDX, edx);
    CASE_GET_REG_P(EBX, ebx);
    CASE_GET_REG_P(EBP, ebp);
    CASE_GET_REG_P(ESI, esi);
    CASE_GET_REG_P(EDI, edi);
    case REG_ESP:
        break;
    default:
        __vmx_bug(regs);
    }

    switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
    case TYPE_MOV_TO_DR:
        /* don't need to check the range */
        if (reg != REG_ESP)
            ed->thread.debugreg[reg] = *reg_p;
        else {
            unsigned long value;
            __vmread(GUEST_ESP, &value);
            ed->thread.debugreg[reg] = value;
        }
        break;
    case TYPE_MOV_FROM_DR:
        if (reg != REG_ESP)
            *reg_p = ed->thread.debugreg[reg];
        else {
            __vmwrite(GUEST_ESP, ed->thread.debugreg[reg]);
        }
        break;
    }
}

/*
 * Invalidate the TLB for va. Invalidate the shadow page corresponding
 * to the address va.
 */
static void vmx_vmexit_do_invlpg(unsigned long va)
{
    unsigned long eip;
    struct exec_domain *d = current;
    unsigned int index;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg:eip=%08lx, va=%08lx\n",
            eip, va);

    /*
     * We do the safest things first, then try to update the shadow,
     * copying from the guest.
     */
    vmx_shadow_invlpg(&d->mm, va);
    index = (va >> L2_PAGETABLE_SHIFT);
    d->mm.guest_pl2e_cache[index] = mk_l2_pgentry(0); /* invalidate pgd cache */
}

static inline void guest_pl2e_cache_invalidate(struct mm_struct *m)
{
    /*
     * Need to optimize this
     */
    memset(m->guest_pl2e_cache, 0, PAGE_SIZE);
}
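
/*
 * Translate a guest virtual address to a guest physical address by
 * walking the guest page table through linear_pg_table.  Returns 0 if
 * the PTE cannot be read or is not present.
 */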
static inline unsigned long gva_to_gpa(unsigned long gva)
{
    unsigned long gpde, gpte, pfn, index;
    struct exec_domain *d = current;
    struct mm_struct *m = &d->mm;

    __guest_get_pl2e(m, gva, &gpde);
    index = (gva >> L2_PAGETABLE_SHIFT);

    pfn = phys_to_machine_mapping[gpde >> PAGE_SHIFT];

    m->guest_pl2e_cache[index] =
        mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    if ( unlikely(__get_user(gpte, (unsigned long *)
                             &linear_pg_table[gva >> PAGE_SHIFT])) )
    {
        printk("gva_to_gpa EXIT: read gpte faulted" );
        return 0;
    }

    if ( !(gpte & _PAGE_PRESENT) )
    {
        printk("gva_to_gpa - EXIT: gpte not present (%lx)",gpte );
        return 0;
    }

    return (gpte & PAGE_MASK) + (gva & ~PAGE_MASK);
}
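
/*
 * Handle an I/O instruction VM exit: decode the exit qualification into an
 * ioreq_t in the page shared with the device model, signal it over the
 * event channel and block until the request has been serviced.  Accesses
 * to port 0x80 (the POST diagnostic port) are simply dropped.  String I/O
 * that crosses a page boundary is truncated at the boundary and the guest
 * EIP is left unchanged, so the instruction restarts for the remainder.
 */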
static void vmx_io_instruction(struct xen_regs *regs,
        unsigned long exit_qualification, unsigned long inst_len)
{
    struct exec_domain *d = current;
    vcpu_iodata_t *vio;
    ioreq_t *p;
    unsigned long addr;
    unsigned long eip;

    __vmread(GUEST_EIP, &eip);

    VMX_DBG_LOG(DBG_LEVEL_1,
            "vmx_io_instruction: eip=%08lx, exit_qualification = %lx\n",
            eip, exit_qualification);

    if (test_bit(6, &exit_qualification))
        addr = (exit_qualification >> 16) & (0xffff);
    else
        addr = regs->edx & 0xffff;

    if (addr == 0x80) {
        __update_guest_eip(inst_len);
        return;
    }

    vio = (vcpu_iodata_t *) d->thread.arch_vmx.vmx_platform.shared_page_va;
    if (vio == 0) {
        VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx\n", (unsigned long) vio);
        domain_crash();
    }
    p = &vio->vp_ioreq;
    p->dir = test_bit(3, &exit_qualification);
    set_bit(ARCH_VMX_IO_WAIT, &d->thread.arch_vmx.flags);

    p->pdata_valid = 0;
    p->count = 1;
    p->size = (exit_qualification & 7) + 1;

    if (test_bit(4, &exit_qualification)) {
        p->pdata_valid = 1;
        p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
                               regs->esi
                               : regs->edi);
        p->u.pdata = (void *) gva_to_gpa(p->u.data);
        if (test_bit(5, &exit_qualification))
            p->count = regs->ecx;
        if ((p->u.data & PAGE_MASK) !=
            ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
            printk("stringio crosses page boundary!\n");
            if (p->u.data & (p->size - 1)) {
                printk("Not aligned I/O!\n");
                domain_crash();
            }
            p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
        } else {
            __update_guest_eip(inst_len);
        }
    } else if (p->dir == IOREQ_WRITE) {
        p->u.data = regs->eax;
        __update_guest_eip(inst_len);
    } else
        __update_guest_eip(inst_len);

    p->addr = addr;
    p->port_mm = 0;
    p->state = STATE_IOREQ_READY;
    evtchn_send(IOPACKET_PORT);
    do_block();
}

#define CASE_GET_REG(REG, reg)  \
    case REG_ ## REG: value = regs->reg; break

/*
 * Write to control registers
 */
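/*
 * CR0 writes that turn on paging switch the VCPU to full 32-bit shadow
 * mode; CR3 writes either flush the existing shadow (same value written
 * back) or build a new shadow page table (new value); CR4 writes
 * invalidate the shadow when paging-related flags change.  PAE is not
 * implemented.
 */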
static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
{
    unsigned long value;
    unsigned long old_cr;
    struct exec_domain *d = current;

    switch (gp) {
    CASE_GET_REG(EAX, eax);
    CASE_GET_REG(ECX, ecx);
    CASE_GET_REG(EDX, edx);
    CASE_GET_REG(EBX, ebx);
    CASE_GET_REG(EBP, ebp);
    CASE_GET_REG(ESI, esi);
    CASE_GET_REG(EDI, edi);
    case REG_ESP:
        __vmread(GUEST_ESP, &value);
        break;
    default:
        printk("invalid gp: %d\n", gp);
        __vmx_bug(regs);
    }

    VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, \n", cr, value);
    VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx, \n", (unsigned long) current);

    switch(cr) {
    case 0:
    {
        unsigned long old_base_pfn = 0, pfn;

        /*
         * CR0:
         * We don't want to lose PE and PG.
         */
        __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
        __vmwrite(CR0_READ_SHADOW, value);

        if (value & (X86_CR0_PE | X86_CR0_PG) &&
            !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
            /*
             * Enable paging
             */
            set_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state);
            /*
             * The guest CR3 must be pointing to the guest physical.
             */
            if (!(pfn = phys_to_machine_mapping[
                      d->thread.arch_vmx.cpu_cr3 >> PAGE_SHIFT]))
            {
                VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx\n",
                        d->thread.arch_vmx.cpu_cr3);
                domain_crash(); /* need to take a clean path */
            }
            old_base_pfn = pagetable_val(d->mm.pagetable) >> PAGE_SHIFT;
            /*
             * Now mm.pagetable points to machine physical.
             */
            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);

            VMX_DBG_LOG(DBG_LEVEL_VMMU, "New mm.pagetable = %lx\n",
                    (unsigned long) (pfn << PAGE_SHIFT));

            shadow_lock(&d->mm);
            shadow_mode_enable(d->domain, SHM_full_32);
            shadow_unlock(&d->mm);

            __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
            /*
             * mm->shadow_table should hold the next CR3 for shadow
             */
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, pfn = %lx\n",
                    d->thread.arch_vmx.cpu_cr3, pfn);
            put_page_and_type(&frame_table[old_base_pfn]);

        }
        break;
    }
    case 3:
    {
        unsigned long pfn;

        /*
         * If paging is not enabled yet, simply copy the value to CR3.
         */
        if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->thread.arch_vmx.cpu_state)) {
            d->thread.arch_vmx.cpu_cr3 = value;
            return;
        }

        guest_pl2e_cache_invalidate(&d->mm);
        /*
         * We make a new one if the shadow does not exist.
         */
        if (value == d->thread.arch_vmx.cpu_cr3) {
            /*
             * This is simply a TLB flush, implying the guest has
             * removed some translation or changed page attributes.
             * We simply invalidate the shadow.
             */
            pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
            if ((pfn << PAGE_SHIFT) != pagetable_val(d->mm.pagetable))
                __vmx_bug(regs);
            vmx_shadow_clear_state(&d->mm);
            shadow_invalidate(&d->mm);
        } else {
            /*
             * If different, make a shadow. Check if the PDBR is valid
             * first.
             */
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx\n", value);
            if ((value >> PAGE_SHIFT) > d->domain->max_pages)
            {
                VMX_DBG_LOG(DBG_LEVEL_VMMU,
                        "Invalid CR3 value=%lx\n", value);
                domain_crash(); /* need to take a clean path */
            }
            pfn = phys_to_machine_mapping[value >> PAGE_SHIFT];
            vmx_shadow_clear_state(&d->mm);
            d->mm.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
            shadow_mk_pagetable(&d->mm);
            /*
             * mm->shadow_table should hold the next CR3 for shadow
             */
            d->thread.arch_vmx.cpu_cr3 = value;
            VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx\n",
                    value);
            __vmwrite(GUEST_CR3, pagetable_val(d->mm.shadow_table));
        }
        break;
    }
    case 4:
        /* CR4 */
        if (value & X86_CR4_PAE)
            __vmx_bug(regs);    /* not implemented */
        __vmread(CR4_READ_SHADOW, &old_cr);

        __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
        __vmwrite(CR4_READ_SHADOW, value);

        /*
         * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
         * all TLB entries except global entries.
         */
        if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
            vmx_shadow_clear_state(&d->mm);
            shadow_invalidate(&d->mm);
            guest_pl2e_cache_invalidate(&d->mm);
        }
        break;
    default:
        printk("invalid cr: %d\n", cr);
        __vmx_bug(regs);
    }
}

#define CASE_SET_REG(REG, reg)  \
    case REG_ ## REG:           \
        regs->reg = value;      \
        break

/*
 * Read from control registers. CR0 and CR4 are read from the shadow.
 */
static void mov_from_cr(int cr, int gp, struct xen_regs *regs)
{
    unsigned long value;
    struct exec_domain *d = current;

    if (cr != 3)
        __vmx_bug(regs);

    value = (unsigned long) d->thread.arch_vmx.cpu_cr3;
    ASSERT(value);

    switch (gp) {
    CASE_SET_REG(EAX, eax);
    CASE_SET_REG(ECX, ecx);
    CASE_SET_REG(EDX, edx);
    CASE_SET_REG(EBX, ebx);
    CASE_SET_REG(EBP, ebp);
    CASE_SET_REG(ESI, esi);
    CASE_SET_REG(EDI, edi);
    case REG_ESP:
        __vmwrite(GUEST_ESP, value);
        regs->esp = value;
        break;
    default:
        printk("invalid gp: %d\n", gp);
        __vmx_bug(regs);
    }

    VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx, \n", cr, value);
}
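
/*
 * Decode a control-register access exit and dispatch to mov_to_cr() or
 * mov_from_cr(); CLTS is handled inline by clearing TS in both GUEST_CR0
 * and the CR0 read shadow.
 */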
static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs *regs)
{
    unsigned int gp, cr;
    unsigned long value;

    switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
    case TYPE_MOV_TO_CR:
        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
        mov_to_cr(gp, cr, regs);
        break;
    case TYPE_MOV_FROM_CR:
        gp = exit_qualification & CONTROL_REG_ACCESS_REG;
        cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
        mov_from_cr(cr, gp, regs);
        break;
    case TYPE_CLTS:
        __vmread(GUEST_CR0, &value);
        value &= ~X86_CR0_TS;   /* clear TS */
        __vmwrite(GUEST_CR0, value);

        __vmread(CR0_READ_SHADOW, &value);
        value &= ~X86_CR0_TS;   /* clear TS */
        __vmwrite(CR0_READ_SHADOW, value);
        break;
    default:
        __vmx_bug(regs);
        break;
    }
}
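
/* Pass the guest RDMSR straight through to the hardware MSR. */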
static inline void vmx_do_msr_read(struct xen_regs *regs)
{
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%x, eax=%x, edx=%x",
            regs->ecx, regs->eax, regs->edx);

    rdmsr(regs->ecx, regs->eax, regs->edx);

    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: ecx=%x, eax=%x, edx=%x",
            regs->ecx, regs->eax, regs->edx);
}

/*
 * Need to use this exit to reschedule
 */
static inline void vmx_vmexit_do_hlt()
{
#if VMX_DEBUG
    unsigned long eip;
    __vmread(GUEST_EIP, &eip);
#endif
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%08lx\n", eip);
    __enter_scheduler();
}

static inline void vmx_vmexit_do_mwait()
{
#if VMX_DEBUG
    unsigned long eip;
    __vmread(GUEST_EIP, &eip);
#endif
    VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%08lx\n", eip);
    __enter_scheduler();
}
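
/*
 * Console output for the guest: characters arrive one at a time via
 * VMCALL (see EXIT_REASON_VMCALL below) and are buffered until a newline
 * or MAX_LINE characters, then printed with the domain id prefixed.
 */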
#define BUF_SIZ     256
#define MAX_LINE    80
char print_buf[BUF_SIZ];
static int index;

static void vmx_print_line(const char c, struct exec_domain *d)
{

    if (index == MAX_LINE || c == '\n') {
        if (index == MAX_LINE) {
            print_buf[index++] = c;
        }
        print_buf[index] = '\0';
        printk("(GUEST: %u) %s\n", d->domain->id, (char *) &print_buf);
        index = 0;
    }
    else
        print_buf[index++] = c;
}
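
/*
 * Pervasive debugger (pdb) support: copy the guest state the debugger
 * cares about between the VMCS and a struct xen_regs frame around the
 * pdb_handle_exception() calls in the #DB/#BP cases below.
 */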
#ifdef XEN_DEBUGGER
void save_xen_regs(struct xen_regs *regs)
{
    __vmread(GUEST_SS_SELECTOR, &regs->xss);
    __vmread(GUEST_ESP, &regs->esp);
    __vmread(GUEST_EFLAGS, &regs->eflags);
    __vmread(GUEST_CS_SELECTOR, &regs->xcs);
    __vmread(GUEST_EIP, &regs->eip);

    __vmread(GUEST_GS_SELECTOR, &regs->xgs);
    __vmread(GUEST_FS_SELECTOR, &regs->xfs);
    __vmread(GUEST_ES_SELECTOR, &regs->xes);
    __vmread(GUEST_DS_SELECTOR, &regs->xds);
}

void restore_xen_regs(struct xen_regs *regs)
{
    __vmwrite(GUEST_SS_SELECTOR, regs->xss);
    __vmwrite(GUEST_ESP, regs->esp);
    __vmwrite(GUEST_EFLAGS, regs->eflags);
    __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
    __vmwrite(GUEST_EIP, regs->eip);

    __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
    __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
    __vmwrite(GUEST_ES_SELECTOR, regs->xes);
    __vmwrite(GUEST_DS_SELECTOR, regs->xds);
}
#endif
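
/*
 * Top-level VM-exit dispatcher, entered with the guest register frame.
 * Re-injects any event that was being delivered when the exit occurred
 * (IDT-vectoring information), then switches on the exit reason.
 */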
asmlinkage void vmx_vmexit_handler(struct xen_regs regs)
{
    unsigned int exit_reason, idtv_info_field;
    unsigned long exit_qualification, eip, inst_len = 0;
    struct exec_domain *d = current;
    int error;

    if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
        __vmx_bug(&regs);

    __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
    if (idtv_info_field & INTR_INFO_VALID_MASK) {
        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
        if ((idtv_info_field & 0xff) == 14) {
            unsigned long error_code;

            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
            printk("#PG error code: %lx\n", error_code);
        }
        VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x\n",
                idtv_info_field);
    }

    /* don't bother with H/W interrupts */
    if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
        exit_reason != EXIT_REASON_VMCALL &&
        exit_reason != EXIT_REASON_IO_INSTRUCTION)
        VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x\n", exit_reason);

    if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        __vmread(GUEST_EIP, &eip);
        domain_crash();
        return;
    }

    switch (exit_reason) {
    case EXIT_REASON_EXCEPTION_NMI:
    {
        /*
         * We don't set the software-interrupt exiting (INT n).
         * (1) We can get an exception (e.g. #PG) in the guest, or
         * (2) NMI
         */
        int error;
        unsigned int vector;
        unsigned long va;
        unsigned long error_code;

        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
            && !(vector & INTR_INFO_VALID_MASK))
            __vmx_bug(&regs);
        vector &= 0xff;

        switch (vector) {
#ifdef XEN_DEBUGGER
        case VECTOR_DB:
        {
            save_xen_regs(&regs);
            pdb_handle_exception(1, &regs, 1);
            restore_xen_regs(&regs);
            break;
        }
        case VECTOR_BP:
        {
            save_xen_regs(&regs);
            pdb_handle_exception(3, &regs, 1);
            restore_xen_regs(&regs);
            break;
        }
#endif
        case VECTOR_GP:
        {
            vmx_do_general_protection_fault(&regs);
            break;
        }
        case VECTOR_PG:
        {
            __vmread(EXIT_QUALIFICATION, &va);
            __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
            VMX_DBG_LOG(DBG_LEVEL_VMMU,
                    "eax=%x, ebx=%x, ecx=%x, edx=%x, esi=%x, edi=%x\n",
                    regs.eax, regs.ebx, regs.ecx, regs.edx, regs.esi, regs.edi);

            if (!(error = vmx_do_page_fault(va, error_code))) {
                /*
                 * Inject #PG using Interruption-Information Fields
                 */
                unsigned long intr_fields;

                intr_fields = (INTR_INFO_VALID_MASK |
                               INTR_TYPE_EXCEPTION |
                               INTR_INFO_DELIEVER_CODE_MASK |
                               VECTOR_PG);
                __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
                d->thread.arch_vmx.cpu_cr2 = va;
            }
            break;
        }
        default:
            __vmx_bug(&regs);
            break;
        }
        break;
    }
    case EXIT_REASON_EXTERNAL_INTERRUPT:
    {
        extern int vector_irq[];
        extern asmlinkage void do_IRQ(struct xen_regs);
        extern void smp_apic_timer_interrupt(struct xen_regs *);
        extern void timer_interrupt(int, void *, struct xen_regs *);
        unsigned int vector;

        if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
            && !(vector & INTR_INFO_VALID_MASK))
            __vmx_bug(&regs);

        vector &= 0xff;
        local_irq_disable();

        if (vector == LOCAL_TIMER_VECTOR) {
            smp_apic_timer_interrupt(&regs);
        } else {
            regs.entry_vector = (vector == FIRST_DEVICE_VECTOR?
                                 0 : vector_irq[vector]);
            do_IRQ(regs);
        }
        break;
    }
    case EXIT_REASON_PENDING_INTERRUPT:
        __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
                  MONITOR_CPU_BASED_EXEC_CONTROLS);
        vmx_intr_assist(d);
        break;
    case EXIT_REASON_TASK_SWITCH:
        __vmx_bug(&regs);
        break;
    case EXIT_REASON_CPUID:
        __get_instruction_length(inst_len);
        vmx_vmexit_do_cpuid(regs.eax, &regs);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_HLT:
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        vmx_vmexit_do_hlt();
        break;
    case EXIT_REASON_INVLPG:
    {
        unsigned long va;

        __vmread(EXIT_QUALIFICATION, &va);
        vmx_vmexit_do_invlpg(va);
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    }
    case EXIT_REASON_VMCALL:
        __get_instruction_length(inst_len);
        __vmread(GUEST_EIP, &eip);
        __vmread(EXIT_QUALIFICATION, &exit_qualification);

        vmx_print_line(regs.eax, d); /* provides the current domain */
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_CR_ACCESS:
    {
        __vmread(GUEST_EIP, &eip);
        __get_instruction_length(inst_len);
        __vmread(EXIT_QUALIFICATION, &exit_qualification);

        VMX_DBG_LOG(DBG_LEVEL_1,
                "eip = %lx, inst_len =%lx, exit_qualification = %lx\n",
                eip, inst_len, exit_qualification);
        vmx_cr_access(exit_qualification, &regs);
        __update_guest_eip(inst_len);
        break;
    }
    case EXIT_REASON_DR_ACCESS:
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        vmx_dr_access(exit_qualification, &regs);
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_IO_INSTRUCTION:
        __vmread(EXIT_QUALIFICATION, &exit_qualification);
        __get_instruction_length(inst_len);
        vmx_io_instruction(&regs, exit_qualification, inst_len);
        break;
    case EXIT_REASON_MSR_READ:
        __get_instruction_length(inst_len);
        vmx_do_msr_read(&regs);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_MSR_WRITE:
        __vmread(GUEST_EIP, &eip);
        VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%08lx, eax=%08x, edx=%08x",
                eip, regs.eax, regs.edx);
        /* just ignore this point */
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        break;
    case EXIT_REASON_MWAIT_INSTRUCTION:
        __get_instruction_length(inst_len);
        __update_guest_eip(inst_len);
        vmx_vmexit_do_mwait();
        break;
    default:
        __vmx_bug(&regs);       /* should not happen */
    }
    return;
}
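
/*
 * Restore the guest's %cr2 (saved in arch_vmx.cpu_cr2 by the #PG path
 * above) just before resuming the guest; CR2 is not loaded from the VMCS
 * by hardware on VM entry.
 */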
asmlinkage void load_cr2(void)
{
    struct exec_domain *d = current;

    local_irq_disable();
    asm volatile("movl %0,%%cr2": :"r" (d->thread.arch_vmx.cpu_cr2));
}