debuggers.hg

view xen/arch/x86/vmx.c @ 3755:ea98f0bb6510

bitkeeper revision 1.1159.212.127 (4208b02bTdSR4AVYRg8diDkKZmIVUg)

General shadow code cleanup.

Fixed compilation problems when SHADOW_DEBUG is enabled.
Fixed compilation problems when CONFIG_VMX is undefined.

Simplified l1pte_write_fault and l1pte_read_fault.
Name change: spfn => smfn (shadow machine frame numbers).

In general, the terms pfn and gpfn now refer to frames in the
guest's idea of physical memory (which differs from machine memory
for full shadow guests). mfn always refers to a machine frame number.
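
For example (an illustrative sketch only; phys_to_machine_mapping() is the
helper used in the code below, while guest_cr3 is a made-up variable name):

    /* gpfn: a frame number in the guest's pseudo-physical space */
    unsigned long gpfn = guest_cr3 >> PAGE_SHIFT;
    /* mfn: the machine frame that actually backs that gpfn */
    unsigned long mfn = phys_to_machine_mapping(gpfn);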

One bug fix for check_pagetable():
If we're using writable page tables
along with shadow mode, don't check the currently writable page table
page -- check its snapshot instead.

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author mafetter@fleming.research
date Tue Feb 08 12:27:23 2005 +0000 (2005-02-08)
parents ef5e5cd10778
children f5f2757b3aa2 cb87fd290eb0 872ae5bed5f4
line source
1 /* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
2 /*
3 * vmx.c: handling VMX architecture-related VM exits
4 * Copyright (c) 2004, Intel Corporation.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 *
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/sched.h>
25 #include <asm/current.h>
26 #include <asm/io.h>
27 #include <asm/irq.h>
28 #include <asm/shadow.h>
29 #include <asm/regs.h>
30 #include <asm/cpufeature.h>
31 #include <asm/processor.h>
32 #include <asm/types.h>
33 #include <asm/msr.h>
34 #include <asm/spinlock.h>
35 #include <asm/vmx.h>
36 #include <asm/vmx_vmcs.h>
37 #include <public/io/ioreq.h>
39 #ifdef CONFIG_VMX
41 int vmcs_size;
42 unsigned int opt_vmx_debug_level;
44 extern long evtchn_send(int lport);
45 extern long do_block(void);
47 #define VECTOR_DB 1
48 #define VECTOR_BP 3
49 #define VECTOR_GP 13
50 #define VECTOR_PG 14
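/*
 * start_vmx(): per-CPU VMX bring-up. Reads the feature flags from CPUID
 * leaf 1 (ECX), bails out if VMXE is not advertised, sets CR4.VMXE, then
 * allocates a VMCS-sized region and executes VMXON on its physical
 * address to enter VMX root operation.
 */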
52 int start_vmx()
53 {
54 struct vmcs_struct *vmcs;
55 unsigned long ecx;
56 u64 phys_vmcs; /* debugging */
58 vmcs_size = VMCS_SIZE;
59 /*
60 * Xen does not fill x86_capability words except 0.
61 */
62 ecx = cpuid_ecx(1);
63 boot_cpu_data.x86_capability[4] = ecx;
65 if (!(test_bit(X86_FEATURE_VMXE, &boot_cpu_data.x86_capability)))
66 return 0;
68 set_in_cr4(X86_CR4_VMXE); /* Enable VMXE */
70 if (!(vmcs = alloc_vmcs())) {
71 printk("Failed to allocate VMCS\n");
72 return 0;
73 }
75 phys_vmcs = (u64) virt_to_phys(vmcs);
77 if (!(__vmxon(phys_vmcs))) {
78 printk("VMXON is done\n");
79 }
81 return 1;
82 }
84 void stop_vmx()
85 {
86 if (read_cr4() & X86_CR4_VMXE)
87 __vmxoff();
88 }
90 /*
91 * Not all cases receive a valid value in the VM-exit instruction length field.
92 */
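/*
 * Note: the macro below expands to two statements, implicitly uses the
 * caller's local 'regs', and calls __vmx_bug() if the reported length
 * falls outside the architectural 1..15 byte range.
 */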
93 #define __get_instruction_length(len) \
94 __vmread(INSTRUCTION_LEN, &(len)); \
95 if ((len) < 1 || (len) > 15) \
96 __vmx_bug(&regs);
98 static void inline __update_guest_eip(unsigned long inst_len)
99 {
100 unsigned long current_eip;
102 __vmread(GUEST_EIP, &current_eip);
103 __vmwrite(GUEST_EIP, current_eip + inst_len);
104 }
107 #include <asm/domain_page.h>
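/*
 * Handle a page fault taken while the guest was running: read the guest
 * PDE, prime guest_pl2e_cache[] so that linear_pg_table[] can be used to
 * fetch the guest PTE, compute the guest physical address, hand MMIO
 * addresses to handle_mmio(), and let shadow_fault() try to fix up the
 * shadow tables. Returns 0 if the fault could not be resolved and must
 * be reflected back to the guest as #PF.
 */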
109 static int vmx_do_page_fault(unsigned long va, unsigned long error_code)
110 {
111 unsigned long eip, pfn;
112 unsigned int index;
113 unsigned long gpde = 0, gpte, gpa;
114 int result;
115 struct exec_domain *ed = current;
117 #if VMX_DEBUG
118 {
119 __vmread(GUEST_EIP, &eip);
120 VMX_DBG_LOG(DBG_LEVEL_VMMU,
121 "vmx_do_page_fault = 0x%lx, eip = %lx, error_code = %lx\n",
122 va, eip, error_code);
123 }
124 #endif
125 /*
126 * Set up the guest page directory cache to make linear_pg_table[] work.
127 */
128 __guest_get_l2e(ed, va, &gpde);
129 if (!(gpde & _PAGE_PRESENT))
130 return 0;
132 index = (va >> L2_PAGETABLE_SHIFT);
133 if (!l2_pgentry_val(ed->arch.guest_pl2e_cache[index])) {
134 pfn = phys_to_machine_mapping(gpde >> PAGE_SHIFT);
136 VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_do_page_fault: pagetable = %lx\n",
137 pagetable_val(ed->arch.pagetable));
139 ed->arch.guest_pl2e_cache[index] =
140 mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
141 }
143 if (unlikely(__get_user(gpte, (unsigned long *)
144 &linear_pg_table[va >> PAGE_SHIFT])))
145 return 0;
147 gpa = (gpte & PAGE_MASK) | (va & (PAGE_SIZE - 1));
149 if (mmio_space(gpa))
150 handle_mmio(va, gpte, gpa);
152 if ((result = shadow_fault(va, error_code)))
153 return result;
155 return 0; /* failed to resolve, i.e., raise #PG */
156 }
158 static void vmx_do_general_protection_fault(struct xen_regs *regs)
159 {
160 unsigned long eip, error_code;
161 unsigned long intr_fields;
163 __vmread(GUEST_EIP, &eip);
164 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
166 VMX_DBG_LOG(DBG_LEVEL_1,
167 "vmx_general_protection_fault: eip = %lx, error_code = %lx\n",
168 eip, error_code);
170 VMX_DBG_LOG(DBG_LEVEL_1,
171 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx\n",
172 regs->eax, regs->ebx, regs->ecx, regs->edx, regs->esi, regs->edi);
174 /* Reflect it back into the guest */
175 intr_fields = (INTR_INFO_VALID_MASK |
176 INTR_TYPE_EXCEPTION |
177 INTR_INFO_DELIEVER_CODE_MASK |
178 VECTOR_GP);
179 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
180 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
181 }
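/*
 * CPUID exits: execute the real CPUID and hand the results back to the
 * guest, except that for leaf 1 the PSE, PAE and PSE36 feature bits are
 * masked out of EDX (presumably because the shadow code cannot yet
 * mirror those paging modes).
 */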
183 static void vmx_vmexit_do_cpuid(unsigned long input, struct xen_regs *regs)
184 {
185 int eax, ebx, ecx, edx;
186 unsigned long eip;
188 __vmread(GUEST_EIP, &eip);
190 VMX_DBG_LOG(DBG_LEVEL_1,
191 "do_cpuid: (eax) %lx, (ebx) %lx, (ecx) %lx, (edx) %lx,"
192 " (esi) %lx, (edi) %lx\n",
193 regs->eax, regs->ebx, regs->ecx, regs->edx,
194 regs->esi, regs->edi);
196 cpuid(input, &eax, &ebx, &ecx, &edx);
198 if (input == 1) {
199 clear_bit(X86_FEATURE_PSE, &edx);
200 clear_bit(X86_FEATURE_PAE, &edx);
201 clear_bit(X86_FEATURE_PSE36, &edx);
202 }
204 regs->eax = (unsigned long) eax;
205 regs->ebx = (unsigned long) ebx;
206 regs->ecx = (unsigned long) ecx;
207 regs->edx = (unsigned long) edx;
209 VMX_DBG_LOG(DBG_LEVEL_1,
210 "vmx_vmexit_do_cpuid: eip: %lx, input: %lx, out:eax=%x, ebx=%x, ecx=%x, edx=%x\n",
211 eip, input, eax, ebx, ecx, edx);
213 }
215 #define CASE_GET_REG_P(REG, reg) \
216 case REG_ ## REG: reg_p = &(regs->reg); break
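/*
 * Debug-register accesses: the exit qualification encodes which DR was
 * touched (DEBUG_REG_ACCESS_NUM), the direction (DEBUG_REG_ACCESS_TYPE,
 * MOV to or from DR) and which general-purpose register was used
 * (DEBUG_REG_ACCESS_REG). The handler simply mirrors the values in
 * ed->arch.debugreg[].
 */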
218 static void vmx_dr_access (unsigned long exit_qualification, struct xen_regs *regs)
219 {
220 unsigned int reg;
221 unsigned long *reg_p = 0;
222 struct exec_domain *ed = current;
223 unsigned long eip;
225 __vmread(GUEST_EIP, &eip);
227 reg = exit_qualification & DEBUG_REG_ACCESS_NUM;
229 VMX_DBG_LOG(DBG_LEVEL_1,
230 "vmx_dr_access : eip=%lx, reg=%d, exit_qualification = %lx\n",
231 eip, reg, exit_qualification);
233 switch(exit_qualification & DEBUG_REG_ACCESS_REG) {
234 CASE_GET_REG_P(EAX, eax);
235 CASE_GET_REG_P(ECX, ecx);
236 CASE_GET_REG_P(EDX, edx);
237 CASE_GET_REG_P(EBX, ebx);
238 CASE_GET_REG_P(EBP, ebp);
239 CASE_GET_REG_P(ESI, esi);
240 CASE_GET_REG_P(EDI, edi);
241 case REG_ESP:
242 break;
243 default:
244 __vmx_bug(regs);
245 }
247 switch (exit_qualification & DEBUG_REG_ACCESS_TYPE) {
248 case TYPE_MOV_TO_DR:
249 /* don't need to check the range */
250 if (reg != REG_ESP)
251 ed->arch.debugreg[reg] = *reg_p;
252 else {
253 unsigned long value;
254 __vmread(GUEST_ESP, &value);
255 ed->arch.debugreg[reg] = value;
256 }
257 break;
258 case TYPE_MOV_FROM_DR:
259 if (reg != REG_ESP)
260 *reg_p = ed->arch.debugreg[reg];
261 else {
262 __vmwrite(GUEST_ESP, ed->arch.debugreg[reg]);
263 }
264 break;
265 }
266 }
268 /*
269 * Invalidate the TLB entry for va. Invalidate the shadow page corresponding
270 * to the address va.
271 */
272 static void vmx_vmexit_do_invlpg(unsigned long va)
273 {
274 unsigned long eip;
275 struct exec_domain *ed = current;
276 unsigned int index;
278 __vmread(GUEST_EIP, &eip);
280 VMX_DBG_LOG(DBG_LEVEL_VMMU, "vmx_vmexit_do_invlpg:eip=%08lx, va=%08lx\n",
281 eip, va);
283 /*
284 * We do the safest thing first, then try to update the shadow by
285 * copying from the guest.
286 */
287 vmx_shadow_invlpg(ed->domain, va);
288 index = (va >> L2_PAGETABLE_SHIFT);
289 ed->arch.guest_pl2e_cache[index] =
290 mk_l2_pgentry(0); /* invalidate pgd cache */
291 }
293 static inline void guest_pl2e_cache_invalidate(struct exec_domain *ed)
294 {
295 /*
296 * Need to optimize this
297 */
298 memset(ed->arch.guest_pl2e_cache, 0, PAGE_SIZE);
299 }
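/*
 * Translate a guest virtual address to a guest physical address: read
 * the guest PDE, prime guest_pl2e_cache[] so the guest PTE can be read
 * through linear_pg_table[], then combine the PTE's frame with the page
 * offset. Returns 0 if the PTE cannot be read or is not present.
 */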
301 inline unsigned long gva_to_gpa(unsigned long gva)
302 {
303 unsigned long gpde, gpte, pfn, index;
304 struct exec_domain *ed = current;
306 __guest_get_l2e(ed, gva, &gpde);
307 index = (gva >> L2_PAGETABLE_SHIFT);
309 pfn = phys_to_machine_mapping(gpde >> PAGE_SHIFT);
311 ed->arch.guest_pl2e_cache[index] =
312 mk_l2_pgentry((pfn << PAGE_SHIFT) | __PAGE_HYPERVISOR);
314 if ( unlikely(__get_user(gpte, (unsigned long *)
315 &linear_pg_table[gva >> PAGE_SHIFT])) )
316 {
317 printk("gva_to_gpa EXIT: read gpte faulted\n");
318 return 0;
319 }
321 if ( !(gpte & _PAGE_PRESENT) )
322 {
323 printk("gva_to_gpa - EXIT: gpte not present (%lx)\n", gpte);
324 return 0;
325 }
327 return (gpte & PAGE_MASK) + (gva & ~PAGE_MASK);
328 }
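/*
 * I/O instruction exits. The exit qualification describes the access:
 *   bits 2:0  access size minus one (hence the '(... & 7) + 1' below)
 *   bit 3     direction (1 = IN)
 *   bit 4     string instruction (INS/OUTS)
 *   bit 5     REP prefix present
 *   bit 6     port encoding (1 = immediate port in bits 31:16, 0 = DX)
 * Port 0x80 (the traditional POST/delay port) is absorbed here; any other
 * access is turned into an ioreq on the shared page, the device model is
 * notified over the event channel, and the domain blocks.
 */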
330 static void vmx_io_instruction(struct xen_regs *regs,
331 unsigned long exit_qualification, unsigned long inst_len)
332 {
333 struct exec_domain *d = current;
334 vcpu_iodata_t *vio;
335 ioreq_t *p;
336 unsigned long addr;
337 unsigned long eip;
339 __vmread(GUEST_EIP, &eip);
341 VMX_DBG_LOG(DBG_LEVEL_1,
342 "vmx_io_instruction: eip=%08lx, exit_qualification = %lx\n",
343 eip, exit_qualification);
345 if (test_bit(6, &exit_qualification))
346 addr = (exit_qualification >> 16) & (0xffff);
347 else
348 addr = regs->edx & 0xffff;
350 if (addr == 0x80) {
351 __update_guest_eip(inst_len);
352 return;
353 }
355 vio = (vcpu_iodata_t *) d->arch.arch_vmx.vmx_platform.shared_page_va;
356 if (vio == 0) {
357 VMX_DBG_LOG(DBG_LEVEL_1, "bad shared page: %lx\n", (unsigned long) vio);
358 domain_crash();
359 }
360 p = &vio->vp_ioreq;
361 p->dir = test_bit(3, &exit_qualification);
362 set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
364 p->pdata_valid = 0;
365 p->count = 1;
366 p->size = (exit_qualification & 7) + 1;
368 if (test_bit(4, &exit_qualification)) {
369 unsigned long eflags;
371 __vmread(GUEST_EFLAGS, &eflags);
372 p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
373 p->pdata_valid = 1;
374 p->u.pdata = (void *) ((p->dir == IOREQ_WRITE) ?
375 regs->esi
376 : regs->edi);
377 p->u.pdata = (void *) gva_to_gpa(p->u.data);
378 if (test_bit(5, &exit_qualification))
379 p->count = regs->ecx;
380 if ((p->u.data & PAGE_MASK) !=
381 ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
382 printk("stringio crosses page boundary!\n");
383 if (p->u.data & (p->size - 1)) {
384 printk("Not aligned I/O!\n");
385 domain_crash();
386 }
387 p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
388 } else {
389 __update_guest_eip(inst_len);
390 }
391 } else if (p->dir == IOREQ_WRITE) {
392 p->u.data = regs->eax;
393 __update_guest_eip(inst_len);
394 } else
395 __update_guest_eip(inst_len);
397 p->addr = addr;
398 p->port_mm = 0;
399 p->state = STATE_IOREQ_READY;
400 evtchn_send(IOPACKET_PORT);
401 do_block();
402 }
404 #define CASE_GET_REG(REG, reg) \
405 case REG_ ## REG: value = regs->reg; break
407 /*
408 * Write to control registers
409 */
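/*
 * mov_to_cr() handles guest writes to CR0, CR3 and CR4:
 *  - CR0: when the guest first enables paging, its CR3 (a guest physical
 *    address) is translated to a machine frame, full 32-bit shadow mode
 *    is switched on, and GUEST_CR3 is pointed at the shadow table.
 *  - CR3: writing the current value is treated as a TLB flush and the
 *    shadow is invalidated; a new value causes a new shadow page table
 *    to be built.
 *  - CR4: PAE is rejected, and toggling PSE/PGE/PAE flushes the shadow
 *    state and the guest PDE cache.
 */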
410 static void mov_to_cr(int gp, int cr, struct xen_regs *regs)
411 {
412 unsigned long value;
413 unsigned long old_cr;
414 struct exec_domain *d = current;
416 switch (gp) {
417 CASE_GET_REG(EAX, eax);
418 CASE_GET_REG(ECX, ecx);
419 CASE_GET_REG(EDX, edx);
420 CASE_GET_REG(EBX, ebx);
421 CASE_GET_REG(EBP, ebp);
422 CASE_GET_REG(ESI, esi);
423 CASE_GET_REG(EDI, edi);
424 case REG_ESP:
425 __vmread(GUEST_ESP, &value);
426 break;
427 default:
428 printk("invalid gp: %d\n", gp);
429 __vmx_bug(regs);
430 }
432 VMX_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx\n", cr, value);
433 VMX_DBG_LOG(DBG_LEVEL_1, "current = %lx\n", (unsigned long) current);
435 switch(cr) {
436 case 0:
437 {
438 unsigned long old_base_pfn = 0, pfn;
440 /*
441 * CR0:
442 * We don't want to lose PE and PG.
443 */
444 __vmwrite(GUEST_CR0, (value | X86_CR0_PE | X86_CR0_PG));
445 __vmwrite(CR0_READ_SHADOW, value);
447 if (value & (X86_CR0_PE | X86_CR0_PG) &&
448 !test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
449 /*
450 * Enable paging
451 */
452 set_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state);
453 /*
454 * The guest CR3 must be pointing to the guest physical.
455 */
456 if (!(pfn = phys_to_machine_mapping(
457 d->arch.arch_vmx.cpu_cr3 >> PAGE_SHIFT)))
458 {
459 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Invalid CR3 value = %lx\n",
460 d->arch.arch_vmx.cpu_cr3);
461 domain_crash(); /* need to take a clean path */
462 }
463 old_base_pfn = pagetable_val(d->arch.pagetable) >> PAGE_SHIFT;
464 /*
465 * Now mm.pagetable points to machine physical.
466 */
467 d->arch.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
469 VMX_DBG_LOG(DBG_LEVEL_VMMU, "New mm.pagetable = %lx\n",
470 (unsigned long) (pfn << PAGE_SHIFT));
472 shadow_lock(d->domain);
473 shadow_mode_enable(d->domain, SHM_full_32);
474 shadow_unlock(d->domain);
476 __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
477 /*
478 * mm->shadow_table should hold the next CR3 for shadow
479 */
480 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx, pfn = %lx\n",
481 d->arch.arch_vmx.cpu_cr3, pfn);
482 put_page_and_type(&frame_table[old_base_pfn]);
484 }
485 break;
486 }
487 case 3:
488 {
489 unsigned long pfn;
491 /*
492 * If paging is not enabled yet, simply copy the value to CR3.
493 */
494 if (!test_bit(VMX_CPU_STATE_PG_ENABLED, &d->arch.arch_vmx.cpu_state)) {
495 d->arch.arch_vmx.cpu_cr3 = value;
496 return;
497 }
499 guest_pl2e_cache_invalidate(d);
500 /*
501 * We make a new one if the shadow does not exist.
502 */
503 if (value == d->arch.arch_vmx.cpu_cr3) {
504 /*
505 * This is a simple TLB flush, implying the guest has
506 * removed some translation or changed page attributes.
507 * We simply invalidate the shadow.
508 */
509 pfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
510 if ((pfn << PAGE_SHIFT) != pagetable_val(d->arch.pagetable))
511 __vmx_bug(regs);
512 vmx_shadow_clear_state(d->domain);
513 shadow_invalidate(d);
514 } else {
515 /*
516 * If different, make a shadow. Check if the PDBR is valid
517 * first.
518 */
519 VMX_DBG_LOG(DBG_LEVEL_VMMU, "CR3 value = %lx\n", value);
520 if ((value >> PAGE_SHIFT) > d->domain->max_pages)
521 {
522 VMX_DBG_LOG(DBG_LEVEL_VMMU,
523 "Invalid CR3 value=%lx\n", value);
524 domain_crash(); /* need to take a clean path */
525 }
526 pfn = phys_to_machine_mapping(value >> PAGE_SHIFT);
527 vmx_shadow_clear_state(d->domain);
528 d->arch.pagetable = mk_pagetable(pfn << PAGE_SHIFT);
529 shadow_mk_pagetable(d);
530 /*
531 * mm->shadow_table should hold the next CR3 for shadow
532 */
533 d->arch.arch_vmx.cpu_cr3 = value;
534 VMX_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %lx\n",
535 value);
536 __vmwrite(GUEST_CR3, pagetable_val(d->arch.shadow_table));
537 }
538 break;
539 }
540 case 4:
541 /* CR4 */
542 if (value & X86_CR4_PAE)
543 __vmx_bug(regs); /* not implemented */
544 __vmread(CR4_READ_SHADOW, &old_cr);
546 __vmwrite(GUEST_CR4, (value | X86_CR4_VMXE));
547 __vmwrite(CR4_READ_SHADOW, value);
549 /*
550 * Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
551 * all TLB entries except global entries.
552 */
553 if ((old_cr ^ value) & (X86_CR4_PSE | X86_CR4_PGE | X86_CR4_PAE)) {
554 vmx_shadow_clear_state(d->domain);
555 shadow_invalidate(d);
556 guest_pl2e_cache_invalidate(d);
557 }
558 break;
559 default:
560 printk("invalid cr: %d\n", cr);
561 __vmx_bug(regs);
562 }
563 }
565 #define CASE_SET_REG(REG, reg) \
566 case REG_ ## REG: \
567 regs->reg = value; \
568 break
570 /*
571 * Read from control registers. CR0 and CR4 are read from the shadow.
572 */
573 static void mov_from_cr(int cr, int gp, struct xen_regs *regs)
574 {
575 unsigned long value;
576 struct exec_domain *d = current;
578 if (cr != 3)
579 __vmx_bug(regs);
581 value = (unsigned long) d->arch.arch_vmx.cpu_cr3;
582 ASSERT(value);
584 switch (gp) {
585 CASE_SET_REG(EAX, eax);
586 CASE_SET_REG(ECX, ecx);
587 CASE_SET_REG(EDX, edx);
588 CASE_SET_REG(EBX, ebx);
589 CASE_SET_REG(EBP, ebp);
590 CASE_SET_REG(ESI, esi);
591 CASE_SET_REG(EDI, edi);
592 case REG_ESP:
593 __vmwrite(GUEST_ESP, value);
594 regs->esp = value;
595 break;
596 default:
597 printk("invalid gp: %d\n", gp);
598 __vmx_bug(regs);
599 }
601 VMX_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx\n", cr, value);
602 }
604 static void vmx_cr_access (unsigned long exit_qualification, struct xen_regs *regs)
605 {
606 unsigned int gp, cr;
607 unsigned long value;
609 switch (exit_qualification & CONTROL_REG_ACCESS_TYPE) {
610 case TYPE_MOV_TO_CR:
611 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
612 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
613 mov_to_cr(gp, cr, regs);
614 break;
615 case TYPE_MOV_FROM_CR:
616 gp = exit_qualification & CONTROL_REG_ACCESS_REG;
617 cr = exit_qualification & CONTROL_REG_ACCESS_NUM;
618 mov_from_cr(cr, gp, regs);
619 break;
620 case TYPE_CLTS:
621 __vmread(GUEST_CR0, &value);
622 value &= ~X86_CR0_TS; /* clear TS */
623 __vmwrite(GUEST_CR0, value);
625 __vmread(CR0_READ_SHADOW, &value);
626 value &= ~X86_CR0_TS; /* clear TS */
627 __vmwrite(CR0_READ_SHADOW, value);
628 break;
629 default:
630 __vmx_bug(regs);
631 break;
632 }
633 }
635 static inline void vmx_do_msr_read(struct xen_regs *regs)
636 {
637 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read: ecx=%lx, eax=%lx, edx=%lx",
638 regs->ecx, regs->eax, regs->edx);
640 rdmsr(regs->ecx, regs->eax, regs->edx);
642 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_do_msr_read returns: "
643 "ecx=%lx, eax=%lx, edx=%lx",
644 regs->ecx, regs->eax, regs->edx);
645 }
647 /*
648 * Need to use this exit to reschedule
649 */
650 static inline void vmx_vmexit_do_hlt()
651 {
652 #if VMX_DEBUG
653 unsigned long eip;
654 __vmread(GUEST_EIP, &eip);
655 #endif
656 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_hlt:eip=%08lx\n", eip);
657 __enter_scheduler();
658 }
660 static inline void vmx_vmexit_do_mwait()
661 {
662 #if VMX_DEBUG
663 unsigned long eip;
664 __vmread(GUEST_EIP, &eip);
665 #endif
666 VMX_DBG_LOG(DBG_LEVEL_1, "vmx_vmexit_do_mwait:eip=%08lx\n", eip);
667 __enter_scheduler();
668 }
670 #define BUF_SIZ 256
671 #define MAX_LINE 80
672 char print_buf[BUF_SIZ];
673 static int index;
675 static void vmx_print_line(const char c, struct exec_domain *d)
676 {
678 if (index == MAX_LINE || c == '\n') {
679 if (index == MAX_LINE) {
680 print_buf[index++] = c;
681 }
682 print_buf[index] = '\0';
683 printk("(GUEST: %u) %s\n", d->domain->id, (char *) &print_buf);
684 index = 0;
685 }
686 else
687 print_buf[index++] = c;
688 }
690 #ifdef XEN_DEBUGGER
691 void save_xen_regs(struct xen_regs *regs)
692 {
693 __vmread(GUEST_SS_SELECTOR, &regs->xss);
694 __vmread(GUEST_ESP, &regs->esp);
695 __vmread(GUEST_EFLAGS, &regs->eflags);
696 __vmread(GUEST_CS_SELECTOR, &regs->xcs);
697 __vmread(GUEST_EIP, &regs->eip);
699 __vmread(GUEST_GS_SELECTOR, &regs->xgs);
700 __vmread(GUEST_FS_SELECTOR, &regs->xfs);
701 __vmread(GUEST_ES_SELECTOR, &regs->xes);
702 __vmread(GUEST_DS_SELECTOR, &regs->xds);
703 }
705 void restore_xen_regs(struct xen_regs *regs)
706 {
707 __vmwrite(GUEST_SS_SELECTOR, regs->xss);
708 __vmwrite(GUEST_ESP, regs->esp);
709 __vmwrite(GUEST_EFLAGS, regs->eflags);
710 __vmwrite(GUEST_CS_SELECTOR, regs->xcs);
711 __vmwrite(GUEST_EIP, regs->eip);
713 __vmwrite(GUEST_GS_SELECTOR, regs->xgs);
714 __vmwrite(GUEST_FS_SELECTOR, regs->xfs);
715 __vmwrite(GUEST_ES_SELECTOR, regs->xes);
716 __vmwrite(GUEST_DS_SELECTOR, regs->xds);
717 }
718 #endif
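/*
 * Main VM-exit dispatcher. Any event that was being delivered when the
 * exit occurred (IDT_VECTORING_INFO_FIELD) is re-injected, a failed VM
 * entry crashes the domain, and the remaining exits are dispatched on
 * the exit reason. Most handlers read the exit qualification and/or the
 * instruction length and advance GUEST_EIP before the guest is resumed.
 */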
720 asmlinkage void vmx_vmexit_handler(struct xen_regs regs)
721 {
722 unsigned int exit_reason, idtv_info_field;
723 unsigned long exit_qualification, eip, inst_len = 0;
724 struct exec_domain *d = current;
725 int error;
727 if ((error = __vmread(VM_EXIT_REASON, &exit_reason)))
728 __vmx_bug(&regs);
730 __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
731 if (idtv_info_field & INTR_INFO_VALID_MASK) {
732 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
733 if ((idtv_info_field & 0xff) == 14) {
734 unsigned long error_code;
736 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
737 printk("#PG error code: %lx\n", error_code);
738 }
739 VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x\n",
740 idtv_info_field);
741 }
743 /* don't bother logging H/W interrupts */
744 if (exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT &&
745 exit_reason != EXIT_REASON_VMCALL &&
746 exit_reason != EXIT_REASON_IO_INSTRUCTION)
747 VMX_DBG_LOG(DBG_LEVEL_0, "exit reason = %x\n", exit_reason);
749 if (exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) {
750 __vmread(EXIT_QUALIFICATION, &exit_qualification);
751 __vmread(GUEST_EIP, &eip);
752 domain_crash();
753 return;
754 }
756 switch (exit_reason) {
757 case EXIT_REASON_EXCEPTION_NMI:
758 {
759 /*
760 * We don't enable software-interrupt exiting (INT n), so here we can
761 * get either (1) an exception (e.g. #PG) raised by the guest, or
762 * (2) an NMI.
763 */
764 int error;
765 unsigned int vector;
766 unsigned long va;
767 unsigned long error_code;
769 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
770 && !(vector & INTR_INFO_VALID_MASK))
771 __vmx_bug(&regs);
772 vector &= 0xff;
774 switch (vector) {
775 #ifdef XEN_DEBUGGER
776 case VECTOR_DB:
777 {
778 save_xen_regs(&regs);
779 pdb_handle_exception(1, &regs, 1);
780 restore_xen_regs(&regs);
781 break;
782 }
783 case VECTOR_BP:
784 {
785 save_xen_regs(&regs);
786 pdb_handle_exception(3, &regs, 1);
787 restore_xen_regs(&regs);
788 break;
789 }
790 #endif
791 case VECTOR_GP:
792 {
793 vmx_do_general_protection_fault(&regs);
794 break;
795 }
796 case VECTOR_PG:
797 {
798 __vmread(EXIT_QUALIFICATION, &va);
799 __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
800 VMX_DBG_LOG(DBG_LEVEL_VMMU,
801 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx\n",
802 regs.eax, regs.ebx, regs.ecx, regs.edx, regs.esi,
803 regs.edi);
804 d->arch.arch_vmx.vmx_platform.mpci.inst_decoder_regs = &regs;
806 if (!(error = vmx_do_page_fault(va, error_code))) {
807 /*
808 * Inject #PG using Interruption-Information Fields
809 */
810 unsigned long intr_fields;
812 intr_fields = (INTR_INFO_VALID_MASK |
813 INTR_TYPE_EXCEPTION |
814 INTR_INFO_DELIEVER_CODE_MASK |
815 VECTOR_PG);
816 __vmwrite(VM_ENTRY_INTR_INFO_FIELD, intr_fields);
817 __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
818 d->arch.arch_vmx.cpu_cr2 = va;
819 }
820 break;
821 }
822 default:
823 __vmx_bug(&regs);
824 break;
825 }
826 break;
827 }
828 case EXIT_REASON_EXTERNAL_INTERRUPT:
829 {
830 extern int vector_irq[];
831 extern asmlinkage void do_IRQ(struct xen_regs);
832 extern void smp_apic_timer_interrupt(struct xen_regs *);
833 extern void timer_interrupt(int, void *, struct xen_regs *);
834 unsigned int vector;
836 if ((error = __vmread(VM_EXIT_INTR_INFO, &vector))
837 && !(vector & INTR_INFO_VALID_MASK))
838 __vmx_bug(&regs);
840 vector &= 0xff;
841 local_irq_disable();
843 if (vector == LOCAL_TIMER_VECTOR) {
844 smp_apic_timer_interrupt(&regs);
845 } else {
846 regs.entry_vector = (vector == FIRST_DEVICE_VECTOR?
847 0 : vector_irq[vector]);
848 do_IRQ(regs);
849 }
850 break;
851 }
852 case EXIT_REASON_PENDING_INTERRUPT:
853 __vmwrite(CPU_BASED_VM_EXEC_CONTROL,
854 MONITOR_CPU_BASED_EXEC_CONTROLS);
855 vmx_intr_assist(d);
856 break;
857 case EXIT_REASON_TASK_SWITCH:
858 __vmx_bug(&regs);
859 break;
860 case EXIT_REASON_CPUID:
861 __get_instruction_length(inst_len);
862 vmx_vmexit_do_cpuid(regs.eax, &regs);
863 __update_guest_eip(inst_len);
864 break;
865 case EXIT_REASON_HLT:
866 __get_instruction_length(inst_len);
867 __update_guest_eip(inst_len);
868 vmx_vmexit_do_hlt();
869 break;
870 case EXIT_REASON_INVLPG:
871 {
872 unsigned long va;
874 __vmread(EXIT_QUALIFICATION, &va);
875 vmx_vmexit_do_invlpg(va);
876 __get_instruction_length(inst_len);
877 __update_guest_eip(inst_len);
878 break;
879 }
880 case EXIT_REASON_VMCALL:
881 __get_instruction_length(inst_len);
882 __vmread(GUEST_EIP, &eip);
883 __vmread(EXIT_QUALIFICATION, &exit_qualification);
885 vmx_print_line(regs.eax, d); /* provides the current domain */
886 __update_guest_eip(inst_len);
887 break;
888 case EXIT_REASON_CR_ACCESS:
889 {
890 __vmread(GUEST_EIP, &eip);
891 __get_instruction_length(inst_len);
892 __vmread(EXIT_QUALIFICATION, &exit_qualification);
894 VMX_DBG_LOG(DBG_LEVEL_1, "eip = %lx, inst_len =%lx, exit_qualification = %lx\n",
895 eip, inst_len, exit_qualification);
896 vmx_cr_access(exit_qualification, &regs);
897 __update_guest_eip(inst_len);
898 break;
899 }
900 case EXIT_REASON_DR_ACCESS:
901 __vmread(EXIT_QUALIFICATION, &exit_qualification);
902 vmx_dr_access(exit_qualification, &regs);
903 __get_instruction_length(inst_len);
904 __update_guest_eip(inst_len);
905 break;
906 case EXIT_REASON_IO_INSTRUCTION:
907 __vmread(EXIT_QUALIFICATION, &exit_qualification);
908 __get_instruction_length(inst_len);
909 vmx_io_instruction(&regs, exit_qualification, inst_len);
910 break;
911 case EXIT_REASON_MSR_READ:
912 __get_instruction_length(inst_len);
913 vmx_do_msr_read(&regs);
914 __update_guest_eip(inst_len);
915 break;
916 case EXIT_REASON_MSR_WRITE:
917 __vmread(GUEST_EIP, &eip);
918 VMX_DBG_LOG(DBG_LEVEL_1, "MSR_WRITE: eip=%08lx, eax=%08lx, edx=%08lx",
919 eip, regs.eax, regs.edx);
920 /* just ignore the MSR write for now */
921 __get_instruction_length(inst_len);
922 __update_guest_eip(inst_len);
923 break;
924 case EXIT_REASON_MWAIT_INSTRUCTION:
925 __get_instruction_length(inst_len);
926 __update_guest_eip(inst_len);
927 vmx_vmexit_do_mwait();
928 break;
929 default:
930 __vmx_bug(&regs); /* should not happen */
931 }
933 vmx_intr_assist(d);
934 return;
935 }
937 asmlinkage void load_cr2(void)
938 {
939 struct exec_domain *d = current;
941 local_irq_disable();
942 asm volatile("movl %0,%%cr2": :"r" (d->arch.arch_vmx.cpu_cr2));
943 }
945 #endif /* CONFIG_VMX */