
view xen/arch/x86/hvm/svm/svm.c @ 20924:6ade83cb21ca

xentrace: Trace p2m events

Add more tracing to aid in debugging ballooning / PoD:
* Nested page faults for EPT/NPT systems
* set_p2m_entry
* Decrease reservation (for ballooning)
* PoD populate, zero reclaim, superpage splinter

Signed-off-by: George Dunlap <george.dunlap@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 03 09:35:23 2010 +0000 (2010-02-03)
parents 508f457aa439
children e5e1e1532b97
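One of the trace points described above is the nested page fault record emitted from svm_do_nested_pgfault() further down in this file. As a reading aid, the sketch below restates that record's layout as a named struct; the type name npf_trace_record is hypothetical and the snippet is illustrative only, not part of the changeset.

#include <stdint.h>

/* Illustrative sketch only -- not part of this changeset. The layout mirrors
 * the anonymous struct built under tb_init_done in svm_do_nested_pgfault();
 * a xentrace consumer of TRC_HVM_NPF records would decode payloads of this
 * shape. */
struct npf_trace_record {
    uint64_t gpa;           /* faulting guest-physical address */
    uint64_t mfn;           /* machine frame currently backing the gfn */
    uint32_t qualification; /* always 0 on SVM */
    uint32_t p2mt;          /* p2m type returned by gfn_to_mfn_query() */
};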
line source
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/hypercall.h>
28 #include <xen/domain_page.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/paging.h>
32 #include <asm/p2m.h>
33 #include <asm/mem_sharing.h>
34 #include <asm/regs.h>
35 #include <asm/cpufeature.h>
36 #include <asm/processor.h>
37 #include <asm/types.h>
38 #include <asm/debugreg.h>
39 #include <asm/msr.h>
40 #include <asm/spinlock.h>
41 #include <asm/hvm/emulate.h>
42 #include <asm/hvm/hvm.h>
43 #include <asm/hvm/support.h>
44 #include <asm/hvm/io.h>
45 #include <asm/hvm/svm/asid.h>
46 #include <asm/hvm/svm/svm.h>
47 #include <asm/hvm/svm/vmcb.h>
48 #include <asm/hvm/svm/emulate.h>
49 #include <asm/hvm/svm/intr.h>
50 #include <asm/x86_emulate.h>
51 #include <public/sched.h>
52 #include <asm/hvm/vpt.h>
53 #include <asm/hvm/trace.h>
54 #include <asm/hap.h>
55 #include <asm/debugger.h>
57 u32 svm_feature_flags;
59 #define set_segment_register(name, value) \
60 asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
62 static struct hvm_function_table svm_function_table;
64 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
66 /* va of hardware host save area */
67 static void *hsa[NR_CPUS] __read_mostly;
69 /* vmcb used for extended host state */
70 static void *root_vmcb[NR_CPUS] __read_mostly;
72 static void inline __update_guest_eip(
73 struct cpu_user_regs *regs, unsigned int inst_len)
74 {
75 struct vcpu *curr = current;
77 if ( unlikely(inst_len == 0) )
78 return;
80 if ( unlikely(inst_len > 15) )
81 {
82 gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
83 domain_crash(curr->domain);
84 return;
85 }
87 ASSERT(regs == guest_cpu_user_regs());
89 regs->eip += inst_len;
90 regs->eflags &= ~X86_EFLAGS_RF;
92 curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;
94 if ( regs->eflags & X86_EFLAGS_TF )
95 hvm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
96 }
98 static void svm_cpu_down(void)
99 {
100 write_efer(read_efer() & ~EFER_SVME);
101 }
103 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
104 {
105 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
106 u32 ecx = regs->ecx;
108 HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
109 ecx, msr_content);
111 switch ( ecx )
112 {
113 case MSR_EFER:
114 if ( hvm_set_efer(msr_content) )
115 return HNDL_exception_raised;
116 break;
118 case MSR_IA32_MC4_MISC: /* Threshold register */
119 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
120 /*
121 * MCA/MCE: Threshold register is reported to be locked, so we ignore
122 * all write accesses. This behaviour matches real HW, so guests should
123 * have no problem with this.
124 */
125 break;
127 default:
128 return HNDL_unhandled;
129 }
131 return HNDL_done;
132 }
134 static void svm_save_dr(struct vcpu *v)
135 {
136 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
138 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
139 return;
141 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
142 v->arch.hvm_vcpu.flag_dr_dirty = 0;
143 v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;
145 v->arch.guest_context.debugreg[0] = read_debugreg(0);
146 v->arch.guest_context.debugreg[1] = read_debugreg(1);
147 v->arch.guest_context.debugreg[2] = read_debugreg(2);
148 v->arch.guest_context.debugreg[3] = read_debugreg(3);
149 v->arch.guest_context.debugreg[6] = vmcb->dr6;
150 v->arch.guest_context.debugreg[7] = vmcb->dr7;
151 }
153 static void __restore_debug_registers(struct vcpu *v)
154 {
155 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
157 if ( v->arch.hvm_vcpu.flag_dr_dirty )
158 return;
160 v->arch.hvm_vcpu.flag_dr_dirty = 1;
161 vmcb->dr_intercepts = 0;
163 write_debugreg(0, v->arch.guest_context.debugreg[0]);
164 write_debugreg(1, v->arch.guest_context.debugreg[1]);
165 write_debugreg(2, v->arch.guest_context.debugreg[2]);
166 write_debugreg(3, v->arch.guest_context.debugreg[3]);
167 vmcb->dr6 = v->arch.guest_context.debugreg[6];
168 vmcb->dr7 = v->arch.guest_context.debugreg[7];
169 }
171 /*
172 * DR7 is saved and restored on every vmexit. Other debug registers only
173 * need to be restored if their value is going to affect execution -- i.e.,
174 * if one of the breakpoints is enabled. So mask out all bits that don't
175 * enable some breakpoint functionality.
176 */
177 static void svm_restore_dr(struct vcpu *v)
178 {
179 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
180 __restore_debug_registers(v);
181 }
183 static int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
184 {
185 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
187 c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
188 c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
189 c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
190 c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
192 c->sysenter_cs = v->arch.hvm_svm.guest_sysenter_cs;
193 c->sysenter_esp = v->arch.hvm_svm.guest_sysenter_esp;
194 c->sysenter_eip = v->arch.hvm_svm.guest_sysenter_eip;
196 c->pending_event = 0;
197 c->error_code = 0;
198 if ( vmcb->eventinj.fields.v &&
199 hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
200 vmcb->eventinj.fields.vector) )
201 {
202 c->pending_event = (uint32_t)vmcb->eventinj.bytes;
203 c->error_code = vmcb->eventinj.fields.errorcode;
204 }
206 return 1;
207 }
209 static int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
210 {
211 unsigned long mfn = 0;
212 p2m_type_t p2mt;
213 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
215 if ( c->pending_valid &&
216 ((c->pending_type == 1) || (c->pending_type > 6) ||
217 (c->pending_reserved != 0)) )
218 {
219 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
220 c->pending_event);
221 return -EINVAL;
222 }
224 if ( !paging_mode_hap(v->domain) )
225 {
226 if ( c->cr0 & X86_CR0_PG )
227 {
228 mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
229 if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
230 {
231 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
232 c->cr3);
233 return -EINVAL;
234 }
235 }
237 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
238 put_page(pagetable_get_page(v->arch.guest_table));
240 v->arch.guest_table = pagetable_from_pfn(mfn);
241 }
243 v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
244 v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
245 v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
246 v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
247 hvm_update_guest_cr(v, 0);
248 hvm_update_guest_cr(v, 2);
249 hvm_update_guest_cr(v, 4);
251 v->arch.hvm_svm.guest_sysenter_cs = c->sysenter_cs;
252 v->arch.hvm_svm.guest_sysenter_esp = c->sysenter_esp;
253 v->arch.hvm_svm.guest_sysenter_eip = c->sysenter_eip;
255 if ( paging_mode_hap(v->domain) )
256 {
257 vmcb->np_enable = 1;
258 vmcb->g_pat = MSR_IA32_CR_PAT_RESET; /* guest PAT */
259 vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
260 }
262 if ( c->pending_valid )
263 {
264 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
265 c->pending_event, c->error_code);
267 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
268 {
269 vmcb->eventinj.bytes = c->pending_event;
270 vmcb->eventinj.fields.errorcode = c->error_code;
271 }
272 }
274 paging_update_paging_modes(v);
276 return 0;
277 }
280 static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
281 {
282 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
284 data->shadow_gs = vmcb->kerngsbase;
285 data->msr_lstar = vmcb->lstar;
286 data->msr_star = vmcb->star;
287 data->msr_cstar = vmcb->cstar;
288 data->msr_syscall_mask = vmcb->sfmask;
289 data->msr_efer = v->arch.hvm_vcpu.guest_efer;
290 data->msr_flags = -1ULL;
292 data->tsc = hvm_get_guest_tsc(v);
293 }
296 static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
297 {
298 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
300 vmcb->kerngsbase = data->shadow_gs;
301 vmcb->lstar = data->msr_lstar;
302 vmcb->star = data->msr_star;
303 vmcb->cstar = data->msr_cstar;
304 vmcb->sfmask = data->msr_syscall_mask;
305 v->arch.hvm_vcpu.guest_efer = data->msr_efer;
306 hvm_update_guest_efer(v);
308 hvm_set_guest_tsc(v, data->tsc);
309 }
311 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
312 {
313 svm_save_cpu_state(v, ctxt);
314 svm_vmcb_save(v, ctxt);
315 }
317 static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
318 {
319 svm_load_cpu_state(v, ctxt);
320 if (svm_vmcb_restore(v, ctxt)) {
321 printk("svm_vmcb restore failed!\n");
322 domain_crash(v->domain);
323 return -EINVAL;
324 }
326 return 0;
327 }
329 static void svm_fpu_enter(struct vcpu *v)
330 {
331 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
333 setup_fpu(v);
334 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
335 }
337 static void svm_fpu_leave(struct vcpu *v)
338 {
339 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
341 ASSERT(!v->fpu_dirtied);
342 ASSERT(read_cr0() & X86_CR0_TS);
344 /*
345 * If the guest does not have TS enabled then we must cause and handle an
346 * exception on first use of the FPU. If the guest *does* have TS enabled
347 * then this is not necessary: no FPU activity can occur until the guest
348 * clears CR0.TS, and we will initialise the FPU when that happens.
349 */
350 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
351 {
352 v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
353 vmcb->cr0 |= X86_CR0_TS;
354 }
355 }
357 static unsigned int svm_get_interrupt_shadow(struct vcpu *v)
358 {
359 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
360 unsigned int intr_shadow = 0;
362 if ( vmcb->interrupt_shadow )
363 intr_shadow |= HVM_INTR_SHADOW_MOV_SS | HVM_INTR_SHADOW_STI;
365 if ( vmcb->general1_intercepts & GENERAL1_INTERCEPT_IRET )
366 intr_shadow |= HVM_INTR_SHADOW_NMI;
368 return intr_shadow;
369 }
371 static void svm_set_interrupt_shadow(struct vcpu *v, unsigned int intr_shadow)
372 {
373 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
375 vmcb->interrupt_shadow =
376 !!(intr_shadow & (HVM_INTR_SHADOW_MOV_SS|HVM_INTR_SHADOW_STI));
378 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
379 if ( intr_shadow & HVM_INTR_SHADOW_NMI )
380 vmcb->general1_intercepts |= GENERAL1_INTERCEPT_IRET;
381 }
383 static int svm_guest_x86_mode(struct vcpu *v)
384 {
385 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
387 if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
388 return 0;
389 if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
390 return 1;
391 if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
392 return 8;
393 return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
394 }
396 static void svm_update_host_cr3(struct vcpu *v)
397 {
398 /* SVM doesn't have a HOST_CR3 equivalent to update. */
399 }
401 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
402 {
403 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
405 switch ( cr )
406 {
407 case 0: {
408 unsigned long hw_cr0_mask = 0;
410 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
411 {
412 if ( v != current )
413 hw_cr0_mask |= X86_CR0_TS;
414 else if ( vmcb->cr0 & X86_CR0_TS )
415 svm_fpu_enter(v);
416 }
418 vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
419 if ( !paging_mode_hap(v->domain) )
420 vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
421 break;
422 }
423 case 2:
424 vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
425 break;
426 case 3:
427 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
428 hvm_asid_flush_vcpu(v);
429 break;
430 case 4:
431 vmcb->cr4 = HVM_CR4_HOST_MASK;
432 if ( paging_mode_hap(v->domain) )
433 vmcb->cr4 &= ~X86_CR4_PAE;
434 vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
435 break;
436 default:
437 BUG();
438 }
439 }
441 static void svm_update_guest_efer(struct vcpu *v)
442 {
443 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
444 bool_t lma = !!(v->arch.hvm_vcpu.guest_efer & EFER_LMA);
446 vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
447 if ( lma )
448 vmcb->efer |= EFER_LME;
450 /*
451 * In legacy mode (EFER.LMA=0) we natively support SYSENTER/SYSEXIT with
452 * no need for MSR intercepts. When EFER.LMA=1 we must trap and emulate.
453 */
454 svm_intercept_msr(v, MSR_IA32_SYSENTER_CS, lma);
455 svm_intercept_msr(v, MSR_IA32_SYSENTER_ESP, lma);
456 svm_intercept_msr(v, MSR_IA32_SYSENTER_EIP, lma);
457 }
459 static void svm_sync_vmcb(struct vcpu *v)
460 {
461 struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
463 if ( arch_svm->vmcb_in_sync )
464 return;
466 arch_svm->vmcb_in_sync = 1;
468 svm_vmsave(arch_svm->vmcb);
469 }
471 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
472 struct segment_register *reg)
473 {
474 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
476 ASSERT((v == current) || !vcpu_runnable(v));
478 switch ( seg )
479 {
480 case x86_seg_cs:
481 memcpy(reg, &vmcb->cs, sizeof(*reg));
482 reg->attr.fields.g = reg->limit > 0xFFFFF;
483 break;
484 case x86_seg_ds:
485 memcpy(reg, &vmcb->ds, sizeof(*reg));
486 if ( reg->attr.fields.type != 0 )
487 reg->attr.fields.type |= 0x1;
488 break;
489 case x86_seg_es:
490 memcpy(reg, &vmcb->es, sizeof(*reg));
491 if ( reg->attr.fields.type != 0 )
492 reg->attr.fields.type |= 0x1;
493 break;
494 case x86_seg_fs:
495 svm_sync_vmcb(v);
496 memcpy(reg, &vmcb->fs, sizeof(*reg));
497 if ( reg->attr.fields.type != 0 )
498 reg->attr.fields.type |= 0x1;
499 break;
500 case x86_seg_gs:
501 svm_sync_vmcb(v);
502 memcpy(reg, &vmcb->gs, sizeof(*reg));
503 if ( reg->attr.fields.type != 0 )
504 reg->attr.fields.type |= 0x1;
505 break;
506 case x86_seg_ss:
507 memcpy(reg, &vmcb->ss, sizeof(*reg));
508 reg->attr.fields.dpl = vmcb->cpl;
509 if ( reg->attr.fields.type == 0 )
510 reg->attr.fields.db = 0;
511 break;
512 case x86_seg_tr:
513 svm_sync_vmcb(v);
514 memcpy(reg, &vmcb->tr, sizeof(*reg));
515 reg->attr.fields.type |= 0x2;
516 break;
517 case x86_seg_gdtr:
518 memcpy(reg, &vmcb->gdtr, sizeof(*reg));
519 break;
520 case x86_seg_idtr:
521 memcpy(reg, &vmcb->idtr, sizeof(*reg));
522 break;
523 case x86_seg_ldtr:
524 svm_sync_vmcb(v);
525 memcpy(reg, &vmcb->ldtr, sizeof(*reg));
526 break;
527 default:
528 BUG();
529 }
530 }
532 static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
533 struct segment_register *reg)
534 {
535 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
536 int sync = 0;
538 ASSERT((v == current) || !vcpu_runnable(v));
540 switch ( seg )
541 {
542 case x86_seg_fs:
543 case x86_seg_gs:
544 case x86_seg_tr:
545 case x86_seg_ldtr:
546 sync = (v == current);
547 break;
548 default:
549 break;
550 }
552 if ( sync )
553 svm_sync_vmcb(v);
555 switch ( seg )
556 {
557 case x86_seg_cs:
558 memcpy(&vmcb->cs, reg, sizeof(*reg));
559 break;
560 case x86_seg_ds:
561 memcpy(&vmcb->ds, reg, sizeof(*reg));
562 break;
563 case x86_seg_es:
564 memcpy(&vmcb->es, reg, sizeof(*reg));
565 break;
566 case x86_seg_fs:
567 memcpy(&vmcb->fs, reg, sizeof(*reg));
568 break;
569 case x86_seg_gs:
570 memcpy(&vmcb->gs, reg, sizeof(*reg));
571 break;
572 case x86_seg_ss:
573 memcpy(&vmcb->ss, reg, sizeof(*reg));
574 vmcb->cpl = vmcb->ss.attr.fields.dpl;
575 break;
576 case x86_seg_tr:
577 memcpy(&vmcb->tr, reg, sizeof(*reg));
578 break;
579 case x86_seg_gdtr:
580 vmcb->gdtr.base = reg->base;
581 vmcb->gdtr.limit = (uint16_t)reg->limit;
582 break;
583 case x86_seg_idtr:
584 vmcb->idtr.base = reg->base;
585 vmcb->idtr.limit = (uint16_t)reg->limit;
586 break;
587 case x86_seg_ldtr:
588 memcpy(&vmcb->ldtr, reg, sizeof(*reg));
589 break;
590 default:
591 BUG();
592 }
594 if ( sync )
595 svm_vmload(vmcb);
596 }
598 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
599 {
600 v->arch.hvm_svm.vmcb->tsc_offset = offset;
601 }
603 static void svm_set_rdtsc_exiting(struct vcpu *v, bool_t enable)
604 {
605 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
606 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_RDTSC;
607 if ( enable )
608 vmcb->general1_intercepts |= GENERAL1_INTERCEPT_RDTSC;
609 }
611 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
612 {
613 char *p;
614 int i;
616 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
617 {
618 p = (char *)(hypercall_page + (i * 32));
619 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
620 *(u32 *)(p + 1) = i;
621 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
622 *(u8 *)(p + 6) = 0x01;
623 *(u8 *)(p + 7) = 0xd9;
624 *(u8 *)(p + 8) = 0xc3; /* ret */
625 }
627 /* Don't support HYPERVISOR_iret at the moment */
628 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
629 }
631 static void svm_ctxt_switch_from(struct vcpu *v)
632 {
633 int cpu = smp_processor_id();
635 svm_fpu_leave(v);
637 svm_save_dr(v);
639 svm_sync_vmcb(v);
640 svm_vmload(root_vmcb[cpu]);
642 #ifdef __x86_64__
643 /* Resume use of ISTs now that the host TR is reinstated. */
644 idt_tables[cpu][TRAP_double_fault].a |= IST_DF << 32;
645 idt_tables[cpu][TRAP_nmi].a |= IST_NMI << 32;
646 idt_tables[cpu][TRAP_machine_check].a |= IST_MCE << 32;
647 #endif
648 }
650 static void svm_ctxt_switch_to(struct vcpu *v)
651 {
652 int cpu = smp_processor_id();
654 #ifdef __x86_64__
655 /*
656 * This is required, because VMRUN does consistency check
657 * and some of the DOM0 selectors are pointing to
658 * invalid GDT locations, and cause AMD processors
659 * to shutdown.
660 */
661 set_segment_register(ds, 0);
662 set_segment_register(es, 0);
663 set_segment_register(ss, 0);
665 /*
666 * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
667 * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
668 */
669 idt_tables[cpu][TRAP_double_fault].a &= ~(7UL << 32);
670 idt_tables[cpu][TRAP_nmi].a &= ~(7UL << 32);
671 idt_tables[cpu][TRAP_machine_check].a &= ~(7UL << 32);
672 #endif
674 svm_restore_dr(v);
676 svm_vmsave(root_vmcb[cpu]);
677 svm_vmload(v->arch.hvm_svm.vmcb);
679 if ( cpu_has_rdtscp )
680 wrmsrl(MSR_TSC_AUX, hvm_msr_tsc_aux(v));
681 }
683 static void svm_do_resume(struct vcpu *v)
684 {
685 bool_t debug_state = v->domain->debugger_attached;
687 if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
688 {
689 uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
690 v->arch.hvm_vcpu.debug_state_latch = debug_state;
691 if ( debug_state )
692 v->arch.hvm_svm.vmcb->exception_intercepts |= mask;
693 else
694 v->arch.hvm_svm.vmcb->exception_intercepts &= ~mask;
695 }
697 if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
698 {
699 v->arch.hvm_svm.launch_core = smp_processor_id();
700 hvm_migrate_timers(v);
702 /* Migrating to another ASID domain. Request a new ASID. */
703 hvm_asid_flush_vcpu(v);
704 }
706 /* Reflect the vlapic's TPR in the hardware vtpr */
707 v->arch.hvm_svm.vmcb->vintr.fields.tpr =
708 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
710 hvm_do_resume(v);
711 reset_stack_and_jump(svm_asm_do_resume);
712 }
714 static int svm_domain_initialise(struct domain *d)
715 {
716 return 0;
717 }
719 static void svm_domain_destroy(struct domain *d)
720 {
721 }
723 static int svm_vcpu_initialise(struct vcpu *v)
724 {
725 int rc;
727 v->arch.schedule_tail = svm_do_resume;
728 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
729 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
731 v->arch.hvm_svm.launch_core = -1;
733 if ( (rc = svm_create_vmcb(v)) != 0 )
734 {
735 dprintk(XENLOG_WARNING,
736 "Failed to create VMCB for vcpu %d: err=%d.\n",
737 v->vcpu_id, rc);
738 return rc;
739 }
741 return 0;
742 }
744 static void svm_vcpu_destroy(struct vcpu *v)
745 {
746 svm_destroy_vmcb(v);
747 }
749 static void svm_inject_exception(
750 unsigned int trapnr, int errcode, unsigned long cr2)
751 {
752 struct vcpu *curr = current;
753 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
754 eventinj_t event = vmcb->eventinj;
756 switch ( trapnr )
757 {
758 case TRAP_debug:
759 if ( guest_cpu_user_regs()->eflags & X86_EFLAGS_TF )
760 {
761 __restore_debug_registers(curr);
762 vmcb->dr6 |= 0x4000;
763 }
764 case TRAP_int3:
765 if ( curr->domain->debugger_attached )
766 {
767 /* Debug/Int3: Trap to debugger. */
768 domain_pause_for_debugger();
769 return;
770 }
771 }
773 if ( unlikely(event.fields.v) &&
774 (event.fields.type == X86_EVENTTYPE_HW_EXCEPTION) )
775 {
776 trapnr = hvm_combine_hw_exceptions(event.fields.vector, trapnr);
777 if ( trapnr == TRAP_double_fault )
778 errcode = 0;
779 }
781 event.bytes = 0;
782 event.fields.v = 1;
783 event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
784 event.fields.vector = trapnr;
785 event.fields.ev = (errcode != HVM_DELIVER_NO_ERROR_CODE);
786 event.fields.errorcode = errcode;
788 vmcb->eventinj = event;
790 if ( trapnr == TRAP_page_fault )
791 {
792 vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
793 HVMTRACE_LONG_2D(PF_INJECT, errcode, TRC_PAR_LONG(cr2));
794 }
795 else
796 {
797 HVMTRACE_2D(INJ_EXC, trapnr, errcode);
798 }
799 }
801 static int svm_event_pending(struct vcpu *v)
802 {
803 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
804 return vmcb->eventinj.fields.v;
805 }
807 static int svm_do_pmu_interrupt(struct cpu_user_regs *regs)
808 {
809 return 0;
810 }
812 static int svm_cpu_prepare(unsigned int cpu)
813 {
814 if ( ((hsa[cpu] == NULL) &&
815 ((hsa[cpu] = alloc_host_save_area()) == NULL)) ||
816 ((root_vmcb[cpu] == NULL) &&
817 ((root_vmcb[cpu] = alloc_vmcb()) == NULL)) )
818 return -ENOMEM;
819 return 0;
820 }
822 static int svm_cpu_up(struct cpuinfo_x86 *c)
823 {
824 u32 eax, edx, phys_hsa_lo, phys_hsa_hi;
825 u64 phys_hsa;
826 int cpu = smp_processor_id();
828 /* Check whether SVM feature is disabled in BIOS */
829 rdmsr(MSR_K8_VM_CR, eax, edx);
830 if ( eax & K8_VMCR_SVME_DISABLE )
831 {
832 printk("CPU%d: AMD SVM Extension is disabled in BIOS.\n", cpu);
833 return 0;
834 }
836 if ( svm_cpu_prepare(cpu) != 0 )
837 return 0;
839 write_efer(read_efer() | EFER_SVME);
841 /* Initialize the HSA for this core. */
842 phys_hsa = (u64)virt_to_maddr(hsa[cpu]);
843 phys_hsa_lo = (u32)phys_hsa;
844 phys_hsa_hi = (u32)(phys_hsa >> 32);
845 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
847 /* Initialize core's ASID handling. */
848 svm_asid_init(c);
850 return 1;
851 }
853 void start_svm(struct cpuinfo_x86 *c)
854 {
855 static bool_t bootstrapped;
857 if ( test_and_set_bool(bootstrapped) )
858 {
859 if ( hvm_enabled && !svm_cpu_up(c) )
860 {
861 printk("SVM: FATAL: failed to initialise CPU%d!\n",
862 smp_processor_id());
863 BUG();
864 }
865 return;
866 }
868 /* Xen does not fill x86_capability words except 0. */
869 boot_cpu_data.x86_capability[5] = cpuid_ecx(0x80000001);
871 if ( !test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability) )
872 return;
874 if ( !svm_cpu_up(c) )
875 {
876 printk("SVM: failed to initialise.\n");
877 return;
878 }
880 setup_vmcb_dump();
882 svm_feature_flags = ((cpuid_eax(0x80000000) >= 0x8000000A) ?
883 cpuid_edx(0x8000000A) : 0);
885 svm_function_table.hap_supported = cpu_has_svm_npt;
887 hvm_enable(&svm_function_table);
888 }
890 static void svm_do_nested_pgfault(paddr_t gpa)
891 {
892 unsigned long gfn = gpa >> PAGE_SHIFT;
893 mfn_t mfn;
894 p2m_type_t p2mt;
896 if ( tb_init_done )
897 {
898 struct {
899 uint64_t gpa;
900 uint64_t mfn;
901 u32 qualification;
902 u32 p2mt;
903 } _d;
905 _d.gpa = gpa;
906 _d.qualification = 0;
907 _d.mfn = mfn_x(gfn_to_mfn_query(current->domain, gfn, &_d.p2mt));
909 __trace_var(TRC_HVM_NPF, 0, sizeof(_d), (unsigned char *)&_d);
910 }
912 if ( hvm_hap_nested_page_fault(gfn) )
913 return;
915 /* Everything else is an error. */
916 mfn = gfn_to_mfn_type_current(gfn, &p2mt, p2m_guest);
917 gdprintk(XENLOG_ERR, "SVM violation gpa %#"PRIpaddr", mfn %#lx, type %i\n",
918 gpa, mfn_x(mfn), p2mt);
919 domain_crash(current->domain);
920 }
922 static void svm_fpu_dirty_intercept(void)
923 {
924 struct vcpu *curr = current;
925 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
927 svm_fpu_enter(curr);
929 if ( !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
930 vmcb->cr0 &= ~X86_CR0_TS;
931 }
933 #define bitmaskof(idx) (1U << ((idx) & 31))
934 static void svm_cpuid_intercept(
935 unsigned int *eax, unsigned int *ebx,
936 unsigned int *ecx, unsigned int *edx)
937 {
938 unsigned int input = *eax;
939 struct vcpu *v = current;
941 hvm_cpuid(input, eax, ebx, ecx, edx);
943 if ( input == 0x80000001 )
944 {
945 /* Fix up VLAPIC details. */
946 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
947 __clear_bit(X86_FEATURE_APIC & 31, edx);
948 }
950 HVMTRACE_5D (CPUID, input, *eax, *ebx, *ecx, *edx);
951 }
953 static void svm_vmexit_do_cpuid(struct cpu_user_regs *regs)
954 {
955 unsigned int eax, ebx, ecx, edx, inst_len;
957 if ( (inst_len = __get_instruction_length(current, INSTR_CPUID)) == 0 )
958 return;
960 eax = regs->eax;
961 ebx = regs->ebx;
962 ecx = regs->ecx;
963 edx = regs->edx;
965 svm_cpuid_intercept(&eax, &ebx, &ecx, &edx);
967 regs->eax = eax;
968 regs->ebx = ebx;
969 regs->ecx = ecx;
970 regs->edx = edx;
972 __update_guest_eip(regs, inst_len);
973 }
975 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
976 {
977 HVMTRACE_0D(DR_WRITE);
978 __restore_debug_registers(v);
979 }
981 static int svm_msr_read_intercept(struct cpu_user_regs *regs)
982 {
983 u64 msr_content = 0;
984 u32 ecx = regs->ecx, eax, edx;
985 struct vcpu *v = current;
986 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
988 switch ( ecx )
989 {
990 case MSR_EFER:
991 msr_content = v->arch.hvm_vcpu.guest_efer;
992 break;
994 case MSR_IA32_SYSENTER_CS:
995 msr_content = v->arch.hvm_svm.guest_sysenter_cs;
996 break;
997 case MSR_IA32_SYSENTER_ESP:
998 msr_content = v->arch.hvm_svm.guest_sysenter_esp;
999 break;
1000 case MSR_IA32_SYSENTER_EIP:
1001 msr_content = v->arch.hvm_svm.guest_sysenter_eip;
1002 break;
1004 case MSR_IA32_MC4_MISC: /* Threshold register */
1005 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
1006 /*
1007 * MCA/MCE: We report that the threshold register is unavailable
1008 * for OS use (locked by the BIOS).
1009 */
1010 msr_content = 1ULL << 61; /* MC4_MISC.Locked */
1011 break;
1013 case MSR_IA32_EBC_FREQUENCY_ID:
1014 /*
1015 * This Intel-only register may be accessed if this HVM guest
1016 * has been migrated from an Intel host. The value zero is not
1017 * particularly meaningful, but at least avoids the guest crashing!
1018 */
1019 msr_content = 0;
1020 break;
1022 case MSR_K8_VM_HSAVE_PA:
1023 goto gpf;
1025 case MSR_IA32_DEBUGCTLMSR:
1026 msr_content = vmcb->debugctlmsr;
1027 break;
1029 case MSR_IA32_LASTBRANCHFROMIP:
1030 msr_content = vmcb->lastbranchfromip;
1031 break;
1033 case MSR_IA32_LASTBRANCHTOIP:
1034 msr_content = vmcb->lastbranchtoip;
1035 break;
1037 case MSR_IA32_LASTINTFROMIP:
1038 msr_content = vmcb->lastintfromip;
1039 break;
1041 case MSR_IA32_LASTINTTOIP:
1042 msr_content = vmcb->lastinttoip;
1043 break;
1045 default:
1047 if ( rdmsr_viridian_regs(ecx, &msr_content) ||
1048 rdmsr_hypervisor_regs(ecx, &msr_content) )
1049 break;
1051 if ( rdmsr_safe(ecx, eax, edx) == 0 )
1052 {
1053 msr_content = ((uint64_t)edx << 32) | eax;
1054 break;
1055 }
1057 goto gpf;
1058 }
1060 regs->eax = (uint32_t)msr_content;
1061 regs->edx = (uint32_t)(msr_content >> 32);
1063 HVMTRACE_3D (MSR_READ, ecx, regs->eax, regs->edx);
1064 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
1065 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
1066 return X86EMUL_OKAY;
1068 gpf:
1069 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1070 return X86EMUL_EXCEPTION;
1071 }
1073 static int svm_msr_write_intercept(struct cpu_user_regs *regs)
1074 {
1075 u64 msr_content = 0;
1076 u32 ecx = regs->ecx;
1077 struct vcpu *v = current;
1078 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1080 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1082 HVMTRACE_3D (MSR_WRITE, ecx, regs->eax, regs->edx);
1084 switch ( ecx )
1085 {
1086 case MSR_K8_VM_HSAVE_PA:
1087 goto gpf;
1089 case MSR_IA32_SYSENTER_CS:
1090 v->arch.hvm_svm.guest_sysenter_cs = msr_content;
1091 break;
1092 case MSR_IA32_SYSENTER_ESP:
1093 v->arch.hvm_svm.guest_sysenter_esp = msr_content;
1094 break;
1095 case MSR_IA32_SYSENTER_EIP:
1096 v->arch.hvm_svm.guest_sysenter_eip = msr_content;
1097 break;
1099 case MSR_IA32_DEBUGCTLMSR:
1100 vmcb->debugctlmsr = msr_content;
1101 if ( !msr_content || !cpu_has_svm_lbrv )
1102 break;
1103 vmcb->lbr_control.fields.enable = 1;
1104 svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
1105 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
1106 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
1107 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
1108 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
1109 break;
1111 case MSR_IA32_LASTBRANCHFROMIP:
1112 vmcb->lastbranchfromip = msr_content;
1113 break;
1115 case MSR_IA32_LASTBRANCHTOIP:
1116 vmcb->lastbranchtoip = msr_content;
1117 break;
1119 case MSR_IA32_LASTINTFROMIP:
1120 vmcb->lastintfromip = msr_content;
1121 break;
1123 case MSR_IA32_LASTINTTOIP:
1124 vmcb->lastinttoip = msr_content;
1125 break;
1127 default:
1128 if ( wrmsr_viridian_regs(ecx, msr_content) )
1129 break;
1131 switch ( long_mode_do_msr_write(regs) )
1132 {
1133 case HNDL_unhandled:
1134 wrmsr_hypervisor_regs(ecx, msr_content);
1135 break;
1136 case HNDL_exception_raised:
1137 return X86EMUL_EXCEPTION;
1138 case HNDL_done:
1139 break;
1140 }
1141 break;
1142 }
1144 return X86EMUL_OKAY;
1146 gpf:
1147 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1148 return X86EMUL_EXCEPTION;
1149 }
1151 static void svm_do_msr_access(struct cpu_user_regs *regs)
1152 {
1153 int rc, inst_len;
1154 struct vcpu *v = current;
1155 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1157 if ( vmcb->exitinfo1 == 0 )
1158 {
1159 if ( (inst_len = __get_instruction_length(v, INSTR_RDMSR)) == 0 )
1160 return;
1161 rc = hvm_msr_read_intercept(regs);
1162 }
1163 else
1164 {
1165 if ( (inst_len = __get_instruction_length(v, INSTR_WRMSR)) == 0 )
1166 return;
1167 rc = hvm_msr_write_intercept(regs);
1168 }
1170 if ( rc == X86EMUL_OKAY )
1171 __update_guest_eip(regs, inst_len);
1172 }
1174 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
1175 struct cpu_user_regs *regs)
1176 {
1177 unsigned int inst_len;
1179 if ( (inst_len = __get_instruction_length(current, INSTR_HLT)) == 0 )
1180 return;
1181 __update_guest_eip(regs, inst_len);
1183 hvm_hlt(regs->eflags);
1184 }
1186 static void svm_vmexit_do_rdtsc(struct cpu_user_regs *regs)
1187 {
1188 unsigned int inst_len;
1190 if ( (inst_len = __get_instruction_length(current, INSTR_RDTSC)) == 0 )
1191 return;
1192 __update_guest_eip(regs, inst_len);
1194 hvm_rdtsc_intercept(regs);
1195 }
1197 static void svm_vmexit_ud_intercept(struct cpu_user_regs *regs)
1198 {
1199 struct hvm_emulate_ctxt ctxt;
1200 int rc;
1202 hvm_emulate_prepare(&ctxt, regs);
1204 rc = hvm_emulate_one(&ctxt);
1206 switch ( rc )
1207 {
1208 case X86EMUL_UNHANDLEABLE:
1209 hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
1210 break;
1211 case X86EMUL_EXCEPTION:
1212 if ( ctxt.exn_pending )
1213 hvm_inject_exception(ctxt.exn_vector, ctxt.exn_error_code, 0);
1214 /* fall through */
1215 default:
1216 hvm_emulate_writeback(&ctxt);
1217 break;
1218 }
1219 }
1221 static void wbinvd_ipi(void *info)
1222 {
1223 wbinvd();
1224 }
1226 static void svm_wbinvd_intercept(void)
1227 {
1228 if ( has_arch_pdevs(current->domain) )
1229 on_each_cpu(wbinvd_ipi, NULL, 1);
1230 }
1232 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
1233 {
1234 enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
1235 int inst_len;
1237 inst_len = __get_instruction_length_from_list(
1238 current, list, ARRAY_SIZE(list));
1239 if ( inst_len == 0 )
1240 return;
1242 svm_wbinvd_intercept();
1244 __update_guest_eip(regs, inst_len);
1245 }
1247 static void svm_invlpg_intercept(unsigned long vaddr)
1248 {
1249 struct vcpu *curr = current;
1250 HVMTRACE_LONG_2D(INVLPG, 0, TRC_PAR_LONG(vaddr));
1251 paging_invlpg(curr, vaddr);
1252 svm_asid_g_invlpg(curr, vaddr);
1253 }
1255 static struct hvm_function_table __read_mostly svm_function_table = {
1256 .name = "SVM",
1257 .cpu_prepare = svm_cpu_prepare,
1258 .cpu_down = svm_cpu_down,
1259 .domain_initialise = svm_domain_initialise,
1260 .domain_destroy = svm_domain_destroy,
1261 .vcpu_initialise = svm_vcpu_initialise,
1262 .vcpu_destroy = svm_vcpu_destroy,
1263 .save_cpu_ctxt = svm_save_vmcb_ctxt,
1264 .load_cpu_ctxt = svm_load_vmcb_ctxt,
1265 .get_interrupt_shadow = svm_get_interrupt_shadow,
1266 .set_interrupt_shadow = svm_set_interrupt_shadow,
1267 .guest_x86_mode = svm_guest_x86_mode,
1268 .get_segment_register = svm_get_segment_register,
1269 .set_segment_register = svm_set_segment_register,
1270 .update_host_cr3 = svm_update_host_cr3,
1271 .update_guest_cr = svm_update_guest_cr,
1272 .update_guest_efer = svm_update_guest_efer,
1273 .set_tsc_offset = svm_set_tsc_offset,
1274 .inject_exception = svm_inject_exception,
1275 .init_hypercall_page = svm_init_hypercall_page,
1276 .event_pending = svm_event_pending,
1277 .do_pmu_interrupt = svm_do_pmu_interrupt,
1278 .cpuid_intercept = svm_cpuid_intercept,
1279 .wbinvd_intercept = svm_wbinvd_intercept,
1280 .fpu_dirty_intercept = svm_fpu_dirty_intercept,
1281 .msr_read_intercept = svm_msr_read_intercept,
1282 .msr_write_intercept = svm_msr_write_intercept,
1283 .invlpg_intercept = svm_invlpg_intercept,
1284 .set_rdtsc_exiting = svm_set_rdtsc_exiting
1285 };
1287 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
1288 {
1289 unsigned int exit_reason;
1290 struct vcpu *v = current;
1291 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1292 eventinj_t eventinj;
1293 int inst_len, rc;
1295 if ( paging_mode_hap(v->domain) )
1296 v->arch.hvm_vcpu.guest_cr[3] = v->arch.hvm_vcpu.hw_cr[3] = vmcb->cr3;
1298 /*
1299 * Before doing anything else, we need to sync up the VLAPIC's TPR with
1300 * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
1301 * because we update the vTPR on MMIO writes to the TPR.
1302 * NB. We need to preserve the low bits of the TPR to make checked builds
1303 * of Windows work, even though they don't actually do anything.
1304 */
1305 vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
1306 ((vmcb->vintr.fields.tpr & 0x0F) << 4) |
1307 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0x0F));
1309 exit_reason = vmcb->exitcode;
1311 if ( hvm_long_mode_enabled(v) )
1312 HVMTRACE_ND(VMEXIT64, 1/*cycles*/, 3, exit_reason,
1313 (uint32_t)regs->eip, (uint32_t)((uint64_t)regs->eip >> 32),
1314 0, 0, 0);
1315 else
1316 HVMTRACE_ND(VMEXIT, 1/*cycles*/, 2, exit_reason,
1317 (uint32_t)regs->eip,
1318 0, 0, 0, 0);
1320 if ( unlikely(exit_reason == VMEXIT_INVALID) )
1321 {
1322 svm_dump_vmcb(__func__, vmcb);
1323 goto exit_and_crash;
1324 }
1326 perfc_incra(svmexits, exit_reason);
1328 hvm_maybe_deassert_evtchn_irq();
1330 /* Event delivery caused this intercept? Queue for redelivery. */
1331 eventinj = vmcb->exitintinfo;
1332 if ( unlikely(eventinj.fields.v) &&
1333 hvm_event_needs_reinjection(eventinj.fields.type,
1334 eventinj.fields.vector) )
1335 vmcb->eventinj = eventinj;
1337 switch ( exit_reason )
1338 {
1339 case VMEXIT_INTR:
1340 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1341 HVMTRACE_0D(INTR);
1342 break;
1344 case VMEXIT_NMI:
1345 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1346 HVMTRACE_0D(NMI);
1347 break;
1349 case VMEXIT_SMI:
1350 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1351 HVMTRACE_0D(SMI);
1352 break;
1354 case VMEXIT_EXCEPTION_DB:
1355 if ( !v->domain->debugger_attached )
1356 goto exit_and_crash;
1357 domain_pause_for_debugger();
1358 break;
1360 case VMEXIT_EXCEPTION_BP:
1361 if ( !v->domain->debugger_attached )
1362 goto exit_and_crash;
1363 /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */
1364 if ( (inst_len = __get_instruction_length(v, INSTR_INT3)) == 0 )
1365 break;
1366 __update_guest_eip(regs, inst_len);
1367 #ifdef XEN_GDBSX_CONFIG
1368 current->arch.gdbsx_vcpu_event = TRAP_int3;
1369 #endif
1370 domain_pause_for_debugger();
1371 break;
1373 case VMEXIT_EXCEPTION_NM:
1374 svm_fpu_dirty_intercept();
1375 break;
1377 case VMEXIT_EXCEPTION_PF: {
1378 unsigned long va;
1379 va = vmcb->exitinfo2;
1380 regs->error_code = vmcb->exitinfo1;
1381 HVM_DBG_LOG(DBG_LEVEL_VMMU,
1382 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
1383 (unsigned long)regs->eax, (unsigned long)regs->ebx,
1384 (unsigned long)regs->ecx, (unsigned long)regs->edx,
1385 (unsigned long)regs->esi, (unsigned long)regs->edi);
1387 if ( paging_fault(va, regs) )
1388 {
1389 if ( trace_will_trace_event(TRC_SHADOW) )
1390 break;
1391 if ( hvm_long_mode_enabled(v) )
1392 HVMTRACE_LONG_2D(PF_XEN, regs->error_code, TRC_PAR_LONG(va));
1393 else
1394 HVMTRACE_2D(PF_XEN, regs->error_code, va);
1395 break;
1396 }
1398 hvm_inject_exception(TRAP_page_fault, regs->error_code, va);
1399 break;
1400 }
1402 case VMEXIT_EXCEPTION_UD:
1403 svm_vmexit_ud_intercept(regs);
1404 break;
1406 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
1407 case VMEXIT_EXCEPTION_MC:
1408 HVMTRACE_0D(MCE);
1409 break;
1411 case VMEXIT_VINTR:
1412 vmcb->vintr.fields.irq = 0;
1413 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
1414 break;
1416 case VMEXIT_INVD:
1417 case VMEXIT_WBINVD:
1418 svm_vmexit_do_invalidate_cache(regs);
1419 break;
1421 case VMEXIT_TASK_SWITCH: {
1422 enum hvm_task_switch_reason reason;
1423 int32_t errcode = -1;
1424 if ( (vmcb->exitinfo2 >> 36) & 1 )
1425 reason = TSW_iret;
1426 else if ( (vmcb->exitinfo2 >> 38) & 1 )
1427 reason = TSW_jmp;
1428 else
1429 reason = TSW_call_or_int;
1430 if ( (vmcb->exitinfo2 >> 44) & 1 )
1431 errcode = (uint32_t)vmcb->exitinfo2;
1433 /*
1434 * Some processors set the EXITINTINFO field when the task switch
1435 * is caused by a task gate in the IDT. In this case we will be
1436 * emulating the event injection, so we do not want the processor
1437 * to re-inject the original event!
1438 */
1439 vmcb->eventinj.bytes = 0;
1441 hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
1442 break;
1443 }
1445 case VMEXIT_CPUID:
1446 svm_vmexit_do_cpuid(regs);
1447 break;
1449 case VMEXIT_HLT:
1450 svm_vmexit_do_hlt(vmcb, regs);
1451 break;
1453 case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
1454 case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
1455 case VMEXIT_INVLPG:
1456 case VMEXIT_INVLPGA:
1457 case VMEXIT_IOIO:
1458 if ( !handle_mmio() )
1459 hvm_inject_exception(TRAP_gp_fault, 0, 0);
1460 break;
1462 case VMEXIT_VMMCALL:
1463 if ( (inst_len = __get_instruction_length(v, INSTR_VMCALL)) == 0 )
1464 break;
1465 HVMTRACE_1D(VMMCALL, regs->eax);
1466 rc = hvm_do_hypercall(regs);
1467 if ( rc != HVM_HCALL_preempted )
1468 {
1469 __update_guest_eip(regs, inst_len);
1470 if ( rc == HVM_HCALL_invalidate )
1471 send_invalidate_req();
1472 }
1473 break;
1475 case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
1476 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
1477 svm_dr_access(v, regs);
1478 break;
1480 case VMEXIT_MSR:
1481 svm_do_msr_access(regs);
1482 break;
1484 case VMEXIT_SHUTDOWN:
1485 hvm_triple_fault();
1486 break;
1488 case VMEXIT_RDTSCP:
1489 regs->ecx = hvm_msr_tsc_aux(v);
1490 /* fall through */
1491 case VMEXIT_RDTSC:
1492 svm_vmexit_do_rdtsc(regs);
1493 break;
1495 case VMEXIT_MONITOR:
1496 case VMEXIT_MWAIT:
1497 case VMEXIT_VMRUN:
1498 case VMEXIT_VMLOAD:
1499 case VMEXIT_VMSAVE:
1500 case VMEXIT_STGI:
1501 case VMEXIT_CLGI:
1502 case VMEXIT_SKINIT:
1503 hvm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
1504 break;
1506 case VMEXIT_NPF:
1507 perfc_incra(svmexits, VMEXIT_NPF_PERFC);
1508 regs->error_code = vmcb->exitinfo1;
1509 svm_do_nested_pgfault(vmcb->exitinfo2);
1510 break;
1512 case VMEXIT_IRET:
1513 /*
1514 * IRET clears the NMI mask. However because we clear the mask
1515 * /before/ executing IRET, we set the interrupt shadow to prevent
1516 * a pending NMI from being injected immediately. This will work
1517 * perfectly unless the IRET instruction faults: in that case we
1518 * may inject an NMI before the NMI handler's IRET instruction is
1519 * retired.
1520 */
1521 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_IRET;
1522 vmcb->interrupt_shadow = 1;
1523 break;
1525 case VMEXIT_PAUSE:
1526 /*
1527 * The guest is running a contended spinlock and we've detected it.
1528 * Do something useful, like reschedule the guest
1529 */
1530 perfc_incr(pauseloop_exits);
1531 do_sched_op_compat(SCHEDOP_yield, 0);
1532 break;
1534 default:
1535 exit_and_crash:
1536 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
1537 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
1538 exit_reason,
1539 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
1540 domain_crash(v->domain);
1541 break;
1542 }
1544 /* The exit may have updated the TPR: reflect this in the hardware vtpr */
1545 vmcb->vintr.fields.tpr =
1546 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
1547 }
1549 asmlinkage void svm_trace_vmentry(void)
1550 {
1551 HVMTRACE_ND (VMENTRY, 1/*cycles*/, 0, 0, 0, 0, 0, 0, 0);
1552 }
1554 /*
1555 * Local variables:
1556 * mode: C
1557 * c-set-style: "BSD"
1558 * c-basic-offset: 4
1559 * tab-width: 4
1560 * indent-tabs-mode: nil
1561 * End:
1562 */