debuggers.hg: view xen/arch/x86/hvm/svm/svm.c @ 16746:a30aabe3c84a

svm: Properly mask unsupported bits in CPUID_80000001_ECX.
Signed-off-by: Keir Fraser <keir.fraser@citrix.com>

author    Keir Fraser <keir.fraser@citrix.com>
date      Sat Jan 12 09:29:38 2008 +0000
parents   e4fd457a3dd5
children  6ea3db7ae24d
1 /*
2 * svm.c: handling SVM architecture-related VM exits
3 * Copyright (c) 2004, Intel Corporation.
4 * Copyright (c) 2005-2007, Advanced Micro Devices, Inc.
5 *
6 * This program is free software; you can redistribute it and/or modify it
7 * under the terms and conditions of the GNU General Public License,
8 * version 2, as published by the Free Software Foundation.
9 *
10 * This program is distributed in the hope it will be useful, but WITHOUT
11 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
13 * more details.
14 *
15 * You should have received a copy of the GNU General Public License along with
16 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
17 * Place - Suite 330, Boston, MA 02111-1307 USA.
18 */
20 #include <xen/config.h>
21 #include <xen/init.h>
22 #include <xen/lib.h>
23 #include <xen/trace.h>
24 #include <xen/sched.h>
25 #include <xen/irq.h>
26 #include <xen/softirq.h>
27 #include <xen/hypercall.h>
28 #include <xen/domain_page.h>
29 #include <asm/current.h>
30 #include <asm/io.h>
31 #include <asm/paging.h>
32 #include <asm/p2m.h>
33 #include <asm/regs.h>
34 #include <asm/cpufeature.h>
35 #include <asm/processor.h>
36 #include <asm/types.h>
37 #include <asm/debugreg.h>
38 #include <asm/msr.h>
39 #include <asm/spinlock.h>
40 #include <asm/hvm/hvm.h>
41 #include <asm/hvm/support.h>
42 #include <asm/hvm/io.h>
43 #include <asm/hvm/svm/asid.h>
44 #include <asm/hvm/svm/svm.h>
45 #include <asm/hvm/svm/vmcb.h>
46 #include <asm/hvm/svm/emulate.h>
47 #include <asm/hvm/svm/intr.h>
48 #include <asm/x86_emulate.h>
49 #include <public/sched.h>
50 #include <asm/hvm/vpt.h>
51 #include <asm/hvm/trace.h>
52 #include <asm/hap.h>
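/* SVM feature flags, filled in by start_svm() from CPUID leaf 0x8000000A:EDX. */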
54 u32 svm_feature_flags;
56 #define set_segment_register(name, value) \
57 asm volatile ( "movw %%ax ,%%" STR(name) "" : : "a" (value) )
59 enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
61 int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip,
62 int inst_len);
63 asmlinkage void do_IRQ(struct cpu_user_regs *);
65 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr);
66 static void svm_update_guest_efer(struct vcpu *v);
67 static void svm_inject_exception(
68 unsigned int trapnr, int errcode, unsigned long cr2);
70 /* va of hardware host save area */
71 static void *hsa[NR_CPUS] __read_mostly;
73 /* vmcb used for extended host state */
74 static void *root_vmcb[NR_CPUS] __read_mostly;
76 static inline void __update_guest_eip(
77 struct cpu_user_regs *regs, unsigned int inst_len)
78 {
79 struct vcpu *curr = current;
81 if ( unlikely((inst_len == 0) || (inst_len > 15)) )
82 {
83 gdprintk(XENLOG_ERR, "Bad instruction length %u\n", inst_len);
84 domain_crash(curr->domain);
85 return;
86 }
88 ASSERT(regs == guest_cpu_user_regs());
90 regs->eip += inst_len;
91 regs->eflags &= ~X86_EFLAGS_RF;
93 curr->arch.hvm_svm.vmcb->interrupt_shadow = 0;
95 if ( regs->eflags & X86_EFLAGS_TF )
96 svm_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
97 }
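/* Disable SVM on this CPU by clearing EFER.SVME. */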
99 static void svm_cpu_down(void)
100 {
101 write_efer(read_efer() & ~EFER_SVME);
102 }
104 static enum handler_return long_mode_do_msr_write(struct cpu_user_regs *regs)
105 {
106 u64 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
107 u32 ecx = regs->ecx;
109 HVM_DBG_LOG(DBG_LEVEL_0, "msr %x msr_content %"PRIx64,
110 ecx, msr_content);
112 switch ( ecx )
113 {
114 case MSR_EFER:
115 if ( !hvm_set_efer(msr_content) )
116 return HNDL_exception_raised;
117 break;
119 case MSR_IA32_MC4_MISC: /* Threshold register */
120 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
121 /*
122 * MCA/MCE: Threshold register is reported to be locked, so we ignore
123 * all write accesses. This behaviour matches real HW, so guests should
124 * have no problem with this.
125 */
126 break;
128 default:
129 return HNDL_unhandled;
130 }
132 return HNDL_done;
133 }
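/*
 * Copy the guest's debug registers back into its context and re-arm the DR
 * intercepts. No-op unless the guest has dirtied them since the last save.
 */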
135 static void svm_save_dr(struct vcpu *v)
136 {
137 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
139 if ( !v->arch.hvm_vcpu.flag_dr_dirty )
140 return;
142 /* Clear the DR dirty flag and re-enable intercepts for DR accesses. */
143 v->arch.hvm_vcpu.flag_dr_dirty = 0;
144 v->arch.hvm_svm.vmcb->dr_intercepts = ~0u;
146 v->arch.guest_context.debugreg[0] = read_debugreg(0);
147 v->arch.guest_context.debugreg[1] = read_debugreg(1);
148 v->arch.guest_context.debugreg[2] = read_debugreg(2);
149 v->arch.guest_context.debugreg[3] = read_debugreg(3);
150 v->arch.guest_context.debugreg[6] = vmcb->dr6;
151 v->arch.guest_context.debugreg[7] = vmcb->dr7;
152 }
154 static void __restore_debug_registers(struct vcpu *v)
155 {
156 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
158 if ( v->arch.hvm_vcpu.flag_dr_dirty )
159 return;
161 v->arch.hvm_vcpu.flag_dr_dirty = 1;
162 vmcb->dr_intercepts = 0;
164 write_debugreg(0, v->arch.guest_context.debugreg[0]);
165 write_debugreg(1, v->arch.guest_context.debugreg[1]);
166 write_debugreg(2, v->arch.guest_context.debugreg[2]);
167 write_debugreg(3, v->arch.guest_context.debugreg[3]);
168 vmcb->dr6 = v->arch.guest_context.debugreg[6];
169 vmcb->dr7 = v->arch.guest_context.debugreg[7];
170 }
172 /*
173 * DR7 is saved and restored on every vmexit. Other debug registers only
174 * need to be restored if their value is going to affect execution -- i.e.,
175 * if one of the breakpoints is enabled. So mask out all bits that don't
176 * enable some breakpoint functionality.
177 */
178 static void svm_restore_dr(struct vcpu *v)
179 {
180 if ( unlikely(v->arch.guest_context.debugreg[7] & DR7_ACTIVE_MASK) )
181 __restore_debug_registers(v);
182 }
184 int svm_vmcb_save(struct vcpu *v, struct hvm_hw_cpu *c)
185 {
186 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
188 c->cr0 = v->arch.hvm_vcpu.guest_cr[0];
189 c->cr2 = v->arch.hvm_vcpu.guest_cr[2];
190 c->cr3 = v->arch.hvm_vcpu.guest_cr[3];
191 c->cr4 = v->arch.hvm_vcpu.guest_cr[4];
193 c->idtr_limit = vmcb->idtr.limit;
194 c->idtr_base = vmcb->idtr.base;
196 c->gdtr_limit = vmcb->gdtr.limit;
197 c->gdtr_base = vmcb->gdtr.base;
199 c->cs_sel = vmcb->cs.sel;
200 c->cs_limit = vmcb->cs.limit;
201 c->cs_base = vmcb->cs.base;
202 c->cs_arbytes = vmcb->cs.attr.bytes;
204 c->ds_sel = vmcb->ds.sel;
205 c->ds_limit = vmcb->ds.limit;
206 c->ds_base = vmcb->ds.base;
207 c->ds_arbytes = vmcb->ds.attr.bytes;
209 c->es_sel = vmcb->es.sel;
210 c->es_limit = vmcb->es.limit;
211 c->es_base = vmcb->es.base;
212 c->es_arbytes = vmcb->es.attr.bytes;
214 c->ss_sel = vmcb->ss.sel;
215 c->ss_limit = vmcb->ss.limit;
216 c->ss_base = vmcb->ss.base;
217 c->ss_arbytes = vmcb->ss.attr.bytes;
219 c->fs_sel = vmcb->fs.sel;
220 c->fs_limit = vmcb->fs.limit;
221 c->fs_base = vmcb->fs.base;
222 c->fs_arbytes = vmcb->fs.attr.bytes;
224 c->gs_sel = vmcb->gs.sel;
225 c->gs_limit = vmcb->gs.limit;
226 c->gs_base = vmcb->gs.base;
227 c->gs_arbytes = vmcb->gs.attr.bytes;
229 c->tr_sel = vmcb->tr.sel;
230 c->tr_limit = vmcb->tr.limit;
231 c->tr_base = vmcb->tr.base;
232 c->tr_arbytes = vmcb->tr.attr.bytes;
234 c->ldtr_sel = vmcb->ldtr.sel;
235 c->ldtr_limit = vmcb->ldtr.limit;
236 c->ldtr_base = vmcb->ldtr.base;
237 c->ldtr_arbytes = vmcb->ldtr.attr.bytes;
239 c->sysenter_cs = vmcb->sysenter_cs;
240 c->sysenter_esp = vmcb->sysenter_esp;
241 c->sysenter_eip = vmcb->sysenter_eip;
243 c->pending_event = 0;
244 c->error_code = 0;
245 if ( vmcb->eventinj.fields.v &&
246 hvm_event_needs_reinjection(vmcb->eventinj.fields.type,
247 vmcb->eventinj.fields.vector) )
248 {
249 c->pending_event = (uint32_t)vmcb->eventinj.bytes;
250 c->error_code = vmcb->eventinj.fields.errorcode;
251 }
253 return 1;
254 }
257 int svm_vmcb_restore(struct vcpu *v, struct hvm_hw_cpu *c)
258 {
259 unsigned long mfn = 0;
260 p2m_type_t p2mt;
261 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
263 if ( c->pending_valid &&
264 ((c->pending_type == 1) || (c->pending_type > 6) ||
265 (c->pending_reserved != 0)) )
266 {
267 gdprintk(XENLOG_ERR, "Invalid pending event 0x%"PRIx32".\n",
268 c->pending_event);
269 return -EINVAL;
270 }
272 if ( !paging_mode_hap(v->domain) )
273 {
274 if ( c->cr0 & X86_CR0_PG )
275 {
276 mfn = mfn_x(gfn_to_mfn(v->domain, c->cr3 >> PAGE_SHIFT, &p2mt));
277 if ( !p2m_is_ram(p2mt) || !get_page(mfn_to_page(mfn), v->domain) )
278 {
279 gdprintk(XENLOG_ERR, "Invalid CR3 value=0x%"PRIx64"\n",
280 c->cr3);
281 return -EINVAL;
282 }
283 }
285 if ( v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PG )
286 put_page(pagetable_get_page(v->arch.guest_table));
288 v->arch.guest_table = pagetable_from_pfn(mfn);
289 }
291 v->arch.hvm_vcpu.guest_cr[0] = c->cr0 | X86_CR0_ET;
292 v->arch.hvm_vcpu.guest_cr[2] = c->cr2;
293 v->arch.hvm_vcpu.guest_cr[3] = c->cr3;
294 v->arch.hvm_vcpu.guest_cr[4] = c->cr4;
295 svm_update_guest_cr(v, 0);
296 svm_update_guest_cr(v, 2);
297 svm_update_guest_cr(v, 4);
299 #ifdef HVM_DEBUG_SUSPEND
300 printk("%s: cr3=0x%"PRIx64", cr0=0x%"PRIx64", cr4=0x%"PRIx64".\n",
301 __func__, c->cr3, c->cr0, c->cr4);
302 #endif
304 vmcb->idtr.limit = c->idtr_limit;
305 vmcb->idtr.base = c->idtr_base;
307 vmcb->gdtr.limit = c->gdtr_limit;
308 vmcb->gdtr.base = c->gdtr_base;
310 vmcb->cs.sel = c->cs_sel;
311 vmcb->cs.limit = c->cs_limit;
312 vmcb->cs.base = c->cs_base;
313 vmcb->cs.attr.bytes = c->cs_arbytes;
315 vmcb->ds.sel = c->ds_sel;
316 vmcb->ds.limit = c->ds_limit;
317 vmcb->ds.base = c->ds_base;
318 vmcb->ds.attr.bytes = c->ds_arbytes;
320 vmcb->es.sel = c->es_sel;
321 vmcb->es.limit = c->es_limit;
322 vmcb->es.base = c->es_base;
323 vmcb->es.attr.bytes = c->es_arbytes;
325 vmcb->ss.sel = c->ss_sel;
326 vmcb->ss.limit = c->ss_limit;
327 vmcb->ss.base = c->ss_base;
328 vmcb->ss.attr.bytes = c->ss_arbytes;
329 vmcb->cpl = vmcb->ss.attr.fields.dpl;
331 vmcb->fs.sel = c->fs_sel;
332 vmcb->fs.limit = c->fs_limit;
333 vmcb->fs.base = c->fs_base;
334 vmcb->fs.attr.bytes = c->fs_arbytes;
336 vmcb->gs.sel = c->gs_sel;
337 vmcb->gs.limit = c->gs_limit;
338 vmcb->gs.base = c->gs_base;
339 vmcb->gs.attr.bytes = c->gs_arbytes;
341 vmcb->tr.sel = c->tr_sel;
342 vmcb->tr.limit = c->tr_limit;
343 vmcb->tr.base = c->tr_base;
344 vmcb->tr.attr.bytes = c->tr_arbytes;
346 vmcb->ldtr.sel = c->ldtr_sel;
347 vmcb->ldtr.limit = c->ldtr_limit;
348 vmcb->ldtr.base = c->ldtr_base;
349 vmcb->ldtr.attr.bytes = c->ldtr_arbytes;
351 vmcb->sysenter_cs = c->sysenter_cs;
352 vmcb->sysenter_esp = c->sysenter_esp;
353 vmcb->sysenter_eip = c->sysenter_eip;
355 if ( paging_mode_hap(v->domain) )
356 {
357 vmcb->np_enable = 1;
358 vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
359 vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
360 }
362 if ( c->pending_valid )
363 {
364 gdprintk(XENLOG_INFO, "Re-injecting 0x%"PRIx32", 0x%"PRIx32"\n",
365 c->pending_event, c->error_code);
367 if ( hvm_event_needs_reinjection(c->pending_type, c->pending_vector) )
368 {
369 vmcb->eventinj.bytes = c->pending_event;
370 vmcb->eventinj.fields.errorcode = c->error_code;
371 }
372 }
374 paging_update_paging_modes(v);
376 return 0;
377 }
380 static void svm_save_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
381 {
382 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
384 data->shadow_gs = vmcb->kerngsbase;
385 data->msr_lstar = vmcb->lstar;
386 data->msr_star = vmcb->star;
387 data->msr_cstar = vmcb->cstar;
388 data->msr_syscall_mask = vmcb->sfmask;
389 data->msr_efer = v->arch.hvm_vcpu.guest_efer;
390 data->msr_flags = -1ULL;
392 data->tsc = hvm_get_guest_time(v);
393 }
396 static void svm_load_cpu_state(struct vcpu *v, struct hvm_hw_cpu *data)
397 {
398 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
400 vmcb->kerngsbase = data->shadow_gs;
401 vmcb->lstar = data->msr_lstar;
402 vmcb->star = data->msr_star;
403 vmcb->cstar = data->msr_cstar;
404 vmcb->sfmask = data->msr_syscall_mask;
405 v->arch.hvm_vcpu.guest_efer = data->msr_efer;
406 svm_update_guest_efer(v);
408 hvm_set_guest_time(v, data->tsc);
409 }
411 static void svm_save_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
412 {
413 svm_save_cpu_state(v, ctxt);
414 svm_vmcb_save(v, ctxt);
415 }
417 static int svm_load_vmcb_ctxt(struct vcpu *v, struct hvm_hw_cpu *ctxt)
418 {
419 svm_load_cpu_state(v, ctxt);
420 if (svm_vmcb_restore(v, ctxt)) {
421 printk("svm_vmcb restore failed!\n");
422 domain_crash(v->domain);
423 return -EINVAL;
424 }
426 return 0;
427 }
429 static enum hvm_intblk svm_interrupt_blocked(
430 struct vcpu *v, struct hvm_intack intack)
431 {
432 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
434 if ( vmcb->interrupt_shadow )
435 return hvm_intblk_shadow;
437 if ( intack.source == hvm_intsrc_nmi )
438 return hvm_intblk_none;
440 ASSERT((intack.source == hvm_intsrc_pic) ||
441 (intack.source == hvm_intsrc_lapic));
443 if ( !(guest_cpu_user_regs()->eflags & X86_EFLAGS_IF) )
444 return hvm_intblk_rflags_ie;
446 return hvm_intblk_none;
447 }
449 static int svm_guest_x86_mode(struct vcpu *v)
450 {
451 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
453 if ( unlikely(!(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE)) )
454 return 0;
455 if ( unlikely(guest_cpu_user_regs()->eflags & X86_EFLAGS_VM) )
456 return 1;
457 if ( hvm_long_mode_enabled(v) && likely(vmcb->cs.attr.fields.l) )
458 return 8;
459 return (likely(vmcb->cs.attr.fields.db) ? 4 : 2);
460 }
462 static void svm_update_host_cr3(struct vcpu *v)
463 {
464 /* SVM doesn't have a HOST_CR3 equivalent to update. */
465 }
467 static void svm_update_guest_cr(struct vcpu *v, unsigned int cr)
468 {
469 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
471 switch ( cr )
472 {
473 case 0:
474 /* TS cleared? Then initialise FPU now. */
475 if ( (v == current) && !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) &&
476 (vmcb->cr0 & X86_CR0_TS) )
477 {
478 setup_fpu(v);
479 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
480 }
482 vmcb->cr0 = v->arch.hvm_vcpu.guest_cr[0];
483 if ( !paging_mode_hap(v->domain) )
484 vmcb->cr0 |= X86_CR0_PG | X86_CR0_WP;
485 break;
486 case 2:
487 vmcb->cr2 = v->arch.hvm_vcpu.guest_cr[2];
488 break;
489 case 3:
490 vmcb->cr3 = v->arch.hvm_vcpu.hw_cr[3];
491 svm_asid_inv_asid(v);
492 break;
493 case 4:
494 vmcb->cr4 = HVM_CR4_HOST_MASK;
495 if ( paging_mode_hap(v->domain) )
496 vmcb->cr4 &= ~X86_CR4_PAE;
497 vmcb->cr4 |= v->arch.hvm_vcpu.guest_cr[4];
498 break;
499 default:
500 BUG();
501 }
502 }
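/*
 * The VMCB's EFER always has SVME set; LME is only propagated once the
 * guest has actually activated long mode (i.e. LMA is set).
 */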
504 static void svm_update_guest_efer(struct vcpu *v)
505 {
506 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
508 vmcb->efer = (v->arch.hvm_vcpu.guest_efer | EFER_SVME) & ~EFER_LME;
509 if ( vmcb->efer & EFER_LMA )
510 vmcb->efer |= EFER_LME;
511 }
513 static void svm_flush_guest_tlbs(void)
514 {
515 /* Roll over the CPU's ASID generation, so it gets a clean TLB when we
516 * next VMRUN. (If ASIDs are disabled, the whole TLB is flushed on
517 * VMRUN anyway). */
518 svm_asid_inc_generation();
519 }
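/*
 * VMSAVE the hidden register state (FS/GS/TR/LDTR, KernelGSBase, syscall
 * and sysenter MSRs) into the VMCB if the in-memory copy may be stale.
 */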
521 static void svm_sync_vmcb(struct vcpu *v)
522 {
523 struct arch_svm_struct *arch_svm = &v->arch.hvm_svm;
525 if ( arch_svm->vmcb_in_sync )
526 return;
528 arch_svm->vmcb_in_sync = 1;
530 svm_vmsave(arch_svm->vmcb);
531 }
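/*
 * FS/GS/TR/LDTR bases live in the hidden state, so sync the VMCB before
 * reading them; CS/DS/ES/SS bases are forced to zero in long mode.
 */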
533 static unsigned long svm_get_segment_base(struct vcpu *v, enum x86_segment seg)
534 {
535 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
536 int long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
538 switch ( seg )
539 {
540 case x86_seg_cs: return long_mode ? 0 : vmcb->cs.base;
541 case x86_seg_ds: return long_mode ? 0 : vmcb->ds.base;
542 case x86_seg_es: return long_mode ? 0 : vmcb->es.base;
543 case x86_seg_fs: svm_sync_vmcb(v); return vmcb->fs.base;
544 case x86_seg_gs: svm_sync_vmcb(v); return vmcb->gs.base;
545 case x86_seg_ss: return long_mode ? 0 : vmcb->ss.base;
546 case x86_seg_tr: svm_sync_vmcb(v); return vmcb->tr.base;
547 case x86_seg_gdtr: return vmcb->gdtr.base;
548 case x86_seg_idtr: return vmcb->idtr.base;
549 case x86_seg_ldtr: svm_sync_vmcb(v); return vmcb->ldtr.base;
550 }
551 BUG();
552 return 0;
553 }
555 static void svm_get_segment_register(struct vcpu *v, enum x86_segment seg,
556 struct segment_register *reg)
557 {
558 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
560 ASSERT(v == current);
562 switch ( seg )
563 {
564 case x86_seg_cs:
565 memcpy(reg, &vmcb->cs, sizeof(*reg));
566 break;
567 case x86_seg_ds:
568 memcpy(reg, &vmcb->ds, sizeof(*reg));
569 break;
570 case x86_seg_es:
571 memcpy(reg, &vmcb->es, sizeof(*reg));
572 break;
573 case x86_seg_fs:
574 svm_sync_vmcb(v);
575 memcpy(reg, &vmcb->fs, sizeof(*reg));
576 break;
577 case x86_seg_gs:
578 svm_sync_vmcb(v);
579 memcpy(reg, &vmcb->gs, sizeof(*reg));
580 break;
581 case x86_seg_ss:
582 memcpy(reg, &vmcb->ss, sizeof(*reg));
583 break;
584 case x86_seg_tr:
585 svm_sync_vmcb(v);
586 memcpy(reg, &vmcb->tr, sizeof(*reg));
587 break;
588 case x86_seg_gdtr:
589 memcpy(reg, &vmcb->gdtr, sizeof(*reg));
590 break;
591 case x86_seg_idtr:
592 memcpy(reg, &vmcb->idtr, sizeof(*reg));
593 break;
594 case x86_seg_ldtr:
595 svm_sync_vmcb(v);
596 memcpy(reg, &vmcb->ldtr, sizeof(*reg));
597 break;
598 default:
599 BUG();
600 }
601 }
603 static void svm_set_segment_register(struct vcpu *v, enum x86_segment seg,
604 struct segment_register *reg)
605 {
606 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
607 int sync = 0;
609 ASSERT((v == current) || !vcpu_runnable(v));
611 switch ( seg )
612 {
613 case x86_seg_fs:
614 case x86_seg_gs:
615 case x86_seg_tr:
616 case x86_seg_ldtr:
617 sync = (v == current);
618 break;
619 default:
620 break;
621 }
623 if ( sync )
624 svm_sync_vmcb(v);
626 switch ( seg )
627 {
628 case x86_seg_cs:
629 memcpy(&vmcb->cs, reg, sizeof(*reg));
630 break;
631 case x86_seg_ds:
632 memcpy(&vmcb->ds, reg, sizeof(*reg));
633 break;
634 case x86_seg_es:
635 memcpy(&vmcb->es, reg, sizeof(*reg));
636 break;
637 case x86_seg_fs:
638 memcpy(&vmcb->fs, reg, sizeof(*reg));
639 break;
640 case x86_seg_gs:
641 memcpy(&vmcb->gs, reg, sizeof(*reg));
642 break;
643 case x86_seg_ss:
644 memcpy(&vmcb->ss, reg, sizeof(*reg));
645 vmcb->cpl = vmcb->ss.attr.fields.dpl;
646 break;
647 case x86_seg_tr:
648 memcpy(&vmcb->tr, reg, sizeof(*reg));
649 break;
650 case x86_seg_gdtr:
651 memcpy(&vmcb->gdtr, reg, sizeof(*reg));
652 break;
653 case x86_seg_idtr:
654 memcpy(&vmcb->idtr, reg, sizeof(*reg));
655 break;
656 case x86_seg_ldtr:
657 memcpy(&vmcb->ldtr, reg, sizeof(*reg));
658 break;
659 default:
660 BUG();
661 }
663 if ( sync )
664 svm_vmload(vmcb);
665 }
667 /* Make sure that xen intercepts any FP accesses from current */
668 static void svm_stts(struct vcpu *v)
669 {
670 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
672 /*
673 * If the guest does not have TS enabled then we must cause and handle an
674 * exception on first use of the FPU. If the guest *does* have TS enabled
675 * then this is not necessary: no FPU activity can occur until the guest
676 * clears CR0.TS, and we will initialise the FPU when that happens.
677 */
678 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
679 {
680 v->arch.hvm_svm.vmcb->exception_intercepts |= 1U << TRAP_no_device;
681 vmcb->cr0 |= X86_CR0_TS;
682 }
683 }
685 static void svm_set_tsc_offset(struct vcpu *v, u64 offset)
686 {
687 v->arch.hvm_svm.vmcb->tsc_offset = offset;
688 }
690 static void svm_init_hypercall_page(struct domain *d, void *hypercall_page)
691 {
692 char *p;
693 int i;
695 for ( i = 0; i < (PAGE_SIZE / 32); i++ )
696 {
697 p = (char *)(hypercall_page + (i * 32));
698 *(u8 *)(p + 0) = 0xb8; /* mov imm32, %eax */
699 *(u32 *)(p + 1) = i;
700 *(u8 *)(p + 5) = 0x0f; /* vmmcall */
701 *(u8 *)(p + 6) = 0x01;
702 *(u8 *)(p + 7) = 0xd9;
703 *(u8 *)(p + 8) = 0xc3; /* ret */
704 }
706 /* Don't support HYPERVISOR_iret at the moment */
707 *(u16 *)(hypercall_page + (__HYPERVISOR_iret * 32)) = 0x0b0f; /* ud2 */
708 }
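/*
 * Descheduling this vCPU: save its debug registers and hidden state, then
 * reload host state from this CPU's root VMCB.
 */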
710 static void svm_ctxt_switch_from(struct vcpu *v)
711 {
712 int cpu = smp_processor_id();
714 svm_save_dr(v);
716 svm_sync_vmcb(v);
717 svm_vmload(root_vmcb[cpu]);
719 #ifdef __x86_64__
720 /* Resume use of ISTs now that the host TR is reinstated. */
721 idt_tables[cpu][TRAP_double_fault].a |= IST_DF << 32;
722 idt_tables[cpu][TRAP_nmi].a |= IST_NMI << 32;
723 idt_tables[cpu][TRAP_machine_check].a |= IST_MCE << 32;
724 #endif
725 }
727 static void svm_ctxt_switch_to(struct vcpu *v)
728 {
729 int cpu = smp_processor_id();
731 #ifdef __x86_64__
732 /*
733 * This is required because VMRUN performs consistency checks, and some
734 * of the DOM0 selectors point to invalid GDT locations, which would
735 * cause AMD processors to shut down.
736 *
737 */
738 set_segment_register(ds, 0);
739 set_segment_register(es, 0);
740 set_segment_register(ss, 0);
742 /*
743 * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
744 * But this doesn't matter: the IST is only req'd to handle SYSCALL/SYSRET.
745 */
746 idt_tables[cpu][TRAP_double_fault].a &= ~(7UL << 32);
747 idt_tables[cpu][TRAP_nmi].a &= ~(7UL << 32);
748 idt_tables[cpu][TRAP_machine_check].a &= ~(7UL << 32);
749 #endif
751 svm_restore_dr(v);
753 svm_vmsave(root_vmcb[cpu]);
754 svm_vmload(v->arch.hvm_svm.vmcb);
755 }
757 static void svm_do_resume(struct vcpu *v)
758 {
759 bool_t debug_state = v->domain->debugger_attached;
761 if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
762 {
763 uint32_t mask = (1U << TRAP_debug) | (1U << TRAP_int3);
764 v->arch.hvm_vcpu.debug_state_latch = debug_state;
765 if ( debug_state )
766 v->arch.hvm_svm.vmcb->exception_intercepts |= mask;
767 else
768 v->arch.hvm_svm.vmcb->exception_intercepts &= ~mask;
769 }
771 if ( v->arch.hvm_svm.launch_core != smp_processor_id() )
772 {
773 v->arch.hvm_svm.launch_core = smp_processor_id();
774 hvm_migrate_timers(v);
776 /* Migrating to another ASID domain. Request a new ASID. */
777 svm_asid_init_vcpu(v);
778 }
780 /* Reflect the vlapic's TPR in the hardware vtpr */
781 v->arch.hvm_svm.vmcb->vintr.fields.tpr =
782 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
784 hvm_do_resume(v);
785 reset_stack_and_jump(svm_asm_do_resume);
786 }
788 static int svm_domain_initialise(struct domain *d)
789 {
790 return 0;
791 }
793 static void svm_domain_destroy(struct domain *d)
794 {
795 }
797 static int svm_vcpu_initialise(struct vcpu *v)
798 {
799 int rc;
801 v->arch.schedule_tail = svm_do_resume;
802 v->arch.ctxt_switch_from = svm_ctxt_switch_from;
803 v->arch.ctxt_switch_to = svm_ctxt_switch_to;
805 v->arch.hvm_svm.launch_core = -1;
807 if ( (rc = svm_create_vmcb(v)) != 0 )
808 {
809 dprintk(XENLOG_WARNING,
810 "Failed to create VMCB for vcpu %d: err=%d.\n",
811 v->vcpu_id, rc);
812 return rc;
813 }
815 return 0;
816 }
818 static void svm_vcpu_destroy(struct vcpu *v)
819 {
820 svm_destroy_vmcb(v);
821 }
823 static void svm_inject_exception(
824 unsigned int trapnr, int errcode, unsigned long cr2)
825 {
826 struct vcpu *curr = current;
827 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
828 eventinj_t event;
830 event.bytes = 0;
831 event.fields.v = 1;
832 event.fields.type = X86_EVENTTYPE_HW_EXCEPTION;
833 event.fields.vector = trapnr;
834 event.fields.ev = (errcode != HVM_DELIVER_NO_ERROR_CODE);
835 event.fields.errorcode = errcode;
837 vmcb->eventinj = event;
839 if ( trapnr == TRAP_page_fault )
840 {
841 vmcb->cr2 = curr->arch.hvm_vcpu.guest_cr[2] = cr2;
842 HVMTRACE_2D(PF_INJECT, curr, curr->arch.hvm_vcpu.guest_cr[2], errcode);
843 }
844 else
845 {
846 HVMTRACE_2D(INJ_EXC, curr, trapnr, errcode);
847 }
849 if ( (trapnr == TRAP_debug) &&
850 (guest_cpu_user_regs()->eflags & X86_EFLAGS_TF) )
851 {
852 __restore_debug_registers(curr);
853 vmcb->dr6 |= 0x4000;
854 }
855 }
857 static int svm_event_pending(struct vcpu *v)
858 {
859 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
860 return vmcb->eventinj.fields.v;
861 }
863 static struct hvm_function_table svm_function_table = {
864 .name = "SVM",
865 .cpu_down = svm_cpu_down,
866 .domain_initialise = svm_domain_initialise,
867 .domain_destroy = svm_domain_destroy,
868 .vcpu_initialise = svm_vcpu_initialise,
869 .vcpu_destroy = svm_vcpu_destroy,
870 .save_cpu_ctxt = svm_save_vmcb_ctxt,
871 .load_cpu_ctxt = svm_load_vmcb_ctxt,
872 .interrupt_blocked = svm_interrupt_blocked,
873 .guest_x86_mode = svm_guest_x86_mode,
874 .get_segment_base = svm_get_segment_base,
875 .get_segment_register = svm_get_segment_register,
876 .set_segment_register = svm_set_segment_register,
877 .update_host_cr3 = svm_update_host_cr3,
878 .update_guest_cr = svm_update_guest_cr,
879 .update_guest_efer = svm_update_guest_efer,
880 .flush_guest_tlbs = svm_flush_guest_tlbs,
881 .stts = svm_stts,
882 .set_tsc_offset = svm_set_tsc_offset,
883 .inject_exception = svm_inject_exception,
884 .init_hypercall_page = svm_init_hypercall_page,
885 .event_pending = svm_event_pending
886 };
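/*
 * Per-CPU SVM bring-up: enable EFER.SVME, program the host save area, and
 * (on the boot CPU only) register the SVM function table with the HVM layer.
 */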
888 int start_svm(struct cpuinfo_x86 *c)
889 {
890 u32 eax, ecx, edx;
891 u32 phys_hsa_lo, phys_hsa_hi;
892 u64 phys_hsa;
893 int cpu = smp_processor_id();
895 /* Xen does not fill x86_capability words except 0. */
896 ecx = cpuid_ecx(0x80000001);
897 boot_cpu_data.x86_capability[5] = ecx;
899 if ( !(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)) )
900 return 0;
902 /* Check whether SVM feature is disabled in BIOS */
903 rdmsr(MSR_K8_VM_CR, eax, edx);
904 if ( eax & K8_VMCR_SVME_DISABLE )
905 {
906 printk("AMD SVM Extension is disabled in BIOS.\n");
907 return 0;
908 }
910 if ( ((hsa[cpu] = alloc_host_save_area()) == NULL) ||
911 ((root_vmcb[cpu] = alloc_vmcb()) == NULL) )
912 return 0;
914 write_efer(read_efer() | EFER_SVME);
916 /* Initialize the HSA for this core. */
917 phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
918 phys_hsa_lo = (u32) phys_hsa;
919 phys_hsa_hi = (u32) (phys_hsa >> 32);
920 wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
922 /* Initialize core's ASID handling. */
923 svm_asid_init(c);
925 if ( cpu != 0 )
926 return 1;
928 setup_vmcb_dump();
930 svm_feature_flags = ((cpuid_eax(0x80000000) >= 0x8000000A) ?
931 cpuid_edx(0x8000000A) : 0);
933 svm_function_table.hap_supported = cpu_has_svm_npt;
935 hvm_enable(&svm_function_table);
937 return 1;
938 }
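/*
 * Nested (NPT) page fault handler: forward emulated-MMIO accesses to the
 * device model; otherwise treat the fault as a log-dirty write.
 */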
940 static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
941 {
942 p2m_type_t p2mt;
943 mfn_t mfn;
944 unsigned long gfn = gpa >> PAGE_SHIFT;
946 /* If this GFN is emulated MMIO, pass the fault to the mmio handler */
947 mfn = gfn_to_mfn_current(gfn, &p2mt);
948 if ( p2mt == p2m_mmio_dm )
949 {
950 handle_mmio(gpa);
951 return;
952 }
954 /* Log-dirty: mark the page dirty and let the guest write it again */
955 paging_mark_dirty(current->domain, mfn_x(mfn));
956 p2m_change_type(current->domain, gfn, p2m_ram_logdirty, p2m_ram_rw);
957 }
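/*
 * #NM intercept: the guest touched the FPU, so restore its FPU state, stop
 * intercepting #NM, and clear CR0.TS in the VMCB if the guest has it clear.
 */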
959 static void svm_do_no_device_fault(struct vmcb_struct *vmcb)
960 {
961 struct vcpu *v = current;
963 setup_fpu(v);
964 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
966 if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_TS) )
967 vmcb->cr0 &= ~X86_CR0_TS;
968 }
970 /* Reserved bits ECX: [31:14], [12:4], [2:1]*/
971 #define SVM_VCPU_CPUID_L1_ECX_RESERVED 0xffffdff6
972 /* Reserved bits EDX: [31:29], [27], [22:20], [18], [10] */
973 #define SVM_VCPU_CPUID_L1_EDX_RESERVED 0xe8740400
975 #define bitmaskof(idx) (1U << ((idx) & 31))
976 static void svm_vmexit_do_cpuid(struct vmcb_struct *vmcb,
977 struct cpu_user_regs *regs)
978 {
979 unsigned long input = regs->eax;
980 unsigned int eax, ebx, ecx, edx;
981 struct vcpu *v = current;
982 int inst_len;
984 hvm_cpuid(input, &eax, &ebx, &ecx, &edx);
986 switch ( input )
987 {
988 case 0x00000001:
989 /* Clear out reserved bits. */
990 ecx &= ~SVM_VCPU_CPUID_L1_ECX_RESERVED;
991 edx &= ~SVM_VCPU_CPUID_L1_EDX_RESERVED;
993 /* Guest should only see one logical processor.
994 * See details on page 23 of AMD CPUID Specification.
995 */
996 __clear_bit(X86_FEATURE_HT & 31, &edx);
997 ebx &= 0xFF00FFFF; /* clear the logical processor count when HTT=0 */
998 ebx |= 0x00010000; /* set to 1 just for precaution */
999 break;
1001 case 0x80000001:
1002 /* Filter features which are shared with 0x00000001:EDX. */
1003 if ( vlapic_hw_disabled(vcpu_vlapic(v)) )
1004 __clear_bit(X86_FEATURE_APIC & 31, &edx);
1005 #if CONFIG_PAGING_LEVELS >= 3
1006 if ( !v->domain->arch.hvm_domain.params[HVM_PARAM_PAE_ENABLED] )
1007 #endif
1008 __clear_bit(X86_FEATURE_PAE & 31, &edx);
1009 __clear_bit(X86_FEATURE_PSE36 & 31, &edx);
1011 /* Filter all other features according to a whitelist. */
1012 ecx &= (bitmaskof(X86_FEATURE_LAHF_LM) |
1013 bitmaskof(X86_FEATURE_ALTMOVCR) |
1014 bitmaskof(X86_FEATURE_ABM) |
1015 bitmaskof(X86_FEATURE_SSE4A) |
1016 bitmaskof(X86_FEATURE_MISALIGNSSE) |
1017 bitmaskof(X86_FEATURE_3DNOWPF));
1018 edx &= (0x0183f3ff | /* features shared with 0x00000001:EDX */
1019 bitmaskof(X86_FEATURE_NX) |
1020 bitmaskof(X86_FEATURE_LM) |
1021 bitmaskof(X86_FEATURE_SYSCALL) |
1022 bitmaskof(X86_FEATURE_MP) |
1023 bitmaskof(X86_FEATURE_MMXEXT) |
1024 bitmaskof(X86_FEATURE_FFXSR));
1025 break;
1027 case 0x80000007:
1028 case 0x8000000A:
1029 /* Mask out features of power management and SVM extension. */
1030 eax = ebx = ecx = edx = 0;
1031 break;
1033 case 0x80000008:
1034 /* Make sure Number of CPU core is 1 when HTT=0 */
1035 ecx &= 0xFFFFFF00;
1036 break;
1039 regs->eax = eax;
1040 regs->ebx = ebx;
1041 regs->ecx = ecx;
1042 regs->edx = edx;
1044 HVMTRACE_3D(CPUID, v, input,
1045 ((uint64_t)eax << 32) | ebx, ((uint64_t)ecx << 32) | edx);
1047 inst_len = __get_instruction_length(v, INSTR_CPUID, NULL);
1048 __update_guest_eip(regs, inst_len);
1051 static unsigned long *get_reg_p(
1052 unsigned int gpreg,
1053 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1055 unsigned long *reg_p = NULL;
1056 switch (gpreg)
1058 case SVM_REG_EAX:
1059 reg_p = (unsigned long *)&regs->eax;
1060 break;
1061 case SVM_REG_EBX:
1062 reg_p = (unsigned long *)&regs->ebx;
1063 break;
1064 case SVM_REG_ECX:
1065 reg_p = (unsigned long *)&regs->ecx;
1066 break;
1067 case SVM_REG_EDX:
1068 reg_p = (unsigned long *)&regs->edx;
1069 break;
1070 case SVM_REG_EDI:
1071 reg_p = (unsigned long *)&regs->edi;
1072 break;
1073 case SVM_REG_ESI:
1074 reg_p = (unsigned long *)&regs->esi;
1075 break;
1076 case SVM_REG_EBP:
1077 reg_p = (unsigned long *)&regs->ebp;
1078 break;
1079 case SVM_REG_ESP:
1080 reg_p = (unsigned long *)&regs->esp;
1081 break;
1082 #ifdef __x86_64__
1083 case SVM_REG_R8:
1084 reg_p = (unsigned long *)&regs->r8;
1085 break;
1086 case SVM_REG_R9:
1087 reg_p = (unsigned long *)&regs->r9;
1088 break;
1089 case SVM_REG_R10:
1090 reg_p = (unsigned long *)&regs->r10;
1091 break;
1092 case SVM_REG_R11:
1093 reg_p = (unsigned long *)&regs->r11;
1094 break;
1095 case SVM_REG_R12:
1096 reg_p = (unsigned long *)&regs->r12;
1097 break;
1098 case SVM_REG_R13:
1099 reg_p = (unsigned long *)&regs->r13;
1100 break;
1101 case SVM_REG_R14:
1102 reg_p = (unsigned long *)&regs->r14;
1103 break;
1104 case SVM_REG_R15:
1105 reg_p = (unsigned long *)&regs->r15;
1106 break;
1107 #endif
1108 default:
1109 BUG();
1112 return reg_p;
1116 static unsigned long get_reg(
1117 unsigned int gpreg, struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1119 unsigned long *gp;
1120 gp = get_reg_p(gpreg, regs, vmcb);
1121 return *gp;
1125 static void set_reg(
1126 unsigned int gpreg, unsigned long value,
1127 struct cpu_user_regs *regs, struct vmcb_struct *vmcb)
1129 unsigned long *gp;
1130 gp = get_reg_p(gpreg, regs, vmcb);
1131 *gp = value;
1135 static void svm_dr_access(struct vcpu *v, struct cpu_user_regs *regs)
1137 HVMTRACE_0D(DR_WRITE, v);
1138 __restore_debug_registers(v);
1142 static void svm_get_prefix_info(struct vcpu *v, unsigned int dir,
1143 svm_segment_register_t **seg,
1144 unsigned int *asize)
1146 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1147 unsigned char inst[MAX_INST_LEN];
1148 int i;
1150 memset(inst, 0, MAX_INST_LEN);
1151 if (inst_copy_from_guest(inst, svm_rip2pointer(v), sizeof(inst))
1152 != MAX_INST_LEN)
1154 gdprintk(XENLOG_ERR, "get guest instruction failed\n");
1155 domain_crash(current->domain);
1156 return;
1159 for (i = 0; i < MAX_INST_LEN; i++)
1161 switch (inst[i])
1163 case 0xf3: /* REPZ */
1164 case 0xf2: /* REPNZ */
1165 case 0xf0: /* LOCK */
1166 case 0x66: /* data32 */
1167 #ifdef __x86_64__
1168 /* REX prefixes */
1169 case 0x40:
1170 case 0x41:
1171 case 0x42:
1172 case 0x43:
1173 case 0x44:
1174 case 0x45:
1175 case 0x46:
1176 case 0x47:
1178 case 0x48:
1179 case 0x49:
1180 case 0x4a:
1181 case 0x4b:
1182 case 0x4c:
1183 case 0x4d:
1184 case 0x4e:
1185 case 0x4f:
1186 #endif
1187 continue;
1188 case 0x67: /* addr32 */
1189 *asize ^= 48; /* Switch 16/32 bits */
1190 continue;
1191 case 0x2e: /* CS */
1192 *seg = &vmcb->cs;
1193 continue;
1194 case 0x36: /* SS */
1195 *seg = &vmcb->ss;
1196 continue;
1197 case 0x26: /* ES */
1198 *seg = &vmcb->es;
1199 continue;
1200 case 0x64: /* FS */
1201 svm_sync_vmcb(v);
1202 *seg = &vmcb->fs;
1203 continue;
1204 case 0x65: /* GS */
1205 svm_sync_vmcb(v);
1206 *seg = &vmcb->gs;
1207 continue;
1208 case 0x3e: /* DS */
1209 *seg = &vmcb->ds;
1210 continue;
1211 default:
1212 break;
1214 return;
1219 /* Get the address of INS/OUTS instruction */
1220 static int svm_get_io_address(
1221 struct vcpu *v, struct cpu_user_regs *regs,
1222 unsigned int size, ioio_info_t info,
1223 unsigned long *count, unsigned long *addr)
1225 unsigned long reg;
1226 unsigned int asize, isize;
1227 int long_mode = 0;
1228 svm_segment_register_t *seg = NULL;
1229 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1231 /* If we're in long mode, don't check the segment presence & limit */
1232 long_mode = vmcb->cs.attr.fields.l && hvm_long_mode_enabled(v);
1234 /* d field of cs.attr is 1 for 32-bit, 0 for 16 or 64 bit.
1235 * l field combined with EFER_LMA says whether it's 16 or 64 bit.
1236 */
1237 asize = (long_mode)?64:((vmcb->cs.attr.fields.db)?32:16);
1240 /* The ins/outs instructions are single byte, so if we have got more
1241 * than one byte (+ maybe rep-prefix), we have some prefix so we need
1242 * to figure out what it is...
1243 */
1244 isize = vmcb->exitinfo2 - regs->eip;
1246 if (info.fields.rep)
1247 isize --;
1249 if (isize > 1)
1250 svm_get_prefix_info(v, info.fields.type, &seg, &asize);
1252 if (info.fields.type == IOREQ_WRITE)
1254 reg = regs->esi;
1255 if (!seg) /* If no prefix, used DS. */
1256 seg = &vmcb->ds;
1257 if (!long_mode && (seg->attr.fields.type & 0xa) == 0x8) {
1258 svm_inject_exception(TRAP_gp_fault, 0, 0);
1259 return 0;
1262 else
1264 reg = regs->edi;
1265 seg = &vmcb->es; /* Note: This is ALWAYS ES. */
1266 if (!long_mode && (seg->attr.fields.type & 0xa) != 0x2) {
1267 svm_inject_exception(TRAP_gp_fault, 0, 0);
1268 return 0;
1272 /* If the segment isn't present, give GP fault! */
1273 if (!long_mode && !seg->attr.fields.p)
1275 svm_inject_exception(TRAP_gp_fault, 0, 0);
1276 return 0;
1279 if (asize == 16)
1281 *addr = (reg & 0xFFFF);
1282 *count = regs->ecx & 0xffff;
1284 else
1286 *addr = reg;
1287 *count = regs->ecx;
1289 if (!info.fields.rep)
1290 *count = 1;
1292 if (!long_mode)
1294 ASSERT(*addr == (u32)*addr);
1295 if ((u32)(*addr + size - 1) < (u32)*addr ||
1296 (seg->attr.fields.type & 0xc) != 0x4 ?
1297 *addr + size - 1 > seg->limit :
1298 *addr <= seg->limit)
1300 svm_inject_exception(TRAP_gp_fault, 0, 0);
1301 return 0;
1304 /* Check the limit for repeated instructions, as above we checked only
1305 the first instance. Truncate the count if a limit violation would
1306 occur. Note that the checking is not necessary for page granular
1307 segments as transfers crossing page boundaries will be broken up
1308 anyway. */
1309 if (!seg->attr.fields.g && *count > 1)
1311 if ((seg->attr.fields.type & 0xc) != 0x4)
1313 /* expand-up */
1314 if (!(regs->eflags & EF_DF))
1316 if (*addr + *count * size - 1 < *addr ||
1317 *addr + *count * size - 1 > seg->limit)
1318 *count = (seg->limit + 1UL - *addr) / size;
1320 else
1322 if (*count - 1 > *addr / size)
1323 *count = *addr / size + 1;
1326 else
1328 /* expand-down */
1329 if (!(regs->eflags & EF_DF))
1331 if (*count - 1 > -(s32)*addr / size)
1332 *count = -(s32)*addr / size + 1UL;
1334 else
1336 if (*addr < (*count - 1) * size ||
1337 *addr - (*count - 1) * size <= seg->limit)
1338 *count = (*addr - seg->limit - 1) / size + 1;
1341 ASSERT(*count);
1344 *addr += seg->base;
1346 #ifdef __x86_64__
1347 else
1349 if (seg == &vmcb->fs || seg == &vmcb->gs)
1350 *addr += seg->base;
1352 if (!is_canonical_address(*addr) ||
1353 !is_canonical_address(*addr + size - 1))
1355 svm_inject_exception(TRAP_gp_fault, 0, 0);
1356 return 0;
1358 if (*count > (1UL << 48) / size)
1359 *count = (1UL << 48) / size;
1360 if (!(regs->eflags & EF_DF))
1362 if (*addr + *count * size - 1 < *addr ||
1363 !is_canonical_address(*addr + *count * size - 1))
1364 *count = (*addr & ~((1UL << 48) - 1)) / size;
1366 else
1368 if ((*count - 1) * size > *addr ||
1369 !is_canonical_address(*addr + (*count - 1) * size))
1370 *count = (*addr & ~((1UL << 48) - 1)) / size + 1;
1372 ASSERT(*count);
1374 #endif
1376 return 1;
1380 static void svm_io_instruction(struct vcpu *v)
1382 struct cpu_user_regs *regs;
1383 struct hvm_io_op *pio_opp;
1384 unsigned int port;
1385 unsigned int size, dir, df;
1386 ioio_info_t info;
1387 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1389 pio_opp = &current->arch.hvm_vcpu.io_op;
1390 pio_opp->instr = INSTR_PIO;
1391 pio_opp->flags = 0;
1393 regs = &pio_opp->io_context;
1395 /* Copy current guest state into io instruction state structure. */
1396 memcpy(regs, guest_cpu_user_regs(), HVM_CONTEXT_STACK_BYTES);
1398 info.bytes = vmcb->exitinfo1;
1400 port = info.fields.port; /* port used to be addr */
1401 dir = info.fields.type; /* direction */
1402 df = regs->eflags & X86_EFLAGS_DF ? 1 : 0;
1404 if (info.fields.sz32)
1405 size = 4;
1406 else if (info.fields.sz16)
1407 size = 2;
1408 else
1409 size = 1;
1411 if (dir==IOREQ_READ)
1412 HVMTRACE_2D(IO_READ, v, port, size);
1413 else
1414 HVMTRACE_3D(IO_WRITE, v, port, size, regs->eax);
1416 HVM_DBG_LOG(DBG_LEVEL_IO,
1417 "svm_io_instruction: port 0x%x eip=%x:%"PRIx64", "
1418 "exit_qualification = %"PRIx64,
1419 port, vmcb->cs.sel, (uint64_t)regs->eip, info.bytes);
1421 /* string instruction */
1422 if (info.fields.str)
1424 unsigned long addr, count;
1425 paddr_t paddr;
1426 unsigned long gfn;
1427 uint32_t pfec;
1428 int sign = regs->eflags & X86_EFLAGS_DF ? -1 : 1;
1430 if (!svm_get_io_address(v, regs, size, info, &count, &addr))
1432 /* We failed to get a valid address, so don't do the IO operation -
1433 * it would just get worse if we do! Hopefully the guest is handling
1434 * gp-faults...
1435 */
1436 return;
1439 /* "rep" prefix */
1440 if (info.fields.rep)
1442 pio_opp->flags |= REPZ;
1445 /* Translate the address to a physical address */
1446 pfec = PFEC_page_present;
1447 if ( dir == IOREQ_READ ) /* Read from PIO --> write to RAM */
1448 pfec |= PFEC_write_access;
1449 if ( vmcb->cpl == 3 )
1450 pfec |= PFEC_user_mode;
1451 gfn = paging_gva_to_gfn(v, addr, &pfec);
1452 if ( gfn == INVALID_GFN )
1454 /* The guest does not have the RAM address mapped.
1455 * Need to send in a page fault */
1456 svm_inject_exception(TRAP_page_fault, pfec, addr);
1457 return;
1459 paddr = (paddr_t)gfn << PAGE_SHIFT | (addr & ~PAGE_MASK);
1461 /*
1462 * Handle string pio instructions that cross pages or that
1463 * are unaligned. See the comments in hvm_platform.c/handle_mmio()
1464 */
1465 if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK))
1467 unsigned long value = 0;
1469 pio_opp->flags |= OVERLAP;
1470 pio_opp->addr = addr;
1472 if (dir == IOREQ_WRITE) /* OUTS */
1474 if ( hvm_paging_enabled(current) )
1476 int rv = hvm_copy_from_guest_virt(&value, addr, size);
1477 if ( rv == HVMCOPY_bad_gva_to_gfn )
1478 return; /* exception already injected */
1480 else
1481 (void)hvm_copy_from_guest_phys(&value, addr, size);
1483 else /* dir != IOREQ_WRITE */
1484 /* Remember where to write the result, as a *VA*.
1485 * Must be a VA so we can handle the page overlap
1486 * correctly in hvm_pio_assist() */
1487 pio_opp->addr = addr;
1489 if (count == 1)
1490 regs->eip = vmcb->exitinfo2;
1492 send_pio_req(port, 1, size, value, dir, df, 0);
1494 else
1496 unsigned long last_addr = sign > 0 ? addr + count * size - 1
1497 : addr - (count - 1) * size;
1499 if ((addr & PAGE_MASK) != (last_addr & PAGE_MASK))
1501 if (sign > 0)
1502 count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
1503 else
1504 count = (addr & ~PAGE_MASK) / size + 1;
1506 else
1507 regs->eip = vmcb->exitinfo2;
1509 send_pio_req(port, count, size, paddr, dir, df, 1);
1512 else
1514 /*
1515 * On SVM, the RIP of the instruction following the IN/OUT is saved in
1516 * ExitInfo2
1517 */
1518 regs->eip = vmcb->exitinfo2;
1520 if (port == 0xe9 && dir == IOREQ_WRITE && size == 1)
1521 hvm_print_line(v, regs->eax); /* guest debug output */
1523 send_pio_req(port, 1, size, regs->eax, dir, df, 0);
1527 static void mov_from_cr(int cr, int gp, struct cpu_user_regs *regs)
1529 unsigned long value = 0;
1530 struct vcpu *v = current;
1531 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1533 switch ( cr )
1535 case 0:
1536 value = v->arch.hvm_vcpu.guest_cr[0];
1537 break;
1538 case 3:
1539 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[3];
1540 break;
1541 case 4:
1542 value = (unsigned long)v->arch.hvm_vcpu.guest_cr[4];
1543 break;
1544 default:
1545 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1546 domain_crash(v->domain);
1547 return;
1550 HVMTRACE_2D(CR_READ, v, cr, value);
1552 set_reg(gp, value, regs, vmcb);
1554 HVM_DBG_LOG(DBG_LEVEL_VMMU, "mov_from_cr: CR%d, value = %lx", cr, value);
1557 static int mov_to_cr(int gpreg, int cr, struct cpu_user_regs *regs)
1559 unsigned long value;
1560 struct vcpu *v = current;
1561 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1563 value = get_reg(gpreg, regs, vmcb);
1565 HVMTRACE_2D(CR_WRITE, v, cr, value);
1567 HVM_DBG_LOG(DBG_LEVEL_1, "mov_to_cr: CR%d, value = %lx, current = %p",
1568 cr, value, v);
1570 switch ( cr )
1572 case 0:
1573 return hvm_set_cr0(value);
1574 case 3:
1575 return hvm_set_cr3(value);
1576 case 4:
1577 return hvm_set_cr4(value);
1578 default:
1579 gdprintk(XENLOG_ERR, "invalid cr: %d\n", cr);
1580 domain_crash(v->domain);
1581 return 0;
1584 return 1;
1587 static void svm_cr_access(
1588 struct vcpu *v, unsigned int cr, unsigned int type,
1589 struct cpu_user_regs *regs)
1591 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1592 int inst_len = 0;
1593 int index,addr_size,i;
1594 unsigned int gpreg,offset;
1595 unsigned long value,addr;
1596 u8 buffer[MAX_INST_LEN];
1597 u8 prefix = 0;
1598 u8 modrm;
1599 enum x86_segment seg;
1600 int result = 1;
1601 enum instruction_index list_a[] = {INSTR_MOV2CR, INSTR_CLTS, INSTR_LMSW};
1602 enum instruction_index list_b[] = {INSTR_MOVCR2, INSTR_SMSW};
1603 enum instruction_index match;
1605 inst_copy_from_guest(buffer, svm_rip2pointer(v), sizeof(buffer));
1607 /* get index to first actual instruction byte - as we will need to know
1608 where the prefix lives later on */
1609 index = skip_prefix_bytes(buffer, sizeof(buffer));
1611 if ( type == TYPE_MOV_TO_CR )
1613 inst_len = __get_instruction_length_from_list(
1614 v, list_a, ARRAY_SIZE(list_a), &buffer[index], &match);
1616 else /* type == TYPE_MOV_FROM_CR */
1618 inst_len = __get_instruction_length_from_list(
1619 v, list_b, ARRAY_SIZE(list_b), &buffer[index], &match);
1622 inst_len += index;
1624 /* Check for REX prefix - it's ALWAYS the last byte of any prefix bytes */
1625 if (index > 0 && (buffer[index-1] & 0xF0) == 0x40)
1626 prefix = buffer[index-1];
1628 HVM_DBG_LOG(DBG_LEVEL_1, "eip = %lx", (unsigned long)regs->eip);
1630 switch ( match )
1633 case INSTR_MOV2CR:
1634 gpreg = decode_src_reg(prefix, buffer[index+2]);
1635 result = mov_to_cr(gpreg, cr, regs);
1636 break;
1638 case INSTR_MOVCR2:
1639 gpreg = decode_src_reg(prefix, buffer[index+2]);
1640 mov_from_cr(cr, gpreg, regs);
1641 break;
1643 case INSTR_CLTS:
1644 /* TS being cleared means that it's time to restore fpu state. */
1645 setup_fpu(current);
1646 vmcb->exception_intercepts &= ~(1U << TRAP_no_device);
1647 vmcb->cr0 &= ~X86_CR0_TS; /* clear TS */
1648 v->arch.hvm_vcpu.guest_cr[0] &= ~X86_CR0_TS; /* clear TS */
1649 HVMTRACE_0D(CLTS, current);
1650 break;
1652 case INSTR_LMSW:
1653 gpreg = decode_src_reg(prefix, buffer[index+2]);
1654 value = get_reg(gpreg, regs, vmcb) & 0xF;
1655 value = (v->arch.hvm_vcpu.guest_cr[0] & ~0xF) | value;
1656 result = hvm_set_cr0(value);
1657 HVMTRACE_1D(LMSW, current, value);
1658 break;
1660 case INSTR_SMSW:
1661 value = v->arch.hvm_vcpu.guest_cr[0] & 0xFFFF;
1662 modrm = buffer[index+2];
1663 addr_size = svm_guest_x86_mode(v);
1664 if ( addr_size < 2 )
1665 addr_size = 2;
1666 if ( likely((modrm & 0xC0) >> 6 == 3) )
1668 gpreg = decode_src_reg(prefix, modrm);
1669 set_reg(gpreg, value, regs, vmcb);
1671 /*
1672 * For now, only implement decode of the offset mode, since that's the
1673 * only mode observed in a real-world OS. This code is also making the
1674 * assumption that we'll never hit this code in long mode.
1675 */
1676 else if ( (modrm == 0x26) || (modrm == 0x25) )
1678 seg = x86_seg_ds;
1679 i = index;
1680 /* Segment or address size overrides? */
1681 while ( i-- )
1683 switch ( buffer[i] )
1685 case 0x26: seg = x86_seg_es; break;
1686 case 0x2e: seg = x86_seg_cs; break;
1687 case 0x36: seg = x86_seg_ss; break;
1688 case 0x64: seg = x86_seg_fs; break;
1689 case 0x65: seg = x86_seg_gs; break;
1690 case 0x67: addr_size ^= 6; break;
1693 /* Bail unless this really is a seg_base + offset case */
1694 if ( ((modrm == 0x26) && (addr_size == 4)) ||
1695 ((modrm == 0x25) && (addr_size == 2)) )
1697 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: "
1698 "%lx failed due to unhandled addressing mode."
1699 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1700 domain_crash(v->domain);
1702 inst_len += addr_size;
1703 offset = *(( unsigned int *) ( void *) &buffer[index + 3]);
1704 offset = ( addr_size == 4 ) ? offset : ( offset & 0xFFFF );
1705 addr = hvm_get_segment_base(v, seg);
1706 addr += offset;
1707 result = (hvm_copy_to_guest_virt(addr, &value, 2)
1708 != HVMCOPY_bad_gva_to_gfn);
1710 else
1712 gdprintk(XENLOG_ERR, "SMSW emulation at guest address: %lx "
1713 "failed due to unhandled addressing mode!"
1714 "ModRM byte was: %x \n", svm_rip2pointer(v), modrm);
1715 domain_crash(v->domain);
1717 break;
1719 default:
1720 BUG();
1723 if ( result )
1724 __update_guest_eip(regs, inst_len);
1727 static void svm_do_msr_access(
1728 struct vcpu *v, struct cpu_user_regs *regs)
1730 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
1731 int inst_len;
1732 u64 msr_content=0;
1733 u32 ecx = regs->ecx, eax, edx;
1735 HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x, eax=%x, edx=%x, exitinfo = %lx",
1736 ecx, (u32)regs->eax, (u32)regs->edx,
1737 (unsigned long)vmcb->exitinfo1);
1739 /* is it a read? */
1740 if (vmcb->exitinfo1 == 0)
1742 switch (ecx) {
1743 case MSR_IA32_TSC:
1744 msr_content = hvm_get_guest_time(v);
1745 break;
1747 case MSR_IA32_APICBASE:
1748 msr_content = vcpu_vlapic(v)->hw.apic_base_msr;
1749 break;
1751 case MSR_EFER:
1752 msr_content = v->arch.hvm_vcpu.guest_efer;
1753 break;
1755 case MSR_IA32_MC4_MISC: /* Threshold register */
1756 case MSR_F10_MC4_MISC1 ... MSR_F10_MC4_MISC3:
1757 /*
1758 * MCA/MCE: We report that the threshold register is unavailable
1759 * for OS use (locked by the BIOS).
1760 */
1761 msr_content = 1ULL << 61; /* MC4_MISC.Locked */
1762 break;
1764 case MSR_IA32_EBC_FREQUENCY_ID:
1765 /*
1766 * This Intel-only register may be accessed if this HVM guest
1767 * has been migrated from an Intel host. The value zero is not
1768 * particularly meaningful, but at least avoids the guest crashing!
1769 */
1770 msr_content = 0;
1771 break;
1773 case MSR_K8_VM_HSAVE_PA:
1774 svm_inject_exception(TRAP_gp_fault, 0, 0);
1775 break;
1777 case MSR_IA32_MCG_CAP:
1778 case MSR_IA32_MCG_STATUS:
1779 case MSR_IA32_MC0_STATUS:
1780 case MSR_IA32_MC1_STATUS:
1781 case MSR_IA32_MC2_STATUS:
1782 case MSR_IA32_MC3_STATUS:
1783 case MSR_IA32_MC4_STATUS:
1784 case MSR_IA32_MC5_STATUS:
1785 /* No point in letting the guest see real MCEs */
1786 msr_content = 0;
1787 break;
1789 case MSR_IA32_DEBUGCTLMSR:
1790 msr_content = vmcb->debugctlmsr;
1791 break;
1793 case MSR_IA32_LASTBRANCHFROMIP:
1794 msr_content = vmcb->lastbranchfromip;
1795 break;
1797 case MSR_IA32_LASTBRANCHTOIP:
1798 msr_content = vmcb->lastbranchtoip;
1799 break;
1801 case MSR_IA32_LASTINTFROMIP:
1802 msr_content = vmcb->lastintfromip;
1803 break;
1805 case MSR_IA32_LASTINTTOIP:
1806 msr_content = vmcb->lastinttoip;
1807 break;
1809 default:
1810 if ( rdmsr_hypervisor_regs(ecx, &eax, &edx) ||
1811 rdmsr_safe(ecx, eax, edx) == 0 )
1813 regs->eax = eax;
1814 regs->edx = edx;
1815 goto done;
1817 svm_inject_exception(TRAP_gp_fault, 0, 0);
1818 return;
1820 regs->eax = msr_content & 0xFFFFFFFF;
1821 regs->edx = msr_content >> 32;
1823 done:
1824 hvmtrace_msr_read(v, ecx, msr_content);
1825 HVM_DBG_LOG(DBG_LEVEL_1, "returns: ecx=%x, eax=%lx, edx=%lx",
1826 ecx, (unsigned long)regs->eax, (unsigned long)regs->edx);
1828 inst_len = __get_instruction_length(v, INSTR_RDMSR, NULL);
1830 else
1832 msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
1834 hvmtrace_msr_write(v, ecx, msr_content);
1836 switch (ecx)
1838 case MSR_IA32_TSC:
1839 hvm_set_guest_time(v, msr_content);
1840 pt_reset(v);
1841 break;
1843 case MSR_IA32_APICBASE:
1844 vlapic_msr_set(vcpu_vlapic(v), msr_content);
1845 break;
1847 case MSR_K8_VM_HSAVE_PA:
1848 svm_inject_exception(TRAP_gp_fault, 0, 0);
1849 break;
1851 case MSR_IA32_DEBUGCTLMSR:
1852 vmcb->debugctlmsr = msr_content;
1853 if ( !msr_content || !cpu_has_svm_lbrv )
1854 break;
1855 vmcb->lbr_control.fields.enable = 1;
1856 svm_disable_intercept_for_msr(v, MSR_IA32_DEBUGCTLMSR);
1857 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHFROMIP);
1858 svm_disable_intercept_for_msr(v, MSR_IA32_LASTBRANCHTOIP);
1859 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTFROMIP);
1860 svm_disable_intercept_for_msr(v, MSR_IA32_LASTINTTOIP);
1861 break;
1863 case MSR_IA32_LASTBRANCHFROMIP:
1864 vmcb->lastbranchfromip = msr_content;
1865 break;
1867 case MSR_IA32_LASTBRANCHTOIP:
1868 vmcb->lastbranchtoip = msr_content;
1869 break;
1871 case MSR_IA32_LASTINTFROMIP:
1872 vmcb->lastintfromip = msr_content;
1873 break;
1875 case MSR_IA32_LASTINTTOIP:
1876 vmcb->lastinttoip = msr_content;
1877 break;
1879 default:
1880 switch ( long_mode_do_msr_write(regs) )
1882 case HNDL_unhandled:
1883 wrmsr_hypervisor_regs(ecx, regs->eax, regs->edx);
1884 break;
1885 case HNDL_exception_raised:
1886 return;
1887 case HNDL_done:
1888 break;
1890 break;
1893 inst_len = __get_instruction_length(v, INSTR_WRMSR, NULL);
1896 __update_guest_eip(regs, inst_len);
1899 static void svm_vmexit_do_hlt(struct vmcb_struct *vmcb,
1900 struct cpu_user_regs *regs)
1902 struct vcpu *curr = current;
1903 struct hvm_intack intack = hvm_vcpu_has_pending_irq(curr);
1904 unsigned int inst_len;
1906 inst_len = __get_instruction_length(curr, INSTR_HLT, NULL);
1907 __update_guest_eip(regs, inst_len);
1909 /* Check for pending exception or new interrupt. */
1910 if ( vmcb->eventinj.fields.v ||
1911 ((intack.source != hvm_intsrc_none) &&
1912 !svm_interrupt_blocked(current, intack)) )
1914 HVMTRACE_1D(HLT, curr, /*int pending=*/ 1);
1915 return;
1918 HVMTRACE_1D(HLT, curr, /*int pending=*/ 0);
1919 hvm_hlt(regs->eflags);
1922 static void svm_vmexit_do_invalidate_cache(struct cpu_user_regs *regs)
1924 enum instruction_index list[] = { INSTR_INVD, INSTR_WBINVD };
1925 struct vcpu *curr = current;
1926 struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
1927 int inst_len;
1929 if ( !list_empty(&(domain_hvm_iommu(curr->domain)->pdev_list)) )
1931 vmcb->general2_intercepts &= ~GENERAL2_INTERCEPT_WBINVD;
1932 wbinvd();
1935 inst_len = __get_instruction_length_from_list(
1936 curr, list, ARRAY_SIZE(list), NULL, NULL);
1937 __update_guest_eip(regs, inst_len);
1940 void svm_handle_invlpg(const short invlpga, struct cpu_user_regs *regs)
1942 struct vcpu *v = current;
1943 u8 opcode[MAX_INST_LEN], prefix, length = MAX_INST_LEN;
1944 unsigned long g_vaddr;
1945 int inst_len;
1947 /*
1948 * Unknown how many bytes the invlpg instruction will take. Use the
1949 * maximum instruction length here
1950 */
1951 if ( inst_copy_from_guest(opcode, svm_rip2pointer(v), length) < length )
1953 gdprintk(XENLOG_ERR, "Error reading memory %d bytes\n", length);
1954 goto crash;
1957 if ( invlpga )
1959 inst_len = __get_instruction_length(v, INSTR_INVLPGA, opcode);
1960 __update_guest_eip(regs, inst_len);
1962 /*
1963 * The address is implicit on this instruction. At the moment, we don't
1964 * use ecx (ASID) to identify individual guest pages
1965 */
1966 g_vaddr = regs->eax;
1968 else
1970 /* What about multiple prefix codes? */
1971 prefix = (is_prefix(opcode[0]) ? opcode[0] : 0);
1972 inst_len = __get_instruction_length(v, INSTR_INVLPG, opcode);
1973 if ( inst_len <= 0 )
1975 gdprintk(XENLOG_ERR, "Error getting invlpg instr len\n");
1976 goto crash;
1979 inst_len--;
1980 length -= inst_len;
1982 /*
1983 * Decode memory operand of the instruction including ModRM, SIB, and
1984 * displacement to get effective address and length in bytes. Assume
1985 * the system in either 32- or 64-bit mode.
1986 */
1987 g_vaddr = get_effective_addr_modrm64(regs, prefix, inst_len,
1988 &opcode[inst_len], &length);
1990 inst_len += length;
1991 __update_guest_eip(regs, inst_len);
1994 HVMTRACE_3D(INVLPG, v, !!invlpga, g_vaddr, (invlpga ? regs->ecx : 0));
1996 paging_invlpg(v, g_vaddr);
1997 svm_asid_g_invlpg(v, g_vaddr);
1998 return;
2000 crash:
2001 domain_crash(v->domain);
2004 asmlinkage void svm_vmexit_handler(struct cpu_user_regs *regs)
2006 unsigned int exit_reason;
2007 struct vcpu *v = current;
2008 struct vmcb_struct *vmcb = v->arch.hvm_svm.vmcb;
2009 eventinj_t eventinj;
2010 int inst_len, rc;
2012 /*
2013 * Before doing anything else, we need to sync up the VLAPIC's TPR with
2014 * SVM's vTPR. It's OK if the guest doesn't touch CR8 (e.g. 32-bit Windows)
2015 * because we update the vTPR on MMIO writes to the TPR.
2016 */
2017 vlapic_set_reg(vcpu_vlapic(v), APIC_TASKPRI,
2018 (vmcb->vintr.fields.tpr & 0x0F) << 4);
2020 exit_reason = vmcb->exitcode;
2022 hvmtrace_vmexit(v, regs->eip, exit_reason);
2024 if ( unlikely(exit_reason == VMEXIT_INVALID) )
2026 svm_dump_vmcb(__func__, vmcb);
2027 goto exit_and_crash;
2030 perfc_incra(svmexits, exit_reason);
2032 hvm_maybe_deassert_evtchn_irq();
2034 /* Event delivery caused this intercept? Queue for redelivery. */
2035 eventinj = vmcb->exitintinfo;
2036 if ( unlikely(eventinj.fields.v) &&
2037 hvm_event_needs_reinjection(eventinj.fields.type,
2038 eventinj.fields.vector) )
2039 vmcb->eventinj = eventinj;
2041 switch ( exit_reason )
2043 case VMEXIT_INTR:
2044 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2045 HVMTRACE_0D(INTR, v);
2046 break;
2048 case VMEXIT_NMI:
2049 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2050 HVMTRACE_0D(NMI, v);
2051 break;
2053 case VMEXIT_SMI:
2054 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2055 HVMTRACE_0D(SMI, v);
2056 break;
2058 case VMEXIT_EXCEPTION_DB:
2059 if ( !v->domain->debugger_attached )
2060 goto exit_and_crash;
2061 domain_pause_for_debugger();
2062 break;
2064 case VMEXIT_EXCEPTION_BP:
2065 if ( !v->domain->debugger_attached )
2066 goto exit_and_crash;
2067 /* AMD Vol2, 15.11: INT3, INTO, BOUND intercepts do not update RIP. */
2068 inst_len = __get_instruction_length(v, INSTR_INT3, NULL);
2069 __update_guest_eip(regs, inst_len);
2070 domain_pause_for_debugger();
2071 break;
2073 case VMEXIT_EXCEPTION_NM:
2074 svm_do_no_device_fault(vmcb);
2075 break;
2077 case VMEXIT_EXCEPTION_PF: {
2078 unsigned long va;
2079 va = vmcb->exitinfo2;
2080 regs->error_code = vmcb->exitinfo1;
2081 HVM_DBG_LOG(DBG_LEVEL_VMMU,
2082 "eax=%lx, ebx=%lx, ecx=%lx, edx=%lx, esi=%lx, edi=%lx",
2083 (unsigned long)regs->eax, (unsigned long)regs->ebx,
2084 (unsigned long)regs->ecx, (unsigned long)regs->edx,
2085 (unsigned long)regs->esi, (unsigned long)regs->edi);
2087 if ( paging_fault(va, regs) )
2089 HVMTRACE_2D(PF_XEN, v, va, regs->error_code);
2090 break;
2093 svm_inject_exception(TRAP_page_fault, regs->error_code, va);
2094 break;
2097 /* Asynchronous event, handled when we STGI'd after the VMEXIT. */
2098 case VMEXIT_EXCEPTION_MC:
2099 HVMTRACE_0D(MCE, v);
2100 break;
2102 case VMEXIT_VINTR:
2103 vmcb->vintr.fields.irq = 0;
2104 vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
2105 break;
2107 case VMEXIT_INVD:
2108 case VMEXIT_WBINVD:
2109 svm_vmexit_do_invalidate_cache(regs);
2110 break;
2112 case VMEXIT_TASK_SWITCH: {
2113 enum hvm_task_switch_reason reason;
2114 int32_t errcode = -1;
2115 if ( (vmcb->exitinfo2 >> 36) & 1 )
2116 reason = TSW_iret;
2117 else if ( (vmcb->exitinfo2 >> 38) & 1 )
2118 reason = TSW_jmp;
2119 else
2120 reason = TSW_call_or_int;
2121 if ( (vmcb->exitinfo2 >> 44) & 1 )
2122 errcode = (uint32_t)vmcb->exitinfo2;
2123 hvm_task_switch((uint16_t)vmcb->exitinfo1, reason, errcode);
2124 break;
2127 case VMEXIT_CPUID:
2128 svm_vmexit_do_cpuid(vmcb, regs);
2129 break;
2131 case VMEXIT_HLT:
2132 svm_vmexit_do_hlt(vmcb, regs);
2133 break;
2135 case VMEXIT_INVLPG:
2136 svm_handle_invlpg(0, regs);
2137 break;
2139 case VMEXIT_INVLPGA:
2140 svm_handle_invlpg(1, regs);
2141 break;
2143 case VMEXIT_VMMCALL:
2144 inst_len = __get_instruction_length(v, INSTR_VMCALL, NULL);
2145 HVMTRACE_1D(VMMCALL, v, regs->eax);
2146 rc = hvm_do_hypercall(regs);
2147 if ( rc != HVM_HCALL_preempted )
2149 __update_guest_eip(regs, inst_len);
2150 if ( rc == HVM_HCALL_invalidate )
2151 send_invalidate_req();
2153 break;
2155 case VMEXIT_CR0_READ ... VMEXIT_CR15_READ:
2156 svm_cr_access(v, exit_reason - VMEXIT_CR0_READ,
2157 TYPE_MOV_FROM_CR, regs);
2158 break;
2160 case VMEXIT_CR0_WRITE ... VMEXIT_CR15_WRITE:
2161 svm_cr_access(v, exit_reason - VMEXIT_CR0_WRITE,
2162 TYPE_MOV_TO_CR, regs);
2163 break;
2165 case VMEXIT_DR0_READ ... VMEXIT_DR7_READ:
2166 case VMEXIT_DR0_WRITE ... VMEXIT_DR7_WRITE:
2167 svm_dr_access(v, regs);
2168 break;
2170 case VMEXIT_IOIO:
2171 svm_io_instruction(v);
2172 break;
2174 case VMEXIT_MSR:
2175 svm_do_msr_access(v, regs);
2176 break;
2178 case VMEXIT_SHUTDOWN:
2179 hvm_triple_fault();
2180 break;
2182 case VMEXIT_RDTSCP:
2183 case VMEXIT_MONITOR:
2184 case VMEXIT_MWAIT:
2185 case VMEXIT_VMRUN:
2186 case VMEXIT_VMLOAD:
2187 case VMEXIT_VMSAVE:
2188 case VMEXIT_STGI:
2189 case VMEXIT_CLGI:
2190 case VMEXIT_SKINIT:
2191 svm_inject_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE, 0);
2192 break;
2194 case VMEXIT_NPF:
2195 perfc_incra(svmexits, VMEXIT_NPF_PERFC);
2196 regs->error_code = vmcb->exitinfo1;
2197 svm_do_nested_pgfault(vmcb->exitinfo2, regs);
2198 break;
2200 default:
2201 exit_and_crash:
2202 gdprintk(XENLOG_ERR, "unexpected VMEXIT: exit reason = 0x%x, "
2203 "exitinfo1 = %"PRIx64", exitinfo2 = %"PRIx64"\n",
2204 exit_reason,
2205 (u64)vmcb->exitinfo1, (u64)vmcb->exitinfo2);
2206 domain_crash(v->domain);
2207 break;
2210 /* The exit may have updated the TPR: reflect this in the hardware vtpr */
2211 vmcb->vintr.fields.tpr =
2212 (vlapic_get_reg(vcpu_vlapic(v), APIC_TASKPRI) & 0xFF) >> 4;
2215 asmlinkage void svm_trace_vmentry(void)
2217 struct vcpu *v = current;
2219 /* This is the last C code before the VMRUN instruction. */
2220 hvmtrace_vmentry(v);
2223 /*
2224 * Local variables:
2225 * mode: C
2226 * c-set-style: "BSD"
2227 * c-basic-offset: 4
2228 * tab-width: 4
2229 * indent-tabs-mode: nil
2230 * End:
2231 */