/*
 * Hypercall and fault low-level handling routines.
 *
 * Copyright (c) 2002-2004, K A Fraser
 * Copyright (c) 1991, 1992 Linus Torvalds
 */

/*
 * The idea for callbacks to guest OSes
 * ====================================
 *
 * First, we require that all callbacks (either via a supplied
 * interrupt-descriptor-table, or via the special event or failsafe callbacks
 * in the shared-info-structure) are to ring 1. This just makes life easier,
 * in that it means we don't have to do messy GDT/LDT lookups to find
 * out what the privilege level of the return code-selector is. That code
 * would just be a hassle to write, and would need to account for running
 * off the end of the GDT/LDT, for example. For all callbacks we check
 * that the provided return CS is not == __HYPERVISOR_{CS,DS}. Apart from
 * that we're safe as we don't allow a guest OS to install ring-0 privileges
 * into the GDT/LDT. It's up to the guest OS to ensure all returns via the
 * IDT are to ring 1. If not, we load incorrect SS/ESP values from the TSS
 * (for ring 1 rather than the correct ring) and bad things are bound to
 * ensue -- IRET is likely to fault, and we may end up killing the domain
 * (no harm can come to Xen, though).
 *
 * When doing a callback, we check if the return CS is in ring 0. If so,
 * the callback is delayed until the next return to ring != 0.
 * If the return CS is in ring 1, then we create a callback frame
 * starting at return SS/ESP. The base of the frame does an intra-privilege
 * interrupt-return.
 * If the return CS is in ring > 1, we create a callback frame starting
 * at SS/ESP taken from the appropriate section of the current TSS. The base
 * of the frame does an inter-privilege interrupt-return.
 *
 * Note that the "failsafe callback" uses a special stackframe:
 *    { return_DS, return_ES, return_FS, return_GS, return_EIP,
 *      return_CS, return_EFLAGS[, return_ESP, return_SS] }
 * That is, original values for DS/ES/FS/GS are placed on the stack rather
 * than in DS/ES/FS/GS themselves. Why? It saves us loading them, only to
 * have them saved/restored in the guest OS. Furthermore, if we load them we
 * may cause a fault if they are invalid, which is a hassle to deal with. We
 * avoid that problem if we don't load them :-) This property allows us to
 * use the failsafe callback as a fallback: if we ever fault on loading
 * DS/ES/FS/GS on return to ring != 0, we can simply package it up as a
 * return via the failsafe callback, and let the guest OS sort it out
 * (perhaps by killing an application process). Note that we also do this
 * for any faulting IRET -- just let the guest OS handle it via the event
 * callback.
 *
 * We terminate a domain in the following cases:
 *  - creating a callback stack frame (due to a bad ring-1 stack).
 *  - faulting IRET on entry to the failsafe callback handler.
 * So, each domain must keep its ring-1 %ss/%esp and failsafe callback
 * handler in good order (absolutely no faults allowed!).
kaf24@1710: */ kaf24@1710: kaf24@1710: #include kaf24@1710: #include kaf24@2085: #include kaf24@2954: #include kaf24@2827: #include kaf24@1710: kaf24@1710: #define GET_CURRENT(reg) \ kaf24@1710: movl $4096-4, reg; \ kaf24@1710: orl %esp, reg; \ kaf24@1710: andl $~3,reg; \ kaf24@1710: movl (reg),reg; kaf24@1710: kaf24@1710: ENTRY(continue_nonidle_task) kaf24@1710: GET_CURRENT(%ebx) kaf24@1710: jmp test_all_events kaf24@1710: kaf24@1710: ALIGN kaf24@1710: /* kaf24@1710: * HYPERVISOR_multicall(call_list, nr_calls) kaf24@1710: * Execute a list of 'nr_calls' hypercalls, pointed at by 'call_list'. kaf24@1710: * This is fairly easy except that: kaf24@1710: * 1. We may fault reading the call list, and must patch that up; and kaf24@1710: * 2. We cannot recursively call HYPERVISOR_multicall, or a malicious kaf24@1710: * caller could cause our stack to blow up. kaf24@1710: */ kaf24@2446: #define MULTICALL_ENTRY_ORDER 5 kaf24@1710: do_multicall: kaf24@1710: popl %eax kaf24@1710: cmpl $SYMBOL_NAME(multicall_return_from_call),%eax kaf24@1710: je multicall_return_from_call kaf24@1710: pushl %ebx kaf24@1710: movl 4(%esp),%ebx /* EBX == call_list */ kaf24@1710: movl 8(%esp),%ecx /* ECX == nr_calls */ kaf24@2446: /* Ensure the entire multicall list is below HYPERVISOR_VIRT_START. 
*/ kaf24@2446: movl %ecx,%eax kaf24@2446: shll $MULTICALL_ENTRY_ORDER,%eax kaf24@2446: addl %ebx,%eax /* EAX == end of multicall list */ kaf24@2446: jc bad_multicall_address kaf24@2446: cmpl $__HYPERVISOR_VIRT_START,%eax kaf24@2446: jnc bad_multicall_address kaf24@1710: multicall_loop: kaf24@1710: pushl %ecx kaf24@1710: multicall_fault1: kaf24@1710: pushl 20(%ebx) # args[4] kaf24@1710: multicall_fault2: kaf24@1710: pushl 16(%ebx) # args[3] kaf24@1710: multicall_fault3: kaf24@1710: pushl 12(%ebx) # args[2] kaf24@1710: multicall_fault4: kaf24@1710: pushl 8(%ebx) # args[1] kaf24@1710: multicall_fault5: kaf24@1710: pushl 4(%ebx) # args[0] kaf24@1710: multicall_fault6: kaf24@1710: movl (%ebx),%eax # op kaf24@1710: andl $(NR_hypercalls-1),%eax kaf24@1710: call *SYMBOL_NAME(hypercall_table)(,%eax,4) kaf24@1710: multicall_return_from_call: kaf24@1710: multicall_fault7: kaf24@1710: movl %eax,24(%ebx) # args[5] == result kaf24@1710: addl $20,%esp kaf24@1710: popl %ecx kaf24@2446: addl $(1<esp1 */ kaf24@2954: FAULT6: movl 4(%eax),%gs /* tss->ss1 */ kaf24@1710: /* base of stack frame must contain ss/esp (inter-priv iret) */ kaf24@1710: subl $8,%esi kaf24@2954: movl XREGS_esp+4(%esp),%eax kaf24@2954: FAULT7: movl %eax,%gs:(%esi) kaf24@2954: movl XREGS_ss+4(%esp),%eax kaf24@2954: FAULT8: movl %eax,%gs:4(%esi) kaf24@1710: jmp 2f kaf24@1710: 1: /* obtain ss/esp from oldss/oldesp -- a ring-1 activation exists */ kaf24@2954: movl XREGS_esp+4(%esp),%esi kaf24@2954: FAULT9: movl XREGS_ss+4(%esp),%gs kaf24@1710: 2: /* Construct a stack frame: EFLAGS, CS/EIP */ kaf24@1710: subl $12,%esi kaf24@2954: movl XREGS_eip+4(%esp),%eax kaf24@2954: FAULT10:movl %eax,%gs:(%esi) kaf24@2954: movl XREGS_cs+4(%esp),%eax kaf24@2954: FAULT11:movl %eax,%gs:4(%esi) kaf24@2954: movl XREGS_eflags+4(%esp),%eax kaf24@2954: FAULT12:movl %eax,%gs:8(%esi) kaf24@1710: /* Rewrite our stack frame and return to ring 1. */ kaf24@1710: /* IA32 Ref. Vol. 3: TF, VM, RF and NT flags are cleared on trap. 
*/ kaf24@1710: andl $0xfffcbeff,%eax kaf24@2954: movl %eax,XREGS_eflags+4(%esp) kaf24@2954: movl %gs,XREGS_ss+4(%esp) kaf24@2954: movl %esi,XREGS_esp+4(%esp) kaf24@2954: movzwl GTB_cs(%edx),%eax kaf24@2954: movl %eax,XREGS_cs+4(%esp) kaf24@2954: movl GTB_eip(%edx),%eax kaf24@2954: movl %eax,XREGS_eip+4(%esp) kaf24@1710: ret kaf24@2954: kaf24@1710: .section __ex_table,"a" kaf24@1710: .align 4 kaf24@1710: .long FAULT1, crash_domain_fixup3 # Fault writing to ring-1 stack kaf24@1710: .long FAULT2, crash_domain_fixup3 # Fault writing to ring-1 stack kaf24@1710: .long FAULT3, crash_domain_fixup3 # Fault writing to ring-1 stack kaf24@1710: .long FAULT4, crash_domain_fixup3 # Fault writing to ring-1 stack kaf24@1710: .long FAULT5, crash_domain_fixup1 # Fault executing failsafe iret kaf24@1710: .long FAULT6, crash_domain_fixup2 # Fault loading ring-1 stack selector kaf24@1710: .long FAULT7, crash_domain_fixup2 # Fault writing to ring-1 stack kaf24@1710: .long FAULT8, crash_domain_fixup2 # Fault writing to ring-1 stack kaf24@1710: .long FAULT9, crash_domain_fixup2 # Fault loading ring-1 stack selector kaf24@1710: .long FAULT10,crash_domain_fixup2 # Fault writing to ring-1 stack kaf24@1710: .long FAULT11,crash_domain_fixup2 # Fault writing to ring-1 stack kaf24@1710: .long FAULT12,crash_domain_fixup2 # Fault writing to ring-1 stack kaf24@1710: .long FAULT13,crash_domain_fixup3 # Fault writing to ring-1 stack kaf24@1710: .long FAULT14,crash_domain_fixup3 # Fault writing to ring-1 stack kaf24@1710: .previous kaf24@1710: kaf24@1710: # This handler kills domains which experience unrecoverable faults. 
kaf24@1710: .section .fixup,"ax" kaf24@1710: crash_domain_fixup1: kaf24@1710: subl $4,%esp kaf24@2955: SAVE_ALL(a) kaf24@2954: sti kaf24@1710: jmp domain_crash kaf24@1710: crash_domain_fixup2: kaf24@1710: addl $4,%esp kaf24@1710: crash_domain_fixup3: kaf24@1710: jmp domain_crash kaf24@1710: .previous kaf24@1710: kaf24@1710: ALIGN kaf24@1710: process_guest_exception_and_events: kaf24@2954: movl DOMAIN_processor(%ebx),%eax kaf24@1710: shl $4,%eax kaf24@1710: lea guest_trap_bounce(%eax),%edx kaf24@2954: testb $~0,GTB_flags(%edx) kaf24@1710: jz test_all_events kaf24@1710: call create_bounce_frame # just the basic frame kaf24@2954: mov GTB_flags(%edx),%cl kaf24@1710: test $GTBF_TRAP_NOCODE,%cl kaf24@1710: jnz 2f kaf24@1710: subl $4,%esi # push error_code onto guest frame kaf24@2954: movl GTB_error_code(%edx),%eax kaf24@2954: FAULT13:movl %eax,%gs:(%esi) kaf24@1710: test $GTBF_TRAP_CR2,%cl kaf24@1710: jz 1f kaf24@1710: subl $4,%esi # push %cr2 onto guest frame kaf24@2954: movl GTB_cr2(%edx),%eax kaf24@2954: FAULT14:movl %eax,%gs:(%esi) kaf24@2954: 1: movl %esi,XREGS_esp(%esp) kaf24@2954: 2: movb $0,GTB_flags(%edx) kaf24@1710: jmp test_all_events kaf24@1710: kaf24@1710: ALIGN kaf24@1710: ENTRY(ret_from_intr) kaf24@1710: GET_CURRENT(%ebx) kaf24@2954: movb XREGS_cs(%esp),%al kaf24@1710: testb $3,%al # return to non-supervisor? 
kaf24@1710: jne test_all_events kaf24@1710: jmp restore_all_xen kaf24@1710: kaf24@1710: ENTRY(divide_error) kaf24@1710: pushl $0 # no error code kaf24@1710: pushl $ SYMBOL_NAME(do_divide_error) kaf24@1710: ALIGN kaf24@1710: error_code: kaf24@2954: cld kaf24@2954: pushl %ebp kaf24@1710: pushl %edi kaf24@1710: pushl %esi kaf24@1710: pushl %edx kaf24@1710: pushl %ecx kaf24@1710: pushl %ebx kaf24@2954: movb XREGS_cs(%esp),%bl kaf24@2954: testb $3,%bl kaf24@2954: je 1f kaf24@2954: movl %ds,XREGS_ds(%esp) kaf24@2954: movl %es,XREGS_es(%esp) kaf24@2954: movl %fs,XREGS_fs(%esp) kaf24@2954: movl %gs,XREGS_gs(%esp) kaf24@2955: 1: SET_XEN_SEGMENTS(b) kaf24@2954: movl XREGS_orig_eax(%esp),%esi # get the error code kaf24@2954: movl XREGS_eax(%esp),%edi # get the function address kaf24@2954: movl %eax,XREGS_eax(%esp) kaf24@1710: movl %esp,%edx kaf24@1710: pushl %esi # push the error code ach61@2843: pushl %edx # push the xen_regs pointer kaf24@1710: GET_CURRENT(%ebx) kaf24@2954: call *%edi kaf24@1710: addl $8,%esp kaf24@2954: movb XREGS_cs(%esp),%al kaf24@1710: testb $3,%al kaf24@1710: je restore_all_xen kaf24@1710: jmp process_guest_exception_and_events kaf24@1710: kaf24@1710: ENTRY(coprocessor_error) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_coprocessor_error) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(simd_coprocessor_error) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_simd_coprocessor_error) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(device_not_available) kaf24@1710: pushl $0 kaf24@1710: pushl $SYMBOL_NAME(math_state_restore) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(debug) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_debug) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(int3) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_int3) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(overflow) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_overflow) kaf24@1710: jmp 
error_code kaf24@1710: kaf24@1710: ENTRY(bounds) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_bounds) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(invalid_op) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_invalid_op) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(coprocessor_segment_overrun) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_coprocessor_segment_overrun) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(invalid_TSS) kaf24@1710: pushl $ SYMBOL_NAME(do_invalid_TSS) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(segment_not_present) kaf24@1710: pushl $ SYMBOL_NAME(do_segment_not_present) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(stack_segment) kaf24@1710: pushl $ SYMBOL_NAME(do_stack_segment) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(general_protection) kaf24@1710: pushl $ SYMBOL_NAME(do_general_protection) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(alignment_check) kaf24@1710: pushl $ SYMBOL_NAME(do_alignment_check) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(page_fault) kaf24@1710: pushl $ SYMBOL_NAME(do_page_fault) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(machine_check) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_machine_check) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(spurious_interrupt_bug) kaf24@1710: pushl $0 kaf24@1710: pushl $ SYMBOL_NAME(do_spurious_interrupt_bug) kaf24@1710: jmp error_code kaf24@1710: kaf24@1710: ENTRY(nmi) kaf24@1710: # Save state but do not trash the segment registers! kaf24@1710: # We may otherwise be unable to reload them or copy them to ring 1. kaf24@1710: pushl %eax kaf24@2955: SAVE_ALL_NOSEGREGS(a) kaf24@1710: kaf24@2085: # Check for hardware problems. 
kaf24@1710: inb $0x61,%al kaf24@1710: testb $0x80,%al kaf24@2080: jne nmi_parity_err kaf24@1710: testb $0x40,%al kaf24@1710: jne nmi_io_err kaf24@1710: movl %eax,%ebx kaf24@1710: kaf24@1710: # Okay, its almost a normal NMI tick. We can only process it if: kaf24@1710: # A. We are the outermost Xen activation (in which case we have kaf24@1710: # the selectors safely saved on our stack) kaf24@1710: # B. DS-GS all contain sane Xen values. kaf24@1710: # In all other cases we bail without touching DS-GS, as we have kaf24@1710: # interrupted an enclosing Xen activation in tricky prologue or kaf24@1710: # epilogue code. kaf24@2954: movb XREGS_cs(%esp),%al kaf24@1710: testb $3,%al kaf24@1710: jne do_watchdog_tick kaf24@2954: movl XREGS_ds(%esp),%eax kaf24@1710: cmpw $(__HYPERVISOR_DS),%ax kaf24@2954: jne restore_all_xen kaf24@2954: movl XREGS_es(%esp),%eax kaf24@1710: cmpw $(__HYPERVISOR_DS),%ax kaf24@2954: jne restore_all_xen kaf24@2954: movl XREGS_fs(%esp),%eax kaf24@1710: cmpw $(__HYPERVISOR_DS),%ax kaf24@2954: jne restore_all_xen kaf24@2954: movl XREGS_gs(%esp),%eax kaf24@1710: cmpw $(__HYPERVISOR_DS),%ax kaf24@2954: jne restore_all_xen kaf24@1710: kaf24@1710: do_watchdog_tick: kaf24@1710: movl $(__HYPERVISOR_DS),%edx kaf24@1710: movl %edx,%ds kaf24@1710: movl %edx,%es kaf24@1710: movl %esp,%edx kaf24@1710: pushl %ebx # reason kaf24@1710: pushl %edx # regs kaf24@1710: call SYMBOL_NAME(do_nmi) kaf24@1710: addl $8,%esp kaf24@2954: movb XREGS_cs(%esp),%al kaf24@1710: testb $3,%al kaf24@1710: je restore_all_xen kaf24@1710: GET_CURRENT(%ebx) kaf24@1710: jmp restore_all_guest kaf24@1710: kaf24@2085: nmi_parity_err: kaf24@2085: # Clear and disable the parity-error line kaf24@2085: andb $0xf,%al kaf24@2085: orb $0x4,%al kaf24@2085: outb %al,$0x61 kaf24@2085: cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore kaf24@2954: je restore_all_xen kaf24@2085: bts $0,%ss:SYMBOL_NAME(nmi_softirq_reason) kaf24@2085: bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat) kaf24@2085: cmpb 
$'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0 kaf24@2954: je restore_all_xen kaf24@2085: movl $(__HYPERVISOR_DS),%edx # nmi=fatal kaf24@1710: movl %edx,%ds kaf24@1710: movl %edx,%es kaf24@2079: movl %esp,%edx kaf24@2079: push %edx kaf24@2079: call SYMBOL_NAME(mem_parity_error) kaf24@2085: addl $4,%esp kaf24@2085: jmp ret_from_intr kaf24@2085: kaf24@1710: nmi_io_err: kaf24@2085: # Clear and disable the I/O-error line kaf24@2085: andb $0xf,%al kaf24@2085: orb $0x8,%al kaf24@2085: outb %al,$0x61 kaf24@2085: cmpb $'i',%ss:SYMBOL_NAME(opt_nmi) # nmi=ignore kaf24@2954: je restore_all_xen kaf24@2085: bts $1,%ss:SYMBOL_NAME(nmi_softirq_reason) kaf24@2085: bts $NMI_SOFTIRQ,%ss:SYMBOL_NAME(irq_stat) kaf24@2085: cmpb $'d',%ss:SYMBOL_NAME(opt_nmi) # nmi=dom0 kaf24@2954: je restore_all_xen kaf24@2085: movl $(__HYPERVISOR_DS),%edx # nmi=fatal kaf24@1710: movl %edx,%ds kaf24@1710: movl %edx,%es kaf24@2079: movl %esp,%edx kaf24@2079: push %edx kaf24@2079: call SYMBOL_NAME(io_check_error) kaf24@2085: addl $4,%esp kaf24@2085: jmp ret_from_intr kaf24@2079: kaf24@1710: .data kaf24@1710: ENTRY(hypercall_table) kaf24@1710: .long SYMBOL_NAME(do_set_trap_table) /* 0 */ kaf24@1710: .long SYMBOL_NAME(do_mmu_update) kaf24@1710: .long SYMBOL_NAME(do_set_gdt) kaf24@1710: .long SYMBOL_NAME(do_stack_switch) kaf24@1710: .long SYMBOL_NAME(do_set_callbacks) kaf24@1710: .long SYMBOL_NAME(do_fpu_taskswitch) /* 5 */ kaf24@1710: .long SYMBOL_NAME(do_sched_op) kaf24@1710: .long SYMBOL_NAME(do_dom0_op) kaf24@1710: .long SYMBOL_NAME(do_set_debugreg) kaf24@1710: .long SYMBOL_NAME(do_get_debugreg) kaf24@1710: .long SYMBOL_NAME(do_update_descriptor) /* 10 */ kaf24@1710: .long SYMBOL_NAME(do_set_fast_trap) kaf24@1710: .long SYMBOL_NAME(do_dom_mem_op) kaf24@1710: .long SYMBOL_NAME(do_multicall) kaf24@1710: .long SYMBOL_NAME(do_update_va_mapping) kaf24@1710: .long SYMBOL_NAME(do_set_timer_op) /* 15 */ kaf24@1710: .long SYMBOL_NAME(do_event_channel_op) kaf24@1710: .long SYMBOL_NAME(do_xen_version) kaf24@1710: .long 
SYMBOL_NAME(do_console_io) kaf24@1710: .long SYMBOL_NAME(do_physdev_op) kaf24@2375: .long SYMBOL_NAME(do_grant_table_op) /* 20 */ kaf24@2111: .long SYMBOL_NAME(do_vm_assist) kaf24@2375: .long SYMBOL_NAME(do_update_va_mapping_otherdomain) kaf24@1710: .rept NR_hypercalls-((.-hypercall_table)/4) kaf24@1710: .long SYMBOL_NAME(do_ni_hypercall) kaf24@1710: .endr