xen/arch/x86/traps.c @ 3635:ed902e5c4b49 (debuggers.hg)

bitkeeper revision 1.1159.212.62 (41fff40aESe4aWS82z_rLHeonXpxuQ)

More x86/64 stuff.
Signed-off-by: keir.fraser@cl.cam.ac.uk

author   kaf24@scramble.cl.cam.ac.uk
date     Tue Feb 01 21:26:34 2005 +0000 (2005-02-01)
parents  578b6c14e635
children 9a9c5a491401 e6af5d8f8b39 fd1dd0663b09
/******************************************************************************
 * arch/x86/traps.c
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/console.h>
#include <asm/regs.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/smp.h>
#include <asm/flushtlb.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>
#include <asm/msr.h>

/*
 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
 *  fatal:  Xen prints diagnostic message and then hangs.
 *  dom0:   The NMI is virtualised to DOM0.
 *  ignore: The NMI error is cleared and ignored.
 */
#ifdef NDEBUG
char opt_nmi[10] = "dom0";
#else
char opt_nmi[10] = "fatal";
#endif
string_param("nmi", opt_nmi);
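
/*
 * Illustrative only: the string_param() registration above hooks opt_nmi
 * into Xen's boot-command-line parser, so a (hypothetical) boot entry
 * selecting the permissive behaviour would look like:
 *
 *     kernel /boot/xen.gz nmi=ignore
 *
 * The "nmi=..." token is the only part of that line this file defines.
 */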

#define GUEST_FAULT(_r) (likely(VM86_MODE(_r) || !RING_0(_r)))

#define DOUBLEFAULT_STACK_SIZE 1024
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];

asmlinkage int hypercall(void);

/* Master table, and the one used by CPU0. */
idt_entry_t idt_table[IDT_ENTRIES] = { {0, 0}, };
/* All other CPUs have their own copy. */
idt_entry_t *idt_tables[NR_CPUS] = { 0 };

asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
asmlinkage void device_not_available(void);
asmlinkage void coprocessor_segment_overrun(void);
asmlinkage void invalid_TSS(void);
asmlinkage void segment_not_present(void);
asmlinkage void stack_segment(void);
asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);

int kstack_depth_to_print = 8*20;

static inline int kernel_text_address(unsigned long addr)
{
    if (addr >= (unsigned long) &_stext &&
        addr <= (unsigned long) &_etext)
        return 1;
    return 0;
}

void show_guest_stack(void)
{
    int i;
    execution_context_t *ec = get_execution_context();
    unsigned long *stack = (unsigned long *)ec->esp;
    printk("Guest EIP is %lx\n", ec->eip);

    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n ");
        printk("%08lx ", *stack++);
    }
    printk("\n");
}

void show_trace(unsigned long *esp)
{
    unsigned long *stack, addr;
    int i;

    printk("Call Trace from ESP=%p: ", esp);
    stack = esp;
    i = 0;
    while (((long) stack & (STACK_SIZE-1)) != 0) {
        addr = *stack++;
        if (kernel_text_address(addr)) {
            if (i && ((i % 6) == 0))
                printk("\n ");
            printk("[<%08lx>] ", addr);
            i++;
        }
    }
    printk("\n");
}

void show_stack(unsigned long *esp)
{
    unsigned long *stack;
    int i;

    printk("Stack trace from ESP=%p:\n", esp);

    stack = esp;
    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n ");
        if ( kernel_text_address(*stack) )
            printk("[%08lx] ", *stack++);
        else
            printk("%08lx ", *stack++);
    }
    printk("\n");

    show_trace( esp );
}

void show_registers(struct xen_regs *regs)
{
    unsigned long esp;
    unsigned short ss, ds, es, fs, gs;

    if ( GUEST_FAULT(regs) )
    {
        esp = regs->esp;
        ss  = regs->ss & 0xffff;
        ds  = regs->ds & 0xffff;
        es  = regs->es & 0xffff;
        fs  = regs->fs & 0xffff;
        gs  = regs->gs & 0xffff;
    }
    else
    {
        esp = (unsigned long)(&regs->esp);
        ss  = __HYPERVISOR_DS;
        ds  = __HYPERVISOR_DS;
        es  = __HYPERVISOR_DS;
        fs  = __HYPERVISOR_DS;
        gs  = __HYPERVISOR_DS;
    }

    printk("CPU: %d\nEIP: %04lx:[<%08lx>] \nEFLAGS: %08lx\n",
           smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags);
    printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
           regs->eax, regs->ebx, regs->ecx, regs->edx);
    printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
           regs->esi, regs->edi, regs->ebp, esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           ds, es, fs, gs, ss);

    show_stack((unsigned long *)&regs->esp);
}

/*
 * This is called for faults at very unexpected times (e.g., when interrupts
 * are disabled). In such situations we can't do much that is safe. We try to
 * print out some tracing and then we just spin.
 */
asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
    int cpu = smp_processor_id();
    unsigned long cr2;
    static char *trapstr[] = {
        "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
        "invalid operation", "device not available", "double fault",
        "coprocessor segment", "invalid tss", "segment not found",
        "stack error", "general protection fault", "page fault",
        "spurious interrupt", "coprocessor error", "alignment check",
        "machine check", "simd error"
    };

    show_registers(regs);

    if ( trapnr == TRAP_page_fault )
    {
        __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
        printk("Faulting linear address might be %08lx\n", cr2);
    }

    printk("************************************\n");
    printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
           cpu, trapnr, trapstr[trapnr], regs->error_code,
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
    printk("System shutting down -- need manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}

static inline int do_trap(int trapnr, char *str,
                          struct xen_regs *regs,
                          int use_error_code)
{
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(trapnr, regs);

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    ti = current->thread.traps + trapnr;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( use_error_code )
    {
        tb->flags |= TBF_EXCEPTION_ERRCODE;
        tb->error_code = regs->error_code;
    }
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("Trap %d: %08lx -> %08lx\n", trapnr, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(trapnr, regs);

    show_registers(regs);
    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
          "[error_code=%04x]\n",
          smp_processor_id(), trapnr, str, regs->error_code);
    return 0;
}

#define DO_ERROR_NOCODE(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 0); \
}

#define DO_ERROR(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 1); \
}

DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
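
/*
 * For reference, the first instantiation above expands (whitespace aside) to:
 *
 *     asmlinkage int do_divide_error(struct xen_regs *regs)
 *     {
 *         return do_trap(0, "divide error", regs, 0);
 *     }
 *
 * i.e. each wrapper merely forwards to do_trap(), with the final argument
 * selecting whether a hardware error code accompanies the exception.
 */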

asmlinkage int do_int3(struct xen_regs *regs)
{
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;

    DEBUGGER_trap_entry(TRAP_int3, regs);

    if ( !GUEST_FAULT(regs) )
    {
        DEBUGGER_trap_fatal(TRAP_int3, regs);
        show_registers(regs);
        panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    }

    ti = current->thread.traps + 3;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;

    return 0;
}

asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;

    /* Disable the NMI watchdog. It's useless now. */
    watchdog_on = 0;

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           cpu, tss->cs, tss->eip, tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}
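
/*
 * A note on the back_link arithmetic in do_double_fault() above: the task
 * gate hardware-switches to doublefault_tss, leaving the selector of the
 * interrupted CPU's TSS in the back-link field. ">>3" converts that selector
 * to a GDT index, and the subtract-and-halve recovers the CPU number because
 * each CPU owns a pair of adjacent descriptor slots from __FIRST_TSS_ENTRY
 * onwards. E.g., if __FIRST_TSS_ENTRY were 12 (hypothetical value), a
 * back_link of 0x70 (GDT index 14) would decode to CPU 1.
 */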

asmlinkage void do_machine_check(struct xen_regs *regs)
{
    fatal_trap(TRAP_machine_check, regs);
}

void propagate_page_fault(unsigned long addr, u16 error_code)
{
    trap_info_t *ti;
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;

    ti = ed->thread.traps + 14;
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
    tb->cr2        = addr;
    tb->error_code = error_code;
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;

    ed->mm.guest_cr2 = addr;
}

asmlinkage int do_page_fault(struct xen_regs *regs)
{
    unsigned long off, addr, fixup;
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;
    extern int map_ldt_shadow_page(unsigned int);
    int cpu = ed->processor;
    int ret;

    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incrc(page_faults);

    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
    {
        LOCK_BIGLOCK(d);
        if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
             unlikely((addr >> L2_PAGETABLE_SHIFT) ==
                      ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
        {
            ptwr_flush(PTWR_PT_ACTIVE);
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }

        if ( (addr < PAGE_OFFSET) &&
             ((regs->error_code & 3) == 3) && /* write-protection fault */
             ptwr_do_page_fault(addr) )
        {
            if ( unlikely(ed->mm.shadow_mode) )
                (void)shadow_fault(addr, regs->error_code);
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }
        UNLOCK_BIGLOCK(d);
    }
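
    /*
     * Reminder of the hardware error-code bits relied upon here: bit 0 set
     * means a protection violation on a *present* page, bit 1 set means a
     * write access, bit 2 set means the fault happened in user mode. So
     * "(error_code & 3) == 3" above matches a write to a present page -- the
     * signature of a guest writing a write-protected pagetable -- while
     * "!(error_code & 1)" further down detects a not-present fault.
     */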

    if ( unlikely(ed->mm.shadow_mode) &&
         (addr < PAGE_OFFSET) && shadow_fault(addr, regs->error_code) )
        return EXCRET_fault_fixed;

    if ( unlikely(addr >= LDT_VIRT_START(ed)) &&
         (addr < (LDT_VIRT_START(ed) + (ed->mm.ldt_ents*LDT_ENTRY_SIZE))) )
    {
        /*
         * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
         * send the fault up to the guest OS to be handled.
         */
        LOCK_BIGLOCK(d);
        off  = addr - LDT_VIRT_START(ed);
        addr = ed->mm.ldt_base + off;
        ret  = map_ldt_shadow_page(off >> PAGE_SHIFT);
        UNLOCK_BIGLOCK(d);
        if ( likely(ret) )
            return EXCRET_fault_fixed; /* successfully copied the mapping */
    }

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    propagate_page_fault(addr, regs->error_code);
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        perfc_incrc(copy_user_faults);
        if ( !ed->mm.shadow_mode )
            DPRINTK("Page fault: %08lx -> %08lx\n", regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_page_fault, regs);

    if ( addr >= PAGE_OFFSET )
    {
        unsigned long page;
        page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
        printk("*pde = %08lx\n", page);
        if ( page & _PAGE_PRESENT )
        {
            page &= PAGE_MASK;
            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
            printk(" *pte = %08lx\n", page);
        }
#ifdef MEMORY_GUARD
        if ( !(regs->error_code & 1) )
            printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
#endif
    }

    show_registers(regs);
    panic("CPU%d FATAL PAGE FAULT\n"
          "[error_code=%04x]\n"
          "Faulting linear address might be %08lx\n",
          smp_processor_id(), regs->error_code, addr);
    return 0;
}

static int emulate_privileged_op(struct xen_regs *regs)
{
    extern long do_fpu_taskswitch(void);
    extern void *decode_reg(struct xen_regs *regs, u8 b);

    struct exec_domain *ed = current;
    unsigned long *reg, eip = regs->eip;
    u8 opcode;

    if ( get_user(opcode, (u8 *)eip) )
        goto page_fault;
    eip += 1;
    if ( (opcode & 0xff) != 0x0f )
        goto fail;

    if ( get_user(opcode, (u8 *)eip) )
        goto page_fault;
    eip += 1;
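
    /*
     * At this point 'opcode' holds the byte that followed the 0x0f escape.
     * The two-byte encodings recognised below are the architectural ones:
     *
     *     0f 06       CLTS
     *     0f 09       WBINVD
     *     0f 20 /r    MOV CRn,reg
     *     0f 22 /r    MOV reg,CRn
     *     0f 30       WRMSR
     *     0f 32       RDMSR
     */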
    switch ( opcode )
    {
    case 0x06: /* CLTS */
        (void)do_fpu_taskswitch();
        break;

    case 0x09: /* WBINVD */
        if ( !IS_CAPABLE_PHYSDEV(ed->domain) )
        {
            DPRINTK("Non-physdev domain attempted WBINVD.\n");
            goto fail;
        }
        wbinvd();
        break;

    case 0x20: /* MOV CR?,<reg> */
        if ( get_user(opcode, (u8 *)eip) )
            goto page_fault;
        eip += 1;
        if ( (opcode & 0xc0) != 0xc0 )
            goto fail;
        reg = decode_reg(regs, opcode);
        switch ( (opcode >> 3) & 7 )
        {
        case 0: /* Read CR0 */
            *reg =
                (read_cr0() & ~X86_CR0_TS) |
                (test_bit(EDF_GUEST_STTS, &ed->ed_flags) ? X86_CR0_TS : 0);
            break;

        case 2: /* Read CR2 */
            *reg = ed->mm.guest_cr2;
            break;

        case 3: /* Read CR3 */
            *reg = pagetable_val(ed->mm.pagetable);
            break;

        default:
            goto fail;
        }
        break;

    case 0x22: /* MOV <reg>,CR? */
        if ( get_user(opcode, (u8 *)eip) )
            goto page_fault;
        eip += 1;
        if ( (opcode & 0xc0) != 0xc0 )
            goto fail;
        reg = decode_reg(regs, opcode);
        switch ( (opcode >> 3) & 7 )
        {
        case 0: /* Write CR0 */
            if ( *reg & X86_CR0_TS ) /* XXX ignore all but TS bit */
                (void)do_fpu_taskswitch();
            break;

        case 2: /* Write CR2 */
            ed->mm.guest_cr2 = *reg;
            break;

        case 3: /* Write CR3 */
            LOCK_BIGLOCK(ed->domain);
            (void)new_guest_cr3(*reg);
            UNLOCK_BIGLOCK(ed->domain);
            break;

        default:
            goto fail;
        }
        break;

    case 0x30: /* WRMSR */
        if ( !IS_PRIV(ed->domain) )
        {
            DPRINTK("Non-priv domain attempted WRMSR.\n");
            goto fail;
        }
        wrmsr(regs->ecx, regs->eax, regs->edx);
        break;

    case 0x32: /* RDMSR */
        if ( !IS_PRIV(ed->domain) )
        {
            DPRINTK("Non-priv domain attempted RDMSR.\n");
            goto fail;
        }
        rdmsr(regs->ecx, regs->eax, regs->edx);
        break;

    default:
        goto fail;
    }

    regs->eip = eip;
    return EXCRET_fault_fixed;

 fail:
    return 0;

 page_fault:
    propagate_page_fault(eip, 0);
    return EXCRET_fault_fixed;
}

asmlinkage int do_general_protection(struct xen_regs *regs)
{
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(TRAP_gp_fault, regs);

    if ( regs->error_code & 1 )
        goto hardware_gp;

    if ( !GUEST_FAULT(regs) )
        goto gp_in_kernel;

    /*
     * Cunning trick to allow arbitrary "INT n" handling.
     *
     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
     * instruction from trapping to the appropriate vector, when that might
     * not be expected by Xen or the guest OS. For example, that entry might
     * be for a fault handler (unlike traps, faults don't increment EIP), or
     * might expect an error code on the stack (which a software trap never
     * provides), or might be a hardware interrupt handler that doesn't like
     * being called spuriously.
     *
     * Instead, a GPF occurs with the faulting IDT vector in the error code.
     * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
     * clear to indicate that it's a software fault, not a hardware one.
     *
     * NOTE: Vectors 3 and 4 are dealt with by their own handlers. This is
     * okay because they can only be triggered by an explicit DPL-checked
     * instruction. The DPL specified by the guest OS for these vectors is
     * NOT CHECKED!!
     */
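    /*
     * Worked example: a ring-1 guest kernel executes "int $0x80" while
     * vector 0x80 carries DPL 0 in the real IDT. The CPU raises #GP with
     * error code (0x80 << 3) | 2 = 0x402 -- bit 1 set (IDT entry), bit 0
     * clear (software, not hardware). The test below then recovers the
     * vector as error_code >> 3 and bounces the trap to the guest's own
     * handler if the guest-specified DPL permits it.
     */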
    if ( (regs->error_code & 3) == 2 )
    {
        /* This fault must be due to <INT n> instruction. */
        ti = current->thread.traps + (regs->error_code>>3);
        if ( TI_GET_DPL(ti) >= (VM86_MODE(regs) ? 3 : (regs->cs & 3)) )
        {
            tb->flags = TBF_EXCEPTION;
            regs->eip += 2;
            goto finish_propagation;
        }
    }

    /* Emulate some simple privileged instructions when exec'ed in ring 1. */
    if ( (regs->error_code == 0) &&
         RING_1(regs) &&
         emulate_privileged_op(regs) )
        return 0;

#if defined(__i386__)
    if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments) &&
         (regs->error_code == 0) &&
         gpf_emulate_4gb(regs) )
        return 0;
#endif

    /* Pass on GPF as is. */
    ti = current->thread.traps + 13;
    tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = regs->error_code;
 finish_propagation:
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 gp_in_kernel:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("GPF (%04x): %08lx -> %08lx\n",
                regs->error_code, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_gp_fault, regs);

 hardware_gp:
    show_registers(regs);
    panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
          smp_processor_id(), regs->error_code);
    return 0;
}

asmlinkage void mem_parity_error(struct xen_regs *regs)
{
    console_force_unlock();
    printk("\n\nNMI - MEMORY ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

asmlinkage void io_check_error(struct xen_regs *regs)
{
    console_force_unlock();

    printk("\n\nNMI - I/O ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

static void unknown_nmi_error(unsigned char reason, struct xen_regs * regs)
{
    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
    printk("Dazed and confused, but trying to continue\n");
    printk("Do you have a strange power saving mode enabled?\n");
}

asmlinkage void do_nmi(struct xen_regs * regs, unsigned long reason)
{
    ++nmi_count(smp_processor_id());

#if CONFIG_X86_LOCAL_APIC
    if ( nmi_watchdog )
        nmi_watchdog_tick(regs);
    else
#endif
        unknown_nmi_error((unsigned char)(reason&0xff), regs);
}

unsigned long nmi_softirq_reason;
static void nmi_softirq(void)
{
    if ( dom0 == NULL )
        return;

    if ( test_and_clear_bit(0, &nmi_softirq_reason) )
        send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR);

    if ( test_and_clear_bit(1, &nmi_softirq_reason) )
        send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR);
}

asmlinkage int math_state_restore(struct xen_regs *regs)
{
    /* Prevent recursion. */
    clts();

    if ( !test_bit(EDF_USEDFPU, &current->ed_flags) )
    {
        if ( test_bit(EDF_DONEFPUINIT, &current->ed_flags) )
            restore_fpu(current);
        else
            init_fpu();
        set_bit(EDF_USEDFPU, &current->ed_flags); /* so we fnsave on switch_to() */
    }

    if ( test_and_clear_bit(EDF_GUEST_STTS, &current->ed_flags) )
    {
        struct trap_bounce *tb = &current->thread.trap_bounce;
        tb->flags = TBF_EXCEPTION;
        tb->cs    = current->thread.traps[7].cs;
        tb->eip   = current->thread.traps[7].address;
    }

    return EXCRET_fault_fixed;
}

asmlinkage int do_debug(struct xen_regs *regs)
{
    unsigned int condition;
    struct exec_domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;

    DEBUGGER_trap_entry(TRAP_debug, regs);

    __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));

    /* Mask out spurious debug traps due to lazy DR7 setting */
    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
         (d->thread.debugreg[7] == 0) )
    {
        __asm__("movl %0,%%db7" : : "r" (0));
        goto out;
    }

    if ( !GUEST_FAULT(regs) )
    {
        /* Clear TF just for absolute sanity. */
        regs->eflags &= ~EF_TF;
        /*
         * We ignore watchpoints when they trigger within Xen. This may happen
         * when a buffer is passed to us which previously had a watchpoint set
         * on it. No need to bump EIP; the only faulting trap is an instruction
         * breakpoint, which can't happen to us.
         */
        goto out;
    }

    /* Save debug status register where guest OS can peek at it */
    d->thread.debugreg[6] = condition;

    tb->flags = TBF_EXCEPTION;
    tb->cs    = d->thread.traps[1].cs;
    tb->eip   = d->thread.traps[1].address;

 out:
    return EXCRET_not_a_fault;
}

asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
{
    return EXCRET_not_a_fault;
}

#define _set_gate(gate_addr,type,dpl,addr) \
do { \
  int __d0, __d1; \
  __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
    "movw %4,%%dx\n\t" \
    "movl %%eax,%0\n\t" \
    "movl %%edx,%1" \
    :"=m" (*((long *) (gate_addr))), \
     "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
    :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
     "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
} while (0)
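
/*
 * The immediate built by _set_gate() is the standard IA-32 gate flag word:
 * 0x8000 is the Present bit, dpl<<13 the descriptor privilege level, and
 * type<<8 the gate type (14 = 32-bit interrupt gate, 15 = 32-bit trap gate).
 * For instance, set_system_gate() below passes type 14 and dpl 3, giving
 * flags 0x8000 + (3<<13) + (14<<8) = 0xee00.
 */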

void set_intr_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,0,addr);
}

static void __init set_system_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,3,addr);
}

static void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].a = sel << 16;
    idt_table[n].b = 0x8500;
}

#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
    *((gate_addr)+1) = ((base) & 0xff000000) | \
        (((base) & 0x00ff0000)>>16) | \
        ((limit) & 0xf0000) | \
        ((dpl)<<13) | \
        (0x00408000) | \
        ((type)<<8); \
    *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
        ((limit) & 0x0ffff); }

#define _set_tssldt_desc(n,addr,limit,type) \
__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
    "movw %%ax,2(%2)\n\t" \
    "rorl $16,%%eax\n\t" \
    "movb %%al,4(%2)\n\t" \
    "movb %4,5(%2)\n\t" \
    "movb $0,6(%2)\n\t" \
    "movb %%ah,7(%2)\n\t" \
    "rorl $16,%%eax" \
    : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))

void set_tss_desc(unsigned int n, void *addr)
{
    _set_tssldt_desc(
        gdt_table + __TSS(n),
        (int)addr,
        offsetof(struct tss_struct, __cacheline_filler) - 1,
        0x89);
}

void __init trap_init(void)
{
    /*
     * Make a separate task for double faults. This will get us debug output
     * if we blow the kernel stack.
     */
    struct tss_struct *tss = &doublefault_tss;
    memset(tss, 0, sizeof(*tss));
    tss->ds     = __HYPERVISOR_DS;
    tss->es     = __HYPERVISOR_DS;
    tss->ss     = __HYPERVISOR_DS;
    tss->esp    = (unsigned long)
        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3  = __pa(idle_pg_table);
    tss->cs     = __HYPERVISOR_CS;
    tss->eip    = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
                     (int)tss, 235, 0x89);

    /*
     * Note that interrupt gates are always used, rather than trap gates. We
     * must have interrupts disabled until DS/ES/FS/GS are saved because the
     * first activation must have the "bad" value(s) for these registers and
     * we may lose them if another activation is installed before they are
     * saved. The page-fault handler also needs interrupts disabled until %cr2
     * has been read and saved on the stack.
     */
    set_intr_gate(TRAP_divide_error,&divide_error);
    set_intr_gate(TRAP_debug,&debug);
    set_intr_gate(TRAP_nmi,&nmi);
    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
    set_intr_gate(TRAP_bounds,&bounds);
    set_intr_gate(TRAP_invalid_op,&invalid_op);
    set_intr_gate(TRAP_no_device,&device_not_available);
    set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3);
    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
    set_intr_gate(TRAP_no_segment,&segment_not_present);
    set_intr_gate(TRAP_stack_error,&stack_segment);
    set_intr_gate(TRAP_gp_fault,&general_protection);
    set_intr_gate(TRAP_page_fault,&page_fault);
    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
    set_intr_gate(TRAP_copro_error,&coprocessor_error);
    set_intr_gate(TRAP_alignment_check,&alignment_check);
    set_intr_gate(TRAP_machine_check,&machine_check);
    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
    set_intr_gate(TRAP_deferred_nmi,&nmi);

    /* Only ring 1 can access Xen services. */
    _set_gate(idt_table+HYPERCALL_VECTOR,14,1,&hypercall);

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /*
     * Should be a barrier for any external CPU state.
     */
    {
        extern void cpu_init(void);
        cpu_init();
    }

    open_softirq(NMI_SOFTIRQ, nmi_softirq);
}

long do_set_trap_table(trap_info_t *traps)
{
    trap_info_t cur;
    trap_info_t *dst = current->thread.traps;

    LOCK_BIGLOCK(current->domain);

    for ( ; ; )
    {
        if ( hypercall_preempt_check() )
        {
            UNLOCK_BIGLOCK(current->domain);
            return hypercall_create_continuation(
                __HYPERVISOR_set_trap_table, 1, traps);
        }

        if ( copy_from_user(&cur, traps, sizeof(cur)) )
        {
            UNLOCK_BIGLOCK(current->domain);
            return -EFAULT;
        }

        if ( cur.address == 0 ) break;

        if ( !VALID_CODESEL(cur.cs) )
        {
            UNLOCK_BIGLOCK(current->domain);
            return -EPERM;
        }

        memcpy(dst+cur.vector, &cur, sizeof(cur));
        traps++;
    }

    UNLOCK_BIGLOCK(current->domain);

    return 0;
}
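
/*
 * Guest-side usage sketch (hypothetical guest code; only the field names and
 * the zero-address terminator are taken from the handler above -- GUEST_CS
 * and pf_entry are invented for illustration):
 *
 *     trap_info_t traps[2];
 *     memset(traps, 0, sizeof(traps));
 *     traps[0].vector  = 14;                          // page fault
 *     traps[0].cs      = GUEST_CS;
 *     traps[0].address = (unsigned long)pf_entry;
 *     // traps[1].address == 0 terminates the table.
 *     HYPERVISOR_set_trap_table(traps);               // illustrative wrapper
 */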

long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct exec_domain *d = current;

    if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
        return -EPERM;

    d->thread.event_selector    = event_selector;
    d->thread.event_address     = event_address;
    d->thread.failsafe_selector = failsafe_selector;
    d->thread.failsafe_address  = failsafe_address;

    return 0;
}

long set_fast_trap(struct exec_domain *p, int idx)
{
    trap_info_t *ti;

    /* Index 0 is special: it disables fast traps. */
    if ( idx == 0 )
    {
        if ( p == current )
            CLEAR_FAST_TRAP(&p->thread);
        SET_DEFAULT_FAST_TRAP(&p->thread);
        return 0;
    }

    /*
     * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
     * The former range is used by Windows and MS-DOS.
     * Vector 0x80 is used by Linux and the BSD variants.
     */
    if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
        return -1;

    ti = p->thread.traps + idx;

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable.
     */
    if ( TI_GET_IF(ti) )
        return -1;

    if ( p == current )
        CLEAR_FAST_TRAP(&p->thread);

    p->thread.fast_trap_idx    = idx;
    p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    p->thread.fast_trap_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | ((TI_GET_DPL(ti) & 3) << 13);

    if ( p == current )
        SET_FAST_TRAP(&p->thread);

    return 0;
}
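
/*
 * Usage note: a Linux-like guest would typically ask for
 * set_fast_trap(ed, 0x80) so that its system-call vector is delivered
 * through fast_trap_desc -- a trap gate (type 0xf, per the 0x8f00 above)
 * aimed directly at the guest's registered cs:address -- rather than
 * bouncing through Xen; index 0 restores the default. Guests reach this
 * via the do_set_fast_trap() hypercall below.
 */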

long do_set_fast_trap(int idx)
{
    return set_fast_trap(current, idx);
}

long do_fpu_taskswitch(void)
{
    set_bit(EDF_GUEST_STTS, &current->ed_flags);
    stts();
    return 0;
}

long set_debugreg(struct exec_domain *p, int reg, unsigned long value)
{
    int i;

    switch ( reg )
    {
    case 0:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db0" : : "r" (value) );
        break;
    case 1:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db1" : : "r" (value) );
        break;
    case 2:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db2" : : "r" (value) );
        break;
    case 3:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db3" : : "r" (value) );
        break;
    case 6:
        /*
         * DR6: Bits 4-11,16-31 reserved (set to 1).
         *      Bit 12 reserved (set to 0).
         */
        value &= 0xffffefff; /* reserved bits => 0 */
        value |= 0xffff0ff0; /* reserved bits => 1 */
        if ( p == current )
            __asm__ ( "movl %0, %%db6" : : "r" (value) );
        break;
    case 7:
        /*
         * DR7: Bit 10 reserved (set to 1).
         *      Bits 11-12,14-15 reserved (set to 0).
         * Privileged bits:
         *      GD (bit 13): must be 0.
         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
         */
        /* DR7 == 0 => debugging disabled for this domain. */
        if ( value != 0 )
        {
            value &= 0xffff27ff; /* reserved bits => 0 */
            value |= 0x00000400; /* reserved bits => 1 */
            if ( (value & (1<<13)) != 0 ) return -EPERM;
            for ( i = 0; i < 16; i += 2 )
                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
        }
        if ( p == current )
            __asm__ ( "movl %0, %%db7" : : "r" (value) );
        break;
    default:
        return -EINVAL;
    }

    p->thread.debugreg[reg] = value;
    return 0;
}
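
/*
 * Example DR7 value that passes the checks above (hypothetical watchpoint):
 * a 4-byte write watchpoint in slot 0 sets L0 (bit 0), R/W0 = 01 (write,
 * bits 16-17) and LEN0 = 11 (4 bytes, bits 18-19), i.e. 0x000d0001; the code
 * then forces reserved bit 10 on and installs 0x000d0401. Any R/W or LEN
 * field equal to binary 10 is rejected with -EPERM.
 */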

long do_set_debugreg(int reg, unsigned long value)
{
    return set_debugreg(current, reg, value);
}

unsigned long do_get_debugreg(int reg)
{
    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
    return current->thread.debugreg[reg];
}