debuggers.hg

view xen/arch/x86/traps.c @ 3634:578b6c14e635

bitkeeper revision 1.1159.212.61 (41ff4ae9QGwwPUv_OONjfk2SaSj0dw)

Merge scramble.cl.cam.ac.uk:/local/scratch/kaf24/xen-2.0-testing.bk
into scramble.cl.cam.ac.uk:/local/scratch/kaf24/xen-unstable.bk
author kaf24@scramble.cl.cam.ac.uk
date Tue Feb 01 09:24:57 2005 +0000 (2005-02-01)
parents 2c56c6b39a48 a5f1a6abfc46
children ed902e5c4b49
/******************************************************************************
 * arch/i386/traps.c
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/console.h>
#include <asm/regs.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/smp.h>
#include <asm/flushtlb.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>
#include <asm/msr.h>

/*
 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
 *  fatal:  Xen prints diagnostic message and then hangs.
 *  dom0:   The NMI is virtualised to DOM0.
 *  ignore: The NMI error is cleared and ignored.
 */
#ifdef NDEBUG
char opt_nmi[10] = "dom0";
#else
char opt_nmi[10] = "fatal";
#endif
string_param("nmi", opt_nmi);

#if defined(__i386__)

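/*
 * GUEST_FAULT(regs): true if the saved context belongs to a guest, i.e. the
 * fault occurred in VM86 mode or outside ring 0 (Xen itself runs in ring 0).
 */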
#define GUEST_FAULT(_r) (likely(VM86_MODE(_r) || !RING_0(_r)))

#define DOUBLEFAULT_STACK_SIZE 1024
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];

asmlinkage int hypercall(void);

/* Master table, and the one used by CPU0. */
struct desc_struct idt_table[256] = { {0, 0}, };
/* All other CPUs have their own copy. */
struct desc_struct *idt_tables[NR_CPUS] = { 0 };

asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
asmlinkage void device_not_available(void);
asmlinkage void coprocessor_segment_overrun(void);
asmlinkage void invalid_TSS(void);
asmlinkage void segment_not_present(void);
asmlinkage void stack_segment(void);
asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);

int kstack_depth_to_print = 8*20;

static inline int kernel_text_address(unsigned long addr)
{
    if (addr >= (unsigned long) &_stext &&
        addr <= (unsigned long) &_etext)
        return 1;
    return 0;
}

void show_guest_stack()
{
    int i;
    execution_context_t *ec = get_execution_context();
    unsigned long *stack = (unsigned long *)ec->esp;
    printk("Guest EIP is %lx\n", ec->eip);

    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n ");
        printk("%08lx ", *stack++);
    }
    printk("\n");
}

void show_trace(unsigned long *esp)
{
    unsigned long *stack, addr;
    int i;

    printk("Call Trace from ESP=%p: ", esp);
    stack = esp;
    i = 0;
    while (((long) stack & (STACK_SIZE-1)) != 0) {
        addr = *stack++;
        if (kernel_text_address(addr)) {
            if (i && ((i % 6) == 0))
                printk("\n ");
            printk("[<%08lx>] ", addr);
            i++;
        }
    }
    printk("\n");
}

void show_stack(unsigned long *esp)
{
    unsigned long *stack;
    int i;

    printk("Stack trace from ESP=%p:\n", esp);

    stack = esp;
    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n ");
        if ( kernel_text_address(*stack) )
            printk("[%08lx] ", *stack++);
        else
            printk("%08lx ", *stack++);
    }
    printk("\n");

    show_trace( esp );
}

void show_registers(struct xen_regs *regs)
{
    unsigned long esp;
    unsigned short ss, ds, es, fs, gs;

    if ( GUEST_FAULT(regs) )
    {
        esp = regs->esp;
        ss  = regs->ss & 0xffff;
        ds  = regs->ds & 0xffff;
        es  = regs->es & 0xffff;
        fs  = regs->fs & 0xffff;
        gs  = regs->gs & 0xffff;
    }
    else
    {
        esp = (unsigned long)(&regs->esp);
        ss  = __HYPERVISOR_DS;
        ds  = __HYPERVISOR_DS;
        es  = __HYPERVISOR_DS;
        fs  = __HYPERVISOR_DS;
        gs  = __HYPERVISOR_DS;
    }

    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           regs->eax, regs->ebx, regs->ecx, regs->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08lx\n",
           regs->esi, regs->edi, regs->ebp, esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           ds, es, fs, gs, ss);

    show_stack((unsigned long *)&regs->esp);
}

/*
 * This is called for faults at very unexpected times (e.g., when interrupts
 * are disabled). In such situations we can't do much that is safe. We try to
 * print out some tracing and then we just spin.
 */
asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
    int cpu = smp_processor_id();
    unsigned long cr2;
    static char *trapstr[] = {
        "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
        "invalid operation", "device not available", "double fault",
        "coprocessor segment", "invalid tss", "segment not found",
        "stack error", "general protection fault", "page fault",
        "spurious interrupt", "coprocessor error", "alignment check",
        "machine check", "simd error"
    };

    show_registers(regs);

    if ( trapnr == TRAP_page_fault )
    {
        __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
        printk("Faulting linear address might be %08lx\n", cr2);
    }

    printk("************************************\n");
    printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
           cpu, trapnr, trapstr[trapnr], regs->error_code,
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
    printk("System shutting down -- need manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}

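/*
 * Generic exception dispatcher: a fault raised by a guest is bounced back to
 * the guest's registered handler for that vector (optionally with the error
 * code); a fault raised inside Xen is resolved via the exception-fixup table
 * or, failing that, is fatal.
 */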
static inline int do_trap(int trapnr, char *str,
                          struct xen_regs *regs,
                          int use_error_code)
{
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(trapnr, regs);

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    ti = current->thread.traps + trapnr;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( use_error_code )
    {
        tb->flags |= TBF_EXCEPTION_ERRCODE;
        tb->error_code = regs->error_code;
    }
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("Trap %d: %08x -> %08lx\n", trapnr, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(trapnr, regs);

    show_registers(regs);
    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
          "[error_code=%04x]\n",
          smp_processor_id(), trapnr, str, regs->error_code);
    return 0;
}

#define DO_ERROR_NOCODE(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 0); \
}

#define DO_ERROR(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 1); \
}

DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)

asmlinkage int do_int3(struct xen_regs *regs)
{
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;

    DEBUGGER_trap_entry(TRAP_int3, regs);

    if ( !GUEST_FAULT(regs) )
    {
        DEBUGGER_trap_fatal(TRAP_int3, regs);
        show_registers(regs);
        panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    }

    ti = current->thread.traps + 3;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;

    return 0;
}

asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;

    /* Disable the NMI watchdog. It's useless now. */
    watchdog_on = 0;

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           cpu, tss->cs, tss->eip, tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}

asmlinkage void do_machine_check(struct xen_regs *regs)
{
    fatal_trap(TRAP_machine_check, regs);
}

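/*
 * Forward a page fault to the guest: queue a trap bounce for vector 14 with
 * the error code and faulting address (CR2), and record the address in
 * guest_cr2 so that an emulated 'MOV from CR2' can later return it.
 */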
void propagate_page_fault(unsigned long addr, u16 error_code)
{
    trap_info_t *ti;
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;

    ti = ed->thread.traps + 14;
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
    tb->cr2        = addr;
    tb->error_code = error_code;
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;

    ed->mm.guest_cr2 = addr;
}

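/*
 * Page-fault handler. In order: try to satisfy the fault via the
 * writable-pagetable machinery, then via shadow mode, then by copying a
 * shadow LDT mapping. Anything else raised from guest context is propagated
 * to the guest; faults in Xen itself are resolved via the exception-fixup
 * table or are fatal.
 */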
asmlinkage int do_page_fault(struct xen_regs *regs)
{
    unsigned long off, addr, fixup;
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;
    extern int map_ldt_shadow_page(unsigned int);
    int cpu = ed->processor;
    int ret;

    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incrc(page_faults);

    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
    {
        LOCK_BIGLOCK(d);
        if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
             unlikely((addr >> L2_PAGETABLE_SHIFT) ==
                      ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
        {
            ptwr_flush(PTWR_PT_ACTIVE);
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }

        if ( (addr < PAGE_OFFSET) &&
             ((regs->error_code & 3) == 3) && /* write-protection fault */
             ptwr_do_page_fault(addr) )
        {
            if ( unlikely(ed->mm.shadow_mode) )
                (void)shadow_fault(addr, regs->error_code);
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }
        UNLOCK_BIGLOCK(d);
    }

    if ( unlikely(ed->mm.shadow_mode) &&
         (addr < PAGE_OFFSET) && shadow_fault(addr, regs->error_code) )
        return EXCRET_fault_fixed;

    if ( unlikely(addr >= LDT_VIRT_START(ed)) &&
         (addr < (LDT_VIRT_START(ed) + (ed->mm.ldt_ents*LDT_ENTRY_SIZE))) )
    {
        /*
         * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
         * send the fault up to the guest OS to be handled.
         */
        LOCK_BIGLOCK(d);
        off  = addr - LDT_VIRT_START(ed);
        addr = ed->mm.ldt_base + off;
        ret = map_ldt_shadow_page(off >> PAGE_SHIFT);
        UNLOCK_BIGLOCK(d);
        if ( likely(ret) )
            return EXCRET_fault_fixed; /* successfully copied the mapping */
    }

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    propagate_page_fault(addr, regs->error_code);
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        perfc_incrc(copy_user_faults);
        if ( !ed->mm.shadow_mode )
            DPRINTK("Page fault: %08x -> %08lx\n", regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_page_fault, regs);

    if ( addr >= PAGE_OFFSET )
    {
        unsigned long page;
        page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
        printk("*pde = %08lx\n", page);
        if ( page & _PAGE_PRESENT )
        {
            page &= PAGE_MASK;
            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
            printk(" *pte = %08lx\n", page);
        }
#ifdef MEMORY_GUARD
        if ( !(regs->error_code & 1) )
            printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
#endif
    }

    show_registers(regs);
    panic("CPU%d FATAL PAGE FAULT\n"
          "[error_code=%04x]\n"
          "Faulting linear address might be %08lx\n",
          smp_processor_id(), regs->error_code, addr);
    return 0;
}

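/*
 * Decode and emulate the handful of privileged instructions that a ring-1
 * guest kernel is allowed to use: CLTS, WBINVD (physdev-capable domains
 * only), MOV to/from CR0/CR2/CR3, and RDMSR/WRMSR (privileged domains only).
 * Returns EXCRET_fault_fixed on success, 0 to let the #GP propagate.
 */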
static int emulate_privileged_op(struct xen_regs *regs)
{
    extern long do_fpu_taskswitch(void);
    extern void *decode_reg(struct xen_regs *regs, u8 b);

    struct exec_domain *ed = current;
    unsigned long *reg, eip = regs->eip;
    u8 opcode;

    if ( get_user(opcode, (u8 *)eip) )
        goto page_fault;
    eip += 1;
    if ( (opcode & 0xff) != 0x0f )
        goto fail;

    if ( get_user(opcode, (u8 *)eip) )
        goto page_fault;
    eip += 1;

    switch ( opcode )
    {
    case 0x06: /* CLTS */
        (void)do_fpu_taskswitch();
        break;

    case 0x09: /* WBINVD */
        if ( !IS_CAPABLE_PHYSDEV(ed->domain) )
        {
            DPRINTK("Non-physdev domain attempted WBINVD.\n");
            goto fail;
        }
        wbinvd();
        break;

    case 0x20: /* MOV CR?,<reg> */
        if ( get_user(opcode, (u8 *)eip) )
            goto page_fault;
        eip += 1;
        if ( (opcode & 0xc0) != 0xc0 )
            goto fail;
        reg = decode_reg(regs, opcode);
        switch ( (opcode >> 3) & 7 )
        {
        case 0: /* Read CR0 */
            *reg =
                (read_cr0() & ~X86_CR0_TS) |
                (test_bit(EDF_GUEST_STTS, &ed->ed_flags) ? X86_CR0_TS : 0);
            break;

        case 2: /* Read CR2 */
            *reg = ed->mm.guest_cr2;
            break;

        case 3: /* Read CR3 */
            *reg = pagetable_val(ed->mm.pagetable);
            break;

        default:
            goto fail;
        }
        break;

    case 0x22: /* MOV <reg>,CR? */
        if ( get_user(opcode, (u8 *)eip) )
            goto page_fault;
        eip += 1;
        if ( (opcode & 0xc0) != 0xc0 )
            goto fail;
        reg = decode_reg(regs, opcode);
        switch ( (opcode >> 3) & 7 )
        {
        case 0: /* Write CR0 */
            if ( *reg & X86_CR0_TS ) /* XXX ignore all but TS bit */
                (void)do_fpu_taskswitch();
            break;

        case 2: /* Write CR2 */
            ed->mm.guest_cr2 = *reg;
            break;

        case 3: /* Write CR3 */
            LOCK_BIGLOCK(ed->domain);
            (void)new_guest_cr3(*reg);
            UNLOCK_BIGLOCK(ed->domain);
            break;

        default:
            goto fail;
        }
        break;

    case 0x30: /* WRMSR */
        if ( !IS_PRIV(ed->domain) )
        {
            DPRINTK("Non-priv domain attempted WRMSR.\n");
            goto fail;
        }
        wrmsr(regs->ecx, regs->eax, regs->edx);
        break;

    case 0x32: /* RDMSR */
        if ( !IS_PRIV(ed->domain) )
        {
            DPRINTK("Non-priv domain attempted RDMSR.\n");
            goto fail;
        }
        rdmsr(regs->ecx, regs->eax, regs->edx);
        break;

    default:
        goto fail;
    }

    regs->eip = eip;
    return EXCRET_fault_fixed;

 fail:
    return 0;

 page_fault:
    propagate_page_fault(eip, 0);
    return EXCRET_fault_fixed;
}

asmlinkage int do_general_protection(struct xen_regs *regs)
{
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(TRAP_gp_fault, regs);

    if ( regs->error_code & 1 )
        goto hardware_gp;

    if ( !GUEST_FAULT(regs) )
        goto gp_in_kernel;

    /*
     * Cunning trick to allow arbitrary "INT n" handling.
     *
     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
     * instruction from trapping to the appropriate vector, when that might
     * not be expected by Xen or the guest OS. For example, that entry might
     * be for a fault handler (unlike traps, faults don't increment EIP), or
     * might expect an error code on the stack (which a software trap never
     * provides), or might be a hardware interrupt handler that doesn't like
     * being called spuriously.
     *
     * Instead, a GPF occurs with the faulting IDT vector in the error code.
     * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
     * clear to indicate that it's a software fault, not hardware.
     *
     * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
     * okay because they can only be triggered by an explicit DPL-checked
     * instruction. The DPL specified by the guest OS for these vectors is
     * NOT CHECKED!!
     */
    if ( (regs->error_code & 3) == 2 )
    {
        /* This fault must be due to <INT n> instruction. */
        ti = current->thread.traps + (regs->error_code>>3);
        if ( TI_GET_DPL(ti) >= (VM86_MODE(regs) ? 3 : (regs->cs & 3)) )
        {
            tb->flags = TBF_EXCEPTION;
            regs->eip += 2;
            goto finish_propagation;
        }
    }

    /* Emulate some simple privileged instructions when exec'ed in ring 1. */
    if ( (regs->error_code == 0) &&
         RING_1(regs) &&
         emulate_privileged_op(regs) )
        return 0;

#if defined(__i386__)
    if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments) &&
         (regs->error_code == 0) &&
         gpf_emulate_4gb(regs) )
        return 0;
#endif

    /* Pass on GPF as is. */
    ti = current->thread.traps + 13;
    tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = regs->error_code;
 finish_propagation:
    tb->cs  = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 gp_in_kernel:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("GPF (%04x): %08x -> %08lx\n",
                regs->error_code, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_gp_fault, regs);

 hardware_gp:
    show_registers(regs);
    panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
          smp_processor_id(), regs->error_code);
    return 0;
}

asmlinkage void mem_parity_error(struct xen_regs *regs)
{
    console_force_unlock();
    printk("\n\nNMI - MEMORY ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

asmlinkage void io_check_error(struct xen_regs *regs)
{
    console_force_unlock();

    printk("\n\nNMI - I/O ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

static void unknown_nmi_error(unsigned char reason, struct xen_regs * regs)
{
    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
    printk("Dazed and confused, but trying to continue\n");
    printk("Do you have a strange power saving mode enabled?\n");
}

asmlinkage void do_nmi(struct xen_regs * regs, unsigned long reason)
{
    ++nmi_count(smp_processor_id());

#if CONFIG_X86_LOCAL_APIC
    if ( nmi_watchdog )
        nmi_watchdog_tick(regs);
    else
#endif
        unknown_nmi_error((unsigned char)(reason&0xff), regs);
}

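/*
 * NMIs flagged in nmi_softirq_reason are forwarded to DOM0 from softirq
 * context: bit 0 becomes VIRQ_PARITY_ERR, bit 1 becomes VIRQ_IO_ERR.
 */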
unsigned long nmi_softirq_reason;
static void nmi_softirq(void)
{
    if ( dom0 == NULL )
        return;

    if ( test_and_clear_bit(0, &nmi_softirq_reason) )
        send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR);

    if ( test_and_clear_bit(1, &nmi_softirq_reason) )
        send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR);
}

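/*
 * Device-not-available (#NM) handler: clear CR0.TS, lazily restore or
 * initialise the FPU state for the current domain, and, if the guest had set
 * TS itself, bounce a #NM exception to the guest's own handler (vector 7).
 */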
asmlinkage int math_state_restore(struct xen_regs *regs)
{
    /* Prevent recursion. */
    clts();

    if ( !test_bit(EDF_USEDFPU, &current->ed_flags) )
    {
        if ( test_bit(EDF_DONEFPUINIT, &current->ed_flags) )
            restore_fpu(current);
        else
            init_fpu();
        set_bit(EDF_USEDFPU, &current->ed_flags); /* so we fnsave on switch_to() */
    }

    if ( test_and_clear_bit(EDF_GUEST_STTS, &current->ed_flags) )
    {
        struct trap_bounce *tb = &current->thread.trap_bounce;
        tb->flags = TBF_EXCEPTION;
        tb->cs    = current->thread.traps[7].cs;
        tb->eip   = current->thread.traps[7].address;
    }

    return EXCRET_fault_fixed;
}

asmlinkage int do_debug(struct xen_regs *regs)
{
    unsigned int condition;
    struct exec_domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;

    DEBUGGER_trap_entry(TRAP_debug, regs);

    __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));

    /* Mask out spurious debug traps due to lazy DR7 setting */
    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
         (d->thread.debugreg[7] == 0) )
    {
        __asm__("movl %0,%%db7" : : "r" (0));
        goto out;
    }

    if ( !GUEST_FAULT(regs) )
    {
        /* Clear TF just for absolute sanity. */
        regs->eflags &= ~EF_TF;
        /*
         * We ignore watchpoints when they trigger within Xen. This may happen
         * when a buffer is passed to us which previously had a watchpoint set
         * on it. No need to bump EIP; the only faulting trap is an instruction
         * breakpoint, which can't happen to us.
         */
        goto out;
    }

    /* Save debug status register where guest OS can peek at it */
    d->thread.debugreg[6] = condition;

    tb->flags = TBF_EXCEPTION;
    tb->cs    = d->thread.traps[1].cs;
    tb->eip   = d->thread.traps[1].address;

 out:
    return EXCRET_not_a_fault;
}

asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
{
    return EXCRET_not_a_fault;
}

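/*
 * _set_gate() assembles an IDT descriptor in place: offset 'addr' in segment
 * __HYPERVISOR_CS, present bit set, the given DPL, and gate type 'type'
 * (14 == 32-bit interrupt gate). The helpers below use it to install
 * interrupt gates (DPL 0), system gates (DPL 3) and task gates.
 */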
#define _set_gate(gate_addr,type,dpl,addr) \
do { \
    int __d0, __d1; \
    __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
        "movw %4,%%dx\n\t" \
        "movl %%eax,%0\n\t" \
        "movl %%edx,%1" \
        :"=m" (*((long *) (gate_addr))), \
         "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
        :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
         "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
} while (0)

void set_intr_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,0,addr);
}

static void __init set_system_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,3,addr);
}

static void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].a = sel << 16;
    idt_table[n].b = 0x8500;
}

#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
    *((gate_addr)+1) = ((base) & 0xff000000) | \
        (((base) & 0x00ff0000)>>16) | \
        ((limit) & 0xf0000) | \
        ((dpl)<<13) | \
        (0x00408000) | \
        ((type)<<8); \
    *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
        ((limit) & 0x0ffff); }

#define _set_tssldt_desc(n,addr,limit,type) \
__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
    "movw %%ax,2(%2)\n\t" \
    "rorl $16,%%eax\n\t" \
    "movb %%al,4(%2)\n\t" \
    "movb %4,5(%2)\n\t" \
    "movb $0,6(%2)\n\t" \
    "movb %%ah,7(%2)\n\t" \
    "rorl $16,%%eax" \
    : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))

void set_tss_desc(unsigned int n, void *addr)
{
    _set_tssldt_desc(
        gdt_table + __TSS(n),
        (int)addr,
        offsetof(struct tss_struct, __cacheline_filler) - 1,
        0x89);
}

void __init trap_init(void)
{
    /*
     * Make a separate task for double faults. This will get us debug output
     * if we blow the kernel stack.
     */
    struct tss_struct *tss = &doublefault_tss;
    memset(tss, 0, sizeof(*tss));
    tss->ds     = __HYPERVISOR_DS;
    tss->es     = __HYPERVISOR_DS;
    tss->ss     = __HYPERVISOR_DS;
    tss->esp    = (unsigned long)
        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3  = __pa(idle_pg_table);
    tss->cs     = __HYPERVISOR_CS;
    tss->eip    = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
                     (int)tss, 235, 0x89);

    /*
     * Note that interrupt gates are always used, rather than trap gates. We
     * must have interrupts disabled until DS/ES/FS/GS are saved because the
     * first activation must have the "bad" value(s) for these registers and
     * we may lose them if another activation is installed before they are
     * saved. The page-fault handler also needs interrupts disabled until %cr2
     * has been read and saved on the stack.
     */
    set_intr_gate(TRAP_divide_error,&divide_error);
    set_intr_gate(TRAP_debug,&debug);
    set_intr_gate(TRAP_nmi,&nmi);
    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
    set_intr_gate(TRAP_bounds,&bounds);
    set_intr_gate(TRAP_invalid_op,&invalid_op);
    set_intr_gate(TRAP_no_device,&device_not_available);
    set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3);
    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
    set_intr_gate(TRAP_no_segment,&segment_not_present);
    set_intr_gate(TRAP_stack_error,&stack_segment);
    set_intr_gate(TRAP_gp_fault,&general_protection);
    set_intr_gate(TRAP_page_fault,&page_fault);
    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
    set_intr_gate(TRAP_copro_error,&coprocessor_error);
    set_intr_gate(TRAP_alignment_check,&alignment_check);
    set_intr_gate(TRAP_machine_check,&machine_check);
    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
    set_intr_gate(TRAP_deferred_nmi,&nmi);

    /* Only ring 1 can access Xen services. */
    _set_gate(idt_table+HYPERCALL_VECTOR,14,1,&hypercall);

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /*
     * Should be a barrier for any external CPU state.
     */
    {
        extern void cpu_init(void);
        cpu_init();
    }

    open_softirq(NMI_SOFTIRQ, nmi_softirq);
}

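/*
 * HYPERVISOR_set_trap_table: copy a guest-supplied array of trap_info_t
 * entries (terminated by an entry whose address is zero) into the current
 * exec_domain's trap table, validating each code selector. The loop is
 * preemptible and resumes via a hypercall continuation.
 */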
long do_set_trap_table(trap_info_t *traps)
{
    trap_info_t cur;
    trap_info_t *dst = current->thread.traps;

    LOCK_BIGLOCK(current->domain);

    for ( ; ; )
    {
        if ( hypercall_preempt_check() )
        {
            UNLOCK_BIGLOCK(current->domain);
            return hypercall_create_continuation(
                __HYPERVISOR_set_trap_table, 1, traps);
        }

        if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;

        if ( cur.address == 0 ) break;

        if ( !VALID_CODESEL(cur.cs) ) return -EPERM;

        memcpy(dst+cur.vector, &cur, sizeof(cur));
        traps++;
    }

    UNLOCK_BIGLOCK(current->domain);

    return 0;
}

long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct exec_domain *d = current;

    if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
        return -EPERM;

    d->thread.event_selector    = event_selector;
    d->thread.event_address     = event_address;
    d->thread.failsafe_selector = failsafe_selector;
    d->thread.failsafe_address  = failsafe_address;

    return 0;
}

long set_fast_trap(struct exec_domain *p, int idx)
{
    trap_info_t *ti;

    /* Index 0 is special: it disables fast traps. */
    if ( idx == 0 )
    {
        if ( p == current )
            CLEAR_FAST_TRAP(&p->thread);
        SET_DEFAULT_FAST_TRAP(&p->thread);
        return 0;
    }

    /*
     * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
     * The former range is used by Windows and MS-DOS.
     * Vector 0x80 is used by Linux and the BSD variants.
     */
    if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
        return -1;

    ti = p->thread.traps + idx;

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable.
     */
    if ( TI_GET_IF(ti) )
        return -1;

    if ( p == current )
        CLEAR_FAST_TRAP(&p->thread);

    p->thread.fast_trap_idx    = idx;
    p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    p->thread.fast_trap_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;

    if ( p == current )
        SET_FAST_TRAP(&p->thread);

    return 0;
}

long do_set_fast_trap(int idx)
{
    return set_fast_trap(current, idx);
}

long do_fpu_taskswitch(void)
{
    set_bit(EDF_GUEST_STTS, &current->ed_flags);
    stts();
    return 0;
}

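/*
 * set_debugreg(): install a value into one of a domain's debug registers.
 * Breakpoint addresses (DR0-DR3) must lie below PAGE_OFFSET-4 so a guest
 * cannot place breakpoints on Xen addresses; DR6/DR7 have their reserved
 * bits forced to their architectural values, and DR7 values that set the GD
 * bit or use the disallowed 10b encoding in any R/W or LEN field are
 * rejected.
 */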
long set_debugreg(struct exec_domain *p, int reg, unsigned long value)
{
    int i;

    switch ( reg )
    {
    case 0:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db0" : : "r" (value) );
        break;
    case 1:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db1" : : "r" (value) );
        break;
    case 2:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db2" : : "r" (value) );
        break;
    case 3:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db3" : : "r" (value) );
        break;
    case 6:
        /*
         * DR6: Bits 4-11,16-31 reserved (set to 1).
         *      Bit 12 reserved (set to 0).
         */
        value &= 0xffffefff; /* reserved bits => 0 */
        value |= 0xffff0ff0; /* reserved bits => 1 */
        if ( p == current )
            __asm__ ( "movl %0, %%db6" : : "r" (value) );
        break;
    case 7:
        /*
         * DR7: Bit 10 reserved (set to 1).
         *      Bits 11-12,14-15 reserved (set to 0).
         * Privileged bits:
         *      GD (bit 13): must be 0.
         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
         */
        /* DR7 == 0 => debugging disabled for this domain. */
        if ( value != 0 )
        {
            value &= 0xffff27ff; /* reserved bits => 0 */
            value |= 0x00000400; /* reserved bits => 1 */
            if ( (value & (1<<13)) != 0 ) return -EPERM;
            for ( i = 0; i < 16; i += 2 )
                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
        }
        if ( p == current )
            __asm__ ( "movl %0, %%db7" : : "r" (value) );
        break;
    default:
        return -EINVAL;
    }

    p->thread.debugreg[reg] = value;
    return 0;
}

long do_set_debugreg(int reg, unsigned long value)
{
    return set_debugreg(current, reg, value);
}

unsigned long do_get_debugreg(int reg)
{
    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
    return current->thread.debugreg[reg];
}

#else

asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)

#endif /* __i386__ */