debuggers.hg

view xen/arch/x86/traps.c @ 3674:fb875591fd72

bitkeeper revision 1.1159.223.63 (42028527-fv-d9BM0_LRp8UKGP19gQ)

Fix NMI deferral.
Signed-off-by: keir.fraser@cl.cam.ac.uk
author   kaf24@scramble.cl.cam.ac.uk
date     Thu Feb 03 20:10:15 2005 +0000 (2005-02-03)
parents  a5f1a6abfc46
children 578b6c14e635 018ea84397ab 6e1e447cf696
line source

/******************************************************************************
 * arch/i386/traps.c
 * 
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 * 
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/console.h>
#include <asm/regs.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/smp.h>
#include <asm/flushtlb.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>

/*
 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
 *  fatal:  Xen prints diagnostic message and then hangs.
 *  dom0:   The NMI is virtualised to DOM0.
 *  ignore: The NMI error is cleared and ignored.
 */
#ifdef NDEBUG
char opt_nmi[10] = "dom0";
#else
char opt_nmi[10] = "fatal";
#endif
string_param("nmi", opt_nmi);

#if defined(__i386__)
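
/* A trap frame came from guest context if the CPU was in VM86 mode or was
 * executing outside ring 0 (Xen itself runs in ring 0). */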
#define GUEST_FAULT(_r) (likely(VM86_MODE(_r) || !RING_0(_r)))

#define DOUBLEFAULT_STACK_SIZE 1024
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];

asmlinkage int hypercall(void);

/* Master table, and the one used by CPU0. */
struct desc_struct idt_table[256] = { {0, 0}, };
/* All other CPUs have their own copy. */
struct desc_struct *idt_tables[NR_CPUS] = { 0 };

asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
asmlinkage void device_not_available(void);
asmlinkage void coprocessor_segment_overrun(void);
asmlinkage void invalid_TSS(void);
asmlinkage void segment_not_present(void);
asmlinkage void stack_segment(void);
asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);

int kstack_depth_to_print = 8*20;
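
/* Does 'addr' lie within Xen's own text section? */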
static inline int kernel_text_address(unsigned long addr)
{
    if (addr >= (unsigned long) &_stext &&
        addr <= (unsigned long) &_etext)
        return 1;
    return 0;
}

void show_guest_stack(void)
{
    int i;
    execution_context_t *ec = get_execution_context();
    unsigned long *stack = (unsigned long *)ec->esp;
    printk("Guest EIP is %lx\n", ec->eip);

    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n ");
        printk("%08lx ", *stack++);
    }
    printk("\n");
}

void show_trace(unsigned long *esp)
{
    unsigned long *stack, addr;
    int i;

    printk("Call Trace from ESP=%p: ", esp);
    stack = esp;
    i = 0;
    while (((long) stack & (STACK_SIZE-1)) != 0) {
        addr = *stack++;
        if (kernel_text_address(addr)) {
            if (i && ((i % 6) == 0))
                printk("\n ");
            printk("[<%08lx>] ", addr);
            i++;
        }
    }
    printk("\n");
}

void show_stack(unsigned long *esp)
{
    unsigned long *stack;
    int i;

    printk("Stack trace from ESP=%p:\n", esp);

    stack = esp;
    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n ");
        if ( kernel_text_address(*stack) )
            printk("[%08lx] ", *stack++);
        else
            printk("%08lx ", *stack++);
    }
    printk("\n");

    show_trace( esp );
}
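
/*
 * Print a register snapshot. Faults taken inside Xen do not save ESP or
 * the data segment selectors in the frame, so those are synthesised
 * (ESP from the frame address, selectors as __HYPERVISOR_DS).
 */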
void show_registers(struct xen_regs *regs)
{
    unsigned long esp;
    unsigned short ss, ds, es, fs, gs;

    if ( GUEST_FAULT(regs) )
    {
        esp = regs->esp;
        ss  = regs->ss & 0xffff;
        ds  = regs->ds & 0xffff;
        es  = regs->es & 0xffff;
        fs  = regs->fs & 0xffff;
        gs  = regs->gs & 0xffff;
    }
    else
    {
        esp = (unsigned long)(&regs->esp);
        ss  = __HYPERVISOR_DS;
        ds  = __HYPERVISOR_DS;
        es  = __HYPERVISOR_DS;
        fs  = __HYPERVISOR_DS;
        gs  = __HYPERVISOR_DS;
    }

    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags);
    printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
           regs->eax, regs->ebx, regs->ecx, regs->edx);
    printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08lx\n",
           regs->esi, regs->edi, regs->ebp, esp);
    printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
           ds, es, fs, gs, ss);

    show_stack((unsigned long *)&regs->esp);
}

/*
 * This is called for faults at very unexpected times (e.g., when interrupts
 * are disabled). In such situations we can't do much that is safe. We try to
 * print out some tracing and then we just spin.
 */
asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
    int cpu = smp_processor_id();
    unsigned long cr2;
    static char *trapstr[] = {
        "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
        "invalid operation", "device not available", "double fault",
        "coprocessor segment", "invalid tss", "segment not found",
        "stack error", "general protection fault", "page fault",
        "spurious interrupt", "coprocessor error", "alignment check",
        "machine check", "simd error"
    };

    show_registers(regs);

    if ( trapnr == TRAP_page_fault )
    {
        __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
        printk("Faulting linear address might be %08lx\n", cr2);
    }

    printk("************************************\n");
    printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
           cpu, trapnr, trapstr[trapnr], regs->error_code,
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
    printk("System shutting down -- need manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}
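
/*
 * Generic guest exception handling: exceptions raised while a guest was
 * running are "bounced" to the guest OS. We fill in the domain's
 * trap_bounce structure and the exit path uses it to build an exception
 * frame for the registered guest handler, optionally with an error code,
 * and optionally masking event delivery (mimicking an interrupt gate).
 */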
static inline int do_trap(int trapnr, char *str,
                          struct xen_regs *regs,
                          int use_error_code)
{
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(trapnr, regs);

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    ti = current->thread.traps + trapnr;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( use_error_code )
    {
        tb->flags |= TBF_EXCEPTION_ERRCODE;
        tb->error_code = regs->error_code;
    }
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("Trap %d: %08x -> %08lx\n", trapnr, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(trapnr, regs);

    show_registers(regs);
    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
          "[error_code=%04x]\n",
          smp_processor_id(), trapnr, str, regs->error_code);
    return 0;
}

#define DO_ERROR_NOCODE(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 0); \
}

#define DO_ERROR(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 1); \
}

DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)

asmlinkage int do_int3(struct xen_regs *regs)
{
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;
    trap_info_t *ti;

    DEBUGGER_trap_entry(TRAP_int3, regs);

    if ( !GUEST_FAULT(regs) )
    {
        DEBUGGER_trap_fatal(TRAP_int3, regs);
        show_registers(regs);
        panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    }

    ti = current->thread.traps + 3;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;

    return 0;
}
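
/*
 * Double faults enter via a task gate, so we arrive here running on the
 * private doublefault_tss. Its back link names the TSS of the interrupted
 * task; GDT slots hold a TSS/LDT pair per CPU, hence the final shift.
 */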
asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;

    /* Disable the NMI watchdog. It's useless now. */
    watchdog_on = 0;

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           cpu, tss->cs, tss->eip, tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}

asmlinkage void do_machine_check(struct xen_regs *regs)
{
    fatal_trap(TRAP_machine_check, regs);
}
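
/*
 * Queue a page-fault bounce to the guest's #PF handler, passing the
 * error code and the faulting address (a virtual CR2).
 */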
void propagate_page_fault(unsigned long addr, u16 error_code)
{
    trap_info_t *ti;
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;

    ti = d->thread.traps + 14;
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
    tb->cr2        = addr;
    tb->error_code = error_code;
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
}

asmlinkage int do_page_fault(struct xen_regs *regs)
{
    unsigned long off, addr, fixup;
    struct domain *d = current;
    extern int map_ldt_shadow_page(unsigned int);
    int cpu = d->processor;

    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incrc(page_faults);
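
    /*
     * Writable pagetables: the guest may legitimately write to a pagetable
     * page that Xen has write-protected or temporarily unhooked. Fix such
     * faults up here, without ever notifying the guest.
     */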
    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
    {
        if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
             unlikely((addr >> L2_PAGETABLE_SHIFT) ==
                      ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
        {
            ptwr_flush(PTWR_PT_ACTIVE);
            return EXCRET_fault_fixed;
        }

        if ( (addr < PAGE_OFFSET) &&
             ((regs->error_code & 3) == 3) && /* write-protection fault */
             ptwr_do_page_fault(addr) )
        {
            if ( unlikely(d->mm.shadow_mode) )
                (void)shadow_fault(addr, regs->error_code);
            return EXCRET_fault_fixed;
        }
    }

    if ( unlikely(d->mm.shadow_mode) &&
         (addr < PAGE_OFFSET) && shadow_fault(addr, regs->error_code) )
        return EXCRET_fault_fixed;

    if ( unlikely(addr >= LDT_VIRT_START) &&
         (addr < (LDT_VIRT_START + (d->mm.ldt_ents*LDT_ENTRY_SIZE))) )
    {
        /*
         * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
         * send the fault up to the guest OS to be handled.
         */
        off  = addr - LDT_VIRT_START;
        addr = d->mm.ldt_base + off;
        if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) )
            return EXCRET_fault_fixed; /* successfully copied the mapping */
    }

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    propagate_page_fault(addr, regs->error_code);
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        perfc_incrc(copy_user_faults);
        if ( !d->mm.shadow_mode )
            DPRINTK("Page fault: %08x -> %08lx\n", regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_page_fault, regs);

    if ( addr >= PAGE_OFFSET )
    {
        unsigned long page;
        page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
        printk("*pde = %08lx\n", page);
        if ( page & _PAGE_PRESENT )
        {
            page &= PAGE_MASK;
            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
            printk(" *pte = %08lx\n", page);
        }
#ifdef MEMORY_GUARD
        if ( !(regs->error_code & 1) )
            printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
#endif
    }

    show_registers(regs);
    panic("CPU%d FATAL PAGE FAULT\n"
          "[error_code=%04x]\n"
          "Faulting linear address might be %08lx\n",
          smp_processor_id(), regs->error_code, addr);
    return 0;
}

asmlinkage int do_general_protection(struct xen_regs *regs)
{
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(TRAP_gp_fault, regs);

    if ( regs->error_code & 1 )
        goto hardware_gp;

    if ( !GUEST_FAULT(regs) )
        goto gp_in_kernel;

    /*
     * Cunning trick to allow arbitrary "INT n" handling.
     * 
     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
     * instruction from trapping to the appropriate vector, when that might not
     * be expected by Xen or the guest OS. For example, that entry might be for
     * a fault handler (unlike traps, faults don't increment EIP), or might
     * expect an error code on the stack (which a software trap never
     * provides), or might be a hardware interrupt handler that doesn't like
     * being called spuriously.
     * 
     * Instead, a GPF occurs with the faulting IDT vector in the error code.
     * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
     * clear to indicate that it's a software fault, not hardware.
     * 
     * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
     * okay because they can only be triggered by an explicit DPL-checked
     * instruction. The DPL specified by the guest OS for these vectors is NOT
     * CHECKED!!
     */
    if ( (regs->error_code & 3) == 2 )
    {
        /* This fault must be due to <INT n> instruction. */
        ti = current->thread.traps + (regs->error_code>>3);
        if ( TI_GET_DPL(ti) >= (VM86_MODE(regs) ? 3 : (regs->cs & 3)) )
        {
            tb->flags = TBF_EXCEPTION;
            regs->eip += 2;
            goto finish_propagation;
        }
    }

#if defined(__i386__)
    if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments) &&
         (regs->error_code == 0) &&
         gpf_emulate_4gb(regs) )
        return 0;
#endif

    /* Pass on GPF as is. */
    ti = current->thread.traps + 13;
    tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = regs->error_code;
 finish_propagation:
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
    return 0;

 gp_in_kernel:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("GPF (%04x): %08x -> %08lx\n",
                regs->error_code, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_gp_fault, regs);

 hardware_gp:
    show_registers(regs);
    panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
          smp_processor_id(), regs->error_code);
    return 0;
}

asmlinkage void mem_parity_error(struct xen_regs *regs)
{
    console_force_unlock();
    printk("\n\nNMI - MEMORY ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

asmlinkage void io_check_error(struct xen_regs *regs)
{
    console_force_unlock();

    printk("\n\nNMI - I/O ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

static void unknown_nmi_error(unsigned char reason, struct xen_regs * regs)
{
    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
    printk("Dazed and confused, but trying to continue\n");
    printk("Do you have a strange power saving mode enabled?\n");
}

asmlinkage void do_nmi(struct xen_regs * regs, unsigned long reason)
{
    ++nmi_count(smp_processor_id());

#if CONFIG_X86_LOCAL_APIC
    if ( nmi_watchdog )
        nmi_watchdog_tick(regs);
    else
#endif
        unknown_nmi_error((unsigned char)(reason&0xff), regs);
}
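
/*
 * NMIs destined for DOM0 cannot safely be delivered from NMI context.
 * The NMI path latches reason bits in nmi_softirq_reason; this softirq
 * forwards them to DOM0 as virtual IRQs once it is safe to do so.
 */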
unsigned long nmi_softirq_reason;
static void nmi_softirq(void)
{
    if ( dom0 == NULL )
        return;

    if ( test_and_clear_bit(0, &nmi_softirq_reason) )
        send_guest_virq(dom0, VIRQ_PARITY_ERR);

    if ( test_and_clear_bit(1, &nmi_softirq_reason) )
        send_guest_virq(dom0, VIRQ_IO_ERR);
}
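
/*
 * Device-not-available (#NM): lazily restore or initialise the FPU state.
 * If the guest itself had set TS, re-deliver the trap so it can do its
 * own lazy FPU switching.
 */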
asmlinkage int math_state_restore(struct xen_regs *regs)
{
    /* Prevent recursion. */
    clts();

    if ( !test_bit(DF_USEDFPU, &current->flags) )
    {
        if ( test_bit(DF_DONEFPUINIT, &current->flags) )
            restore_fpu(current);
        else
            init_fpu();
        set_bit(DF_USEDFPU, &current->flags); /* so we fnsave on switch_to() */
    }

    if ( test_and_clear_bit(DF_GUEST_STTS, &current->flags) )
    {
        struct trap_bounce *tb = &current->thread.trap_bounce;
        tb->flags = TBF_EXCEPTION;
        tb->cs    = current->thread.traps[7].cs;
        tb->eip   = current->thread.traps[7].address;
    }

    return EXCRET_fault_fixed;
}

asmlinkage int do_debug(struct xen_regs *regs)
{
    unsigned int condition;
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;

    DEBUGGER_trap_entry(TRAP_debug, regs);

    __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));

    /* Mask out spurious debug traps due to lazy DR7 setting */
    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
         (d->thread.debugreg[7] == 0) )
    {
        __asm__("movl %0,%%db7" : : "r" (0));
        goto out;
    }

    if ( !GUEST_FAULT(regs) )
    {
        /* Clear TF just for absolute sanity. */
        regs->eflags &= ~EF_TF;
        /*
         * We ignore watchpoints when they trigger within Xen. This may happen
         * when a buffer is passed to us which previously had a watchpoint set
         * on it. No need to bump EIP; the only faulting trap is an instruction
         * breakpoint, which can't happen to us.
         */
        goto out;
    }

    /* Save debug status register where guest OS can peek at it */
    d->thread.debugreg[6] = condition;

    tb->flags = TBF_EXCEPTION;
    tb->cs    = d->thread.traps[1].cs;
    tb->eip   = d->thread.traps[1].address;

 out:
    return EXCRET_not_a_fault;
}

asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
{
    return EXCRET_not_a_fault;
}
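
/*
 * Build an IDT gate descriptor in place: the low dword is
 * (__HYPERVISOR_CS << 16) | (handler & 0xffff); the high dword combines
 * the top 16 handler bits with the present bit, DPL and gate type.
 */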
#define _set_gate(gate_addr,type,dpl,addr) \
do { \
    int __d0, __d1; \
    __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
        "movw %4,%%dx\n\t" \
        "movl %%eax,%0\n\t" \
        "movl %%edx,%1" \
        :"=m" (*((long *) (gate_addr))), \
         "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
        :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
         "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
} while (0)

void set_intr_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,0,addr);
}

static void __init set_system_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,3,addr);
}

static void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].a = sel << 16;
    idt_table[n].b = 0x8500;
}

#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
    *((gate_addr)+1) = ((base) & 0xff000000) | \
        (((base) & 0x00ff0000)>>16) | \
        ((limit) & 0xf0000) | \
        ((dpl)<<13) | \
        (0x00408000) | \
        ((type)<<8); \
    *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
        ((limit) & 0x0ffff); }
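
/*
 * Write an 8-byte system descriptor (TSS/LDT) at 'n' with the given base
 * address, limit and type byte.
 */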
#define _set_tssldt_desc(n,addr,limit,type) \
__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
    "movw %%ax,2(%2)\n\t" \
    "rorl $16,%%eax\n\t" \
    "movb %%al,4(%2)\n\t" \
    "movb %4,5(%2)\n\t" \
    "movb $0,6(%2)\n\t" \
    "movb %%ah,7(%2)\n\t" \
    "rorl $16,%%eax" \
    : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))

void set_tss_desc(unsigned int n, void *addr)
{
    _set_tssldt_desc(
        gdt_table + __TSS(n),
        (int)addr,
        offsetof(struct tss_struct, __cacheline_filler) - 1,
        0x89);
}

void __init trap_init(void)
{
    /*
     * Make a separate task for double faults. This will get us debug output if
     * we blow the kernel stack.
     */
    struct tss_struct *tss = &doublefault_tss;
    memset(tss, 0, sizeof(*tss));
    tss->ds     = __HYPERVISOR_DS;
    tss->es     = __HYPERVISOR_DS;
    tss->ss     = __HYPERVISOR_DS;
    tss->esp    = (unsigned long)
        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3  = __pa(idle_pg_table);
    tss->cs     = __HYPERVISOR_CS;
    tss->eip    = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
                     (int)tss, 235, 0x89);

    /*
     * Note that interrupt gates are always used, rather than trap gates. We
     * must have interrupts disabled until DS/ES/FS/GS are saved because the
     * first activation must have the "bad" value(s) for these registers and
     * we may lose them if another activation is installed before they are
     * saved. The page-fault handler also needs interrupts disabled until %cr2
     * has been read and saved on the stack.
     */
    set_intr_gate(TRAP_divide_error,&divide_error);
    set_intr_gate(TRAP_debug,&debug);
    set_intr_gate(TRAP_nmi,&nmi);
    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
    set_intr_gate(TRAP_bounds,&bounds);
    set_intr_gate(TRAP_invalid_op,&invalid_op);
    set_intr_gate(TRAP_no_device,&device_not_available);
    set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3);
    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
    set_intr_gate(TRAP_no_segment,&segment_not_present);
    set_intr_gate(TRAP_stack_error,&stack_segment);
    set_intr_gate(TRAP_gp_fault,&general_protection);
    set_intr_gate(TRAP_page_fault,&page_fault);
    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
    set_intr_gate(TRAP_copro_error,&coprocessor_error);
    set_intr_gate(TRAP_alignment_check,&alignment_check);
    set_intr_gate(TRAP_machine_check,&machine_check);
    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
    set_intr_gate(TRAP_deferred_nmi,&nmi);

    /* Only ring 1 can access Xen services. */
    _set_gate(idt_table+HYPERCALL_VECTOR,14,1,&hypercall);

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /*
     * Should be a barrier for any external CPU state.
     */
    {
        extern void cpu_init(void);
        cpu_init();
    }

    open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
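
/*
 * Hypercall to install a guest trap table. The copy loop is preemptible:
 * when preemption is due we return a continuation that restarts the
 * hypercall from the current position in the input array.
 */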
long do_set_trap_table(trap_info_t *traps)
{
    trap_info_t cur;
    trap_info_t *dst = current->thread.traps;

    for ( ; ; )
    {
        if ( hypercall_preempt_check() )
            return hypercall_create_continuation(
                __HYPERVISOR_set_trap_table, 1, traps);

        if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;

        if ( cur.address == 0 ) break;

        if ( !VALID_CODESEL(cur.cs) ) return -EPERM;

        memcpy(dst+cur.vector, &cur, sizeof(cur));
        traps++;
    }

    return 0;
}

long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct domain *d = current;

    if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
        return -EPERM;

    d->thread.event_selector    = event_selector;
    d->thread.event_address     = event_address;
    d->thread.failsafe_selector = failsafe_selector;
    d->thread.failsafe_address  = failsafe_address;

    return 0;
}
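
/*
 * A "fast trap" installs the guest's handler for a vector directly in the
 * IDT as a trap gate (the 0x8f00 type bits below), so the trap reaches
 * the guest without passing through Xen at all.
 */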
long set_fast_trap(struct domain *p, int idx)
{
    trap_info_t *ti;

    /* Index 0 is special: it disables fast traps. */
    if ( idx == 0 )
    {
        if ( p == current )
            CLEAR_FAST_TRAP(&p->thread);
        SET_DEFAULT_FAST_TRAP(&p->thread);
        return 0;
    }

    /*
     * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
     * The former range is used by Windows and MS-DOS.
     * Vector 0x80 is used by Linux and the BSD variants.
     */
    if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
        return -1;

    ti = p->thread.traps + idx;

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable.
     */
    if ( TI_GET_IF(ti) )
        return -1;

    if ( p == current )
        CLEAR_FAST_TRAP(&p->thread);

    p->thread.fast_trap_idx    = idx;
    p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    p->thread.fast_trap_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;

    if ( p == current )
        SET_FAST_TRAP(&p->thread);

    return 0;
}

long do_set_fast_trap(int idx)
{
    return set_fast_trap(current, idx);
}

long do_fpu_taskswitch(void)
{
    set_bit(DF_GUEST_STTS, &current->flags);
    stts();
    return 0;
}

long set_debugreg(struct domain *p, int reg, unsigned long value)
{
    int i;

    switch ( reg )
    {
    case 0:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db0" : : "r" (value) );
        break;
    case 1:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db1" : : "r" (value) );
        break;
    case 2:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db2" : : "r" (value) );
        break;
    case 3:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db3" : : "r" (value) );
        break;
    case 6:
        /*
         * DR6: Bits 4-11,16-31 reserved (set to 1).
         *      Bit 12 reserved (set to 0).
         */
        value &= 0xffffefff; /* reserved bits => 0 */
        value |= 0xffff0ff0; /* reserved bits => 1 */
        if ( p == current )
            __asm__ ( "movl %0, %%db6" : : "r" (value) );
        break;
    case 7:
        /*
         * DR7: Bit 10 reserved (set to 1).
         *      Bits 11-12,14-15 reserved (set to 0).
         * Privileged bits:
         *      GD (bit 13): must be 0.
         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
         */
        /* DR7 == 0 => debugging disabled for this domain. */
        if ( value != 0 )
        {
            value &= 0xffff27ff; /* reserved bits => 0 */
            value |= 0x00000400; /* reserved bits => 1 */
            if ( (value & (1<<13)) != 0 ) return -EPERM;
            for ( i = 0; i < 16; i += 2 )
                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
        }
        if ( p == current )
            __asm__ ( "movl %0, %%db7" : : "r" (value) );
        break;
    default:
        return -EINVAL;
    }

    p->thread.debugreg[reg] = value;
    return 0;
}

long do_set_debugreg(int reg, unsigned long value)
{
    return set_debugreg(current, reg, value);
}

unsigned long do_get_debugreg(int reg)
{
    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
    return current->thread.debugreg[reg];
}

#else

asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
}

#endif /* __i386__ */