debuggers.hg

view xen/arch/x86/traps.c @ 3755:ea98f0bb6510

bitkeeper revision 1.1159.212.127 (4208b02bTdSR4AVYRg8diDkKZmIVUg)

General shadow code cleanup.

Fixed compilation problems when SHADOW_DEBUG is enabled.
Fixed compilation problems when CONFIG_VMX is undefined.

Simplified l1pte_write_fault and l1pte_read_fault.
Name change: spfn => smfn (shadow machine frame numbers).

In general, the terms pfn and gpfn now refer to pages in the
guest's idea of physical frames (which differs for fully shadowed
guests). mfn always refers to a machine frame number.

One bug fix for check_pagetable():
If we're using writable page tables
along with shadow mode, don't check the currently writable page table
page -- check its snapshot instead.

Signed-off-by: michael.fetterman@cl.cam.ac.uk
author mafetter@fleming.research
date Tue Feb 08 12:27:23 2005 +0000 (2005-02-08)
parents 23e7cf28ddb3
children 9f7935ea4606
line source
1 /* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
2 /******************************************************************************
3 * arch/x86/traps.c
4 *
5 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
6 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 * GNU General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 */
22 /*
23 * Copyright (C) 1991, 1992 Linus Torvalds
24 *
25 * Pentium III FXSR, SSE support
26 * Gareth Hughes <gareth@valinux.com>, May 2000
27 */
29 #include <xen/config.h>
30 #include <xen/init.h>
31 #include <xen/sched.h>
32 #include <xen/lib.h>
33 #include <xen/errno.h>
34 #include <xen/mm.h>
35 #include <xen/console.h>
36 #include <asm/regs.h>
37 #include <xen/delay.h>
38 #include <xen/event.h>
39 #include <xen/spinlock.h>
40 #include <xen/irq.h>
41 #include <xen/perfc.h>
42 #include <xen/softirq.h>
43 #include <asm/shadow.h>
44 #include <asm/domain_page.h>
45 #include <asm/system.h>
46 #include <asm/io.h>
47 #include <asm/atomic.h>
48 #include <asm/desc.h>
49 #include <asm/debugreg.h>
50 #include <asm/smp.h>
51 #include <asm/flushtlb.h>
52 #include <asm/uaccess.h>
53 #include <asm/i387.h>
54 #include <asm/debugger.h>
55 #include <asm/msr.h>
57 /*
58 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
59 * fatal: Xen prints diagnostic message and then hangs.
60 * dom0: The NMI is virtualised to DOM0.
61 * ignore: The NMI error is cleared and ignored.
62 */
/* Default NMI policy: virtualise to dom0 in release builds, die loudly in
 * debug builds. Overridable on the command line via "nmi=". */
63 #ifdef NDEBUG
64 char opt_nmi[10] = "dom0";
65 #else
66 char opt_nmi[10] = "fatal";
67 #endif
68 string_param("nmi", opt_nmi);
70 asmlinkage int hypercall(void);
72 /* Master table, and the one used by CPU0. */
73 idt_entry_t idt_table[IDT_ENTRIES] = { {0, 0}, };
74 /* All other CPUs have their own copy. */
75 idt_entry_t *idt_tables[NR_CPUS] = { 0 };
77 asmlinkage void divide_error(void);
78 asmlinkage void debug(void);
79 asmlinkage void nmi(void);
80 asmlinkage void int3(void);
81 asmlinkage void overflow(void);
82 asmlinkage void bounds(void);
83 asmlinkage void invalid_op(void);
84 asmlinkage void device_not_available(void);
85 asmlinkage void coprocessor_segment_overrun(void);
86 asmlinkage void invalid_TSS(void);
87 asmlinkage void segment_not_present(void);
88 asmlinkage void stack_segment(void);
89 asmlinkage void general_protection(void);
90 asmlinkage void page_fault(void);
91 asmlinkage void coprocessor_error(void);
92 asmlinkage void simd_coprocessor_error(void);
93 asmlinkage void alignment_check(void);
94 asmlinkage void spurious_interrupt_bug(void);
95 asmlinkage void machine_check(void);
97 /*
98 * This is called for faults at very unexpected times (e.g., when interrupts
99 * are disabled). In such situations we can't do much that is safe. We try to
100 * print out some tracing and then we just spin.
101 */
102 asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
103 {
104 int cpu = smp_processor_id();
105 unsigned long cr2;
106 static char *trapstr[] = {
107 "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
108 "invalid operation", "device not available", "double fault",
109 "coprocessor segment", "invalid tss", "segment not found",
110 "stack error", "general protection fault", "page fault",
111 "spurious interrupt", "coprocessor error", "alignment check",
112 "machine check", "simd error"
113 };
115 show_registers(regs);
117 if ( trapnr == TRAP_page_fault )
118 {
119 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (cr2) : );
120 printk("Faulting linear address might be %0lx %lx\n", cr2, cr2);
121 }
123 printk("************************************\n");
124 printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
125 cpu, trapnr, trapstr[trapnr], regs->error_code,
126 (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
127 printk("System shutting down -- need manual reset.\n");
128 printk("************************************\n");
130 /* Lock up the console to prevent spurious output from other CPUs. */
131 console_force_lock();
133 /* Wait for manual reset. */
134 for ( ; ; )
135 __asm__ __volatile__ ( "hlt" );
136 }
138 static inline int do_trap(int trapnr, char *str,
139 struct xen_regs *regs,
140 int use_error_code)
141 {
142 struct exec_domain *ed = current;
143 struct trap_bounce *tb = &ed->arch.trap_bounce;
144 trap_info_t *ti;
145 unsigned long fixup;
147 DEBUGGER_trap_entry(trapnr, regs);
149 if ( !GUEST_FAULT(regs) )
150 goto xen_fault;
152 #ifndef NDEBUG
153 if ( (ed->arch.traps[trapnr].address == 0) && (ed->domain->id == 0) )
154 goto xen_fault;
155 #endif
157 ti = current->arch.traps + trapnr;
158 tb->flags = TBF_EXCEPTION;
159 tb->cs = ti->cs;
160 tb->eip = ti->address;
161 if ( use_error_code )
162 {
163 tb->flags |= TBF_EXCEPTION_ERRCODE;
164 tb->error_code = regs->error_code;
165 }
166 if ( TI_GET_IF(ti) )
167 ed->vcpu_info->evtchn_upcall_mask = 1;
168 return 0;
170 xen_fault:
172 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
173 {
174 DPRINTK("Trap %d: %p -> %p\n", trapnr, regs->eip, fixup);
175 regs->eip = fixup;
176 return 0;
177 }
179 DEBUGGER_trap_fatal(trapnr, regs);
181 show_registers(regs);
182 panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
183 "[error_code=%04x]\n",
184 smp_processor_id(), trapnr, str, regs->error_code);
185 return 0;
186 }
/* Stamp out a trivial handler that forwards to do_trap without an error
 * code on the guest's stack frame. */
188 #define DO_ERROR_NOCODE(trapnr, str, name) \
189 asmlinkage int do_##name(struct xen_regs *regs) \
190 { \
191 return do_trap(trapnr, str, regs, 0); \
192 }
/* As above, but the hardware pushes an error code for these vectors, so
 * propagate it to the guest. */
194 #define DO_ERROR(trapnr, str, name) \
195 asmlinkage int do_##name(struct xen_regs *regs) \
196 { \
197 return do_trap(trapnr, str, regs, 1); \
198 }
/* Handlers for the simple exception vectors; vectors 10-12 and 17 carry a
 * hardware-supplied error code, the rest do not. */
200 DO_ERROR_NOCODE( 0, "divide error", divide_error)
201 DO_ERROR_NOCODE( 4, "overflow", overflow)
202 DO_ERROR_NOCODE( 5, "bounds", bounds)
203 DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
204 DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
205 DO_ERROR(10, "invalid TSS", invalid_TSS)
206 DO_ERROR(11, "segment not present", segment_not_present)
207 DO_ERROR(12, "stack segment", stack_segment)
208 DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
209 DO_ERROR(17, "alignment check", alignment_check)
210 DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
212 asmlinkage int do_int3(struct xen_regs *regs)
213 {
214 struct exec_domain *ed = current;
215 struct trap_bounce *tb = &ed->arch.trap_bounce;
216 trap_info_t *ti;
218 DEBUGGER_trap_entry(TRAP_int3, regs);
220 if ( !GUEST_FAULT(regs) )
221 {
222 DEBUGGER_trap_fatal(TRAP_int3, regs);
223 show_registers(regs);
224 panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
225 }
227 ti = current->arch.traps + 3;
228 tb->flags = TBF_EXCEPTION;
229 tb->cs = ti->cs;
230 tb->eip = ti->address;
231 if ( TI_GET_IF(ti) )
232 ed->vcpu_info->evtchn_upcall_mask = 1;
234 return 0;
235 }
/* Machine-check exceptions are always treated as fatal to the machine. */
237 asmlinkage void do_machine_check(struct xen_regs *regs)
238 {
239 fatal_trap(TRAP_machine_check, regs);
240 }
/*
 * Arrange for the current guest to receive a page-fault exception
 * (vector 14) carrying the given faulting address and error code, via
 * the trap-bounce mechanism on the next exit to the guest.
 */
242 void propagate_page_fault(unsigned long addr, u16 error_code)
243 {
244 trap_info_t *ti;
245 struct exec_domain *ed = current;
246 struct trap_bounce *tb = &ed->arch.trap_bounce;
248 ti = ed->arch.traps + 14;
249 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
250 tb->cr2 = addr;
251 tb->error_code = error_code;
252 tb->cs = ti->cs;
253 tb->eip = ti->address;
/* Mask guest event delivery if the guest asked for interrupts-off entry. */
254 if ( TI_GET_IF(ti) )
255 ed->vcpu_info->evtchn_upcall_mask = 1;
/* Record the address where the guest reads back its virtualised %cr2. */
257 ed->arch.guest_cr2 = addr;
258 }
/*
 * Top-level page-fault handler. Tries, in order: writable-pagetable
 * fixups, shadow-mode fault handling, lazy LDT mapping, propagation to
 * the guest, and finally Xen exception-table fixup; anything left is a
 * fatal hypervisor fault.
 */
260 asmlinkage int do_page_fault(struct xen_regs *regs)
261 {
262 unsigned long off, addr, fixup;
263 struct exec_domain *ed = current;
264 struct domain *d = ed->domain;
265 extern int map_ldt_shadow_page(unsigned int);
266 int cpu = ed->processor;
267 int ret;
269 __asm__ __volatile__ ("mov %%cr2,%0" : "=r" (addr) : );
271 DEBUGGER_trap_entry(TRAP_page_fault, regs);
273 perfc_incrc(page_faults);
275 #if 0
276 printk("do_page_fault(addr=0x%08lx, error_code=%d)\n",
277 addr, regs->error_code);
278 show_registers(regs);
279 #endif
/* Writable-pagetable assist: flush or fix up a currently-writable PT page. */
281 if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
282 {
283 LOCK_BIGLOCK(d);
284 if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
285 unlikely((addr >> L2_PAGETABLE_SHIFT) ==
286 ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
287 {
288 ptwr_flush(PTWR_PT_ACTIVE);
289 UNLOCK_BIGLOCK(d);
290 return EXCRET_fault_fixed;
291 }
293 if ( (addr < PAGE_OFFSET) &&
294 ((regs->error_code & 3) == 3) && /* write-protection fault */
295 ptwr_do_page_fault(addr) )
296 {
297 if ( unlikely(d->arch.shadow_mode) )
298 (void)shadow_fault(addr, regs->error_code);
299 UNLOCK_BIGLOCK(d);
300 return EXCRET_fault_fixed;
301 }
302 UNLOCK_BIGLOCK(d);
303 }
/* Shadow pagetables: let the shadow code try to satisfy the fault. */
305 if ( unlikely(d->arch.shadow_mode) &&
306 (addr < PAGE_OFFSET) && shadow_fault(addr, regs->error_code) )
307 return EXCRET_fault_fixed;
/* Fault within the virtual LDT range: map the backing page lazily. */
309 if ( unlikely(addr >= LDT_VIRT_START(ed)) &&
310 (addr < (LDT_VIRT_START(ed) + (ed->arch.ldt_ents*LDT_ENTRY_SIZE))) )
311 {
312 /*
313 * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
314 * send the fault up to the guest OS to be handled.
315 */
316 LOCK_BIGLOCK(d);
317 off = addr - LDT_VIRT_START(ed);
318 addr = ed->arch.ldt_base + off;
319 ret = map_ldt_shadow_page(off >> PAGE_SHIFT);
320 UNLOCK_BIGLOCK(d);
321 if ( likely(ret) )
322 return EXCRET_fault_fixed; /* successfully copied the mapping */
323 }
325 if ( !GUEST_FAULT(regs) )
326 goto xen_fault;
328 #ifndef NDEBUG
/* Debug builds: dom0 with no registered handler dies in Xen instead. */
329 if ( (ed->arch.traps[TRAP_page_fault].address == 0) && (d->id == 0) )
330 goto xen_fault;
331 #endif
333 propagate_page_fault(addr, regs->error_code);
334 return 0;
336 xen_fault:
/* Faults inside Xen: consult the exception table (e.g. copy_user). */
338 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
339 {
340 perfc_incrc(copy_user_faults);
341 if ( !d->arch.shadow_mode )
342 DPRINTK("Page fault: %p -> %p\n", regs->eip, fixup);
343 regs->eip = fixup;
344 return 0;
345 }
347 DEBUGGER_trap_fatal(TRAP_page_fault, regs);
349 show_registers(regs);
350 show_page_walk(addr);
351 panic("CPU%d FATAL PAGE FAULT\n"
352 "[error_code=%04x]\n"
353 "Faulting linear address might be %p\n",
354 smp_processor_id(), regs->error_code, addr);
355 return 0;
356 }
358 static int emulate_privileged_op(struct xen_regs *regs)
359 {
360 extern long do_fpu_taskswitch(void);
361 extern void *decode_reg(struct xen_regs *regs, u8 b);
363 struct exec_domain *ed = current;
364 unsigned long *reg, eip = regs->eip;
365 u8 opcode;
367 if ( get_user(opcode, (u8 *)eip) )
368 goto page_fault;
369 eip += 1;
370 if ( (opcode & 0xff) != 0x0f )
371 goto fail;
373 if ( get_user(opcode, (u8 *)eip) )
374 goto page_fault;
375 eip += 1;
377 switch ( opcode )
378 {
379 case 0x06: /* CLTS */
380 (void)do_fpu_taskswitch();
381 break;
383 case 0x09: /* WBINVD */
384 if ( !IS_CAPABLE_PHYSDEV(ed->domain) )
385 {
386 DPRINTK("Non-physdev domain attempted WBINVD.\n");
387 goto fail;
388 }
389 wbinvd();
390 break;
392 case 0x20: /* MOV CR?,<reg> */
393 if ( get_user(opcode, (u8 *)eip) )
394 goto page_fault;
395 eip += 1;
396 if ( (opcode & 0xc0) != 0xc0 )
397 goto fail;
398 reg = decode_reg(regs, opcode & 7);
399 switch ( (opcode >> 3) & 7 )
400 {
401 case 0: /* Read CR0 */
402 *reg =
403 (read_cr0() & ~X86_CR0_TS) |
404 (test_bit(EDF_GUEST_STTS, &ed->ed_flags) ? X86_CR0_TS : 0);
405 break;
407 case 2: /* Read CR2 */
408 *reg = ed->arch.guest_cr2;
409 break;
411 case 3: /* Read CR3 */
412 *reg = pagetable_val(ed->arch.pagetable);
413 break;
415 default:
416 goto fail;
417 }
418 break;
420 case 0x22: /* MOV <reg>,CR? */
421 if ( get_user(opcode, (u8 *)eip) )
422 goto page_fault;
423 eip += 1;
424 if ( (opcode & 0xc0) != 0xc0 )
425 goto fail;
426 reg = decode_reg(regs, opcode & 7);
427 switch ( (opcode >> 3) & 7 )
428 {
429 case 0: /* Write CR0 */
430 if ( *reg & X86_CR0_TS ) /* XXX ignore all but TS bit */
431 (void)do_fpu_taskswitch;
432 break;
434 case 2: /* Write CR2 */
435 ed->arch.guest_cr2 = *reg;
436 break;
438 case 3: /* Write CR3 */
439 LOCK_BIGLOCK(ed->domain);
440 (void)new_guest_cr3(*reg);
441 UNLOCK_BIGLOCK(ed->domain);
442 break;
444 default:
445 goto fail;
446 }
447 break;
449 case 0x30: /* WRMSR */
450 if ( !IS_PRIV(ed->domain) )
451 {
452 DPRINTK("Non-priv domain attempted WRMSR.\n");
453 goto fail;
454 }
455 wrmsr(regs->ecx, regs->eax, regs->edx);
456 break;
458 case 0x32: /* RDMSR */
459 if ( !IS_PRIV(ed->domain) )
460 {
461 DPRINTK("Non-priv domain attempted RDMSR.\n");
462 goto fail;
463 }
464 rdmsr(regs->ecx, regs->eax, regs->edx);
465 break;
467 default:
468 goto fail;
469 }
471 regs->eip = eip;
472 return EXCRET_fault_fixed;
474 fail:
475 return 0;
477 page_fault:
478 propagate_page_fault(eip, 0);
479 return EXCRET_fault_fixed;
480 }
/*
 * General-protection fault handler: handles guest "INT n" emulation,
 * ring-1 privileged-instruction emulation, 4GB-segment emulation (x86-32),
 * propagation to the guest, and Xen-internal fixup.
 */
482 asmlinkage int do_general_protection(struct xen_regs *regs)
483 {
484 struct exec_domain *ed = current;
485 struct trap_bounce *tb = &ed->arch.trap_bounce;
486 trap_info_t *ti;
487 unsigned long fixup;
489 DEBUGGER_trap_entry(TRAP_gp_fault, regs);
/* Error-code bit 0 set: fault originated externally (hardware) — fatal. */
491 if ( regs->error_code & 1 )
492 goto hardware_gp;
494 if ( !GUEST_FAULT(regs) )
495 goto gp_in_kernel;
497 /*
498 * Cunning trick to allow arbitrary "INT n" handling.
499 *
500 * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
501 * instruction from trapping to the appropriate vector, when that might not
502 * be expected by Xen or the guest OS. For example, that entry might be for
503 * a fault handler (unlike traps, faults don't increment EIP), or might
504 * expect an error code on the stack (which a software trap never
505 * provides), or might be a hardware interrupt handler that doesn't like
506 * being called spuriously.
507 *
508 * Instead, a GPF occurs with the faulting IDT vector in the error code.
509 * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
510 * clear to indicate that it's a software fault, not hardware.
511 *
512 * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
513 * okay because they can only be triggered by an explicit DPL-checked
514 * instruction. The DPL specified by the guest OS for these vectors is NOT
515 * CHECKED!!
516 */
517 if ( (regs->error_code & 3) == 2 )
518 {
519 /* This fault must be due to <INT n> instruction. */
520 ti = current->arch.traps + (regs->error_code>>3);
521 if ( TI_GET_DPL(ti) >= (VM86_MODE(regs) ? 3 : (regs->cs & 3)) )
522 {
523 tb->flags = TBF_EXCEPTION;
/* Skip the two-byte INT n instruction (software traps don't advance EIP). */
524 regs->eip += 2;
525 goto finish_propagation;
526 }
527 }
529 /* Emulate some simple privileged instructions when exec'ed in ring 1. */
530 if ( (regs->error_code == 0) &&
531 RING_1(regs) &&
532 emulate_privileged_op(regs) )
533 return 0;
535 #if defined(__i386__)
536 if ( VM_ASSIST(ed->domain, VMASST_TYPE_4gb_segments) &&
537 (regs->error_code == 0) &&
538 gpf_emulate_4gb(regs) )
539 return 0;
540 #endif
542 #ifndef NDEBUG
/* Debug builds: dom0 with no registered handler dies in Xen instead. */
543 if ( (ed->arch.traps[TRAP_gp_fault].address == 0) &&
544 (ed->domain->id == 0) )
545 goto gp_in_kernel;
546 #endif
548 /* Pass on GPF as is. */
549 ti = current->arch.traps + 13;
550 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
551 tb->error_code = regs->error_code;
552 finish_propagation:
553 tb->cs = ti->cs;
554 tb->eip = ti->address;
555 if ( TI_GET_IF(ti) )
556 ed->vcpu_info->evtchn_upcall_mask = 1;
557 return 0;
559 gp_in_kernel:
561 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
562 {
563 DPRINTK("GPF (%04x): %p -> %p\n",
564 regs->error_code, regs->eip, fixup);
565 regs->eip = fixup;
566 return 0;
567 }
569 DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
571 hardware_gp:
572 show_registers(regs);
573 panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
574 smp_processor_id(), regs->error_code);
575 return 0;
576 }
/*
 * Deferred NMI processing: bit 0 of nmi_softirq_reason means a memory
 * parity error, bit 1 an I/O check error. Each pending reason is
 * delivered to DOM0's first VCPU as the corresponding virtual IRQ.
 */
578 unsigned long nmi_softirq_reason;
579 static void nmi_softirq(void)
580 {
581 if ( dom0 == NULL )
582 return;
584 if ( test_and_clear_bit(0, &nmi_softirq_reason) )
585 send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR);
587 if ( test_and_clear_bit(1, &nmi_softirq_reason) )
588 send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR);
589 }
/*
 * NMI caused by a memory parity error. Policy chosen by the "nmi="
 * boot parameter: forward to dom0, ignore, or die.
 */
591 asmlinkage void mem_parity_error(struct xen_regs *regs)
592 {
593 /* Clear and disable the parity-error line. */
594 outb((inb(0x61)&15)|4,0x61);
596 switch ( opt_nmi[0] )
597 {
598 case 'd': /* 'dom0' */
/* Bit 0 => VIRQ_PARITY_ERR in nmi_softirq(). */
599 set_bit(0, &nmi_softirq_reason);
600 raise_softirq(NMI_SOFTIRQ);
/* Deliberate fallthrough: after notifying dom0 the NMI is dismissed. */
601 case 'i': /* 'ignore' */
602 break;
603 default: /* 'fatal' */
604 console_force_unlock();
605 printk("\n\nNMI - MEMORY ERROR\n");
606 fatal_trap(TRAP_nmi, regs);
607 }
608 }
610 asmlinkage void io_check_error(struct xen_regs *regs)
611 {
612 /* Clear and disable the I/O-error line. */
613 outb((inb(0x61)&15)|8,0x61);
615 switch ( opt_nmi[0] )
616 {
617 case 'd': /* 'dom0' */
618 set_bit(0, &nmi_softirq_reason);
619 raise_softirq(NMI_SOFTIRQ);
620 case 'i': /* 'ignore' */
621 break;
622 default: /* 'fatal' */
623 console_force_unlock();
624 printk("\n\nNMI - I/O ERROR\n");
625 fatal_trap(TRAP_nmi, regs);
626 }
627 }
/* Report an NMI whose cause could not be identified from the reason byte. */
629 static void unknown_nmi_error(unsigned char reason)
630 {
631 printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
632 printk("Dazed and confused, but trying to continue\n");
633 printk("Do you have a strange power saving mode enabled?\n");
634 }
/*
 * Top-level NMI dispatcher. Bit 7 of 'reason' indicates a memory parity
 * error, bit 6 an I/O check error; anything else is reported as unknown
 * unless the NMI watchdog is active (its tick accounts for the NMI).
 */
636 asmlinkage void do_nmi(struct xen_regs *regs, unsigned long reason)
637 {
638 ++nmi_count(smp_processor_id());
640 if ( nmi_watchdog )
641 nmi_watchdog_tick(regs);
643 if ( reason & 0x80 )
644 mem_parity_error(regs);
645 else if ( reason & 0x40 )
646 io_check_error(regs);
647 else if ( !nmi_watchdog )
648 unknown_nmi_error((unsigned char)(reason&0xff));
649 }
/*
 * Lazy FPU state restore, invoked on a device-not-available (#NM) trap.
 * Restores (or initialises) the FPU context for the current domain and,
 * if the guest itself had set its virtual CR0.TS, bounces a vector-7
 * exception so the guest can run its own lazy-FPU logic.
 */
651 asmlinkage int math_state_restore(struct xen_regs *regs)
652 {
653 /* Prevent recursion. */
654 clts();
656 if ( !test_bit(EDF_USEDFPU, &current->ed_flags) )
657 {
658 if ( test_bit(EDF_DONEFPUINIT, &current->ed_flags) )
659 restore_fpu(current);
660 else
661 init_fpu();
662 set_bit(EDF_USEDFPU, &current->ed_flags); /* so we fnsave on switch_to() */
663 }
665 if ( test_and_clear_bit(EDF_GUEST_STTS, &current->ed_flags) )
666 {
/* The guest had stts()'d: reflect the #NM to its registered handler. */
667 struct trap_bounce *tb = &current->arch.trap_bounce;
668 tb->flags = TBF_EXCEPTION;
669 tb->cs = current->arch.traps[7].cs;
670 tb->eip = current->arch.traps[7].address;
671 }
673 return EXCRET_fault_fixed;
674 }
/*
 * Debug-exception (#DB) handler: filters spurious traps caused by lazy
 * DR7 context switching, swallows watchpoint hits inside Xen, and
 * otherwise bounces the exception to the guest's vector-1 handler.
 */
676 asmlinkage int do_debug(struct xen_regs *regs)
677 {
678 unsigned long condition;
679 struct exec_domain *d = current;
680 struct trap_bounce *tb = &d->arch.trap_bounce;
682 DEBUGGER_trap_entry(TRAP_debug, regs);
/* DR6 holds the debug status: which breakpoint(s) fired. */
684 __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));
686 /* Mask out spurious debug traps due to lazy DR7 setting */
687 if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
688 (d->arch.debugreg[7] == 0) )
689 {
690 __asm__("mov %0,%%db7" : : "r" (0UL));
691 goto out;
692 }
694 if ( !GUEST_FAULT(regs) )
695 {
696 /* Clear TF just for absolute sanity. */
697 regs->eflags &= ~EF_TF;
698 /*
699 * We ignore watchpoints when they trigger within Xen. This may happen
700 * when a buffer is passed to us which previously had a watchpoint set
701 * on it. No need to bump EIP; the only faulting trap is an instruction
702 * breakpoint, which can't happen to us.
703 */
704 goto out;
705 }
707 /* Save debug status register where guest OS can peek at it */
708 d->arch.debugreg[6] = condition;
710 tb->flags = TBF_EXCEPTION;
711 tb->cs = d->arch.traps[1].cs;
712 tb->eip = d->arch.traps[1].address;
714 out:
715 return EXCRET_not_a_fault;
716 }
/* Spurious interrupt (vector 15): nothing to do, simply dismiss. */
718 asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
719 {
720 return EXCRET_not_a_fault;
721 }
/* Entry stub for the deferred-NMI interrupt vector; NOTE(review):
 * presumably raised via a local APIC self-interrupt so the NMI work can
 * run with a normal interrupt frame — confirm against entry.S. */
723 BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi)
724 asmlinkage void smp_deferred_nmi(struct xen_regs regs)
725 {
726 ack_APIC_irq();
727 do_nmi(&regs, 0);
728 }
/* Install an interrupt gate (type 14, DPL 0): interrupts stay disabled
 * on entry and only ring 0 may invoke it via software. */
730 void set_intr_gate(unsigned int n, void *addr)
731 {
732 _set_gate(idt_table+n,14,0,addr);
733 }
/* Install an interrupt gate with DPL 3 so software INT from any ring is
 * permitted (used for int3/overflow). */
735 void set_system_gate(unsigned int n, void *addr)
736 {
737 _set_gate(idt_table+n,14,3,addr);
738 }
/* Install a task gate: selector in the high word of the low dword;
 * 0x8500 = present (P=1), type 0x5 (task gate). */
740 void set_task_gate(unsigned int n, unsigned int sel)
741 {
742 idt_table[n].a = sel << 16;
743 idt_table[n].b = 0x8500;
744 }
/* Install a TSS descriptor (type 9 = available 32-bit TSS) in the GDT
 * slot for CPU n; the limit covers the TSS up to the cacheline filler. */
746 void set_tss_desc(unsigned int n, void *addr)
747 {
748 _set_tssldt_desc(
749 gdt_table + __TSS(n),
750 (unsigned long)addr,
751 offsetof(struct tss_struct, __cacheline_filler) - 1,
752 9);
753 }
/* Populate the master IDT, wire up per-CPU trap state, and register the
 * NMI softirq handler. Runs once at boot. */
755 void __init trap_init(void)
756 {
757 extern void percpu_traps_init(void);
758 extern void cpu_init(void);
760 /*
761 * Note that interrupt gates are always used, rather than trap gates. We
762 * must have interrupts disabled until DS/ES/FS/GS are saved because the
763 * first activation must have the "bad" value(s) for these registers and
764 * we may lose them if another activation is installed before they are
765 * saved. The page-fault handler also needs interrupts disabled until %cr2
766 * has been read and saved on the stack.
767 */
768 set_intr_gate(TRAP_divide_error,&divide_error);
769 set_intr_gate(TRAP_debug,&debug);
770 set_intr_gate(TRAP_nmi,&nmi);
771 set_system_gate(TRAP_int3,&int3); /* usable from all privileges */
772 set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
773 set_intr_gate(TRAP_bounds,&bounds);
774 set_intr_gate(TRAP_invalid_op,&invalid_op);
775 set_intr_gate(TRAP_no_device,&device_not_available);
776 set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
777 set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
778 set_intr_gate(TRAP_no_segment,&segment_not_present);
779 set_intr_gate(TRAP_stack_error,&stack_segment);
780 set_intr_gate(TRAP_gp_fault,&general_protection);
781 set_intr_gate(TRAP_page_fault,&page_fault);
782 set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
783 set_intr_gate(TRAP_copro_error,&coprocessor_error);
784 set_intr_gate(TRAP_alignment_check,&alignment_check);
785 set_intr_gate(TRAP_machine_check,&machine_check);
786 set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
787 set_intr_gate(TRAP_deferred_nmi,&deferred_nmi);
789 #if defined(__i386__)
/* Hypercall gate: DPL 1 so only the guest kernel ring may invoke it. */
790 _set_gate(idt_table+HYPERCALL_VECTOR, 14, 1, &hypercall);
791 #endif
793 /* CPU0 uses the master IDT. */
794 idt_tables[0] = idt_table;
796 percpu_traps_init();
798 cpu_init();
800 open_softirq(NMI_SOFTIRQ, nmi_softirq);
801 }
804 long do_set_trap_table(trap_info_t *traps)
805 {
806 trap_info_t cur;
807 trap_info_t *dst = current->arch.traps;
809 LOCK_BIGLOCK(current->domain);
811 for ( ; ; )
812 {
813 if ( hypercall_preempt_check() )
814 {
815 UNLOCK_BIGLOCK(current->domain);
816 return hypercall1_create_continuation(
817 __HYPERVISOR_set_trap_table, traps);
818 }
820 if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;
822 if ( cur.address == 0 ) break;
824 if ( !VALID_CODESEL(cur.cs) ) return -EPERM;
826 memcpy(dst+cur.vector, &cur, sizeof(cur));
827 traps++;
828 }
830 UNLOCK_BIGLOCK(current->domain);
832 return 0;
833 }
836 long do_set_callbacks(unsigned long event_selector,
837 unsigned long event_address,
838 unsigned long failsafe_selector,
839 unsigned long failsafe_address)
840 {
841 struct exec_domain *d = current;
843 if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
844 return -EPERM;
846 d->arch.event_selector = event_selector;
847 d->arch.event_address = event_address;
848 d->arch.failsafe_selector = failsafe_selector;
849 d->arch.failsafe_address = failsafe_address;
851 return 0;
852 }
/* Hypercall: set the guest's virtual CR0.TS and the real TS bit, arming
 * the lazy-FPU #NM path (see math_state_restore). */
855 long do_fpu_taskswitch(void)
856 {
857 set_bit(EDF_GUEST_STTS, &current->ed_flags);
858 stts();
859 return 0;
860 }
/*
 * Is '_a' an address a guest may legitimately set a hardware breakpoint
 * on (i.e. entirely outside the hypervisor hole)?
 */
#if defined(__i386__)
#define DB_VALID_ADDR(_a) \
    ((_a) <= (PAGE_OFFSET - 4))
#elif defined(__x86_64__)
/*
 * BUG FIX: the expansion was not parenthesised as a whole, so
 * '!DB_VALID_ADDR(v)' negated only the first clause and the '||' second
 * clause leaked out, accepting invalid addresses.
 */
#define DB_VALID_ADDR(_a) \
    (((_a) >= HYPERVISOR_VIRT_END) || ((_a) <= (HYPERVISOR_VIRT_START-8)))
#endif
/*
 * Set a debug register on behalf of domain 'p', sanitising the value.
 * DR0-DR3 must point outside Xen's address range; DR6/DR7 have their
 * reserved bits forced and privileged DR7 settings rejected. The value
 * is written to hardware only when 'p' is the currently running domain;
 * otherwise it takes effect from p->arch.debugreg on context switch.
 * Returns 0, -EPERM for a disallowed value, or -EINVAL for a bad 'reg'.
 */
870 long set_debugreg(struct exec_domain *p, int reg, unsigned long value)
871 {
872 int i;
874 switch ( reg )
875 {
876 case 0:
877 if ( !DB_VALID_ADDR(value) ) return -EPERM;
878 if ( p == current )
879 __asm__ ( "mov %0, %%db0" : : "r" (value) );
880 break;
881 case 1:
882 if ( !DB_VALID_ADDR(value) ) return -EPERM;
883 if ( p == current )
884 __asm__ ( "mov %0, %%db1" : : "r" (value) );
885 break;
886 case 2:
887 if ( !DB_VALID_ADDR(value) ) return -EPERM;
888 if ( p == current )
889 __asm__ ( "mov %0, %%db2" : : "r" (value) );
890 break;
891 case 3:
892 if ( !DB_VALID_ADDR(value) ) return -EPERM;
893 if ( p == current )
894 __asm__ ( "mov %0, %%db3" : : "r" (value) );
895 break;
896 case 6:
897 /*
898 * DR6: Bits 4-11,16-31 reserved (set to 1).
899 * Bit 12 reserved (set to 0).
900 */
901 value &= 0xffffefff; /* reserved bits => 0 */
902 value |= 0xffff0ff0; /* reserved bits => 1 */
903 if ( p == current )
904 __asm__ ( "mov %0, %%db6" : : "r" (value) );
905 break;
906 case 7:
907 /*
908 * DR7: Bit 10 reserved (set to 1).
909 * Bits 11-12,14-15 reserved (set to 0).
910 * Privileged bits:
911 * GD (bit 13): must be 0.
912 * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
913 * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
914 */
915 /* DR7 == 0 => debugging disabled for this domain. */
916 if ( value != 0 )
917 {
918 value &= 0xffff27ff; /* reserved bits => 0 */
919 value |= 0x00000400; /* reserved bits => 1 */
/* GD would trap debug-register access inside Xen itself: forbidden. */
920 if ( (value & (1<<13)) != 0 ) return -EPERM;
/* Reject any R/W or LEN field equal to binary 10. */
921 for ( i = 0; i < 16; i += 2 )
922 if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
923 }
924 if ( p == current )
925 __asm__ ( "mov %0, %%db7" : : "r" (value) );
926 break;
927 default:
928 return -EINVAL;
929 }
/* Shadow copy: restored to hardware when this domain is scheduled. */
931 p->arch.debugreg[reg] = value;
932 return 0;
933 }
/* Hypercall wrapper: set a debug register for the calling domain. */
935 long do_set_debugreg(int reg, unsigned long value)
936 {
937 return set_debugreg(current, reg, value);
938 }
/* Hypercall: read back the shadow copy of a debug register. Note the
 * unsigned return type: -EINVAL is returned as its unsigned
 * representation, so callers must range-check 'reg' semantics. */
940 unsigned long do_get_debugreg(int reg)
941 {
942 if ( (reg < 0) || (reg > 7) ) return -EINVAL;
943 return current->arch.debugreg[reg];
944 }