debuggers.hg

view xen/arch/x86/traps.c @ 3129:e0351a3744a5

bitkeeper revision 1.1159.187.4 (41a471c8NjyQJy-vepqpb8H7LdzHzA)

Allow preemption of long-running hypercalls for softirq processing.
author kaf24@scramble.cl.cam.ac.uk
date Wed Nov 24 11:34:32 2004 +0000 (2004-11-24)
parents 50eaaedc0d3c
children 2754a2ed61c3 2fae9947de6f b013a6b30d9e
/******************************************************************************
 * arch/i386/traps.c
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

/*
 * Copyright (C) 1991, 1992 Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/console.h>
#include <asm/regs.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/smp.h>
#include <asm/flushtlb.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>

#if defined(__i386__)

#define DOUBLEFAULT_STACK_SIZE 1024
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];

asmlinkage int hypercall(void);

/* Master table, and the one used by CPU0. */
struct desc_struct idt_table[256] = { {0, 0}, };
/* All other CPUs have their own copy. */
struct desc_struct *idt_tables[NR_CPUS] = { 0 };

asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
asmlinkage void device_not_available(void);
asmlinkage void coprocessor_segment_overrun(void);
asmlinkage void invalid_TSS(void);
asmlinkage void segment_not_present(void);
asmlinkage void stack_segment(void);
asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);
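
/* Maximum number of stack words the stack dumpers below will print. */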
int kstack_depth_to_print = 8*20;

static inline int kernel_text_address(unsigned long addr)
{
    if (addr >= (unsigned long) &_stext &&
        addr <= (unsigned long) &_etext)
        return 1;
    return 0;
}

void show_guest_stack(void)
{
    int i;
    execution_context_t *ec = get_execution_context();
    unsigned long *stack = (unsigned long *)ec->esp;
    printk("Guest EIP is %lx\n", ec->eip);

    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n ");
        printk("%08lx ", *stack++);
    }
    printk("\n");
}

void show_trace(unsigned long *esp)
{
    unsigned long *stack, addr;
    int i;

    printk("Call Trace from ESP=%p: ", esp);
    stack = esp;
    i = 0;
    while (((long) stack & (STACK_SIZE-1)) != 0) {
        addr = *stack++;
        if (kernel_text_address(addr)) {
            if (i && ((i % 6) == 0))
                printk("\n ");
            printk("[<%08lx>] ", addr);
            i++;
        }
    }
    printk("\n");
}

void show_stack(unsigned long *esp)
{
    unsigned long *stack;
    int i;

    printk("Stack trace from ESP=%p:\n", esp);

    stack = esp;
    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n ");
        if ( kernel_text_address(*stack) )
            printk("[%08lx] ", *stack++);
        else
            printk("%08lx ", *stack++);
    }
    printk("\n");

    show_trace( esp );
}

void show_registers(struct xen_regs *regs)
{
    unsigned long esp;
    unsigned short ss, ds, es, fs, gs;

    if ( regs->cs & 3 )
    {
        esp = regs->esp;
        ss = regs->ss & 0xffff;
        ds = regs->ds & 0xffff;
        es = regs->es & 0xffff;
        fs = regs->fs & 0xffff;
        gs = regs->gs & 0xffff;
    }
    else
    {
        esp = (unsigned long)(&regs->esp);
        ss = __HYPERVISOR_DS;
        ds = __HYPERVISOR_DS;
        es = __HYPERVISOR_DS;
        fs = __HYPERVISOR_DS;
        gs = __HYPERVISOR_DS;
    }

    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           regs->eax, regs->ebx, regs->ecx, regs->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08lx\n",
           regs->esi, regs->edi, regs->ebp, esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           ds, es, fs, gs, ss);

    show_stack((unsigned long *)&regs->esp);
}

/*
 * This is called for faults at very unexpected times (e.g., when interrupts
 * are disabled). In such situations we can't do much that is safe. We try to
 * print out some tracing and then we just spin.
 */
asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
    int cpu = smp_processor_id();
    unsigned long cr2;
    static char *trapstr[] = {
        "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
        "invalid operation", "device not available", "double fault",
        "coprocessor segment", "invalid tss", "segment not found",
        "stack error", "general protection fault", "page fault",
        "spurious interrupt", "coprocessor error", "alignment check",
        "machine check", "simd error"
    };

    show_registers(regs);

    if ( trapnr == TRAP_page_fault )
    {
        __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
        printk("Faulting linear address might be %08lx\n", cr2);
    }

    printk("************************************\n");
    printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
           cpu, trapnr, trapstr[trapnr], regs->error_code,
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
    printk("System shutting down -- need manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}

static inline int do_trap(int trapnr, char *str,
                          struct xen_regs *regs,
                          int use_error_code)
{
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(trapnr, regs);

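    /*
     * Faults taken while executing in ring 0 are Xen's own; they are fatal
     * unless an exception-table fixup below recovers from them.
     */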
    if ( !(regs->cs & 3) )
        goto xen_fault;

    ti = current->thread.traps + trapnr;
    tb->flags = TBF_EXCEPTION;
    tb->cs = ti->cs;
    tb->eip = ti->address;
    if ( use_error_code )
    {
        tb->flags |= TBF_EXCEPTION_ERRCODE;
        tb->error_code = regs->error_code;
    }
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("Trap %d: %08x -> %08lx\n", trapnr, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(trapnr, regs);

    show_registers(regs);
    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
          "[error_code=%04x]\n",
          smp_processor_id(), trapnr, str, regs->error_code);
    return 0;
}

#define DO_ERROR_NOCODE(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 0); \
}

#define DO_ERROR(trapnr, str, name) \
asmlinkage int do_##name(struct xen_regs *regs) \
{ \
    return do_trap(trapnr, str, regs, 1); \
}

DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)

asmlinkage int do_int3(struct xen_regs *regs)
{
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;
    trap_info_t *ti;

    DEBUGGER_trap_entry(TRAP_int3, regs);

    if ( unlikely((regs->cs & 3) == 0) )
    {
        DEBUGGER_trap_fatal(TRAP_int3, regs);
        show_registers(regs);
        panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    }

    ti = current->thread.traps + 3;
    tb->flags = TBF_EXCEPTION;
    tb->cs = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;

    return 0;
}

asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
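    /*
     * back_link holds the selector of the TSS that was running when the
     * double fault occurred; TSS descriptors occupy alternate GDT slots
     * (see __TSS below), hence the final >>1 to recover the CPU number.
     */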
    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;

    /* Disable the NMI watchdog. It's useless now. */
    watchdog_on = 0;

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("CPU: %d\nEIP: %04x:[<%08x>] \nEFLAGS: %08x\n",
           cpu, tss->cs, tss->eip, tss->eflags);
    printk("CR3: %08x\n", tss->__cr3);
    printk("eax: %08x ebx: %08x ecx: %08x edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x edi: %08x ebp: %08x esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x es: %04x fs: %04x gs: %04x ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}

asmlinkage void do_machine_check(struct xen_regs *regs)
{
    fatal_trap(TRAP_machine_check, regs);
}

asmlinkage int do_page_fault(struct xen_regs *regs)
{
    trap_info_t *ti;
    unsigned long off, addr, fixup;
    struct domain *d = current;
    extern int map_ldt_shadow_page(unsigned int);
    struct trap_bounce *tb = &d->thread.trap_bounce;
    int cpu = d->processor;

    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incrc(page_faults);

    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
    {
        if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
             unlikely((addr >> L2_PAGETABLE_SHIFT) ==
                      ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
        {
            ptwr_flush(PTWR_PT_ACTIVE);
            return EXCRET_fault_fixed;
        }

        if ( (addr < PAGE_OFFSET) &&
             ((regs->error_code & 3) == 3) && /* write-protection fault */
             ptwr_do_page_fault(addr) )
        {
            if ( unlikely(d->mm.shadow_mode) )
                (void)shadow_fault(addr, regs->error_code);
            return EXCRET_fault_fixed;
        }
    }

    if ( unlikely(d->mm.shadow_mode) &&
         (addr < PAGE_OFFSET) && shadow_fault(addr, regs->error_code) )
        return EXCRET_fault_fixed;

    if ( unlikely(addr >= LDT_VIRT_START) &&
         (addr < (LDT_VIRT_START + (d->mm.ldt_ents*LDT_ENTRY_SIZE))) )
    {
        /*
         * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
         * send the fault up to the guest OS to be handled.
         */
        off = addr - LDT_VIRT_START;
        addr = d->mm.ldt_base + off;
        if ( likely(map_ldt_shadow_page(off >> PAGE_SHIFT)) )
            return EXCRET_fault_fixed; /* successfully copied the mapping */
    }

    if ( unlikely(!(regs->cs & 3)) )
        goto xen_fault;

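    /* Propagate the fault to the guest; vector 14 is the page-fault trap. */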
    ti = d->thread.traps + 14;
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
    tb->cr2 = addr;
    tb->error_code = regs->error_code;
    tb->cs = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        perfc_incrc(copy_user_faults);
        if ( !d->mm.shadow_mode )
            DPRINTK("Page fault: %08x -> %08lx\n", regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_page_fault, regs);

    if ( addr >= PAGE_OFFSET )
    {
        unsigned long page;
        page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
        printk("*pde = %08lx\n", page);
        if ( page & _PAGE_PRESENT )
        {
            page &= PAGE_MASK;
            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
            printk(" *pte = %08lx\n", page);
        }
#ifdef MEMORY_GUARD
        if ( !(regs->error_code & 1) )
            printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
#endif
    }

    show_registers(regs);
    panic("CPU%d FATAL PAGE FAULT\n"
          "[error_code=%04x]\n"
          "Faulting linear address might be %08lx\n",
          smp_processor_id(), regs->error_code, addr);
    return 0;
}

asmlinkage int do_general_protection(struct xen_regs *regs)
{
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(TRAP_gp_fault, regs);

    /* Badness if error in ring 0, or result of an interrupt. */
    if ( !(regs->cs & 3) || (regs->error_code & 1) )
        goto gp_in_kernel;

    /*
     * Cunning trick to allow arbitrary "INT n" handling.
     *
     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
     * instruction from trapping to the appropriate vector, when that might not
     * be expected by Xen or the guest OS. For example, that entry might be for
     * a fault handler (unlike traps, faults don't increment EIP), or might
     * expect an error code on the stack (which a software trap never
     * provides), or might be a hardware interrupt handler that doesn't like
     * being called spuriously.
     *
     * Instead, a GPF occurs with the faulting IDT vector in the error code.
     * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
     * clear to indicate that it's a software fault, not hardware.
     *
     * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
     * okay because they can only be triggered by an explicit DPL-checked
     * instruction. The DPL specified by the guest OS for these vectors is NOT
     * CHECKED!!
     */
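    /*
     * Worked example: a guest executes INT 0x80 against an IDT entry whose
     * DPL we forced to 0. The CPU raises #GP with error code
     * (0x80 << 3) | 2 = 0x402, so (error_code & 3) == 2 below, and
     * (error_code >> 3) recovers the vector, 0x80.
     */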
    if ( (regs->error_code & 3) == 2 )
    {
        /* This fault must be due to <INT n> instruction. */
        ti = current->thread.traps + (regs->error_code>>3);
        if ( TI_GET_DPL(ti) >= (regs->cs & 3) )
        {
            tb->flags = TBF_EXCEPTION;
            regs->eip += 2;
            goto finish_propagation;
        }
    }

#if defined(__i386__)
    if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments) &&
         (regs->error_code == 0) &&
         gpf_emulate_4gb(regs) )
        return 0;
#endif

    /* Pass on GPF as is. */
    ti = current->thread.traps + 13;
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = regs->error_code;
 finish_propagation:
    tb->cs = ti->cs;
    tb->eip = ti->address;
    if ( TI_GET_IF(ti) )
        d->shared_info->vcpu_data[0].evtchn_upcall_mask = 1;
    return 0;

 gp_in_kernel:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("GPF (%04x): %08x -> %08lx\n",
                regs->error_code, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_gp_fault, regs);

    show_registers(regs);
    panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
          smp_processor_id(), regs->error_code);
    return 0;
}

asmlinkage void mem_parity_error(struct xen_regs *regs)
{
    console_force_unlock();
    printk("\n\nNMI - MEMORY ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

asmlinkage void io_check_error(struct xen_regs *regs)
{
    console_force_unlock();

    printk("\n\nNMI - I/O ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

static void unknown_nmi_error(unsigned char reason, struct xen_regs * regs)
{
    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
    printk("Dazed and confused, but trying to continue\n");
    printk("Do you have a strange power saving mode enabled?\n");
}

asmlinkage void do_nmi(struct xen_regs * regs, unsigned long reason)
{
    ++nmi_count(smp_processor_id());

#if CONFIG_X86_LOCAL_APIC
    if ( nmi_watchdog )
        nmi_watchdog_tick(regs);
    else
#endif
        unknown_nmi_error((unsigned char)(reason&0xff), regs);
}
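
/*
 * Deferred NMI processing: bit 0 of nmi_softirq_reason requests
 * VIRQ_PARITY_ERR for DOM0; bit 1 requests VIRQ_IO_ERR.
 */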
unsigned long nmi_softirq_reason;
static void nmi_softirq(void)
{
    if ( dom0 == NULL )
        return;

    if ( test_and_clear_bit(0, &nmi_softirq_reason) )
        send_guest_virq(dom0, VIRQ_PARITY_ERR);

    if ( test_and_clear_bit(1, &nmi_softirq_reason) )
        send_guest_virq(dom0, VIRQ_IO_ERR);
}

asmlinkage int math_state_restore(struct xen_regs *regs)
{
    /* Prevent recursion. */
    clts();

    if ( !test_bit(DF_USEDFPU, &current->flags) )
    {
        if ( test_bit(DF_DONEFPUINIT, &current->flags) )
            restore_fpu(current);
        else
            init_fpu();
        set_bit(DF_USEDFPU, &current->flags); /* so we fnsave on switch_to() */
    }

    if ( test_and_clear_bit(DF_GUEST_STTS, &current->flags) )
    {
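        /* Guest had CR0.TS set: bounce #NM (device not available, vector 7)
         * back to it as well. */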
        struct trap_bounce *tb = &current->thread.trap_bounce;
        tb->flags = TBF_EXCEPTION;
        tb->cs = current->thread.traps[7].cs;
        tb->eip = current->thread.traps[7].address;
    }

    return EXCRET_fault_fixed;
}

asmlinkage int do_debug(struct xen_regs *regs)
{
    unsigned int condition;
    struct domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;

    DEBUGGER_trap_entry(TRAP_debug, regs);

    __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));

    /* Mask out spurious debug traps due to lazy DR7 setting. */
    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
         (d->thread.debugreg[7] == 0) )
    {
        __asm__("movl %0,%%db7" : : "r" (0));
        goto out;
    }

    if ( (regs->cs & 3) == 0 )
    {
        /* Clear TF just for absolute sanity. */
        regs->eflags &= ~EF_TF;
        /*
         * We ignore watchpoints when they trigger within Xen. This may happen
         * when a buffer is passed to us which previously had a watchpoint set
         * on it. No need to bump EIP; the only faulting trap is an instruction
         * breakpoint, which can't happen to us.
         */
        goto out;
    }

    /* Save debug status register where guest OS can peek at it. */
    d->thread.debugreg[6] = condition;

    tb->flags = TBF_EXCEPTION;
    tb->cs = d->thread.traps[1].cs;
    tb->eip = d->thread.traps[1].address;

 out:
    return EXCRET_not_a_fault;
}

asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
{
    return EXCRET_not_a_fault;
}
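
/*
 * IDT gate descriptor layout as assembled below: the low dword holds the
 * code selector (bits 16-31) and offset 15:0; the high dword holds offset
 * 31:16 plus 0x8000 (present), dpl<<13, and type<<8 (type 14 = 32-bit
 * interrupt gate).
 */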
#define _set_gate(gate_addr,type,dpl,addr) \
do { \
    int __d0, __d1; \
    __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
        "movw %4,%%dx\n\t" \
        "movl %%eax,%0\n\t" \
        "movl %%edx,%1" \
        :"=m" (*((long *) (gate_addr))), \
         "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
        :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
         "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
} while (0)

void set_intr_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,0,addr);
}

static void __init set_system_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,3,addr);
}
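
/*
 * Task gate: the low descriptor word carries the TSS selector in bits
 * 16-31; the high word is 0x8500 (present, DPL 0, type 5 = task gate).
 * No offset field is used.
 */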
static void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].a = sel << 16;
    idt_table[n].b = 0x8500;
}

#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
    *((gate_addr)+1) = ((base) & 0xff000000) | \
        (((base) & 0x00ff0000)>>16) | \
        ((limit) & 0xf0000) | \
        ((dpl)<<13) | \
        (0x00408000) | \
        ((type)<<8); \
    *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
        ((limit) & 0x0ffff); }

#define _set_tssldt_desc(n,addr,limit,type) \
__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
    "movw %%ax,2(%2)\n\t" \
    "rorl $16,%%eax\n\t" \
    "movb %%al,4(%2)\n\t" \
    "movb %4,5(%2)\n\t" \
    "movb $0,6(%2)\n\t" \
    "movb %%ah,7(%2)\n\t" \
    "rorl $16,%%eax" \
    : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))

void set_tss_desc(unsigned int n, void *addr)
{
    _set_tssldt_desc(
        gdt_table + __TSS(n),
        (int)addr,
        offsetof(struct tss_struct, __cacheline_filler) - 1,
        0x89);
}

void __init trap_init(void)
{
    /*
     * Make a separate task for double faults. This will get us debug output if
     * we blow the kernel stack.
     */
    struct tss_struct *tss = &doublefault_tss;
    memset(tss, 0, sizeof(*tss));
    tss->ds = __HYPERVISOR_DS;
    tss->es = __HYPERVISOR_DS;
    tss->ss = __HYPERVISOR_DS;
    tss->esp = (unsigned long)
        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3 = __pa(idle_pg_table);
    tss->cs = __HYPERVISOR_CS;
    tss->eip = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
                     (int)tss, 235, 0x89);

    /*
     * Note that interrupt gates are always used, rather than trap gates. We
     * must have interrupts disabled until DS/ES/FS/GS are saved because the
     * first activation must have the "bad" value(s) for these registers and
     * we may lose them if another activation is installed before they are
     * saved. The page-fault handler also needs interrupts disabled until %cr2
     * has been read and saved on the stack.
     */
    set_intr_gate(TRAP_divide_error,&divide_error);
    set_intr_gate(TRAP_debug,&debug);
    set_intr_gate(TRAP_nmi,&nmi);
    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
    set_intr_gate(TRAP_bounds,&bounds);
    set_intr_gate(TRAP_invalid_op,&invalid_op);
    set_intr_gate(TRAP_no_device,&device_not_available);
    set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3);
    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
    set_intr_gate(TRAP_no_segment,&segment_not_present);
    set_intr_gate(TRAP_stack_error,&stack_segment);
    set_intr_gate(TRAP_gp_fault,&general_protection);
    set_intr_gate(TRAP_page_fault,&page_fault);
    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
    set_intr_gate(TRAP_copro_error,&coprocessor_error);
    set_intr_gate(TRAP_alignment_check,&alignment_check);
    set_intr_gate(TRAP_machine_check,&machine_check);
    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);

    /* Only ring 1 can access Xen services. */
    _set_gate(idt_table+HYPERCALL_VECTOR,14,1,&hypercall);

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /*
     * Should be a barrier for any external CPU state.
     */
    {
        extern void cpu_init(void);
        cpu_init();
    }

    open_softirq(NMI_SOFTIRQ, nmi_softirq);
}

long do_set_trap_table(trap_info_t *traps)
{
    trap_info_t cur;
    trap_info_t *dst = current->thread.traps;

    for ( ; ; )
    {
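        /*
         * This hypercall may run for a long time. hypercall_may_preempt()
         * gives pending softirq work a chance to run by restarting the call
         * with the partially-advanced 'traps' pointer (see the changeset
         * description above).
         */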
        hypercall_may_preempt(__HYPERVISOR_set_trap_table, 1, traps);

        if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;

        if ( cur.address == 0 ) break;

        if ( !VALID_CODESEL(cur.cs) ) return -EPERM;

        memcpy(dst+cur.vector, &cur, sizeof(cur));
        traps++;
    }

    return 0;
}

long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct domain *d = current;

    if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
        return -EPERM;

    d->thread.event_selector = event_selector;
    d->thread.event_address = event_address;
    d->thread.failsafe_selector = failsafe_selector;
    d->thread.failsafe_address = failsafe_address;

    return 0;
}

long set_fast_trap(struct domain *p, int idx)
{
    trap_info_t *ti;

    /* Index 0 is special: it disables fast traps. */
    if ( idx == 0 )
    {
        if ( p == current )
            CLEAR_FAST_TRAP(&p->thread);
        SET_DEFAULT_FAST_TRAP(&p->thread);
        return 0;
    }

    /*
     * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
     * The former range is used by Windows and MS-DOS.
     * Vector 0x80 is used by Linux and the BSD variants.
     */
    if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
        return -1;

    ti = p->thread.traps + idx;

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable.
     */
    if ( TI_GET_IF(ti) )
        return -1;

    if ( p == current )
        CLEAR_FAST_TRAP(&p->thread);

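    /*
     * Build a trap-gate descriptor by hand: .a holds offset 15:0 in its low
     * word and the selector in its high word; .b is offset 31:16 | 0x8f00
     * (present, type 0xf = 32-bit trap gate) | the guest-specified DPL in
     * bits 13-14.
     */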
    p->thread.fast_trap_idx = idx;
    p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    p->thread.fast_trap_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;

    if ( p == current )
        SET_FAST_TRAP(&p->thread);

    return 0;
}

long do_set_fast_trap(int idx)
{
    return set_fast_trap(current, idx);
}

long do_fpu_taskswitch(void)
{
    set_bit(DF_GUEST_STTS, &current->flags);
    stts();
    return 0;
}

long set_debugreg(struct domain *p, int reg, unsigned long value)
{
    int i;

    switch ( reg )
    {
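    /*
     * DR0-DR3 hold linear breakpoint addresses; they must lie below
     * PAGE_OFFSET so a guest cannot plant a watchpoint in Xen's address
     * space (the -4 allows for the widest 4-byte watchpoint datum).
     */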
    case 0:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db0" : : "r" (value) );
        break;
    case 1:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db1" : : "r" (value) );
        break;
    case 2:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db2" : : "r" (value) );
        break;
    case 3:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db3" : : "r" (value) );
        break;
    case 6:
        /*
         * DR6: Bits 4-11,16-31 reserved (set to 1).
         *      Bit 12 reserved (set to 0).
         */
        value &= 0xffffefff; /* reserved bits => 0 */
        value |= 0xffff0ff0; /* reserved bits => 1 */
        if ( p == current )
            __asm__ ( "movl %0, %%db6" : : "r" (value) );
        break;
    case 7:
        /*
         * DR7: Bit 10 reserved (set to 1).
         *      Bits 11-12,14-15 reserved (set to 0).
         * Privileged bits:
         *      GD (bit 13): must be 0.
         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
         */
        /* DR7 == 0 => debugging disabled for this domain. */
        if ( value != 0 )
        {
            value &= 0xffff27ff; /* reserved bits => 0 */
            value |= 0x00000400; /* reserved bits => 1 */
            if ( (value & (1<<13)) != 0 ) return -EPERM;
            for ( i = 0; i < 16; i += 2 )
                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
        }
        if ( p == current )
            __asm__ ( "movl %0, %%db7" : : "r" (value) );
        break;
    default:
        return -EINVAL;
    }

    p->thread.debugreg[reg] = value;
    return 0;
}

long do_set_debugreg(int reg, unsigned long value)
{
    return set_debugreg(current, reg, value);
}

unsigned long do_get_debugreg(int reg)
{
    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
    return current->thread.debugreg[reg];
}

#endif /* __i386__ */