debuggers.hg

view xen/arch/x86/traps.c @ 3349:c754bd0be650

bitkeeper revision 1.1159.1.496 (41c85faeMBUejFtICiJueb_Xdh8yJA)

Priv-op emulation in Xen, for RDMSR/WRMSR/WBINVD. Cleaned up Linux
a bit as a result.

author   kaf24@scramble.cl.cam.ac.uk
date     Tue Dec 21 17:38:54 2004 +0000 (2004-12-21)
parents  dda5ab69e74a
children b2fa96909734
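
What this changeset enables, as a quick sketch: a sufficiently privileged ring-1
guest kernel may now execute RDMSR/WRMSR (and, for physdev-capable domains,
WBINVD) directly. The instruction raises #GP, and do_general_protection() in the
listing below hands it to emulate_privileged_op(), which performs the operation
and steps EIP over the two-byte opcode. A minimal, hypothetical guest-side
wrapper (not part of this changeset; the name is illustrative):

    static inline unsigned long long guest_rdmsr(unsigned int msr)
    {
        unsigned int lo, hi;
        /* In ring 1 this faults with error_code == 0; Xen emulates it for
         * IS_PRIV domains and advances EIP past the 2-byte opcode. */
        __asm__ __volatile__ ( "rdmsr" : "=a" (lo), "=d" (hi) : "c" (msr) );
        return ((unsigned long long)hi << 32) | lo;
    }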
line source
/******************************************************************************
 * arch/i386/traps.c
 *
 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

/*
 * Copyright (C) 1991, 1992  Linus Torvalds
 *
 * Pentium III FXSR, SSE support
 * Gareth Hughes <gareth@valinux.com>, May 2000
 */
#include <xen/config.h>
#include <xen/init.h>
#include <xen/sched.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/mm.h>
#include <xen/console.h>
#include <asm/regs.h>
#include <xen/delay.h>
#include <xen/event.h>
#include <xen/spinlock.h>
#include <xen/irq.h>
#include <xen/perfc.h>
#include <xen/softirq.h>
#include <asm/shadow.h>
#include <asm/domain_page.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/atomic.h>
#include <asm/desc.h>
#include <asm/debugreg.h>
#include <asm/smp.h>
#include <asm/flushtlb.h>
#include <asm/uaccess.h>
#include <asm/i387.h>
#include <asm/debugger.h>
#include <asm/msr.h>
#if defined(__i386__)

#define GUEST_FAULT(_r) (likely(VM86_MODE(_r) || !RING_0(_r)))

#define DOUBLEFAULT_STACK_SIZE 1024
static struct tss_struct doublefault_tss;
static unsigned char doublefault_stack[DOUBLEFAULT_STACK_SIZE];

asmlinkage int hypercall(void);

/* Master table, and the one used by CPU0. */
struct desc_struct idt_table[256] = { {0, 0}, };
/* All other CPUs have their own copy. */
struct desc_struct *idt_tables[NR_CPUS] = { 0 };

asmlinkage void divide_error(void);
asmlinkage void debug(void);
asmlinkage void nmi(void);
asmlinkage void int3(void);
asmlinkage void overflow(void);
asmlinkage void bounds(void);
asmlinkage void invalid_op(void);
asmlinkage void device_not_available(void);
asmlinkage void coprocessor_segment_overrun(void);
asmlinkage void invalid_TSS(void);
asmlinkage void segment_not_present(void);
asmlinkage void stack_segment(void);
asmlinkage void general_protection(void);
asmlinkage void page_fault(void);
asmlinkage void coprocessor_error(void);
asmlinkage void simd_coprocessor_error(void);
asmlinkage void alignment_check(void);
asmlinkage void spurious_interrupt_bug(void);
asmlinkage void machine_check(void);

int kstack_depth_to_print = 8*20;
static inline int kernel_text_address(unsigned long addr)
{
    if (addr >= (unsigned long) &_stext &&
        addr <= (unsigned long) &_etext)
        return 1;
    return 0;
}
void show_guest_stack(void)
{
    int i;
    execution_context_t *ec = get_execution_context();
    unsigned long *stack = (unsigned long *)ec->esp;
    printk("Guest EIP is %lx\n", ec->eip);

    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n       ");
        printk("%08lx ", *stack++);
    }
    printk("\n");
}
void show_trace(unsigned long *esp)
{
    unsigned long *stack, addr;
    int i;

    printk("Call Trace from ESP=%p: ", esp);
    stack = esp;
    i = 0;
    while (((long) stack & (STACK_SIZE-1)) != 0) {
        addr = *stack++;
        if (kernel_text_address(addr)) {
            if (i && ((i % 6) == 0))
                printk("\n   ");
            printk("[<%08lx>] ", addr);
            i++;
        }
    }
    printk("\n");
}
void show_stack(unsigned long *esp)
{
    unsigned long *stack;
    int i;

    printk("Stack trace from ESP=%p:\n", esp);

    stack = esp;
    for ( i = 0; i < kstack_depth_to_print; i++ )
    {
        if ( ((long)stack & (STACK_SIZE-1)) == 0 )
            break;
        if ( i && ((i % 8) == 0) )
            printk("\n       ");
        if ( kernel_text_address(*stack) )
            printk("[%08lx] ", *stack++);
        else
            printk("%08lx ", *stack++);
    }
    printk("\n");

    show_trace(esp);
}
void show_registers(struct xen_regs *regs)
{
    unsigned long esp;
    unsigned short ss, ds, es, fs, gs;

    if ( GUEST_FAULT(regs) )
    {
        esp = regs->esp;
        ss  = regs->ss & 0xffff;
        ds  = regs->ds & 0xffff;
        es  = regs->es & 0xffff;
        fs  = regs->fs & 0xffff;
        gs  = regs->gs & 0xffff;
    }
    else
    {
        esp = (unsigned long)(&regs->esp);
        ss  = __HYPERVISOR_DS;
        ds  = __HYPERVISOR_DS;
        es  = __HYPERVISOR_DS;
        fs  = __HYPERVISOR_DS;
        gs  = __HYPERVISOR_DS;
    }

    printk("CPU:    %d\nEIP:    %04x:[<%08x>]\nEFLAGS: %08x\n",
           smp_processor_id(), 0xffff & regs->cs, regs->eip, regs->eflags);
    printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
           regs->eax, regs->ebx, regs->ecx, regs->edx);
    printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08lx\n",
           regs->esi, regs->edi, regs->ebp, esp);
    printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
           ds, es, fs, gs, ss);

    show_stack((unsigned long *)&regs->esp);
}
/*
 * This is called for faults at very unexpected times (e.g., when interrupts
 * are disabled). In such situations we can't do much that is safe. We try to
 * print out some tracing and then we just spin.
 */
asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
    int cpu = smp_processor_id();
    unsigned long cr2;
    static char *trapstr[] = {
        "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
        "invalid operation", "device not available", "double fault",
        "coprocessor segment", "invalid tss", "segment not found",
        "stack error", "general protection fault", "page fault",
        "spurious interrupt", "coprocessor error", "alignment check",
        "machine check", "simd error"
    };

    show_registers(regs);

    if ( trapnr == TRAP_page_fault )
    {
        __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (cr2) : );
        printk("Faulting linear address might be %08lx\n", cr2);
    }

    printk("************************************\n");
    printk("CPU%d FATAL TRAP %d (%s), ERROR_CODE %04x%s.\n",
           cpu, trapnr, trapstr[trapnr], regs->error_code,
           (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
    printk("System shutting down -- need manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}
static inline int do_trap(int trapnr, char *str,
                          struct xen_regs *regs,
                          int use_error_code)
{
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(trapnr, regs);

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    ti = current->thread.traps + trapnr;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( use_error_code )
    {
        tb->flags |= TBF_EXCEPTION_ERRCODE;
        tb->error_code = regs->error_code;
    }
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("Trap %d: %08x -> %08lx\n", trapnr, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(trapnr, regs);

    show_registers(regs);
    panic("CPU%d FATAL TRAP: vector = %d (%s)\n"
          "[error_code=%04x]\n",
          smp_processor_id(), trapnr, str, regs->error_code);
    return 0;
}
#define DO_ERROR_NOCODE(trapnr, str, name)              \
asmlinkage int do_##name(struct xen_regs *regs)         \
{                                                       \
    return do_trap(trapnr, str, regs, 0);               \
}

#define DO_ERROR(trapnr, str, name)                     \
asmlinkage int do_##name(struct xen_regs *regs)         \
{                                                       \
    return do_trap(trapnr, str, regs, 1);               \
}

DO_ERROR_NOCODE( 0, "divide error", divide_error)
DO_ERROR_NOCODE( 4, "overflow", overflow)
DO_ERROR_NOCODE( 5, "bounds", bounds)
DO_ERROR_NOCODE( 6, "invalid operand", invalid_op)
DO_ERROR_NOCODE( 9, "coprocessor segment overrun", coprocessor_segment_overrun)
DO_ERROR(10, "invalid TSS", invalid_TSS)
DO_ERROR(11, "segment not present", segment_not_present)
DO_ERROR(12, "stack segment", stack_segment)
DO_ERROR_NOCODE(16, "fpu error", coprocessor_error)
DO_ERROR(17, "alignment check", alignment_check)
DO_ERROR_NOCODE(19, "simd error", simd_coprocessor_error)
asmlinkage int do_int3(struct xen_regs *regs)
{
    struct exec_domain *ed = current;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;

    DEBUGGER_trap_entry(TRAP_int3, regs);

    if ( !GUEST_FAULT(regs) )
    {
        DEBUGGER_trap_fatal(TRAP_int3, regs);
        show_registers(regs);
        panic("CPU%d FATAL TRAP: vector = 3 (Int3)\n", smp_processor_id());
    }

    ti = current->thread.traps + 3;
    tb->flags = TBF_EXCEPTION;
    tb->cs    = ti->cs;
    tb->eip   = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;

    return 0;
}
asmlinkage void do_double_fault(void)
{
    struct tss_struct *tss = &doublefault_tss;
    unsigned int cpu = ((tss->back_link>>3)-__FIRST_TSS_ENTRY)>>1;

    /* Disable the NMI watchdog. It's useless now. */
    watchdog_on = 0;

    /* Find information saved during fault and dump it to the console. */
    tss = &init_tss[cpu];
    printk("CPU:    %d\nEIP:    %04x:[<%08x>]\nEFLAGS: %08x\n",
           cpu, tss->cs, tss->eip, tss->eflags);
    printk("CR3:    %08x\n", tss->__cr3);
    printk("eax: %08x   ebx: %08x   ecx: %08x   edx: %08x\n",
           tss->eax, tss->ebx, tss->ecx, tss->edx);
    printk("esi: %08x   edi: %08x   ebp: %08x   esp: %08x\n",
           tss->esi, tss->edi, tss->ebp, tss->esp);
    printk("ds: %04x   es: %04x   fs: %04x   gs: %04x   ss: %04x\n",
           tss->ds, tss->es, tss->fs, tss->gs, tss->ss);
    printk("************************************\n");
    printk("CPU%d DOUBLE FAULT -- system shutdown\n", cpu);
    printk("System needs manual reset.\n");
    printk("************************************\n");

    /* Lock up the console to prevent spurious output from other CPUs. */
    console_force_lock();

    /* Wait for manual reset. */
    for ( ; ; )
        __asm__ __volatile__ ( "hlt" );
}
asmlinkage void do_machine_check(struct xen_regs *regs)
{
    fatal_trap(TRAP_machine_check, regs);
}
asmlinkage int do_page_fault(struct xen_regs *regs)
{
    trap_info_t *ti;
    unsigned long off, addr, fixup;
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;
    extern int map_ldt_shadow_page(unsigned int);
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    int cpu = ed->processor;
    int ret;

    __asm__ __volatile__ ("movl %%cr2,%0" : "=r" (addr) : );

    DEBUGGER_trap_entry(TRAP_page_fault, regs);

    perfc_incrc(page_faults);

    if ( likely(VM_ASSIST(d, VMASST_TYPE_writable_pagetables)) )
    {
        LOCK_BIGLOCK(d);
        if ( unlikely(ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l1va) &&
             unlikely((addr >> L2_PAGETABLE_SHIFT) ==
                      ptwr_info[cpu].ptinfo[PTWR_PT_ACTIVE].l2_idx) )
        {
            ptwr_flush(PTWR_PT_ACTIVE);
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }

        if ( (addr < PAGE_OFFSET) &&
             ((regs->error_code & 3) == 3) && /* write-protection fault */
             ptwr_do_page_fault(addr) )
        {
            if ( unlikely(ed->mm.shadow_mode) )
                (void)shadow_fault(addr, regs->error_code);
            UNLOCK_BIGLOCK(d);
            return EXCRET_fault_fixed;
        }
        UNLOCK_BIGLOCK(d);
    }

    if ( unlikely(ed->mm.shadow_mode) &&
         (addr < PAGE_OFFSET) && shadow_fault(addr, regs->error_code) )
        return EXCRET_fault_fixed;

    if ( unlikely(addr >= LDT_VIRT_START(ed)) &&
         (addr < (LDT_VIRT_START(ed) + (ed->mm.ldt_ents*LDT_ENTRY_SIZE))) )
    {
        /*
         * Copy a mapping from the guest's LDT, if it is valid. Otherwise we
         * send the fault up to the guest OS to be handled.
         */
        LOCK_BIGLOCK(d);
        off  = addr - LDT_VIRT_START(ed);
        addr = ed->mm.ldt_base + off;
        ret = map_ldt_shadow_page(off >> PAGE_SHIFT);
        UNLOCK_BIGLOCK(d);
        if ( likely(ret) )
            return EXCRET_fault_fixed; /* successfully copied the mapping */
    }

    if ( !GUEST_FAULT(regs) )
        goto xen_fault;

    ti = ed->thread.traps + 14;
    tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE | TBF_EXCEPTION_CR2;
    tb->cr2        = addr;
    tb->error_code = regs->error_code;
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 xen_fault:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        perfc_incrc(copy_user_faults);
        if ( !ed->mm.shadow_mode )
            DPRINTK("Page fault: %08x -> %08lx\n", regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_page_fault, regs);

    if ( addr >= PAGE_OFFSET )
    {
        unsigned long page;
        page = l2_pgentry_val(idle_pg_table[addr >> L2_PAGETABLE_SHIFT]);
        printk("*pde = %08lx\n", page);
        if ( page & _PAGE_PRESENT )
        {
            page &= PAGE_MASK;
            page = ((unsigned long *) __va(page))[(addr&0x3ff000)>>PAGE_SHIFT];
            printk(" *pte = %08lx\n", page);
        }
#ifdef MEMORY_GUARD
        if ( !(regs->error_code & 1) )
            printk(" -- POSSIBLY AN ACCESS TO FREED MEMORY? --\n");
#endif
    }

    show_registers(regs);
    panic("CPU%d FATAL PAGE FAULT\n"
          "[error_code=%04x]\n"
          "Faulting linear address might be %08lx\n",
          smp_processor_id(), regs->error_code, addr);
    return 0;
}
static int emulate_privileged_op(struct xen_regs *regs)
{
    u16 opcode;

    if ( get_user(opcode, (u16 *)regs->eip) || ((opcode & 0xff) != 0x0f) )
        return 0;

    switch ( opcode >> 8 )
    {
    case 0x09: /* WBINVD */
        if ( !IS_CAPABLE_PHYSDEV(current->domain) )
        {
            DPRINTK("Non-physdev domain attempted WBINVD.\n");
            return 0;
        }
        wbinvd();
        regs->eip += 2;
        return 1;

    case 0x30: /* WRMSR */
        if ( !IS_PRIV(current->domain) )
        {
            DPRINTK("Non-priv domain attempted WRMSR.\n");
            return 0;
        }
        wrmsr(regs->ecx, regs->eax, regs->edx);
        regs->eip += 2;
        return 1;

    case 0x32: /* RDMSR */
        if ( !IS_PRIV(current->domain) )
        {
            DPRINTK("Non-priv domain attempted RDMSR.\n");
            return 0;
        }
        rdmsr(regs->ecx, regs->eax, regs->edx);
        regs->eip += 2;
        return 1;
    }

    return 0;
}
asmlinkage int do_general_protection(struct xen_regs *regs)
{
    struct exec_domain *ed = current;
    struct domain *d = ed->domain;
    struct trap_bounce *tb = &ed->thread.trap_bounce;
    trap_info_t *ti;
    unsigned long fixup;

    DEBUGGER_trap_entry(TRAP_gp_fault, regs);

    if ( regs->error_code & 1 )
        goto hardware_gp;

    if ( !GUEST_FAULT(regs) )
        goto gp_in_kernel;

    /*
     * Cunning trick to allow arbitrary "INT n" handling.
     *
     * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
     * instruction from trapping to the appropriate vector, when that might
     * not be expected by Xen or the guest OS. For example, that entry might
     * be for a fault handler (unlike traps, faults don't increment EIP), or
     * might expect an error code on the stack (which a software trap never
     * provides), or might be a hardware interrupt handler that doesn't like
     * being called spuriously.
     *
     * Instead, a GPF occurs with the faulting IDT vector in the error code.
     * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
     * clear to indicate that it's a software fault, not hardware.
     *
     * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
     * okay because they can only be triggered by an explicit DPL-checked
     * instruction. The DPL specified by the guest OS for these vectors is
     * NOT CHECKED!!
     */
    if ( (regs->error_code & 3) == 2 )
    {
        /* This fault must be due to <INT n> instruction. */
        ti = current->thread.traps + (regs->error_code>>3);
        if ( TI_GET_DPL(ti) >= (VM86_MODE(regs) ? 3 : (regs->cs & 3)) )
        {
            tb->flags = TBF_EXCEPTION;
            regs->eip += 2;
            goto finish_propagation;
        }
    }

    /* Emulate some simple privileged instructions when exec'ed in ring 1. */
    if ( (regs->error_code == 0) &&
         RING_1(regs) &&
         emulate_privileged_op(regs) )
        return 0;

#if defined(__i386__)
    if ( VM_ASSIST(d, VMASST_TYPE_4gb_segments) &&
         (regs->error_code == 0) &&
         gpf_emulate_4gb(regs) )
        return 0;
#endif

    /* Pass on GPF as is. */
    ti = current->thread.traps + 13;
    tb->flags      = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
    tb->error_code = regs->error_code;
 finish_propagation:
    tb->cs         = ti->cs;
    tb->eip        = ti->address;
    if ( TI_GET_IF(ti) )
        ed->vcpu_info->evtchn_upcall_mask = 1;
    return 0;

 gp_in_kernel:

    if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
    {
        DPRINTK("GPF (%04x): %08x -> %08lx\n",
                regs->error_code, regs->eip, fixup);
        regs->eip = fixup;
        return 0;
    }

    DEBUGGER_trap_fatal(TRAP_gp_fault, regs);

 hardware_gp:
    show_registers(regs);
    panic("CPU%d GENERAL PROTECTION FAULT\n[error_code=%04x]\n",
          smp_processor_id(), regs->error_code);
    return 0;
}
asmlinkage void mem_parity_error(struct xen_regs *regs)
{
    console_force_unlock();
    printk("\n\nNMI - MEMORY ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

asmlinkage void io_check_error(struct xen_regs *regs)
{
    console_force_unlock();

    printk("\n\nNMI - I/O ERROR\n");
    fatal_trap(TRAP_nmi, regs);
}

static void unknown_nmi_error(unsigned char reason, struct xen_regs * regs)
{
    printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
    printk("Dazed and confused, but trying to continue\n");
    printk("Do you have a strange power saving mode enabled?\n");
}

asmlinkage void do_nmi(struct xen_regs * regs, unsigned long reason)
{
    ++nmi_count(smp_processor_id());

#if CONFIG_X86_LOCAL_APIC
    if ( nmi_watchdog )
        nmi_watchdog_tick(regs);
    else
#endif
        unknown_nmi_error((unsigned char)(reason&0xff), regs);
}

unsigned long nmi_softirq_reason;
static void nmi_softirq(void)
{
    if ( dom0 == NULL )
        return;

    if ( test_and_clear_bit(0, &nmi_softirq_reason) )
        send_guest_virq(dom0->exec_domain[0], VIRQ_PARITY_ERR);

    if ( test_and_clear_bit(1, &nmi_softirq_reason) )
        send_guest_virq(dom0->exec_domain[0], VIRQ_IO_ERR);
}
asmlinkage int math_state_restore(struct xen_regs *regs)
{
    /* Prevent recursion. */
    clts();

    if ( !test_bit(EDF_USEDFPU, &current->ed_flags) )
    {
        if ( test_bit(EDF_DONEFPUINIT, &current->ed_flags) )
            restore_fpu(current);
        else
            init_fpu();
        set_bit(EDF_USEDFPU, &current->ed_flags); /* so we fnsave on switch_to() */
    }

    if ( test_and_clear_bit(EDF_GUEST_STTS, &current->ed_flags) )
    {
        struct trap_bounce *tb = &current->thread.trap_bounce;
        tb->flags = TBF_EXCEPTION;
        tb->cs    = current->thread.traps[7].cs;
        tb->eip   = current->thread.traps[7].address;
    }

    return EXCRET_fault_fixed;
}
asmlinkage int do_debug(struct xen_regs *regs)
{
    unsigned int condition;
    struct exec_domain *d = current;
    struct trap_bounce *tb = &d->thread.trap_bounce;

    DEBUGGER_trap_entry(TRAP_debug, regs);

    __asm__ __volatile__("movl %%db6,%0" : "=r" (condition));

    /* Mask out spurious debug traps due to lazy DR7 setting */
    if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
         (d->thread.debugreg[7] == 0) )
    {
        __asm__("movl %0,%%db7" : : "r" (0));
        goto out;
    }

    if ( !GUEST_FAULT(regs) )
    {
        /* Clear TF just for absolute sanity. */
        regs->eflags &= ~EF_TF;
        /*
         * We ignore watchpoints when they trigger within Xen. This may happen
         * when a buffer is passed to us which previously had a watchpoint set
         * on it. No need to bump EIP; the only faulting trap is an instruction
         * breakpoint, which can't happen to us.
         */
        goto out;
    }

    /* Save debug status register where guest OS can peek at it */
    d->thread.debugreg[6] = condition;

    tb->flags = TBF_EXCEPTION;
    tb->cs    = d->thread.traps[1].cs;
    tb->eip   = d->thread.traps[1].address;

 out:
    return EXCRET_not_a_fault;
}
asmlinkage int do_spurious_interrupt_bug(struct xen_regs *regs)
{
    return EXCRET_not_a_fault;
}
#define _set_gate(gate_addr,type,dpl,addr) \
do { \
    int __d0, __d1; \
    __asm__ __volatile__ ("movw %%dx,%%ax\n\t" \
        "movw %4,%%dx\n\t" \
        "movl %%eax,%0\n\t" \
        "movl %%edx,%1" \
        :"=m" (*((long *) (gate_addr))), \
         "=m" (*(1+(long *) (gate_addr))), "=&a" (__d0), "=&d" (__d1) \
        :"i" ((short) (0x8000+(dpl<<13)+(type<<8))), \
         "3" ((char *) (addr)),"2" (__HYPERVISOR_CS << 16)); \
} while (0)
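
/*
 * Gate-descriptor layout assembled by _set_gate() above: the high word of
 * the low dword is the code selector (__HYPERVISOR_CS), the two halves of
 * the handler offset go in the remaining words, and 0x8000 | (dpl<<13) |
 * (type<<8) supplies P=1, the descriptor privilege level, and the gate type
 * (14 = 32-bit interrupt gate, which also clears EFLAGS.IF on entry).
 */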
void set_intr_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,0,addr);
}

static void __init set_system_gate(unsigned int n, void *addr)
{
    _set_gate(idt_table+n,14,3,addr);
}

static void set_task_gate(unsigned int n, unsigned int sel)
{
    idt_table[n].a = sel << 16;
    idt_table[n].b = 0x8500; /* present, DPL 0, task gate (type 5) */
}
#define _set_seg_desc(gate_addr,type,dpl,base,limit) {\
    *((gate_addr)+1) = ((base) & 0xff000000) | \
        (((base) & 0x00ff0000)>>16) | \
        ((limit) & 0xf0000) | \
        ((dpl)<<13) | \
        (0x00408000) | \
        ((type)<<8); \
    *(gate_addr) = (((base) & 0x0000ffff)<<16) | \
        ((limit) & 0x0ffff); }

#define _set_tssldt_desc(n,addr,limit,type) \
__asm__ __volatile__ ("movw %w3,0(%2)\n\t" \
    "movw %%ax,2(%2)\n\t" \
    "rorl $16,%%eax\n\t" \
    "movb %%al,4(%2)\n\t" \
    "movb %4,5(%2)\n\t" \
    "movb $0,6(%2)\n\t" \
    "movb %%ah,7(%2)\n\t" \
    "rorl $16,%%eax" \
    : "=m"(*(n)) : "a" (addr), "r"(n), "ir"(limit), "i"(type))

void set_tss_desc(unsigned int n, void *addr)
{
    _set_tssldt_desc(
        gdt_table + __TSS(n),
        (int)addr,
        offsetof(struct tss_struct, __cacheline_filler) - 1,
        0x89);
}
void __init trap_init(void)
{
    /*
     * Make a separate task for double faults. This will get us debug output
     * if we blow the kernel stack.
     */
    struct tss_struct *tss = &doublefault_tss;
    memset(tss, 0, sizeof(*tss));
    tss->ds     = __HYPERVISOR_DS;
    tss->es     = __HYPERVISOR_DS;
    tss->ss     = __HYPERVISOR_DS;
    tss->esp    = (unsigned long)
        &doublefault_stack[DOUBLEFAULT_STACK_SIZE];
    tss->__cr3  = __pa(idle_pg_table);
    tss->cs     = __HYPERVISOR_CS;
    tss->eip    = (unsigned long)do_double_fault;
    tss->eflags = 2;
    tss->bitmap = IOBMP_INVALID_OFFSET;
    _set_tssldt_desc(gdt_table+__DOUBLEFAULT_TSS_ENTRY,
                     (int)tss, 235, 0x89);

    /*
     * Note that interrupt gates are always used, rather than trap gates. We
     * must have interrupts disabled until DS/ES/FS/GS are saved because the
     * first activation must have the "bad" value(s) for these registers and
     * we may lose them if another activation is installed before they are
     * saved. The page-fault handler also needs interrupts disabled until %cr2
     * has been read and saved on the stack.
     */
    set_intr_gate(TRAP_divide_error,&divide_error);
    set_intr_gate(TRAP_debug,&debug);
    set_intr_gate(TRAP_nmi,&nmi);
    set_system_gate(TRAP_int3,&int3);         /* usable from all privileges */
    set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
    set_intr_gate(TRAP_bounds,&bounds);
    set_intr_gate(TRAP_invalid_op,&invalid_op);
    set_intr_gate(TRAP_no_device,&device_not_available);
    set_task_gate(TRAP_double_fault,__DOUBLEFAULT_TSS_ENTRY<<3);
    set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
    set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
    set_intr_gate(TRAP_no_segment,&segment_not_present);
    set_intr_gate(TRAP_stack_error,&stack_segment);
    set_intr_gate(TRAP_gp_fault,&general_protection);
    set_intr_gate(TRAP_page_fault,&page_fault);
    set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
    set_intr_gate(TRAP_copro_error,&coprocessor_error);
    set_intr_gate(TRAP_alignment_check,&alignment_check);
    set_intr_gate(TRAP_machine_check,&machine_check);
    set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);

    /* Only ring 1 can access Xen services. */
    _set_gate(idt_table+HYPERCALL_VECTOR,14,1,&hypercall);

    /* CPU0 uses the master IDT. */
    idt_tables[0] = idt_table;

    /*
     * Should be a barrier for any external CPU state.
     */
    {
        extern void cpu_init(void);
        cpu_init();
    }

    open_softirq(NMI_SOFTIRQ, nmi_softirq);
}
long do_set_trap_table(trap_info_t *traps)
{
    trap_info_t cur;
    trap_info_t *dst = current->thread.traps;

    LOCK_BIGLOCK(current->domain);

    for ( ; ; )
    {
        if ( hypercall_preempt_check() )
        {
            UNLOCK_BIGLOCK(current->domain);
            return hypercall_create_continuation(
                __HYPERVISOR_set_trap_table, 1, traps);
        }

        if ( copy_from_user(&cur, traps, sizeof(cur)) ) return -EFAULT;

        if ( cur.address == 0 ) break;

        if ( !VALID_CODESEL(cur.cs) ) return -EPERM;

        memcpy(dst+cur.vector, &cur, sizeof(cur));
        traps++;
    }

    UNLOCK_BIGLOCK(current->domain);

    return 0;
}
long do_set_callbacks(unsigned long event_selector,
                      unsigned long event_address,
                      unsigned long failsafe_selector,
                      unsigned long failsafe_address)
{
    struct exec_domain *d = current;

    if ( !VALID_CODESEL(event_selector) || !VALID_CODESEL(failsafe_selector) )
        return -EPERM;

    d->thread.event_selector    = event_selector;
    d->thread.event_address     = event_address;
    d->thread.failsafe_selector = failsafe_selector;
    d->thread.failsafe_address  = failsafe_address;

    return 0;
}
long set_fast_trap(struct exec_domain *p, int idx)
{
    trap_info_t *ti;

    /* Index 0 is special: it disables fast traps. */
    if ( idx == 0 )
    {
        if ( p == current )
            CLEAR_FAST_TRAP(&p->thread);
        SET_DEFAULT_FAST_TRAP(&p->thread);
        return 0;
    }

    /*
     * We only fast-trap vectors 0x20-0x2f, and vector 0x80.
     * The former range is used by Windows and MS-DOS.
     * Vector 0x80 is used by Linux and the BSD variants.
     */
    if ( (idx != 0x80) && ((idx < 0x20) || (idx > 0x2f)) )
        return -1;

    ti = p->thread.traps + idx;

    /*
     * We can't virtualise interrupt gates, as there's no way to get
     * the CPU to automatically clear the events_mask variable.
     */
    if ( TI_GET_IF(ti) )
        return -1;

    if ( p == current )
        CLEAR_FAST_TRAP(&p->thread);

    p->thread.fast_trap_idx    = idx;
    p->thread.fast_trap_desc.a = (ti->cs << 16) | (ti->address & 0xffff);
    p->thread.fast_trap_desc.b =
        (ti->address & 0xffff0000) | 0x8f00 | (TI_GET_DPL(ti)&3)<<13;

    if ( p == current )
        SET_FAST_TRAP(&p->thread);

    return 0;
}

long do_set_fast_trap(int idx)
{
    return set_fast_trap(current, idx);
}

long do_fpu_taskswitch(void)
{
    set_bit(EDF_GUEST_STTS, &current->ed_flags);
    stts();
    return 0;
}
long set_debugreg(struct exec_domain *p, int reg, unsigned long value)
{
    int i;

    switch ( reg )
    {
    case 0:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db0" : : "r" (value) );
        break;
    case 1:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db1" : : "r" (value) );
        break;
    case 2:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db2" : : "r" (value) );
        break;
    case 3:
        if ( value > (PAGE_OFFSET-4) ) return -EPERM;
        if ( p == current )
            __asm__ ( "movl %0, %%db3" : : "r" (value) );
        break;
    case 6:
        /*
         * DR6: Bits 4-11,16-31 reserved (set to 1).
         *      Bit 12 reserved (set to 0).
         */
        value &= 0xffffefff; /* reserved bits => 0 */
        value |= 0xffff0ff0; /* reserved bits => 1 */
        if ( p == current )
            __asm__ ( "movl %0, %%db6" : : "r" (value) );
        break;
    case 7:
        /*
         * DR7: Bit 10 reserved (set to 1).
         *      Bits 11-12,14-15 reserved (set to 0).
         * Privileged bits:
         *      GD (bit 13): must be 0.
         *      R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
         *      LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
         */
        /* DR7 == 0 => debugging disabled for this domain. */
        if ( value != 0 )
        {
            value &= 0xffff27ff; /* reserved bits => 0 */
            value |= 0x00000400; /* reserved bits => 1 */
            if ( (value & (1<<13)) != 0 ) return -EPERM;
            for ( i = 0; i < 16; i += 2 )
                if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
        }
        if ( p == current )
            __asm__ ( "movl %0, %%db7" : : "r" (value) );
        break;
    default:
        return -EINVAL;
    }

    p->thread.debugreg[reg] = value;
    return 0;
}
long do_set_debugreg(int reg, unsigned long value)
{
    return set_debugreg(current, reg, value);
}

unsigned long do_get_debugreg(int reg)
{
    if ( (reg < 0) || (reg > 7) ) return -EINVAL;
    return current->thread.debugreg[reg];
}
#else

asmlinkage void fatal_trap(int trapnr, struct xen_regs *regs)
{
}

#endif /* __i386__ */
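
For context, a guest registers the virtual trap table that the bounce logic
above consults via the set_trap_table hypercall (do_set_trap_table() in the
listing). A minimal guest-side sketch; the handler symbols and the
HYPERVISOR_set_trap_table wrapper follow the Linux-on-Xen conventions of this
era and are assumptions here, not defined in this file:

    static trap_info_t trap_table[] = {
        {    0, 0, FLAT_RING1_CS, (unsigned long)divide_error },
        {   14, 0, FLAT_RING1_CS, (unsigned long)page_fault   },
        { 0x80, 3, FLAT_RING1_CS, (unsigned long)system_call  },
        {    0, 0, 0, 0 }  /* address == 0 terminates the table */
    };

    void guest_init_traps(void)
    {
        /* Each entry's DPL (second field) controls who may INT to it;
         * vector 0x80 is left open to ring 3 for system calls. */
        HYPERVISOR_set_trap_table(trap_table);
    }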