debuggers.hg

view xen/arch/x86/traps.c @ 13645:2a9b6b1f848f

32-on-64: Small fixes.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
author kfraser@localhost.localdomain
date Thu Jan 25 13:05:15 2007 +0000 (2007-01-25)
parents 7ad205a162a8
children 01ec7dba9ff8
1 /******************************************************************************
2 * arch/x86/traps.c
3 *
4 * Modifications to Linux original are copyright (c) 2002-2004, K A Fraser
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
21 /*
22 * Copyright (C) 1991, 1992 Linus Torvalds
23 *
24 * Pentium III FXSR, SSE support
25 * Gareth Hughes <gareth@valinux.com>, May 2000
26 */
28 #include <xen/config.h>
29 #include <xen/init.h>
30 #include <xen/sched.h>
31 #include <xen/lib.h>
32 #include <xen/errno.h>
33 #include <xen/mm.h>
34 #include <xen/console.h>
35 #include <xen/shutdown.h>
36 #include <asm/regs.h>
37 #include <xen/delay.h>
38 #include <xen/event.h>
39 #include <xen/spinlock.h>
40 #include <xen/irq.h>
41 #include <xen/perfc.h>
42 #include <xen/softirq.h>
43 #include <xen/domain_page.h>
44 #include <xen/symbols.h>
45 #include <xen/iocap.h>
46 #include <xen/nmi.h>
47 #include <xen/version.h>
48 #include <xen/kexec.h>
49 #include <asm/shadow.h>
50 #include <asm/system.h>
51 #include <asm/io.h>
52 #include <asm/atomic.h>
53 #include <asm/desc.h>
54 #include <asm/debugreg.h>
55 #include <asm/smp.h>
56 #include <asm/flushtlb.h>
57 #include <asm/uaccess.h>
58 #include <asm/i387.h>
59 #include <asm/debugger.h>
60 #include <asm/msr.h>
61 #include <asm/shared.h>
62 #include <asm/x86_emulate.h>
63 #include <asm/hvm/vpt.h>
65 /*
66 * opt_nmi: one of 'ignore', 'dom0', or 'fatal'.
67 * fatal: Xen prints diagnostic message and then hangs.
68 * dom0: The NMI is virtualised to DOM0.
69 * ignore: The NMI error is cleared and ignored.
70 */
71 #ifdef NDEBUG
72 char opt_nmi[10] = "dom0";
73 #else
74 char opt_nmi[10] = "fatal";
75 #endif
76 string_param("nmi", opt_nmi);
78 /* Master table, used by all CPUs on x86/64, and by CPU0 on x86/32.*/
79 idt_entry_t idt_table[IDT_ENTRIES];
81 #define DECLARE_TRAP_HANDLER(_name) \
82 asmlinkage void _name(void); \
83 asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
85 asmlinkage void nmi(void);
86 DECLARE_TRAP_HANDLER(divide_error);
87 DECLARE_TRAP_HANDLER(debug);
88 DECLARE_TRAP_HANDLER(int3);
89 DECLARE_TRAP_HANDLER(overflow);
90 DECLARE_TRAP_HANDLER(bounds);
91 DECLARE_TRAP_HANDLER(invalid_op);
92 DECLARE_TRAP_HANDLER(device_not_available);
93 DECLARE_TRAP_HANDLER(coprocessor_segment_overrun);
94 DECLARE_TRAP_HANDLER(invalid_TSS);
95 DECLARE_TRAP_HANDLER(segment_not_present);
96 DECLARE_TRAP_HANDLER(stack_segment);
97 DECLARE_TRAP_HANDLER(general_protection);
98 DECLARE_TRAP_HANDLER(page_fault);
99 DECLARE_TRAP_HANDLER(coprocessor_error);
100 DECLARE_TRAP_HANDLER(simd_coprocessor_error);
101 DECLARE_TRAP_HANDLER(alignment_check);
102 DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
103 DECLARE_TRAP_HANDLER(machine_check);
105 long do_set_debugreg(int reg, unsigned long value);
106 unsigned long do_get_debugreg(int reg);
108 static int debug_stack_lines = 20;
109 integer_param("debug_stack_lines", debug_stack_lines);
111 #ifdef CONFIG_X86_32
112 #define stack_words_per_line 8
113 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)&regs->esp)
114 #else
115 #define stack_words_per_line 4
116 #define ESP_BEFORE_EXCEPTION(regs) ((unsigned long *)regs->rsp)
117 #endif
119 static void show_guest_stack(struct cpu_user_regs *regs)
120 {
121 int i;
122 unsigned long *stack, addr;
124 if ( is_hvm_vcpu(current) )
125 return;
127 if ( IS_COMPAT(container_of(regs, struct cpu_info, guest_cpu_user_regs)->current_vcpu->domain) )
128 {
129 compat_show_guest_stack(regs, debug_stack_lines);
130 return;
131 }
133 if ( vm86_mode(regs) )
134 {
135 stack = (unsigned long *)((regs->ss << 4) + (regs->esp & 0xffff));
136 printk("Guest stack trace from ss:sp = %04x:%04x (VM86)\n ",
137 regs->ss, (uint16_t)(regs->esp & 0xffff));
138 }
139 else
140 {
141 stack = (unsigned long *)regs->esp;
142 printk("Guest stack trace from "__OP"sp=%p:\n ", stack);
143 }
145 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
146 {
147 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
148 break;
149 if ( get_user(addr, stack) )
150 {
151 if ( i != 0 )
152 printk("\n ");
153 printk("Fault while accessing guest memory.");
154 i = 1;
155 break;
156 }
157 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
158 printk("\n ");
159 printk(" %p", _p(addr));
160 stack++;
161 }
162 if ( i == 0 )
163 printk("Stack empty.");
164 printk("\n");
165 }
167 #ifdef NDEBUG
169 static void show_trace(struct cpu_user_regs *regs)
170 {
171 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
173 printk("Xen call trace:\n ");
175 printk("[<%p>]", _p(regs->eip));
176 print_symbol(" %s\n ", regs->eip);
178 while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
179 {
180 addr = *stack++;
181 if ( is_kernel_text(addr) || is_kernel_inittext(addr) )
182 {
183 printk("[<%p>]", _p(addr));
184 print_symbol(" %s\n ", addr);
185 }
186 }
188 printk("\n");
189 }
191 #else
193 static void show_trace(struct cpu_user_regs *regs)
194 {
195 unsigned long *frame, next, addr, low, high;
197 printk("Xen call trace:\n ");
199 printk("[<%p>]", _p(regs->eip));
200 print_symbol(" %s\n ", regs->eip);
202 /* Bounds for range of valid frame pointer. */
203 low = (unsigned long)(ESP_BEFORE_EXCEPTION(regs) - 2);
204 high = (low & ~(STACK_SIZE - 1)) +
205 (STACK_SIZE - sizeof(struct cpu_info) - 2*sizeof(unsigned long));
207 /* The initial frame pointer. */
208 next = regs->ebp;
210 for ( ; ; )
211 {
212 /* Valid frame pointer? */
213 if ( (next < low) || (next >= high) )
214 {
215 /*
216 * Exception stack frames have a different layout, denoted by an
217 * inverted frame pointer.
218 */
219 next = ~next;
220 if ( (next < low) || (next >= high) )
221 break;
222 frame = (unsigned long *)next;
223 next = frame[0];
224 addr = frame[(offsetof(struct cpu_user_regs, eip) -
225 offsetof(struct cpu_user_regs, ebp))
226 / BYTES_PER_LONG];
227 }
228 else
229 {
230 /* Ordinary stack frame. */
231 frame = (unsigned long *)next;
232 next = frame[0];
233 addr = frame[1];
234 }
236 printk("[<%p>]", _p(addr));
237 print_symbol(" %s\n ", addr);
239 low = (unsigned long)&frame[2];
240 }
242 printk("\n");
243 }
245 #endif
247 void show_stack(struct cpu_user_regs *regs)
248 {
249 unsigned long *stack = ESP_BEFORE_EXCEPTION(regs), addr;
250 int i;
252 if ( guest_mode(regs) )
253 return show_guest_stack(regs);
255 printk("Xen stack trace from "__OP"sp=%p:\n ", stack);
257 for ( i = 0; i < (debug_stack_lines*stack_words_per_line); i++ )
258 {
259 if ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) == 0 )
260 break;
261 if ( (i != 0) && ((i % stack_words_per_line) == 0) )
262 printk("\n ");
263 addr = *stack++;
264 printk(" %p", _p(addr));
265 }
266 if ( i == 0 )
267 printk("Stack empty.");
268 printk("\n");
270 show_trace(regs);
271 }
273 void show_xen_trace()
274 {
275 struct cpu_user_regs regs;
276 #ifdef __x86_64
277 __asm__("movq %%rsp,%0" : "=m" (regs.rsp));
278 __asm__("movq %%rbp,%0" : "=m" (regs.rbp));
279 __asm__("leaq 0(%%rip),%0" : "=a" (regs.rip));
280 #else
281 __asm__("movl %%esp,%0" : "=m" (regs.esp));
282 __asm__("movl %%ebp,%0" : "=m" (regs.ebp));
283 __asm__("call 1f; 1: popl %0" : "=a" (regs.eip));
284 #endif
285 show_trace(&regs);
286 }
288 void show_stack_overflow(unsigned long esp)
289 {
290 #ifdef MEMORY_GUARD
291 unsigned long esp_top;
292 unsigned long *stack, addr;
294 esp_top = (esp | (STACK_SIZE - 1)) - DEBUG_STACK_SIZE;
296 /* Trigger overflow trace if %esp is within 512 bytes of the guard page. */
297 if ( ((unsigned long)(esp - esp_top) > 512) &&
298 ((unsigned long)(esp_top - esp) > 512) )
299 return;
301 if ( esp < esp_top )
302 esp = esp_top;
304 printk("Xen stack overflow:\n ");
306 stack = (unsigned long *)esp;
307 while ( ((long)stack & (STACK_SIZE-BYTES_PER_LONG)) != 0 )
308 {
309 addr = *stack++;
310 if ( is_kernel_text(addr) || is_kernel_inittext(addr) )
311 {
312 printk("%p: [<%p>]", stack, _p(addr));
313 print_symbol(" %s\n ", addr);
314 }
315 }
317 printk("\n");
318 #endif
319 }
321 void show_execution_state(struct cpu_user_regs *regs)
322 {
323 show_registers(regs);
324 show_stack(regs);
325 }
327 char *trapstr(int trapnr)
328 {
329 static char *strings[] = {
330 "divide error", "debug", "nmi", "bkpt", "overflow", "bounds",
331 "invalid opcode", "device not available", "double fault",
332 "coprocessor segment", "invalid tss", "segment not found",
333 "stack error", "general protection fault", "page fault",
334 "spurious interrupt", "coprocessor error", "alignment check",
335 "machine check", "simd error"
336 };
338 if ( (trapnr < 0) || (trapnr >= ARRAY_SIZE(strings)) )
339 return "???";
341 return strings[trapnr];
342 }
344 /*
345 * This is called for faults at very unexpected times (e.g., when interrupts
346 * are disabled). In such situations we can't do much that is safe. We try to
347 * print out some tracing and then we just spin.
348 */
349 asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs)
350 {
351 watchdog_disable();
352 console_start_sync();
354 show_execution_state(regs);
356 if ( trapnr == TRAP_page_fault )
357 {
358 unsigned long cr2 = read_cr2();
359 printk("Faulting linear address: %p\n", _p(cr2));
360 show_page_walk(cr2);
361 }
363 panic("FATAL TRAP: vector = %d (%s)\n"
364 "[error_code=%04x] %s\n",
365 trapnr, trapstr(trapnr), regs->error_code,
366 (regs->eflags & X86_EFLAGS_IF) ? "" : ", IN INTERRUPT CONTEXT");
367 }
369 static int do_guest_trap(
370 int trapnr, const struct cpu_user_regs *regs, int use_error_code)
371 {
372 struct vcpu *v = current;
373 struct trap_bounce *tb;
374 const struct trap_info *ti;
376 tb = &v->arch.trap_bounce;
377 ti = &v->arch.guest_context.trap_ctxt[trapnr];
379 tb->flags = TBF_EXCEPTION;
380 tb->cs = ti->cs;
381 tb->eip = ti->address;
383 if ( use_error_code )
384 {
385 tb->flags |= TBF_EXCEPTION_ERRCODE;
386 tb->error_code = regs->error_code;
387 }
389 if ( TI_GET_IF(ti) )
390 tb->flags |= TBF_INTERRUPT;
392 if ( unlikely(null_trap_bounce(v, tb)) )
393 gdprintk(XENLOG_WARNING, "Unhandled %s fault/trap [#%d] in "
394 "domain %d on VCPU %d [ec=%04x]\n",
395 trapstr(trapnr), trapnr, v->domain->domain_id, v->vcpu_id,
396 regs->error_code);
398 return 0;
399 }
401 static inline int do_trap(
402 int trapnr, struct cpu_user_regs *regs, int use_error_code)
403 {
404 unsigned long fixup;
406 DEBUGGER_trap_entry(trapnr, regs);
408 if ( guest_mode(regs) )
409 return do_guest_trap(trapnr, regs, use_error_code);
411 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
412 {
413 dprintk(XENLOG_ERR, "Trap %d: %p -> %p\n",
414 trapnr, _p(regs->eip), _p(fixup));
415 regs->eip = fixup;
416 return 0;
417 }
419 DEBUGGER_trap_fatal(trapnr, regs);
421 show_execution_state(regs);
422 panic("FATAL TRAP: vector = %d (%s)\n"
423 "[error_code=%04x]\n",
424 trapnr, trapstr(trapnr), regs->error_code);
425 return 0;
426 }
428 #define DO_ERROR_NOCODE(trapnr, name) \
429 asmlinkage int do_##name(struct cpu_user_regs *regs) \
430 { \
431 return do_trap(trapnr, regs, 0); \
432 }
434 #define DO_ERROR(trapnr, name) \
435 asmlinkage int do_##name(struct cpu_user_regs *regs) \
436 { \
437 return do_trap(trapnr, regs, 1); \
438 }
440 DO_ERROR_NOCODE(TRAP_divide_error, divide_error)
441 DO_ERROR_NOCODE(TRAP_overflow, overflow)
442 DO_ERROR_NOCODE(TRAP_bounds, bounds)
443 DO_ERROR_NOCODE(TRAP_copro_seg, coprocessor_segment_overrun)
444 DO_ERROR( TRAP_invalid_tss, invalid_TSS)
445 DO_ERROR( TRAP_no_segment, segment_not_present)
446 DO_ERROR( TRAP_stack_error, stack_segment)
447 DO_ERROR_NOCODE(TRAP_copro_error, coprocessor_error)
448 DO_ERROR( TRAP_alignment_check, alignment_check)
449 DO_ERROR_NOCODE(TRAP_simd_error, simd_coprocessor_error)
451 int rdmsr_hypervisor_regs(
452 uint32_t idx, uint32_t *eax, uint32_t *edx)
453 {
454 idx -= 0x40000000;
455 if ( idx > 0 )
456 return 0;
458 *eax = *edx = 0;
459 return 1;
460 }
462 int wrmsr_hypervisor_regs(
463 uint32_t idx, uint32_t eax, uint32_t edx)
464 {
465 struct domain *d = current->domain;
467 idx -= 0x40000000;
468 if ( idx > 0 )
469 return 0;
471 switch ( idx )
472 {
473 case 0:
474 {
475 void *hypercall_page;
476 unsigned long mfn;
477 unsigned long gmfn = ((unsigned long)edx << 20) | (eax >> 12);
478 unsigned int idx = eax & 0xfff;
480 if ( idx > 0 )
481 {
482 gdprintk(XENLOG_WARNING,
483 "Dom%d: Out of range index %u to MSR %08x\n",
484 d->domain_id, idx, 0x40000000);
485 return 0;
486 }
488 mfn = gmfn_to_mfn(d, gmfn);
490 if ( !mfn_valid(mfn) ||
491 !get_page_and_type(mfn_to_page(mfn), d, PGT_writable_page) )
492 {
493 gdprintk(XENLOG_WARNING,
494 "Dom%d: Bad GMFN %lx (MFN %lx) to MSR %08x\n",
495 d->domain_id, gmfn, mfn, 0x40000000);
496 return 0;
497 }
499 hypercall_page = map_domain_page(mfn);
500 hypercall_page_initialise(d, hypercall_page);
501 unmap_domain_page(hypercall_page);
503 put_page_and_type(mfn_to_page(mfn));
504 break;
505 }
507 default:
508 BUG();
509 }
511 return 1;
512 }
514 int cpuid_hypervisor_leaves(
515 uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx)
516 {
517 idx -= 0x40000000;
518 if ( idx > 2 )
519 return 0;
521 switch ( idx )
522 {
523 case 0:
524 *eax = 0x40000002; /* Largest leaf */
525 *ebx = 0x566e6558; /* Signature 1: "XenV" */
526 *ecx = 0x65584d4d; /* Signature 2: "MMXe" */
527 *edx = 0x4d4d566e; /* Signature 3: "nVMM" */
528 break;
530 case 1:
531 *eax = (xen_major_version() << 16) | xen_minor_version();
532 *ebx = 0; /* Reserved */
533 *ecx = 0; /* Reserved */
534 *edx = 0; /* Reserved */
535 break;
537 case 2:
538 *eax = 1; /* Number of hypercall-transfer pages */
539 *ebx = 0x40000000; /* MSR base address */
540 *ecx = 0; /* Features 1 */
541 *edx = 0; /* Features 2 */
542 break;
544 default:
545 BUG();
546 }
548 return 1;
549 }
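/*
 * A guest-side sketch (illustrative, not part of traps.c): how a kernel
 * might consume the leaves and MSR implemented above.  Leaf 0x40000000
 * carries the "XenVMMXenVMM" signature, leaf 0x40000002 reports one
 * hypercall page and the MSR base; writing a page-aligned guest-physical
 * address to that MSR makes wrmsr_hypervisor_regs() install a hypercall
 * page there.  The helper names and <stdint.h> types are assumptions.
 */
static inline void cpuid_raw(uint32_t leaf, uint32_t *a, uint32_t *b,
                             uint32_t *c, uint32_t *d)
{
    asm volatile ( "cpuid"
                   : "=a" (*a), "=b" (*b), "=c" (*c), "=d" (*d)
                   : "0" (leaf) );
}

static inline void wrmsr_raw(uint32_t msr, uint64_t val)
{
    asm volatile ( "wrmsr" : : "c" (msr),
                   "a" ((uint32_t)val), "d" ((uint32_t)(val >> 32)) );
}

static int xen_map_hypercall_page(uint64_t page_gpa)
{
    uint32_t eax, ebx, ecx, edx;

    cpuid_raw(0x40000000, &eax, &ebx, &ecx, &edx);
    if ( (ebx != 0x566e6558) || (ecx != 0x65584d4d) || (edx != 0x4d4d566e) )
        return 0;                        /* signature mismatch: not Xen */

    cpuid_raw(0x40000002, &eax, &ebx, &ecx, &edx);
    /* ebx is the MSR base (0x40000000 above); the low 12 bits of the
     * value written select the page index, so page 0 is just the GPA. */
    wrmsr_raw(ebx, page_gpa);
    return 1;
}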
551 static int emulate_forced_invalid_op(struct cpu_user_regs *regs)
552 {
553 char sig[5], instr[2];
554 uint32_t a, b, c, d;
555 unsigned long eip, rc;
557 a = regs->eax;
558 b = regs->ebx;
559 c = regs->ecx;
560 d = regs->edx;
561 eip = regs->eip;
563 /* Check for forced emulation signature: ud2 ; .ascii "xen". */
564 if ( (rc = copy_from_user(sig, (char *)eip, sizeof(sig))) != 0 )
565 {
566 propagate_page_fault(eip + sizeof(sig) - rc, 0);
567 return EXCRET_fault_fixed;
568 }
569 if ( memcmp(sig, "\xf\xbxen", sizeof(sig)) )
570 return 0;
571 eip += sizeof(sig);
573 /* We only emulate CPUID. */
574 if ( ( rc = copy_from_user(instr, (char *)eip, sizeof(instr))) != 0 )
575 {
576 propagate_page_fault(eip + sizeof(instr) - rc, 0);
577 return EXCRET_fault_fixed;
578 }
579 if ( memcmp(instr, "\xf\xa2", sizeof(instr)) )
580 return 0;
581 eip += sizeof(instr);
583 __asm__ (
584 "cpuid"
585 : "=a" (a), "=b" (b), "=c" (c), "=d" (d)
586 : "0" (a), "1" (b), "2" (c), "3" (d) );
588 if ( regs->eax == 1 )
589 {
590 /* Modify Feature Information. */
591 clear_bit(X86_FEATURE_VME, &d);
592 clear_bit(X86_FEATURE_DE, &d);
593 clear_bit(X86_FEATURE_PSE, &d);
594 clear_bit(X86_FEATURE_PGE, &d);
595 if ( !supervisor_mode_kernel )
596 clear_bit(X86_FEATURE_SEP, &d);
597 if ( !IS_PRIV(current->domain) )
598 clear_bit(X86_FEATURE_MTRR, &d);
599 }
600 else
601 {
602 (void)cpuid_hypervisor_leaves(regs->eax, &a, &b, &c, &d);
603 }
605 regs->eax = a;
606 regs->ebx = b;
607 regs->ecx = c;
608 regs->edx = d;
609 regs->eip = eip;
611 return EXCRET_fault_fixed;
612 }
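/*
 * A guest-side sketch (illustrative, not part of traps.c): the forced
 * emulation signature checked above -- ud2 (0f 0b) followed by "xen" --
 * is how a PV kernel asks Xen to emulate the CPUID that follows, so it
 * sees the feature fixups applied in emulate_forced_invalid_op().  The
 * macro/function names below follow the convention used by Linux guests
 * and are not defined in this file.
 */
#define XEN_EMULATE_PREFIX ".byte 0x0f,0x0b; .ascii \"xen\"; "

static inline void xen_cpuid(uint32_t *eax, uint32_t *ebx,
                             uint32_t *ecx, uint32_t *edx)
{
    asm volatile ( XEN_EMULATE_PREFIX "cpuid"
                   : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
                   : "0" (*eax), "1" (*ebx), "2" (*ecx), "3" (*edx) );
}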
614 asmlinkage int do_invalid_op(struct cpu_user_regs *regs)
615 {
616 int rc;
618 DEBUGGER_trap_entry(TRAP_invalid_op, regs);
620 if ( unlikely(!guest_mode(regs)) )
621 {
622 char sig[5];
623 /* Signature (ud2; .ascii "dbg") indicates dump state and continue. */
624 if ( (__copy_from_user(sig, (char *)regs->eip, sizeof(sig)) == 0) &&
625 (memcmp(sig, "\xf\xb""dbg", sizeof(sig)) == 0) )
626 {
627 show_execution_state(regs);
628 regs->eip += sizeof(sig);
629 return EXCRET_fault_fixed;
630 }
631 DEBUGGER_trap_fatal(TRAP_invalid_op, regs);
632 show_execution_state(regs);
633 panic("FATAL TRAP: vector = %d (invalid opcode)\n", TRAP_invalid_op);
634 }
636 if ( (rc = emulate_forced_invalid_op(regs)) != 0 )
637 return rc;
639 return do_guest_trap(TRAP_invalid_op, regs, 0);
640 }
642 asmlinkage int do_int3(struct cpu_user_regs *regs)
643 {
644 DEBUGGER_trap_entry(TRAP_int3, regs);
646 if ( !guest_mode(regs) )
647 {
648 DEBUGGER_trap_fatal(TRAP_int3, regs);
649 show_execution_state(regs);
650 panic("FATAL TRAP: vector = 3 (Int3)\n");
651 }
653 return do_guest_trap(TRAP_int3, regs, 0);
654 }
656 asmlinkage int do_machine_check(struct cpu_user_regs *regs)
657 {
658 fatal_trap(TRAP_machine_check, regs);
659 return 0;
660 }
662 void propagate_page_fault(unsigned long addr, u16 error_code)
663 {
664 struct trap_info *ti;
665 struct vcpu *v = current;
666 struct trap_bounce *tb = &v->arch.trap_bounce;
668 v->arch.guest_context.ctrlreg[2] = addr;
669 arch_set_cr2(v, addr);
671 /* Re-set error_code.user flag appropriately for the guest. */
672 error_code &= ~PFEC_user_mode;
673 if ( !guest_kernel_mode(v, guest_cpu_user_regs()) )
674 error_code |= PFEC_user_mode;
676 ti = &v->arch.guest_context.trap_ctxt[TRAP_page_fault];
677 tb->flags = TBF_EXCEPTION | TBF_EXCEPTION_ERRCODE;
678 tb->error_code = error_code;
679 tb->cs = ti->cs;
680 tb->eip = ti->address;
681 if ( TI_GET_IF(ti) )
682 tb->flags |= TBF_INTERRUPT;
683 if ( unlikely(null_trap_bounce(v, tb)) )
684 {
685 printk("Unhandled page fault in domain %d on VCPU %d (ec=%04X)\n",
686 v->domain->domain_id, v->vcpu_id, error_code);
687 show_page_walk(addr);
688 }
689 }
691 static int handle_gdt_ldt_mapping_fault(
692 unsigned long offset, struct cpu_user_regs *regs)
693 {
694 /* Which vcpu's area did we fault in, and is it in the ldt sub-area? */
695 unsigned int is_ldt_area = (offset >> (GDT_LDT_VCPU_VA_SHIFT-1)) & 1;
696 unsigned int vcpu_area = (offset >> GDT_LDT_VCPU_VA_SHIFT);
698 /* Should never fault in another vcpu's area. */
699 BUG_ON(vcpu_area != current->vcpu_id);
701 /* Byte offset within the gdt/ldt sub-area. */
702 offset &= (1UL << (GDT_LDT_VCPU_VA_SHIFT-1)) - 1UL;
704 if ( likely(is_ldt_area) )
705 {
706 /* LDT fault: Copy a mapping from the guest's LDT, if it is valid. */
707 if ( unlikely(map_ldt_shadow_page(offset >> PAGE_SHIFT) == 0) )
708 {
709 /* In hypervisor mode? Leave it to the #PF handler to fix up. */
710 if ( !guest_mode(regs) )
711 return 0;
712 /* In guest mode? Propagate #PF to guest, with adjusted %cr2. */
713 propagate_page_fault(
714 current->arch.guest_context.ldt_base + offset,
715 regs->error_code);
716 }
717 }
718 else
719 {
720 /* GDT fault: handle the fault as #GP(selector). */
721 regs->error_code = (u16)offset & ~7;
722 (void)do_general_protection(regs);
723 }
725 return EXCRET_fault_fixed;
726 }
728 #ifdef HYPERVISOR_VIRT_END
729 #define IN_HYPERVISOR_RANGE(va) \
730 (((va) >= HYPERVISOR_VIRT_START) && ((va) < HYPERVISOR_VIRT_END))
731 #else
732 #define IN_HYPERVISOR_RANGE(va) \
733 (((va) >= HYPERVISOR_VIRT_START))
734 #endif
736 static int __spurious_page_fault(
737 unsigned long addr, struct cpu_user_regs *regs)
738 {
739 unsigned long mfn, cr3 = read_cr3();
740 #if CONFIG_PAGING_LEVELS >= 4
741 l4_pgentry_t l4e, *l4t;
742 #endif
743 #if CONFIG_PAGING_LEVELS >= 3
744 l3_pgentry_t l3e, *l3t;
745 #endif
746 l2_pgentry_t l2e, *l2t;
747 l1_pgentry_t l1e, *l1t;
748 unsigned int required_flags, disallowed_flags;
750 /* Reserved bit violations are never spurious faults. */
751 if ( regs->error_code & PFEC_reserved_bit )
752 return 0;
754 required_flags = _PAGE_PRESENT;
755 if ( regs->error_code & PFEC_write_access )
756 required_flags |= _PAGE_RW;
757 if ( regs->error_code & PFEC_user_mode )
758 required_flags |= _PAGE_USER;
760 disallowed_flags = 0;
761 if ( regs->error_code & PFEC_insn_fetch )
762 disallowed_flags |= _PAGE_NX;
764 mfn = cr3 >> PAGE_SHIFT;
766 #if CONFIG_PAGING_LEVELS >= 4
767 l4t = map_domain_page(mfn);
768 l4e = l4e_read_atomic(&l4t[l4_table_offset(addr)]);
769 mfn = l4e_get_pfn(l4e);
770 unmap_domain_page(l4t);
771 if ( ((l4e_get_flags(l4e) & required_flags) != required_flags) ||
772 (l4e_get_flags(l4e) & disallowed_flags) )
773 return 0;
774 #endif
776 #if CONFIG_PAGING_LEVELS >= 3
777 l3t = map_domain_page(mfn);
778 #ifdef CONFIG_X86_PAE
779 l3t += (cr3 & 0xFE0UL) >> 3;
780 #endif
781 l3e = l3e_read_atomic(&l3t[l3_table_offset(addr)]);
782 mfn = l3e_get_pfn(l3e);
783 unmap_domain_page(l3t);
784 #ifdef CONFIG_X86_PAE
785 if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
786 return 0;
787 #else
788 if ( ((l3e_get_flags(l3e) & required_flags) != required_flags) ||
789 (l3e_get_flags(l3e) & disallowed_flags) )
790 return 0;
791 #endif
792 #endif
794 l2t = map_domain_page(mfn);
795 l2e = l2e_read_atomic(&l2t[l2_table_offset(addr)]);
796 mfn = l2e_get_pfn(l2e);
797 unmap_domain_page(l2t);
798 if ( ((l2e_get_flags(l2e) & required_flags) != required_flags) ||
799 (l2e_get_flags(l2e) & disallowed_flags) )
800 return 0;
801 if ( l2e_get_flags(l2e) & _PAGE_PSE )
802 {
803 l1e = l1e_empty(); /* define before use in debug tracing */
804 goto spurious;
805 }
807 l1t = map_domain_page(mfn);
808 l1e = l1e_read_atomic(&l1t[l1_table_offset(addr)]);
809 mfn = l1e_get_pfn(l1e);
810 unmap_domain_page(l1t);
811 if ( ((l1e_get_flags(l1e) & required_flags) != required_flags) ||
812 (l1e_get_flags(l1e) & disallowed_flags) )
813 return 0;
815 spurious:
816 dprintk(XENLOG_WARNING, "Spurious fault in domain %u:%u "
817 "at addr %lx, e/c %04x\n",
818 current->domain->domain_id, current->vcpu_id,
819 addr, regs->error_code);
820 #if CONFIG_PAGING_LEVELS >= 4
821 dprintk(XENLOG_WARNING, " l4e = %"PRIpte"\n", l4e_get_intpte(l4e));
822 #endif
823 #if CONFIG_PAGING_LEVELS >= 3
824 dprintk(XENLOG_WARNING, " l3e = %"PRIpte"\n", l3e_get_intpte(l3e));
825 #endif
826 dprintk(XENLOG_WARNING, " l2e = %"PRIpte"\n", l2e_get_intpte(l2e));
827 dprintk(XENLOG_WARNING, " l1e = %"PRIpte"\n", l1e_get_intpte(l1e));
828 #ifndef NDEBUG
829 show_registers(regs);
830 #endif
831 return 1;
832 }
834 static int spurious_page_fault(
835 unsigned long addr, struct cpu_user_regs *regs)
836 {
837 unsigned long flags;
838 int is_spurious;
840 /*
841 * Disabling interrupts prevents TLB flushing, and hence prevents
842 * page tables from becoming invalid under our feet during the walk.
843 */
844 local_irq_save(flags);
845 is_spurious = __spurious_page_fault(addr, regs);
846 local_irq_restore(flags);
848 return is_spurious;
849 }
851 static int fixup_page_fault(unsigned long addr, struct cpu_user_regs *regs)
852 {
853 struct vcpu *v = current;
854 struct domain *d = v->domain;
856 if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
857 {
858 if ( shadow_mode_external(d) && guest_mode(regs) )
859 return shadow_fault(addr, regs);
860 if ( (addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
861 return handle_gdt_ldt_mapping_fault(
862 addr - GDT_LDT_VIRT_START, regs);
863 return 0;
864 }
866 if ( VM_ASSIST(d, VMASST_TYPE_writable_pagetables) &&
867 guest_kernel_mode(v, regs) &&
868 /* Do not check if access-protection fault since the page may
869 legitimately be not present in shadow page tables */
870 ((regs->error_code & PFEC_write_access) == PFEC_write_access) &&
871 ptwr_do_page_fault(v, addr, regs) )
872 return EXCRET_fault_fixed;
874 if ( shadow_mode_enabled(d) )
875 return shadow_fault(addr, regs);
877 return 0;
878 }
880 /*
881 * #PF error code:
882 * Bit 0: Protection violation (=1) ; Page not present (=0)
883 * Bit 1: Write access
884 * Bit 2: User mode (=1) ; Supervisor mode (=0)
885 * Bit 3: Reserved bit violation
886 * Bit 4: Instruction fetch
887 */
888 asmlinkage int do_page_fault(struct cpu_user_regs *regs)
889 {
890 unsigned long addr, fixup;
891 int rc;
893 ASSERT(!in_irq());
895 addr = read_cr2();
897 DEBUGGER_trap_entry(TRAP_page_fault, regs);
899 perfc_incrc(page_faults);
901 if ( unlikely((rc = fixup_page_fault(addr, regs)) != 0) )
902 return rc;
904 if ( unlikely(!guest_mode(regs)) )
905 {
906 if ( spurious_page_fault(addr, regs) )
907 return EXCRET_not_a_fault;
909 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
910 {
911 perfc_incrc(copy_user_faults);
912 regs->eip = fixup;
913 return 0;
914 }
916 DEBUGGER_trap_fatal(TRAP_page_fault, regs);
918 show_execution_state(regs);
919 show_page_walk(addr);
920 panic("FATAL PAGE FAULT\n"
921 "[error_code=%04x]\n"
922 "Faulting linear address: %p\n",
923 regs->error_code, _p(addr));
924 }
926 propagate_page_fault(addr, regs->error_code);
927 return 0;
928 }
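/*
 * Illustrative only: the PFEC_* flags tested in the fault paths above
 * correspond to the error-code bits listed in the comment before
 * do_page_fault().  A hypothetical decoder, for reference:
 */
static void pfec_describe(unsigned int ec, char *buf, size_t len)
{
    snprintf(buf, len, "%s, %s access, %s mode%s%s",
             (ec & 1)  ? "protection violation" : "page not present", /* bit 0 */
             (ec & 2)  ? "write" : "read",                            /* bit 1 */
             (ec & 4)  ? "user" : "supervisor",                       /* bit 2 */
             (ec & 8)  ? ", reserved bit set" : "",                   /* bit 3 */
             (ec & 16) ? ", instruction fetch" : "");                 /* bit 4 */
}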
930 /*
931 * Early handler to deal with spurious page faults. For example, consider a
932 * routine that uses a mapping immediately after installing it (making it
933 * present). The CPU may speculatively execute the memory access before
934 * executing the PTE write. The instruction will then be marked to cause a
935 * page fault when it is retired, despite the fact that the PTE is present and
936 * correct at that point in time.
937 */
938 asmlinkage int do_early_page_fault(struct cpu_user_regs *regs)
939 {
940 static int stuck;
941 static unsigned long prev_eip, prev_cr2;
942 unsigned long cr2 = read_cr2();
944 BUG_ON(smp_processor_id() != 0);
946 if ( (regs->eip != prev_eip) || (cr2 != prev_cr2) )
947 {
948 prev_eip = regs->eip;
949 prev_cr2 = cr2;
950 stuck = 0;
951 return EXCRET_not_a_fault;
952 }
954 if ( stuck++ == 1000 )
955 panic("Early fatal page fault at %04x:%p (cr2=%p, ec=%04x)\n",
956 regs->cs, _p(regs->eip), _p(cr2), regs->error_code);
958 return EXCRET_not_a_fault;
959 }
961 long do_fpu_taskswitch(int set)
962 {
963 struct vcpu *v = current;
965 if ( set )
966 {
967 v->arch.guest_context.ctrlreg[0] |= X86_CR0_TS;
968 stts();
969 }
970 else
971 {
972 v->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
973 if ( test_bit(_VCPUF_fpu_dirtied, &v->vcpu_flags) )
974 clts();
975 }
977 return 0;
978 }
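/*
 * A guest-side sketch (illustrative): a PV kernel cannot toggle CR0.TS
 * itself, so its stts()/clts() equivalents become this hypercall.  The
 * HYPERVISOR_fpu_taskswitch() wrapper is supplied by the guest's
 * hypercall glue, not by this file.
 */
static inline void pv_stts(void)
{
    HYPERVISOR_fpu_taskswitch(1);    /* set the guest's view of CR0.TS */
}

static inline void pv_clts(void)
{
    /* Clears the guest's CR0.TS; the hardware clts() above only happens
     * if this vCPU has actually dirtied the FPU. */
    HYPERVISOR_fpu_taskswitch(0);
}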
980 static int read_descriptor(unsigned int sel,
981 const struct vcpu *v,
982 const struct cpu_user_regs * regs,
983 unsigned long *base,
984 unsigned long *limit,
985 unsigned int *ar,
986 unsigned int vm86attr)
987 {
988 struct desc_struct desc;
990 if ( !vm86_mode(regs) )
991 {
992 if ( sel < 4)
993 desc.b = desc.a = 0;
994 else if ( __get_user(desc,
995 (const struct desc_struct *)(!(sel & 4)
996 ? GDT_VIRT_START(v)
997 : LDT_VIRT_START(v))
998 + (sel >> 3)) )
999 return 0;
1000 if ( !(vm86attr & _SEGMENT_CODE) )
1001 desc.b &= ~_SEGMENT_L;
1002 }
1003 else
1004 {
1005 desc.a = (sel << 20) | 0xffff;
1006 desc.b = vm86attr | (sel >> 12);
1007 }
1009 *ar = desc.b & 0x00f0ff00;
1010 if ( !(desc.b & _SEGMENT_L) )
1011 {
1012 *base = (desc.a >> 16) + ((desc.b & 0xff) << 16) + (desc.b & 0xff000000);
1013 *limit = (desc.a & 0xffff) | (desc.b & 0x000f0000);
1014 if ( desc.b & _SEGMENT_G )
1015 *limit = ((*limit + 1) << 12) - 1;
1016 #ifndef NDEBUG
1017 if ( !vm86_mode(regs) && sel > 3 )
1018 {
1019 unsigned int a, l;
1020 unsigned char valid;
1022 __asm__("larl %2, %0\n\tsetz %1" : "=r" (a), "=rm" (valid) : "rm" (sel));
1023 BUG_ON(valid && (a & 0x00f0ff00) != *ar);
1024 __asm__("lsll %2, %0\n\tsetz %1" : "=r" (l), "=rm" (valid) : "rm" (sel));
1025 BUG_ON(valid && l != *limit);
1026 }
1027 #endif
1028 }
1029 else
1030 {
1031 *base = 0UL;
1032 *limit = ~0UL;
1033 }
1035 return 1;
1036 }
1038 /* Has the guest requested sufficient permission for this I/O access? */
1039 static inline int guest_io_okay(
1040 unsigned int port, unsigned int bytes,
1041 struct vcpu *v, struct cpu_user_regs *regs)
1042 {
1043 #if defined(__x86_64__)
1044 /* If in user mode, switch to kernel mode just to read I/O bitmap. */
1045 int user_mode = !(v->arch.flags & TF_kernel_mode);
1046 #define TOGGLE_MODE() if ( user_mode ) toggle_guest_mode(v)
1047 #elif defined(__i386__)
1048 #define TOGGLE_MODE() ((void)0)
1049 #endif
1051 if ( !vm86_mode(regs) &&
1052 (v->arch.iopl >= (guest_kernel_mode(v, regs) ? 1 : 3)) )
1053 return 1;
1055 if ( v->arch.iobmp_limit > (port + bytes) )
1056 {
1057 union { uint8_t bytes[2]; uint16_t mask; } x;
1059 /*
1060 * Grab permission bytes from guest space. Inaccessible bytes are
1061 * read as 0xff (no access allowed).
1062 */
1063 TOGGLE_MODE();
1064 switch ( __copy_from_guest_offset(&x.bytes[0], v->arch.iobmp,
1065 port>>3, 2) )
1066 {
1067 default: x.bytes[0] = ~0;
1068 case 1: x.bytes[1] = ~0;
1069 case 0: break;
1070 }
1071 TOGGLE_MODE();
1073 if ( (x.mask & (((1<<bytes)-1) << (port&7))) == 0 )
1074 return 1;
1075 }
1077 return 0;
1078 }
1080 /* Has the administrator granted sufficient permission for this I/O access? */
1081 static inline int admin_io_okay(
1082 unsigned int port, unsigned int bytes,
1083 struct vcpu *v, struct cpu_user_regs *regs)
1084 {
1085 return ioports_access_permitted(v->domain, port, port + bytes - 1);
1086 }
1088 #define guest_inb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
1089 #define guest_inw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
1090 #define guest_inl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
1091 #define guest_outb_okay(_p, _d, _r) admin_io_okay(_p, 1, _d, _r)
1092 #define guest_outw_okay(_p, _d, _r) admin_io_okay(_p, 2, _d, _r)
1093 #define guest_outl_okay(_p, _d, _r) admin_io_okay(_p, 4, _d, _r)
1095 /* I/O emulation support. Helper routines for, and type of, the stack stub.*/
1096 void host_to_guest_gpr_switch(struct cpu_user_regs *)
1097 __attribute__((__regparm__(1)));
1098 unsigned long guest_to_host_gpr_switch(unsigned long)
1099 __attribute__((__regparm__(1)));
1101 /* Instruction fetch with error handling. */
1102 #define insn_fetch(type, base, eip, limit) \
1103 ({ unsigned long _rc, _ptr = (base) + (eip); \
1104 type _x; \
1105 if ( (limit) < sizeof(_x) - 1 || (eip) > (limit) - (sizeof(_x) - 1) ) \
1106 goto fail; \
1107 if ( (_rc = copy_from_user(&_x, (type *)_ptr, sizeof(_x))) != 0 ) \
1108 { \
1109 propagate_page_fault(_ptr + sizeof(_x) - _rc, 0); \
1110 return EXCRET_fault_fixed; \
1111 } \
1112 (eip) += sizeof(_x); _x; })
1114 #if defined(CONFIG_X86_32)
1115 # define read_sreg(regs, sr) ((regs)->sr)
1116 #elif defined(CONFIG_X86_64)
1117 # define read_sreg(regs, sr) read_segment_register(sr)
1118 #endif
1120 static int emulate_privileged_op(struct cpu_user_regs *regs)
1122 struct vcpu *v = current;
1123 unsigned long *reg, eip = regs->eip, res;
1124 u8 opcode, modrm_reg = 0, modrm_rm = 0, rep_prefix = 0, lock = 0, rex = 0;
1125 enum { lm_seg_none, lm_seg_fs, lm_seg_gs } lm_ovr = lm_seg_none;
1126 unsigned int port, i, data_sel, ar, data, rc;
1127 unsigned int op_bytes, op_default, ad_bytes, ad_default;
1128 #define rd_ad(reg) (ad_bytes >= sizeof(regs->reg) \
1129 ? regs->reg \
1130 : ad_bytes == 4 \
1131 ? (u32)regs->reg \
1132 : (u16)regs->reg)
1133 #define wr_ad(reg, val) (ad_bytes >= sizeof(regs->reg) \
1134 ? regs->reg = (val) \
1135 : ad_bytes == 4 \
1136 ? (*(u32 *)&regs->reg = (val)) \
1137 : (*(u16 *)&regs->reg = (val)))
1138 unsigned long code_base, code_limit;
1139 char io_emul_stub[16];
1140 void (*io_emul)(struct cpu_user_regs *) __attribute__((__regparm__(1)));
1141 u32 l, h;
1143 if ( !read_descriptor(regs->cs, v, regs,
1144 &code_base, &code_limit, &ar,
1145 _SEGMENT_CODE|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) )
1146 goto fail;
1147 op_default = op_bytes = (ar & (_SEGMENT_L|_SEGMENT_DB)) ? 4 : 2;
1148 ad_default = ad_bytes = (ar & _SEGMENT_L) ? 8 : op_default;
1149 if ( !(ar & _SEGMENT_S) ||
1150 !(ar & _SEGMENT_P) ||
1151 !(ar & _SEGMENT_CODE) )
1152 goto fail;
1154 /* emulating only opcodes not allowing SS to be default */
1155 data_sel = read_sreg(regs, ds);
1157 /* Legacy prefixes. */
1158 for ( i = 0; i < 8; i++, rex == opcode || (rex = 0) )
1160 switch ( opcode = insn_fetch(u8, code_base, eip, code_limit) )
1162 case 0x66: /* operand-size override */
1163 op_bytes = op_default ^ 6; /* switch between 2/4 bytes */
1164 continue;
1165 case 0x67: /* address-size override */
1166 ad_bytes = ad_default != 4 ? 4 : 2; /* switch to 2/4 bytes */
1167 continue;
1168 case 0x2e: /* CS override */
1169 data_sel = regs->cs;
1170 continue;
1171 case 0x3e: /* DS override */
1172 data_sel = read_sreg(regs, ds);
1173 continue;
1174 case 0x26: /* ES override */
1175 data_sel = read_sreg(regs, es);
1176 continue;
1177 case 0x64: /* FS override */
1178 data_sel = read_sreg(regs, fs);
1179 lm_ovr = lm_seg_fs;
1180 continue;
1181 case 0x65: /* GS override */
1182 data_sel = read_sreg(regs, gs);
1183 lm_ovr = lm_seg_gs;
1184 continue;
1185 case 0x36: /* SS override */
1186 data_sel = regs->ss;
1187 continue;
1188 case 0xf0: /* LOCK */
1189 lock = 1;
1190 continue;
1191 case 0xf2: /* REPNE/REPNZ */
1192 case 0xf3: /* REP/REPE/REPZ */
1193 rep_prefix = 1;
1194 continue;
1195 default:
1196 if ( (ar & _SEGMENT_L) && (opcode & 0xf0) == 0x40 )
1198 rex = opcode;
1199 continue;
1201 break;
1203 break;
1206 /* REX prefix. */
1207 if ( rex & 8 ) /* REX.W */
1208 op_bytes = 4; /* emulating only opcodes not supporting 64-bit operands */
1209 modrm_reg = (rex & 4) << 1; /* REX.R */
1210 /* REX.X does not need to be decoded. */
1211 modrm_rm = (rex & 1) << 3; /* REX.B */
1213 if ( opcode == 0x0f )
1214 goto twobyte_opcode;
1216 if ( lock )
1217 goto fail;
1219 /* Input/Output String instructions. */
1220 if ( (opcode >= 0x6c) && (opcode <= 0x6f) )
1222 unsigned long data_base, data_limit;
1224 if ( rep_prefix && (rd_ad(ecx) == 0) )
1225 goto done;
1227 if ( !(opcode & 2) )
1229 data_sel = read_sreg(regs, es);
1230 lm_ovr = lm_seg_none;
1233 if ( !(ar & _SEGMENT_L) )
1235 if ( !read_descriptor(data_sel, v, regs,
1236 &data_base, &data_limit, &ar,
1237 _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P) )
1238 goto fail;
1239 if ( !(ar & _SEGMENT_S) ||
1240 !(ar & _SEGMENT_P) ||
1241 (opcode & 2 ?
1242 (ar & _SEGMENT_CODE) && !(ar & _SEGMENT_WR) :
1243 (ar & _SEGMENT_CODE) || !(ar & _SEGMENT_WR)) )
1244 goto fail;
1246 #ifdef CONFIG_X86_64
1247 else
1249 if ( lm_ovr == lm_seg_none || data_sel < 4 )
1251 switch ( lm_ovr )
1253 case lm_seg_none:
1254 data_base = 0UL;
1255 break;
1256 case lm_seg_fs:
1257 data_base = v->arch.guest_context.fs_base;
1258 break;
1259 case lm_seg_gs:
1260 if ( guest_kernel_mode(v, regs) )
1261 data_base = v->arch.guest_context.gs_base_kernel;
1262 else
1263 data_base = v->arch.guest_context.gs_base_user;
1264 break;
1267 else
1268 read_descriptor(data_sel, v, regs,
1269 &data_base, &data_limit, &ar,
1270 0);
1271 data_limit = ~0UL;
1272 ar = _SEGMENT_WR|_SEGMENT_S|_SEGMENT_DPL|_SEGMENT_P;
1274 #endif
1276 continue_io_string:
1277 switch ( opcode )
1279 case 0x6c: /* INSB */
1280 op_bytes = 1;
1281 case 0x6d: /* INSW/INSL */
1282 if ( data_limit < op_bytes - 1 ||
1283 rd_ad(edi) > data_limit - (op_bytes - 1) ||
1284 !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
1285 goto fail;
1286 port = (u16)regs->edx;
1287 switch ( op_bytes )
1289 case 1:
1290 /* emulate PIT counter 2 */
1291 data = (u8)(guest_inb_okay(port, v, regs) ? inb(port) :
1292 ((port == 0x42 || port == 0x43 || port == 0x61) ?
1293 pv_pit_handler(port, 0, 0) : ~0));
1294 break;
1295 case 2:
1296 data = (u16)(guest_inw_okay(port, v, regs) ? inw(port) : ~0);
1297 break;
1298 case 4:
1299 data = (u32)(guest_inl_okay(port, v, regs) ? inl(port) : ~0);
1300 break;
1302 if ( (rc = copy_to_user((void *)data_base + rd_ad(edi), &data, op_bytes)) != 0 )
1304 propagate_page_fault(data_base + rd_ad(edi) + op_bytes - rc,
1305 PFEC_write_access);
1306 return EXCRET_fault_fixed;
1308 wr_ad(edi, regs->edi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
1309 break;
1311 case 0x6e: /* OUTSB */
1312 op_bytes = 1;
1313 case 0x6f: /* OUTSW/OUTSL */
1314 if ( data_limit < op_bytes - 1 ||
1315 rd_ad(esi) > data_limit - (op_bytes - 1) ||
1316 !guest_io_okay((u16)regs->edx, op_bytes, v, regs) )
1317 goto fail;
1318 rc = copy_from_user(&data, (void *)data_base + rd_ad(esi), op_bytes);
1319 if ( rc != 0 )
1321 propagate_page_fault(data_base + rd_ad(esi) + op_bytes - rc, 0);
1322 return EXCRET_fault_fixed;
1324 port = (u16)regs->edx;
1325 switch ( op_bytes )
1327 case 1:
1328 if ( guest_outb_okay(port, v, regs) )
1329 outb((u8)data, port);
1330 else if ( port == 0x42 || port == 0x43 || port == 0x61 )
1331 pv_pit_handler(port, data, 1);
1332 break;
1333 case 2:
1334 if ( guest_outw_okay(port, v, regs) )
1335 outw((u16)data, port);
1336 break;
1337 case 4:
1338 if ( guest_outl_okay(port, v, regs) )
1339 outl((u32)data, port);
1340 break;
1342 wr_ad(esi, regs->esi + (int)((regs->eflags & EF_DF) ? -op_bytes : op_bytes));
1343 break;
1346 if ( rep_prefix && (wr_ad(ecx, regs->ecx - 1) != 0) )
1348 if ( !hypercall_preempt_check() )
1349 goto continue_io_string;
1350 eip = regs->eip;
1353 goto done;
1356 /*
1357 * Very likely to be an I/O instruction (IN/OUT).
1358 * Build an on-stack stub to execute the instruction with full guest
1359 * GPR context. This is needed for some systems which (ab)use IN/OUT
1360 * to communicate with BIOS code in system-management mode.
1361 */
1362 /* call host_to_guest_gpr_switch */
1363 io_emul_stub[0] = 0xe8;
1364 *(s32 *)&io_emul_stub[1] =
1365 (char *)host_to_guest_gpr_switch - &io_emul_stub[5];
1366 /* data16 or nop */
1367 io_emul_stub[5] = (op_bytes != 2) ? 0x90 : 0x66;
1368 /* <io-access opcode> */
1369 io_emul_stub[6] = opcode;
1370 /* imm8 or nop */
1371 io_emul_stub[7] = 0x90;
1372 /* jmp guest_to_host_gpr_switch */
1373 io_emul_stub[8] = 0xe9;
1374 *(s32 *)&io_emul_stub[9] =
1375 (char *)guest_to_host_gpr_switch - &io_emul_stub[13];
1377 /* Handy function-typed pointer to the stub. */
1378 io_emul = (void *)io_emul_stub;
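/*
 * Worked example (illustrative; the rel32 displacements are computed at
 * run time and shown as xx/yy placeholders): once the "IN imm8" case
 * below has patched the port into byte 7, the stub executed for
 * "inb $0x71,%al" (opcode 0xe4, op_bytes == 1) is
 *
 *   e8 xx xx xx xx    call host_to_guest_gpr_switch
 *   90                nop   (no 0x66 prefix: not a 16-bit operand)
 *   e4 71             in    $0x71,%al
 *   e9 yy yy yy yy    jmp   guest_to_host_gpr_switch
 */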
1380 /* I/O Port and Interrupt Flag instructions. */
1381 switch ( opcode )
1383 case 0xe4: /* IN imm8,%al */
1384 op_bytes = 1;
1385 case 0xe5: /* IN imm8,%eax */
1386 port = insn_fetch(u8, code_base, eip, code_limit);
1387 io_emul_stub[7] = port; /* imm8 */
1388 exec_in:
1389 if ( !guest_io_okay(port, op_bytes, v, regs) )
1390 goto fail;
1391 switch ( op_bytes )
1393 case 1:
1394 if ( guest_inb_okay(port, v, regs) )
1395 io_emul(regs);
1396 else if ( port == 0x42 || port == 0x43 || port == 0x61 )
1398 regs->eax &= ~0xffUL;
1399 regs->eax |= pv_pit_handler(port, 0, 0);
1401 else
1402 regs->eax |= (u8)~0;
1403 break;
1404 case 2:
1405 if ( guest_inw_okay(port, v, regs) )
1406 io_emul(regs);
1407 else
1408 regs->eax |= (u16)~0;
1409 break;
1410 case 4:
1411 if ( guest_inl_okay(port, v, regs) )
1412 io_emul(regs);
1413 else
1414 regs->eax = (u32)~0;
1415 break;
1417 goto done;
1419 case 0xec: /* IN %dx,%al */
1420 op_bytes = 1;
1421 case 0xed: /* IN %dx,%eax */
1422 port = (u16)regs->edx;
1423 goto exec_in;
1425 case 0xe6: /* OUT %al,imm8 */
1426 op_bytes = 1;
1427 case 0xe7: /* OUT %eax,imm8 */
1428 port = insn_fetch(u8, code_base, eip, code_limit);
1429 io_emul_stub[7] = port; /* imm8 */
1430 exec_out:
1431 if ( !guest_io_okay(port, op_bytes, v, regs) )
1432 goto fail;
1433 switch ( op_bytes )
1435 case 1:
1436 if ( guest_outb_okay(port, v, regs) )
1437 io_emul(regs);
1438 else if ( port == 0x42 || port == 0x43 || port == 0x61 )
1439 pv_pit_handler(port, regs->eax, 1);
1440 break;
1441 case 2:
1442 if ( guest_outw_okay(port, v, regs) )
1443 io_emul(regs);
1444 break;
1445 case 4:
1446 if ( guest_outl_okay(port, v, regs) )
1447 io_emul(regs);
1448 break;
1450 goto done;
1452 case 0xee: /* OUT %al,%dx */
1453 op_bytes = 1;
1454 case 0xef: /* OUT %eax,%dx */
1455 port = (u16)regs->edx;
1456 goto exec_out;
1458 case 0xfa: /* CLI */
1459 case 0xfb: /* STI */
1460 if ( v->arch.iopl < (guest_kernel_mode(v, regs) ? 1 : 3) )
1461 goto fail;
1462 /*
1463 * This is just too dangerous to allow, in my opinion. Consider if the
1464 * caller then tries to reenable interrupts using POPF: we can't trap
1465 * that and we'll end up with hard-to-debug lockups. Fast & loose will
1466 * do for us. :-)
1467 */
1468 /*v->vcpu_info->evtchn_upcall_mask = (opcode == 0xfa);*/
1469 goto done;
1472 /* No decode of this single-byte opcode. */
1473 goto fail;
1475 twobyte_opcode:
1476 /* Two-byte opcodes only emulated from guest kernel. */
1477 if ( !guest_kernel_mode(v, regs) )
1478 goto fail;
1480 /* Privileged (ring 0) instructions. */
1481 opcode = insn_fetch(u8, code_base, eip, code_limit);
1482 if ( lock && (opcode & ~3) != 0x20 )
1483 goto fail;
1484 switch ( opcode )
1486 case 0x06: /* CLTS */
1487 (void)do_fpu_taskswitch(0);
1488 break;
1490 case 0x09: /* WBINVD */
1491 /* Ignore the instruction if unprivileged. */
1492 if ( !cache_flush_permitted(v->domain) )
1493 /* Non-physdev domain attempted WBINVD; ignore for now since
1494 newer linux uses this in some start-of-day timing loops */
1496 else
1497 wbinvd();
1498 break;
1500 case 0x20: /* MOV CR?,<reg> */
1501 opcode = insn_fetch(u8, code_base, eip, code_limit);
1502 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
1503 modrm_rm |= (opcode >> 0) & 7;
1504 reg = decode_register(modrm_rm, regs, 0);
1505 switch ( modrm_reg )
1507 case 0: /* Read CR0 */
1508 *reg = (read_cr0() & ~X86_CR0_TS) |
1509 v->arch.guest_context.ctrlreg[0];
1510 break;
1512 case 2: /* Read CR2 */
1513 *reg = v->arch.guest_context.ctrlreg[2];
1514 break;
1516 case 3: /* Read CR3 */
1517 if ( !IS_COMPAT(v->domain) )
1518 *reg = xen_pfn_to_cr3(mfn_to_gmfn(
1519 v->domain, pagetable_get_pfn(v->arch.guest_table)));
1520 #ifdef CONFIG_COMPAT
1521 else
1522 *reg = compat_pfn_to_cr3(mfn_to_gmfn(
1523 v->domain, l4e_get_pfn(*(l4_pgentry_t *)__va(pagetable_get_paddr(v->arch.guest_table)))));
1524 #endif
1525 break;
1527 case 4: /* Read CR4 */
1528 /*
1529 * Guests can read CR4 to see what features Xen has enabled. We
1530 * therefore lie about PGE & PSE as they are unavailable to guests.
1531 */
1532 *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
1533 break;
1535 default:
1536 goto fail;
1538 break;
1540 case 0x21: /* MOV DR?,<reg> */
1541 opcode = insn_fetch(u8, code_base, eip, code_limit);
1542 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
1543 modrm_rm |= (opcode >> 0) & 7;
1544 reg = decode_register(modrm_rm, regs, 0);
1545 if ( (res = do_get_debugreg(modrm_reg)) > (unsigned long)-256 )
1546 goto fail;
1547 *reg = res;
1548 break;
1550 case 0x22: /* MOV <reg>,CR? */
1551 opcode = insn_fetch(u8, code_base, eip, code_limit);
1552 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
1553 modrm_rm |= (opcode >> 0) & 7;
1554 reg = decode_register(modrm_rm, regs, 0);
1555 switch ( modrm_reg )
1557 case 0: /* Write CR0 */
1558 if ( (*reg ^ read_cr0()) & ~X86_CR0_TS )
1560 gdprintk(XENLOG_WARNING,
1561 "Attempt to change unmodifiable CR0 flags.\n");
1562 goto fail;
1564 (void)do_fpu_taskswitch(!!(*reg & X86_CR0_TS));
1565 break;
1567 case 2: /* Write CR2 */
1568 v->arch.guest_context.ctrlreg[2] = *reg;
1569 arch_set_cr2(v, *reg);
1570 break;
1572 case 3: /* Write CR3 */
1573 LOCK_BIGLOCK(v->domain);
1574 if ( !IS_COMPAT(v->domain) )
1575 rc = new_guest_cr3(gmfn_to_mfn(v->domain, xen_cr3_to_pfn(*reg)));
1576 #ifdef CONFIG_COMPAT
1577 else
1578 rc = new_guest_cr3(gmfn_to_mfn(v->domain, compat_cr3_to_pfn(*reg)));
1579 #endif
1580 UNLOCK_BIGLOCK(v->domain);
1581 if ( rc == 0 ) /* not okay */
1582 goto fail;
1583 break;
1585 case 4:
1586 if ( *reg != (read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE)) )
1588 gdprintk(XENLOG_WARNING, "Attempt to change CR4 flags.\n");
1589 goto fail;
1591 break;
1593 default:
1594 goto fail;
1596 break;
1598 case 0x23: /* MOV <reg>,DR? */
1599 opcode = insn_fetch(u8, code_base, eip, code_limit);
1600 modrm_reg += ((opcode >> 3) & 7) + (lock << 3);
1601 modrm_rm |= (opcode >> 0) & 7;
1602 reg = decode_register(modrm_rm, regs, 0);
1603 if ( do_set_debugreg(modrm_reg, *reg) != 0 )
1604 goto fail;
1605 break;
1607 case 0x30: /* WRMSR */
1608 switch ( regs->ecx )
1610 #ifdef CONFIG_X86_64
1611 case MSR_FS_BASE:
1612 if ( IS_COMPAT(v->domain) )
1613 goto fail;
1614 if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) )
1615 goto fail;
1616 v->arch.guest_context.fs_base =
1617 ((u64)regs->edx << 32) | regs->eax;
1618 break;
1619 case MSR_GS_BASE:
1620 if ( IS_COMPAT(v->domain) )
1621 goto fail;
1622 if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) )
1623 goto fail;
1624 v->arch.guest_context.gs_base_kernel =
1625 ((u64)regs->edx << 32) | regs->eax;
1626 break;
1627 case MSR_SHADOW_GS_BASE:
1628 if ( IS_COMPAT(v->domain) )
1629 goto fail;
1630 if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) )
1631 goto fail;
1632 v->arch.guest_context.gs_base_user =
1633 ((u64)regs->edx << 32) | regs->eax;
1634 break;
1635 #endif
1636 default:
1637 if ( wrmsr_hypervisor_regs(regs->ecx, regs->eax, regs->edx) )
1638 break;
1640 if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
1641 (regs->eax != l) || (regs->edx != h) )
1642 gdprintk(XENLOG_WARNING, "Domain attempted WRMSR %p from "
1643 "%08x:%08x to %08lx:%08lx.\n",
1644 _p(regs->ecx), h, l, (long)regs->edx, (long)regs->eax);
1645 break;
1647 break;
1649 case 0x32: /* RDMSR */
1650 switch ( regs->ecx )
1652 #ifdef CONFIG_X86_64
1653 case MSR_FS_BASE:
1654 if ( IS_COMPAT(v->domain) )
1655 goto fail;
1656 regs->eax = v->arch.guest_context.fs_base & 0xFFFFFFFFUL;
1657 regs->edx = v->arch.guest_context.fs_base >> 32;
1658 break;
1659 case MSR_GS_BASE:
1660 if ( IS_COMPAT(v->domain) )
1661 goto fail;
1662 regs->eax = v->arch.guest_context.gs_base_kernel & 0xFFFFFFFFUL;
1663 regs->edx = v->arch.guest_context.gs_base_kernel >> 32;
1664 break;
1665 case MSR_SHADOW_GS_BASE:
1666 if ( IS_COMPAT(v->domain) )
1667 goto fail;
1668 regs->eax = v->arch.guest_context.gs_base_user & 0xFFFFFFFFUL;
1669 regs->edx = v->arch.guest_context.gs_base_user >> 32;
1670 break;
1671 #endif
1672 case MSR_EFER:
1673 if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
1674 goto fail;
1675 break;
1676 default:
1677 if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
1679 regs->eax = l;
1680 regs->edx = h;
1681 break;
1683 /* Everyone can read the MSR space. */
1684 /* gdprintk(XENLOG_WARNING,"Domain attempted RDMSR %p.\n",
1685 _p(regs->ecx));*/
1686 if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
1687 goto fail;
1688 break;
1690 break;
1692 default:
1693 goto fail;
1696 #undef wr_ad
1697 #undef rd_ad
1699 done:
1700 regs->eip = eip;
1701 return EXCRET_fault_fixed;
1703 fail:
1704 return 0;
1707 asmlinkage int do_general_protection(struct cpu_user_regs *regs)
1709 struct vcpu *v = current;
1710 unsigned long fixup;
1712 DEBUGGER_trap_entry(TRAP_gp_fault, regs);
1714 if ( regs->error_code & 1 )
1715 goto hardware_gp;
1717 if ( !guest_mode(regs) )
1718 goto gp_in_kernel;
1720 /*
1721 * Cunning trick to allow arbitrary "INT n" handling.
1723 * We set DPL == 0 on all vectors in the IDT. This prevents any INT <n>
1724 * instruction from trapping to the appropriate vector, when that might not
1725 * be expected by Xen or the guest OS. For example, that entry might be for
1726 * a fault handler (unlike traps, faults don't increment EIP), or might
1727 * expect an error code on the stack (which a software trap never
1728 * provides), or might be a hardware interrupt handler that doesn't like
1729 * being called spuriously.
1731 * Instead, a GPF occurs with the faulting IDT vector in the error code.
1732 * Bit 1 is set to indicate that an IDT entry caused the fault. Bit 0 is
1733 * clear to indicate that it's a software fault, not hardware.
1735 * NOTE: Vectors 3 and 4 are dealt with from their own handler. This is
1736 * okay because they can only be triggered by an explicit DPL-checked
1737 * instruction. The DPL specified by the guest OS for these vectors is NOT
1738 * CHECKED!!
1739 */
1740 if ( (regs->error_code & 3) == 2 )
1742 /* This fault must be due to <INT n> instruction. */
1743 const struct trap_info *ti;
1744 unsigned char vector = regs->error_code >> 3;
1745 ti = &v->arch.guest_context.trap_ctxt[vector];
1746 if ( permit_softint(TI_GET_DPL(ti), v, regs) )
1748 regs->eip += 2;
1749 return do_guest_trap(vector, regs, 0);
1753 /* Emulate some simple privileged and I/O instructions. */
1754 if ( (regs->error_code == 0) &&
1755 emulate_privileged_op(regs) )
1756 return 0;
1758 #if defined(__i386__)
1759 if ( VM_ASSIST(v->domain, VMASST_TYPE_4gb_segments) &&
1760 (regs->error_code == 0) &&
1761 gpf_emulate_4gb(regs) )
1762 return 0;
1763 #endif
1765 /* Pass on GPF as is. */
1766 return do_guest_trap(TRAP_gp_fault, regs, 1);
1768 gp_in_kernel:
1770 if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
1772 dprintk(XENLOG_WARNING, "GPF (%04x): %p -> %p\n",
1773 regs->error_code, _p(regs->eip), _p(fixup));
1774 regs->eip = fixup;
1775 return 0;
1778 DEBUGGER_trap_fatal(TRAP_gp_fault, regs);
1780 hardware_gp:
1781 show_execution_state(regs);
1782 panic("GENERAL PROTECTION FAULT\n[error_code=%04x]\n", regs->error_code);
1783 return 0;
1786 static void nmi_softirq(void)
1787 {
1788 /* Only used to defer wakeup of dom0,vcpu0 to a safe (non-NMI) context. */
1789 vcpu_kick(dom0->vcpu[0]);
1790 }
1792 static void nmi_dom0_report(unsigned int reason_idx)
1793 {
1794 struct domain *d;
1795 struct vcpu *v;
1797 if ( ((d = dom0) == NULL) || ((v = d->vcpu[0]) == NULL) )
1798 return;
1800 set_bit(reason_idx, nmi_reason(d));
1802 if ( test_and_set_bit(_VCPUF_nmi_pending, &v->vcpu_flags) )
1803 raise_softirq(NMI_SOFTIRQ); /* not safe to wake up a vcpu here */
1804 }
1806 asmlinkage void mem_parity_error(struct cpu_user_regs *regs)
1807 {
1808 switch ( opt_nmi[0] )
1809 {
1810 case 'd': /* 'dom0' */
1811 nmi_dom0_report(_XEN_NMIREASON_parity_error);
1812 case 'i': /* 'ignore' */
1813 break;
1814 default: /* 'fatal' */
1815 console_force_unlock();
1816 printk("\n\nNMI - MEMORY ERROR\n");
1817 fatal_trap(TRAP_nmi, regs);
1818 }
1820 outb((inb(0x61) & 0x0f) | 0x04, 0x61); /* clear-and-disable parity check */
1821 mdelay(1);
1822 outb((inb(0x61) & 0x0b) | 0x00, 0x61); /* enable parity check */
1823 }
1825 asmlinkage void io_check_error(struct cpu_user_regs *regs)
1826 {
1827 switch ( opt_nmi[0] )
1828 {
1829 case 'd': /* 'dom0' */
1830 nmi_dom0_report(_XEN_NMIREASON_io_error);
1831 case 'i': /* 'ignore' */
1832 break;
1833 default: /* 'fatal' */
1834 console_force_unlock();
1835 printk("\n\nNMI - I/O ERROR\n");
1836 fatal_trap(TRAP_nmi, regs);
1837 }
1839 outb((inb(0x61) & 0x0f) | 0x08, 0x61); /* clear-and-disable IOCK */
1840 mdelay(1);
1841 outb((inb(0x61) & 0x07) | 0x00, 0x61); /* enable IOCK */
1842 }
1844 static void unknown_nmi_error(unsigned char reason)
1845 {
1846 switch ( opt_nmi[0] )
1847 {
1848 case 'd': /* 'dom0' */
1849 nmi_dom0_report(_XEN_NMIREASON_unknown);
1850 case 'i': /* 'ignore' */
1851 break;
1852 default: /* 'fatal' */
1853 printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
1854 printk("Dazed and confused, but trying to continue\n");
1855 printk("Do you have a strange power saving mode enabled?\n");
1856 kexec_crash();
1857 }
1858 }
1860 static int dummy_nmi_callback(struct cpu_user_regs *regs, int cpu)
1861 {
1862 return 0;
1863 }
1865 static nmi_callback_t nmi_callback = dummy_nmi_callback;
1867 asmlinkage void do_nmi(struct cpu_user_regs *regs)
1868 {
1869 unsigned int cpu = smp_processor_id();
1870 unsigned char reason;
1872 ++nmi_count(cpu);
1874 if ( nmi_callback(regs, cpu) )
1875 return;
1877 if ( nmi_watchdog )
1878 nmi_watchdog_tick(regs);
1880 /* Only the BSP gets external NMIs from the system. */
1881 if ( cpu == 0 )
1882 {
1883 reason = inb(0x61);
1884 if ( reason & 0x80 )
1885 mem_parity_error(regs);
1886 else if ( reason & 0x40 )
1887 io_check_error(regs);
1888 else if ( !nmi_watchdog )
1889 unknown_nmi_error((unsigned char)(reason&0xff));
1890 }
1891 }
1893 void set_nmi_callback(nmi_callback_t callback)
1894 {
1895 nmi_callback = callback;
1896 }
1898 void unset_nmi_callback(void)
1899 {
1900 nmi_callback = dummy_nmi_callback;
1901 }
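/*
 * Sketch of a consumer (illustrative; the handler name is hypothetical):
 * a profiler or watchdog hooks NMIs ahead of the default reason-port
 * handling in do_nmi() by returning nonzero from its callback.
 */
static int my_nmi_handler(struct cpu_user_regs *regs, int cpu)
{
    /* e.g. sample regs->eip, bump a per-cpu counter, ... */
    return 1;    /* nonzero: NMI consumed, skip the default handling */
}

static void example_profiling_start(void) { set_nmi_callback(my_nmi_handler); }
static void example_profiling_stop(void)  { unset_nmi_callback(); }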
1903 asmlinkage int math_state_restore(struct cpu_user_regs *regs)
1904 {
1905 setup_fpu(current);
1907 if ( current->arch.guest_context.ctrlreg[0] & X86_CR0_TS )
1908 {
1909 do_guest_trap(TRAP_no_device, regs, 0);
1910 current->arch.guest_context.ctrlreg[0] &= ~X86_CR0_TS;
1911 }
1913 return EXCRET_fault_fixed;
1914 }
1916 asmlinkage int do_debug(struct cpu_user_regs *regs)
1917 {
1918 unsigned long condition;
1919 struct vcpu *v = current;
1921 __asm__ __volatile__("mov %%db6,%0" : "=r" (condition));
1923 /* Mask out spurious debug traps due to lazy DR7 setting */
1924 if ( (condition & (DR_TRAP0|DR_TRAP1|DR_TRAP2|DR_TRAP3)) &&
1925 (v->arch.guest_context.debugreg[7] == 0) )
1926 {
1927 __asm__("mov %0,%%db7" : : "r" (0UL));
1928 goto out;
1929 }
1931 DEBUGGER_trap_entry(TRAP_debug, regs);
1933 if ( !guest_mode(regs) )
1934 {
1935 /* Clear TF just for absolute sanity. */
1936 regs->eflags &= ~EF_TF;
1937 /*
1938 * We ignore watchpoints when they trigger within Xen. This may happen
1939 * when a buffer is passed to us which previously had a watchpoint set
1940 * on it. No need to bump EIP; the only faulting trap is an instruction
1941 * breakpoint, which can't happen to us.
1942 */
1943 goto out;
1944 }
1946 /* Save debug status register where guest OS can peek at it */
1947 v->arch.guest_context.debugreg[6] = condition;
1949 return do_guest_trap(TRAP_debug, regs, 0);
1951 out:
1952 return EXCRET_not_a_fault;
1953 }
1955 asmlinkage int do_spurious_interrupt_bug(struct cpu_user_regs *regs)
1956 {
1957 return EXCRET_not_a_fault;
1958 }
1960 void set_intr_gate(unsigned int n, void *addr)
1962 #ifdef __i386__
1963 int i;
1964 /* Keep secondary tables in sync with IRQ updates. */
1965 for ( i = 1; i < NR_CPUS; i++ )
1966 if ( idt_tables[i] != NULL )
1967 _set_gate(&idt_tables[i][n], 14, 0, addr);
1968 #endif
1969 _set_gate(&idt_table[n], 14, 0, addr);
1972 void set_system_gate(unsigned int n, void *addr)
1974 _set_gate(idt_table+n,14,3,addr);
1977 void set_task_gate(unsigned int n, unsigned int sel)
1979 idt_table[n].a = sel << 16;
1980 idt_table[n].b = 0x8500;
1983 void set_tss_desc(unsigned int n, void *addr)
1985 _set_tssldt_desc(
1986 gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
1987 (unsigned long)addr,
1988 offsetof(struct tss_struct, __cacheline_filler) - 1,
1989 9);
1990 #ifdef CONFIG_COMPAT
1991 _set_tssldt_desc(
1992 compat_gdt_table + __TSS(n) - FIRST_RESERVED_GDT_ENTRY,
1993 (unsigned long)addr,
1994 offsetof(struct tss_struct, __cacheline_filler) - 1,
1995 11);
1996 #endif
1999 void __init trap_init(void)
2001 extern void percpu_traps_init(void);
2003 /*
2004 * Note that interrupt gates are always used, rather than trap gates. We
2005 * must have interrupts disabled until DS/ES/FS/GS are saved because the
2006 * first activation must have the "bad" value(s) for these registers and
2007 * we may lose them if another activation is installed before they are
2008 * saved. The page-fault handler also needs interrupts disabled until %cr2
2009 * has been read and saved on the stack.
2010 */
2011 set_intr_gate(TRAP_divide_error,&divide_error);
2012 set_intr_gate(TRAP_debug,&debug);
2013 set_intr_gate(TRAP_nmi,&nmi);
2014 set_system_gate(TRAP_int3,&int3); /* usable from all privileges */
2015 set_system_gate(TRAP_overflow,&overflow); /* usable from all privileges */
2016 set_intr_gate(TRAP_bounds,&bounds);
2017 set_intr_gate(TRAP_invalid_op,&invalid_op);
2018 set_intr_gate(TRAP_no_device,&device_not_available);
2019 set_intr_gate(TRAP_copro_seg,&coprocessor_segment_overrun);
2020 set_intr_gate(TRAP_invalid_tss,&invalid_TSS);
2021 set_intr_gate(TRAP_no_segment,&segment_not_present);
2022 set_intr_gate(TRAP_stack_error,&stack_segment);
2023 set_intr_gate(TRAP_gp_fault,&general_protection);
2024 set_intr_gate(TRAP_page_fault,&page_fault);
2025 set_intr_gate(TRAP_spurious_int,&spurious_interrupt_bug);
2026 set_intr_gate(TRAP_copro_error,&coprocessor_error);
2027 set_intr_gate(TRAP_alignment_check,&alignment_check);
2028 set_intr_gate(TRAP_machine_check,&machine_check);
2029 set_intr_gate(TRAP_simd_error,&simd_coprocessor_error);
2031 percpu_traps_init();
2033 cpu_init();
2035 open_softirq(NMI_SOFTIRQ, nmi_softirq);
2039 long do_set_trap_table(XEN_GUEST_HANDLE(trap_info_t) traps)
2040 {
2041 struct trap_info cur;
2042 struct trap_info *dst = current->arch.guest_context.trap_ctxt;
2043 long rc = 0;
2045 /* If no table is presented then clear the entire virtual IDT. */
2046 if ( guest_handle_is_null(traps) )
2047 {
2048 memset(dst, 0, 256 * sizeof(*dst));
2049 init_int80_direct_trap(current);
2050 return 0;
2051 }
2053 for ( ; ; )
2054 {
2055 if ( hypercall_preempt_check() )
2056 {
2057 rc = hypercall_create_continuation(
2058 __HYPERVISOR_set_trap_table, "h", traps);
2059 break;
2060 }
2062 if ( copy_from_guest(&cur, traps, 1) )
2063 {
2064 rc = -EFAULT;
2065 break;
2066 }
2068 if ( cur.address == 0 )
2069 break;
2071 fixup_guest_code_selector(current->domain, cur.cs);
2073 memcpy(&dst[cur.vector], &cur, sizeof(cur));
2075 if ( cur.vector == 0x80 )
2076 init_int80_direct_trap(current);
2078 guest_handle_add_offset(traps, 1);
2079 }
2081 return rc;
2082 }
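/*
 * A guest-side sketch (illustrative): building the virtual IDT consumed
 * above.  FLAT_KERNEL_CS comes from the public headers; the handler
 * symbols and the HYPERVISOR_set_trap_table() wrapper are placeholders
 * supplied by the guest, not by this file.
 */
static struct trap_info example_trap_table[] = {
    {    0, 0, FLAT_KERNEL_CS, (unsigned long)divide_error_entry },
    {   14, 0, FLAT_KERNEL_CS, (unsigned long)page_fault_entry   },
    /* flags' low bits give the DPL: 3 lets user mode raise int $0x80,
     * which also makes the loop above call init_int80_direct_trap(). */
    { 0x80, 3, FLAT_KERNEL_CS, (unsigned long)int80_entry        },
    {    0, 0, 0, 0 }            /* address == 0 terminates the table */
};

static void example_install_traps(void)
{
    HYPERVISOR_set_trap_table(example_trap_table);
}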
2085 long set_debugreg(struct vcpu *p, int reg, unsigned long value)
2087 int i;
2089 switch ( reg )
2091 case 0:
2092 if ( !access_ok(value, sizeof(long)) )
2093 return -EPERM;
2094 if ( p == current )
2095 __asm__ ( "mov %0, %%db0" : : "r" (value) );
2096 break;
2097 case 1:
2098 if ( !access_ok(value, sizeof(long)) )
2099 return -EPERM;
2100 if ( p == current )
2101 __asm__ ( "mov %0, %%db1" : : "r" (value) );
2102 break;
2103 case 2:
2104 if ( !access_ok(value, sizeof(long)) )
2105 return -EPERM;
2106 if ( p == current )
2107 __asm__ ( "mov %0, %%db2" : : "r" (value) );
2108 break;
2109 case 3:
2110 if ( !access_ok(value, sizeof(long)) )
2111 return -EPERM;
2112 if ( p == current )
2113 __asm__ ( "mov %0, %%db3" : : "r" (value) );
2114 break;
2115 case 6:
2116 /*
2117 * DR6: Bits 4-11,16-31 reserved (set to 1).
2118 * Bit 12 reserved (set to 0).
2119 */
2120 value &= 0xffffefff; /* reserved bits => 0 */
2121 value |= 0xffff0ff0; /* reserved bits => 1 */
2122 if ( p == current )
2123 __asm__ ( "mov %0, %%db6" : : "r" (value) );
2124 break;
2125 case 7:
2126 /*
2127 * DR7: Bit 10 reserved (set to 1).
2128 * Bits 11-12,14-15 reserved (set to 0).
2129 * Privileged bits:
2130 * GD (bit 13): must be 0.
2131 * R/Wn (bits 16-17,20-21,24-25,28-29): mustn't be 10.
2132 * LENn (bits 18-19,22-23,26-27,30-31): mustn't be 10.
2133 */
2134 /* DR7 == 0 => debugging disabled for this domain. */
2135 if ( value != 0 )
2136 {
2137 value &= 0xffff27ff; /* reserved bits => 0 */
2138 value |= 0x00000400; /* reserved bits => 1 */
2139 if ( (value & (1<<13)) != 0 ) return -EPERM;
2140 for ( i = 0; i < 16; i += 2 )
2141 if ( ((value >> (i+16)) & 3) == 2 ) return -EPERM;
2142 }
2143 if ( p == current )
2144 __asm__ ( "mov %0, %%db7" : : "r" (value) );
2145 break;
2146 default:
2147 return -EINVAL;
2148 }
2150 p->arch.guest_context.debugreg[reg] = value;
2151 return 0;
2152 }
2154 long do_set_debugreg(int reg, unsigned long value)
2155 {
2156 return set_debugreg(current, reg, value);
2157 }
2159 unsigned long do_get_debugreg(int reg)
2160 {
2161 if ( (reg < 0) || (reg > 7) ) return -EINVAL;
2162 return current->arch.guest_context.debugreg[reg];
2163 }
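/*
 * Worked example (illustrative): a DR7 value that passes the checks in
 * set_debugreg() above -- watchpoint 0, write access (R/W0 = 01b, bits
 * 16-17), 4 bytes wide (LEN0 = 11b, bits 18-19), locally enabled (L0,
 * bit 0).  Bit 10 is forced to 1 by the handler in any case.
 */
static void example_arm_watchpoint(unsigned long linear_addr)
{
    unsigned long dr7 = (1UL << 0) | (1UL << 16) | (3UL << 18);

    do_set_debugreg(0, linear_addr);    /* DR0: address to watch */
    do_set_debugreg(7, dr7);            /* DR7: enable it        */
}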
2165 /*
2166 * Local variables:
2167 * mode: C
2168 * c-set-style: "BSD"
2169 * c-basic-offset: 4
2170 * tab-width: 4
2171 * indent-tabs-mode: nil
2172 * End:
2173 */