/root/src/xen/xen/arch/x86/x86_64/traps.c
Line | Count | Source (jump to first uncovered line) |
1 | | |
2 | | #include <xen/version.h> |
3 | | #include <xen/init.h> |
4 | | #include <xen/sched.h> |
5 | | #include <xen/lib.h> |
6 | | #include <xen/errno.h> |
7 | | #include <xen/mm.h> |
8 | | #include <xen/irq.h> |
9 | | #include <xen/symbols.h> |
10 | | #include <xen/console.h> |
11 | | #include <xen/sched.h> |
12 | | #include <xen/shutdown.h> |
13 | | #include <xen/guest_access.h> |
14 | | #include <xen/watchdog.h> |
15 | | #include <xen/hypercall.h> |
16 | | #include <asm/current.h> |
17 | | #include <asm/flushtlb.h> |
18 | | #include <asm/traps.h> |
19 | | #include <asm/event.h> |
20 | | #include <asm/nmi.h> |
21 | | #include <asm/msr.h> |
22 | | #include <asm/page.h> |
23 | | #include <asm/shared.h> |
24 | | #include <asm/hvm/hvm.h> |
25 | | #include <asm/hvm/support.h> |
26 | | |
27 | | |
28 | | static void print_xen_info(void) |
29 | 0 | { |
30 | 0 | char taint_str[TAINT_STRING_MAX_LEN]; |
31 | 0 |
|
32 | 0 | printk("----[ Xen-%d.%d%s x86_64 debug=%c " gcov_string " %s ]----\n", |
33 | 0 | xen_major_version(), xen_minor_version(), xen_extra_version(), |
34 | 0 | debug_build() ? 'y' : 'n', print_tainted(taint_str)); |
35 | 0 | } |
36 | | |
37 | | enum context { CTXT_hypervisor, CTXT_pv_guest, CTXT_hvm_guest }; |
38 | | |
/*
 * Core register dumper shared by the hypervisor, PV and HVM paths.
 *
 * @regs:    frame to print.
 * @crs:     control register array; only slots 0, 2, 3 and 4 are read here.
 * @context: owner of the state; selects the printed label and decorations.
 * @v:       associated vcpu, or NULL when dumping raw hypervisor state.
 */
static void _show_registers(
    const struct cpu_user_regs *regs, unsigned long crs[8],
    enum context context, const struct vcpu *v)
{
    static const char *const context_names[] = {
        [CTXT_hypervisor] = "hypervisor",
        [CTXT_pv_guest] = "pv guest",
        [CTXT_hvm_guest] = "hvm guest"
    };

    printk("RIP: %04x:[<%016lx>]", regs->cs, regs->rip);
    /* Symbol resolution is only meaningful for Xen's own addresses. */
    if ( context == CTXT_hypervisor )
        printk(" %pS", _p(regs->rip));
    printk("\nRFLAGS: %016lx ", regs->rflags);
    /* EM: PV guest's event-channel upcall mask (its virtual IF analogue). */
    if ( (context == CTXT_pv_guest) && v && v->vcpu_info )
        printk("EM: %d ", !!vcpu_info(v, evtchn_upcall_mask));
    printk("CONTEXT: %s", context_names[context]);
    if ( v && !is_idle_vcpu(v) )
        printk(" (%pv)", v);

    printk("\nrax: %016lx rbx: %016lx rcx: %016lx\n",
           regs->rax, regs->rbx, regs->rcx);
    printk("rdx: %016lx rsi: %016lx rdi: %016lx\n",
           regs->rdx, regs->rsi, regs->rdi);
    printk("rbp: %016lx rsp: %016lx r8: %016lx\n",
           regs->rbp, regs->rsp, regs->r8);
    printk("r9: %016lx r10: %016lx r11: %016lx\n",
           regs->r9, regs->r10, regs->r11);
    /*
     * Skip r12-r15 for frames flagged as partial -- presumably those
     * registers were not saved on entry (NOTE(review): confirm against
     * the entry-path code that sets TRAP_regs_partial).
     */
    if ( !(regs->entry_vector & TRAP_regs_partial) )
    {
        printk("r12: %016lx r13: %016lx r14: %016lx\n",
               regs->r12, regs->r13, regs->r14);
        printk("r15: %016lx cr0: %016lx cr4: %016lx\n",
               regs->r15, crs[0], crs[4]);
    }
    else
        printk("cr0: %016lx cr4: %016lx\n", crs[0], crs[4]);
    printk("cr3: %016lx cr2: %016lx\n", crs[3], crs[2]);
    printk("ds: %04x es: %04x fs: %04x gs: %04x "
           "ss: %04x cs: %04x\n",
           regs->ds, regs->es, regs->fs,
           regs->gs, regs->ss, regs->cs);
}
82 | | |
/*
 * Print the full register state for the context interrupted by @regs,
 * augmenting the saved frame with control/segment register values from
 * the most appropriate source: HVM shadow state, PV vcpu state, or the
 * live CPU registers.
 */
void show_registers(const struct cpu_user_regs *regs)
{
    struct cpu_user_regs fault_regs = *regs;
    unsigned long fault_crs[8];
    enum context context;
    /* 'current' is only meaningful once per-CPU/vcpu setup is done. */
    struct vcpu *v = system_state >= SYS_STATE_smp_boot ? current : NULL;

    if ( guest_mode(regs) && is_hvm_vcpu(v) )
    {
        struct segment_register sreg;
        context = CTXT_hvm_guest;
        /* HVM: CRs come from the vcpu's shadowed guest_cr[] values... */
        fault_crs[0] = v->arch.hvm_vcpu.guest_cr[0];
        fault_crs[2] = v->arch.hvm_vcpu.guest_cr[2];
        fault_crs[3] = v->arch.hvm_vcpu.guest_cr[3];
        fault_crs[4] = v->arch.hvm_vcpu.guest_cr[4];
        /* ...and selectors are fetched through the HVM accessor. */
        hvm_get_segment_register(v, x86_seg_cs, &sreg);
        fault_regs.cs = sreg.sel;
        hvm_get_segment_register(v, x86_seg_ds, &sreg);
        fault_regs.ds = sreg.sel;
        hvm_get_segment_register(v, x86_seg_es, &sreg);
        fault_regs.es = sreg.sel;
        hvm_get_segment_register(v, x86_seg_fs, &sreg);
        fault_regs.fs = sreg.sel;
        hvm_get_segment_register(v, x86_seg_gs, &sreg);
        fault_regs.gs = sreg.sel;
        hvm_get_segment_register(v, x86_seg_ss, &sreg);
        fault_regs.ss = sreg.sel;
    }
    else
    {
        if ( guest_mode(regs) )
        {
            context = CTXT_pv_guest;
            /* PV guest: use the software-maintained CR2 value. */
            fault_crs[2] = arch_get_cr2(v);
        }
        else
        {
            context = CTXT_hypervisor;
            fault_crs[2] = read_cr2();
        }

        /* Remaining CRs and data selectors are read live off the CPU. */
        fault_crs[0] = read_cr0();
        fault_crs[3] = read_cr3();
        fault_crs[4] = read_cr4();
        fault_regs.ds = read_sreg(ds);
        fault_regs.es = read_sreg(es);
        fault_regs.fs = read_sreg(fs);
        fault_regs.gs = read_sreg(gs);
    }

    print_xen_info();
    printk("CPU: %d\n", smp_processor_id());
    _show_registers(&fault_regs, fault_crs, context, v);

    /*
     * Last Exception Record: NOTE(review): assumes this_cpu(ler_msr)
     * and the next MSR index form the from/to pair -- verify against
     * the ler_msr setup code.
     */
    if ( this_cpu(ler_msr) && !guest_mode(regs) )
    {
        u64 from, to;
        rdmsrl(this_cpu(ler_msr), from);
        rdmsrl(this_cpu(ler_msr) + 1, to);
        printk("ler: %016lx -> %016lx\n", from, to);
    }
}
145 | | |
146 | | void vcpu_show_registers(const struct vcpu *v) |
147 | 0 | { |
148 | 0 | const struct cpu_user_regs *regs = &v->arch.user_regs; |
149 | 0 | unsigned long crs[8]; |
150 | 0 |
|
151 | 0 | /* Only handle PV guests for now */ |
152 | 0 | if ( !is_pv_vcpu(v) ) |
153 | 0 | return; |
154 | 0 |
|
155 | 0 | crs[0] = v->arch.pv_vcpu.ctrlreg[0]; |
156 | 0 | crs[2] = arch_get_cr2(v); |
157 | 0 | crs[3] = pagetable_get_paddr(guest_kernel_mode(v, regs) ? |
158 | 0 | v->arch.guest_table : |
159 | 0 | v->arch.guest_table_user); |
160 | 0 | crs[4] = v->arch.pv_vcpu.ctrlreg[4]; |
161 | 0 |
|
162 | 0 | _show_registers(regs, crs, CTXT_pv_guest, v); |
163 | 0 | } |
164 | | |
/*
 * Dump the page-table walk for linear address @addr through the current
 * CR3, printing one line per level.  The walk terminates early on a
 * non-present entry, a superpage (PSE) mapping, or an invalid MFN.
 */
void show_page_walk(unsigned long addr)
{
    unsigned long pfn, mfn = read_cr3() >> PAGE_SHIFT;
    l4_pgentry_t l4e, *l4t;
    l3_pgentry_t l3e, *l3t;
    l2_pgentry_t l2e, *l2t;
    l1_pgentry_t l1e, *l1t;

    printk("Pagetable walk from %016lx:\n", addr);
    /* Non-canonical addresses have no valid translation to walk. */
    if ( !is_canonical_address(addr) )
        return;

    /* L4: map the table only long enough to fetch the one entry. */
    l4t = map_domain_page(_mfn(mfn));
    l4e = l4t[l4_table_offset(addr)];
    unmap_domain_page(l4t);
    mfn = l4e_get_pfn(l4e);
    /* Also report the guest PFN, where an M2P translation exists. */
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L4[0x%03lx] = %"PRIpte" %016lx\n",
           l4_table_offset(addr), l4e_get_intpte(l4e), pfn);
    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ||
         !mfn_valid(_mfn(mfn)) )
        return;

    /* L3: may be a 1GB superpage (PSE), which ends the walk. */
    l3t = map_domain_page(_mfn(mfn));
    l3e = l3t[l3_table_offset(addr)];
    unmap_domain_page(l3t);
    mfn = l3e_get_pfn(l3e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L3[0x%03lx] = %"PRIpte" %016lx%s\n",
           l3_table_offset(addr), l3e_get_intpte(l3e), pfn,
           (l3e_get_flags(l3e) & _PAGE_PSE) ? " (PSE)" : "");
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ||
         (l3e_get_flags(l3e) & _PAGE_PSE) ||
         !mfn_valid(_mfn(mfn)) )
        return;

    /* L2: may be a 2MB superpage (PSE), which ends the walk. */
    l2t = map_domain_page(_mfn(mfn));
    l2e = l2t[l2_table_offset(addr)];
    unmap_domain_page(l2t);
    mfn = l2e_get_pfn(l2e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L2[0x%03lx] = %"PRIpte" %016lx%s\n",
           l2_table_offset(addr), l2e_get_intpte(l2e), pfn,
           (l2e_get_flags(l2e) & _PAGE_PSE) ? " (PSE)" : "");
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ||
         (l2e_get_flags(l2e) & _PAGE_PSE) ||
         !mfn_valid(_mfn(mfn)) )
        return;

    /* L1: the final 4K mapping. */
    l1t = map_domain_page(_mfn(mfn));
    l1e = l1t[l1_table_offset(addr)];
    unmap_domain_page(l1t);
    mfn = l1e_get_pfn(l1e);
    pfn = mfn_valid(_mfn(mfn)) && machine_to_phys_mapping_valid ?
          get_gpfn_from_mfn(mfn) : INVALID_M2P_ENTRY;
    printk(" L1[0x%03lx] = %"PRIpte" %016lx\n",
           l1_table_offset(addr), l1e_get_intpte(l1e), pfn);
}
226 | | |
/*
 * #DF (double fault) handler: dump as much state as possible, then
 * panic.  A double fault is treated as unrecoverable -- this function
 * never returns.
 */
void do_double_fault(struct cpu_user_regs *regs)
{
    unsigned int cpu;
    unsigned long crs[8];

    /* The fault may have interrupted a console-lock holder. */
    console_force_unlock();

    /*
     * Recover this CPU's id from its per-CPU GDT entry via LSL.
     * NOTE(review): assumes the id is encoded in the segment limit --
     * confirm against the GDT setup code.
     */
    asm ( "lsll %1, %0" : "=r" (cpu) : "rm" (PER_CPU_GDT_ENTRY << 3) );

    /* Find information saved during fault and dump it to the console. */
    printk("*** DOUBLE FAULT ***\n");
    print_xen_info();

    /* Read live control/segment registers to complete the picture. */
    crs[0] = read_cr0();
    crs[2] = read_cr2();
    crs[3] = read_cr3();
    crs[4] = read_cr4();
    regs->ds = read_sreg(ds);
    regs->es = read_sreg(es);
    regs->fs = read_sreg(fs);
    regs->gs = read_sreg(gs);

    printk("CPU: %d\n", cpu);
    _show_registers(regs, crs, CTXT_hypervisor, NULL);
    show_stack_overflow(cpu, regs);

    panic("DOUBLE FAULT -- system shutdown");
}
255 | | |
/*
 * Emit a SYSCALL entry trampoline into the per-CPU stub area.
 *
 * The generated code spills %rax to the top slot of the primary stack,
 * saves the guest %rsp in %rax, switches %rsp onto the primary stack,
 * re-pushes the guest %rsp and jumps to the real entry point:
 *
 *     movabsq %rax, stack_bottom - 8
 *     movq    %rsp, %rax
 *     movabsq $stack_bottom - 8, %rsp
 *     pushq   %rax
 *     jmp     target_va
 *
 * @stub:         mapped address to write the machine code at.
 * @stub_va:      linear address the stub will execute from (needed to
 *                compute the rel32 displacement of the final jmp).
 * @stack_bottom: bottom (highest address) of this CPU's primary stack.
 * @target_va:    C-level entry point the stub jumps to.
 *
 * Returns the stub size rounded up to a 16-byte multiple.
 */
static unsigned int write_stub_trampoline(
    unsigned char *stub, unsigned long stub_va,
    unsigned long stack_bottom, unsigned long target_va)
{
    /*
     * Immediates are written with memcpy(): the direct casts previously
     * used here (*(uint64_t *)&stub[2] = ...) are misaligned, type-punned
     * stores -- undefined behavior under the C aliasing/alignment rules.
     * The emitted bytes are identical.
     */
    const uint64_t stack_top = stack_bottom - 8;
    int32_t jmp_rel;

    /* movabsq %rax, stack_bottom - 8 */
    stub[0] = 0x48;
    stub[1] = 0xa3;
    memcpy(&stub[2], &stack_top, 8);

    /* movq %rsp, %rax */
    stub[10] = 0x48;
    stub[11] = 0x89;
    stub[12] = 0xe0;

    /* movabsq $stack_bottom - 8, %rsp */
    stub[13] = 0x48;
    stub[14] = 0xbc;
    memcpy(&stub[15], &stack_top, 8);

    /* pushq %rax */
    stub[23] = 0x50;

    /* jmp target_va (rel32 is relative to the end of the 5-byte insn). */
    stub[24] = 0xe9;
    jmp_rel = target_va - (stub_va + 29);
    memcpy(&stub[25], &jmp_rel, 4);

    /* Round up to a multiple of 16 bytes. */
    return 32;
}
285 | | |
/* Per-CPU stub area (VA + MFN) holding the SYSCALL entry trampolines. */
DEFINE_PER_CPU(struct stubs, stubs);

/* Assembly entry points targeted by the LSTAR and CSTAR trampolines. */
void lstar_enter(void);
void cstar_enter(void);
289 | | |
/*
 * Per-CPU setup of the 64-bit system call entry points: write the
 * SYSCALL trampolines into this CPU's stub page and program the
 * LSTAR/CSTAR/SYSENTER/STAR MSRs accordingly.
 */
void subarch_percpu_traps_init(void)
{
    unsigned long stack_bottom = get_stack_bottom();
    unsigned long stub_va = this_cpu(stubs.addr);
    unsigned char *stub_page;
    unsigned int offset;

    /* IST_MAX IST pages + 1 syscall page + 1 guard page + primary stack. */
    BUILD_BUG_ON((IST_MAX + 2) * PAGE_SIZE + PRIMARY_STACK_SIZE > STACK_SIZE);

    stub_page = map_domain_page(_mfn(this_cpu(stubs.mfn)));

    /*
     * Trampoline for SYSCALL entry from 64-bit mode. The VT-x HVM vcpu
     * context switch logic relies on the SYSCALL trampoline being at the
     * start of the stubs.
     */
    wrmsrl(MSR_LSTAR, stub_va);
    offset = write_stub_trampoline(stub_page + (stub_va & ~PAGE_MASK),
                                   stub_va, stack_bottom,
                                   (unsigned long)lstar_enter);
    stub_va += offset;

    /*
     * SYSENTER MSRs are only programmed on Intel/Centaur parts
     * (NOTE(review): presumably other vendors use the SYSCALL path
     * exclusively -- based solely on this vendor check).
     */
    if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL ||
         boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR )
    {
        /* SYSENTER entry. */
        wrmsrl(MSR_IA32_SYSENTER_ESP, stack_bottom);
        wrmsrl(MSR_IA32_SYSENTER_EIP, (unsigned long)sysenter_entry);
        wrmsr(MSR_IA32_SYSENTER_CS, __HYPERVISOR_CS, 0);
    }

    /* Trampoline for SYSCALL entry from compatibility mode. */
    wrmsrl(MSR_CSTAR, stub_va);
    offset += write_stub_trampoline(stub_page + (stub_va & ~PAGE_MASK),
                                    stub_va, stack_bottom,
                                    (unsigned long)cstar_enter);

    /* Don't consume more than half of the stub space here. */
    ASSERT(offset <= STUB_BUF_SIZE / 2);

    unmap_domain_page(stub_page);

    /* Common SYSCALL parameters. */
    wrmsrl(MSR_STAR, XEN_MSR_STAR);
    wrmsrl(MSR_SYSCALL_MASK, XEN_SYSCALL_MASK);
}
337 | | |
338 | | void hypercall_page_initialise(struct domain *d, void *hypercall_page) |
339 | 2 | { |
340 | 2 | memset(hypercall_page, 0xCC, PAGE_SIZE); |
341 | 2 | if ( is_hvm_domain(d) ) |
342 | 2 | hvm_hypercall_page_initialise(d, hypercall_page); |
343 | 0 | else if ( !is_pv_32bit_domain(d) ) |
344 | 0 | hypercall_page_initialise_ring3_kernel(hypercall_page); |
345 | 0 | else |
346 | 0 | hypercall_page_initialise_ring1_kernel(hypercall_page); |
347 | 2 | } |
348 | | |
349 | | /* |
350 | | * Local variables: |
351 | | * mode: C |
352 | | * c-file-style: "BSD" |
353 | | * c-basic-offset: 4 |
354 | | * tab-width: 4 |
355 | | * indent-tabs-mode: nil |
356 | | * End: |
357 | | */ |