/root/src/xen/xen/common/kexec.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * kexec.c - Achitecture independent kexec code for Xen |
3 | | * |
4 | | * Xen port written by: |
5 | | * - Simon 'Horms' Horman <horms@verge.net.au> |
6 | | * - Magnus Damm <magnus@valinux.co.jp> |
7 | | */ |
8 | | |
9 | | #include <xen/init.h> |
10 | | #include <xen/lib.h> |
11 | | #include <xen/acpi.h> |
12 | | #include <xen/ctype.h> |
13 | | #include <xen/errno.h> |
14 | | #include <xen/guest_access.h> |
15 | | #include <xen/watchdog.h> |
16 | | #include <xen/sched.h> |
17 | | #include <xen/types.h> |
18 | | #include <xen/hypercall.h> |
19 | | #include <xen/kexec.h> |
20 | | #include <xen/keyhandler.h> |
21 | | #include <public/kexec.h> |
22 | | #include <xen/cpumask.h> |
23 | | #include <asm/atomic.h> |
24 | | #include <xen/spinlock.h> |
25 | | #include <xen/version.h> |
26 | | #include <xen/console.h> |
27 | | #include <xen/kexec.h> |
28 | | #include <xen/kimage.h> |
29 | | #include <public/elfnote.h> |
30 | | #include <xsm/xsm.h> |
31 | | #include <xen/cpu.h> |
32 | | #ifdef CONFIG_COMPAT |
33 | | #include <compat/kexec.h> |
34 | | #endif |
35 | | |
bool_t kexecing = FALSE;

/* Memory regions to store the per cpu register state etc. on a crash. */
typedef struct { Elf_Note * start; size_t size; } crash_note_range_t;
static crash_note_range_t * crash_notes;

/* Lock to prevent race conditions when allocating the crash note buffers.
 * It also serves to protect calls to alloc_from_crash_heap when allocating
 * crash note buffers in lower memory. */
static DEFINE_SPINLOCK(crash_notes_lock);

/* The XEN_ELFNOTE_CRASH_INFO note inside CPU0's note buffer (set up by
 * kexec_init_cpu_notes(), filled in by kexec_crash_save_info()). */
static Elf_Note *xen_crash_note;

/* CPUs which have already saved their state into their crash notes. */
static cpumask_t crash_saved_cpus;

/* Loaded images; two slots per type, swapped by kexec_swap_images(). */
static struct kexec_image *kexec_image[KEXEC_IMAGE_NR];

/* Bit positions within kexec_flags, above the per-slot "loaded" bits. */
#define KEXEC_FLAG_DEFAULT_POS (KEXEC_IMAGE_NR + 0)
#define KEXEC_FLAG_CRASH_POS (KEXEC_IMAGE_NR + 1)
#define KEXEC_FLAG_IN_PROGRESS (KEXEC_IMAGE_NR + 2)
#define KEXEC_FLAG_IN_HYPERCALL (KEXEC_IMAGE_NR + 3)

static unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */

/* Static buffer holding the VMCOREINFO ELF note; vmcoreinfo_size counts the
 * descriptor bytes appended so far (0 == not yet saved). */
static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
static size_t vmcoreinfo_size = 0;

/* Reserved crash-kernel area, populated from the "crashkernel" option. */
xen_kexec_reserve_t kexec_crash_area;
paddr_t __initdata kexec_crash_area_limit = ~(paddr_t)0;
/* Parsed <ramsize-range>:<size> entries from "crashkernel="; a zero .size
 * entry terminates the list (see set_kexec_crash_area_size()). */
static struct {
    u64 start, end;
    unsigned long size;
} ranges[16] __initdata;

/* Low crashinfo mode. Start as INVALID so several codepaths can set up
 * defaults without needing to know the state of the others. */
enum low_crashinfo low_crashinfo_mode = LOW_CRASHINFO_INVALID;

/* This value is only considered if low_crash_mode is set to MIN or ALL, so
 * setting a default here is safe. Default to 4GB. This is because the current
 * KEXEC_CMD_get_range compat hypercall truncates 64bit pointers to 32 bits. The
 * typical usecase for crashinfo_maxaddr will be for 64bit Xen with 32bit dom0
 * and 32bit crash kernel. */
static paddr_t __initdata crashinfo_maxaddr = 4ULL << 30;

/* = log base 2 of crashinfo_maxaddr after checking for sanity. Default to
 * larger than the entire physical address space. */
unsigned int __initdata crashinfo_maxaddr_bits = 64;

/* Pointers to keep track of the crash heap region. */
static void *crash_heap_current = NULL, *crash_heap_end = NULL;
87 | | |
88 | | /* |
89 | | * Parse command lines in the format |
90 | | * |
91 | | * crashkernel=<ramsize-range>:<size>[,...][{@,<,below=}<address>] |
92 | | * |
93 | | * with <ramsize-range> being of form |
94 | | * |
95 | | * <start>-[<end>] |
96 | | * |
97 | | * as well as the legacy ones in the format |
98 | | * |
99 | | * crashkernel=<size>[{@,<}<address>] |
100 | | * crashkernel=<size>,below=address |
101 | | * |
102 | | * < and below are synonyomous, the latter being useful for grub2 systems |
103 | | * which would otherwise require escaping of the < option |
104 | | */ |
static int __init parse_crashkernel(const char *str)
{
    const char *cur;
    int rc = 0;

    /* A ':' anywhere selects the <ramsize-range>:<size>[,...] form. */
    if ( strchr(str, ':' ) )
    {
        unsigned int idx = 0;

        do {
            if ( idx >= ARRAY_SIZE(ranges) )
            {
                printk(XENLOG_WARNING "crashkernel: too many ranges\n");
                /* Skip ahead to any @/</,below suffix.  cur == NULL keeps
                 * the suffix handling below well-defined even when strpbrk
                 * finds nothing (str == NULL == cur skips it entirely). */
                cur = NULL;
                str = strpbrk(str, "@,<");
                rc = -EINVAL;
                break;
            }

            /* From the second range on, str points at the ',' separator,
             * hence the "+ !!idx" to step over it. */
            ranges[idx].start = parse_size_and_unit(cur = str + !!idx, &str);
            if ( cur == str )
                break;

            if ( *str != '-' )
            {
                printk(XENLOG_WARNING "crashkernel: '-' expected\n");
                rc = -EINVAL;
                break;
            }

            /* "<start>-:" (no end) means an open-ended range. */
            if ( *++str != ':' )
            {
                ranges[idx].end = parse_size_and_unit(cur = str, &str);
                if ( cur == str )
                    break;
                if ( ranges[idx].end <= ranges[idx].start )
                {
                    printk(XENLOG_WARNING "crashkernel: end <= start\n");
                    rc = -EINVAL;
                    break;
                }
            }
            else
                ranges[idx].end = -1;

            if ( *str != ':' )
            {
                printk(XENLOG_WARNING "crashkernel: ':' expected\n");
                rc = -EINVAL;
                break;
            }

            ranges[idx].size = parse_size_and_unit(cur = str + 1, &str);
            if ( cur == str )
                break;

            ++idx;
        } while ( *str == ',' );
        /* Zero-size entry terminates the list for later consumers. */
        if ( idx < ARRAY_SIZE(ranges) )
            ranges[idx].size = 0;
    }
    else
        /* Legacy form: a bare size, optionally followed by a suffix. */
        kexec_crash_area.size = parse_size_and_unit(cur = str, &str);
    /* cur != str: a value was consumed; look for an @/</,below suffix. */
    if ( cur != str )
    {
        if ( *str == '@' )
            kexec_crash_area.start = parse_size_and_unit(cur = str + 1, &str);
        else if ( *str == '<' )
            kexec_crash_area_limit = parse_size_and_unit(cur = str + 1, &str);
        else if ( !strncmp(str, ",below=", 7) )
            kexec_crash_area_limit = parse_size_and_unit(cur = str + 7, &str);
        else
        {
            printk(XENLOG_WARNING "crashkernel: '%s' ignored\n", str);
            rc = -EINVAL;
        }
    }
    /* cur == str after a parse attempt means no number was consumed. */
    if ( cur && cur == str )
    {
        printk(XENLOG_WARNING "crashkernel: memory value expected\n");
        rc = -EINVAL;
    }

    return rc;
}
190 | | custom_param("crashkernel", parse_crashkernel); |
191 | | |
192 | | /* Parse command lines in the format: |
193 | | * |
194 | | * low_crashinfo=[none,min,all] |
195 | | * |
196 | | * - none disables the low allocation of crash info. |
197 | | * - min will allocate enough low information for the crash kernel to be able |
198 | | * to extract the hypervisor and dom0 message ring buffers. |
199 | | * - all will allocate additional structures such as domain and vcpu structs |
200 | | * low so the crash kernel can perform an extended analysis of state. |
201 | | */ |
202 | | static int __init parse_low_crashinfo(const char *str) |
203 | 0 | { |
204 | 0 |
|
205 | 0 | if ( !strlen(str) ) |
206 | 0 | /* default to min if user just specifies "low_crashinfo" */ |
207 | 0 | low_crashinfo_mode = LOW_CRASHINFO_MIN; |
208 | 0 | else if ( !strcmp(str, "none" ) ) |
209 | 0 | low_crashinfo_mode = LOW_CRASHINFO_NONE; |
210 | 0 | else if ( !strcmp(str, "min" ) ) |
211 | 0 | low_crashinfo_mode = LOW_CRASHINFO_MIN; |
212 | 0 | else if ( !strcmp(str, "all" ) ) |
213 | 0 | low_crashinfo_mode = LOW_CRASHINFO_ALL; |
214 | 0 | else |
215 | 0 | { |
216 | 0 | printk("Unknown low_crashinfo parameter '%s'. Defaulting to min.\n", str); |
217 | 0 | low_crashinfo_mode = LOW_CRASHINFO_MIN; |
218 | 0 | return -EINVAL; |
219 | 0 | } |
220 | 0 |
|
221 | 0 | return 0; |
222 | 0 | } |
223 | | custom_param("low_crashinfo", parse_low_crashinfo); |
224 | | |
225 | | /* Parse command lines in the format: |
226 | | * |
227 | | * crashinfo_maxaddr=<addr> |
228 | | * |
229 | | * <addr> will be rounded down to the nearest power of two. Defaults to 64G |
230 | | */ |
231 | | static int __init parse_crashinfo_maxaddr(const char *str) |
232 | 0 | { |
233 | 0 | u64 addr; |
234 | 0 | const char *q; |
235 | 0 |
|
236 | 0 | /* if low_crashinfo_mode is unset, default to min. */ |
237 | 0 | if ( low_crashinfo_mode == LOW_CRASHINFO_INVALID ) |
238 | 0 | low_crashinfo_mode = LOW_CRASHINFO_MIN; |
239 | 0 |
|
240 | 0 | if ( (addr = parse_size_and_unit(str, &q)) ) |
241 | 0 | crashinfo_maxaddr = addr; |
242 | 0 | else |
243 | 0 | { |
244 | 0 | printk("Unable to parse crashinfo_maxaddr. Defaulting to %"PRIpaddr"\n", |
245 | 0 | crashinfo_maxaddr); |
246 | 0 | return -EINVAL; |
247 | 0 | } |
248 | 0 |
|
249 | 0 | return *q ? -EINVAL : 0; |
250 | 0 | } |
251 | | custom_param("crashinfo_maxaddr", parse_crashinfo_maxaddr); |
252 | | |
253 | | void __init set_kexec_crash_area_size(u64 system_ram) |
254 | 1 | { |
255 | 1 | unsigned int idx; |
256 | 1 | |
257 | 1 | for ( idx = 0; idx < ARRAY_SIZE(ranges) && !kexec_crash_area.size; ++idx ) |
258 | 1 | { |
259 | 1 | if ( !ranges[idx].size ) |
260 | 1 | break; |
261 | 1 | |
262 | 0 | if ( ranges[idx].size >= system_ram ) |
263 | 0 | { |
264 | 0 | printk(XENLOG_WARNING "crashkernel: invalid size\n"); |
265 | 0 | continue; |
266 | 0 | } |
267 | 0 |
|
268 | 0 | if ( ranges[idx].start <= system_ram && ranges[idx].end > system_ram ) |
269 | 0 | kexec_crash_area.size = ranges[idx].size; |
270 | 0 | } |
271 | 1 | } |
272 | | |
273 | | /* |
274 | | * Only allow one cpu to continue on the crash path, forcing others to spin. |
275 | | * Racing on the crash path from here will end in misery. If we reenter, |
276 | | * something has very gone wrong and retrying will (almost certainly) be |
277 | | * futile. Return up to our nested panic() to try and reboot. |
278 | | * |
279 | | * This is noinline to make it obvious in stack traces which cpus have lost |
280 | | * the race (as opposed to being somewhere in kexec_common_shutdown()) |
281 | | */ |
static int noinline one_cpu_only(void)
{
    /* -1 == no cpu has claimed the crash path yet. */
    static unsigned int crashing_cpu = -1;
    unsigned int cpu = smp_processor_id();

    /* Atomically claim the crash path; exactly one cpu sees -1 here. */
    if ( cmpxchg(&crashing_cpu, -1, cpu) != -1 )
    {
        /* Not the first entry into one_cpu_only(). */
        if ( crashing_cpu == cpu )
        {
            printk("Reentered the crash path. Something is very broken\n");
            return -EBUSY;
        }

        /*
         * Another cpu has beaten us to this point. Wait here patiently for
         * it to kill us.
         */
        for ( ; ; )
            halt();
    }

    set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags);
    printk("Executing kexec image on cpu%u\n", cpu);

    return 0;
}
309 | | |
310 | | /* Save the registers in the per-cpu crash note buffer. */ |
void kexec_crash_save_cpu(void)
{
    int cpu = smp_processor_id();
    Elf_Note *note;
    ELF_Prstatus *prstatus;
    crash_xen_core_t *xencore;

    /* Note buffers are set up at init/CPU-up time; they must exist here. */
    BUG_ON ( ! crash_notes );

    /* Only save state once per cpu; later calls are no-ops. */
    if ( cpumask_test_and_set_cpu(cpu, &crash_saved_cpus) )
        return;

    note = crash_notes[cpu].start;

    /* First note in the buffer is the PRSTATUS ("CORE") note -- layout
     * established by kexec_init_cpu_notes(). */
    prstatus = (ELF_Prstatus *)ELFNOTE_DESC(note);

    /* Second note carries the Xen crash registers. */
    note = ELFNOTE_NEXT(note);
    xencore = (crash_xen_core_t *)ELFNOTE_DESC(note);

    elf_core_save_regs(&prstatus->pr_reg, xencore);
}
332 | | |
333 | | /* Set up the single Xen-specific-info crash note. */ |
crash_xen_info_t *kexec_crash_save_info(void)
{
    int cpu = smp_processor_id();
    crash_xen_info_t info;
    crash_xen_info_t *out = (crash_xen_info_t *)ELFNOTE_DESC(xen_crash_note);

    /* This cpu must already have saved its register state (its bit in
     * crash_saved_cpus must be set), hence the BUG_ON on test_and_set. */
    BUG_ON(!cpumask_test_and_set_cpu(cpu, &crash_saved_cpus));

    memset(&info, 0, sizeof(info));
    info.xen_major_version = xen_major_version();
    info.xen_minor_version = xen_minor_version();
    /* String fields are recorded as physical addresses for crash tools. */
    info.xen_extra_version = __pa(xen_extra_version());
    info.xen_changeset = __pa(xen_changeset());
    info.xen_compiler = __pa(xen_compiler());
    info.xen_compile_date = __pa(xen_compile_date());
    info.xen_compile_time = __pa(xen_compile_time());
    info.tainted = tainted;

    /* Copy from guaranteed-aligned local copy to possibly-unaligned dest. */
    memcpy(out, &info, sizeof(info));

    return out;
}
357 | | |
/* Shutdown work common to the reboot-kexec and crash-kexec paths: claim
 * single-cpu ownership, then quiesce watchdog/console/lock-debug/DMAR. */
static int kexec_common_shutdown(void)
{
    int rc = one_cpu_only();

    if ( rc != 0 )
        return rc;

    watchdog_disable();
    console_start_sync();
    spin_debug_disable();
    acpi_dmar_reinstate();

    return 0;
}
373 | | |
374 | | void kexec_crash(void) |
375 | 0 | { |
376 | 0 | int pos; |
377 | 0 |
|
378 | 0 | pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0); |
379 | 0 | if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) ) |
380 | 0 | return; |
381 | 0 |
|
382 | 0 | kexecing = TRUE; |
383 | 0 |
|
384 | 0 | if ( kexec_common_shutdown() != 0 ) |
385 | 0 | return; |
386 | 0 |
|
387 | 0 | kexec_crash_save_cpu(); |
388 | 0 | machine_crash_shutdown(); |
389 | 0 | machine_kexec(kexec_image[KEXEC_IMAGE_CRASH_BASE + pos]); |
390 | 0 |
|
391 | 0 | BUG(); |
392 | 0 | } |
393 | | |
394 | | static long kexec_reboot(void *_image) |
395 | 0 | { |
396 | 0 | struct kexec_image *image = _image; |
397 | 0 |
|
398 | 0 | kexecing = TRUE; |
399 | 0 |
|
400 | 0 | kexec_common_shutdown(); |
401 | 0 | machine_reboot_kexec(image); |
402 | 0 |
|
403 | 0 | BUG(); |
404 | 0 | return 0; |
405 | 0 | } |
406 | | |
/* Keyhandler ('C'): deliberately trigger the crash-kexec path. */
static void do_crashdump_trigger(unsigned char key)
{
    printk("'%c' pressed -> triggering crashdump\n", key);
    kexec_crash();
    /* kexec_crash() only returns when no crash image is loaded. */
    printk(" * no crash kernel loaded!\n");
}
413 | | |
414 | | static void setup_note(Elf_Note *n, const char *name, int type, int descsz) |
415 | 0 | { |
416 | 0 | int l = strlen(name) + 1; |
417 | 0 | strlcpy(ELFNOTE_NAME(n), name, l); |
418 | 0 | n->namesz = l; |
419 | 0 | n->descsz = descsz; |
420 | 0 | n->type = type; |
421 | 0 | } |
422 | | |
423 | | static size_t sizeof_note(const char *name, int descsz) |
424 | 0 | { |
425 | 0 | return (sizeof(Elf_Note) + |
426 | 0 | ELFNOTE_ALIGN(strlen(name)+1) + |
427 | 0 | ELFNOTE_ALIGN(descsz)); |
428 | 0 | } |
429 | | |
430 | | static size_t sizeof_cpu_notes(const unsigned long cpu) |
431 | 0 | { |
432 | 0 | /* All CPUs present a PRSTATUS and crash_xen_core note. */ |
433 | 0 | size_t bytes = |
434 | 0 | + sizeof_note("CORE", sizeof(ELF_Prstatus)) + |
435 | 0 | + sizeof_note("Xen", sizeof(crash_xen_core_t)); |
436 | 0 |
|
437 | 0 | /* CPU0 also presents the crash_xen_info note. */ |
438 | 0 | if ( ! cpu ) |
439 | 0 | bytes = bytes + |
440 | 0 | sizeof_note("Xen", sizeof(crash_xen_info_t)); |
441 | 0 |
|
442 | 0 | return bytes; |
443 | 0 | } |
444 | | |
445 | | /* Allocate size_t bytes of space from the previously allocated |
446 | | * crash heap if the user has requested that crash notes be allocated |
447 | | * in lower memory. There is currently no case where the crash notes |
448 | | * should be free()'d. */ |
449 | | static void * alloc_from_crash_heap(const size_t bytes) |
450 | 0 | { |
451 | 0 | void * ret; |
452 | 0 | if ( crash_heap_current + bytes > crash_heap_end ) |
453 | 0 | return NULL; |
454 | 0 | ret = (void*)crash_heap_current; |
455 | 0 | crash_heap_current += bytes; |
456 | 0 | return ret; |
457 | 0 | } |
458 | | |
459 | | /* Allocate a crash note buffer for a newly onlined cpu. */ |
/* Returns 0 on success, or if another CPU already holds the allocation;
 * -ENOMEM only when this CPU tried and failed to allocate. */
static int kexec_init_cpu_notes(const unsigned long cpu)
{
    Elf_Note * note = NULL;
    int ret = 0;
    int nr_bytes = 0;

    BUG_ON( cpu >= nr_cpu_ids || ! crash_notes );

    /* If already allocated, nothing to do. */
    if ( crash_notes[cpu].start )
        return ret;

    nr_bytes = sizeof_cpu_notes(cpu);

    /* If we don't care about the position of allocation, malloc.
     * (Done outside the lock: xzalloc may sleep/contend.) */
    if ( low_crashinfo_mode == LOW_CRASHINFO_NONE )
        note = xzalloc_bytes(nr_bytes);

    /* Protect the write into crash_notes[] with a spinlock, as this function
     * is on a hotplug path and a hypercall path. */
    spin_lock(&crash_notes_lock);

    /* If we are racing with another CPU and it has beaten us, give up
     * gracefully. */
    if ( crash_notes[cpu].start )
    {
        spin_unlock(&crash_notes_lock);
        /* Always return ok, because whether we successfully allocated or not,
         * another CPU has successfully allocated. */
        xfree(note);
    }
    else
    {
        /* If we care about memory position, alloc from the crash heap,
         * also protected by the crash_notes_lock. */
        if ( low_crashinfo_mode > LOW_CRASHINFO_NONE )
            note = alloc_from_crash_heap(nr_bytes);

        crash_notes[cpu].start = note;
        crash_notes[cpu].size = nr_bytes;
        spin_unlock(&crash_notes_lock);

        /* If the allocation failed, and another CPU did not beat us, give
         * up with ENOMEM. */
        if ( ! note )
            ret = -ENOMEM;
        /* else all is good so lets set up the notes. */
        else
        {
            /* Set up CORE note. */
            setup_note(note, "CORE", NT_PRSTATUS, sizeof(ELF_Prstatus));
            note = ELFNOTE_NEXT(note);

            /* Set up Xen CORE note. */
            setup_note(note, "Xen", XEN_ELFNOTE_CRASH_REGS,
                       sizeof(crash_xen_core_t));

            if ( ! cpu )
            {
                /* Set up Xen Crash Info note (CPU0 only). */
                xen_crash_note = note = ELFNOTE_NEXT(note);
                setup_note(note, "Xen", XEN_ELFNOTE_CRASH_INFO,
                           sizeof(crash_xen_info_t));
            }
        }
    }

    return ret;
}
529 | | |
530 | | static int cpu_callback( |
531 | | struct notifier_block *nfb, unsigned long action, void *hcpu) |
532 | 0 | { |
533 | 0 | unsigned long cpu = (unsigned long)hcpu; |
534 | 0 |
|
535 | 0 | /* Only hook on CPU_UP_PREPARE because once a crash_note has been reported |
536 | 0 | * to dom0, it must keep it around in case of a crash, as the crash kernel |
537 | 0 | * will be hard coded to the original physical address reported. */ |
538 | 0 | switch ( action ) |
539 | 0 | { |
540 | 0 | case CPU_UP_PREPARE: |
541 | 0 | /* Ignore return value. If this boot time, -ENOMEM will cause all |
542 | 0 | * manner of problems elsewhere very soon, and if it is during runtime, |
543 | 0 | * then failing to allocate crash notes is not a good enough reason to |
544 | 0 | * fail the CPU_UP_PREPARE */ |
545 | 0 | kexec_init_cpu_notes(cpu); |
546 | 0 | break; |
547 | 0 | default: |
548 | 0 | break; |
549 | 0 | } |
550 | 0 | return NOTIFY_DONE; |
551 | 0 | } |
552 | | |
/* Notifier block registering cpu_callback() for hotplug events. */
static struct notifier_block cpu_nfb = {
    .notifier_call = cpu_callback
};
556 | | |
557 | | void __init kexec_early_calculations(void) |
558 | 1 | { |
559 | 1 | /* If low_crashinfo_mode is still INVALID, neither "low_crashinfo" nor |
560 | 1 | * "crashinfo_maxaddr" have been specified on the command line, so |
561 | 1 | * explicitly set to NONE. */ |
562 | 1 | if ( low_crashinfo_mode == LOW_CRASHINFO_INVALID ) |
563 | 1 | low_crashinfo_mode = LOW_CRASHINFO_NONE; |
564 | 1 | |
565 | 1 | if ( low_crashinfo_mode > LOW_CRASHINFO_NONE ) |
566 | 0 | crashinfo_maxaddr_bits = fls64(crashinfo_maxaddr) - 1; |
567 | 1 | } |
568 | | |
static int __init kexec_init(void)
{
    void *cpu = (void *)(unsigned long)smp_processor_id();

    /* If no crash area, no need to allocate space for notes. */
    if ( !kexec_crash_area.size )
        return 0;

    if ( low_crashinfo_mode > LOW_CRASHINFO_NONE )
    {
        size_t crash_heap_size;

        /* This calculation is safe even if the machine is booted in
         * uniprocessor mode: CPU0's notes are larger (extra info note),
         * all other cpus share the smaller size. */
        crash_heap_size = sizeof_cpu_notes(0) +
            sizeof_cpu_notes(1) * (nr_cpu_ids - 1);
        crash_heap_size = PAGE_ALIGN(crash_heap_size);

        /* Heap must live below crashinfo_maxaddr, hence MEMF_bits(). */
        crash_heap_current = alloc_xenheap_pages(
            get_order_from_bytes(crash_heap_size),
            MEMF_bits(crashinfo_maxaddr_bits) );

        if ( ! crash_heap_current )
            return -ENOMEM;

        memset(crash_heap_current, 0, crash_heap_size);

        crash_heap_end = crash_heap_current + crash_heap_size;
    }

    /* crash_notes may be allocated anywhere Xen can reach in memory.
       Only the individual CPU crash notes themselves must be allocated
       in lower memory if requested. */
    crash_notes = xzalloc_array(crash_note_range_t, nr_cpu_ids);
    if ( ! crash_notes )
        return -ENOMEM;

    register_keyhandler('C', do_crashdump_trigger, "trigger a crashdump", 0);

    /* Allocate the boot cpu's notes now; APs are covered by the notifier
     * (this runs presmp, before APs are brought up). */
    cpu_callback(&cpu_nfb, CPU_UP_PREPARE, cpu);
    register_cpu_notifier(&cpu_nfb);
    return 0;
}
612 | | /* The reason for this to be a presmp_initcall as opposed to a regular |
613 | | * __initcall is to allow the setup of the cpu hotplug handler before APs are |
614 | | * brought up. */ |
615 | | presmp_initcall(kexec_init); |
616 | | |
617 | | static int kexec_get_reserve(xen_kexec_range_t *range) |
618 | 0 | { |
619 | 0 | if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) { |
620 | 0 | range->start = kexec_crash_area.start; |
621 | 0 | range->size = kexec_crash_area.size; |
622 | 0 | } |
623 | 0 | else |
624 | 0 | range->start = range->size = 0; |
625 | 0 | return 0; |
626 | 0 | } |
627 | | |
628 | | static int kexec_get_cpu(xen_kexec_range_t *range) |
629 | 0 | { |
630 | 0 | int nr = range->nr; |
631 | 0 |
|
632 | 0 | if ( nr < 0 || nr >= nr_cpu_ids ) |
633 | 0 | return -ERANGE; |
634 | 0 |
|
635 | 0 | if ( ! crash_notes ) |
636 | 0 | return -EINVAL; |
637 | 0 |
|
638 | 0 | /* Try once again to allocate room for the crash notes. It is just possible |
639 | 0 | * that more space has become available since we last tried. If space has |
640 | 0 | * already been allocated, kexec_init_cpu_notes() will return early with 0. |
641 | 0 | */ |
642 | 0 | kexec_init_cpu_notes(nr); |
643 | 0 |
|
644 | 0 | /* In the case of still not having enough memory to allocate buffer room, |
645 | 0 | * returning a range of 0,0 is still valid. */ |
646 | 0 | if ( crash_notes[nr].start ) |
647 | 0 | { |
648 | 0 | range->start = __pa(crash_notes[nr].start); |
649 | 0 | range->size = crash_notes[nr].size; |
650 | 0 | } |
651 | 0 | else |
652 | 0 | range->start = range->size = 0; |
653 | 0 |
|
654 | 0 | return 0; |
655 | 0 | } |
656 | | |
657 | | static int kexec_get_vmcoreinfo(xen_kexec_range_t *range) |
658 | 0 | { |
659 | 0 | range->start = __pa((unsigned long)vmcoreinfo_data); |
660 | 0 | range->size = VMCOREINFO_BYTES; |
661 | 0 | return 0; |
662 | 0 | } |
663 | | |
664 | | static int kexec_get_range_internal(xen_kexec_range_t *range) |
665 | 0 | { |
666 | 0 | int ret = -EINVAL; |
667 | 0 |
|
668 | 0 | switch ( range->range ) |
669 | 0 | { |
670 | 0 | case KEXEC_RANGE_MA_CRASH: |
671 | 0 | ret = kexec_get_reserve(range); |
672 | 0 | break; |
673 | 0 | case KEXEC_RANGE_MA_CPU: |
674 | 0 | ret = kexec_get_cpu(range); |
675 | 0 | break; |
676 | 0 | case KEXEC_RANGE_MA_VMCOREINFO: |
677 | 0 | ret = kexec_get_vmcoreinfo(range); |
678 | 0 | break; |
679 | 0 | default: |
680 | 0 | ret = machine_kexec_get(range); |
681 | 0 | break; |
682 | 0 | } |
683 | 0 |
|
684 | 0 | return ret; |
685 | 0 | } |
686 | | |
687 | | static int kexec_get_range(XEN_GUEST_HANDLE_PARAM(void) uarg) |
688 | 0 | { |
689 | 0 | xen_kexec_range_t range; |
690 | 0 | int ret = -EINVAL; |
691 | 0 |
|
692 | 0 | if ( unlikely(copy_from_guest(&range, uarg, 1)) ) |
693 | 0 | return -EFAULT; |
694 | 0 |
|
695 | 0 | ret = kexec_get_range_internal(&range); |
696 | 0 |
|
697 | 0 | if ( ret == 0 && unlikely(__copy_to_guest(uarg, &range, 1)) ) |
698 | 0 | ret = -EFAULT; |
699 | 0 |
|
700 | 0 | return ret; |
701 | 0 | } |
702 | | |
/* 32bit-compat handler for KEXEC_CMD_kexec_get_range. */
static int kexec_get_range_compat(XEN_GUEST_HANDLE_PARAM(void) uarg)
{
#ifdef CONFIG_COMPAT
    xen_kexec_range_t range;
    compat_kexec_range_t compat_range;
    int ret = -EINVAL;

    if ( unlikely(copy_from_guest(&compat_range, uarg, 1)) )
        return -EFAULT;

    /* Widen the 32bit guest structure to the native layout. */
    XLAT_kexec_range(&range, &compat_range);

    ret = kexec_get_range_internal(&range);

    /* Don't silently truncate physical addresses or sizes.
     * NOTE(review): this check runs even when ret != 0, and can replace an
     * internal error with -ERANGE -- confirm this ordering is intended. */
    if ( (range.start | range.size) & ~(unsigned long)(~0u) )
        return -ERANGE;

    if ( ret == 0 )
    {
        XLAT_kexec_range(&compat_range, &range);
        if ( unlikely(__copy_to_guest(uarg, &compat_range, 1)) )
            ret = -EFAULT;
    }

    return ret;
#else /* CONFIG_COMPAT */
    return 0;
#endif /* CONFIG_COMPAT */
}
733 | | |
734 | | static int kexec_load_get_bits(int type, int *base, int *bit) |
735 | 0 | { |
736 | 0 | switch ( type ) |
737 | 0 | { |
738 | 0 | case KEXEC_TYPE_DEFAULT: |
739 | 0 | *base = KEXEC_IMAGE_DEFAULT_BASE; |
740 | 0 | *bit = KEXEC_FLAG_DEFAULT_POS; |
741 | 0 | break; |
742 | 0 | case KEXEC_TYPE_CRASH: |
743 | 0 | *base = KEXEC_IMAGE_CRASH_BASE; |
744 | 0 | *bit = KEXEC_FLAG_CRASH_POS; |
745 | 0 | break; |
746 | 0 | default: |
747 | 0 | return -1; |
748 | 0 | } |
749 | 0 | return 0; |
750 | 0 | } |
751 | | |
752 | | void vmcoreinfo_append_str(const char *fmt, ...) |
753 | 0 | { |
754 | 0 | va_list args; |
755 | 0 | char buf[0x50]; |
756 | 0 | int r; |
757 | 0 | size_t note_size = sizeof(Elf_Note) + ELFNOTE_ALIGN(strlen(VMCOREINFO_NOTE_NAME) + 1); |
758 | 0 |
|
759 | 0 | if (vmcoreinfo_size + note_size + sizeof(buf) > VMCOREINFO_BYTES) |
760 | 0 | return; |
761 | 0 |
|
762 | 0 | va_start(args, fmt); |
763 | 0 | r = vsnprintf(buf, sizeof(buf), fmt, args); |
764 | 0 | va_end(args); |
765 | 0 |
|
766 | 0 | memcpy(&vmcoreinfo_data[note_size + vmcoreinfo_size], buf, r); |
767 | 0 |
|
768 | 0 | vmcoreinfo_size += r; |
769 | 0 | } |
770 | | |
/* One-time population of the VMCOREINFO note with the symbols, sizes and
 * offsets a crash kernel needs to interpret this hypervisor's memory. */
static void crash_save_vmcoreinfo(void)
{
    size_t data_size;

    if (vmcoreinfo_size > 0) /* already saved */
        return;

    /* Descriptor capacity: buffer minus note header and aligned name. */
    data_size = VMCOREINFO_BYTES - (sizeof(Elf_Note) + ELFNOTE_ALIGN(strlen(VMCOREINFO_NOTE_NAME) + 1));
    setup_note((Elf_Note *)vmcoreinfo_data, VMCOREINFO_NOTE_NAME, 0, data_size);

    VMCOREINFO_PAGESIZE(PAGE_SIZE);

    VMCOREINFO_SYMBOL(domain_list);
#ifndef frame_table
    VMCOREINFO_SYMBOL(frame_table);
#else
    /* frame_table may be a macro (computed address); emit the resolved
     * value via a static alias in that case. */
    {
        static const void *const _frame_table = frame_table;
        VMCOREINFO_SYMBOL_ALIAS(frame_table, _frame_table);
    }
#endif
    VMCOREINFO_SYMBOL(max_page);

    VMCOREINFO_STRUCT_SIZE(page_info);
    VMCOREINFO_STRUCT_SIZE(domain);

    VMCOREINFO_OFFSET(page_info, count_info);
    VMCOREINFO_OFFSET_SUB(page_info, v.inuse, _domain);
    VMCOREINFO_OFFSET(domain, domain_id);
    VMCOREINFO_OFFSET(domain, next_in_list);

#ifdef ARCH_CRASH_SAVE_VMCOREINFO
    arch_crash_save_vmcoreinfo();
#endif
}
806 | | |
/* Tear down a previously-loaded image: arch state first, then the image. */
static void kexec_unload_image(struct kexec_image *image)
{
    if ( image == NULL )
        return;

    machine_kexec_unload(image);
    kimage_free(image);
}
815 | | |
/* Handler for KEXEC_CMD_kexec: jump into a previously loaded image. */
static int kexec_exec(XEN_GUEST_HANDLE_PARAM(void) uarg)
{
    xen_kexec_exec_t exec;
    struct kexec_image *image;
    int base, bit, pos, ret = -EINVAL;

    if ( unlikely(copy_from_guest(&exec, uarg, 1)) )
        return -EFAULT;

    if ( kexec_load_get_bits(exec.type, &base, &bit) )
        return -EINVAL;

    /* 'bit' says which of the two slots currently holds the image. */
    pos = (test_bit(bit, &kexec_flags) != 0);

    /* Only allow kexec/kdump into loaded images */
    if ( !test_bit(base + pos, &kexec_flags) )
        return -ENOENT;

    switch (exec.type)
    {
    case KEXEC_TYPE_DEFAULT:
        image = kexec_image[base + pos];
        /* Reboot is continued on CPU0 and does not return on success.
         * NOTE(review): ret is assigned here but the function still falls
         * through to return -EINVAL -- confirm against
         * continue_hypercall_on_cpu() failure semantics. */
        ret = continue_hypercall_on_cpu(0, kexec_reboot, image);
        break;
    case KEXEC_TYPE_CRASH:
        kexec_crash(); /* Does not return */
        break;
    }

    return -EINVAL; /* never reached */
}
847 | | |
/* Install 'new' (may be NULL) as the active image of 'type', returning the
 * previously active image via 'old' for the caller to free. */
static int kexec_swap_images(int type, struct kexec_image *new,
                             struct kexec_image **old)
{
    int base, bit, pos;
    int new_slot, old_slot;

    *old = NULL;

    /* No image juggling once a kexec is underway. */
    if ( test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
        return -EBUSY;

    if ( kexec_load_get_bits(type, &base, &bit) )
        return -EINVAL;

    /* Caller must be inside the serialised hypercall section. */
    ASSERT(test_bit(KEXEC_FLAG_IN_HYPERCALL, &kexec_flags));

    /* Two slots per type; 'bit' selects the currently active one. */
    pos = (test_bit(bit, &kexec_flags) != 0);
    old_slot = base + pos;
    new_slot = base + !pos;

    /* Publish the new image and mark its slot loaded before flipping the
     * active-slot bit, so a crash at any instant sees a consistent image. */
    kexec_image[new_slot] = new;
    if ( new )
        set_bit(new_slot, &kexec_flags);
    change_bit(bit, &kexec_flags);

    /* Retire the old slot and hand its image back to the caller. */
    clear_bit(old_slot, &kexec_flags);
    *old = kexec_image[old_slot];

    return 0;
}
878 | | |
879 | | static int kexec_load_slot(struct kexec_image *kimage) |
880 | 0 | { |
881 | 0 | struct kexec_image *old_kimage; |
882 | 0 | int ret = -ENOMEM; |
883 | 0 |
|
884 | 0 | ret = machine_kexec_load(kimage); |
885 | 0 | if ( ret < 0 ) |
886 | 0 | return ret; |
887 | 0 |
|
888 | 0 | crash_save_vmcoreinfo(); |
889 | 0 |
|
890 | 0 | ret = kexec_swap_images(kimage->type, kimage, &old_kimage); |
891 | 0 | if ( ret < 0 ) |
892 | 0 | return ret; |
893 | 0 |
|
894 | 0 | kexec_unload_image(old_kimage); |
895 | 0 |
|
896 | 0 | return 0; |
897 | 0 | } |
898 | | |
/* ELF machine type implied by the V1 load interface (which carries none):
 * match the hardware domain's bitness on x86, EM_NONE elsewhere. */
static uint16_t kexec_load_v1_arch(void)
{
#ifdef CONFIG_X86
    return is_pv_32bit_domain(hardware_domain) ? EM_386 : EM_X86_64;
#else
    return EM_NONE;
#endif
}
907 | | |
908 | | static int kexec_segments_add_segment(unsigned int *nr_segments, |
909 | | xen_kexec_segment_t *segments, |
910 | | mfn_t mfn) |
911 | 0 | { |
912 | 0 | paddr_t maddr = mfn_to_maddr(mfn); |
913 | 0 | unsigned int n = *nr_segments; |
914 | 0 |
|
915 | 0 | /* Need a new segment? */ |
916 | 0 | if ( n == 0 |
917 | 0 | || segments[n-1].dest_maddr + segments[n-1].dest_size != maddr ) |
918 | 0 | { |
919 | 0 | n++; |
920 | 0 | if ( n > KEXEC_SEGMENT_MAX ) |
921 | 0 | return -EINVAL; |
922 | 0 | *nr_segments = n; |
923 | 0 |
|
924 | 0 | set_xen_guest_handle(segments[n-1].buf.h, NULL); |
925 | 0 | segments[n-1].buf_size = 0; |
926 | 0 | segments[n-1].dest_maddr = maddr; |
927 | 0 | segments[n-1].dest_size = 0; |
928 | 0 | } |
929 | 0 |
|
930 | 0 | return 0; |
931 | 0 | } |
932 | | |
/*
 * Build the destination-only segment list for an image by walking its
 * guest-supplied indirection page chain.
 *
 * @mfn:         MFN of the first indirection page.
 * @nr_segments: in/out number of entries used in @segments.
 * @segments:    caller-provided array (sized KEXEC_SEGMENT_MAX by the
 *               caller in this file).
 * @compat:      non-zero if entries are in the 32-bit (compat) layout.
 *
 * Returns 0 on success, -EINVAL on a malformed entry stream (unknown
 * tag, or IND_SOURCE before any IND_DESTINATION).
 */
static int kexec_segments_from_ind_page(mfn_t mfn,
                                        unsigned int *nr_segments,
                                        xen_kexec_segment_t *segments,
                                        bool_t compat)
{
    void *page;
    kimage_entry_t *entry;
    int ret = 0;

    page = map_domain_page(mfn);

    /*
     * Walk the indirection page list, adding destination pages to the
     * segments.  Exactly one indirection page is mapped at a time;
     * 'page' always tracks the current mapping so 'done:' can undo it.
     */
    for ( entry = page; ; )
    {
        unsigned long ind;

        ind = kimage_entry_ind(entry, compat);
        mfn = kimage_entry_mfn(entry, compat);

        switch ( ind )
        {
        case IND_DESTINATION:
            /* Open (or extend) a segment at this destination page. */
            ret = kexec_segments_add_segment(nr_segments, segments, mfn);
            if ( ret < 0 )
                goto done;
            break;
        case IND_INDIRECTION:
            /* Chain to the next indirection page and restart there. */
            unmap_domain_page(page);
            entry = page = map_domain_page(mfn);
            continue;
        case IND_DONE:
            goto done;
        case IND_SOURCE:
            /* A source page is only legal after a destination entry. */
            if ( *nr_segments == 0 )
            {
                ret = -EINVAL;
                goto done;
            }
            segments[*nr_segments-1].dest_size += PAGE_SIZE;
            break;
        default:
            ret = -EINVAL;
            goto done;
        }
        entry = kimage_entry_next(entry, compat);
    }
done:
    unmap_domain_page(page);
    return ret;
}
986 | | |
/*
 * Common implementation of the v1 KEXEC_CMD_kexec_load_v1 hypercall
 * for native (@compat == 0) and 32-bit (@compat != 0) callers.
 *
 * Ownership note: on successful kimage_alloc() the @segments array
 * appears to become owned by the image (the error path only frees it
 * when kimage is still NULL, and otherwise relies on kimage_free()) —
 * NOTE(review): confirm against kimage_alloc()/kimage_free().
 *
 * Returns 0 on success or a negative errno.
 */
static int kexec_do_load_v1(xen_kexec_load_v1_t *load, int compat)
{
    struct kexec_image *kimage = NULL;
    xen_kexec_segment_t *segments;
    uint16_t arch;
    unsigned int nr_segments = 0;
    mfn_t ind_mfn = maddr_to_mfn(load->image.indirection_page);
    int ret;

    /* v1 requests carry no arch; infer it (x86 only). */
    arch = kexec_load_v1_arch();
    if ( arch == EM_NONE )
        return -ENOSYS;

    segments = xmalloc_array(xen_kexec_segment_t, KEXEC_SEGMENT_MAX);
    if ( segments == NULL )
        return -ENOMEM;

    /*
     * Work out the image segments (destination only) from the
     * indirection pages.
     *
     * This is needed so we don't allocate pages that will overlap
     * with the destination when building the new set of indirection
     * pages below.
     */
    ret = kexec_segments_from_ind_page(ind_mfn, &nr_segments, segments, compat);
    if ( ret < 0 )
        goto error;

    ret = kimage_alloc(&kimage, load->type, arch, load->image.start_address,
                       nr_segments, segments);
    if ( ret < 0 )
        goto error;

    /*
     * Build a new set of indirection pages in the native format.
     *
     * This walks the guest provided indirection pages a second time.
     * The guest could have altered then, invalidating the segment
     * information constructed above.  This will only result in the
     * resulting image being potentially unrelocatable.
     */
    ret = kimage_build_ind(kimage, ind_mfn, compat);
    if ( ret < 0 )
        goto error;

    if ( arch == EM_386 || arch == EM_X86_64 )
    {
        /*
         * Ensure 0 - 1 MiB is mapped and accessible by the image.
         *
         * This allows access to VGA memory and the region purgatory copies
         * in the crash case.
         */
        unsigned long addr;

        for ( addr = 0; addr < MB(1); addr += PAGE_SIZE )
        {
            ret = machine_kexec_add_page(kimage, addr, addr);
            if ( ret < 0 )
                goto error;
        }
    }

    ret = kexec_load_slot(kimage);
    if ( ret < 0 )
        goto error;

    return 0;

 error:
    /* segments only still belongs to us while kimage_alloc() has not
     * succeeded; afterwards kimage_free() is assumed to clean it up. */
    if ( !kimage )
        xfree(segments);
    kimage_free(kimage);
    return ret;
}
1063 | | |
1064 | | static int kexec_load_v1(XEN_GUEST_HANDLE_PARAM(void) uarg) |
1065 | 0 | { |
1066 | 0 | xen_kexec_load_v1_t load; |
1067 | 0 |
|
1068 | 0 | if ( unlikely(copy_from_guest(&load, uarg, 1)) ) |
1069 | 0 | return -EFAULT; |
1070 | 0 |
|
1071 | 0 | return kexec_do_load_v1(&load, 0); |
1072 | 0 | } |
1073 | | |
/*
 * 32-bit (compat) entry point for KEXEC_CMD_kexec_load_v1: copy the
 * compat-layout request, translate it to the native layout by hand,
 * and hand it to the common v1 loader with compat == 1.
 *
 * Returns 0 when CONFIG_COMPAT is off (the hypercall table never
 * routes here in that configuration).
 */
static int kexec_load_v1_compat(XEN_GUEST_HANDLE_PARAM(void) uarg)
{
#ifdef CONFIG_COMPAT
    compat_kexec_load_v1_t compat_load;
    xen_kexec_load_v1_t load;

    if ( unlikely(copy_from_guest(&compat_load, uarg, 1)) )
        return -EFAULT;

    /* This is a bit dodgy, load.image is inside load,
     * but XLAT_kexec_load (which is automatically generated)
     * doesn't translate load.image (correctly)
     * Just copy load->type, the only other member, manually instead.
     *
     * XLAT_kexec_load(&load, &compat_load);
     */
    load.type = compat_load.type;
    XLAT_kexec_image(&load.image, &compat_load.image);

    return kexec_do_load_v1(&load, 1);
#else
    return 0;
#endif
}
1098 | | |
/*
 * KEXEC_CMD_kexec_load: copy the typed v2 request and its segment
 * array from the guest, build the image, load its segment contents,
 * and install it in its slot.
 *
 * Ownership note: once kimage_alloc() succeeds, @segments appears to
 * be owned by the image (error path frees it only while kimage is
 * NULL) — NOTE(review): confirm against kimage_alloc()/kimage_free().
 *
 * Returns 0 on success or a negative errno.
 */
static int kexec_load(XEN_GUEST_HANDLE_PARAM(void) uarg)
{
    xen_kexec_load_t load;
    xen_kexec_segment_t *segments;
    struct kexec_image *kimage = NULL;
    int ret;

    if ( copy_from_guest(&load, uarg, 1) )
        return -EFAULT;

    /* NOTE(review): '>=' rejects exactly KEXEC_SEGMENT_MAX segments,
     * while kexec_segments_add_segment() permits building that many —
     * verify the intended bound against public/kexec.h. */
    if ( load.nr_segments >= KEXEC_SEGMENT_MAX )
        return -EINVAL;

    segments = xmalloc_array(xen_kexec_segment_t, load.nr_segments);
    if ( segments == NULL )
        return -ENOMEM;

    if ( copy_from_guest(segments, load.segments.h, load.nr_segments) )
    {
        ret = -EFAULT;
        goto error;
    }

    ret = kimage_alloc(&kimage, load.type, load.arch, load.entry_maddr,
                       load.nr_segments, segments);
    if ( ret < 0 )
        goto error;

    /* Copy each segment's payload from the guest into the image. */
    ret = kimage_load_segments(kimage);
    if ( ret < 0 )
        goto error;

    ret = kexec_load_slot(kimage);
    if ( ret < 0 )
        goto error;

    return 0;

 error:
    /* segments is ours to free only until kimage_alloc() succeeds. */
    if ( ! kimage )
        xfree(segments);
    kimage_free(kimage);
    return ret;
}
1143 | | |
1144 | | static int kexec_do_unload(xen_kexec_unload_t *unload) |
1145 | 0 | { |
1146 | 0 | struct kexec_image *old_kimage; |
1147 | 0 | int ret; |
1148 | 0 |
|
1149 | 0 | ret = kexec_swap_images(unload->type, NULL, &old_kimage); |
1150 | 0 | if ( ret < 0 ) |
1151 | 0 | return ret; |
1152 | 0 |
|
1153 | 0 | kexec_unload_image(old_kimage); |
1154 | 0 |
|
1155 | 0 | return 0; |
1156 | 0 | } |
1157 | | |
1158 | | static int kexec_unload_v1(XEN_GUEST_HANDLE_PARAM(void) uarg) |
1159 | 0 | { |
1160 | 0 | xen_kexec_load_v1_t load; |
1161 | 0 | xen_kexec_unload_t unload; |
1162 | 0 |
|
1163 | 0 | if ( copy_from_guest(&load, uarg, 1) ) |
1164 | 0 | return -EFAULT; |
1165 | 0 |
|
1166 | 0 | unload.type = load.type; |
1167 | 0 | return kexec_do_unload(&unload); |
1168 | 0 | } |
1169 | | |
1170 | | static int kexec_unload_v1_compat(XEN_GUEST_HANDLE_PARAM(void) uarg) |
1171 | 0 | { |
1172 | 0 | #ifdef CONFIG_COMPAT |
1173 | 0 | compat_kexec_load_v1_t compat_load; |
1174 | 0 | xen_kexec_unload_t unload; |
1175 | 0 |
|
1176 | 0 | if ( copy_from_guest(&compat_load, uarg, 1) ) |
1177 | 0 | return -EFAULT; |
1178 | 0 |
|
1179 | 0 | unload.type = compat_load.type; |
1180 | 0 | return kexec_do_unload(&unload); |
1181 | 0 | #else |
1182 | | return 0; |
1183 | | #endif |
1184 | 0 | } |
1185 | | |
1186 | | static int kexec_unload(XEN_GUEST_HANDLE_PARAM(void) uarg) |
1187 | 0 | { |
1188 | 0 | xen_kexec_unload_t unload; |
1189 | 0 |
|
1190 | 0 | if ( unlikely(copy_from_guest(&unload, uarg, 1)) ) |
1191 | 0 | return -EFAULT; |
1192 | 0 |
|
1193 | 0 | return kexec_do_unload(&unload); |
1194 | 0 | } |
1195 | | |
1196 | | static int kexec_status(XEN_GUEST_HANDLE_PARAM(void) uarg) |
1197 | 0 | { |
1198 | 0 | xen_kexec_status_t status; |
1199 | 0 | int base, bit; |
1200 | 0 |
|
1201 | 0 | if ( unlikely(copy_from_guest(&status, uarg, 1)) ) |
1202 | 0 | return -EFAULT; |
1203 | 0 |
|
1204 | 0 | /* No need to check KEXEC_FLAG_IN_PROGRESS. */ |
1205 | 0 |
|
1206 | 0 | if ( kexec_load_get_bits(status.type, &base, &bit) ) |
1207 | 0 | return -EINVAL; |
1208 | 0 |
|
1209 | 0 | return !!test_bit(bit, &kexec_flags); |
1210 | 0 | } |
1211 | | |
1212 | | static int do_kexec_op_internal(unsigned long op, |
1213 | | XEN_GUEST_HANDLE_PARAM(void) uarg, |
1214 | | bool_t compat) |
1215 | 0 | { |
1216 | 0 | int ret = -EINVAL; |
1217 | 0 |
|
1218 | 0 | ret = xsm_kexec(XSM_PRIV); |
1219 | 0 | if ( ret ) |
1220 | 0 | return ret; |
1221 | 0 |
|
1222 | 0 | if ( test_and_set_bit(KEXEC_FLAG_IN_HYPERCALL, &kexec_flags) ) |
1223 | 0 | return hypercall_create_continuation(__HYPERVISOR_kexec_op, "lh", op, uarg); |
1224 | 0 |
|
1225 | 0 | switch ( op ) |
1226 | 0 | { |
1227 | 0 | case KEXEC_CMD_kexec_get_range: |
1228 | 0 | if (compat) |
1229 | 0 | ret = kexec_get_range_compat(uarg); |
1230 | 0 | else |
1231 | 0 | ret = kexec_get_range(uarg); |
1232 | 0 | break; |
1233 | 0 | case KEXEC_CMD_kexec_load_v1: |
1234 | 0 | if ( compat ) |
1235 | 0 | ret = kexec_load_v1_compat(uarg); |
1236 | 0 | else |
1237 | 0 | ret = kexec_load_v1(uarg); |
1238 | 0 | break; |
1239 | 0 | case KEXEC_CMD_kexec_unload_v1: |
1240 | 0 | if ( compat ) |
1241 | 0 | ret = kexec_unload_v1_compat(uarg); |
1242 | 0 | else |
1243 | 0 | ret = kexec_unload_v1(uarg); |
1244 | 0 | break; |
1245 | 0 | case KEXEC_CMD_kexec: |
1246 | 0 | ret = kexec_exec(uarg); |
1247 | 0 | break; |
1248 | 0 | case KEXEC_CMD_kexec_load: |
1249 | 0 | ret = kexec_load(uarg); |
1250 | 0 | break; |
1251 | 0 | case KEXEC_CMD_kexec_unload: |
1252 | 0 | ret = kexec_unload(uarg); |
1253 | 0 | break; |
1254 | 0 | case KEXEC_CMD_kexec_status: |
1255 | 0 | ret = kexec_status(uarg); |
1256 | 0 | break; |
1257 | 0 | } |
1258 | 0 |
|
1259 | 0 | clear_bit(KEXEC_FLAG_IN_HYPERCALL, &kexec_flags); |
1260 | 0 |
|
1261 | 0 | return ret; |
1262 | 0 | } |
1263 | | |
/* Native __HYPERVISOR_kexec_op entry point: dispatch with compat == 0. */
long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) uarg)
{
    return do_kexec_op_internal(op, uarg, 0);
}
1268 | | |
#ifdef CONFIG_COMPAT
/* 32-bit guest __HYPERVISOR_kexec_op entry point: dispatch with compat == 1. */
int compat_kexec_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) uarg)
{
    return do_kexec_op_internal(op, uarg, 1);
}
#endif
1275 | | |
1276 | | /* |
1277 | | * Local variables: |
1278 | | * mode: C |
1279 | | * c-file-style: "BSD" |
1280 | | * c-basic-offset: 4 |
1281 | | * tab-width: 4 |
1282 | | * indent-tabs-mode: nil |
1283 | | * End: |
1284 | | */ |