
xen/common/kexec.c @ 19964:3952eaeb70b0

Introduce and use a per-CPU read-mostly sub-section

Mixing data that gets set up once and then (perhaps frequently) read by
remote CPUs with data that the local CPU may modify (again, perhaps
frequently) still causes undesirable cache-protocol-related bus
traffic, so separate the former class of objects from the latter.

The objects converted here were picked based on their write-once
(or write-very-rarely) properties; further adjustments may be
desirable later. The primary users of the new sub-section arrive
with the next patch.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Mon Jul 13 11:32:41 2009 +0100 (2009-07-13)
parents d6c1d7992f43
children 68e8b8379244
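
To illustrate the distinction the message draws, a minimal sketch (the
section plumbing behind DEFINE_PER_CPU_READ_MOSTLY lands only with the
follow-up patch, and the second variable below is hypothetical, shown
purely for contrast):

    /* Set up once at boot, then only read -- possibly often, and from
     * remote CPUs; grouping such objects keeps their cache lines from
     * being invalidated by unrelated local writes. */
    static DEFINE_PER_CPU_READ_MOSTLY(void *, crash_notes);

    /* Data the owning CPU updates frequently stays in the ordinary
     * per-CPU section, away from the read-mostly cache lines. */
    static DEFINE_PER_CPU(unsigned long, hypothetical_event_count);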
line source
/******************************************************************************
 * kexec.c - Architecture independent kexec code for Xen
 *
 * Xen port written by:
 * - Simon 'Horms' Horman <horms@verge.net.au>
 * - Magnus Damm <magnus@valinux.co.jp>
 */

#include <xen/lib.h>
#include <xen/ctype.h>
#include <xen/errno.h>
#include <xen/guest_access.h>
#include <xen/sched.h>
#include <xen/types.h>
#include <xen/kexec.h>
#include <xen/keyhandler.h>
#include <public/kexec.h>
#include <xen/cpumask.h>
#include <asm/atomic.h>
#include <xen/spinlock.h>
#include <xen/version.h>
#include <xen/console.h>
#include <public/elfnote.h>
#include <xsm/xsm.h>
#ifdef CONFIG_COMPAT
#include <compat/kexec.h>
#endif
static DEFINE_PER_CPU_READ_MOSTLY(void *, crash_notes);

static Elf_Note *xen_crash_note;

static cpumask_t crash_saved_cpus;

static xen_kexec_image_t kexec_image[KEXEC_IMAGE_NR];
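/*
 * Layout of kexec_flags: bits [0, KEXEC_IMAGE_NR) mark which image slots
 * currently hold a loaded image, the *_POS bits select which of the two
 * slots per image type is the active one, and the IN_PROGRESS bit
 * serializes entry into the kexec/crash paths.
 */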
#define KEXEC_FLAG_DEFAULT_POS (KEXEC_IMAGE_NR + 0)
#define KEXEC_FLAG_CRASH_POS (KEXEC_IMAGE_NR + 1)
#define KEXEC_FLAG_IN_PROGRESS (KEXEC_IMAGE_NR + 2)

static unsigned long kexec_flags = 0; /* the lowest bits are for KEXEC_IMAGE... */

static spinlock_t kexec_lock = SPIN_LOCK_UNLOCKED;

static unsigned char vmcoreinfo_data[VMCOREINFO_BYTES];
static size_t vmcoreinfo_size = 0;

xen_kexec_reserve_t kexec_crash_area;
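/*
 * Accept a "crashkernel=<size>[@<start>]" command line option, e.g.
 * "crashkernel=64M@32M" to reserve 64MiB starting at 32MiB.
 */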
static void __init parse_crashkernel(const char *str)
{
    kexec_crash_area.size = parse_size_and_unit(str, &str);
    if ( *str == '@' )
        kexec_crash_area.start = parse_size_and_unit(str+1, NULL);
}
custom_param("crashkernel", parse_crashkernel);

static void one_cpu_only(void)
{
    /* Only allow the first cpu to continue - force other cpus to spin */
    if ( test_and_set_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
        for ( ; ; ) ;
}

/* Save the registers in the per-cpu crash note buffer. */
void kexec_crash_save_cpu(void)
{
    int cpu = smp_processor_id();
    Elf_Note *note = per_cpu(crash_notes, cpu);
    ELF_Prstatus *prstatus;
    crash_xen_core_t *xencore;

    if ( cpu_test_and_set(cpu, crash_saved_cpus) )
        return;

    prstatus = (ELF_Prstatus *)ELFNOTE_DESC(note);

    note = ELFNOTE_NEXT(note);
    xencore = (crash_xen_core_t *)ELFNOTE_DESC(note);

    elf_core_save_regs(&prstatus->pr_reg, xencore);
}
/* Set up the single Xen-specific-info crash note. */
crash_xen_info_t *kexec_crash_save_info(void)
{
    int cpu = smp_processor_id();
    crash_xen_info_t info;
    crash_xen_info_t *out = (crash_xen_info_t *)ELFNOTE_DESC(xen_crash_note);

    BUG_ON(!cpu_test_and_set(cpu, crash_saved_cpus));

    /* String fields are recorded as physical addresses, so crash tools
     * can read them without knowing Xen's virtual mappings. */
    memset(&info, 0, sizeof(info));
    info.xen_major_version = xen_major_version();
    info.xen_minor_version = xen_minor_version();
    info.xen_extra_version = __pa(xen_extra_version());
    info.xen_changeset = __pa(xen_changeset());
    info.xen_compiler = __pa(xen_compiler());
    info.xen_compile_date = __pa(xen_compile_date());
    info.xen_compile_time = __pa(xen_compile_time());
    info.tainted = tainted;

    /* Copy from guaranteed-aligned local copy to possibly-unaligned dest. */
    memcpy(out, &info, sizeof(info));

    return out;
}
void kexec_crash(void)
{
    int pos;

    pos = (test_bit(KEXEC_FLAG_CRASH_POS, &kexec_flags) != 0);
    if ( !test_bit(KEXEC_IMAGE_CRASH_BASE + pos, &kexec_flags) )
        return;

    console_start_sync();

    one_cpu_only();
    kexec_crash_save_cpu();
    machine_crash_shutdown();

    machine_kexec(&kexec_image[KEXEC_IMAGE_CRASH_BASE + pos]);

    BUG();
}

static void do_crashdump_trigger(unsigned char key)
{
    printk("'%c' pressed -> triggering crashdump\n", key);
    kexec_crash();
    printk(" * no crash kernel loaded!\n");
}

static __init int register_crashdump_trigger(void)
{
    register_keyhandler('C', do_crashdump_trigger, "trigger a crashdump");
    return 0;
}
__initcall(register_crashdump_trigger);

static void setup_note(Elf_Note *n, const char *name, int type, int descsz)
{
    int l = strlen(name) + 1;
    strlcpy(ELFNOTE_NAME(n), name, l);
    n->namesz = l;
    n->descsz = descsz;
    n->type = type;
}

static int sizeof_note(const char *name, int descsz)
{
    return (sizeof(Elf_Note) +
            ELFNOTE_ALIGN(strlen(name)+1) +
            ELFNOTE_ALIGN(descsz));
}

static int kexec_get_reserve(xen_kexec_range_t *range)
{
    if ( kexec_crash_area.size > 0 && kexec_crash_area.start > 0) {
        range->start = kexec_crash_area.start;
        range->size = kexec_crash_area.size;
    }
    else
        range->start = range->size = 0;
    return 0;
}
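/*
 * Layout of a per-CPU crash note buffer, as assembled by kexec_get_cpu()
 * below: a "CORE" note carrying an ELF_Prstatus, a "Xen" note carrying a
 * crash_xen_core_t, and -- for CPU0 only -- a further "Xen" note carrying
 * the system-wide crash_xen_info_t. Each note is padded to an
 * ELFNOTE_ALIGN() boundary, which is what sizeof_note() accounts for.
 */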
static int kexec_get_cpu(xen_kexec_range_t *range)
{
    int nr = range->nr;
    int nr_bytes = 0;

    if ( nr < 0 || nr >= num_present_cpus() )
        return -EINVAL;

    nr_bytes += sizeof_note("CORE", sizeof(ELF_Prstatus));
    nr_bytes += sizeof_note("Xen", sizeof(crash_xen_core_t));

    /* The Xen info note is included in CPU0's range. */
    if ( nr == 0 )
        nr_bytes += sizeof_note("Xen", sizeof(crash_xen_info_t));

    if ( per_cpu(crash_notes, nr) == NULL )
    {
        Elf_Note *note;

        note = per_cpu(crash_notes, nr) = xmalloc_bytes(nr_bytes);

        if ( note == NULL )
            return -ENOMEM;

        /* Setup CORE note. */
        setup_note(note, "CORE", NT_PRSTATUS, sizeof(ELF_Prstatus));

        /* Setup Xen CORE note. */
        note = ELFNOTE_NEXT(note);
        setup_note(note, "Xen", XEN_ELFNOTE_CRASH_REGS, sizeof(crash_xen_core_t));

        if (nr == 0)
        {
            /* Setup system wide Xen info note. */
            xen_crash_note = note = ELFNOTE_NEXT(note);
            setup_note(note, "Xen", XEN_ELFNOTE_CRASH_INFO, sizeof(crash_xen_info_t));
        }
    }

    range->start = __pa((unsigned long)per_cpu(crash_notes, nr));
    range->size = nr_bytes;
    return 0;
}

static int kexec_get_vmcoreinfo(xen_kexec_range_t *range)
{
    range->start = __pa((unsigned long)vmcoreinfo_data);
    range->size = VMCOREINFO_BYTES;
    return 0;
}

static int kexec_get_range_internal(xen_kexec_range_t *range)
{
    int ret = -EINVAL;

    switch ( range->range )
    {
    case KEXEC_RANGE_MA_CRASH:
        ret = kexec_get_reserve(range);
        break;
    case KEXEC_RANGE_MA_CPU:
        ret = kexec_get_cpu(range);
        break;
    case KEXEC_RANGE_MA_VMCOREINFO:
        ret = kexec_get_vmcoreinfo(range);
        break;
    default:
        ret = machine_kexec_get(range);
        break;
    }

    return ret;
}

static int kexec_get_range(XEN_GUEST_HANDLE(void) uarg)
{
    xen_kexec_range_t range;
    int ret = -EINVAL;

    if ( unlikely(copy_from_guest(&range, uarg, 1)) )
        return -EFAULT;

    ret = kexec_get_range_internal(&range);

    if ( ret == 0 && unlikely(copy_to_guest(uarg, &range, 1)) )
        return -EFAULT;

    return ret;
}
static int kexec_get_range_compat(XEN_GUEST_HANDLE(void) uarg)
{
#ifdef CONFIG_COMPAT
    xen_kexec_range_t range;
    compat_kexec_range_t compat_range;
    int ret = -EINVAL;

    if ( unlikely(copy_from_guest(&compat_range, uarg, 1)) )
        return -EFAULT;

    XLAT_kexec_range(&range, &compat_range);

    ret = kexec_get_range_internal(&range);

    if ( ret == 0 ) {
        XLAT_kexec_range(&compat_range, &range);
        if ( unlikely(copy_to_guest(uarg, &compat_range, 1)) )
            return -EFAULT;
    }

    return ret;
#else /* CONFIG_COMPAT */
    return 0;
#endif /* CONFIG_COMPAT */
}

static int kexec_load_get_bits(int type, int *base, int *bit)
{
    switch ( type )
    {
    case KEXEC_TYPE_DEFAULT:
        *base = KEXEC_IMAGE_DEFAULT_BASE;
        *bit = KEXEC_FLAG_DEFAULT_POS;
        break;
    case KEXEC_TYPE_CRASH:
        *base = KEXEC_IMAGE_CRASH_BASE;
        *bit = KEXEC_FLAG_CRASH_POS;
        break;
    default:
        return -1;
    }
    return 0;
}
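/*
 * vmcoreinfo_data holds a single ELF note: a header and name set up by
 * crash_save_vmcoreinfo(), followed by a plain-text payload of lines such
 * as "SYMBOL(...)=..." appended one at a time below, which crash analysis
 * tools parse to locate Xen structures in the dump.
 */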
void vmcoreinfo_append_str(const char *fmt, ...)
{
    va_list args;
    char buf[0x50];
    int r;
    size_t note_size = sizeof(Elf_Note) + ELFNOTE_ALIGN(strlen(VMCOREINFO_NOTE_NAME) + 1);

    if (vmcoreinfo_size + note_size + sizeof(buf) > VMCOREINFO_BYTES)
        return;

    va_start(args, fmt);
    r = vsnprintf(buf, sizeof(buf), fmt, args);
    va_end(args);

    memcpy(&vmcoreinfo_data[note_size + vmcoreinfo_size], buf, r);

    vmcoreinfo_size += r;
}

static void crash_save_vmcoreinfo(void)
{
    size_t data_size;

    if (vmcoreinfo_size > 0) /* already saved */
        return;

    data_size = VMCOREINFO_BYTES - (sizeof(Elf_Note) + ELFNOTE_ALIGN(strlen(VMCOREINFO_NOTE_NAME) + 1));
    setup_note((Elf_Note *)vmcoreinfo_data, VMCOREINFO_NOTE_NAME, 0, data_size);

    VMCOREINFO_PAGESIZE(PAGE_SIZE);

    VMCOREINFO_SYMBOL(domain_list);
    VMCOREINFO_SYMBOL(frame_table);
    VMCOREINFO_SYMBOL(max_page);

    VMCOREINFO_STRUCT_SIZE(page_info);
    VMCOREINFO_STRUCT_SIZE(domain);

    VMCOREINFO_OFFSET(page_info, count_info);
    VMCOREINFO_OFFSET_ALIAS(page_info, u, _domain);
    VMCOREINFO_OFFSET(domain, domain_id);
    VMCOREINFO_OFFSET(domain, next_in_list);

#ifdef ARCH_CRASH_SAVE_VMCOREINFO
    arch_crash_save_vmcoreinfo();
#endif
}
static int kexec_load_unload_internal(unsigned long op, xen_kexec_load_t *load)
{
    xen_kexec_image_t *image;
    int base, bit, pos;
    int ret = 0;

    if ( kexec_load_get_bits(load->type, &base, &bit) )
        return -EINVAL;

    pos = (test_bit(bit, &kexec_flags) != 0);
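    /*
     * Each image type owns a pair of kexec_image[] slots; the type's
     * *_POS flag bit selects the active one. A new image is loaded into
     * the inactive slot and the bit is flipped only on success, so the
     * previously loaded image stays usable until the switch-over.
     */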
    /* Load the user data into an unused image */
    if ( op == KEXEC_CMD_kexec_load )
    {
        image = &kexec_image[base + !pos];

        BUG_ON(test_bit((base + !pos), &kexec_flags)); /* must be free */

        memcpy(image, &load->image, sizeof(*image));

        if ( !(ret = machine_kexec_load(load->type, base + !pos, image)) )
        {
            /* Set image present bit */
            set_bit((base + !pos), &kexec_flags);

            /* Make new image the active one */
            change_bit(bit, &kexec_flags);
        }

        crash_save_vmcoreinfo();
    }

    /* Unload the old image if present and load successful */
    if ( ret == 0 && !test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags) )
    {
        if ( test_and_clear_bit((base + pos), &kexec_flags) )
        {
            image = &kexec_image[base + pos];
            machine_kexec_unload(load->type, base + pos, image);
        }
    }

    return ret;
}

static int kexec_load_unload(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
{
    xen_kexec_load_t load;

    if ( unlikely(copy_from_guest(&load, uarg, 1)) )
        return -EFAULT;

    return kexec_load_unload_internal(op, &load);
}
static int kexec_load_unload_compat(unsigned long op,
                                    XEN_GUEST_HANDLE(void) uarg)
{
#ifdef CONFIG_COMPAT
    compat_kexec_load_t compat_load;
    xen_kexec_load_t load;

    if ( unlikely(copy_from_guest(&compat_load, uarg, 1)) )
        return -EFAULT;

    /* This is a bit dodgy: load.image is inside load, but
     * XLAT_kexec_load (which is automatically generated) doesn't
     * translate load.image correctly. Just copy load.type, the only
     * other member, manually instead.
     *
     * XLAT_kexec_load(&load, &compat_load);
     */
    load.type = compat_load.type;
    XLAT_kexec_image(&load.image, &compat_load.image);

    return kexec_load_unload_internal(op, &load);
#else /* CONFIG_COMPAT */
    return 0;
#endif /* CONFIG_COMPAT */
}
static int kexec_exec(XEN_GUEST_HANDLE(void) uarg)
{
    xen_kexec_exec_t exec;
    xen_kexec_image_t *image;
    int base, bit, pos;

    if ( unlikely(copy_from_guest(&exec, uarg, 1)) )
        return -EFAULT;

    if ( kexec_load_get_bits(exec.type, &base, &bit) )
        return -EINVAL;

    pos = (test_bit(bit, &kexec_flags) != 0);

    /* Only allow kexec/kdump into loaded images */
    if ( !test_bit(base + pos, &kexec_flags) )
        return -ENOENT;

    switch (exec.type)
    {
    case KEXEC_TYPE_DEFAULT:
        image = &kexec_image[base + pos];
        one_cpu_only();
        machine_reboot_kexec(image); /* Does not return */
        break;
    case KEXEC_TYPE_CRASH:
        kexec_crash(); /* Does not return */
        break;
    }

    return -EINVAL; /* never reached */
}

int do_kexec_op_internal(unsigned long op, XEN_GUEST_HANDLE(void) uarg,
                         int compat)
{
    unsigned long flags;
    int ret = -EINVAL;

    if ( !IS_PRIV(current->domain) )
        return -EPERM;

    ret = xsm_kexec();
    if ( ret )
        return ret;

    switch ( op )
    {
    case KEXEC_CMD_kexec_get_range:
        if (compat)
            ret = kexec_get_range_compat(uarg);
        else
            ret = kexec_get_range(uarg);
        break;
    case KEXEC_CMD_kexec_load:
    case KEXEC_CMD_kexec_unload:
        spin_lock_irqsave(&kexec_lock, flags);
        if (!test_bit(KEXEC_FLAG_IN_PROGRESS, &kexec_flags))
        {
            if (compat)
                ret = kexec_load_unload_compat(op, uarg);
            else
                ret = kexec_load_unload(op, uarg);
        }
        spin_unlock_irqrestore(&kexec_lock, flags);
        break;
    case KEXEC_CMD_kexec:
        ret = kexec_exec(uarg);
        break;
    }

    return ret;
}

long do_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
{
    return do_kexec_op_internal(op, uarg, 0);
}

#ifdef CONFIG_COMPAT
int compat_kexec_op(unsigned long op, XEN_GUEST_HANDLE(void) uarg)
{
    return do_kexec_op_internal(op, uarg, 1);
}
#endif
/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */