/root/src/xen/xen/common/kimage.c
Line | Count | Source |
1 | | /* |
2 | | * Kexec Image |
3 | | * |
4 | | * Copyright (C) 2013 Citrix Systems R&D Ltd. |
5 | | * |
6 | | * Derived from kernel/kexec.c from Linux: |
7 | | * |
8 | | * Copyright (C) 2002-2004 Eric Biederman <ebiederm@xmission.com> |
9 | | * |
10 | | * This source code is licensed under the GNU General Public License, |
11 | | * Version 2. See the file COPYING for more details. |
12 | | */ |
13 | | |
14 | | #include <xen/types.h> |
15 | | #include <xen/init.h> |
16 | | #include <xen/kernel.h> |
17 | | #include <xen/errno.h> |
18 | | #include <xen/spinlock.h> |
19 | | #include <xen/guest_access.h> |
20 | | #include <xen/mm.h> |
21 | | #include <xen/kexec.h> |
22 | | #include <xen/kimage.h> |
23 | | |
24 | | #include <asm/page.h> |
25 | | |
26 | | /* Override macros from asm/page.h to make them work with mfn_t */ |
27 | | #undef mfn_to_page |
28 | 0 | #define mfn_to_page(mfn) __mfn_to_page(mfn_x(mfn)) |
29 | | #undef page_to_mfn |
30 | 0 | #define page_to_mfn(pg) _mfn(__page_to_mfn(pg)) |
31 | | |
32 | | /* |
33 | | * When kexec transitions to the new kernel there is a one-to-one |
34 | | * mapping between physical and virtual addresses. On processors |
35 | | * where you can disable the MMU this is trivial and easy. For |
36 | | * others it is still a simple, predictable page table to set up. |
37 | | * |
38 | | * The code for the transition from the current kernel to the new |
39 | | * kernel is placed in the page-size control_code_buffer. This memory |
40 | | * must be identity mapped in the transition from virtual to physical |
41 | | * addresses. |
42 | | * |
43 | | * The assembly stub in the control code buffer is passed a linked list |
44 | | * of descriptor pages detailing the source pages of the new kernel, |
45 | | * and the destination addresses of those source pages. As this data |
46 | | * structure is not used in the context of the current OS, it must |
47 | | * be self-contained. |
48 | | * |
49 | | * The code has been made to work with highmem pages and will use a |
50 | | * destination page in its final resting place (if it happens |
51 | | * to allocate it). The end product of this is that most of the |
52 | | * physical address space, and most of RAM can be used. |
53 | | * |
54 | | * Future directions include: |
55 | | * - allocating a page table with the control code buffer identity |
56 | | * mapped, to simplify machine_kexec and make kexec_on_panic more |
57 | | * reliable. |
58 | | */ |
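
(Editorial sketch, not part of kimage.c: one way the relocation stub
described above can consume the descriptor list. The real stub is
architecture-specific assembly; the IND_* flags are from <xen/kimage.h>,
and the identity mapping means maddr == vaddr while this runs.)

static void relocate_sketch(unsigned long *ptr)
{
    char *dest = NULL;

    for ( ;; )
    {
        unsigned long entry = *ptr++;

        if ( entry & IND_DESTINATION )      /* set the copy cursor */
            dest = (char *)(entry & PAGE_MASK);
        else if ( entry & IND_INDIRECTION ) /* chain to the next descriptor page */
            ptr = (unsigned long *)(entry & PAGE_MASK);
        else if ( entry & IND_SOURCE )      /* copy one page and advance */
        {
            memcpy(dest, (void *)(entry & PAGE_MASK), PAGE_SIZE);
            dest += PAGE_SIZE;
        }
        else if ( entry & IND_DONE )        /* end of the list */
            break;
    }
}
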
59 | | |
60 | | /* |
61 | | * KIMAGE_NO_DEST is an impossible destination address, used for |
62 | | * allocating pages whose destination address we do not care about. |
63 | | */ |
64 | 0 | #define KIMAGE_NO_DEST (-1UL) |
65 | | |
66 | | /* |
67 | | * Offset of the last entry in an indirection page. |
68 | | */ |
69 | 0 | #define KIMAGE_LAST_ENTRY (PAGE_SIZE/sizeof(kimage_entry_t) - 1) |
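
(Editorial example: with 4 KiB pages and 8-byte entries this is
4096/8 - 1 = 511, i.e. the 512th and last slot of a descriptor page.
kimage_add_entry() below keeps that last slot free for the
IND_INDIRECTION link to the next descriptor page.)
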
70 | | |
71 | | |
72 | | static int kimage_is_destination_range(struct kexec_image *image, |
73 | | paddr_t start, paddr_t end); |
74 | | static struct page_info *kimage_alloc_page(struct kexec_image *image, |
75 | | paddr_t dest); |
76 | | |
77 | | static struct page_info *kimage_alloc_zeroed_page(unsigned memflags) |
78 | 0 | { |
79 | 0 | struct page_info *page; |
80 | 0 |
81 | 0 | page = alloc_domheap_page(NULL, memflags); |
82 | 0 | if ( !page ) |
83 | 0 | return NULL; |
84 | 0 |
85 | 0 | clear_domain_page(page_to_mfn(page)); |
86 | 0 |
87 | 0 | return page; |
88 | 0 | } |
89 | | |
90 | | static int do_kimage_alloc(struct kexec_image **rimage, paddr_t entry, |
91 | | unsigned long nr_segments, |
92 | | xen_kexec_segment_t *segments, uint8_t type) |
93 | 0 | { |
94 | 0 | struct kexec_image *image; |
95 | 0 | unsigned long i; |
96 | 0 | int result; |
97 | 0 |
98 | 0 | /* Allocate a controlling structure */ |
99 | 0 | result = -ENOMEM; |
100 | 0 | image = xzalloc(typeof(*image)); |
101 | 0 | if ( !image ) |
102 | 0 | goto out; |
103 | 0 |
104 | 0 | image->entry_maddr = entry; |
105 | 0 | image->type = type; |
106 | 0 | image->nr_segments = nr_segments; |
107 | 0 | image->segments = segments; |
108 | 0 |
109 | 0 | image->next_crash_page = kexec_crash_area.start; |
110 | 0 |
111 | 0 | INIT_PAGE_LIST_HEAD(&image->control_pages); |
112 | 0 | INIT_PAGE_LIST_HEAD(&image->dest_pages); |
113 | 0 | INIT_PAGE_LIST_HEAD(&image->unusable_pages); |
114 | 0 |
115 | 0 | /* |
116 | 0 | * Verify we have good destination addresses. The caller is |
117 | 0 | * responsible for making certain we don't attempt to load the new |
118 | 0 | * image into invalid or reserved areas of RAM. This just |
119 | 0 | * verifies it is an address we can use. |
120 | 0 | * |
121 | 0 | * Since the kernel does everything in page-size chunks, ensure the |
122 | 0 | * destination addresses are page aligned. Too many special cases |
123 | 0 | * crop up when we don't do this. The most insidious is getting |
124 | 0 | * overlapping destination addresses simply because addresses are |
125 | 0 | * changed to page size granularity. |
126 | 0 | */ |
127 | 0 | result = -EADDRNOTAVAIL; |
128 | 0 | for ( i = 0; i < nr_segments; i++ ) |
129 | 0 | { |
130 | 0 | paddr_t mstart, mend; |
131 | 0 |
132 | 0 | mstart = image->segments[i].dest_maddr; |
133 | 0 | mend = mstart + image->segments[i].dest_size; |
134 | 0 | if ( (mstart & ~PAGE_MASK) || (mend & ~PAGE_MASK) ) |
135 | 0 | goto out; |
136 | 0 | } |
137 | 0 |
138 | 0 | /* |
139 | 0 | * Verify our destination addresses do not overlap. If we allowed |
140 | 0 | * overlapping destination addresses through, very weird things can |
141 | 0 | * happen with no easy explanation as one segment stomps on |
142 | 0 | * another. |
143 | 0 | */ |
144 | 0 | result = -EINVAL; |
145 | 0 | for ( i = 0; i < nr_segments; i++ ) |
146 | 0 | { |
147 | 0 | paddr_t mstart, mend; |
148 | 0 | unsigned long j; |
149 | 0 |
150 | 0 | mstart = image->segments[i].dest_maddr; |
151 | 0 | mend = mstart + image->segments[i].dest_size; |
152 | 0 | for ( j = 0; j < i; j++ ) |
153 | 0 | { |
154 | 0 | paddr_t pstart, pend; |
155 | 0 | pstart = image->segments[j].dest_maddr; |
156 | 0 | pend = pstart + image->segments[j].dest_size; |
157 | 0 | /* Do the segments overlap? */ |
158 | 0 | if ( (mend > pstart) && (mstart < pend) ) |
159 | 0 | goto out; |
160 | 0 | } |
161 | 0 | } |
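
(Editorial example of the half-open overlap test above: segments
[0x1000, 0x3000) and [0x2000, 0x4000) are rejected, since
(0x3000 > 0x2000) && (0x1000 < 0x4000); adjacent segments such as
[0x1000, 0x2000) and [0x2000, 0x3000) pass, since mend == pstart.)
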
162 | 0 |
163 | 0 | /* |
164 | 0 | * Ensure our buffer sizes do not exceed our memory sizes. |
165 | 0 | * This should always be the case, and it is easier to |
166 | 0 | * check up front than to be surprised later on. |
167 | 0 | */ |
168 | 0 | result = -EINVAL; |
169 | 0 | for ( i = 0; i < nr_segments; i++ ) |
170 | 0 | { |
171 | 0 | if ( image->segments[i].buf_size > image->segments[i].dest_size ) |
172 | 0 | goto out; |
173 | 0 | } |
174 | 0 |
175 | 0 | /* |
176 | 0 | * Page for the relocation code must still be accessible after the |
177 | 0 | * processor has switched to 32-bit mode. |
178 | 0 | */ |
179 | 0 | result = -ENOMEM; |
180 | 0 | image->control_code_page = kimage_alloc_control_page(image, MEMF_bits(32)); |
181 | 0 | if ( !image->control_code_page ) |
182 | 0 | goto out; |
183 | 0 | result = machine_kexec_add_page(image, |
184 | 0 | page_to_maddr(image->control_code_page), |
185 | 0 | page_to_maddr(image->control_code_page)); |
186 | 0 | if ( result < 0 ) |
187 | 0 | goto out; |
188 | 0 |
189 | 0 | /* Add an empty indirection page. */ |
190 | 0 | result = -ENOMEM; |
191 | 0 | image->entry_page = kimage_alloc_control_page(image, 0); |
192 | 0 | if ( !image->entry_page ) |
193 | 0 | goto out; |
194 | 0 | result = machine_kexec_add_page(image, page_to_maddr(image->entry_page), |
195 | 0 | page_to_maddr(image->entry_page)); |
196 | 0 | if ( result < 0 ) |
197 | 0 | goto out; |
198 | 0 |
199 | 0 | image->head = page_to_maddr(image->entry_page); |
200 | 0 |
201 | 0 | result = 0; |
202 | 0 | out: |
203 | 0 | if ( result == 0 ) |
204 | 0 | *rimage = image; |
205 | 0 | else if ( image ) |
206 | 0 | { |
207 | 0 | image->segments = NULL; /* caller frees segments after an error */ |
208 | 0 | kimage_free(image); |
209 | 0 | } |
210 | 0 |
211 | 0 | return result; |
212 | 0 |
213 | 0 | } |
214 | | |
215 | | static int kimage_normal_alloc(struct kexec_image **rimage, paddr_t entry, |
216 | | unsigned long nr_segments, |
217 | | xen_kexec_segment_t *segments) |
218 | 0 | { |
219 | 0 | return do_kimage_alloc(rimage, entry, nr_segments, segments, |
220 | 0 | KEXEC_TYPE_DEFAULT); |
221 | 0 | } |
222 | | |
223 | | static int kimage_crash_alloc(struct kexec_image **rimage, paddr_t entry, |
224 | | unsigned long nr_segments, |
225 | | xen_kexec_segment_t *segments) |
226 | 0 | { |
227 | 0 | unsigned long i; |
228 | 0 |
229 | 0 | /* Verify we have a valid entry point */ |
230 | 0 | if ( (entry < kexec_crash_area.start) |
231 | 0 | || (entry > kexec_crash_area.start + kexec_crash_area.size)) |
232 | 0 | return -EADDRNOTAVAIL; |
233 | 0 |
234 | 0 | /* |
235 | 0 | * Verify we have good destination addresses. Normally |
236 | 0 | * the caller is responsible for making certain we don't |
237 | 0 | * attempt to load the new image into invalid or reserved |
238 | 0 | * areas of RAM. But crash kernels are preloaded into a |
240 | 0 | * reserved area of RAM. We must ensure the addresses |
241 | 0 | * are in the reserved area, otherwise preloading the |
241 | 0 | * kernel could corrupt things. |
242 | 0 | */ |
243 | 0 | for ( i = 0; i < nr_segments; i++ ) |
244 | 0 | { |
245 | 0 | paddr_t mstart, mend; |
246 | 0 |
247 | 0 | if ( guest_handle_is_null(segments[i].buf.h) ) |
248 | 0 | continue; |
249 | 0 |
250 | 0 | mstart = segments[i].dest_maddr; |
251 | 0 | mend = mstart + segments[i].dest_size; |
252 | 0 | /* Ensure we are within the crash kernel limits. */ |
253 | 0 | if ( (mstart < kexec_crash_area.start ) |
254 | 0 | || (mend > kexec_crash_area.start + kexec_crash_area.size)) |
255 | 0 | return -EADDRNOTAVAIL; |
256 | 0 | } |
257 | 0 |
258 | 0 | /* Allocate and initialize a controlling structure. */ |
259 | 0 | return do_kimage_alloc(rimage, entry, nr_segments, segments, |
260 | 0 | KEXEC_TYPE_CRASH); |
261 | 0 | } |
262 | | |
263 | | static int kimage_is_destination_range(struct kexec_image *image, |
264 | | paddr_t start, |
265 | | paddr_t end) |
266 | 0 | { |
267 | 0 | unsigned long i; |
268 | 0 |
269 | 0 | for ( i = 0; i < image->nr_segments; i++ ) |
270 | 0 | { |
271 | 0 | paddr_t mstart, mend; |
272 | 0 |
273 | 0 | mstart = image->segments[i].dest_maddr; |
274 | 0 | mend = mstart + image->segments[i].dest_size; |
275 | 0 | if ( (end > mstart) && (start < mend) ) |
276 | 0 | return 1; |
277 | 0 | } |
278 | 0 |
279 | 0 | return 0; |
280 | 0 | } |
281 | | |
282 | | static void kimage_free_page_list(struct page_list_head *list) |
283 | 0 | { |
284 | 0 | struct page_info *page, *next; |
285 | 0 |
286 | 0 | page_list_for_each_safe(page, next, list) |
287 | 0 | { |
288 | 0 | page_list_del(page, list); |
289 | 0 | free_domheap_page(page); |
290 | 0 | } |
291 | 0 | } |
292 | | |
293 | | static struct page_info *kimage_alloc_normal_control_page( |
294 | | struct kexec_image *image, unsigned memflags) |
295 | 0 | { |
296 | 0 | /* |
297 | 0 | * Control pages are special, they are the intermediaries that are |
298 | 0 | * needed while we copy the rest of the pages to their final |
299 | 0 | * resting place. As such they must not conflict with either the |
300 | 0 | * destination addresses or memory the kernel is already using. |
301 | 0 | * |
302 | 0 | * The only case where we really need more than one of these are |
303 | 0 | * for architectures where we cannot disable the MMU and must |
304 | 0 | * instead generate an identity mapped page table for all of the |
305 | 0 | * memory. |
306 | 0 | * |
307 | 0 | * At worst this runs in O(N) of the image size. |
308 | 0 | */ |
309 | 0 | struct page_list_head extra_pages; |
310 | 0 | struct page_info *page = NULL; |
311 | 0 |
312 | 0 | INIT_PAGE_LIST_HEAD(&extra_pages); |
313 | 0 |
314 | 0 | /* |
315 | 0 | * Loop while I can allocate a page and the page allocated is a |
316 | 0 | * destination page. |
317 | 0 | */ |
318 | 0 | do { |
319 | 0 | paddr_t addr, eaddr; |
320 | 0 |
321 | 0 | page = kimage_alloc_zeroed_page(memflags); |
322 | 0 | if ( !page ) |
323 | 0 | break; |
324 | 0 | addr = page_to_maddr(page); |
325 | 0 | eaddr = addr + PAGE_SIZE; |
326 | 0 | if ( kimage_is_destination_range(image, addr, eaddr) ) |
327 | 0 | { |
328 | 0 | page_list_add(page, &extra_pages); |
329 | 0 | page = NULL; |
330 | 0 | } |
331 | 0 | } while ( !page ); |
332 | 0 |
333 | 0 | if ( page ) |
334 | 0 | { |
335 | 0 | /* Remember the allocated page... */ |
336 | 0 | page_list_add(page, &image->control_pages); |
337 | 0 |
338 | 0 | /* |
339 | 0 | * Because the page is already in its destination location we |
340 | 0 | * will never allocate another page at that address. |
341 | 0 | * Therefore kimage_alloc_page will not return it (again) and |
342 | 0 | * we don't need to give it an entry in image->segments[]. |
343 | 0 | */ |
344 | 0 | } |
345 | 0 | /* |
346 | 0 | * Deal with the destination pages I have inadvertently allocated. |
347 | 0 | * |
348 | 0 | * Ideally I would convert multi-page allocations into single page |
349 | 0 | * allocations, and add everything to image->dest_pages. |
350 | 0 | * |
351 | 0 | * For now it is simpler to just free the pages. |
352 | 0 | */ |
353 | 0 | kimage_free_page_list(&extra_pages); |
354 | 0 |
355 | 0 | return page; |
356 | 0 | } |
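
(Editorial note on the loop above: a candidate page that collides with a
destination range is parked on extra_pages rather than freed at once --
freeing it immediately would likely just hand the same page back on the
next allocation. Only after a usable page is found, or allocation fails,
is the whole parked list released.)
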
357 | | |
358 | | static struct page_info *kimage_alloc_crash_control_page(struct kexec_image *image) |
359 | 0 | { |
360 | 0 | /* |
361 | 0 | * Control pages are special, they are the intermediaries that are |
362 | 0 | * needed while we copy the rest of the pages to their final |
363 | 0 | * resting place. As such they must not conflict with either the |
364 | 0 | * destination addresses or memory the kernel is already using. |
365 | 0 | * |
366 | 0 | * Control pages are also the only pages we must allocate when |
367 | 0 | * loading a crash kernel. All of the other pages are specified |
368 | 0 | * by the segments and we just memcpy into them directly. |
369 | 0 | * |
370 | 0 | * The only case where we really need more than one of these are |
371 | 0 | * for architectures where we cannot disable the MMU and must |
372 | 0 | * instead generate an identity mapped page table for all of the |
373 | 0 | * memory. |
374 | 0 | * |
375 | 0 | * Given the low demand this implements a very simple allocator |
376 | 0 | * that finds the first hole of the appropriate size in the |
377 | 0 | * reserved memory region, and allocates all of the memory up to |
378 | 0 | * and including the hole. |
379 | 0 | */ |
380 | 0 | paddr_t hole_start, hole_end; |
381 | 0 | struct page_info *page = NULL; |
382 | 0 |
383 | 0 | hole_start = PAGE_ALIGN(image->next_crash_page); |
384 | 0 | hole_end = hole_start + PAGE_SIZE; |
385 | 0 | while ( hole_end <= kexec_crash_area.start + kexec_crash_area.size ) |
386 | 0 | { |
387 | 0 | unsigned long i; |
388 | 0 |
389 | 0 | /* See if I overlap any of the segments. */ |
390 | 0 | for ( i = 0; i < image->nr_segments; i++ ) |
391 | 0 | { |
392 | 0 | paddr_t mstart, mend; |
393 | 0 |
394 | 0 | mstart = image->segments[i].dest_maddr; |
395 | 0 | mend = mstart + image->segments[i].dest_size; |
396 | 0 | if ( (hole_end > mstart) && (hole_start < mend) ) |
397 | 0 | { |
398 | 0 | /* Advance the hole to the end of the segment. */ |
399 | 0 | hole_start = PAGE_ALIGN(mend); |
400 | 0 | hole_end = hole_start + PAGE_SIZE; |
401 | 0 | break; |
402 | 0 | } |
403 | 0 | } |
404 | 0 | /* If I don't overlap any segments, I have found my hole! */ |
405 | 0 | if ( i == image->nr_segments ) |
406 | 0 | { |
407 | 0 | page = maddr_to_page(hole_start); |
408 | 0 | break; |
409 | 0 | } |
410 | 0 | } |
411 | 0 | if ( page ) |
412 | 0 | { |
413 | 0 | image->next_crash_page = hole_end; |
414 | 0 | clear_domain_page(page_to_mfn(page)); |
415 | 0 | } |
416 | 0 |
417 | 0 | return page; |
418 | 0 | } |
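
(Editorial example of the hole search above: with the crash area at
[0x100000, 0x500000) and a single segment occupying [0x100000, 0x300000),
the first probe [0x100000, 0x101000) overlaps the segment, so the hole is
advanced to [0x300000, 0x301000); that probe overlaps nothing, the page
at 0x300000 is returned, and next_crash_page becomes 0x301000.)
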
419 | | |
420 | | |
421 | | struct page_info *kimage_alloc_control_page(struct kexec_image *image, |
422 | | unsigned memflags) |
423 | 0 | { |
424 | 0 | struct page_info *pages = NULL; |
425 | 0 |
426 | 0 | switch ( image->type ) |
427 | 0 | { |
428 | 0 | case KEXEC_TYPE_DEFAULT: |
429 | 0 | pages = kimage_alloc_normal_control_page(image, memflags); |
430 | 0 | break; |
431 | 0 | case KEXEC_TYPE_CRASH: |
432 | 0 | pages = kimage_alloc_crash_control_page(image); |
433 | 0 | break; |
434 | 0 | } |
435 | 0 | return pages; |
436 | 0 | } |
437 | | |
438 | | static int kimage_add_entry(struct kexec_image *image, kimage_entry_t entry) |
439 | 0 | { |
440 | 0 | kimage_entry_t *entries; |
441 | 0 |
442 | 0 | if ( image->next_entry == KIMAGE_LAST_ENTRY ) |
443 | 0 | { |
444 | 0 | struct page_info *page; |
445 | 0 |
446 | 0 | page = kimage_alloc_page(image, KIMAGE_NO_DEST); |
447 | 0 | if ( !page ) |
448 | 0 | return -ENOMEM; |
449 | 0 |
450 | 0 | entries = __map_domain_page(image->entry_page); |
451 | 0 | entries[image->next_entry] = page_to_maddr(page) | IND_INDIRECTION; |
452 | 0 | unmap_domain_page(entries); |
453 | 0 |
454 | 0 | image->entry_page = page; |
455 | 0 | image->next_entry = 0; |
456 | 0 | } |
457 | 0 |
458 | 0 | entries = __map_domain_page(image->entry_page); |
459 | 0 | entries[image->next_entry] = entry; |
460 | 0 | image->next_entry++; |
461 | 0 | unmap_domain_page(entries); |
462 | 0 |
463 | 0 | return 0; |
464 | 0 | } |
465 | | |
466 | | static int kimage_set_destination(struct kexec_image *image, |
467 | | paddr_t destination) |
468 | 0 | { |
469 | 0 | return kimage_add_entry(image, (destination & PAGE_MASK) | IND_DESTINATION); |
470 | 0 | } |
471 | | |
472 | | |
473 | | static int kimage_add_page(struct kexec_image *image, paddr_t maddr) |
474 | 0 | { |
475 | 0 | return kimage_add_entry(image, (maddr & PAGE_MASK) | IND_SOURCE); |
476 | 0 | } |
477 | | |
478 | | |
479 | | static void kimage_free_extra_pages(struct kexec_image *image) |
480 | 0 | { |
481 | 0 | kimage_free_page_list(&image->dest_pages); |
482 | 0 | kimage_free_page_list(&image->unusable_pages); |
483 | 0 | } |
484 | | |
485 | | static void kimage_terminate(struct kexec_image *image) |
486 | 0 | { |
487 | 0 | kimage_entry_t *entries; |
488 | 0 |
489 | 0 | entries = __map_domain_page(image->entry_page); |
490 | 0 | entries[image->next_entry] = IND_DONE; |
491 | 0 | unmap_domain_page(entries); |
492 | 0 | } |
493 | | |
494 | | /* |
495 | | * Iterate over all the entries in the indirection pages. |
496 | | * |
497 | | * Call unmap_domain_page(ptr) after the loop exits. |
498 | | */ |
499 | | #define for_each_kimage_entry(image, ptr, entry) \ |
500 | 0 | for ( ptr = map_domain_page(_mfn(paddr_to_pfn(image->head))); \ |
501 | 0 | (entry = *ptr) && !(entry & IND_DONE); \ |
502 | 0 | ptr = (entry & IND_INDIRECTION) ? \ |
503 | 0 | (unmap_domain_page(ptr), map_domain_page(_mfn(paddr_to_pfn(entry)))) \ |
504 | 0 | : ptr + 1 ) |
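
(Editorial sketch: a minimal user of for_each_kimage_entry(), in the
style of kimage_free_all_entries() below -- it counts IND_SOURCE entries
and performs the final unmap the comment above requires.)

static unsigned long count_source_pages_sketch(struct kexec_image *image)
{
    kimage_entry_t *ptr, entry;
    unsigned long nr = 0;

    /* Assumes image->head has already been set up. */
    for_each_kimage_entry(image, ptr, entry)
        if ( entry & IND_SOURCE )
            nr++;
    unmap_domain_page(ptr);

    return nr;
}
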
505 | | |
506 | | static void kimage_free_entry(kimage_entry_t entry) |
507 | 0 | { |
508 | 0 | struct page_info *page; |
509 | 0 |
510 | 0 | page = maddr_to_page(entry); |
511 | 0 | free_domheap_page(page); |
512 | 0 | } |
513 | | |
514 | | static void kimage_free_all_entries(struct kexec_image *image) |
515 | 0 | { |
516 | 0 | kimage_entry_t *ptr, entry; |
517 | 0 | kimage_entry_t ind = 0; |
518 | 0 |
519 | 0 | if ( !image->head ) |
520 | 0 | return; |
521 | 0 |
522 | 0 | for_each_kimage_entry(image, ptr, entry) |
523 | 0 | { |
524 | 0 | if ( entry & IND_INDIRECTION ) |
525 | 0 | { |
526 | 0 | /* Free the previous indirection page */ |
527 | 0 | if ( ind & IND_INDIRECTION ) |
528 | 0 | kimage_free_entry(ind); |
529 | 0 | /* Save this indirection page until we are done with it. */ |
530 | 0 | ind = entry; |
531 | 0 | } |
532 | 0 | else if ( entry & IND_SOURCE ) |
533 | 0 | kimage_free_entry(entry); |
534 | 0 | } |
535 | 0 | unmap_domain_page(ptr); |
536 | 0 |
537 | 0 | /* Free the final indirection page. */ |
538 | 0 | if ( ind & IND_INDIRECTION ) |
539 | 0 | kimage_free_entry(ind); |
540 | 0 | } |
541 | | |
542 | | void kimage_free(struct kexec_image *image) |
543 | 0 | { |
544 | 0 | if ( !image ) |
545 | 0 | return; |
546 | 0 |
547 | 0 | kimage_free_extra_pages(image); |
548 | 0 | kimage_free_all_entries(image); |
549 | 0 | kimage_free_page_list(&image->control_pages); |
550 | 0 | xfree(image->segments); |
551 | 0 | xfree(image); |
552 | 0 | } |
553 | | |
554 | | static kimage_entry_t *kimage_dst_used(struct kexec_image *image, |
555 | | paddr_t maddr) |
556 | 0 | { |
557 | 0 | kimage_entry_t *ptr, entry; |
558 | 0 | unsigned long destination = 0; |
559 | 0 |
560 | 0 | for_each_kimage_entry(image, ptr, entry) |
561 | 0 | { |
562 | 0 | if ( entry & IND_DESTINATION ) |
563 | 0 | destination = entry & PAGE_MASK; |
564 | 0 | else if ( entry & IND_SOURCE ) |
565 | 0 | { |
566 | 0 | if ( maddr == destination ) |
567 | 0 | return ptr; |
568 | 0 | destination += PAGE_SIZE; |
569 | 0 | } |
570 | 0 | } |
571 | 0 | unmap_domain_page(ptr); |
572 | 0 |
573 | 0 | return NULL; |
574 | 0 | } |
575 | | |
576 | | static struct page_info *kimage_alloc_page(struct kexec_image *image, |
577 | | paddr_t destination) |
578 | 0 | { |
579 | 0 | /* |
580 | 0 | * Here we implement safeguards to ensure that a source page is |
581 | 0 | * not copied to its destination page before the data on the |
582 | 0 | * destination page is no longer useful. |
583 | 0 | * |
584 | 0 | * To do this we maintain the invariant that a source page is |
585 | 0 | * either its own destination page, or it is not a destination |
586 | 0 | * page at all. |
587 | 0 | * |
588 | 0 | * That is slightly stronger than required, but the proof that |
589 | 0 | * problems will not occur is trivial, and the implementation is |
590 | 0 | * simple to verify. |
591 | 0 | * |
592 | 0 | * When allocating all pages normally this algorithm will run in |
593 | 0 | * O(N) time, but in the worst case it will run in O(N^2) time. |
594 | 0 | * If the runtime is a problem the data structures can be fixed. |
595 | 0 | */ |
596 | 0 | struct page_info *page; |
597 | 0 | paddr_t addr; |
598 | 0 | int ret; |
599 | 0 |
|
600 | 0 | /* |
601 | 0 | * Walk through the list of destination pages, and see if I have a |
602 | 0 | * match. |
603 | 0 | */ |
604 | 0 | page_list_for_each(page, &image->dest_pages) |
605 | 0 | { |
606 | 0 | addr = page_to_maddr(page); |
607 | 0 | if ( addr == destination ) |
608 | 0 | { |
609 | 0 | page_list_del(page, &image->dest_pages); |
610 | 0 | goto found; |
611 | 0 | } |
612 | 0 | } |
613 | 0 | page = NULL; |
614 | 0 | for (;;) |
615 | 0 | { |
616 | 0 | kimage_entry_t *old; |
617 | 0 |
618 | 0 | /* Allocate a page; if we run out of memory, give up. */ |
619 | 0 | page = kimage_alloc_zeroed_page(0); |
620 | 0 | if ( !page ) |
621 | 0 | return NULL; |
622 | 0 | addr = page_to_maddr(page); |
623 | 0 |
624 | 0 | /* If it is the destination page we want, use it. */ |
625 | 0 | if ( addr == destination ) |
626 | 0 | break; |
627 | 0 |
628 | 0 | /* If the page is not a destination page, use it. */ |
629 | 0 | if ( !kimage_is_destination_range(image, addr, |
630 | 0 | addr + PAGE_SIZE) ) |
631 | 0 | break; |
632 | 0 |
633 | 0 | /* |
634 | 0 | * I know that the page is someone's destination page. See if |
635 | 0 | * there is already a source page for this destination page. |
636 | 0 | * And if so swap the source pages. |
637 | 0 | */ |
638 | 0 | old = kimage_dst_used(image, addr); |
639 | 0 | if ( old ) |
640 | 0 | { |
641 | 0 | /* If so move it. */ |
642 | 0 | mfn_t old_mfn = maddr_to_mfn(*old); |
643 | 0 | mfn_t mfn = maddr_to_mfn(addr); |
644 | 0 |
645 | 0 | copy_domain_page(mfn, old_mfn); |
646 | 0 | clear_domain_page(old_mfn); |
647 | 0 | *old = (addr & PAGE_MASK) | IND_SOURCE; |
648 | 0 | unmap_domain_page(old); |
649 | 0 |
650 | 0 | page = mfn_to_page(old_mfn); |
651 | 0 | break; |
652 | 0 | } |
653 | 0 | else |
654 | 0 | { |
655 | 0 | /* |
656 | 0 | * Place the page on the destination list; I will use it |
657 | 0 | * later. |
658 | 0 | */ |
659 | 0 | page_list_add(page, &image->dest_pages); |
660 | 0 | } |
661 | 0 | } |
662 | 0 | found: |
663 | 0 | ret = machine_kexec_add_page(image, page_to_maddr(page), |
664 | 0 | page_to_maddr(page)); |
665 | 0 | if ( ret < 0 ) |
666 | 0 | { |
667 | 0 | free_domheap_page(page); |
668 | 0 | return NULL; |
669 | 0 | } |
670 | 0 | return page; |
671 | 0 | } |
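
(Editorial walk-through of the swap above: we need a source page for
destination D, but the allocator hands back page P, which is itself
destination E of an earlier IND_SOURCE entry pointing at some page Q.
Q's contents are copied into P -- which is already at E, its final
resting place -- the entry is repointed from Q to P, and Q is recycled
as our candidate page for D.)
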
672 | | |
673 | | static int kimage_load_normal_segment(struct kexec_image *image, |
674 | | xen_kexec_segment_t *segment) |
675 | 0 | { |
676 | 0 | unsigned long to_copy; |
677 | 0 | unsigned long src_offset; |
678 | 0 | paddr_t dest, end; |
679 | 0 | int ret; |
680 | 0 |
681 | 0 | to_copy = segment->buf_size; |
682 | 0 | src_offset = 0; |
683 | 0 | dest = segment->dest_maddr; |
684 | 0 |
685 | 0 | ret = kimage_set_destination(image, dest); |
686 | 0 | if ( ret < 0 ) |
687 | 0 | return ret; |
688 | 0 |
689 | 0 | while ( to_copy ) |
690 | 0 | { |
691 | 0 | unsigned long dest_mfn; |
692 | 0 | struct page_info *page; |
693 | 0 | void *dest_va; |
694 | 0 | size_t size; |
695 | 0 |
696 | 0 | dest_mfn = dest >> PAGE_SHIFT; |
697 | 0 |
698 | 0 | size = min_t(unsigned long, PAGE_SIZE, to_copy); |
699 | 0 |
|
700 | 0 | page = kimage_alloc_page(image, dest); |
701 | 0 | if ( !page ) |
702 | 0 | return -ENOMEM; |
703 | 0 | ret = kimage_add_page(image, page_to_maddr(page)); |
704 | 0 | if ( ret < 0 ) |
705 | 0 | return ret; |
706 | 0 |
707 | 0 | dest_va = __map_domain_page(page); |
708 | 0 | ret = copy_from_guest_offset(dest_va, segment->buf.h, src_offset, size); |
709 | 0 | unmap_domain_page(dest_va); |
710 | 0 | if ( ret ) |
711 | 0 | return -EFAULT; |
712 | 0 |
713 | 0 | to_copy -= size; |
714 | 0 | src_offset += size; |
715 | 0 | dest += PAGE_SIZE; |
716 | 0 | } |
717 | 0 |
718 | 0 | /* Remainder of the destination should be zeroed. */ |
719 | 0 | end = segment->dest_maddr + segment->dest_size; |
720 | 0 | for ( ; dest < end; dest += PAGE_SIZE ) |
721 | 0 | kimage_add_entry(image, IND_ZERO); |
722 | 0 |
723 | 0 | return 0; |
724 | 0 | } |
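
(Editorial note: IND_ZERO appears to be a Xen addition to the
Linux-derived entry encoding -- rather than allocating zero-filled
source pages for the tail of the segment, one entry per page tells the
relocation stub to clear that destination page itself.)
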
725 | | |
726 | | static int kimage_load_crash_segment(struct kexec_image *image, |
727 | | xen_kexec_segment_t *segment) |
728 | 0 | { |
729 | 0 | /* |
730 | 0 | * For crash dump kernels we simply copy the data from user space |
731 | 0 | * to its destination. |
732 | 0 | */ |
733 | 0 | paddr_t dest; |
734 | 0 | unsigned long sbytes, dbytes; |
735 | 0 | int ret = 0; |
736 | 0 | unsigned long src_offset = 0; |
737 | 0 |
738 | 0 | sbytes = segment->buf_size; |
739 | 0 | dbytes = segment->dest_size; |
740 | 0 | dest = segment->dest_maddr; |
741 | 0 |
742 | 0 | while ( dbytes ) |
743 | 0 | { |
744 | 0 | unsigned long dest_mfn; |
745 | 0 | void *dest_va; |
746 | 0 | size_t schunk, dchunk; |
747 | 0 |
748 | 0 | dest_mfn = dest >> PAGE_SHIFT; |
749 | 0 |
750 | 0 | dchunk = PAGE_SIZE; |
751 | 0 | schunk = min(dchunk, sbytes); |
752 | 0 |
753 | 0 | dest_va = map_domain_page(_mfn(dest_mfn)); |
754 | 0 | if ( !dest_va ) |
755 | 0 | return -EINVAL; |
756 | 0 |
757 | 0 | ret = copy_from_guest_offset(dest_va, segment->buf.h, src_offset, schunk); |
758 | 0 | memset(dest_va + schunk, 0, dchunk - schunk); |
759 | 0 |
760 | 0 | unmap_domain_page(dest_va); |
761 | 0 | if ( ret ) |
762 | 0 | return -EFAULT; |
763 | 0 |
764 | 0 | dbytes -= dchunk; |
765 | 0 | sbytes -= schunk; |
766 | 0 | dest += dchunk; |
767 | 0 | src_offset += schunk; |
768 | 0 | } |
769 | 0 |
770 | 0 | return 0; |
771 | 0 | } |
772 | | |
773 | | static int kimage_load_segment(struct kexec_image *image, xen_kexec_segment_t *segment) |
774 | 0 | { |
775 | 0 | int result = -ENOMEM; |
776 | 0 | paddr_t addr; |
777 | 0 |
778 | 0 | if ( !guest_handle_is_null(segment->buf.h) ) |
779 | 0 | { |
780 | 0 | switch ( image->type ) |
781 | 0 | { |
782 | 0 | case KEXEC_TYPE_DEFAULT: |
783 | 0 | result = kimage_load_normal_segment(image, segment); |
784 | 0 | break; |
785 | 0 | case KEXEC_TYPE_CRASH: |
786 | 0 | result = kimage_load_crash_segment(image, segment); |
787 | 0 | break; |
788 | 0 | } |
789 | 0 | } |
790 | 0 |
791 | 0 | for ( addr = segment->dest_maddr & PAGE_MASK; |
792 | 0 | addr < segment->dest_maddr + segment->dest_size; addr += PAGE_SIZE ) |
793 | 0 | { |
794 | 0 | result = machine_kexec_add_page(image, addr, addr); |
795 | 0 | if ( result < 0 ) |
796 | 0 | break; |
797 | 0 | } |
798 | 0 |
799 | 0 | return result; |
800 | 0 | } |
801 | | |
802 | | int kimage_alloc(struct kexec_image **rimage, uint8_t type, uint16_t arch, |
803 | | uint64_t entry_maddr, |
804 | | uint32_t nr_segments, xen_kexec_segment_t *segment) |
805 | 0 | { |
806 | 0 | int result; |
807 | 0 |
808 | 0 | switch( type ) |
809 | 0 | { |
810 | 0 | case KEXEC_TYPE_DEFAULT: |
811 | 0 | result = kimage_normal_alloc(rimage, entry_maddr, nr_segments, segment); |
812 | 0 | break; |
813 | 0 | case KEXEC_TYPE_CRASH: |
814 | 0 | result = kimage_crash_alloc(rimage, entry_maddr, nr_segments, segment); |
815 | 0 | break; |
816 | 0 | default: |
817 | 0 | result = -EINVAL; |
818 | 0 | break; |
819 | 0 | } |
820 | 0 | if ( result < 0 ) |
821 | 0 | return result; |
822 | 0 |
823 | 0 | (*rimage)->arch = arch; |
824 | 0 |
825 | 0 | return result; |
826 | 0 | } |
827 | | |
828 | | int kimage_load_segments(struct kexec_image *image) |
829 | 0 | { |
830 | 0 | int s; |
831 | 0 | int result; |
832 | 0 |
833 | 0 | for ( s = 0; s < image->nr_segments; s++ ) { |
834 | 0 | result = kimage_load_segment(image, &image->segments[s]); |
835 | 0 | if ( result < 0 ) |
836 | 0 | return result; |
837 | 0 | } |
838 | 0 | kimage_terminate(image); |
839 | 0 | return 0; |
840 | 0 | } |
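
(Editorial sketch of the intended calling sequence; the real caller
lives in common/kexec.c, and this wrapper name is hypothetical.)

static int load_image_sketch(uint8_t type, uint16_t arch,
                             uint64_t entry_maddr, uint32_t nr_segments,
                             xen_kexec_segment_t *segments)
{
    struct kexec_image *image;
    int rc;

    rc = kimage_alloc(&image, type, arch, entry_maddr,
                      nr_segments, segments);
    if ( rc < 0 )
        return rc; /* on failure the caller still owns segments */

    rc = kimage_load_segments(image);
    if ( rc < 0 )
        kimage_free(image); /* also frees image->segments */

    return rc;
}
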
841 | | |
842 | | kimage_entry_t *kimage_entry_next(kimage_entry_t *entry, bool_t compat) |
843 | 0 | { |
844 | 0 | if ( compat ) |
845 | 0 | return (kimage_entry_t *)((uint32_t *)entry + 1); |
846 | 0 | return entry + 1; |
847 | 0 | } |
848 | | |
849 | | mfn_t kimage_entry_mfn(kimage_entry_t *entry, bool_t compat) |
850 | 0 | { |
851 | 0 | if ( compat ) |
852 | 0 | return maddr_to_mfn(*(uint32_t *)entry); |
853 | 0 | return maddr_to_mfn(*entry); |
854 | 0 | } |
855 | | |
856 | | unsigned long kimage_entry_ind(kimage_entry_t *entry, bool_t compat) |
857 | 0 | { |
858 | 0 | if ( compat ) |
859 | 0 | return *(uint32_t *)entry & 0xf; |
860 | 0 | return *entry & 0xf; |
861 | 0 | } |
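
(Editorial sketch: the three accessors above exist because a compat
(32-bit) guest supplies 4-byte entries, so a descriptor page holds twice
as many. Dumping a single indirection page, ignoring IND_INDIRECTION
chaining, might look like this.)

static void dump_ind_page_sketch(kimage_entry_t *entry, bool_t compat)
{
    while ( kimage_entry_ind(entry, compat) != IND_DONE )
    {
        printk("mfn %#lx ind %#lx\n",
               mfn_x(kimage_entry_mfn(entry, compat)),
               kimage_entry_ind(entry, compat));
        entry = kimage_entry_next(entry, compat);
    }
}
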
862 | | |
863 | | int kimage_build_ind(struct kexec_image *image, mfn_t ind_mfn, |
864 | | bool_t compat) |
865 | 0 | { |
866 | 0 | void *page; |
867 | 0 | kimage_entry_t *entry; |
868 | 0 | int ret = 0; |
869 | 0 | paddr_t dest = KIMAGE_NO_DEST; |
870 | 0 |
871 | 0 | page = map_domain_page(ind_mfn); |
872 | 0 | if ( !page ) |
873 | 0 | return -ENOMEM; |
874 | 0 |
875 | 0 | /* |
876 | 0 | * Walk the guest-supplied indirection pages, adding entries to |
877 | 0 | * the image's indirection pages. |
878 | 0 | */ |
879 | 0 | for ( entry = page; ; ) |
880 | 0 | { |
881 | 0 | unsigned long ind; |
882 | 0 | mfn_t mfn; |
883 | 0 |
884 | 0 | ind = kimage_entry_ind(entry, compat); |
885 | 0 | mfn = kimage_entry_mfn(entry, compat); |
886 | 0 |
887 | 0 | switch ( ind ) |
888 | 0 | { |
889 | 0 | case IND_DESTINATION: |
890 | 0 | dest = mfn_to_maddr(mfn); |
891 | 0 | ret = kimage_set_destination(image, dest); |
892 | 0 | if ( ret < 0 ) |
893 | 0 | goto done; |
894 | 0 | break; |
895 | 0 | case IND_INDIRECTION: |
896 | 0 | unmap_domain_page(page); |
897 | 0 | page = map_domain_page(mfn); |
898 | 0 | entry = page; |
899 | 0 | continue; |
900 | 0 | case IND_DONE: |
901 | 0 | kimage_terminate(image); |
902 | 0 | goto done; |
903 | 0 | case IND_SOURCE: |
904 | 0 | { |
905 | 0 | struct page_info *guest_page, *xen_page; |
906 | 0 |
907 | 0 | guest_page = mfn_to_page(mfn); |
908 | 0 | if ( !get_page(guest_page, current->domain) ) |
909 | 0 | { |
910 | 0 | ret = -EFAULT; |
911 | 0 | goto done; |
912 | 0 | } |
913 | 0 |
914 | 0 | xen_page = kimage_alloc_page(image, dest); |
915 | 0 | if ( !xen_page ) |
916 | 0 | { |
917 | 0 | put_page(guest_page); |
918 | 0 | ret = -ENOMEM; |
919 | 0 | goto done; |
920 | 0 | } |
921 | 0 |
922 | 0 | copy_domain_page(page_to_mfn(xen_page), mfn); |
923 | 0 | put_page(guest_page); |
924 | 0 |
925 | 0 | ret = kimage_add_page(image, page_to_maddr(xen_page)); |
926 | 0 | if ( ret < 0 ) |
927 | 0 | goto done; |
928 | 0 |
929 | 0 | ret = machine_kexec_add_page(image, dest, dest); |
930 | 0 | if ( ret < 0 ) |
931 | 0 | goto done; |
932 | 0 |
933 | 0 | dest += PAGE_SIZE; |
934 | 0 | break; |
935 | 0 | } |
936 | 0 | default: |
937 | 0 | ret = -EINVAL; |
938 | 0 | goto done; |
939 | 0 | } |
940 | 0 | entry = kimage_entry_next(entry, compat); |
941 | 0 | } |
942 | 0 | done: |
943 | 0 | unmap_domain_page(page); |
944 | 0 | return ret; |
945 | 0 | } |
946 | | |
947 | | /* |
948 | | * Local variables: |
949 | | * mode: C |
950 | | * c-file-style: "BSD" |
951 | | * c-basic-offset: 4 |
952 | | * tab-width: 4 |
953 | | * indent-tabs-mode: nil |
954 | | * End: |
955 | | */ |