
xen/common/page_alloc.c @ 4671:18a8f5216548

bitkeeper revision 1.1366 (4268c126o36cKcnzrSkVxkbrPsoz1g)

Clean up shadow destruction and fix domain destroy when shadow mode
is disabled.
Signed-off-by: Keir Fraser <keir@xensource.com>
author kaf24@firebug.cl.cam.ac.uk
date Fri Apr 22 09:17:26 2005 +0000 (2005-04-22)
parents 9a768d11cc7b
children ccc4ee412321
line source
/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/perfc.h>
#include <xen/sched.h>
#include <xen/spinlock.h>
#include <xen/slab.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/shadow.h>
#include <asm/domain_page.h>
#include <asm/page.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

static spinlock_t page_scrub_lock;
struct list_head page_scrub_list;

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long  bitmap_size; /* in bytes */
static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn)                 \
( !! (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & \
      (1UL<<((_pn)&(PAGES_PER_MAPWORD-1)))) )

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n) sets all bits >= n.
 *  (1<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
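
/*
 * Worked example (illustrative sketch; the helper name is hypothetical and
 * not part of this file): for a range that lies within a single 64-bit map
 * word, the two masks above combine to cover exactly the requested pages.
 */
#if 0 /* illustration only */
static void map_mask_example(void)
{
    /* Mark pages 3..7 of one word: start_off = 3, end_off = 8. */
    unsigned long start_off = 3, end_off = 8;
    unsigned long mask = ((1UL << end_off) - 1) & -(1UL << start_off);
    /* mask == 0xf8: bits 3,4,5,6,7 set -- exactly pages 3..7. */
    (void)mask;
}
#endif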

static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}

/*************************
 * BOOT-TIME ALLOCATOR
 */

/* Initialise allocator to handle up to @max_page pages. */
unsigned long init_boot_allocator(unsigned long bitmap_start)
{
    bitmap_start = round_pgup(bitmap_start);

    /* Allocate space for the allocation bitmap. */
    bitmap_size  = max_page / 8;
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)phys_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}
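
/*
 * Sizing example (illustrative sketch, assuming 4kB pages; the helper name
 * is hypothetical): with 1GB of RAM, max_page = 1UL << 18, so the bitmap
 * occupies 2^18 / 8 = 32kB, which round_pgup() leaves at eight pages.
 */
#if 0 /* illustration only */
static unsigned long bitmap_bytes_example(void)
{
    unsigned long example_max_page = 1UL << 18; /* 1GB / 4kB pages */
    return round_pgup(example_max_page / 8);    /* 32768 bytes */
}
#endif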

void init_boot_pages(unsigned long ps, unsigned long pe)
{
    unsigned long bad_pfn;
    char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_pfn = simple_strtoul(p, &p, 0);

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( (bad_pfn < (bitmap_size*8)) && !allocated_in_map(bad_pfn) )
        {
            printk("Marking page %lx as bad\n", bad_pfn);
            map_alloc(bad_pfn, 1);
        }
    }
}

unsigned long alloc_boot_pages(unsigned long size, unsigned long align)
{
    unsigned long pg, i;

    size  = round_pgup(size) >> PAGE_SHIFT;
    align = round_pgup(align) >> PAGE_SHIFT;

    for ( pg = 0; (pg + size) < (bitmap_size*8); pg += align )
    {
        for ( i = 0; i < size; i++ )
            if ( allocated_in_map(pg + i) )
                break;

        if ( i == size )
        {
            map_alloc(pg, size);
            return pg << PAGE_SHIFT;
        }
    }

    return 0;
}

/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#define MEMZONE_DOM 1
#define NR_ZONES    2

/* Up to 2^20 pages can be allocated at once. */
#define MAX_ORDER 20
static struct list_head heap[NR_ZONES][MAX_ORDER+1];

static unsigned long avail[NR_ZONES];

static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED;

void end_boot_allocator(void)
{
    unsigned long i, j;
    int curr_free = 0, next_free = 0;

    memset(avail, 0, sizeof(avail));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j <= MAX_ORDER; j++ )
            INIT_LIST_HEAD(&heap[i][j]);

    /* Pages that are free now go to the domain sub-allocator. */
    for ( i = 0; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = !allocated_in_map(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            free_heap_pages(MEMZONE_DOM, pfn_to_page(i), 0);
    }
}

/* Hand the specified arbitrary page range to the specified heap zone. */
void init_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned long nr_pages)
{
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    for ( i = 0; i < nr_pages; i++ )
        free_heap_pages(zone, pg+i, 0);
}

/* Allocate 2^@order contiguous pages. */
struct pfn_info *alloc_heap_pages(unsigned int zone, unsigned int order)
{
    int i;
    struct pfn_info *pg;

    ASSERT(zone < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /* Find smallest order which can satisfy the request. */
    for ( i = order; i <= MAX_ORDER; i++ )
        if ( !list_empty(&heap[zone][i]) )
            goto found;

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap[zone][i].next, struct pfn_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
    while ( i != order )
    {
        PFN_ORDER(pg) = --i;
        list_add_tail(&pg->list, &heap[zone][i]);
        pg += 1 << i;
    }

    map_alloc(page_to_pfn(pg), 1 << order);
    avail[zone] -= 1 << order;

    spin_unlock(&heap_lock);

    return pg;
}
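
/*
 * Splitting example (illustrative sketch; the helper name is hypothetical):
 * a request for order 2 (four pages) served from a free order-4 chunk is
 * halved twice -- each leading half goes back on the next-lower free list
 * and the allocation continues in the trailing half, so the caller receives
 * the final order-2 tail of the original chunk.
 */
#if 0 /* illustration only */
static struct pfn_info *order2_example(void)
{
    /* 2^2 == 4 contiguous pages from the domain zone, or NULL on failure. */
    return alloc_heap_pages(MEMZONE_DOM, 2);
}
#endif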

/* Free 2^@order set of pages. */
void free_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned int order)
{
    unsigned long mask;

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);

    spin_lock(&heap_lock);

    map_free(page_to_pfn(pg), 1 << order);
    avail[zone] += 1 << order;

    /* Merge chunks as far as possible. */
    while ( order < MAX_ORDER )
    {
        mask = 1 << order;

        if ( (page_to_pfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_pfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_pfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap[zone][order]);

    spin_unlock(&heap_lock);
}
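
/*
 * Buddy-merge example (illustrative sketch; the helper name is hypothetical):
 * the buddy of a 2^order block differs from it only in bit `order' of its
 * page number -- bit set means the buddy precedes the block, bit clear means
 * it follows.  Merging proceeds upwards only while the buddy is both free
 * and of the same order, exactly as checked in the loop above.
 */
#if 0 /* illustration only */
static unsigned long buddy_pfn_example(unsigned long pfn, unsigned int order)
{
    return pfn ^ (1UL << order);
}
#endif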

/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock or disable interrupts while scrubbing very large memory areas.
 */
void scrub_heap_pages(void)
{
    void *p;
    unsigned long pfn, flags;

    printk("Scrubbing Free RAM: ");

    for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
    {
        /* Every 100MB, print a progress dot and appease the watchdog. */
        if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
        {
            printk(".");
            touch_nmi_watchdog();
        }

        /* Quick lock-free check. */
        if ( allocated_in_map(pfn) )
            continue;

        spin_lock_irqsave(&heap_lock, flags);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(pfn) )
        {
            p = map_domain_mem(pfn << PAGE_SHIFT);
            clear_page(p);
            unmap_domain_mem(p);
        }

        spin_unlock_irqrestore(&heap_lock, flags);
    }

    printk("done.\n");
}

/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(unsigned long ps, unsigned long pe)
{
    unsigned long flags;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    memguard_guard_range(__va(ps), pe - ps);

    /*
     * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
     * prevent merging of power-of-two blocks across the zone boundary.
     */
    if ( !IS_XEN_HEAP_FRAME(phys_to_page(pe)) )
        pe -= PAGE_SIZE;

    local_irq_save(flags);
    init_heap_pages(MEMZONE_XEN, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
    local_irq_restore(flags);
}

unsigned long alloc_xenheap_pages(unsigned int order)
{
    unsigned long flags;
    struct pfn_info *pg;
    int i;

    local_irq_save(flags);
    pg = alloc_heap_pages(MEMZONE_XEN, order);
    local_irq_restore(flags);

    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
    }

    return (unsigned long)page_to_virt(pg);

 no_memory:
    printk("Cannot handle page request order %d!\n", order);
    return 0;
}
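
/*
 * Usage sketch (illustrative; the helper name is hypothetical): xenheap
 * allocations return a virtual address, which must later be released with
 * free_xenheap_pages() using the same order.
 */
#if 0 /* illustration only */
static void xenheap_usage_example(void)
{
    unsigned long v = alloc_xenheap_pages(1); /* two contiguous pages */
    if ( v != 0 )
        free_xenheap_pages(v, 1);
}
#endif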

void free_xenheap_pages(unsigned long p, unsigned int order)
{
    unsigned long flags;

    memguard_guard_range((void *)p, 1 << (order + PAGE_SHIFT));

    local_irq_save(flags);
    free_heap_pages(MEMZONE_XEN, virt_to_page(p), order);
    local_irq_restore(flags);
}

/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

void init_domheap_pages(unsigned long ps, unsigned long pe)
{
    ASSERT(!in_irq());

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    init_heap_pages(MEMZONE_DOM, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
}

struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
{
    struct pfn_info *pg;
    unsigned long mask = 0;
    int i;

    ASSERT(!in_irq());

    if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) )
        return NULL;

    for ( i = 0; i < (1 << order); i++ )
    {
        mask |= tlbflush_filter_cpuset(
            pg[i].u.free.cpu_mask & ~mask, pg[i].tlbflush_timestamp);

        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
    }

    if ( unlikely(mask != 0) )
    {
        perfc_incrc(need_flush_tlb_flush);
        flush_tlb_mask(mask);
    }

    if ( d == NULL )
        return pg;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(test_bit(DF_DYING, &d->d_flags)) ||
         unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    {
        DPRINTK("Over-allocation for domain %u: %u > %u\n",
                d->id, d->tot_pages + (1 << order), d->max_pages);
        DPRINTK("...or the domain is dying (%d)\n",
                !!test_bit(DF_DYING, &d->d_flags));
        spin_unlock(&d->page_alloc_lock);
        free_heap_pages(MEMZONE_DOM, pg, order);
        return NULL;
    }

    if ( unlikely(d->tot_pages == 0) )
        get_knownalive_domain(d);

    d->tot_pages += 1 << order;

    for ( i = 0; i < (1 << order); i++ )
    {
        page_set_owner(&pg[i], d);
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info |= PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);

    return pg;
}
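
/*
 * Usage sketch (illustrative; the helper name is hypothetical): allocating
 * on behalf of a domain charges d->tot_pages and links the pages onto
 * d->page_list, whereas d == NULL yields anonymous heap pages.  Pages are
 * returned with free_domheap_pages(), which uncharges the owner again.
 */
#if 0 /* illustration only */
static void domheap_usage_example(struct domain *d)
{
    struct pfn_info *pg = alloc_domheap_pages(d, 0); /* one page for d */
    if ( pg != NULL )
        free_domheap_pages(pg, 0);
}
#endif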

void free_domheap_pages(struct pfn_info *pg, unsigned int order)
{
    int            i, drop_dom_ref;
    struct domain *d = page_get_owner(pg);

    ASSERT(!in_irq());

    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
        {
            shadow_drop_references(d, &pg[i]);
            ASSERT(((pg[i].u.inuse.type_info & PGT_count_mask) == 0) ||
                   shadow_tainted_refcnts(d));
            pg[i].tlbflush_timestamp = tlbflush_current_time();
            pg[i].u.free.cpu_mask    = d->cpuset;
            list_del(&pg[i].list);
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        if ( likely(!test_bit(DF_DYING, &d->d_flags)) )
        {
            free_heap_pages(MEMZONE_DOM, pg, order);
        }
        else
        {
            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            for ( i = 0; i < (1 << order); i++ )
            {
                spin_lock(&page_scrub_lock);
                list_add(&pg[i].list, &page_scrub_list);
                spin_unlock(&page_scrub_lock);
            }
        }
    }
    else
    {
        /* Freeing an anonymous domain-heap page. */
        free_heap_pages(MEMZONE_DOM, pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}

unsigned long avail_domheap_pages(void)
{
    return avail[MEMZONE_DOM];
}

/*************************
 * PAGE SCRUBBING
 */
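
/*
 * Pages owned by a dying domain are queued on page_scrub_list by
 * free_domheap_pages() above; this softirq drains the list in small
 * batches, zeroing each page before returning it to MEMZONE_DOM.
 */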

static void page_scrub_softirq(void)
{
    struct list_head *ent;
    struct pfn_info  *pg;
    void             *p;
    int               i;
    s_time_t          start = NOW();

    /* Aim to do 1ms of work (ten percent of a 10ms jiffy). */
    do {
        spin_lock(&page_scrub_lock);

        if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
        {
            spin_unlock(&page_scrub_lock);
            return;
        }

        /* Peel up to 16 pages from the list. */
        for ( i = 0; i < 16; i++ )
        {
            if ( ent->next == &page_scrub_list )
                break;
            ent = ent->next;
        }

        /* Remove peeled pages from the list. */
        ent->next->prev = &page_scrub_list;
        page_scrub_list.next = ent->next;

        spin_unlock(&page_scrub_lock);

        /* Working backwards, scrub each page in turn. */
        while ( ent != &page_scrub_list )
        {
            pg = list_entry(ent, struct pfn_info, list);
            ent = ent->prev;
            p = map_domain_mem(page_to_phys(pg));
            clear_page(p);
            unmap_domain_mem(p);
            free_heap_pages(MEMZONE_DOM, pg, 0);
        }
    } while ( (NOW() - start) < MILLISECS(1) );
}

static __init int page_scrub_init(void)
{
    spin_lock_init(&page_scrub_lock);
    INIT_LIST_HEAD(&page_scrub_list);
    open_softirq(PAGE_SCRUB_SOFTIRQ, page_scrub_softirq);
    return 0;
}
__initcall(page_scrub_init);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */