view xen/common/page_alloc.c @ 3670:5f3b5a2eb615

bitkeeper revision 1.1159.223.62 (4202591aKU7cGiiCesn0hIfz1J0vUQ)

Fix bootmem allocator.
Signed-off-by: keir.fraser@cl.cam.ac.uk
author kaf24@scramble.cl.cam.ac.uk
date Thu Feb 03 17:02:18 2005 +0000 (2005-02-03)
parents 04f50d583813
children 1c43dbcfc46f 02e17ce32b91
line source
/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <asm/page.h>
#include <xen/spinlock.h>
#include <xen/slab.h>
#include <xen/irq.h>
#include <asm/domain_page.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

#define round_pgdown(_p) ((_p)&PAGE_MASK)
#define round_pgup(_p)   (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long  bitmap_size; /* in bytes */
static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn) \
    (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & (1<<((_pn)&(PAGES_PER_MAPWORD-1))))
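/*
 * Illustrative example (assuming 32-bit map words, so PAGES_PER_MAPWORD is
 * 32): allocated_in_map(70) tests bit 6 (70 & 31) of alloc_bitmap[2]
 * (70 / 32); a non-zero result means page 70 is currently allocated.
 */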
/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n) sets all bits >= n.
 *  (1<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
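/*
 * Worked example (again assuming 32-bit map words): allocating pages 4..9
 * within a single map word uses the mask ((1<<10)-1) & -(1<<4):
 *  (1<<10)-1 = 0x000003ff  (bits 0..9 set)
 *  -(1<<4)   = 0xfffffff0  (bits 4..31 set)
 *  AND'ed    = 0x000003f0  (bits 4..9 set)
 * map_alloc() ORs this mask into alloc_bitmap[]; map_free() ANDs with its
 * complement, -(1<<10) | ((1<<4)-1), to clear the same bits.
 */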
static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1<<end_off)-1) & -(1<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0L;
        alloc_bitmap[curr_idx] |= (1<<end_off)-1;
    }
}


static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1<<end_off) | ((1<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1<<end_off);
    }
}



/*************************
 * BOOT-TIME ALLOCATOR
 */

/* Initialise allocator to handle up to @max_page pages. */
unsigned long init_boot_allocator(unsigned long bitmap_start)
{
    bitmap_start = round_pgup(bitmap_start);

    /* Allocate space for the allocation bitmap. */
    bitmap_size  = max_page / 8;
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)phys_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}
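/*
 * Sizing example (illustrative): with 4kB pages, 4GB of physical memory is
 * 2^20 pages, so the bitmap needs 2^20 bits = 128kB, which is already
 * page-aligned. The caller can place further boot data at the returned
 * address, immediately after the bitmap.
 */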
void init_boot_pages(unsigned long ps, unsigned long pe)
{
    unsigned long bad_pfn;
    char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_pfn = simple_strtoul(p, &p, 0);

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( (bad_pfn < (bitmap_size*8)) && !allocated_in_map(bad_pfn) )
        {
            printk("Marking page %08lx as bad\n", bad_pfn);
            map_alloc(bad_pfn, 1);
        }
    }
}

unsigned long alloc_boot_pages(unsigned long size, unsigned long align)
{
    unsigned long pg, i;

    size  = round_pgup(size) >> PAGE_SHIFT;
    align = round_pgup(align) >> PAGE_SHIFT;

    for ( pg = 0; (pg + size) < (bitmap_size*8); pg += align )
    {
        for ( i = 0; i < size; i++ )
            if ( allocated_in_map(pg + i) )
                break;

        if ( i == size )
        {
            map_alloc(pg, size);
            return pg << PAGE_SHIFT;
        }
    }

    return 0;
}
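/*
 * Usage sketch (hypothetical boot code): the boot allocator is a first-fit
 * scan over the bitmap, used before the buddy heaps exist. A caller might
 * reserve page-aligned space for early page tables like this, getting back
 * a physical address (0 on failure):
 *
 *     unsigned long pa = alloc_boot_pages(4 * PAGE_SIZE, PAGE_SIZE);
 *     if ( pa == 0 )
 *         panic("Not enough boot memory for page tables\n");
 *     memset(phys_to_virt(pa), 0, 4 * PAGE_SIZE);
 */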
/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#define MEMZONE_DOM 1
#define NR_ZONES    2

/* Up to 2^10 pages can be allocated at once. */
#define MAX_ORDER 10
static struct list_head heap[NR_ZONES][MAX_ORDER+1];
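/*
 * With the usual 4kB PAGE_SIZE this caps a single request at
 * 2^10 * 4kB = 4MB of physically contiguous memory.
 */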
static unsigned long avail[NR_ZONES];

static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED;

void end_boot_allocator(void)
{
    unsigned long i, j;
    int curr_free = 0, next_free = 0;

    memset(avail, 0, sizeof(avail));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j <= MAX_ORDER; j++ )
            INIT_LIST_HEAD(&heap[i][j]);

    /* Pages that are free now go to the domain sub-allocator. */
    for ( i = 0; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = !allocated_in_map(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            free_heap_pages(MEMZONE_DOM, pfn_to_page(i), 0);
    }
}
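/*
 * Note on the loop above: page i is released to the heap only after page i+1
 * has been temporarily marked as allocated. free_heap_pages() will otherwise
 * try to merge a freed page with any buddy that the bitmap shows as free,
 * and during this page-at-a-time handover that buddy may not yet have been
 * inserted into the heap lists.
 */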
/* Hand the specified arbitrary page range to the specified heap zone. */
void init_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned long nr_pages)
{
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    for ( i = 0; i < nr_pages; i++ )
        free_heap_pages(zone, pg+i, 0);
}


/* Allocate 2^@order contiguous pages. */
struct pfn_info *alloc_heap_pages(unsigned int zone, unsigned int order)
{
    int i;
    struct pfn_info *pg;

    ASSERT(zone < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /* Find smallest order which can satisfy the request. */
    for ( i = order; i <= MAX_ORDER; i++ )
        if ( !list_empty(&heap[zone][i]) )
            goto found;

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap[zone][i].next, struct pfn_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
    while ( i != order )
    {
        PFN_ORDER(pg) = --i;
        list_add_tail(&pg->list, &heap[zone][i]);
        pg += 1 << i;
    }

    map_alloc(page_to_pfn(pg), 1 << order);
    avail[zone] -= 1 << order;

    spin_unlock(&heap_lock);

    return pg;
}
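/*
 * Splitting example (illustrative): if an order-0 page is requested and the
 * smallest non-empty free list holds an order-3 block starting at pfn 64,
 * the halving loop above returns pfn 71 and queues the remainders back on
 * the free lists: pfns 64-67 at order 2, 68-69 at order 1, and 70 at order 0.
 */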
/* Free 2^@order set of pages. */
void free_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned int order)
{
    unsigned long mask;

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);

    spin_lock(&heap_lock);

    map_free(page_to_pfn(pg), 1 << order);
    avail[zone] += 1 << order;

    /* Merge chunks as far as possible. */
    while ( order < MAX_ORDER )
    {
        mask = 1 << order;

        if ( (page_to_pfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_pfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_pfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap[zone][order]);

    spin_unlock(&heap_lock);
}
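/*
 * Merging example (illustrative): at order n a block's buddy differs only in
 * bit n of its pfn, which is what (page_to_pfn(pg) & mask) tests. Freeing
 * pfn 71 at order 0 after the split above merges with pfn 70 into an order-1
 * block at 70, then with 68-69 into an order-2 block at 68, and finally with
 * 64-67 to restore the original order-3 block at pfn 64.
 */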
/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock or disable interrupts while scrubbing very large memory areas.
 */
void scrub_heap_pages(void)
{
    void *p;
    unsigned long pfn, flags;

    printk("Scrubbing Free RAM: ");

    for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
    {
        /* Every 100MB, print a progress dot and appease the watchdog. */
        if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
        {
            printk(".");
            touch_nmi_watchdog();
        }

        /* Quick lock-free check. */
        if ( allocated_in_map(pfn) )
            continue;

        spin_lock_irqsave(&heap_lock, flags);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(pfn) )
        {
            p = map_domain_mem(pfn << PAGE_SHIFT);
            clear_page(p);
            unmap_domain_mem(p);
        }

        spin_unlock_irqrestore(&heap_lock, flags);
    }

    printk("done.\n");
}



/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(unsigned long ps, unsigned long pe)
{
    unsigned long flags;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    memguard_guard_range(__va(ps), pe - ps);

    local_irq_save(flags);
    init_heap_pages(MEMZONE_XEN, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
    local_irq_restore(flags);
}


unsigned long alloc_xenheap_pages(unsigned int order)
{
    unsigned long flags;
    struct pfn_info *pg;
    int i, attempts = 0;

 retry:
    local_irq_save(flags);
    pg = alloc_heap_pages(MEMZONE_XEN, order);
    local_irq_restore(flags);

    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].count_info        = 0;
        pg[i].u.inuse.domain    = NULL;
        pg[i].u.inuse.type_info = 0;
    }

    return (unsigned long)page_to_virt(pg);

 no_memory:
    if ( attempts++ < 8 )
    {
        xmem_cache_reap();
        goto retry;
    }

    printk("Cannot handle page request order %d!\n", order);
    dump_slabinfo();
    return 0;
}


void free_xenheap_pages(unsigned long p, unsigned int order)
{
    unsigned long flags;

    memguard_guard_range((void *)p, 1 << (order + PAGE_SHIFT));

    local_irq_save(flags);
    free_heap_pages(MEMZONE_XEN, virt_to_page(p), order);
    local_irq_restore(flags);
}
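/*
 * Usage sketch (hypothetical caller): the Xen-heap interface deals in
 * virtual addresses, e.g. allocating and later releasing two contiguous
 * pages (order 1):
 *
 *     unsigned long buf = alloc_xenheap_pages(1);
 *     if ( buf == 0 )
 *         return -ENOMEM;
 *     ...
 *     free_xenheap_pages(buf, 1);
 */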
/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

void init_domheap_pages(unsigned long ps, unsigned long pe)
{
    ASSERT(!in_irq());

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    init_heap_pages(MEMZONE_DOM, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
}


struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
{
    struct pfn_info *pg;
    unsigned long mask, flushed_mask, pfn_stamp, cpu_stamp;
    int i, j;

    ASSERT(!in_irq());

    if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) )
        return NULL;

    flushed_mask = 0;
    for ( i = 0; i < (1 << order); i++ )
    {
        if ( (mask = (pg[i].u.free.cpu_mask & ~flushed_mask)) != 0 )
        {
            pfn_stamp = pg[i].tlbflush_timestamp;
            for ( j = 0; (mask != 0) && (j < smp_num_cpus); j++ )
            {
                if ( mask & (1<<j) )
                {
                    cpu_stamp = tlbflush_time[j];
                    if ( !NEED_FLUSH(cpu_stamp, pfn_stamp) )
                        mask &= ~(1<<j);
                }
            }

            if ( unlikely(mask != 0) )
            {
                flush_tlb_mask(mask);
                perfc_incrc(need_flush_tlb_flush);
                flushed_mask |= mask;
            }
        }

        pg[i].count_info        = 0;
        pg[i].u.inuse.domain    = NULL;
        pg[i].u.inuse.type_info = 0;
    }

    if ( d == NULL )
        return pg;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(test_bit(DF_DYING, &d->flags)) ||
         unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    {
        DPRINTK("Over-allocation for domain %u: %u > %u\n",
                d->id, d->tot_pages + (1 << order), d->max_pages);
        DPRINTK("...or the domain is dying (%d)\n",
                !!test_bit(DF_DYING, &d->flags));
        spin_unlock(&d->page_alloc_lock);
        free_heap_pages(MEMZONE_DOM, pg, order);
        return NULL;
    }

    if ( unlikely(d->tot_pages == 0) )
        get_knownalive_domain(d);

    d->tot_pages += 1 << order;

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].u.inuse.domain = d;
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info |= PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);

    return pg;
}
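/*
 * Note on the flush loop above: a freed page records which CPUs may still
 * hold stale TLB entries for it (u.free.cpu_mask) and when it was freed
 * (tlbflush_timestamp). Before the page is handed to a new owner, any CPU in
 * that mask whose last flush predates the timestamp (NEED_FLUSH) gets its
 * TLB flushed; flushed_mask avoids flushing the same CPU twice within one
 * allocation.
 */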
void free_domheap_pages(struct pfn_info *pg, unsigned int order)
{
    int i, drop_dom_ref;
    struct domain *d = pg->u.inuse.domain;
    void *p;

    ASSERT(!in_irq());

    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    {
        /* NB. May recursively lock from domain_relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from domain_relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
        {
            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
            pg[i].tlbflush_timestamp = tlbflush_current_time();
            pg[i].u.free.cpu_mask    = 1 << d->processor;
            list_del(&pg[i].list);

            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            if ( unlikely(test_bit(DF_DYING, &d->flags)) )
            {
                p = map_domain_mem(page_to_phys(&pg[i]));
                clear_page(p);
                unmap_domain_mem(p);
            }
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        free_heap_pages(MEMZONE_DOM, pg, order);
    }
    else
    {
        /* Freeing an anonymous domain-heap page. */
        free_heap_pages(MEMZONE_DOM, pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}


unsigned long avail_domheap_pages(void)
{
    return avail[MEMZONE_DOM];
}
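/*
 * Usage sketch (hypothetical caller): domain-heap pages are handed out as
 * struct pfn_info pointers and charged against the owning domain unless d is
 * NULL (an "anonymous" allocation), e.g.
 *
 *     struct pfn_info *page = alloc_domheap_pages(d, 0);
 *     if ( page == NULL )
 *         return -ENOMEM;
 *     ...
 *     free_domheap_pages(page, 0);
 */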