debuggers.hg

view xen/common/page_alloc.c @ 3620:6d98eb831816

bitkeeper revision 1.1159.212.52 (41fa6980PfhDt-hKCfacnyHcFB7DNQ)

Make page allocator 64-bit safe.
Signed-off-by: keir.fraser@cl.cam.ac.uk
author kaf24@scramble.cl.cam.ac.uk
date Fri Jan 28 16:34:08 2005 +0000 (2005-01-28)
parents 1f6bfd28d0c6
children 49103eca5edb 0ef6e8e6e85d
/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <asm/page.h>
#include <xen/spinlock.h>
#include <xen/slab.h>
#include <xen/irq.h>
#include <asm/domain_page.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
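/* e.g. with 4kB pages: round_pgdown(0x12345) == 0x12000, round_pgup(0x12345) == 0x13000. */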

/*********************
 * ALLOCATION BITMAP
 *  One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long bitmap_size; /* in bytes */
static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn)                 \
( !! (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & \
     (1UL<<((_pn)&(PAGES_PER_MAPWORD-1)))) )
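
/*
 * Indexing example (assuming 64-bit words, so PAGES_PER_MAPWORD == 64):
 * pfn 70 lives in alloc_bitmap[1], bit 6, since 70/64 == 1 and 70&63 == 6.
 */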

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n)  sets all bits >= n.
 *  (1<<n)-1 sets all bits <  n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
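
/*
 * Worked example (assuming 64-bit words, so PAGES_PER_MAPWORD == 64):
 * allocating pfns 3..5 within a single word (start_off == 3, end_off == 6)
 * ORs in the mask ((1UL<<6)-1) & -(1UL<<3) == 0x3f & ~0x7 == 0x38,
 * i.e. exactly bits 3, 4 and 5.
 */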

static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}


/*************************
 * BOOT-TIME ALLOCATOR
 */

/* Initialise allocator to handle up to @max_page pages. */
unsigned long init_boot_allocator(unsigned long bitmap_start)
{
    bitmap_start = round_pgup(bitmap_start);

    /* Allocate space for the allocation bitmap. */
    bitmap_size  = max_page / 8;
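    /* (One bit per page => max_page/8 bytes, rounded up to a whole page.) */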
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)phys_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}

void init_boot_pages(unsigned long ps, unsigned long pe)
{
    unsigned long bad_pfn;
    char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_pfn = simple_strtoul(p, &p, 0);

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( (bad_pfn < (bitmap_size*8)) && !allocated_in_map(bad_pfn) )
        {
            printk("Marking page %08lx as bad\n", bad_pfn);
            map_alloc(bad_pfn, 1);
        }
    }
}

unsigned long alloc_boot_pages(unsigned long size, unsigned long align)
{
    unsigned long pg, i;

    size  = round_pgup(size) >> PAGE_SHIFT;
    align = round_pgup(align) >> PAGE_SHIFT;

    /* NB. bitmap_size is in bytes, so the bitmap covers bitmap_size*8 pages. */
    for ( pg = 0; (pg + size) < (bitmap_size*8); pg += align )
    {
        for ( i = 0; i < size; i++ )
            if ( allocated_in_map(pg + i) )
                break;

        if ( i == size )
        {
            map_alloc(pg, size);
            return pg << PAGE_SHIFT;
        }
    }

    return 0;
}


/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#define MEMZONE_DOM 1
#define NR_ZONES    2

/* Up to 2^10 pages can be allocated at once. */
#define MAX_ORDER 10
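/* (With 4kB pages an order-10 allocation is 2^10 * 4kB = 4MB of contiguous memory.) */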
static struct list_head heap[NR_ZONES][MAX_ORDER+1];

static unsigned long avail[NR_ZONES];

static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED;

void end_boot_allocator(void)
{
    unsigned long i, j;
    int curr_free = 0, next_free = 0;

    memset(avail, 0, sizeof(avail));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j <= MAX_ORDER; j++ )
            INIT_LIST_HEAD(&heap[i][j]);

    /* Pages that are free now go to the domain sub-allocator. */
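    /*
     * Note that each free page's successor is marked allocated just before
     * the page itself is freed, so free_heap_pages() never tries to merge
     * upwards into a page that has not yet been handed to the heap.
     */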
    for ( i = 0; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = !allocated_in_map(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            free_heap_pages(MEMZONE_DOM, pfn_to_page(i), 0);
    }
}

/* Hand the specified arbitrary page range to the specified heap zone. */
void init_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned long nr_pages)
{
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    for ( i = 0; i < nr_pages; i++ )
        free_heap_pages(zone, pg+i, 0);
}


/* Allocate 2^@order contiguous pages. */
struct pfn_info *alloc_heap_pages(unsigned int zone, unsigned int order)
{
    int i;
    struct pfn_info *pg;

    ASSERT(zone < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /* Find smallest order which can satisfy the request. */
    for ( i = order; i <= MAX_ORDER; i++ )
        if ( !list_empty(&heap[zone][i]) )
            goto found;

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap[zone][i].next, struct pfn_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
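    /*
     * e.g. an order-2 request satisfied from an order-4 block puts one
     * order-3 buddy and one order-2 buddy back on the free lists and
     * returns the remaining order-2 chunk.
     */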
    while ( i != order )
    {
        PFN_ORDER(pg) = --i;
        list_add_tail(&pg->list, &heap[zone][i]);
        pg += 1 << i;
    }

    map_alloc(page_to_pfn(pg), 1 << order);
    avail[zone] -= 1 << order;

    spin_unlock(&heap_lock);

    return pg;
}


/* Free 2^@order set of pages. */
void free_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned int order)
{
    unsigned long mask;

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);

    spin_lock(&heap_lock);

    map_free(page_to_pfn(pg), 1 << order);
    avail[zone] += 1 << order;

    /* Merge chunks as far as possible. */
    while ( order < MAX_ORDER )
    {
        mask = 1 << order;
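
        /*
         * The bit selected by `mask' says whether this block is the upper
         * (bit set) or lower (bit clear) half of its order+1 buddy pair,
         * and hence whether its buddy lies below or above it.
         */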
        if ( (page_to_pfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_pfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_pfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap[zone][order]);

    spin_unlock(&heap_lock);
}


/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock or disable interrupts while scrubbing very large memory areas.
 */
void scrub_heap_pages(void)
{
    void *p;
    unsigned long pfn, flags;

    printk("Scrubbing Free RAM: ");

    for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
    {
        /* Every 100MB, print a progress dot and appease the watchdog. */
        if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
        {
            printk(".");
            touch_nmi_watchdog();
        }

        /* Quick lock-free check. */
        if ( allocated_in_map(pfn) )
            continue;

        spin_lock_irqsave(&heap_lock, flags);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(pfn) )
        {
            p = map_domain_mem(pfn << PAGE_SHIFT);
            clear_page(p);
            unmap_domain_mem(p);
        }

        spin_unlock_irqrestore(&heap_lock, flags);
    }

    printk("done.\n");
}


/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(unsigned long ps, unsigned long pe)
{
    unsigned long flags;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    memguard_guard_range(__va(ps), pe - ps);

    local_irq_save(flags);
    init_heap_pages(MEMZONE_XEN, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
    local_irq_restore(flags);
}


unsigned long alloc_xenheap_pages(unsigned int order)
{
    unsigned long flags;
    struct pfn_info *pg;
    int i, attempts = 0;

 retry:
    local_irq_save(flags);
    pg = alloc_heap_pages(MEMZONE_XEN, order);
    local_irq_restore(flags);

    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].count_info        = 0;
        pg[i].u.inuse.domain    = NULL;
        pg[i].u.inuse.type_info = 0;
    }

    return (unsigned long)page_to_virt(pg);

 no_memory:
    if ( attempts++ < 8 )
    {
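        /* Ask the slab allocator to release its cached free memory, then retry. */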
        xmem_cache_reap();
        goto retry;
    }

    printk("Cannot handle page request order %d!\n", order);
    dump_slabinfo();
    return 0;
}


void free_xenheap_pages(unsigned long p, unsigned int order)
{
    unsigned long flags;

    memguard_guard_range((void *)p, 1 << (order + PAGE_SHIFT));

    local_irq_save(flags);
    free_heap_pages(MEMZONE_XEN, virt_to_page(p), order);
    local_irq_restore(flags);
}


/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

void init_domheap_pages(unsigned long ps, unsigned long pe)
{
    ASSERT(!in_irq());

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    init_heap_pages(MEMZONE_DOM, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
}


struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
{
    struct pfn_info *pg;
    unsigned long mask, flushed_mask, pfn_stamp, cpu_stamp;
    int i, j;

    ASSERT(!in_irq());

    if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) )
        return NULL;
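
    /*
     * Freed pages may still be present in remote TLBs. Each page's
     * tlbflush_timestamp is compared against the per-CPU tlbflush_time of
     * the CPUs in its u.free.cpu_mask to decide which CPUs (if any) must
     * flush before the page is handed to its new owner.
     */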
    flushed_mask = 0;
    for ( i = 0; i < (1 << order); i++ )
    {
        if ( (mask = (pg[i].u.free.cpu_mask & ~flushed_mask)) != 0 )
        {
            pfn_stamp = pg[i].tlbflush_timestamp;
            for ( j = 0; (mask != 0) && (j < smp_num_cpus); j++ )
            {
                if ( mask & (1UL<<j) )
                {
                    cpu_stamp = tlbflush_time[j];
                    if ( !NEED_FLUSH(cpu_stamp, pfn_stamp) )
                        mask &= ~(1UL<<j);
                }
            }

            if ( unlikely(mask != 0) )
            {
                flush_tlb_mask(mask);
                perfc_incrc(need_flush_tlb_flush);
                flushed_mask |= mask;
            }
        }

        pg[i].count_info        = 0;
        pg[i].u.inuse.domain    = NULL;
        pg[i].u.inuse.type_info = 0;
    }

    if ( d == NULL )
        return pg;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(test_bit(DF_DYING, &d->d_flags)) ||
         unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    {
        DPRINTK("Over-allocation for domain %u: %u > %u\n",
                d->id, d->tot_pages + (1 << order), d->max_pages);
        DPRINTK("...or the domain is dying (%d)\n",
                !!test_bit(DF_DYING, &d->d_flags));
        spin_unlock(&d->page_alloc_lock);
        free_heap_pages(MEMZONE_DOM, pg, order);
        return NULL;
    }

    if ( unlikely(d->tot_pages == 0) )
        get_knownalive_domain(d);

    d->tot_pages += 1 << order;

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].u.inuse.domain = d;
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info |= PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);

    return pg;
}


void free_domheap_pages(struct pfn_info *pg, unsigned int order)
{
    int            i, drop_dom_ref;
    struct domain *d = pg->u.inuse.domain;
    struct exec_domain *ed;
    void          *p;
    int            cpu_mask = 0;

    ASSERT(!in_irq());

    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    {
        /* NB. May recursively lock from domain_relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from domain_relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for_each_exec_domain(d, ed)
            cpu_mask |= 1 << ed->processor;
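
        /*
         * cpu_mask collects the CPUs that may still hold TLB entries for
         * these pages; it is stashed in each page's u.free.cpu_mask below
         * and consulted by alloc_domheap_pages() before the page is reused.
         */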
        for ( i = 0; i < (1 << order); i++ )
        {
            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
            pg[i].tlbflush_timestamp  = tlbflush_current_time();
            pg[i].u.free.cpu_mask     = cpu_mask;
            list_del(&pg[i].list);

            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            if ( unlikely(test_bit(DF_DYING, &d->d_flags)) )
            {
                p = map_domain_mem(page_to_phys(&pg[i]));
                clear_page(p);
                unmap_domain_mem(p);
            }
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        free_heap_pages(MEMZONE_DOM, pg, order);
    }
    else
    {
        /* Freeing an anonymous domain-heap page. */
        free_heap_pages(MEMZONE_DOM, pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}


unsigned long avail_domheap_pages(void)
{
    return avail[MEMZONE_DOM];
}