debuggers.hg

view xen/common/page_alloc.c @ 3705:4294cfa9fad3

bitkeeper revision 1.1159.212.95 (4204aa0ee0re5Xx1zWrJ9ejxzgRs3w)

Various cleanups. Remove PDB pending simpler GDB stub and/or NetBSD debugger.
Force emacs mode to appropriate tabbing in various files.
Signed-off-by: keir.fraser@cl.cam.ac.uk
author kaf24@scramble.cl.cam.ac.uk
date Sat Feb 05 11:12:14 2005 +0000 (2005-02-05)
parents 32d29625d39b
children 88957a238191 cb87fd290eb0
line source
/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <asm/page.h>
#include <xen/spinlock.h>
#include <xen/slab.h>
#include <xen/irq.h>
#include <asm/domain_page.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char opt_badpage[100] = "";
string_param("badpage", opt_badpage);

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)
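
/*
 * Editorial note (not part of the original changeset): with the usual 4kB
 * pages (PAGE_SIZE == 0x1000, PAGE_MASK == ~0xfff), these macros behave as
 * follows, for example:
 *   round_pgdown(0x12345) == 0x12000
 *   round_pgup(0x12345)   == 0x13000
 *   round_pgup(0x12000)   == 0x12000   (already page-aligned)
 */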

/*********************
 * ALLOCATION BITMAP
 * One bit per page of memory. Bit set => page is allocated.
 */

static unsigned long bitmap_size; /* in bytes */
static unsigned long *alloc_bitmap;
#define PAGES_PER_MAPWORD (sizeof(unsigned long) * 8)

#define allocated_in_map(_pn)                 \
( !! (alloc_bitmap[(_pn)/PAGES_PER_MAPWORD] & \
      (1UL<<((_pn)&(PAGES_PER_MAPWORD-1)))) )

/*
 * Hint regarding bitwise arithmetic in map_{alloc,free}:
 *  -(1<<n) sets all bits >= n.
 *  (1<<n)-1 sets all bits < n.
 * Variable names in map_{alloc,free}:
 *  *_idx == Index into `alloc_bitmap' array.
 *  *_off == Bit offset within an element of the `alloc_bitmap' array.
 */
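
/*
 * Editorial illustration (not part of the original changeset): a minimal
 * sketch of the two mask idioms above, assuming 64-bit map words
 * (PAGES_PER_MAPWORD == 64). For n == 4:
 *   -(1UL<<4)  == 0xfffffffffffffff0   (all bits >= 4 set)
 *   (1UL<<4)-1 == 0x000000000000000f   (all bits <  4 set)
 * ANDing one mask of each kind selects an arbitrary bit range within a
 * single word, which is exactly how map_alloc() and map_free() build their
 * partial-word masks.
 */
#if 0  /* illustration only; not built */
static void example_bit_masks(void)
{
    unsigned long lo = 4, hi = 9;
    /* Bits lo..hi-1 set: 0x1f0 for lo == 4, hi == 9. */
    unsigned long range_mask = ((1UL << hi) - 1) & -(1UL << lo);
    printk("range mask = %lx\n", range_mask);
}
#endif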

static void map_alloc(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already allocated. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(!allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] |= ((1UL<<end_off)-1) & -(1UL<<start_off);
    }
    else
    {
        alloc_bitmap[curr_idx] |= -(1UL<<start_off);
        while ( ++curr_idx < end_idx ) alloc_bitmap[curr_idx] = ~0UL;
        alloc_bitmap[curr_idx] |= (1UL<<end_off)-1;
    }
}
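
/*
 * Editorial illustration (not part of the original changeset): a worked
 * example of map_alloc(), assuming PAGES_PER_MAPWORD == 64. For
 * map_alloc(first_page = 60, nr_pages = 8):
 *   curr_idx = 0, start_off = 60, end_idx = 1, end_off = 4.
 * The indices differ, so the else-branch runs:
 *   alloc_bitmap[0] |= -(1UL<<60)   -> sets bits 60..63 (pages 60..63)
 *   alloc_bitmap[1] |= (1UL<<4)-1   -> sets bits 0..3   (pages 64..67)
 * i.e. exactly the eight pages 60..67 are marked allocated.
 */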

static void map_free(unsigned long first_page, unsigned long nr_pages)
{
    unsigned long start_off, end_off, curr_idx, end_idx;

#ifndef NDEBUG
    unsigned long i;
    /* Check that the block isn't already freed. */
    for ( i = 0; i < nr_pages; i++ )
        ASSERT(allocated_in_map(first_page + i));
#endif

    curr_idx  = first_page / PAGES_PER_MAPWORD;
    start_off = first_page & (PAGES_PER_MAPWORD-1);
    end_idx   = (first_page + nr_pages) / PAGES_PER_MAPWORD;
    end_off   = (first_page + nr_pages) & (PAGES_PER_MAPWORD-1);

    if ( curr_idx == end_idx )
    {
        alloc_bitmap[curr_idx] &= -(1UL<<end_off) | ((1UL<<start_off)-1);
    }
    else
    {
        alloc_bitmap[curr_idx] &= (1UL<<start_off)-1;
        while ( ++curr_idx != end_idx ) alloc_bitmap[curr_idx] = 0;
        alloc_bitmap[curr_idx] &= -(1UL<<end_off);
    }
}


/*************************
 * BOOT-TIME ALLOCATOR
 */

/* Initialise allocator to handle up to @max_page pages. */
unsigned long init_boot_allocator(unsigned long bitmap_start)
{
    bitmap_start = round_pgup(bitmap_start);

    /* Allocate space for the allocation bitmap. */
    bitmap_size  = max_page / 8;
    bitmap_size  = round_pgup(bitmap_size);
    alloc_bitmap = (unsigned long *)phys_to_virt(bitmap_start);

    /* All allocated by default. */
    memset(alloc_bitmap, ~0, bitmap_size);

    return bitmap_start + bitmap_size;
}
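
/*
 * Editorial note (not part of the original changeset): with one bit per page,
 * a machine with e.g. max_page == 262144 (1GB of 4kB pages) needs
 * 262144 / 8 == 32kB of bitmap, rounded up to a whole number of pages.
 * The return value is the first address past the bitmap, so the caller
 * knows where usable memory resumes.
 */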

void init_boot_pages(unsigned long ps, unsigned long pe)
{
    unsigned long bad_pfn;
    char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    map_free(ps >> PAGE_SHIFT, (pe - ps) >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_pfn = simple_strtoul(p, &p, 0);

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( (bad_pfn < (bitmap_size*8)) && !allocated_in_map(bad_pfn) )
        {
            printk("Marking page %08lx as bad\n", bad_pfn);
            map_alloc(bad_pfn, 1);
        }
    }
}
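
/*
 * Editorial note (not part of the original changeset): for the example
 * option string 'badpage=0x3f45,0x8a321', opt_badpage holds
 * "0x3f45,0x8a321". The loop above parses 0x3f45, steps over the ',',
 * parses 0x8a321, and marks each of those frames allocated (if they fall
 * inside the bitmap and are currently free) so they are never handed out.
 */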

unsigned long alloc_boot_pages(unsigned long size, unsigned long align)
{
    unsigned long pg, i;

    size  = round_pgup(size) >> PAGE_SHIFT;
    align = round_pgup(align) >> PAGE_SHIFT;

    for ( pg = 0; (pg + size) < (bitmap_size*8); pg += align )
    {
        for ( i = 0; i < size; i++ )
            if ( allocated_in_map(pg + i) )
                break;

        if ( i == size )
        {
            map_alloc(pg, size);
            return pg << PAGE_SHIFT;
        }
    }

    return 0;
}
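
/*
 * Editorial illustration (not part of the original changeset): a hedged
 * usage sketch. Callers pass size and alignment in bytes; both are rounded
 * up to whole pages, and the return value is a physical address (0 on
 * failure). The values below are hypothetical, for illustration only.
 */
#if 0  /* illustration only; not built */
static void example_boot_alloc(void)
{
    /* Ask for 64kB of boot memory, aligned to 64kB. */
    unsigned long pa = alloc_boot_pages(64 << 10, 64 << 10);
    if ( pa == 0 )
        printk("boot allocation failed\n");
    else
        printk("boot allocation at physical %08lx\n", pa);
}
#endif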


/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#define MEMZONE_DOM 1
#define NR_ZONES    2

/* Up to 2^10 pages can be allocated at once. */
#define MAX_ORDER 10
static struct list_head heap[NR_ZONES][MAX_ORDER+1];

static unsigned long avail[NR_ZONES];

static spinlock_t heap_lock = SPIN_LOCK_UNLOCKED;
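
/*
 * Editorial note (not part of the original changeset): heap[zone][order] is
 * a free list of blocks of 2^order contiguous pages. With 4kB pages, order 0
 * is a single 4kB page and order MAX_ORDER (10) is a 1024-page, 4MB block --
 * the largest contiguous allocation the buddy system satisfies per request.
 */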

void end_boot_allocator(void)
{
    unsigned long i, j;
    int curr_free = 0, next_free = 0;

    memset(avail, 0, sizeof(avail));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j <= MAX_ORDER; j++ )
            INIT_LIST_HEAD(&heap[i][j]);

    /* Pages that are free now go to the domain sub-allocator. */
    for ( i = 0; i < max_page; i++ )
    {
        curr_free = next_free;
        next_free = !allocated_in_map(i+1);
        if ( next_free )
            map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
        if ( curr_free )
            free_heap_pages(MEMZONE_DOM, pfn_to_page(i), 0);
    }
}
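
/*
 * Editorial note (not part of the original changeset): curr_free/next_free
 * implement a one-page look-ahead. Before page i is handed to
 * free_heap_pages(), its successor i+1 is temporarily marked allocated if it
 * is currently free; this stops free_heap_pages() from buddy-merging into
 * pages the loop has not visited yet. The temporary mark is cleared again
 * when that successor page is itself freed on a later iteration.
 */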

/* Hand the specified arbitrary page range to the specified heap zone. */
void init_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned long nr_pages)
{
    unsigned long i;

    ASSERT(zone < NR_ZONES);

    for ( i = 0; i < nr_pages; i++ )
        free_heap_pages(zone, pg+i, 0);
}

/* Allocate 2^@order contiguous pages. */
struct pfn_info *alloc_heap_pages(unsigned int zone, unsigned int order)
{
    int i;
    struct pfn_info *pg;

    ASSERT(zone < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /* Find smallest order which can satisfy the request. */
    for ( i = order; i <= MAX_ORDER; i++ )
        if ( !list_empty(&heap[zone][i]) )
            goto found;

    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    pg = list_entry(heap[zone][i].next, struct pfn_info, list);
    list_del(&pg->list);

    /* We may have to halve the chunk a number of times. */
    while ( i != order )
    {
        PFN_ORDER(pg) = --i;
        list_add_tail(&pg->list, &heap[zone][i]);
        pg += 1 << i;
    }

    map_alloc(page_to_pfn(pg), 1 << order);
    avail[zone] -= 1 << order;

    spin_unlock(&heap_lock);

    return pg;
}
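
/*
 * Editorial illustration (not part of the original changeset): a worked
 * example of the halving loop. Suppose order == 2 (4 pages) is requested but
 * the smallest non-empty free list is order 5 (32 pages):
 *   i = 5 -> 4: the low 16 pages go back on heap[zone][4], pg advances 16
 *   i = 4 -> 3: the low  8 pages go back on heap[zone][3], pg advances  8
 *   i = 3 -> 2: the low  4 pages go back on heap[zone][2], pg advances  4
 * leaving a 4-page block at pg, which is then marked in the bitmap and
 * returned.
 */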

/* Free 2^@order set of pages. */
void free_heap_pages(
    unsigned int zone, struct pfn_info *pg, unsigned int order)
{
    unsigned long mask;

    ASSERT(zone < NR_ZONES);
    ASSERT(order <= MAX_ORDER);

    spin_lock(&heap_lock);

    map_free(page_to_pfn(pg), 1 << order);
    avail[zone] += 1 << order;

    /* Merge chunks as far as possible. */
    while ( order < MAX_ORDER )
    {
        mask = 1 << order;

        if ( (page_to_pfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( allocated_in_map(page_to_pfn(pg)-mask) ||
                 (PFN_ORDER(pg-mask) != order) )
                break;
            list_del(&(pg-mask)->list);
            pg -= mask;
        }
        else
        {
            /* Merge with successor block? */
            if ( allocated_in_map(page_to_pfn(pg)+mask) ||
                 (PFN_ORDER(pg+mask) != order) )
                break;
            list_del(&(pg+mask)->list);
        }

        order++;
    }

    PFN_ORDER(pg) = order;
    list_add_tail(&pg->list, &heap[zone][order]);

    spin_unlock(&heap_lock);
}
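
/*
 * Editorial illustration (not part of the original changeset): the buddy of
 * a block of 2^order pages starting at pfn is the block at pfn ^ (1<<order).
 * The test (page_to_pfn(pg) & mask) distinguishes the two cases: if the bit
 * is set, the buddy precedes pg; otherwise it follows pg. For example, when
 * freeing an order-0 block at pfn 13 (bit 0 set), the candidate buddy is
 * pfn 12; if pfn 12 is free and also of order 0, the two merge into an
 * order-1 block at pfn 12, which the loop then tries to merge with the
 * order-1 block at pfn 14, and so on up to MAX_ORDER.
 */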

/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock or disable interrupts while scrubbing very large memory areas.
 */
void scrub_heap_pages(void)
{
    void *p;
    unsigned long pfn, flags;

    printk("Scrubbing Free RAM: ");

    for ( pfn = 0; pfn < (bitmap_size * 8); pfn++ )
    {
        /* Every 100MB, print a progress dot and appease the watchdog. */
        if ( (pfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
        {
            printk(".");
            touch_nmi_watchdog();
        }

        /* Quick lock-free check. */
        if ( allocated_in_map(pfn) )
            continue;

        spin_lock_irqsave(&heap_lock, flags);

        /* Re-check page status with lock held. */
        if ( !allocated_in_map(pfn) )
        {
            p = map_domain_mem(pfn << PAGE_SHIFT);
            clear_page(p);
            unmap_domain_mem(p);
        }

        spin_unlock_irqrestore(&heap_lock, flags);
    }

    printk("done.\n");
}
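
/*
 * Editorial note (not part of the original changeset): the unlocked
 * allocated_in_map() test is only an optimisation -- it lets the loop skip
 * allocated pages without taking the lock. The result is confirmed again
 * under heap_lock before the page is zeroed, so a page that gets allocated
 * between the two checks is never scrubbed behind its new owner's back.
 */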


/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

void init_xenheap_pages(unsigned long ps, unsigned long pe)
{
    unsigned long flags;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    memguard_guard_range(__va(ps), pe - ps);

    local_irq_save(flags);
    init_heap_pages(MEMZONE_XEN, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
    local_irq_restore(flags);
}

unsigned long alloc_xenheap_pages(unsigned int order)
{
    unsigned long flags;
    struct pfn_info *pg;
    int i;

    local_irq_save(flags);
    pg = alloc_heap_pages(MEMZONE_XEN, order);
    local_irq_restore(flags);

    if ( unlikely(pg == NULL) )
        goto no_memory;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    for ( i = 0; i < (1 << order); i++ )
    {
        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
    }

    return (unsigned long)page_to_virt(pg);

 no_memory:
    printk("Cannot handle page request order %d!\n", order);
    return 0;
}

void free_xenheap_pages(unsigned long p, unsigned int order)
{
    unsigned long flags;

    memguard_guard_range((void *)p, 1 << (order + PAGE_SHIFT));

    local_irq_save(flags);
    free_heap_pages(MEMZONE_XEN, virt_to_page(p), order);
    local_irq_restore(flags);
}
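
/*
 * Editorial illustration (not part of the original changeset): a hedged
 * usage sketch of the Xen-heap pair above. alloc_xenheap_pages() returns a
 * Xen virtual address (0 on failure), so the result can be used directly;
 * it must be released with free_xenheap_pages() at the same order.
 */
#if 0  /* illustration only; not built */
static void example_xenheap_usage(void)
{
    unsigned long v = alloc_xenheap_pages(1);  /* two contiguous pages */
    if ( v != 0 )
    {
        memset((void *)v, 0, 2 * PAGE_SIZE);
        free_xenheap_pages(v, 1);
    }
}
#endif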


/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

void init_domheap_pages(unsigned long ps, unsigned long pe)
{
    ASSERT(!in_irq());

    ps = round_pgup(ps);
    pe = round_pgdown(pe);

    init_heap_pages(MEMZONE_DOM, phys_to_page(ps), (pe - ps) >> PAGE_SHIFT);
}

struct pfn_info *alloc_domheap_pages(struct domain *d, unsigned int order)
{
    struct pfn_info *pg;
    unsigned long mask, flushed_mask, pfn_stamp, cpu_stamp;
    int i, j;

    ASSERT(!in_irq());

    if ( unlikely((pg = alloc_heap_pages(MEMZONE_DOM, order)) == NULL) )
        return NULL;

    flushed_mask = 0;
    for ( i = 0; i < (1 << order); i++ )
    {
        if ( (mask = (pg[i].u.free.cpu_mask & ~flushed_mask)) != 0 )
        {
            pfn_stamp = pg[i].tlbflush_timestamp;
            for ( j = 0; (mask != 0) && (j < smp_num_cpus); j++ )
            {
                if ( mask & (1UL<<j) )
                {
                    cpu_stamp = tlbflush_time[j];
                    if ( !NEED_FLUSH(cpu_stamp, pfn_stamp) )
                        mask &= ~(1UL<<j);
                }
            }

            if ( unlikely(mask != 0) )
            {
                flush_tlb_mask(mask);
                perfc_incrc(need_flush_tlb_flush);
                flushed_mask |= mask;
            }
        }

        pg[i].count_info        = 0;
        pg[i].u.inuse._domain   = 0;
        pg[i].u.inuse.type_info = 0;
    }

    if ( d == NULL )
        return pg;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(test_bit(DF_DYING, &d->d_flags)) ||
         unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
    {
        DPRINTK("Over-allocation for domain %u: %u > %u\n",
                d->id, d->tot_pages + (1 << order), d->max_pages);
        DPRINTK("...or the domain is dying (%d)\n",
                !!test_bit(DF_DYING, &d->d_flags));
        spin_unlock(&d->page_alloc_lock);
        free_heap_pages(MEMZONE_DOM, pg, order);
        return NULL;
    }

    if ( unlikely(d->tot_pages == 0) )
        get_knownalive_domain(d);

    d->tot_pages += 1 << order;

    for ( i = 0; i < (1 << order); i++ )
    {
        page_set_owner(&pg[i], d);
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info |= PGC_allocated | 1;
        list_add_tail(&pg[i].list, &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);

    return pg;
}
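
/*
 * Editorial note (not part of the original changeset): the first loop above
 * avoids unnecessary TLB flushes. Each free page records which CPUs may
 * still hold stale TLB entries for it (u.free.cpu_mask) and when it was
 * freed (tlbflush_timestamp). A CPU only needs an IPI if it has not flushed
 * its TLB since that timestamp (NEED_FLUSH), and flushed_mask remembers CPUs
 * already flushed on behalf of an earlier page in the same allocation, so
 * each CPU is flushed at most once per call.
 */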

void free_domheap_pages(struct pfn_info *pg, unsigned int order)
{
    int i, drop_dom_ref;
    struct domain *d = page_get_owner(pg);
    struct exec_domain *ed;
    void *p;
    int cpu_mask = 0;

    ASSERT(!in_irq());

    if ( unlikely(IS_XEN_HEAP_FRAME(pg)) )
    {
        /* NB. May recursively lock from domain_relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            list_del(&pg[i].list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) )
    {
        /* NB. May recursively lock from domain_relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for_each_exec_domain(d, ed)
            cpu_mask |= 1 << ed->processor;

        for ( i = 0; i < (1 << order); i++ )
        {
            ASSERT((pg[i].u.inuse.type_info & PGT_count_mask) == 0);
            pg[i].tlbflush_timestamp = tlbflush_current_time();
            pg[i].u.free.cpu_mask    = cpu_mask;
            list_del(&pg[i].list);

            /*
             * Normally we expect a domain to clear pages before freeing them,
             * if it cares about the secrecy of their contents. However, after
             * a domain has died we assume responsibility for erasure.
             */
            if ( unlikely(test_bit(DF_DYING, &d->d_flags)) )
            {
                p = map_domain_mem(page_to_phys(&pg[i]));
                clear_page(p);
                unmap_domain_mem(p);
            }
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        free_heap_pages(MEMZONE_DOM, pg, order);
    }
    else
    {
        /* Freeing an anonymous domain-heap page. */
        free_heap_pages(MEMZONE_DOM, pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}
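
/*
 * Editorial note (not part of the original changeset): drop_dom_ref balances
 * a domain reference taken when the domain acquired its first page of the
 * relevant kind (for the domain heap, via get_knownalive_domain() in
 * alloc_domheap_pages() above). Once tot_pages, or xenheap_pages for
 * Xen-heap frames, falls back to zero, put_domain() releases that reference
 * so a dying domain can finally be destroyed after its last page is freed.
 */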

unsigned long avail_domheap_pages(void)
{
    return avail[MEMZONE_DOM];
}