xen/common/slab.c @ 3652:10a0f6b0a996

bitkeeper revision 1.1159.238.3 (4200cd90cCW2XIYxAgdkWL28Tzf-8g)

Introduce _xmalloc for when you really want just bytes.

Signed-off-by: ian.pratt@cl.cam.ac.uk
author iap10@labyrinth.cl.cam.ac.uk
date Wed Feb 02 12:54:40 2005 +0000 (2005-02-02)
parents 51052c8b6456
children 0ef6e8e6e85d
1 /*
2 * linux/mm/slab.c
3 * Written by Mark Hemment, 1996/97.
4 * (markhe@nextd.demon.co.uk)
5 *
6 * xmem_cache_destroy() + some cleanup - 1999 Andrea Arcangeli
7 *
8 * Major cleanup, different bufctl logic, per-cpu arrays
9 * (c) 2000 Manfred Spraul
10 *
11 * An implementation of the Slab Allocator as described in outline in;
12 * UNIX Internals: The New Frontiers by Uresh Vahalia
13 * Pub: Prentice Hall ISBN 0-13-101908-2
14 * or with a little more detail in;
15 * The Slab Allocator: An Object-Caching Kernel Memory Allocator
16 * Jeff Bonwick (Sun Microsystems).
17 * Presented at: USENIX Summer 1994 Technical Conference
18 *
19 *
20 * The memory is organized in caches, one cache for each object type.
21 * (e.g. inode_cache, dentry_cache, buffer_head, vm_area_struct)
22 * Each cache consists of many slabs (they are small (usually one
23 * page long) and always contiguous), and each slab contains multiple
24 * initialized objects.
25 *
26 * In order to reduce fragmentation, the slabs are sorted in 3 groups:
27 * full slabs with 0 free objects
28 * partial slabs
29 * empty slabs with no allocated objects
30 *
31 * If partial slabs exist, then new allocations come from these slabs,
32 * otherwise they come from empty slabs, or new slabs are allocated.
33 *
34 * xmem_cache_destroy() CAN CRASH if you try to allocate from the cache
35 * during xmem_cache_destroy(). The caller must prevent concurrent allocs.
36 *
37 * On SMP systems, each cache has a short per-cpu head array, most allocs
38 * and frees go into that array, and if that array overflows, then 1/2
39 * of the entries in the array are given back into the global cache.
40 * This reduces the number of spinlock operations.
41 *
42 * The c_cpuarray must not be read with local interrupts enabled.
43 *
44 * SMP synchronization:
45 * constructors and destructors are called without any locking.
46 * Several members in xmem_cache_t and slab_t never change, they
47 * are accessed without any locking.
48 * The per-cpu arrays are never accessed from the wrong cpu, no locking.
49 * The non-constant members are protected with a per-cache irq spinlock.
50 */
52 #include <xen/config.h>
53 #include <xen/init.h>
54 #include <xen/types.h>
55 #include <xen/lib.h>
56 #include <xen/slab.h>
57 #include <xen/list.h>
58 #include <xen/spinlock.h>
59 #include <xen/errno.h>
60 #include <xen/smp.h>
61 #include <xen/sched.h>
63 /*
64 * DEBUG - 1 for xmem_cache_create() to honour; SLAB_DEBUG_INITIAL,
65 * SLAB_RED_ZONE & SLAB_POISON.
66 * 0 for faster, smaller code (especially in the critical paths).
67 *
68 * STATS - 1 to collect stats for /proc/slabinfo.
69 * 0 for faster, smaller code (especially in the critical paths).
70 *
71 * FORCED_DEBUG - 1 enables SLAB_RED_ZONE and SLAB_POISON (if possible)
72 */
73 #ifdef CONFIG_DEBUG_SLAB
74 #define DEBUG 1
75 #define STATS 1
76 #define FORCED_DEBUG 1
77 #else
78 #define DEBUG 0
79 #define STATS 0
80 #define FORCED_DEBUG 0
81 #endif
83 /*
84 * Parameters for xmem_cache_reap
85 */
86 #define REAP_SCANLEN 10
87 #define REAP_PERFECT 10
89 /* Shouldn't this be in a header file somewhere? */
90 #define BYTES_PER_WORD sizeof(void *)
92 /* Legal flag mask for xmem_cache_create(). */
93 #if DEBUG
94 #define CREATE_MASK (SLAB_DEBUG_INITIAL | SLAB_RED_ZONE | \
95 SLAB_POISON | SLAB_HWCACHE_ALIGN | \
96 SLAB_NO_REAP)
97 #else
98 #define CREATE_MASK (SLAB_HWCACHE_ALIGN | SLAB_NO_REAP)
99 #endif
101 /*
102 * xmem_bufctl_t:
103 *
104 * Bufctl's are used for linking objs within a slab
105 * linked offsets.
106 *
107 * This implementation relies on "struct page" for locating the cache &
108 * slab an object belongs to.
109 * This allows the bufctl structure to be small (one int), but limits
110 * the number of objects a slab (not a cache) can contain when off-slab
111 * bufctls are used. The limit is the size of the largest general cache
112 * that does not use off-slab slabs.
113 * For 32-bit archs with 4 kB pages, this is 56.
114 * This is not serious, as it is only for large objects, when it is unwise
115 * to have too many per slab.
116 * Note: This limit can be raised by introducing a general cache whose size
117 * is less than 512 (PAGE_SIZE<<3), but greater than 256.
118 */
120 #define BUFCTL_END (((xmem_bufctl_t)(~0U))-0)
121 #define BUFCTL_FREE (((xmem_bufctl_t)(~0U))-1)
122 #define SLAB_LIMIT (((xmem_bufctl_t)(~0U))-2)
124 /* Max number of objs-per-slab for caches which use off-slab slabs.
125 * Needed to avoid a possible looping condition in xmem_cache_grow().
126 */
127 static unsigned long offslab_limit;
129 /*
130 * slab_t
131 *
132 * Manages the objs in a slab. Placed either at the beginning of mem allocated
133 * for a slab, or allocated from a general cache.
134 * Slabs are chained into three lists: fully used, partial, fully free slabs.
135 */
136 typedef struct slab_s {
137 struct list_head list;
138 unsigned long colouroff;
139 void *s_mem; /* including colour offset */
140 unsigned int inuse; /* num of objs active in slab */
141 xmem_bufctl_t free;
142 } slab_t;
144 #define slab_bufctl(slabp) \
145 ((xmem_bufctl_t *)(((slab_t*)slabp)+1))
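/*
 * Illustrative sketch, assuming on-slab management: the descriptor and its
 * bufctl array sit at the start of the slab's memory, followed by the
 * objects themselves:
 *
 *   [colour pad][slab_t][bufctl 0..num-1][obj 0][obj 1]...[obj num-1]
 *
 * slab_bufctl(slabp)[i] holds the index of the next free object after
 * object i.  For a freshly grown slab of (say) four objects:
 *
 *   slabp->free = 0;   bufctl[] = { 1, 2, 3, BUFCTL_END }
 *
 * Allocation takes object 0 and advances slabp->free to bufctl[0] == 1;
 * freeing object 0 later pushes it back on the front of the chain.
 */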
147 /*
148 * cpucache_t
149 *
150 * Per cpu structures
151 * The limit is stored in the per-cpu structure to reduce the data cache
152 * footprint.
153 */
154 typedef struct cpucache_s {
155 unsigned int avail;
156 unsigned int limit;
157 } cpucache_t;
159 #define cc_entry(cpucache) \
160 ((void **)(((cpucache_t*)(cpucache))+1))
161 #define cc_data(cachep) \
162 ((cachep)->cpudata[smp_processor_id()])
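/*
 * Illustrative sketch: each per-cpu cache is allocated as a cpucache_t
 * header immediately followed by 'limit' object pointers, which is why
 * cc_entry() simply points just past the header:
 *
 *   [ avail | limit | objp[0] | objp[1] | ... | objp[limit-1] ]
 *
 * With local interrupts disabled, the fast paths are a plain push/pop:
 *
 *   objp = cc_entry(cc)[--cc->avail];    (allocation hit)
 *   cc_entry(cc)[cc->avail++] = objp;    (free hit)
 */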
163 /*
164 * xmem_cache_t
165 *
166 * manages a cache.
167 */
169 #define CACHE_NAMELEN 20 /* max name length for a slab cache */
171 struct xmem_cache_s {
172 /* 1) each alloc & free */
173 /* full, partial first, then free */
174 struct list_head slabs_full;
175 struct list_head slabs_partial;
176 struct list_head slabs_free;
177 unsigned int objsize;
178 unsigned int flags; /* constant flags */
179 unsigned int num; /* # of objs per slab */
180 spinlock_t spinlock;
181 #ifdef CONFIG_SMP
182 unsigned int batchcount;
183 #endif
185 /* 2) slab additions /removals */
186 /* order of pgs per slab (2^n) */
187 unsigned int gfporder;
188 size_t colour; /* cache colouring range */
189 unsigned int colour_off; /* colour offset */
190 unsigned int colour_next; /* cache colouring */
191 xmem_cache_t *slabp_cache;
192 unsigned int growing;
193 unsigned int dflags; /* dynamic flags */
195 /* constructor func */
196 void (*ctor)(void *, xmem_cache_t *, unsigned long);
198 /* de-constructor func */
199 void (*dtor)(void *, xmem_cache_t *, unsigned long);
201 unsigned long failures;
203 /* 3) cache creation/removal */
204 char name[CACHE_NAMELEN];
205 struct list_head next;
206 #ifdef CONFIG_SMP
207 /* 4) per-cpu data */
208 cpucache_t *cpudata[NR_CPUS];
209 #endif
210 #if STATS
211 unsigned long num_active;
212 unsigned long num_allocations;
213 unsigned long high_mark;
214 unsigned long grown;
215 unsigned long reaped;
216 unsigned long errors;
217 #ifdef CONFIG_SMP
218 atomic_t allochit;
219 atomic_t allocmiss;
220 atomic_t freehit;
221 atomic_t freemiss;
222 #endif
223 #endif
224 };
226 /* internal c_flags */
227 #define CFLGS_OFF_SLAB 0x010000UL /* slab management in own cache */
228 #define CFLGS_OPTIMIZE 0x020000UL /* optimized slab lookup */
230 /* c_dflags (dynamic flags). Need to hold the spinlock to access this member */
231 #define DFLGS_GROWN 0x000001UL /* don't reap a recently grown */
233 #define OFF_SLAB(x) ((x)->flags & CFLGS_OFF_SLAB)
234 #define OPTIMIZE(x) ((x)->flags & CFLGS_OPTIMIZE)
235 #define GROWN(x) ((x)->dflags & DFLGS_GROWN)
237 #if STATS
238 #define STATS_INC_ACTIVE(x) ((x)->num_active++)
239 #define STATS_DEC_ACTIVE(x) ((x)->num_active--)
240 #define STATS_INC_ALLOCED(x) ((x)->num_allocations++)
241 #define STATS_INC_GROWN(x) ((x)->grown++)
242 #define STATS_INC_REAPED(x) ((x)->reaped++)
243 #define STATS_SET_HIGH(x) do { if ((x)->num_active > (x)->high_mark) \
244 (x)->high_mark = (x)->num_active; \
245 } while (0)
246 #define STATS_INC_ERR(x) ((x)->errors++)
247 #else
248 #define STATS_INC_ACTIVE(x) do { } while (0)
249 #define STATS_DEC_ACTIVE(x) do { } while (0)
250 #define STATS_INC_ALLOCED(x) do { } while (0)
251 #define STATS_INC_GROWN(x) do { } while (0)
252 #define STATS_INC_REAPED(x) do { } while (0)
253 #define STATS_SET_HIGH(x) do { } while (0)
254 #define STATS_INC_ERR(x) do { } while (0)
255 #endif
257 #if STATS && defined(CONFIG_SMP)
258 #define STATS_INC_ALLOCHIT(x) atomic_inc(&(x)->allochit)
259 #define STATS_INC_ALLOCMISS(x) atomic_inc(&(x)->allocmiss)
260 #define STATS_INC_FREEHIT(x) atomic_inc(&(x)->freehit)
261 #define STATS_INC_FREEMISS(x) atomic_inc(&(x)->freemiss)
262 #else
263 #define STATS_INC_ALLOCHIT(x) do { } while (0)
264 #define STATS_INC_ALLOCMISS(x) do { } while (0)
265 #define STATS_INC_FREEHIT(x) do { } while (0)
266 #define STATS_INC_FREEMISS(x) do { } while (0)
267 #endif
269 #if DEBUG
270 /* Magic nums for obj red zoning.
271 * Placed in the first word before and the first word after an obj.
272 */
273 #define RED_MAGIC1 0x5A2CF071UL /* when obj is active */
274 #define RED_MAGIC2 0x170FC2A5UL /* when obj is inactive */
276 /* ...and for poisoning */
277 #define POISON_BYTE 0x5a /* byte value for poisoning */
278 #define POISON_END 0xa5 /* end-byte of poisoning */
280 #endif
282 /* maximum size of an obj (in 2^order pages) */
283 #define MAX_OBJ_ORDER 5 /* 32 pages */
285 /*
286 * Do not go above this order unless zero objects fit into the slab (i.e. a single object is too big).
287 */
288 #define BREAK_GFP_ORDER_HI 2
289 #define BREAK_GFP_ORDER_LO 1
290 static int slab_break_gfp_order = BREAK_GFP_ORDER_LO;
292 /*
293 * Absolute limit for the gfp order
294 */
295 #define MAX_GFP_ORDER 5 /* 32 pages */
298 /* Macros for storing/retrieving the cachep and/or slab from the
299 * global 'mem_map'. These are used to find the slab an obj belongs to.
300 * With xfree(), these are used to find the cache which an obj belongs to.
301 */
302 #define SET_PAGE_CACHE(pg,x) ((pg)->list.next = (struct list_head *)(x))
303 #define GET_PAGE_CACHE(pg) ((xmem_cache_t *)(pg)->list.next)
304 #define SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x))
305 #define GET_PAGE_SLAB(pg) ((slab_t *)(pg)->list.prev)
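/*
 * Illustrative sketch: because every page backing a slab has its
 * struct pfn_info list pointers aimed back at the owning cache and slab
 * descriptor, an object pointer alone is enough to free it:
 *
 *   struct pfn_info *pg = virt_to_page(objp);
 *   xmem_cache_t *cachep = GET_PAGE_CACHE(pg);
 *   slab_t *slabp = GET_PAGE_SLAB(pg);
 *
 * This is what xfree() and xmem_cache_free_one() rely on; the pointers are
 * installed page by page in xmem_cache_grow().
 */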
307 /* Size description struct for general caches. */
308 typedef struct cache_sizes {
309 size_t cs_size;
310 xmem_cache_t *cs_cachep;
311 } cache_sizes_t;
313 static cache_sizes_t cache_sizes[] = {
314 { 32, NULL},
315 { 64, NULL},
316 { 128, NULL},
317 { 256, NULL},
318 { 512, NULL},
319 { 1024, NULL},
320 { 2048, NULL},
321 { 4096, NULL},
322 { 8192, NULL},
323 { 16384, NULL},
324 { 32768, NULL},
325 { 65536, NULL},
326 { 0, NULL}
327 };
329 /* internal cache of cache description objs */
330 static xmem_cache_t cache_cache = {
331 slabs_full: LIST_HEAD_INIT(cache_cache.slabs_full),
332 slabs_partial: LIST_HEAD_INIT(cache_cache.slabs_partial),
333 slabs_free: LIST_HEAD_INIT(cache_cache.slabs_free),
334 objsize: sizeof(xmem_cache_t),
335 flags: SLAB_NO_REAP,
336 spinlock: SPIN_LOCK_UNLOCKED,
337 colour_off: L1_CACHE_BYTES,
338 name: "xmem_cache"
339 };
341 /* Guard access to the cache-chain. */
342 /* KAF: No semaphores, as we'll never wait around for I/O. */
343 static spinlock_t cache_chain_sem;
344 #define init_MUTEX(_m) spin_lock_init(_m)
345 #define down(_m) spin_lock_irqsave(_m,spin_flags)
346 #define up(_m) spin_unlock_irqrestore(_m,spin_flags)
348 /* Clock hand for the reaper: where xmem_cache_reap() resumes its scan. */
349 static xmem_cache_t *clock_searchp = &cache_cache;
351 #define cache_chain (cache_cache.next)
353 #ifdef CONFIG_SMP
354 /*
355 * chicken and egg problem: delay the per-cpu array allocation
356 * until the general caches are up.
357 */
358 static int g_cpucache_up;
360 static void enable_cpucache (xmem_cache_t *cachep);
361 static void enable_all_cpucaches (void);
362 #endif
364 /* Calculate the number of objs, wastage, and bytes left over for a given slab size. */
365 static void xmem_cache_estimate (unsigned long gfporder, size_t size,
366 int flags, size_t *left_over, unsigned int *num)
367 {
368 int i;
369 size_t wastage = PAGE_SIZE<<gfporder;
370 size_t extra = 0;
371 size_t base = 0;
373 if (!(flags & CFLGS_OFF_SLAB)) {
374 base = sizeof(slab_t);
375 extra = sizeof(xmem_bufctl_t);
376 }
377 i = 0;
378 while (i*size + L1_CACHE_ALIGN(base+i*extra) <= wastage)
379 i++;
380 if (i > 0)
381 i--;
383 if (i > SLAB_LIMIT)
384 i = SLAB_LIMIT;
386 *num = i;
387 wastage -= i*size;
388 wastage -= L1_CACHE_ALIGN(base+i*extra);
389 *left_over = wastage;
390 }
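/*
 * Worked example (illustrative; assumes a 32-bit build, 4 kB pages,
 * 32-byte cache lines, sizeof(slab_t) == 24 and a 4-byte xmem_bufctl_t):
 * for 256-byte objects in an order-0 slab with on-slab management,
 *
 *   i = 15:  15*256 + L1_CACHE_ALIGN(24 + 15*4) = 3840 + 96 = 3936 <= 4096
 *   i = 16:  16*256 + L1_CACHE_ALIGN(24 + 16*4) = 4096 + 96 = 4192  > 4096
 *
 * so *num = 15 and *left_over = 4096 - 3840 - 96 = 160.  Those 160 bytes
 * are what xmem_cache_create() later turns into 160/32 = 5 possible colour
 * offsets for this cache.
 */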
392 /* Initialisation - setup the `cache' cache. */
393 void __init xmem_cache_init(void)
394 {
395 size_t left_over;
397 init_MUTEX(&cache_chain_sem);
398 INIT_LIST_HEAD(&cache_chain);
400 xmem_cache_estimate(0, cache_cache.objsize, 0,
401 &left_over, &cache_cache.num);
402 if (!cache_cache.num)
403 BUG();
405 cache_cache.colour = left_over/cache_cache.colour_off;
406 cache_cache.colour_next = 0;
407 }
410 /* Initialisation - setup remaining internal and general caches.
411 * Called after the gfp() functions have been enabled, and before smp_init().
412 */
413 void __init xmem_cache_sizes_init(unsigned long num_physpages)
414 {
415 cache_sizes_t *sizes = cache_sizes;
416 char name[20];
417 /*
418 * Fragmentation resistance on low memory - only use bigger
419 * page orders on machines with more than 32MB of memory.
420 */
421 if (num_physpages > (32 << 20) >> PAGE_SHIFT)
422 slab_break_gfp_order = BREAK_GFP_ORDER_HI;
423 do {
424 /* For performance, all the general caches are L1 aligned.
425 * This should be particularly beneficial on SMP boxes, as it
426 * eliminates "false sharing".
427 * Note for systems short on memory removing the alignment will
428 * allow tighter packing of the smaller caches. */
429 sprintf(name,"size-%Zd",sizes->cs_size);
430 if (!(sizes->cs_cachep =
431 xmem_cache_create(name, sizes->cs_size,
432 0, SLAB_HWCACHE_ALIGN, NULL, NULL))) {
433 BUG();
434 }
436 /* Inc off-slab bufctl limit until the ceiling is hit. */
437 if (!(OFF_SLAB(sizes->cs_cachep))) {
438 offslab_limit = sizes->cs_size-sizeof(slab_t);
439 offslab_limit /= 2;
440 }
441 sizes++;
442 } while (sizes->cs_size);
443 }
445 int __init xmem_cpucache_init(void)
446 {
447 #ifdef CONFIG_SMP
448 g_cpucache_up = 1;
449 enable_all_cpucaches();
450 #endif
451 return 0;
452 }
454 /*__initcall(xmem_cpucache_init);*/
456 /* Interface to system's page allocator. No need to hold the cache-lock.
457 */
458 static inline void *xmem_getpages(xmem_cache_t *cachep)
459 {
460 void *addr;
462 addr = (void*) alloc_xenheap_pages(cachep->gfporder);
463 /* Assume that now we have the pages, no one else can legally
464 * mess with the 'struct page's.
465 * However vm_scan() might try to test the structure to see if
466 * it is a named-page or buffer-page. The members it tests are
467 * of no interest here.....
468 */
469 return addr;
470 }
472 /* Interface to system's page release. */
473 static inline void xmem_freepages (xmem_cache_t *cachep, void *addr)
474 {
475 unsigned long i = (1<<cachep->gfporder);
476 struct pfn_info *page = virt_to_page(addr);
478 /* free_xenheap_pages() does not clear the type bit - we do that.
479 * The pages have been unlinked from their cache-slab,
480 * but their 'struct page's might be accessed in
481 * vm_scan(). Shouldn't be a worry.
482 */
483 while (i--) {
484 PageClearSlab(page);
485 page++;
486 }
488 free_xenheap_pages((unsigned long)addr, cachep->gfporder);
489 }
491 #if DEBUG
492 static inline void xmem_poison_obj (xmem_cache_t *cachep, void *addr)
493 {
494 int size = cachep->objsize;
495 if (cachep->flags & SLAB_RED_ZONE) {
496 addr += BYTES_PER_WORD;
497 size -= 2*BYTES_PER_WORD;
498 }
499 memset(addr, POISON_BYTE, size);
500 *(unsigned char *)(addr+size-1) = POISON_END;
501 }
503 static inline int xmem_check_poison_obj (xmem_cache_t *cachep, void *addr)
504 {
505 int size = cachep->objsize;
506 void *end;
507 if (cachep->flags & SLAB_RED_ZONE) {
508 addr += BYTES_PER_WORD;
509 size -= 2*BYTES_PER_WORD;
510 }
511 end = memchr(addr, POISON_END, size);
512 if (end != (addr+size-1))
513 return 1;
514 return 0;
515 }
516 #endif
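/*
 * Illustrative sketch: with SLAB_POISON (and no red zone) a free 16-byte
 * object is filled as
 *
 *   5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a 5a a5
 *
 * i.e. POISON_BYTE everywhere except the final POISON_END byte.  A stale
 * write through a dangling pointer disturbs the pattern and is caught by
 * xmem_check_poison_obj() the next time the object is handed out.
 */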
518 /* Destroy all the objs in a slab, and release the mem back to the system.
519 * Before calling, the slab must have been unlinked from the cache.
520 * The cache-lock is not held/needed.
521 */
522 static void xmem_slab_destroy (xmem_cache_t *cachep, slab_t *slabp)
523 {
524 if (cachep->dtor
525 #if DEBUG
526 || cachep->flags & (SLAB_POISON | SLAB_RED_ZONE)
527 #endif
528 ) {
529 int i;
530 for (i = 0; i < cachep->num; i++) {
531 void* objp = slabp->s_mem+cachep->objsize*i;
532 #if DEBUG
533 if (cachep->flags & SLAB_RED_ZONE) {
534 if (*((unsigned long*)(objp)) != RED_MAGIC1)
535 BUG();
536 if (*((unsigned long*)(objp + cachep->objsize
537 -BYTES_PER_WORD)) != RED_MAGIC1)
538 BUG();
539 objp += BYTES_PER_WORD;
540 }
541 #endif
542 if (cachep->dtor)
543 (cachep->dtor)(objp, cachep, 0);
544 #if DEBUG
545 if (cachep->flags & SLAB_RED_ZONE) {
546 objp -= BYTES_PER_WORD;
547 }
548 if ((cachep->flags & SLAB_POISON) &&
549 xmem_check_poison_obj(cachep, objp))
550 BUG();
551 #endif
552 }
553 }
555 xmem_freepages(cachep, slabp->s_mem-slabp->colouroff);
556 if (OFF_SLAB(cachep))
557 xmem_cache_free(cachep->slabp_cache, slabp);
558 }
560 /**
561 * xmem_cache_create - Create a cache.
562 * @name: A string which is used in /proc/slabinfo to identify this cache.
563 * @size: The size of objects to be created in this cache.
564 * @offset: The offset to use within the page.
565 * @flags: SLAB flags
566 * @ctor: A constructor for the objects.
567 * @dtor: A destructor for the objects.
568 *
569 * Returns a ptr to the cache on success, NULL on failure.
570 * Cannot be called from within an interrupt, but can be interrupted.
571 * The @ctor is run when new pages are allocated by the cache
572 * and the @dtor is run before the pages are handed back.
573 * The flags are
574 *
575 * %SLAB_POISON - Poison the slab with a known test pattern (POISON_BYTE 0x5a)
576 * to catch references to uninitialised memory.
577 *
578 * %SLAB_RED_ZONE - Insert `Red' zones around the allocated memory to check
579 * for buffer overruns.
580 *
581 * %SLAB_NO_REAP - Don't automatically reap this cache when we're under
582 * memory pressure.
583 *
584 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
585 * cacheline. This can be beneficial if you're counting cycles as closely
586 * as davem.
587 */
588 xmem_cache_t *
589 xmem_cache_create (const char *name, size_t size, size_t offset,
590 unsigned long flags,
591 void (*ctor)(void*, xmem_cache_t *, unsigned long),
592 void (*dtor)(void*, xmem_cache_t *, unsigned long))
593 {
594 const char *func_nm = KERN_ERR "xmem_create: ";
595 size_t left_over, align, slab_size;
596 xmem_cache_t *cachep = NULL;
597 unsigned long spin_flags;
599 /*
600 * Sanity checks... these are all serious usage bugs.
601 */
602 if ((!name) ||
603 ((strlen(name) >= CACHE_NAMELEN - 1)) ||
604 (size < BYTES_PER_WORD) ||
605 (size > (1<<MAX_OBJ_ORDER)*PAGE_SIZE) ||
606 (dtor && !ctor) ||
607 (offset < 0 || offset > size))
608 BUG();
610 #if DEBUG
611 if ((flags & SLAB_DEBUG_INITIAL) && !ctor) {
612 /* No constructor, but initial state check requested */
613 printk("%sNo con, but init state check requested - %s\n",
614 func_nm, name);
615 flags &= ~SLAB_DEBUG_INITIAL;
616 }
618 if ((flags & SLAB_POISON) && ctor) {
619 /* request for poisoning, but we can't do that with a constructor */
620 printk("%sPoisoning requested, but con given - %s\n",
621 func_nm, name);
622 flags &= ~SLAB_POISON;
623 }
624 #if FORCED_DEBUG
625 if (size < (PAGE_SIZE>>3))
626 /*
627 * do not red zone large objects; it causes severe
628 * fragmentation.
629 */
630 flags |= SLAB_RED_ZONE;
631 if (!ctor)
632 flags |= SLAB_POISON;
633 #endif
634 #endif
636 /*
637 * Always check flags; a caller might be expecting debug
638 * support which isn't available.
639 */
640 if (flags & ~CREATE_MASK)
641 BUG();
643 /* Get cache's description obj. */
644 cachep = (xmem_cache_t *)xmem_cache_alloc(&cache_cache);
645 if (!cachep)
646 goto opps;
647 memset(cachep, 0, sizeof(xmem_cache_t));
649 /* Check that size is in terms of words. This is needed to avoid
650 * unaligned accesses for some archs when redzoning is used, and makes
651 * sure any on-slab bufctl's are also correctly aligned.
652 */
653 if (size & (BYTES_PER_WORD-1)) {
654 size += (BYTES_PER_WORD-1);
655 size &= ~(BYTES_PER_WORD-1);
656 printk("%sForcing size word alignment - %s\n", func_nm, name);
657 }
659 #if DEBUG
660 if (flags & SLAB_RED_ZONE) {
661 /*
662 * There is no point trying to honour cache alignment
663 * when redzoning.
664 */
665 flags &= ~SLAB_HWCACHE_ALIGN;
666 size += 2*BYTES_PER_WORD; /* words for redzone */
667 }
668 #endif
669 align = BYTES_PER_WORD;
670 if (flags & SLAB_HWCACHE_ALIGN)
671 align = L1_CACHE_BYTES;
673 /* Determine if the slab management is 'on' or 'off' slab. */
674 if (size >= (PAGE_SIZE>>3))
675 /*
676 * Size is large, assume best to place the slab management obj
677 * off-slab (should allow better packing of objs).
678 */
679 flags |= CFLGS_OFF_SLAB;
681 if (flags & SLAB_HWCACHE_ALIGN) {
682 /* Need to adjust size so that objs are cache aligned. */
683 /* Small obj size, can get at least two per cache line. */
684 /* FIXME: only power of 2 supported, was better */
685 while (size < align/2)
686 align /= 2;
687 size = (size+align-1)&(~(align-1));
688 }
690 /* Calculate the size (in pages) of slabs, and the number of objs per slab.
691 * This could be made much more intelligent. For now, try to avoid
692 * using high page-orders for slabs. When the gfp() funcs are more
693 * friendly towards high-order requests, this should be changed.
694 */
695 do {
696 unsigned int break_flag = 0;
697 cal_wastage:
698 xmem_cache_estimate(cachep->gfporder, size, flags,
699 &left_over, &cachep->num);
700 if (break_flag)
701 break;
702 if (cachep->gfporder >= MAX_GFP_ORDER)
703 break;
704 if (!cachep->num)
705 goto next;
706 if (flags & CFLGS_OFF_SLAB && cachep->num > offslab_limit) {
707 /* Oops, this num of objs will cause problems. */
708 cachep->gfporder--;
709 break_flag++;
710 goto cal_wastage;
711 }
713 /*
714 * Large num of objs is good, but v. large slabs are currently
715 * bad for the gfp()s.
716 */
717 if (cachep->gfporder >= slab_break_gfp_order)
718 break;
720 if ((left_over*8) <= (PAGE_SIZE<<cachep->gfporder))
721 break; /* Acceptable internal fragmentation. */
722 next:
723 cachep->gfporder++;
724 } while (1);
726 if (!cachep->num) {
727 printk("xmem_cache_create: couldn't create cache %s.\n", name);
728 xmem_cache_free(&cache_cache, cachep);
729 cachep = NULL;
730 goto opps;
731 }
732 slab_size = L1_CACHE_ALIGN(cachep->num*sizeof(xmem_bufctl_t) +
733 sizeof(slab_t));
735 /*
736 * If the slab has been placed off-slab, and we have enough space then
737 * move it on-slab. This is at the expense of any extra colouring.
738 */
739 if (flags & CFLGS_OFF_SLAB && left_over >= slab_size) {
740 flags &= ~CFLGS_OFF_SLAB;
741 left_over -= slab_size;
742 }
744 /* Offset must be a multiple of the alignment. */
745 offset += (align-1);
746 offset &= ~(align-1);
747 if (!offset)
748 offset = L1_CACHE_BYTES;
749 cachep->colour_off = offset;
750 cachep->colour = left_over/offset;
752 /* init remaining fields */
753 if (!cachep->gfporder && !(flags & CFLGS_OFF_SLAB))
754 flags |= CFLGS_OPTIMIZE;
756 cachep->flags = flags;
757 spin_lock_init(&cachep->spinlock);
758 cachep->objsize = size;
759 INIT_LIST_HEAD(&cachep->slabs_full);
760 INIT_LIST_HEAD(&cachep->slabs_partial);
761 INIT_LIST_HEAD(&cachep->slabs_free);
763 if (flags & CFLGS_OFF_SLAB)
764 cachep->slabp_cache = xmem_find_general_cachep(slab_size);
765 cachep->ctor = ctor;
766 cachep->dtor = dtor;
767 /* Copy name over so we don't have problems with unloaded modules */
768 strcpy(cachep->name, name);
770 #ifdef CONFIG_SMP
771 if (g_cpucache_up)
772 enable_cpucache(cachep);
773 #endif
774 /* Need the semaphore to access the chain. */
775 down(&cache_chain_sem);
776 {
777 xmem_cache_t *pc;
779 list_for_each_entry(pc, &cache_chain, next) {
780 /* The name field is constant - no lock needed. */
781 if (!strcmp(pc->name, name))
782 BUG();
783 }
784 }
786 /* There is no reason to lock our new cache before we
787 * link it in - no one knows about it yet...
788 */
789 list_add(&cachep->next, &cache_chain);
790 up(&cache_chain_sem);
791 opps:
792 return cachep;
793 }
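/*
 * Usage sketch (illustrative only; 'struct foo' and foo_cachep are
 * hypothetical, not part of this file):
 *
 *   static xmem_cache_t *foo_cachep;
 *
 *   void foo_init(void)
 *   {
 *       foo_cachep = xmem_cache_create("foo_cache", sizeof(struct foo),
 *                                      0, SLAB_HWCACHE_ALIGN, NULL, NULL);
 *       if (!foo_cachep)
 *           BUG();
 *   }
 *
 *   struct foo *f = xmem_cache_alloc(foo_cachep);
 *   ...
 *   xmem_cache_free(foo_cachep, f);
 *
 * xmem_cache_destroy(foo_cachep) is only needed if the cache ever goes
 * away, and the caller must then prevent concurrent allocations.
 */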
796 #if DEBUG
797 /*
798 * This checks if the xmem_cache_t pointer is chained in the cache_cache
799 * list. -arca
800 */
801 static int is_chained_xmem_cache(xmem_cache_t * cachep)
802 {
803 xmem_cache_t *pc;
804 int ret = 0;
805 unsigned long spin_flags;
807 /* Find the cache in the chain of caches. */
808 down(&cache_chain_sem);
809 list_for_each_entry(pc, &cache_chain, next) {
810 if (pc == cachep) {
811 ret = 1;
812 break;
813 }
814 }
815 up(&cache_chain_sem);
817 return ret;
818 }
819 #else
820 #define is_chained_xmem_cache(x) 1
821 #endif
823 #ifdef CONFIG_SMP
824 /*
825 * Waits for all CPUs to execute func().
826 */
827 static void smp_call_function_all_cpus(void (*func) (void *arg), void *arg)
828 {
829 local_irq_disable();
830 func(arg);
831 local_irq_enable();
833 if (smp_call_function(func, arg, 1, 1))
834 BUG();
835 }
836 typedef struct ccupdate_struct_s
837 {
838 xmem_cache_t *cachep;
839 cpucache_t *new[NR_CPUS];
840 } ccupdate_struct_t;
842 static void do_ccupdate_local(void *info)
843 {
844 ccupdate_struct_t *new = (ccupdate_struct_t *)info;
845 cpucache_t *old = cc_data(new->cachep);
847 cc_data(new->cachep) = new->new[smp_processor_id()];
848 new->new[smp_processor_id()] = old;
849 }
851 static void free_block (xmem_cache_t* cachep, void** objpp, int len);
853 static void drain_cpu_caches(xmem_cache_t *cachep)
854 {
855 ccupdate_struct_t new;
856 int i;
857 unsigned long spin_flags;
859 memset(&new.new,0,sizeof(new.new));
861 new.cachep = cachep;
863 down(&cache_chain_sem);
864 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
866 for (i = 0; i < smp_num_cpus; i++) {
867 cpucache_t* ccold = new.new[cpu_logical_map(i)];
868 if (!ccold || (ccold->avail == 0))
869 continue;
870 local_irq_disable();
871 free_block(cachep, cc_entry(ccold), ccold->avail);
872 local_irq_enable();
873 ccold->avail = 0;
874 }
875 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
876 up(&cache_chain_sem);
877 }
879 #else
880 #define drain_cpu_caches(cachep) do { } while (0)
881 #endif
883 static int __xmem_cache_shrink(xmem_cache_t *cachep)
884 {
885 slab_t *slabp;
886 int ret;
888 drain_cpu_caches(cachep);
890 spin_lock_irq(&cachep->spinlock);
892 /* If the cache is growing, stop shrinking. */
893 while (!cachep->growing) {
894 struct list_head *p;
896 p = cachep->slabs_free.prev;
897 if (p == &cachep->slabs_free)
898 break;
900 slabp = list_entry(cachep->slabs_free.prev, slab_t, list);
901 #if DEBUG
902 if (slabp->inuse)
903 BUG();
904 #endif
905 list_del(&slabp->list);
907 spin_unlock_irq(&cachep->spinlock);
908 xmem_slab_destroy(cachep, slabp);
909 spin_lock_irq(&cachep->spinlock);
910 }
911 ret = (!list_empty(&cachep->slabs_full) ||
912 !list_empty(&cachep->slabs_partial));
913 spin_unlock_irq(&cachep->spinlock);
914 return ret;
915 }
917 /**
918 * xmem_cache_shrink - Shrink a cache.
919 * @cachep: The cache to shrink.
920 *
921 * Releases as many slabs as possible for a cache.
922 * To help debugging, a zero exit status indicates all slabs were released.
923 */
924 int xmem_cache_shrink(xmem_cache_t *cachep)
925 {
926 if (!cachep || !is_chained_xmem_cache(cachep))
927 BUG();
929 return __xmem_cache_shrink(cachep);
930 }
932 /**
933 * xmem_cache_destroy - delete a cache
934 * @cachep: the cache to destroy
935 *
936 * Remove a xmem_cache_t object from the slab cache.
937 * Returns 0 on success.
938 *
939 * It is expected this function will be called by a module when it is
940 * unloaded. This will remove the cache completely, and avoid a duplicate
941 * cache being allocated each time a module is loaded and unloaded, if the
942 * module doesn't have persistent in-kernel storage across loads and unloads.
943 *
944 * The caller must guarantee that no one will allocate memory from the cache
945 * during the xmem_cache_destroy().
946 */
947 int xmem_cache_destroy (xmem_cache_t * cachep)
948 {
949 unsigned long spin_flags;
951 if (!cachep || cachep->growing)
952 BUG();
954 /* Find the cache in the chain of caches. */
955 down(&cache_chain_sem);
956 /* the chain is never empty, cache_cache is never destroyed */
957 if (clock_searchp == cachep)
958 clock_searchp = list_entry(cachep->next.next,
959 xmem_cache_t, next);
960 list_del(&cachep->next);
961 up(&cache_chain_sem);
963 if (__xmem_cache_shrink(cachep)) {
964 printk(KERN_ERR "xmem_cache_destroy: Can't free all objects %p\n",
965 cachep);
966 down(&cache_chain_sem);
967 list_add(&cachep->next,&cache_chain);
968 up(&cache_chain_sem);
969 return 1;
970 }
971 #ifdef CONFIG_SMP
972 {
973 int i;
974 for (i = 0; i < NR_CPUS; i++)
975 xfree(cachep->cpudata[i]);
976 }
977 #endif
978 xmem_cache_free(&cache_cache, cachep);
980 return 0;
981 }
983 /* Get the memory for a slab management obj. */
984 static inline slab_t *xmem_cache_slabmgmt(xmem_cache_t *cachep,
985 void *objp, int colour_off,
986 int local_flags)
987 {
988 slab_t *slabp;
990 if (OFF_SLAB(cachep)) {
991 /* Slab management obj is off-slab. */
992 slabp = xmem_cache_alloc(cachep->slabp_cache);
993 if (!slabp)
994 return NULL;
995 } else {
996 /* FIXME: change to
997 slabp = objp
998 * if you enable OPTIMIZE
999 */
1000 slabp = objp+colour_off;
1001 colour_off += L1_CACHE_ALIGN(cachep->num *
1002 sizeof(xmem_bufctl_t) + sizeof(slab_t));
1003 }
1004 slabp->inuse = 0;
1005 slabp->colouroff = colour_off;
1006 slabp->s_mem = objp+colour_off;
1008 return slabp;
1009 }
1011 static inline void xmem_cache_init_objs(xmem_cache_t *cachep,
1012 slab_t *slabp,
1013 unsigned long ctor_flags)
1015 int i;
1017 for (i = 0; i < cachep->num; i++) {
1018 void* objp = slabp->s_mem+cachep->objsize*i;
1019 #if DEBUG
1020 if (cachep->flags & SLAB_RED_ZONE) {
1021 *((unsigned long*)(objp)) = RED_MAGIC1;
1022 *((unsigned long*)(objp + cachep->objsize -
1023 BYTES_PER_WORD)) = RED_MAGIC1;
1024 objp += BYTES_PER_WORD;
1026 #endif
1028 /*
1029 * Constructors are not allowed to allocate memory from
1030 * the same cache which they are a constructor for.
1031 * Otherwise, deadlock. They must also be threaded.
1032 */
1033 if (cachep->ctor)
1034 cachep->ctor(objp, cachep, ctor_flags);
1035 #if DEBUG
1036 if (cachep->flags & SLAB_RED_ZONE)
1037 objp -= BYTES_PER_WORD;
1038 if (cachep->flags & SLAB_POISON)
1039 /* need to poison the objs */
1040 xmem_poison_obj(cachep, objp);
1041 if (cachep->flags & SLAB_RED_ZONE) {
1042 if (*((unsigned long*)(objp)) != RED_MAGIC1)
1043 BUG();
1044 if (*((unsigned long*)(objp + cachep->objsize -
1045 BYTES_PER_WORD)) != RED_MAGIC1)
1046 BUG();
1048 #endif
1049 slab_bufctl(slabp)[i] = i+1;
1051 slab_bufctl(slabp)[i-1] = BUFCTL_END;
1052 slabp->free = 0;
1055 /*
1056 * Grow (by 1) the number of slabs within a cache. This is called by
1057 * xmem_cache_alloc() when there are no active objs left in a cache.
1058 */
1059 static int xmem_cache_grow(xmem_cache_t * cachep)
1061 slab_t *slabp;
1062 struct pfn_info *page; unsigned int i;
1063 void *objp;
1064 size_t offset;
1065 unsigned long ctor_flags;
1066 unsigned long save_flags;
1068 ctor_flags = SLAB_CTOR_CONSTRUCTOR;
1070 /* About to mess with non-constant members - lock. */
1071 spin_lock_irqsave(&cachep->spinlock, save_flags);
1073 /* Get colour for the slab, and calculate the next value. */
1074 offset = cachep->colour_next;
1075 cachep->colour_next++;
1076 if (cachep->colour_next >= cachep->colour)
1077 cachep->colour_next = 0;
1078 offset *= cachep->colour_off;
1079 cachep->dflags |= DFLGS_GROWN;
1081 cachep->growing++;
1082 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1084 /* A series of memory allocations for a new slab.
1085 * Neither the cache-chain semaphore nor the cache-lock is
1086 * held, but the incrementing c_growing prevents this
1087 * cache from being reaped or shrunk.
1088 * Note: The cache could be selected for reaping in
1089 * xmem_cache_reap(), but when the final test is made the
1090 * growing value will be seen.
1091 */
1093 /* Get mem for the objs. */
1094 if (!(objp = xmem_getpages(cachep)))
1095 goto failed;
1097 /* Get slab management. */
1098 if (!(slabp = xmem_cache_slabmgmt(cachep, objp, offset, 0)))
1099 goto opps1;
1101 /* Nasty!!!!!! I hope this is OK. */
1102 i = 1 << cachep->gfporder;
1103 page = virt_to_page(objp);
1104 do {
1105 SET_PAGE_CACHE(page, cachep);
1106 SET_PAGE_SLAB(page, slabp);
1107 PageSetSlab(page);
1108 page++;
1109 } while (--i);
1111 xmem_cache_init_objs(cachep, slabp, ctor_flags);
1113 spin_lock_irqsave(&cachep->spinlock, save_flags);
1114 cachep->growing--;
1116 /* Make slab active. */
1117 list_add_tail(&slabp->list, &cachep->slabs_free);
1118 STATS_INC_GROWN(cachep);
1119 cachep->failures = 0;
1121 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1122 return 1;
1123 opps1:
1124 xmem_freepages(cachep, objp);
1125 failed:
1126 spin_lock_irqsave(&cachep->spinlock, save_flags);
1127 cachep->growing--;
1128 spin_unlock_irqrestore(&cachep->spinlock, save_flags);
1129 return 0;
1132 /*
1133 * Perform extra freeing checks:
1134 * - detect double free
1135 * - detect bad pointers.
1136 * Called with the cache-lock held.
1137 */
1139 #if DEBUG
1140 static int xmem_extra_free_checks (xmem_cache_t * cachep,
1141 slab_t *slabp, void * objp)
1143 int i;
1144 unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1146 if (objnr >= cachep->num)
1147 BUG();
1148 if (objp != slabp->s_mem + objnr*cachep->objsize)
1149 BUG();
1151 /* Check slab's freelist to see if this obj is there. */
1152 for (i = slabp->free; i != BUFCTL_END; i = slab_bufctl(slabp)[i]) {
1153 if (i == objnr)
1154 BUG();
1156 return 0;
1158 #endif
1160 static inline void * xmem_cache_alloc_one_tail (xmem_cache_t *cachep,
1161 slab_t *slabp)
1163 void *objp;
1165 STATS_INC_ALLOCED(cachep);
1166 STATS_INC_ACTIVE(cachep);
1167 STATS_SET_HIGH(cachep);
1169 /* get obj pointer */
1170 slabp->inuse++;
1171 objp = slabp->s_mem + slabp->free*cachep->objsize;
1172 slabp->free=slab_bufctl(slabp)[slabp->free];
1174 if (unlikely(slabp->free == BUFCTL_END)) {
1175 list_del(&slabp->list);
1176 list_add(&slabp->list, &cachep->slabs_full);
1178 #if DEBUG
1179 if (cachep->flags & SLAB_POISON)
1180 if (xmem_check_poison_obj(cachep, objp))
1181 BUG();
1182 if (cachep->flags & SLAB_RED_ZONE) {
1183 /* Set alloc red-zone, and check old one. */
1184 if (xchg((unsigned long *)objp, RED_MAGIC2) !=
1185 RED_MAGIC1)
1186 BUG();
1187 if (xchg((unsigned long *)(objp+cachep->objsize -
1188 BYTES_PER_WORD), RED_MAGIC2) != RED_MAGIC1)
1189 BUG();
1190 objp += BYTES_PER_WORD;
1192 #endif
1193 return objp;
1196 /*
1197 * Returns a ptr to an obj in the given cache.
1198 * caller must guarantee synchronization
1199 * #define for the goto optimization 8-)
1200 */
1201 #define xmem_cache_alloc_one(cachep) \
1202 ({ \
1203 struct list_head * slabs_partial, * entry; \
1204 slab_t *slabp; \
1206 slabs_partial = &(cachep)->slabs_partial; \
1207 entry = slabs_partial->next; \
1208 if (unlikely(entry == slabs_partial)) { \
1209 struct list_head * slabs_free; \
1210 slabs_free = &(cachep)->slabs_free; \
1211 entry = slabs_free->next; \
1212 if (unlikely(entry == slabs_free)) \
1213 goto alloc_new_slab; \
1214 list_del(entry); \
1215 list_add(entry, slabs_partial); \
1216 } \
1218 slabp = list_entry(entry, slab_t, list); \
1219 xmem_cache_alloc_one_tail(cachep, slabp); \
1220 })
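/*
 * Note: xmem_cache_alloc_one() is a GCC statement expression rather than a
 * function so that the 'goto alloc_new_slab' above can jump to a label in
 * whichever function expands the macro.  __xmem_cache_alloc() below uses
 * it roughly like this (sketch):
 *
 *   objp = xmem_cache_alloc_one(cachep);   (takes from a partial/free slab)
 *       ...
 *   alloc_new_slab:                        (reached when both lists are empty)
 *       if (xmem_cache_grow(cachep))
 *           goto try_again;
 */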
1222 #ifdef CONFIG_SMP
1223 void* xmem_cache_alloc_batch(xmem_cache_t* cachep)
1225 int batchcount = cachep->batchcount;
1226 cpucache_t* cc = cc_data(cachep);
1228 spin_lock(&cachep->spinlock);
1229 while (batchcount--) {
1230 struct list_head * slabs_partial, * entry;
1231 slab_t *slabp;
1232 /* Get the slab the alloc is to come from. */
1233 slabs_partial = &(cachep)->slabs_partial;
1234 entry = slabs_partial->next;
1235 if (unlikely(entry == slabs_partial)) {
1236 struct list_head * slabs_free;
1237 slabs_free = &(cachep)->slabs_free;
1238 entry = slabs_free->next;
1239 if (unlikely(entry == slabs_free))
1240 break;
1241 list_del(entry);
1242 list_add(entry, slabs_partial);
1245 slabp = list_entry(entry, slab_t, list);
1246 cc_entry(cc)[cc->avail++] =
1247 xmem_cache_alloc_one_tail(cachep, slabp);
1249 spin_unlock(&cachep->spinlock);
1251 if (cc->avail)
1252 return cc_entry(cc)[--cc->avail];
1253 return NULL;
1255 #endif
1257 static inline void *__xmem_cache_alloc(xmem_cache_t *cachep)
1259 unsigned long flags;
1260 void* objp;
1262 try_again:
1263 local_irq_save(flags);
1264 #ifdef CONFIG_SMP
1266 cpucache_t *cc = cc_data(cachep);
1268 if (cc) {
1269 if (cc->avail) {
1270 STATS_INC_ALLOCHIT(cachep);
1271 objp = cc_entry(cc)[--cc->avail];
1272 } else {
1273 STATS_INC_ALLOCMISS(cachep);
1274 objp = xmem_cache_alloc_batch(cachep);
1275 if (!objp)
1276 goto alloc_new_slab_nolock;
1278 } else {
1279 spin_lock(&cachep->spinlock);
1280 objp = xmem_cache_alloc_one(cachep);
1281 spin_unlock(&cachep->spinlock);
1284 #else
1285 objp = xmem_cache_alloc_one(cachep);
1286 #endif
1287 local_irq_restore(flags);
1288 return objp;
1289 alloc_new_slab:
1290 #ifdef CONFIG_SMP
1291 spin_unlock(&cachep->spinlock);
1292 alloc_new_slab_nolock:
1293 #endif
1294 local_irq_restore(flags);
1295 if (xmem_cache_grow(cachep))
1296 /* Someone may have stolen our objs. Doesn't matter, we'll
1297 * just come back here again.
1298 */
1299 goto try_again;
1300 return NULL;
1303 /*
1304 * Release an obj back to its cache. If the obj has a constructed
1305 * state, it should be in this state _before_ it is released.
1306 * - caller is responsible for the synchronization
1307 */
1309 #if DEBUG
1310 # define CHECK_NR(pg) \
1311 do { \
1312 if (!VALID_PAGE(pg)) { \
1313 printk(KERN_ERR "xfree: out of range ptr %lxh.\n", \
1314 (unsigned long)objp); \
1315 BUG(); \
1316 } \
1317 } while (0)
1318 # define CHECK_PAGE(page) \
1319 do { \
1320 CHECK_NR(page); \
1321 if (!PageSlab(page)) { \
1322 printk(KERN_ERR "xfree: bad ptr %lxh.\n", \
1323 (unsigned long)objp); \
1324 BUG(); \
1325 } \
1326 } while (0)
1328 #else
1329 # define CHECK_PAGE(pg) do { } while (0)
1330 #endif
1332 static inline void xmem_cache_free_one(xmem_cache_t *cachep, void *objp)
1334 slab_t* slabp;
1336 CHECK_PAGE(virt_to_page(objp));
1337 /* reduces memory footprint
1339 if (OPTIMIZE(cachep))
1340 slabp = (void*)((unsigned long)objp&(~(PAGE_SIZE-1)));
1341 else
1342 */
1343 slabp = GET_PAGE_SLAB(virt_to_page(objp));
1345 #if DEBUG
1346 if (cachep->flags & SLAB_DEBUG_INITIAL)
1347 /* Need to call the slab's constructor so the
1348 * caller can perform a verify of its state (debugging).
1349 * Called without the cache-lock held.
1350 */
1351 cachep->ctor(objp, cachep, SLAB_CTOR_CONSTRUCTOR|SLAB_CTOR_VERIFY);
1353 if (cachep->flags & SLAB_RED_ZONE) {
1354 objp -= BYTES_PER_WORD;
1355 if (xchg((unsigned long *)objp, RED_MAGIC1) != RED_MAGIC2)
1356 /* Either write before start, or a double free. */
1357 BUG();
1358 if (xchg((unsigned long *)(objp+cachep->objsize -
1359 BYTES_PER_WORD), RED_MAGIC1) != RED_MAGIC2)
1360 /* Either write past end, or a double free. */
1361 BUG();
1363 if (cachep->flags & SLAB_POISON)
1364 xmem_poison_obj(cachep, objp);
1365 if (xmem_extra_free_checks(cachep, slabp, objp))
1366 return;
1367 #endif
1369 unsigned int objnr = (objp-slabp->s_mem)/cachep->objsize;
1371 slab_bufctl(slabp)[objnr] = slabp->free;
1372 slabp->free = objnr;
1374 STATS_DEC_ACTIVE(cachep);
1376 /* fixup slab chains */
1378 int inuse = slabp->inuse;
1379 if (unlikely(!--slabp->inuse)) {
1380 /* Was partial or full, now empty. */
1381 list_del(&slabp->list);
1382 list_add(&slabp->list, &cachep->slabs_free);
1383 } else if (unlikely(inuse == cachep->num)) {
1384 /* Was full. */
1385 list_del(&slabp->list);
1386 list_add(&slabp->list, &cachep->slabs_partial);
1391 #ifdef CONFIG_SMP
1392 static inline void __free_block (xmem_cache_t* cachep,
1393 void** objpp, int len)
1395 for ( ; len > 0; len--, objpp++)
1396 xmem_cache_free_one(cachep, *objpp);
1399 static void free_block (xmem_cache_t* cachep, void** objpp, int len)
1401 spin_lock(&cachep->spinlock);
1402 __free_block(cachep, objpp, len);
1403 spin_unlock(&cachep->spinlock);
1405 #endif
1407 /*
1408 * __xmem_cache_free
1409 * called with disabled ints
1410 */
1411 static inline void __xmem_cache_free (xmem_cache_t *cachep, void* objp)
1413 #ifdef CONFIG_SMP
1414 cpucache_t *cc = cc_data(cachep);
1416 CHECK_PAGE(virt_to_page(objp));
1417 if (cc) {
1418 int batchcount;
1419 if (cc->avail < cc->limit) {
1420 STATS_INC_FREEHIT(cachep);
1421 cc_entry(cc)[cc->avail++] = objp;
1422 return;
1424 STATS_INC_FREEMISS(cachep);
1425 batchcount = cachep->batchcount;
1426 cc->avail -= batchcount;
1427 free_block(cachep,
1428 &cc_entry(cc)[cc->avail],batchcount);
1429 cc_entry(cc)[cc->avail++] = objp;
1430 return;
1431 } else {
1432 free_block(cachep, &objp, 1);
1434 #else
1435 xmem_cache_free_one(cachep, objp);
1436 #endif
1439 /**
1440 * xmem_cache_alloc - Allocate an object
1441 * @cachep: The cache to allocate from.
1443 * Allocate an object from this cache.  The cache is grown
1444 * automatically (via xmem_cache_grow()) if it has no available objects.
1445 */
1446 void *xmem_cache_alloc(xmem_cache_t *cachep)
1447 {
1448 return __xmem_cache_alloc(cachep);
1449 }
1451 /**
1452 * _xmalloc - allocate memory
1453 * @size: how many bytes of memory are required.
1454 */
1455 void *_xmalloc(size_t size)
1456 {
1457 cache_sizes_t *csizep = cache_sizes;
1459 for (; csizep->cs_size; csizep++) {
1460 if (size > csizep->cs_size)
1461 continue;
1462 return __xmem_cache_alloc(csizep->cs_cachep);
1463 }
1464 return NULL;
1465 }
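/*
 * Usage sketch (illustrative): _xmalloc() rounds a request up to the next
 * general cache in cache_sizes[], e.g.
 *
 *   void *p = _xmalloc(100);     (served from the "size-128" cache)
 *   void *q = _xmalloc(3000);    (served from the "size-4096" cache)
 *   ...
 *   xfree(p);
 *   xfree(q);
 *
 * Requests bigger than the largest general cache (64 kB here) simply
 * return NULL.
 */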
1467 /**
1468 * xmem_cache_free - Deallocate an object
1469 * @cachep: The cache the allocation was from.
1470 * @objp: The previously allocated object.
1472 * Free an object which was previously allocated from this
1473 * cache.
1474 */
1475 void xmem_cache_free (xmem_cache_t *cachep, void *objp)
1477 unsigned long flags;
1478 #if DEBUG
1479 CHECK_PAGE(virt_to_page(objp));
1480 if (cachep != GET_PAGE_CACHE(virt_to_page(objp)))
1481 BUG();
1482 #endif
1484 local_irq_save(flags);
1485 __xmem_cache_free(cachep, objp);
1486 local_irq_restore(flags);
1489 /**
1490 * xfree - free previously allocated memory
1491 * @objp: pointer returned by xmalloc.
1493 * Don't free memory not originally allocated by xmalloc()
1494 * or you will run into trouble.
1495 */
1496 void xfree (const void *objp)
1498 xmem_cache_t *c;
1499 unsigned long flags;
1501 if (!objp)
1502 return;
1503 local_irq_save(flags);
1504 CHECK_PAGE(virt_to_page(objp));
1505 c = GET_PAGE_CACHE(virt_to_page(objp));
1506 __xmem_cache_free(c, (void*)objp);
1507 local_irq_restore(flags);
1510 xmem_cache_t *xmem_find_general_cachep(size_t size)
1512 cache_sizes_t *csizep = cache_sizes;
1514 /* This function could be moved to the header file, and
1515 * made inline so consumers can quickly determine what
1516 * cache pointer they require.
1517 */
1518 for ( ; csizep->cs_size; csizep++) {
1519 if (size > csizep->cs_size)
1520 continue;
1521 break;
1523 return csizep->cs_cachep;
1526 #ifdef CONFIG_SMP
1528 /* called with cache_chain_sem acquired. */
1529 static int xmem_tune_cpucache (xmem_cache_t* cachep, int limit, int batchcount)
1531 ccupdate_struct_t new;
1532 int i;
1534 /*
1535 * These are admin-provided, so we are more graceful.
1536 */
1537 if (limit < 0)
1538 return -EINVAL;
1539 if (batchcount < 0)
1540 return -EINVAL;
1541 if (batchcount > limit)
1542 return -EINVAL;
1543 if (limit != 0 && !batchcount)
1544 return -EINVAL;
1546 memset(&new.new,0,sizeof(new.new));
1547 if (limit) {
1548 for (i = 0; i< smp_num_cpus; i++) {
1549 cpucache_t* ccnew;
1551 ccnew = _xmalloc(sizeof(void*)*limit+sizeof(cpucache_t));
1552 if (!ccnew)
1553 goto oom;
1554 ccnew->limit = limit;
1555 ccnew->avail = 0;
1556 new.new[cpu_logical_map(i)] = ccnew;
1559 new.cachep = cachep;
1560 spin_lock_irq(&cachep->spinlock);
1561 cachep->batchcount = batchcount;
1562 spin_unlock_irq(&cachep->spinlock);
1564 smp_call_function_all_cpus(do_ccupdate_local, (void *)&new);
1566 for (i = 0; i < smp_num_cpus; i++) {
1567 cpucache_t* ccold = new.new[cpu_logical_map(i)];
1568 if (!ccold)
1569 continue;
1570 local_irq_disable();
1571 free_block(cachep, cc_entry(ccold), ccold->avail);
1572 local_irq_enable();
1573 xfree(ccold);
1575 return 0;
1576 oom:
1577 for (i--; i >= 0; i--)
1578 xfree(new.new[cpu_logical_map(i)]);
1579 return -ENOMEM;
1582 static void enable_cpucache (xmem_cache_t *cachep)
1584 int err;
1585 int limit;
1587 /* FIXME: optimize */
1588 if (cachep->objsize > PAGE_SIZE)
1589 return;
1590 if (cachep->objsize > 1024)
1591 limit = 60;
1592 else if (cachep->objsize > 256)
1593 limit = 124;
1594 else
1595 limit = 252;
1597 err = xmem_tune_cpucache(cachep, limit, limit/2);
1598 if (err)
1599 printk(KERN_ERR "enable_cpucache failed for %s, error %d.\n",
1600 cachep->name, -err);
1603 static void enable_all_cpucaches (void)
1605 struct list_head* p;
1606 unsigned long spin_flags;
1608 down(&cache_chain_sem);
1610 p = &cache_cache.next;
1611 do {
1612 xmem_cache_t* cachep = list_entry(p, xmem_cache_t, next);
1614 enable_cpucache(cachep);
1615 p = cachep->next.next;
1616 } while (p != &cache_cache.next);
1618 up(&cache_chain_sem);
1620 #endif
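/*
 * Worked example (illustrative): enable_cpucache() picks the per-cpu limit
 * from the object size, so a 512-byte cache gets limit = 124 and
 * batchcount = 124/2 = 62: each CPU may hold up to 124 objects locally and
 * transfers 62 at a time to/from the global lists when its array overflows
 * or runs dry.
 */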
1622 /**
1623 * xmem_cache_reap - Reclaim memory from caches.
1624 */
1625 int xmem_cache_reap(void)
1627 slab_t *slabp;
1628 xmem_cache_t *searchp;
1629 xmem_cache_t *best_cachep;
1630 unsigned int best_pages;
1631 unsigned int best_len;
1632 unsigned int scan;
1633 int ret = 0;
1634 unsigned long spin_flags;
1636 down(&cache_chain_sem);
1638 scan = REAP_SCANLEN;
1639 best_len = 0;
1640 best_pages = 0;
1641 best_cachep = NULL;
1642 searchp = clock_searchp;
1643 do {
1644 unsigned int pages;
1645 struct list_head* p;
1646 unsigned int full_free;
1648 /* It's safe to test this without holding the cache-lock. */
1649 if (searchp->flags & SLAB_NO_REAP)
1650 goto next;
1651 spin_lock_irq(&searchp->spinlock);
1652 if (searchp->growing)
1653 goto next_unlock;
1654 if (searchp->dflags & DFLGS_GROWN) {
1655 searchp->dflags &= ~DFLGS_GROWN;
1656 goto next_unlock;
1658 #ifdef CONFIG_SMP
1660 cpucache_t *cc = cc_data(searchp);
1661 if (cc && cc->avail) {
1662 __free_block(searchp, cc_entry(cc), cc->avail);
1663 cc->avail = 0;
1666 #endif
1668 full_free = 0;
1669 p = searchp->slabs_free.next;
1670 while (p != &searchp->slabs_free) {
1671 slabp = list_entry(p, slab_t, list);
1672 #if DEBUG
1673 if (slabp->inuse)
1674 BUG();
1675 #endif
1676 full_free++;
1677 p = p->next;
1680 /*
1681 * Try to avoid slabs with constructors and/or
1682 * more than one page per slab (as it can be difficult
1683 * to get high orders from gfp()).
1684 */
1685 pages = full_free * (1<<searchp->gfporder);
1686 if (searchp->ctor)
1687 pages = (pages*4+1)/5;
1688 if (searchp->gfporder)
1689 pages = (pages*4+1)/5;
1690 if (pages > best_pages) {
1691 best_cachep = searchp;
1692 best_len = full_free;
1693 best_pages = pages;
1694 if (pages >= REAP_PERFECT) {
1695 clock_searchp = list_entry(searchp->next.next,
1696 xmem_cache_t,next);
1697 goto perfect;
1700 next_unlock:
1701 spin_unlock_irq(&searchp->spinlock);
1702 next:
1703 searchp = list_entry(searchp->next.next,xmem_cache_t,next);
1704 } while (--scan && searchp != clock_searchp);
1706 clock_searchp = searchp;
1708 if (!best_cachep)
1709 /* couldn't find anything to reap */
1710 goto out;
1712 spin_lock_irq(&best_cachep->spinlock);
1713 perfect:
1714 /* free only 50% of the free slabs */
1715 best_len = (best_len + 1)/2;
1716 for (scan = 0; scan < best_len; scan++) {
1717 struct list_head *p;
1719 if (best_cachep->growing)
1720 break;
1721 p = best_cachep->slabs_free.prev;
1722 if (p == &best_cachep->slabs_free)
1723 break;
1724 slabp = list_entry(p,slab_t,list);
1725 #if DEBUG
1726 if (slabp->inuse)
1727 BUG();
1728 #endif
1729 list_del(&slabp->list);
1730 STATS_INC_REAPED(best_cachep);
1732 /* Safe to drop the lock. The slab is no longer linked to the
1733 * cache.
1734 */
1735 spin_unlock_irq(&best_cachep->spinlock);
1736 xmem_slab_destroy(best_cachep, slabp);
1737 spin_lock_irq(&best_cachep->spinlock);
1739 spin_unlock_irq(&best_cachep->spinlock);
1740 ret = scan * (1 << best_cachep->gfporder);
1741 out:
1742 up(&cache_chain_sem);
1743 return ret;
1746 void dump_slabinfo()
1748 struct list_head *p;
1749 unsigned long spin_flags;
1751 /* Output format version, so at least we can change it without _too_
1752 * many complaints.
1753 */
1754 printk( "slabinfo - version: 1.1"
1755 #if STATS
1756 " (statistics)"
1757 #endif
1758 #ifdef CONFIG_SMP
1759 " (SMP)"
1760 #endif
1761 "\n");
1762 down(&cache_chain_sem);
1763 p = &cache_cache.next;
1764 do {
1765 xmem_cache_t *cachep;
1766 slab_t *slabp;
1767 unsigned long active_objs;
1768 unsigned long num_objs;
1769 unsigned long active_slabs = 0;
1770 unsigned long num_slabs;
1771 cachep = list_entry(p, xmem_cache_t, next);
1773 spin_lock_irq(&cachep->spinlock);
1774 active_objs = 0;
1775 num_slabs = 0;
1776 list_for_each_entry(slabp, &cachep->slabs_full, list) {
1777 if (slabp->inuse != cachep->num)
1778 BUG();
1779 active_objs += cachep->num;
1780 active_slabs++;
1782 list_for_each_entry(slabp, &cachep->slabs_partial, list) {
1783 if (slabp->inuse == cachep->num || !slabp->inuse)
1784 BUG();
1785 active_objs += slabp->inuse;
1786 active_slabs++;
1788 list_for_each_entry(slabp, &cachep->slabs_free, list) {
1789 if (slabp->inuse)
1790 BUG();
1791 num_slabs++;
1793 num_slabs+=active_slabs;
1794 num_objs = num_slabs*cachep->num;
1796 printk("%-17s %6lu %6lu %6u %4lu %4lu %4u",
1797 cachep->name, active_objs, num_objs, cachep->objsize,
1798 active_slabs, num_slabs, (1<<cachep->gfporder));
1800 #if STATS
1802 unsigned long errors = cachep->errors;
1803 unsigned long high = cachep->high_mark;
1804 unsigned long grown = cachep->grown;
1805 unsigned long reaped = cachep->reaped;
1806 unsigned long allocs = cachep->num_allocations;
1808 printk(" : %6lu %7lu %5lu %4lu %4lu",
1809 high, allocs, grown, reaped, errors);
1811 #endif
1812 #ifdef CONFIG_SMP
1814 unsigned int batchcount = cachep->batchcount;
1815 unsigned int limit;
1817 if (cc_data(cachep))
1818 limit = cc_data(cachep)->limit;
1819 else
1820 limit = 0;
1821 printk(" : %4u %4u",
1822 limit, batchcount);
1824 #endif
1825 #if STATS && defined(CONFIG_SMP)
1827 unsigned long allochit = atomic_read(&cachep->allochit);
1828 unsigned long allocmiss = atomic_read(&cachep->allocmiss);
1829 unsigned long freehit = atomic_read(&cachep->freehit);
1830 unsigned long freemiss = atomic_read(&cachep->freemiss);
1831 printk(" : %6lu %6lu %6lu %6lu",
1832 allochit, allocmiss, freehit, freemiss);
1834 #endif
1835 printk("\n");
1836 spin_unlock_irq(&cachep->spinlock);
1838 p = cachep->next.next;
1839 } while (p != &cache_cache.next);
1841 up(&cache_chain_sem);
1843 return;
1844 }
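/*
 * Illustrative sample of the output produced above (values invented, STATS
 * disabled, SMP enabled):
 *
 *   slabinfo - version: 1.1 (SMP)
 *   xmem_cache            40     42    116   14   14    1 :  252  126
 *   size-4096             12     15   4096   12   15    1 :   60   30
 *
 * Columns: name, active objs, total objs, object size, active slabs, total
 * slabs, pages per slab, then (SMP) per-cpu limit and batchcount.
 */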