/******************************************************************************
 * page_alloc.c
 *
 * Simple buddy heap allocator for Xen.
 *
 * Copyright (c) 2002-2004 K A Fraser
 * Copyright (c) 2006 IBM Ryan Harper <ryanh@us.ibm.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/spinlock.h>
#include <xen/mm.h>
#include <xen/irq.h>
#include <xen/softirq.h>
#include <xen/domain_page.h>
#include <xen/keyhandler.h>
#include <xen/perfc.h>
#include <xen/numa.h>
#include <xen/nodemask.h>
#include <xen/tmem.h>
#include <xen/tmem_xen.h>
#include <public/sysctl.h>
#include <asm/page.h>
#include <asm/numa.h>
#include <asm/flushtlb.h>

/*
 * Comma-separated list of hexadecimal page numbers containing bad bytes.
 * e.g. 'badpage=0x3f45,0x8a321'.
 */
static char __initdata opt_badpage[100] = "";
string_param("badpage", opt_badpage);

/*
 * no-bootscrub -> Free pages are not zeroed during boot.
 */
static bool_t opt_bootscrub __initdata = 1;
boolean_param("bootscrub", opt_bootscrub);

/*
 * Bit width of the DMA heap -- used to override the NUMA-node-first
 * allocation strategy, which can otherwise exhaust low memory.
 */
static unsigned int dma_bitsize;
integer_param("dma_bits", dma_bitsize);

#define round_pgdown(_p)  ((_p)&PAGE_MASK)
#define round_pgup(_p)    (((_p)+(PAGE_SIZE-1))&PAGE_MASK)

/* Offlined page list, protected by heap_lock. */
PAGE_LIST_HEAD(page_offlined_list);
/* Broken page list, protected by heap_lock. */
PAGE_LIST_HEAD(page_broken_list);

/*************************
 * BOOT-TIME ALLOCATOR
 */

static unsigned long __initdata first_valid_mfn = ~0UL;

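/*
 * The boot allocator tracks free memory as a sorted, non-overlapping array
 * of [s, e) MFN ranges.  The array itself lives in the first free page
 * handed to bootmem_region_add(), so no memory is needed up front.
 */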
static struct bootmem_region {
    unsigned long s, e; /* MFNs @s through @e-1 inclusive are free */
} *__initdata bootmem_region_list;
static unsigned int __initdata nr_bootmem_regions;

static void __init boot_bug(int line)
{
    panic("Boot BUG at %s:%d\n", __FILE__, line);
}
#define BOOT_BUG_ON(p) if ( p ) boot_bug(__LINE__);

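/*
 * Insert the free range [s, e) into the region list, keeping the list
 * sorted by start MFN.  Overlapping insertions and overflow of the
 * single-page region array are fatal (BOOT_BUG_ON).
 */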
static void __init bootmem_region_add(unsigned long s, unsigned long e)
{
    unsigned int i;

    if ( (bootmem_region_list == NULL) && (s < e) )
        bootmem_region_list = mfn_to_virt(s++);

    if ( s >= e )
        return;

    for ( i = 0; i < nr_bootmem_regions; i++ )
        if ( s < bootmem_region_list[i].e )
            break;

    BOOT_BUG_ON((i < nr_bootmem_regions) && (e > bootmem_region_list[i].s));
    BOOT_BUG_ON(nr_bootmem_regions ==
                (PAGE_SIZE / sizeof(struct bootmem_region)));

    memmove(&bootmem_region_list[i+1], &bootmem_region_list[i],
            (nr_bootmem_regions - i) * sizeof(*bootmem_region_list));
    bootmem_region_list[i] = (struct bootmem_region) { s, e };
    nr_bootmem_regions++;
}

static void __init bootmem_region_zap(unsigned long s, unsigned long e)
{
    unsigned int i;

    for ( i = 0; i < nr_bootmem_regions; i++ )
    {
        struct bootmem_region *r = &bootmem_region_list[i];
        if ( e <= r->s )
            break;
        if ( s >= r->e )
            continue;
        if ( s <= r->s )
        {
            r->s = min(e, r->e);
        }
        else if ( e >= r->e )
        {
            r->e = s;
        }
        else
        {
            unsigned long _e = r->e;
            r->e = s;
            bootmem_region_add(e, _e);
        }
    }
}

void __init init_boot_pages(paddr_t ps, paddr_t pe)
{
    unsigned long bad_spfn, bad_epfn;
    const char *p;

    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    first_valid_mfn = min_t(unsigned long, ps >> PAGE_SHIFT, first_valid_mfn);

    bootmem_region_add(ps >> PAGE_SHIFT, pe >> PAGE_SHIFT);

    /* Check new pages against the bad-page list. */
    p = opt_badpage;
    while ( *p != '\0' )
    {
        bad_spfn = simple_strtoul(p, &p, 0);
        bad_epfn = bad_spfn;

        if ( *p == '-' )
        {
            p++;
            bad_epfn = simple_strtoul(p, &p, 0);
            if ( bad_epfn < bad_spfn )
                bad_epfn = bad_spfn;
        }

        if ( *p == ',' )
            p++;
        else if ( *p != '\0' )
            break;

        if ( bad_epfn == bad_spfn )
            printk("Marking page %lx as bad\n", bad_spfn);
        else
            printk("Marking pages %lx through %lx as bad\n",
                   bad_spfn, bad_epfn);

        bootmem_region_zap(bad_spfn, bad_epfn+1);
    }
}

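/*
 * Allocate @nr_pfns contiguous pages, aligned to @pfn_align, carving them
 * out of the highest-addressed region that can satisfy the request.  Any
 * unused tail of that region is re-inserted into the region list.
 */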
unsigned long __init alloc_boot_pages(
    unsigned long nr_pfns, unsigned long pfn_align)
{
    unsigned long pg, _e;
    int i;

    for ( i = nr_bootmem_regions - 1; i >= 0; i-- )
    {
        struct bootmem_region *r = &bootmem_region_list[i];
        pg = (r->e - nr_pfns) & ~(pfn_align - 1);
        if ( pg < r->s )
            continue;
        _e = r->e;
        r->e = pg;
        bootmem_region_add(pg + nr_pfns, _e);
        return pg;
    }

    BOOT_BUG_ON(1);
    return 0;
}



/*************************
 * BINARY BUDDY ALLOCATOR
 */

#define MEMZONE_XEN 0
#define NR_ZONES    (PADDR_BITS - PAGE_SHIFT)

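/*
 * A non-Xen-heap page's zone is derived from the position of the most
 * significant set bit of its MFN: zone z contains MFNs in [2^z, 2^(z+1)).
 * Capping the highest zone searched therefore restricts an allocation to a
 * maximum physical address width (e.g. for DMA-limited callers).
 */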
#define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 0 : ((b) - PAGE_SHIFT - 1))
#define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN : \
                          (fls(page_to_mfn(pg)) - 1))

typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
#define heap(node, zone, order) ((*_heap[node])[zone][order])

static unsigned long *avail[MAX_NUMNODES];
static long total_avail_pages;

/* TMEM: Reserve a fraction of memory for mid-size (0<order<9) allocations. */
static long midsize_alloc_zone_pages;
#define MIDSIZE_ALLOC_FRAC 128

static DEFINE_SPINLOCK(heap_lock);

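/*
 * Set up the free-list and avail[] metadata for @node.  The metadata is
 * placed, in order of preference: in static storage (first node only), in
 * pages taken from the tail or head of the range being added (when that
 * range is covered by the direct map), or in a fresh xenheap/xmalloc
 * allocation.  Returns the number of pages consumed from the range.
 */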
static unsigned long init_node_heap(int node, unsigned long mfn,
                                    unsigned long nr, bool_t *use_tail)
{
    /* First node to be discovered has its heap metadata statically alloced. */
    static heap_by_zone_and_order_t _heap_static;
    static unsigned long avail_static[NR_ZONES];
    static int first_node_initialised;
    unsigned long needed = (sizeof(**_heap) +
                            sizeof(**avail) * NR_ZONES +
                            PAGE_SIZE - 1) >> PAGE_SHIFT;
    int i, j;

    if ( !first_node_initialised )
    {
        _heap[node] = &_heap_static;
        avail[node] = avail_static;
        first_node_initialised = 1;
        needed = 0;
    }
#ifdef DIRECTMAP_VIRT_END
    else if ( *use_tail && nr >= needed &&
              (mfn + nr) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) )
    {
        _heap[node] = mfn_to_virt(mfn + nr - needed);
        avail[node] = mfn_to_virt(mfn + nr - 1) +
                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
    }
    else if ( nr >= needed &&
              (mfn + needed) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) )
    {
        _heap[node] = mfn_to_virt(mfn);
        avail[node] = mfn_to_virt(mfn + needed - 1) +
                      PAGE_SIZE - sizeof(**avail) * NR_ZONES;
        *use_tail = 0;
    }
#endif
    else if ( get_order_from_bytes(sizeof(**_heap)) ==
              get_order_from_pages(needed) )
    {
        _heap[node] = alloc_xenheap_pages(get_order_from_pages(needed), 0);
        BUG_ON(!_heap[node]);
        avail[node] = (void *)_heap[node] + (needed << PAGE_SHIFT) -
                      sizeof(**avail) * NR_ZONES;
        needed = 0;
    }
    else
    {
        _heap[node] = xmalloc(heap_by_zone_and_order_t);
        avail[node] = xmalloc_array(unsigned long, NR_ZONES);
        BUG_ON(!_heap[node] || !avail[node]);
        needed = 0;
    }

    memset(avail[node], 0, NR_ZONES * sizeof(long));

    for ( i = 0; i < NR_ZONES; i++ )
        for ( j = 0; j <= MAX_ORDER; j++ )
            INIT_PAGE_LIST_HEAD(&(*_heap[node])[i][j]);

    return needed;
}

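/*
 * The search below walks zones from @zone_hi down to @zone_lo within the
 * preferred node, then moves on to the other nodes in @d's node affinity
 * (or the online node map), and finally retries the remaining online nodes,
 * unless MEMF_exact_node was requested.
 */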
/* Allocate 2^@order contiguous pages. */
static struct page_info *alloc_heap_pages(
    unsigned int zone_lo, unsigned int zone_hi,
    unsigned int order, unsigned int memflags,
    struct domain *d)
{
    unsigned int first_node, i, j, zone = 0, nodemask_retry = 0;
    unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1);
    unsigned long request = 1UL << order;
    cpumask_t extra_cpus_mask, mask;
    struct page_info *pg;
    nodemask_t nodemask = (d != NULL) ? d->node_affinity : node_online_map;

    if ( node == NUMA_NO_NODE )
    {
        memflags &= ~MEMF_exact_node;
        if ( d != NULL )
        {
            node = next_node(d->last_alloc_node, nodemask);
            if ( node >= MAX_NUMNODES )
                node = first_node(nodemask);
        }
        if ( node >= MAX_NUMNODES )
            node = cpu_to_node(smp_processor_id());
    }
    first_node = node;

    ASSERT(node >= 0);
    ASSERT(zone_lo <= zone_hi);
    ASSERT(zone_hi < NR_ZONES);

    if ( unlikely(order > MAX_ORDER) )
        return NULL;

    spin_lock(&heap_lock);

    /*
     * TMEM: When available memory is scarce due to tmem absorbing it, allow
     * only mid-size allocations to avoid worst of fragmentation issues.
     * Others try tmem pools then fail.  This is a workaround until all
     * post-dom0-creation-multi-page allocations can be eliminated.
     */
    if ( opt_tmem && ((order == 0) || (order >= 9)) &&
         (total_avail_pages <= midsize_alloc_zone_pages) &&
         tmem_freeable_pages() )
        goto try_tmem;

    /*
     * Start with the requested node, but exhaust all of that node's memory
     * in the requested zone range before failing.  Only compute a new node
     * value if we fail to find memory in the target node; this avoids
     * needless computation on the fast path.
     */
    for ( ; ; )
    {
        zone = zone_hi;
        do {
            /* Check if target node can support the allocation. */
            if ( !avail[node] || (avail[node][zone] < request) )
                continue;

            /* Find smallest order which can satisfy the request. */
            for ( j = order; j <= MAX_ORDER; j++ )
                if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
                    goto found;
        } while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */

        if ( memflags & MEMF_exact_node )
            goto not_found;

        /* Pick next node. */
        if ( !node_isset(node, nodemask) )
        {
            /* Very first node may be caller-specified and outside nodemask. */
            ASSERT(!nodemask_retry);
            first_node = node = first_node(nodemask);
            if ( node < MAX_NUMNODES )
                continue;
        }
        else if ( (node = next_node(node, nodemask)) >= MAX_NUMNODES )
            node = first_node(nodemask);
        if ( node == first_node )
        {
            /* When we have tried all in nodemask, we fall back to others. */
            if ( nodemask_retry++ )
                goto not_found;
            nodes_andnot(nodemask, node_online_map, nodemask);
            first_node = node = first_node(nodemask);
            if ( node >= MAX_NUMNODES )
                goto not_found;
        }
    }

 try_tmem:
    /* Try to free memory from tmem. */
    if ( (pg = tmem_relinquish_pages(order, memflags)) != NULL )
    {
        /* Reassigning an already allocated anonymous heap page. */
        spin_unlock(&heap_lock);
        return pg;
    }

 not_found:
    /* No suitable memory blocks. Fail the request. */
    spin_unlock(&heap_lock);
    return NULL;

 found:
    /* We may have to halve the chunk a number of times. */
    while ( j != order )
    {
        PFN_ORDER(pg) = --j;
        page_list_add_tail(pg, &heap(node, zone, j));
        pg += 1 << j;
    }

    ASSERT(avail[node][zone] >= request);
    avail[node][zone] -= request;
    total_avail_pages -= request;
    ASSERT(total_avail_pages >= 0);

    if ( d != NULL )
        d->last_alloc_node = node;

    cpus_clear(mask);

    for ( i = 0; i < (1 << order); i++ )
    {
        /* Reference count must continuously be zero for free pages. */
        BUG_ON(pg[i].count_info != PGC_state_free);
        pg[i].count_info = PGC_state_inuse;

        if ( pg[i].u.free.need_tlbflush )
        {
            /* Add in extra CPUs that need flushing because of this page. */
            cpus_andnot(extra_cpus_mask, cpu_online_map, mask);
            tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
            cpus_or(mask, mask, extra_cpus_mask);
        }

        /* Initialise fields which have other uses for free pages. */
        pg[i].u.inuse.type_info = 0;
        page_set_owner(&pg[i], NULL);
    }

    spin_unlock(&heap_lock);

    if ( unlikely(!cpus_empty(mask)) )
    {
        perfc_incr(need_flush_tlb_flush);
        flush_tlb_mask(&mask);
    }

    return pg;
}

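/*
 * The buddy is broken up into the largest power-of-two sub-blocks that
 * contain no offlined pages; those go back on the free lists, while the
 * offlined pages themselves are moved to page_offlined_list or, if marked
 * broken, to page_broken_list.
 */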
/* Remove any offlined page in the buddy pointed to by head. */
static int reserve_offlined_page(struct page_info *head)
{
    unsigned int node = phys_to_nid(page_to_maddr(head));
    int zone = page_to_zone(head), i, head_order = PFN_ORDER(head), count = 0;
    struct page_info *cur_head;
    int cur_order;

    ASSERT(spin_is_locked(&heap_lock));

    cur_head = head;

    page_list_del(head, &heap(node, zone, head_order));

    while ( cur_head < (head + (1 << head_order)) )
    {
        struct page_info *pg;
        int next_order;

        if ( page_state_is(cur_head, offlined) )
        {
            cur_head++;
            continue;
        }

        next_order = cur_order = 0;

        while ( cur_order < head_order )
        {
            next_order = cur_order + 1;

            if ( (cur_head + (1 << next_order)) >= (head + (1 << head_order)) )
                goto merge;

            for ( i = (1 << cur_order), pg = cur_head + (1 << cur_order);
                  i < (1 << next_order);
                  i++, pg++ )
                if ( page_state_is(pg, offlined) )
                    break;
            if ( i == (1 << next_order) )
            {
                cur_order = next_order;
                continue;
            }
            else
            {
            merge:
                /* We don't consider merging outside the head_order. */
                page_list_add_tail(cur_head, &heap(node, zone, cur_order));
                PFN_ORDER(cur_head) = cur_order;
                cur_head += (1 << cur_order);
                break;
            }
        }
    }

    for ( cur_head = head; cur_head < head + (1UL << head_order); cur_head++ )
    {
        if ( !page_state_is(cur_head, offlined) )
            continue;

        avail[node][zone]--;
        total_avail_pages--;
        ASSERT(total_avail_pages >= 0);

        page_list_add_tail(cur_head,
                           test_bit(_PGC_broken, &cur_head->count_info) ?
                           &page_broken_list : &page_offlined_list);

        count++;
    }

    return count;
}

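/*
 * Freed blocks are merged with their buddy (the adjacent, equally sized,
 * naturally aligned block) for as long as the buddy is itself free, of the
 * same order, and on the same NUMA node, doubling the order each time.
 */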
/* Free 2^@order set of pages. */
static void free_heap_pages(
    struct page_info *pg, unsigned int order)
{
    unsigned long mask;
    unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
    unsigned int zone = page_to_zone(pg);

    ASSERT(order <= MAX_ORDER);
    ASSERT(node >= 0);

    spin_lock(&heap_lock);

    for ( i = 0; i < (1 << order); i++ )
    {
        /*
         * Cannot assume that count_info == 0, as there are some corner cases
         * where it isn't the case and yet it isn't a bug:
         *  1. page_get_owner() is NULL
         *  2. page_get_owner() is a domain that was never accessible by
         *     its domid (e.g., failed to fully construct the domain).
         *  3. page was never addressable by the guest (e.g., it's an
         *     auto-translate-physmap guest and the page was never included
         *     in its pseudophysical address space).
         * In all the above cases there can be no guest mappings of this page.
         */
        ASSERT(!page_state_is(&pg[i], offlined));
        pg[i].count_info =
            ((pg[i].count_info & PGC_broken) |
             (page_state_is(&pg[i], offlining)
              ? PGC_state_offlined : PGC_state_free));
        if ( page_state_is(&pg[i], offlined) )
            tainted = 1;

        /* If a page has no owner it will need no safety TLB flush. */
        pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
        if ( pg[i].u.free.need_tlbflush )
            pg[i].tlbflush_timestamp = tlbflush_current_time();
    }

    avail[node][zone] += 1 << order;
    total_avail_pages += 1 << order;

    if ( opt_tmem )
        midsize_alloc_zone_pages = max(
            midsize_alloc_zone_pages, total_avail_pages / MIDSIZE_ALLOC_FRAC);

    /* Merge chunks as far as possible. */
    while ( order < MAX_ORDER )
    {
        mask = 1UL << order;

        if ( (page_to_mfn(pg) & mask) )
        {
            /* Merge with predecessor block? */
            if ( !mfn_valid(page_to_mfn(pg-mask)) ||
                 !page_state_is(pg-mask, free) ||
                 (PFN_ORDER(pg-mask) != order) ||
                 (phys_to_nid(page_to_maddr(pg-mask)) != node) )
                break;
            pg -= mask;
            page_list_del(pg, &heap(node, zone, order));
        }
        else
        {
            /* Merge with successor block? */
            if ( !mfn_valid(page_to_mfn(pg+mask)) ||
                 !page_state_is(pg+mask, free) ||
                 (PFN_ORDER(pg+mask) != order) ||
                 (phys_to_nid(page_to_maddr(pg+mask)) != node) )
                break;
            page_list_del(pg + mask, &heap(node, zone, order));
        }

        order++;
    }

    PFN_ORDER(pg) = order;
    page_list_add_tail(pg, &heap(node, zone, order));

    if ( tainted )
        reserve_offlined_page(pg);

    spin_unlock(&heap_lock);
}


/*
 * Possible states for a page:
 * free and online; free and offlined; free and offlined and broken;
 * assigned and online; assigned and offlining; assigned and offlining and
 * broken.
 *
 * Rules applied for page offline:
 *  - Once a page is broken, it cannot be assigned anymore.
 *  - A page will be offlined only if it is free.
 *
 * Returns the original count_info.
 */
static unsigned long mark_page_offline(struct page_info *pg, int broken)
{
    unsigned long nx, x, y = pg->count_info;

    ASSERT(page_is_ram_type(page_to_mfn(pg), RAM_TYPE_CONVENTIONAL));
    ASSERT(spin_is_locked(&heap_lock));

    do {
        nx = x = y;

        if ( ((x & PGC_state) != PGC_state_offlined) &&
             ((x & PGC_state) != PGC_state_offlining) )
        {
            nx &= ~PGC_state;
            nx |= (((x & PGC_state) == PGC_state_free)
                   ? PGC_state_offlined : PGC_state_offlining);
        }

        if ( broken )
            nx |= PGC_broken;

        if ( x == nx )
            break;
    } while ( (y = cmpxchg(&pg->count_info, x, nx)) != x );

    return y;
}

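/*
 * Find the free buddy that contains @pg and hand it to
 * reserve_offlined_page(), so that any offlined pages within it (normally
 * @pg itself) are pulled off the free lists.  Called with heap_lock held.
 */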
static int reserve_heap_page(struct page_info *pg)
{
    struct page_info *head = NULL;
    unsigned int i, node = phys_to_nid(page_to_maddr(pg));
    unsigned int zone = page_to_zone(pg);

    for ( i = 0; i <= MAX_ORDER; i++ )
    {
        struct page_info *tmp;

        if ( page_list_empty(&heap(node, zone, i)) )
            continue;

        page_list_for_each_safe ( head, tmp, &heap(node, zone, i) )
        {
            if ( (head <= pg) &&
                 (head + (1UL << i) > pg) )
                return reserve_offlined_page(head);
        }
    }

    return -EINVAL;
}

int offline_page(unsigned long mfn, int broken, uint32_t *status)
{
    unsigned long old_info = 0;
    struct domain *owner;
    int ret = 0;
    struct page_info *pg;

    if ( !mfn_valid(mfn) )
    {
        dprintk(XENLOG_WARNING,
                "try to offline page out of range %lx\n", mfn);
        return -EINVAL;
    }

    *status = 0;
    pg = mfn_to_page(mfn);

    if ( is_xen_fixed_mfn(mfn) )
    {
        *status = PG_OFFLINE_XENPAGE | PG_OFFLINE_FAILED |
                  (DOMID_XEN << PG_OFFLINE_OWNER_SHIFT);
        return -EPERM;
    }

    /*
     * N.B. Xen's txt on x86_64 is marked reserved and handled already.
     * The kexec range is reserved as well.
     */
    if ( !page_is_ram_type(mfn, RAM_TYPE_CONVENTIONAL) )
    {
        *status = PG_OFFLINE_FAILED | PG_OFFLINE_NOT_CONV_RAM;
        return -EINVAL;
    }

    spin_lock(&heap_lock);

    old_info = mark_page_offline(pg, broken);

    if ( page_state_is(pg, free) )
    {
        /* Free pages are reserved directly. */
        reserve_heap_page(pg);
        *status = PG_OFFLINE_OFFLINED;
    }
    else if ( page_state_is(pg, offlined) )
    {
        *status = PG_OFFLINE_OFFLINED;
    }
    else if ( (owner = page_get_owner_and_reference(pg)) )
    {
        *status = PG_OFFLINE_OWNED | PG_OFFLINE_PENDING |
                  (owner->domain_id << PG_OFFLINE_OWNER_SHIFT);
        /* Release the reference since it will not be allocated anymore. */
        put_page(pg);
    }
    else if ( old_info & PGC_xen_heap )
    {
        *status = PG_OFFLINE_XENPAGE | PG_OFFLINE_PENDING |
                  (DOMID_XEN << PG_OFFLINE_OWNER_SHIFT);
    }
    else
    {
        /*
         * assign_pages does not hold heap_lock, so there is a small window
         * in which the owner may be set later.  Note that the owner can only
         * change from NULL to non-NULL, never the other way round, since the
         * page is being offlined now.
         * There is no window if called from the #MC handler, since all CPUs
         * are in softirq context.  If called from user space (e.g. CE
         * handling), tools can wait some time before calling again.
         */
        *status = PG_OFFLINE_ANONYMOUS | PG_OFFLINE_FAILED |
                  (DOMID_INVALID << PG_OFFLINE_OWNER_SHIFT);
    }

    if ( broken )
        *status |= PG_OFFLINE_BROKEN;

    spin_unlock(&heap_lock);

    return ret;
}

/*
 * Online the memory.
 * The caller should make sure end_pfn <= max_page; if not, expand_pages()
 * should be called prior to online_page().
 */
unsigned int online_page(unsigned long mfn, uint32_t *status)
{
    unsigned long x, nx, y;
    struct page_info *pg;
    int ret;

    if ( !mfn_valid(mfn) )
    {
        dprintk(XENLOG_WARNING, "call expand_pages() first\n");
        return -EINVAL;
    }

    pg = mfn_to_page(mfn);

    spin_lock(&heap_lock);

    y = pg->count_info;
    do {
        ret = *status = 0;

        if ( y & PGC_broken )
        {
            ret = -EINVAL;
            *status = PG_ONLINE_FAILED | PG_ONLINE_BROKEN;
            break;
        }

        if ( (y & PGC_state) == PGC_state_offlined )
        {
            page_list_del(pg, &page_offlined_list);
            *status = PG_ONLINE_ONLINED;
        }
        else if ( (y & PGC_state) == PGC_state_offlining )
        {
            *status = PG_ONLINE_ONLINED;
        }
        else
        {
            break;
        }

        x = y;
        nx = (x & ~PGC_state) | PGC_state_inuse;
    } while ( (y = cmpxchg(&pg->count_info, x, nx)) != x );

    spin_unlock(&heap_lock);

    if ( (y & PGC_state) == PGC_state_offlined )
        free_heap_pages(pg, 0);

    return ret;
}

int query_page_offline(unsigned long mfn, uint32_t *status)
{
    struct page_info *pg;

    if ( !mfn_valid(mfn) || !page_is_ram_type(mfn, RAM_TYPE_CONVENTIONAL) )
    {
        dprintk(XENLOG_WARNING, "call expand_pages() first\n");
        return -EINVAL;
    }

    *status = 0;
    spin_lock(&heap_lock);

    pg = mfn_to_page(mfn);

    if ( page_state_is(pg, offlining) )
        *status |= PG_OFFLINE_STATUS_OFFLINE_PENDING;
    if ( pg->count_info & PGC_broken )
        *status |= PG_OFFLINE_STATUS_BROKEN;
    if ( page_state_is(pg, offlined) )
        *status |= PG_OFFLINE_STATUS_OFFLINED;

    spin_unlock(&heap_lock);

    return 0;
}

/*
 * Hand the specified arbitrary page range to the buddy allocator.  Pages are
 * freed one at a time; the first time a page belonging to a NUMA node whose
 * heap metadata is not yet initialised is seen, init_node_heap() is called,
 * possibly consuming part of the range for that node's bookkeeping.
 */
static void init_heap_pages(
    struct page_info *pg, unsigned long nr_pages)
{
    unsigned long i;

    for ( i = 0; i < nr_pages; i++ )
    {
        unsigned int nid = phys_to_nid(page_to_maddr(pg+i));

        if ( unlikely(!avail[nid]) )
        {
            unsigned long s = page_to_mfn(pg + i);
            unsigned long e = page_to_mfn(pg + nr_pages - 1) + 1;
            bool_t use_tail = (nid == phys_to_nid(pfn_to_paddr(e - 1))) &&
                              !(s & ((1UL << MAX_ORDER) - 1)) &&
                              (find_first_set_bit(e) <= find_first_set_bit(s));
            unsigned long n;

            n = init_node_heap(nid, page_to_mfn(pg+i), nr_pages - i,
                               &use_tail);
            BUG_ON(i + n > nr_pages);
            if ( n && !use_tail )
            {
                i += n - 1;
                continue;
            }
            if ( i + n == nr_pages )
                break;
            nr_pages -= n;
        }

        free_heap_pages(pg+i, 0);
    }
}

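/*
 * Return the number of free pages in zones [zone_lo, zone_hi] on @node, or
 * summed over all online nodes if @node is -1.
 */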
static unsigned long avail_heap_pages(
    unsigned int zone_lo, unsigned int zone_hi, unsigned int node)
{
    unsigned int i, zone;
    unsigned long free_pages = 0;

    if ( zone_hi >= NR_ZONES )
        zone_hi = NR_ZONES - 1;

    for_each_online_node(i)
    {
        if ( !avail[i] )
            continue;
        for ( zone = zone_lo; zone <= zone_hi; zone++ )
            if ( (node == -1) || (node == i) )
                free_pages += avail[i][zone];
    }

    return free_pages;
}

unsigned long total_free_pages(void)
{
    return total_avail_pages - midsize_alloc_zone_pages;
}

void __init end_boot_allocator(void)
{
    unsigned int i;

    /* Pages that are free now go to the domain sub-allocator. */
    for ( i = 0; i < nr_bootmem_regions; i++ )
    {
        struct bootmem_region *r = &bootmem_region_list[i];
        if ( (r->s < r->e) &&
             (phys_to_nid(pfn_to_paddr(r->s)) == cpu_to_node(0)) )
        {
            init_heap_pages(mfn_to_page(r->s), r->e - r->s);
            r->e = r->s;
            break;
        }
    }
    for ( i = nr_bootmem_regions; i-- > 0; )
    {
        struct bootmem_region *r = &bootmem_region_list[i];
        if ( r->s < r->e )
            init_heap_pages(mfn_to_page(r->s), r->e - r->s);
    }
    init_heap_pages(virt_to_page(bootmem_region_list), 1);

    if ( !dma_bitsize && (num_online_nodes() > 1) )
    {
#ifdef CONFIG_X86
        dma_bitsize = min_t(unsigned int,
                            fls(NODE_DATA(0)->node_spanned_pages) - 1
                            + PAGE_SHIFT - 2,
                            32);
#else
        dma_bitsize = 32;
#endif
    }

    printk("Domain heap initialised");
    if ( dma_bitsize )
        printk(" DMA width %u bits", dma_bitsize);
    printk("\n");
}

/*
 * Scrub all unallocated pages in all heap zones. This function is more
 * convoluted than appears necessary because we do not want to continuously
 * hold the lock while scrubbing very large memory areas.
 */
void __init scrub_heap_pages(void)
{
    unsigned long mfn;
    struct page_info *pg;

    if ( !opt_bootscrub )
        return;

    printk("Scrubbing Free RAM: ");

    for ( mfn = first_valid_mfn; mfn < max_page; mfn++ )
    {
        process_pending_softirqs();

        pg = mfn_to_page(mfn);

        /* Quick lock-free check. */
        if ( !mfn_valid(mfn) || !page_state_is(pg, free) )
            continue;

        /* Every 100MB, print a progress dot. */
        if ( (mfn % ((100*1024*1024)/PAGE_SIZE)) == 0 )
            printk(".");

        spin_lock(&heap_lock);

        /* Re-check page status with lock held. */
        if ( page_state_is(pg, free) )
            scrub_one_page(pg);

        spin_unlock(&heap_lock);
    }

    printk("done.\n");
}



/*************************
 * XEN-HEAP SUB-ALLOCATOR
 */

#if !defined(__x86_64__) && !defined(__ia64__)

void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
    ps = round_pgup(ps);
    pe = round_pgdown(pe);
    if ( pe <= ps )
        return;

    /*
     * Yuk! Ensure there is a one-page buffer between Xen and Dom zones, to
     * prevent merging of power-of-two blocks across the zone boundary.
     */
    if ( ps && !is_xen_heap_mfn(paddr_to_pfn(ps)-1) )
        ps += PAGE_SIZE;
    if ( !is_xen_heap_mfn(paddr_to_pfn(pe)) )
        pe -= PAGE_SIZE;

    memguard_guard_range(maddr_to_virt(ps), pe - ps);

    init_heap_pages(maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
}


void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
{
    struct page_info *pg;

    ASSERT(!in_irq());

    pg = alloc_heap_pages(MEMZONE_XEN, MEMZONE_XEN,
                          order, memflags, NULL);
    if ( unlikely(pg == NULL) )
        return NULL;

    memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));

    return page_to_virt(pg);
}


void free_xenheap_pages(void *v, unsigned int order)
{
    ASSERT(!in_irq());

    if ( v == NULL )
        return;

    memguard_guard_range(v, 1 << (order + PAGE_SHIFT));

    free_heap_pages(virt_to_page(v), order);
}

#else

void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
    init_domheap_pages(ps, pe);
}

void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
{
    struct page_info *pg;
    unsigned int i;

    ASSERT(!in_irq());

    pg = alloc_domheap_pages(NULL, order, memflags);
    if ( unlikely(pg == NULL) )
        return NULL;

    for ( i = 0; i < (1u << order); i++ )
        pg[i].count_info |= PGC_xen_heap;

    return page_to_virt(pg);
}

void free_xenheap_pages(void *v, unsigned int order)
{
    struct page_info *pg;
    unsigned int i;

    ASSERT(!in_irq());

    if ( v == NULL )
        return;

    pg = virt_to_page(v);

    for ( i = 0; i < (1u << order); i++ )
        pg[i].count_info &= ~PGC_xen_heap;

    free_heap_pages(pg, order);
}

#endif



/*************************
 * DOMAIN-HEAP SUB-ALLOCATOR
 */

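/*
 * Add the byte range [ps, pe), rounded inwards to page boundaries, to the
 * domain heap via init_heap_pages().
 */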
void init_domheap_pages(paddr_t ps, paddr_t pe)
{
    unsigned long smfn, emfn;

    ASSERT(!in_irq());

    smfn = round_pgup(ps) >> PAGE_SHIFT;
    emfn = round_pgdown(pe) >> PAGE_SHIFT;

    init_heap_pages(mfn_to_page(smfn), emfn - smfn);
}

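/*
 * Assign an already-allocated 2^@order page extent to domain @d, charging it
 * against the domain's page quota unless MEMF_no_refcount is set.  Returns 0
 * on success, -1 if the domain is dying or would exceed its allocation.
 */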
int assign_pages(
    struct domain *d,
    struct page_info *pg,
    unsigned int order,
    unsigned int memflags)
{
    unsigned long i;

    spin_lock(&d->page_alloc_lock);

    if ( unlikely(d->is_dying) )
    {
        gdprintk(XENLOG_INFO, "Cannot assign page to domain%d -- dying.\n",
                 d->domain_id);
        goto fail;
    }

    if ( !(memflags & MEMF_no_refcount) )
    {
        if ( unlikely((d->tot_pages + (1 << order)) > d->max_pages) )
        {
            if ( !opt_tmem || order != 0 || d->tot_pages != d->max_pages )
                gdprintk(XENLOG_INFO, "Over-allocation for domain %u: "
                         "%u > %u\n", d->domain_id,
                         d->tot_pages + (1 << order), d->max_pages);
            goto fail;
        }

        if ( unlikely(d->tot_pages == 0) )
            get_knownalive_domain(d);

        d->tot_pages += 1 << order;
    }

    for ( i = 0; i < (1 << order); i++ )
    {
        ASSERT(page_get_owner(&pg[i]) == NULL);
        ASSERT((pg[i].count_info & ~(PGC_allocated | 1)) == 0);
        page_set_owner(&pg[i], d);
        wmb(); /* Domain pointer must be visible before updating refcnt. */
        pg[i].count_info = PGC_allocated | 1;
        page_list_add_tail(&pg[i], &d->page_list);
    }

    spin_unlock(&d->page_alloc_lock);
    return 0;

 fail:
    spin_unlock(&d->page_alloc_lock);
    return -1;
}

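/*
 * Allocate 2^@order domain-heap pages, preferring zones above the DMA
 * boundary (dma_bitsize) and falling back to the full non-Xen zone range
 * unless MEMF_no_dma was passed.  If @d is non-NULL the pages are assigned
 * to it, and are freed again should that assignment fail.
 */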
struct page_info *alloc_domheap_pages(
    struct domain *d, unsigned int order, unsigned int memflags)
{
    struct page_info *pg = NULL;
    unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
    unsigned int dma_zone;

    ASSERT(!in_irq());

    bits = domain_clamp_alloc_bitsize(d, bits ? : (BITS_PER_LONG+PAGE_SHIFT));
    if ( (zone_hi = min_t(unsigned int, bits_to_zone(bits), zone_hi)) == 0 )
        return NULL;

    if ( dma_bitsize && ((dma_zone = bits_to_zone(dma_bitsize)) < zone_hi) )
        pg = alloc_heap_pages(dma_zone + 1, zone_hi, order, memflags, d);

    if ( (pg == NULL) &&
         ((memflags & MEMF_no_dma) ||
          ((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi, order,
                                  memflags, d)) == NULL)) )
        return NULL;

    if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
    {
        free_heap_pages(pg, order);
        return NULL;
    }

    return pg;
}

void free_domheap_pages(struct page_info *pg, unsigned int order)
{
    int i, drop_dom_ref;
    struct domain *d = page_get_owner(pg);

    ASSERT(!in_irq());

    if ( unlikely(is_xen_heap_page(pg)) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
            page_list_del2(&pg[i], &d->xenpage_list, &d->arch.relmem_list);

        d->xenheap_pages -= 1 << order;
        drop_dom_ref = (d->xenheap_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);
    }
    else if ( likely(d != NULL) && likely(d != dom_cow) )
    {
        /* NB. May recursively lock from relinquish_memory(). */
        spin_lock_recursive(&d->page_alloc_lock);

        for ( i = 0; i < (1 << order); i++ )
        {
            BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
            page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list);
        }

        d->tot_pages -= 1 << order;
        drop_dom_ref = (d->tot_pages == 0);

        spin_unlock_recursive(&d->page_alloc_lock);

        /*
         * Normally we expect a domain to clear pages before freeing them, if
         * it cares about the secrecy of their contents. However, after a
         * domain has died we assume responsibility for erasure.
         */
        if ( unlikely(d->is_dying) )
            for ( i = 0; i < (1 << order); i++ )
                scrub_one_page(&pg[i]);

        free_heap_pages(pg, order);
    }
    else if ( unlikely(d == dom_cow) )
    {
        ASSERT(order == 0);
        scrub_one_page(pg);
        free_heap_pages(pg, 0);
        drop_dom_ref = 0;
    }
    else
    {
        /* Freeing anonymous domain-heap pages. */
        free_heap_pages(pg, order);
        drop_dom_ref = 0;
    }

    if ( drop_dom_ref )
        put_domain(d);
}

unsigned long avail_domheap_pages_region(
    unsigned int node, unsigned int min_width, unsigned int max_width)
{
    int zone_lo, zone_hi;

    zone_lo = min_width ? bits_to_zone(min_width) : (MEMZONE_XEN + 1);
    zone_lo = max_t(int, MEMZONE_XEN + 1, min_t(int, NR_ZONES - 1, zone_lo));

    zone_hi = max_width ? bits_to_zone(max_width) : (NR_ZONES - 1);
    zone_hi = max_t(int, MEMZONE_XEN + 1, min_t(int, NR_ZONES - 1, zone_hi));

    return avail_heap_pages(zone_lo, zone_hi, node);
}

unsigned long avail_domheap_pages(void)
{
    return avail_heap_pages(MEMZONE_XEN + 1,
                            NR_ZONES - 1,
                            -1);
}

unsigned long avail_node_heap_pages(unsigned int nodeid)
{
    return avail_heap_pages(MEMZONE_XEN, NR_ZONES - 1, nodeid);
}


static void pagealloc_info(unsigned char key)
{
    unsigned int zone = MEMZONE_XEN;
    unsigned long n, total = 0;

    printk("Physical memory information:\n");
    printk(" Xen heap: %lukB free\n",
           avail_heap_pages(zone, zone, -1) << (PAGE_SHIFT-10));

    while ( ++zone < NR_ZONES )
    {
        if ( (zone + PAGE_SHIFT) == dma_bitsize )
        {
            printk(" DMA heap: %lukB free\n", total << (PAGE_SHIFT-10));
            total = 0;
        }

        if ( (n = avail_heap_pages(zone, zone, -1)) != 0 )
        {
            total += n;
            printk(" heap[%02u]: %lukB free\n", zone, n << (PAGE_SHIFT-10));
        }
    }

    printk(" Dom heap: %lukB free\n", total << (PAGE_SHIFT-10));
}

static struct keyhandler pagealloc_info_keyhandler = {
    .diagnostic = 1,
    .u.fn = pagealloc_info,
    .desc = "memory info"
};

static __init int pagealloc_keyhandler_init(void)
{
    register_keyhandler('m', &pagealloc_info_keyhandler);
    return 0;
}
__initcall(pagealloc_keyhandler_init);


void scrub_one_page(struct page_info *pg)
{
    void *p = __map_domain_page(pg);

    if ( unlikely(pg->count_info & PGC_broken) )
        return;

#ifndef NDEBUG
    /* Avoid callers relying on allocations returning zeroed pages. */
    memset(p, 0xc2, PAGE_SIZE);
#else
    /* For a production build, clear_page() is the fastest way to scrub. */
    clear_page(p);
#endif

    unmap_domain_page(p);
}

static void dump_heap(unsigned char key)
{
    s_time_t now = NOW();
    int i, j;

    printk("'%c' pressed -> dumping heap info (now-0x%X:%08X)\n", key,
           (u32)(now>>32), (u32)now);

    for ( i = 0; i < MAX_NUMNODES; i++ )
    {
        if ( !avail[i] )
            continue;
        for ( j = 0; j < NR_ZONES; j++ )
            printk("heap[node=%d][zone=%d] -> %lu pages\n",
                   i, j, avail[i][j]);
    }
}

static struct keyhandler dump_heap_keyhandler = {
    .diagnostic = 1,
    .u.fn = dump_heap,
    .desc = "dump heap info"
};

static __init int register_heap_trigger(void)
{
    register_keyhandler('H', &dump_heap_keyhandler);
    return 0;
}
__initcall(register_heap_trigger);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */