/root/src/xen/xen/common/tmem.c
Line | Count | Source (jump to first uncovered line) |
1 | | /****************************************************************************** |
2 | | * tmem.c |
3 | | * |
4 | | * Transcendent memory |
5 | | * |
6 | | * Copyright (c) 2009, Dan Magenheimer, Oracle Corp. |
7 | | */ |
8 | | |
9 | | /* TODO list: 090129 (updated 100318) |
10 | | - any better reclamation policy? |
11 | | - use different tlsf pools for each client (maybe each pool) |
12 | | - test shared access more completely (ocfs2) |
13 | | - add feedback-driven compression (not for persistent pools though!) |
14 | | - add data-structure total bytes overhead stats |
15 | | */ |
16 | | |
17 | | #ifdef __XEN__ |
18 | | #include <xen/tmem_xen.h> /* host-specific (e.g. Xen) code goes here. */ |
19 | | #endif |
20 | | |
21 | | #include <public/sysctl.h> |
22 | | #include <xen/tmem.h> |
23 | | #include <xen/rbtree.h> |
24 | | #include <xen/radix-tree.h> |
25 | | #include <xen/list.h> |
26 | | #include <xen/init.h> |
27 | | |
28 | 0 | #define TMEM_SPEC_VERSION 1 |
29 | | |
30 | | struct tmem_statistics tmem_stats = { |
31 | | .global_obj_count = ATOMIC_INIT(0), |
32 | | .global_pgp_count = ATOMIC_INIT(0), |
33 | | .global_pcd_count = ATOMIC_INIT(0), |
34 | | .global_page_count = ATOMIC_INIT(0), |
35 | | .global_rtree_node_count = ATOMIC_INIT(0), |
36 | | }; |
37 | | |
38 | | /************ CORE DATA STRUCTURES ************************************/ |
39 | | |
40 | | struct tmem_object_root { |
41 | | struct xen_tmem_oid oid; |
42 | | struct rb_node rb_tree_node; /* Protected by pool->pool_rwlock. */ |
43 | | unsigned long objnode_count; /* Atomicity depends on obj_spinlock. */ |
44 | | long pgp_count; /* Atomicity depends on obj_spinlock. */ |
45 | | struct radix_tree_root tree_root; /* Tree of pages within object. */ |
46 | | struct tmem_pool *pool; |
47 | | domid_t last_client; |
48 | | spinlock_t obj_spinlock; |
49 | | }; |
50 | | |
51 | | struct tmem_object_node { |
52 | | struct tmem_object_root *obj; |
53 | | struct radix_tree_node rtn; |
54 | | }; |
55 | | |
56 | | struct tmem_page_descriptor { |
57 | | union { |
58 | | struct list_head global_eph_pages; |
59 | | struct list_head client_inv_pages; |
60 | | }; |
61 | | union { |
62 | | struct { |
63 | | union { |
64 | | struct list_head client_eph_pages; |
65 | | struct list_head pool_pers_pages; |
66 | | }; |
67 | | struct tmem_object_root *obj; |
68 | | } us; |
69 | | struct xen_tmem_oid inv_oid; /* Used for invalid list only. */ |
70 | | }; |
71 | | pagesize_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid, |
72 | | else compressed data (cdata). */ |
73 | | uint32_t index; |
74 | | bool eviction_attempted; /* CHANGE TO lifetimes? (settable). */ |
75 | | union { |
76 | | struct page_info *pfp; /* Page frame pointer. */ |
77 | | char *cdata; /* Compressed data. */ |
78 | | struct tmem_page_content_descriptor *pcd; /* Page dedup. */ |
79 | | }; |
80 | | union { |
81 | | uint64_t timestamp; |
82 | | uint32_t pool_id; /* Used for invalid list only. */ |
83 | | }; |
84 | | }; |
85 | | |
86 | | #define PCD_TZE_MAX_SIZE (PAGE_SIZE - (PAGE_SIZE/64)) |
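| | /* |
| |  * Editor's note (worked arithmetic, assuming 4K pages): with |
| |  * PAGE_SIZE == 4096, PCD_TZE_MAX_SIZE == 4096 - 64 == 4032 bytes, |
| |  * i.e. a page qualifies for trailing-zero elimination only if at |
| |  * least 64 bytes of trailing zeroes can be dropped. |
| |  */ |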
87 | | |
88 | | struct tmem_page_content_descriptor { |
89 | | union { |
90 | | struct page_info *pfp; /* Page frame pointer. */ |
91 | | char *cdata; /* If compression_enabled. */ |
92 | | }; |
93 | | pagesize_t size; /* If compression_enabled -> 0<size<PAGE_SIZE (*cdata) |
94 | | * else if tze, 0<=size<PAGE_SIZE, rounded up to mult of 8 |
95 | | * else PAGE_SIZE -> *pfp. */ |
96 | | }; |
97 | | |
98 | | static int tmem_initialized = 0; |
99 | | |
100 | | struct xmem_pool *tmem_mempool = 0; |
101 | | unsigned int tmem_mempool_maxalloc = 0; |
102 | | |
103 | | DEFINE_SPINLOCK(tmem_page_list_lock); |
104 | | PAGE_LIST_HEAD(tmem_page_list); |
105 | | unsigned long tmem_page_list_pages = 0; |
106 | | |
107 | | DEFINE_RWLOCK(tmem_rwlock); |
108 | | static DEFINE_SPINLOCK(eph_lists_spinlock); /* Protects global AND clients. */ |
109 | | static DEFINE_SPINLOCK(pers_lists_spinlock); |
110 | | |
111 | 0 | #define ASSERT_SPINLOCK(_l) ASSERT(spin_is_locked(_l)) |
112 | 0 | #define ASSERT_WRITELOCK(_l) ASSERT(rw_is_write_locked(_l)) |
113 | | |
114 | | atomic_t client_weight_total; |
115 | | |
116 | | struct tmem_global tmem_global = { |
117 | | .ephemeral_page_list = LIST_HEAD_INIT(tmem_global.ephemeral_page_list), |
118 | | .client_list = LIST_HEAD_INIT(tmem_global.client_list), |
119 | | .client_weight_total = ATOMIC_INIT(0), |
120 | | }; |
121 | | |
122 | | /* |
123 | |  * There are two types of memory allocation interfaces in tmem. |
124 | |  * One is based on xmem_pool; the other is used to allocate a whole page. |
125 | |  * Both are based on the low-level functions __tmem_alloc_page/_thispool(). |
126 | |  * The call trace of the alloc path is shown below. |
127 | |  * Persistent pool: |
128 | | * 1.tmem_malloc() |
129 | | * > xmem_pool_alloc() |
130 | | * > tmem_persistent_pool_page_get() |
131 | | * > __tmem_alloc_page_thispool() |
132 | | * 2.tmem_alloc_page() |
133 | | * > __tmem_alloc_page_thispool() |
134 | | * |
135 | | * Ephemeral pool: |
136 | | * 1.tmem_malloc() |
137 | | * > xmem_pool_alloc() |
138 | | * > tmem_mempool_page_get() |
139 | | * > __tmem_alloc_page() |
140 | | * 2.tmem_alloc_page() |
141 | | * > __tmem_alloc_page() |
142 | | * |
143 | | * The free path is done in the same manner. |
144 | | */ |
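| | /* |
| |  * Editor's sketch (not in the original source): how a caller picks |
| |  * between the two interfaces above. Sub-page metadata goes through |
| |  * tmem_malloc() (xmem_pool-backed); whole data pages go through |
| |  * tmem_alloc_page(). The pool argument selects the persistent |
| |  * (per-client) or the ephemeral (global) backing store: |
| |  * |
| |  *     struct tmem_page_descriptor *pgp; |
| |  *     struct page_info *pfp; |
| |  * |
| |  *     pgp = tmem_malloc(sizeof(*pgp), pool);  // sub-page, xmem_pool |
| |  *     pfp = tmem_alloc_page(pool);            // whole page frame |
| |  *     ... |
| |  *     tmem_free(pgp, pool); |
| |  *     tmem_free_page(pool, pfp); |
| |  */ |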
145 | | static void *tmem_malloc(size_t size, struct tmem_pool *pool) |
146 | 0 | { |
147 | 0 | void *v = NULL; |
148 | 0 |
149 | 0 | if ( (pool != NULL) && is_persistent(pool) ) { |
150 | 0 | if ( pool->client->persistent_pool ) |
151 | 0 | v = xmem_pool_alloc(size, pool->client->persistent_pool); |
152 | 0 | } |
153 | 0 | else |
154 | 0 | { |
155 | 0 | ASSERT( size < tmem_mempool_maxalloc ); |
156 | 0 | ASSERT( tmem_mempool != NULL ); |
157 | 0 | v = xmem_pool_alloc(size, tmem_mempool); |
158 | 0 | } |
159 | 0 | if ( v == NULL ) |
160 | 0 | tmem_stats.alloc_failed++; |
161 | 0 | return v; |
162 | 0 | } |
163 | | |
164 | | static void tmem_free(void *p, struct tmem_pool *pool) |
165 | 0 | { |
166 | 0 | if ( pool == NULL || !is_persistent(pool) ) |
167 | 0 | { |
168 | 0 | ASSERT( tmem_mempool != NULL ); |
169 | 0 | xmem_pool_free(p, tmem_mempool); |
170 | 0 | } |
171 | 0 | else |
172 | 0 | { |
173 | 0 | ASSERT( pool->client->persistent_pool != NULL ); |
174 | 0 | xmem_pool_free(p, pool->client->persistent_pool); |
175 | 0 | } |
176 | 0 | } |
177 | | |
178 | | static struct page_info *tmem_alloc_page(struct tmem_pool *pool) |
179 | 0 | { |
180 | 0 | struct page_info *pfp = NULL; |
181 | 0 |
182 | 0 | if ( pool != NULL && is_persistent(pool) ) |
183 | 0 | pfp = __tmem_alloc_page_thispool(pool->client->domain); |
184 | 0 | else |
185 | 0 | pfp = __tmem_alloc_page(); |
186 | 0 | if ( pfp == NULL ) |
187 | 0 | tmem_stats.alloc_page_failed++; |
188 | 0 | else |
189 | 0 | atomic_inc_and_max(global_page_count); |
190 | 0 | return pfp; |
191 | 0 | } |
192 | | |
193 | | static void tmem_free_page(struct tmem_pool *pool, struct page_info *pfp) |
194 | 0 | { |
195 | 0 | ASSERT(pfp); |
196 | 0 | if ( pool == NULL || !is_persistent(pool) ) |
197 | 0 | __tmem_free_page(pfp); |
198 | 0 | else |
199 | 0 | __tmem_free_page_thispool(pfp); |
200 | 0 | atomic_dec_and_assert(global_page_count); |
201 | 0 | } |
202 | | |
203 | | static void *tmem_mempool_page_get(unsigned long size) |
204 | 0 | { |
205 | 0 | struct page_info *pi; |
206 | 0 |
207 | 0 | ASSERT(size == PAGE_SIZE); |
208 | 0 | if ( (pi = __tmem_alloc_page()) == NULL ) |
209 | 0 | return NULL; |
210 | 0 | return page_to_virt(pi); |
211 | 0 | } |
212 | | |
213 | | static void tmem_mempool_page_put(void *page_va) |
214 | 0 | { |
215 | 0 | ASSERT(IS_PAGE_ALIGNED(page_va)); |
216 | 0 | __tmem_free_page(virt_to_page(page_va)); |
217 | 0 | } |
218 | | |
219 | | static int __init tmem_mempool_init(void) |
220 | 0 | { |
221 | 0 | tmem_mempool = xmem_pool_create("tmem", tmem_mempool_page_get, |
222 | 0 | tmem_mempool_page_put, PAGE_SIZE, 0, PAGE_SIZE); |
223 | 0 | if ( tmem_mempool ) |
224 | 0 | tmem_mempool_maxalloc = xmem_pool_maxalloc(tmem_mempool); |
225 | 0 | return tmem_mempool != NULL; |
226 | 0 | } |
227 | | |
228 | | /* Persistent pools are per-domain. */ |
229 | | static void *tmem_persistent_pool_page_get(unsigned long size) |
230 | 0 | { |
231 | 0 | struct page_info *pi; |
232 | 0 | struct domain *d = current->domain; |
233 | 0 |
234 | 0 | ASSERT(size == PAGE_SIZE); |
235 | 0 | if ( (pi = __tmem_alloc_page_thispool(d)) == NULL ) |
236 | 0 | return NULL; |
237 | 0 | ASSERT(IS_VALID_PAGE(pi)); |
238 | 0 | return page_to_virt(pi); |
239 | 0 | } |
240 | | |
241 | | static void tmem_persistent_pool_page_put(void *page_va) |
242 | 0 | { |
243 | 0 | struct page_info *pi; |
244 | 0 |
245 | 0 | ASSERT(IS_PAGE_ALIGNED(page_va)); |
246 | 0 | pi = mfn_to_page(virt_to_mfn(page_va)); |
247 | 0 | ASSERT(IS_VALID_PAGE(pi)); |
248 | 0 | __tmem_free_page_thispool(pi); |
249 | 0 | } |
250 | | |
251 | | /* |
252 | | * Page content descriptor manipulation routines. |
253 | | */ |
254 | | #define NOT_SHAREABLE ((uint16_t)-1UL) |
255 | | |
256 | | /************ PAGE DESCRIPTOR MANIPULATION ROUTINES *******************/ |
257 | | |
258 | | /* Allocate a struct tmem_page_descriptor and associate it with an object. */ |
259 | | static struct tmem_page_descriptor *pgp_alloc(struct tmem_object_root *obj) |
260 | 0 | { |
261 | 0 | struct tmem_page_descriptor *pgp; |
262 | 0 | struct tmem_pool *pool; |
263 | 0 |
264 | 0 | ASSERT(obj != NULL); |
265 | 0 | ASSERT(obj->pool != NULL); |
266 | 0 | pool = obj->pool; |
267 | 0 | if ( (pgp = tmem_malloc(sizeof(struct tmem_page_descriptor), pool)) == NULL ) |
268 | 0 | return NULL; |
269 | 0 | pgp->us.obj = obj; |
270 | 0 | INIT_LIST_HEAD(&pgp->global_eph_pages); |
271 | 0 | INIT_LIST_HEAD(&pgp->us.client_eph_pages); |
272 | 0 | pgp->pfp = NULL; |
273 | 0 | pgp->size = -1; |
274 | 0 | pgp->index = -1; |
275 | 0 | pgp->timestamp = get_cycles(); |
276 | 0 | atomic_inc_and_max(global_pgp_count); |
277 | 0 | atomic_inc(&pool->pgp_count); |
278 | 0 | if ( _atomic_read(pool->pgp_count) > pool->pgp_count_max ) |
279 | 0 | pool->pgp_count_max = _atomic_read(pool->pgp_count); |
280 | 0 | return pgp; |
281 | 0 | } |
282 | | |
283 | | static struct tmem_page_descriptor *pgp_lookup_in_obj(struct tmem_object_root *obj, uint32_t index) |
284 | 0 | { |
285 | 0 | ASSERT(obj != NULL); |
286 | 0 | ASSERT_SPINLOCK(&obj->obj_spinlock); |
287 | 0 | ASSERT(obj->pool != NULL); |
288 | 0 | return radix_tree_lookup(&obj->tree_root, index); |
289 | 0 | } |
290 | | |
291 | | static void pgp_free_data(struct tmem_page_descriptor *pgp, struct tmem_pool *pool) |
292 | 0 | { |
293 | 0 | pagesize_t pgp_size = pgp->size; |
294 | 0 |
295 | 0 | if ( pgp->pfp == NULL ) |
296 | 0 | return; |
297 | 0 | if ( pgp_size ) |
298 | 0 | tmem_free(pgp->cdata, pool); |
299 | 0 | else |
300 | 0 | tmem_free_page(pgp->us.obj->pool,pgp->pfp); |
301 | 0 | if ( pool != NULL && pgp_size ) |
302 | 0 | { |
303 | 0 | pool->client->compressed_pages--; |
304 | 0 | pool->client->compressed_sum_size -= pgp_size; |
305 | 0 | } |
306 | 0 | pgp->pfp = NULL; |
307 | 0 | pgp->size = -1; |
308 | 0 | } |
309 | | |
310 | | static void __pgp_free(struct tmem_page_descriptor *pgp, struct tmem_pool *pool) |
311 | 0 | { |
312 | 0 | pgp->us.obj = NULL; |
313 | 0 | pgp->index = -1; |
314 | 0 | tmem_free(pgp, pool); |
315 | 0 | } |
316 | | |
317 | | static void pgp_free(struct tmem_page_descriptor *pgp) |
318 | 0 | { |
319 | 0 | struct tmem_pool *pool = NULL; |
320 | 0 |
321 | 0 | ASSERT(pgp->us.obj != NULL); |
322 | 0 | ASSERT(pgp->us.obj->pool != NULL); |
323 | 0 | ASSERT(pgp->us.obj->pool->client != NULL); |
324 | 0 |
325 | 0 | pool = pgp->us.obj->pool; |
326 | 0 | if ( !is_persistent(pool) ) |
327 | 0 | { |
328 | 0 | ASSERT(list_empty(&pgp->global_eph_pages)); |
329 | 0 | ASSERT(list_empty(&pgp->us.client_eph_pages)); |
330 | 0 | } |
331 | 0 | pgp_free_data(pgp, pool); |
332 | 0 | atomic_dec_and_assert(global_pgp_count); |
333 | 0 | atomic_dec(&pool->pgp_count); |
334 | 0 | ASSERT(_atomic_read(pool->pgp_count) >= 0); |
335 | 0 | pgp->size = -1; |
336 | 0 | if ( is_persistent(pool) && pool->client->info.flags.u.migrating ) |
337 | 0 | { |
338 | 0 | pgp->inv_oid = pgp->us.obj->oid; |
339 | 0 | pgp->pool_id = pool->pool_id; |
340 | 0 | return; |
341 | 0 | } |
342 | 0 | __pgp_free(pgp, pool); |
343 | 0 | } |
344 | | |
345 | | /* Remove pgp from global/pool/client lists and free it. */ |
346 | | static void pgp_delist_free(struct tmem_page_descriptor *pgp) |
347 | 0 | { |
348 | 0 | struct client *client; |
349 | 0 | uint64_t life; |
350 | 0 |
351 | 0 | ASSERT(pgp != NULL); |
352 | 0 | ASSERT(pgp->us.obj != NULL); |
353 | 0 | ASSERT(pgp->us.obj->pool != NULL); |
354 | 0 | client = pgp->us.obj->pool->client; |
355 | 0 | ASSERT(client != NULL); |
356 | 0 |
357 | 0 | /* Delist pgp. */ |
358 | 0 | if ( !is_persistent(pgp->us.obj->pool) ) |
359 | 0 | { |
360 | 0 | spin_lock(&eph_lists_spinlock); |
361 | 0 | if ( !list_empty(&pgp->us.client_eph_pages) ) |
362 | 0 | client->eph_count--; |
363 | 0 | ASSERT(client->eph_count >= 0); |
364 | 0 | list_del_init(&pgp->us.client_eph_pages); |
365 | 0 | if ( !list_empty(&pgp->global_eph_pages) ) |
366 | 0 | tmem_global.eph_count--; |
367 | 0 | ASSERT(tmem_global.eph_count >= 0); |
368 | 0 | list_del_init(&pgp->global_eph_pages); |
369 | 0 | spin_unlock(&eph_lists_spinlock); |
370 | 0 | } |
371 | 0 | else |
372 | 0 | { |
373 | 0 | if ( client->info.flags.u.migrating ) |
374 | 0 | { |
375 | 0 | spin_lock(&pers_lists_spinlock); |
376 | 0 | list_add_tail(&pgp->client_inv_pages, |
377 | 0 | &client->persistent_invalidated_list); |
378 | 0 | if ( pgp != pgp->us.obj->pool->cur_pgp ) |
379 | 0 | list_del_init(&pgp->us.pool_pers_pages); |
380 | 0 | spin_unlock(&pers_lists_spinlock); |
381 | 0 | } |
382 | 0 | else |
383 | 0 | { |
384 | 0 | spin_lock(&pers_lists_spinlock); |
385 | 0 | list_del_init(&pgp->us.pool_pers_pages); |
386 | 0 | spin_unlock(&pers_lists_spinlock); |
387 | 0 | } |
388 | 0 | } |
389 | 0 | life = get_cycles() - pgp->timestamp; |
390 | 0 | pgp->us.obj->pool->sum_life_cycles += life; |
391 | 0 |
392 | 0 | /* Free pgp. */ |
393 | 0 | pgp_free(pgp); |
394 | 0 | } |
395 | | |
396 | | /* Called only indirectly by radix_tree_destroy. */ |
397 | | static void pgp_destroy(void *v) |
398 | 0 | { |
399 | 0 | struct tmem_page_descriptor *pgp = (struct tmem_page_descriptor *)v; |
400 | 0 |
401 | 0 | pgp->us.obj->pgp_count--; |
402 | 0 | pgp_delist_free(pgp); |
403 | 0 | } |
404 | | |
405 | | static int pgp_add_to_obj(struct tmem_object_root *obj, uint32_t index, struct tmem_page_descriptor *pgp) |
406 | 0 | { |
407 | 0 | int ret; |
408 | 0 |
409 | 0 | ASSERT_SPINLOCK(&obj->obj_spinlock); |
410 | 0 | ret = radix_tree_insert(&obj->tree_root, index, pgp); |
411 | 0 | if ( !ret ) |
412 | 0 | obj->pgp_count++; |
413 | 0 | return ret; |
414 | 0 | } |
415 | | |
416 | | static struct tmem_page_descriptor *pgp_delete_from_obj(struct tmem_object_root *obj, uint32_t index) |
417 | 0 | { |
418 | 0 | struct tmem_page_descriptor *pgp; |
419 | 0 |
420 | 0 | ASSERT(obj != NULL); |
421 | 0 | ASSERT_SPINLOCK(&obj->obj_spinlock); |
422 | 0 | ASSERT(obj->pool != NULL); |
423 | 0 | pgp = radix_tree_delete(&obj->tree_root, index); |
424 | 0 | if ( pgp != NULL ) |
425 | 0 | obj->pgp_count--; |
426 | 0 | ASSERT(obj->pgp_count >= 0); |
427 | 0 |
428 | 0 | return pgp; |
429 | 0 | } |
430 | | |
431 | | /************ RADIX TREE NODE MANIPULATION ROUTINES *******************/ |
432 | | |
433 | | /* Called only indirectly from radix_tree_insert. */ |
434 | | static struct radix_tree_node *rtn_alloc(void *arg) |
435 | 0 | { |
436 | 0 | struct tmem_object_node *objnode; |
437 | 0 | struct tmem_object_root *obj = (struct tmem_object_root *)arg; |
438 | 0 |
439 | 0 | ASSERT(obj->pool != NULL); |
440 | 0 | objnode = tmem_malloc(sizeof(struct tmem_object_node),obj->pool); |
441 | 0 | if (objnode == NULL) |
442 | 0 | return NULL; |
443 | 0 | objnode->obj = obj; |
444 | 0 | memset(&objnode->rtn, 0, sizeof(struct radix_tree_node)); |
445 | 0 | if (++obj->pool->objnode_count > obj->pool->objnode_count_max) |
446 | 0 | obj->pool->objnode_count_max = obj->pool->objnode_count; |
447 | 0 | atomic_inc_and_max(global_rtree_node_count); |
448 | 0 | obj->objnode_count++; |
449 | 0 | return &objnode->rtn; |
450 | 0 | } |
451 | | |
452 | | /* Called only indirectly from radix_tree_delete/destroy. */ |
453 | | static void rtn_free(struct radix_tree_node *rtn, void *arg) |
454 | 0 | { |
455 | 0 | struct tmem_pool *pool; |
456 | 0 | struct tmem_object_node *objnode; |
457 | 0 |
458 | 0 | ASSERT(rtn != NULL); |
459 | 0 | objnode = container_of(rtn,struct tmem_object_node,rtn); |
460 | 0 | ASSERT(objnode->obj != NULL); |
461 | 0 | ASSERT_SPINLOCK(&objnode->obj->obj_spinlock); |
462 | 0 | pool = objnode->obj->pool; |
463 | 0 | ASSERT(pool != NULL); |
464 | 0 | pool->objnode_count--; |
465 | 0 | objnode->obj->objnode_count--; |
466 | 0 | objnode->obj = NULL; |
467 | 0 | tmem_free(objnode, pool); |
468 | 0 | atomic_dec_and_assert(global_rtree_node_count); |
469 | 0 | } |
470 | | |
471 | | /************ POOL OBJECT COLLECTION MANIPULATION ROUTINES *******************/ |
472 | | |
473 | | static int oid_compare(struct xen_tmem_oid *left, |
474 | | struct xen_tmem_oid *right) |
475 | 0 | { |
476 | 0 | if ( left->oid[2] == right->oid[2] ) |
477 | 0 | { |
478 | 0 | if ( left->oid[1] == right->oid[1] ) |
479 | 0 | { |
480 | 0 | if ( left->oid[0] == right->oid[0] ) |
481 | 0 | return 0; |
482 | 0 | else if ( left->oid[0] < right->oid[0] ) |
483 | 0 | return -1; |
484 | 0 | else |
485 | 0 | return 1; |
486 | 0 | } |
487 | 0 | else if ( left->oid[1] < right->oid[1] ) |
488 | 0 | return -1; |
489 | 0 | else |
490 | 0 | return 1; |
491 | 0 | } |
492 | 0 | else if ( left->oid[2] < right->oid[2] ) |
493 | 0 | return -1; |
494 | 0 | else |
495 | 0 | return 1; |
496 | 0 | } |
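| | /* |
| |  * Editor's sketch (equivalent compact form; not in the original |
| |  * source): the nested compares above amount to a lexicographic |
| |  * compare of the three oid words, most-significant (oid[2]) first. |
| |  */ |
| | static int oid_compare_sketch(const struct xen_tmem_oid *left, |
| |                               const struct xen_tmem_oid *right) |
| | { |
| |     int i; |
| |
| |     for ( i = 2; i >= 0; i-- ) |
| |         if ( left->oid[i] != right->oid[i] ) |
| |             return left->oid[i] < right->oid[i] ? -1 : 1; |
| |     return 0; |
| | } |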
497 | | |
498 | | static void oid_set_invalid(struct xen_tmem_oid *oidp) |
499 | 0 | { |
500 | 0 | oidp->oid[0] = oidp->oid[1] = oidp->oid[2] = -1UL; |
501 | 0 | } |
502 | | |
503 | | static unsigned oid_hash(struct xen_tmem_oid *oidp) |
504 | 0 | { |
505 | 0 | return (tmem_hash(oidp->oid[0] ^ oidp->oid[1] ^ oidp->oid[2], |
506 | 0 | BITS_PER_LONG) & OBJ_HASH_BUCKETS_MASK); |
507 | 0 | } |
508 | | |
509 | | /* Searches for object==oid in pool, returns locked object if found. */ |
510 | | static struct tmem_object_root * obj_find(struct tmem_pool *pool, |
511 | | struct xen_tmem_oid *oidp) |
512 | 0 | { |
513 | 0 | struct rb_node *node; |
514 | 0 | struct tmem_object_root *obj; |
515 | 0 |
516 | 0 | restart_find: |
517 | 0 | read_lock(&pool->pool_rwlock); |
518 | 0 | node = pool->obj_rb_root[oid_hash(oidp)].rb_node; |
519 | 0 | while ( node ) |
520 | 0 | { |
521 | 0 | obj = container_of(node, struct tmem_object_root, rb_tree_node); |
522 | 0 | switch ( oid_compare(&obj->oid, oidp) ) |
523 | 0 | { |
524 | 0 | case 0: /* Equal. */ |
525 | 0 | if ( !spin_trylock(&obj->obj_spinlock) ) |
526 | 0 | { |
527 | 0 | read_unlock(&pool->pool_rwlock); |
528 | 0 | goto restart_find; |
529 | 0 | } |
530 | 0 | read_unlock(&pool->pool_rwlock); |
531 | 0 | return obj; |
532 | 0 | case -1: |
533 | 0 | node = node->rb_left; |
534 | 0 | break; |
535 | 0 | case 1: |
536 | 0 | node = node->rb_right; |
537 | 0 | } |
538 | 0 | } |
539 | 0 | read_unlock(&pool->pool_rwlock); |
540 | 0 | return NULL; |
541 | 0 | } |
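| | /* |
| |  * Editor's note (assumed caller pattern; not in the original |
| |  * source): obj_find() returns with obj->obj_spinlock held, so a |
| |  * caller must drop the lock (or hand it off to obj_free()) when |
| |  * done: |
| |  * |
| |  *     obj = obj_find(pool, oidp); |
| |  *     if ( obj != NULL ) |
| |  *     { |
| |  *         ... use obj ... |
| |  *         spin_unlock(&obj->obj_spinlock); |
| |  *     } |
| |  */ |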
542 | | |
543 | | /* Free an object that has no more pgps in it. */ |
544 | | static void obj_free(struct tmem_object_root *obj) |
545 | 0 | { |
546 | 0 | struct tmem_pool *pool; |
547 | 0 | struct xen_tmem_oid old_oid; |
548 | 0 |
549 | 0 | ASSERT_SPINLOCK(&obj->obj_spinlock); |
550 | 0 | ASSERT(obj != NULL); |
551 | 0 | ASSERT(obj->pgp_count == 0); |
552 | 0 | pool = obj->pool; |
553 | 0 | ASSERT(pool != NULL); |
554 | 0 | ASSERT(pool->client != NULL); |
555 | 0 | ASSERT_WRITELOCK(&pool->pool_rwlock); |
556 | 0 | if ( obj->tree_root.rnode != NULL ) /* May be a "stump" with no leaves. */ |
557 | 0 | radix_tree_destroy(&obj->tree_root, pgp_destroy); |
558 | 0 | ASSERT((long)obj->objnode_count == 0); |
559 | 0 | ASSERT(obj->tree_root.rnode == NULL); |
560 | 0 | pool->obj_count--; |
561 | 0 | ASSERT(pool->obj_count >= 0); |
562 | 0 | obj->pool = NULL; |
563 | 0 | old_oid = obj->oid; |
564 | 0 | oid_set_invalid(&obj->oid); |
565 | 0 | obj->last_client = TMEM_CLI_ID_NULL; |
566 | 0 | atomic_dec_and_assert(global_obj_count); |
567 | 0 | rb_erase(&obj->rb_tree_node, &pool->obj_rb_root[oid_hash(&old_oid)]); |
568 | 0 | spin_unlock(&obj->obj_spinlock); |
569 | 0 | tmem_free(obj, pool); |
570 | 0 | } |
571 | | |
572 | | static int obj_rb_insert(struct rb_root *root, struct tmem_object_root *obj) |
573 | 0 | { |
574 | 0 | struct rb_node **new, *parent = NULL; |
575 | 0 | struct tmem_object_root *this; |
576 | 0 |
577 | 0 | ASSERT(obj->pool); |
578 | 0 | ASSERT_WRITELOCK(&obj->pool->pool_rwlock); |
579 | 0 |
580 | 0 | new = &(root->rb_node); |
581 | 0 | while ( *new ) |
582 | 0 | { |
583 | 0 | this = container_of(*new, struct tmem_object_root, rb_tree_node); |
584 | 0 | parent = *new; |
585 | 0 | switch ( oid_compare(&this->oid, &obj->oid) ) |
586 | 0 | { |
587 | 0 | case 0: |
588 | 0 | return 0; |
589 | 0 | case -1: |
590 | 0 | new = &((*new)->rb_left); |
591 | 0 | break; |
592 | 0 | case 1: |
593 | 0 | new = &((*new)->rb_right); |
594 | 0 | break; |
595 | 0 | } |
596 | 0 | } |
597 | 0 | rb_link_node(&obj->rb_tree_node, parent, new); |
598 | 0 | rb_insert_color(&obj->rb_tree_node, root); |
599 | 0 | return 1; |
600 | 0 | } |
601 | | |
602 | | /* |
603 | | * Allocate, initialize, and insert an tmem_object_root |
604 | | * (should be called only if find failed). |
605 | | */ |
606 | | static struct tmem_object_root * obj_alloc(struct tmem_pool *pool, |
607 | | struct xen_tmem_oid *oidp) |
608 | 0 | { |
609 | 0 | struct tmem_object_root *obj; |
610 | 0 |
611 | 0 | ASSERT(pool != NULL); |
612 | 0 | if ( (obj = tmem_malloc(sizeof(struct tmem_object_root), pool)) == NULL ) |
613 | 0 | return NULL; |
614 | 0 | pool->obj_count++; |
615 | 0 | if (pool->obj_count > pool->obj_count_max) |
616 | 0 | pool->obj_count_max = pool->obj_count; |
617 | 0 | atomic_inc_and_max(global_obj_count); |
618 | 0 | radix_tree_init(&obj->tree_root); |
619 | 0 | radix_tree_set_alloc_callbacks(&obj->tree_root, rtn_alloc, rtn_free, obj); |
620 | 0 | spin_lock_init(&obj->obj_spinlock); |
621 | 0 | obj->pool = pool; |
622 | 0 | obj->oid = *oidp; |
623 | 0 | obj->objnode_count = 0; |
624 | 0 | obj->pgp_count = 0; |
625 | 0 | obj->last_client = TMEM_CLI_ID_NULL; |
626 | 0 | return obj; |
627 | 0 | } |
628 | | |
629 | | /* Free an object after destroying any pgps in it. */ |
630 | | static void obj_destroy(struct tmem_object_root *obj) |
631 | 0 | { |
632 | 0 | ASSERT_WRITELOCK(&obj->pool->pool_rwlock); |
633 | 0 | radix_tree_destroy(&obj->tree_root, pgp_destroy); |
634 | 0 | obj_free(obj); |
635 | 0 | } |
636 | | |
637 | | /* Destroy those objs in a pool whose obj->last_client matches cli_id. */ |
638 | | static void pool_destroy_objs(struct tmem_pool *pool, domid_t cli_id) |
639 | 0 | { |
640 | 0 | struct rb_node *node; |
641 | 0 | struct tmem_object_root *obj; |
642 | 0 | int i; |
643 | 0 |
644 | 0 | write_lock(&pool->pool_rwlock); |
645 | 0 | pool->is_dying = 1; |
646 | 0 | for (i = 0; i < OBJ_HASH_BUCKETS; i++) |
647 | 0 | { |
648 | 0 | node = rb_first(&pool->obj_rb_root[i]); |
649 | 0 | while ( node != NULL ) |
650 | 0 | { |
651 | 0 | obj = container_of(node, struct tmem_object_root, rb_tree_node); |
652 | 0 | spin_lock(&obj->obj_spinlock); |
653 | 0 | node = rb_next(node); |
654 | 0 | if ( obj->last_client == cli_id ) |
655 | 0 | obj_destroy(obj); |
656 | 0 | else |
657 | 0 | spin_unlock(&obj->obj_spinlock); |
658 | 0 | } |
659 | 0 | } |
660 | 0 | write_unlock(&pool->pool_rwlock); |
661 | 0 | } |
662 | | |
663 | | |
664 | | /************ POOL MANIPULATION ROUTINES ******************************/ |
665 | | |
666 | | static struct tmem_pool * pool_alloc(void) |
667 | 0 | { |
668 | 0 | struct tmem_pool *pool; |
669 | 0 | int i; |
670 | 0 |
671 | 0 | if ( (pool = xzalloc(struct tmem_pool)) == NULL ) |
672 | 0 | return NULL; |
673 | 0 | for (i = 0; i < OBJ_HASH_BUCKETS; i++) |
674 | 0 | pool->obj_rb_root[i] = RB_ROOT; |
675 | 0 | INIT_LIST_HEAD(&pool->persistent_page_list); |
676 | 0 | rwlock_init(&pool->pool_rwlock); |
677 | 0 | return pool; |
678 | 0 | } |
679 | | |
680 | | static void pool_free(struct tmem_pool *pool) |
681 | 0 | { |
682 | 0 | pool->client = NULL; |
683 | 0 | xfree(pool); |
684 | 0 | } |
685 | | |
686 | | /* |
687 | |  * Register new_client as a user of this shared pool and return 0 on success. |
688 | | */ |
689 | | static int shared_pool_join(struct tmem_pool *pool, struct client *new_client) |
690 | 0 | { |
691 | 0 | struct share_list *sl; |
692 | 0 | ASSERT(is_shared(pool)); |
693 | 0 |
694 | 0 | if ( (sl = tmem_malloc(sizeof(struct share_list), NULL)) == NULL ) |
695 | 0 | return -1; |
696 | 0 | sl->client = new_client; |
697 | 0 | list_add_tail(&sl->share_list, &pool->share_list); |
698 | 0 | if ( new_client->cli_id != pool->client->cli_id ) |
699 | 0 | tmem_client_info("adding new %s %d to shared pool owned by %s %d\n", |
700 | 0 | tmem_client_str, new_client->cli_id, tmem_client_str, |
701 | 0 | pool->client->cli_id); |
702 | 0 | else if ( pool->shared_count ) |
703 | 0 | tmem_client_info("inter-guest sharing of shared pool %s by client %d\n", |
704 | 0 | tmem_client_str, pool->client->cli_id); |
705 | 0 | ++pool->shared_count; |
706 | 0 | return 0; |
707 | 0 | } |
708 | | |
709 | | /* Reassign "ownership" of the pool to another client that shares this pool. */ |
710 | | static void shared_pool_reassign(struct tmem_pool *pool) |
711 | 0 | { |
712 | 0 | struct share_list *sl; |
713 | 0 | int poolid; |
714 | 0 | struct client *old_client = pool->client, *new_client; |
715 | 0 |
716 | 0 | ASSERT(is_shared(pool)); |
717 | 0 | if ( list_empty(&pool->share_list) ) |
718 | 0 | { |
719 | 0 | ASSERT(pool->shared_count == 0); |
720 | 0 | return; |
721 | 0 | } |
722 | 0 | old_client->pools[pool->pool_id] = NULL; |
723 | 0 | sl = list_entry(pool->share_list.next, struct share_list, share_list); |
724 | 0 | /* |
725 | 0 | * The sl->client can be old_client if there are multiple shared pools |
726 | 0 | * within a guest. |
727 | 0 | */ |
728 | 0 | pool->client = new_client = sl->client; |
729 | 0 | for (poolid = 0; poolid < MAX_POOLS_PER_DOMAIN; poolid++) |
730 | 0 | if (new_client->pools[poolid] == pool) |
731 | 0 | break; |
732 | 0 | ASSERT(poolid != MAX_POOLS_PER_DOMAIN); |
733 | 0 | new_client->eph_count += _atomic_read(pool->pgp_count); |
734 | 0 | old_client->eph_count -= _atomic_read(pool->pgp_count); |
735 | 0 | list_splice_init(&old_client->ephemeral_page_list, |
736 | 0 | &new_client->ephemeral_page_list); |
737 | 0 | tmem_client_info("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n", |
738 | 0 | tmem_cli_id_str, old_client->cli_id, tmem_cli_id_str, new_client->cli_id, poolid); |
739 | 0 | pool->pool_id = poolid; |
740 | 0 | } |
741 | | |
742 | | /* |
743 | | * Destroy all objects with last_client same as passed cli_id, |
744 | | * remove pool's cli_id from list of sharers of this pool. |
745 | | */ |
746 | | static int shared_pool_quit(struct tmem_pool *pool, domid_t cli_id) |
747 | 0 | { |
748 | 0 | struct share_list *sl; |
749 | 0 | int s_poolid; |
750 | 0 |
751 | 0 | ASSERT(is_shared(pool)); |
752 | 0 | ASSERT(pool->client != NULL); |
753 | 0 |
754 | 0 | ASSERT_WRITELOCK(&tmem_rwlock); |
755 | 0 | pool_destroy_objs(pool, cli_id); |
756 | 0 | list_for_each_entry(sl,&pool->share_list, share_list) |
757 | 0 | { |
758 | 0 | if (sl->client->cli_id != cli_id) |
759 | 0 | continue; |
760 | 0 | list_del(&sl->share_list); |
761 | 0 | tmem_free(sl, pool); |
762 | 0 | --pool->shared_count; |
763 | 0 | if (pool->client->cli_id == cli_id) |
764 | 0 | shared_pool_reassign(pool); |
765 | 0 | if (pool->shared_count) |
766 | 0 | return pool->shared_count; |
767 | 0 | for (s_poolid = 0; s_poolid < MAX_GLOBAL_SHARED_POOLS; s_poolid++) |
768 | 0 | if ( (tmem_global.shared_pools[s_poolid]) == pool ) |
769 | 0 | { |
770 | 0 | tmem_global.shared_pools[s_poolid] = NULL; |
771 | 0 | break; |
772 | 0 | } |
773 | 0 | return 0; |
774 | 0 | } |
775 | 0 | tmem_client_warn("tmem: no match unsharing pool, %s=%d\n", |
776 | 0 | tmem_cli_id_str,pool->client->cli_id); |
777 | 0 | return -1; |
778 | 0 | } |
779 | | |
780 | | /* Flush all data (owned by cli_id) from a pool and, optionally, free it. */ |
781 | | static void pool_flush(struct tmem_pool *pool, domid_t cli_id) |
782 | 0 | { |
783 | 0 | ASSERT(pool != NULL); |
784 | 0 | if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) ) |
785 | 0 | { |
786 | 0 | tmem_client_warn("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n", |
787 | 0 | tmem_cli_id_str, cli_id, pool->pool_id, tmem_cli_id_str,pool->client->cli_id); |
788 | 0 | return; |
789 | 0 | } |
790 | 0 | tmem_client_info("Destroying %s-%s tmem pool %s=%d pool_id=%d\n", |
791 | 0 | is_persistent(pool) ? "persistent" : "ephemeral" , |
792 | 0 | is_shared(pool) ? "shared" : "private", |
793 | 0 | tmem_cli_id_str, pool->client->cli_id, pool->pool_id); |
794 | 0 | if ( pool->client->info.flags.u.migrating ) |
795 | 0 | { |
796 | 0 | tmem_client_warn("can't destroy pool while %s is live-migrating\n", |
797 | 0 | tmem_client_str); |
798 | 0 | return; |
799 | 0 | } |
800 | 0 | pool_destroy_objs(pool, TMEM_CLI_ID_NULL); |
801 | 0 | pool->client->pools[pool->pool_id] = NULL; |
802 | 0 | pool_free(pool); |
803 | 0 | } |
804 | | |
805 | | /************ CLIENT MANIPULATION OPERATIONS **************************/ |
806 | | |
807 | | struct client *client_create(domid_t cli_id) |
808 | 0 | { |
809 | 0 | struct client *client = xzalloc(struct client); |
810 | 0 | int i, shift; |
811 | 0 | char name[5]; |
812 | 0 | struct domain *d; |
813 | 0 |
814 | 0 | tmem_client_info("tmem: initializing tmem capability for %s=%d...", |
815 | 0 | tmem_cli_id_str, cli_id); |
816 | 0 | if ( client == NULL ) |
817 | 0 | { |
818 | 0 | tmem_client_err("failed... out of memory\n"); |
819 | 0 | goto fail; |
820 | 0 | } |
821 | 0 |
822 | 0 | for (i = 0, shift = 12; i < 4; shift -=4, i++) |
823 | 0 | name[i] = (((unsigned short)cli_id >> shift) & 0xf) + '0'; |
824 | 0 | name[4] = '\0'; |
825 | 0 | client->persistent_pool = xmem_pool_create(name, tmem_persistent_pool_page_get, |
826 | 0 | tmem_persistent_pool_page_put, PAGE_SIZE, 0, PAGE_SIZE); |
827 | 0 | if ( client->persistent_pool == NULL ) |
828 | 0 | { |
829 | 0 | tmem_client_err("failed... can't alloc persistent pool\n"); |
830 | 0 | goto fail; |
831 | 0 | } |
832 | 0 |
833 | 0 | d = rcu_lock_domain_by_id(cli_id); |
834 | 0 | if ( d == NULL ) { |
835 | 0 | tmem_client_err("failed... can't set client\n"); |
836 | 0 | xmem_pool_destroy(client->persistent_pool); |
837 | 0 | goto fail; |
838 | 0 | } |
839 | 0 | if ( !d->is_dying ) { |
840 | 0 | d->tmem_client = client; |
841 | 0 | client->domain = d; |
842 | 0 | } |
843 | 0 | rcu_unlock_domain(d); |
844 | 0 |
845 | 0 | client->cli_id = cli_id; |
846 | 0 | client->info.version = TMEM_SPEC_VERSION; |
847 | 0 | client->info.maxpools = MAX_POOLS_PER_DOMAIN; |
848 | 0 | client->info.flags.u.compress = tmem_compression_enabled(); |
849 | 0 | for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) |
850 | 0 | client->shared_auth_uuid[i][0] = |
851 | 0 | client->shared_auth_uuid[i][1] = -1L; |
852 | 0 | list_add_tail(&client->client_list, &tmem_global.client_list); |
853 | 0 | INIT_LIST_HEAD(&client->ephemeral_page_list); |
854 | 0 | INIT_LIST_HEAD(&client->persistent_invalidated_list); |
855 | 0 | tmem_client_info("ok\n"); |
856 | 0 | return client; |
857 | 0 |
858 | 0 | fail: |
859 | 0 | xfree(client); |
860 | 0 | return NULL; |
861 | 0 | } |
862 | | |
863 | | static void client_free(struct client *client) |
864 | 0 | { |
865 | 0 | list_del(&client->client_list); |
866 | 0 | xmem_pool_destroy(client->persistent_pool); |
867 | 0 | xfree(client); |
868 | 0 | } |
869 | | |
870 | | /* Flush all data from a client and free it. */ |
871 | | static void client_flush(struct client *client) |
872 | 0 | { |
873 | 0 | int i; |
874 | 0 | struct tmem_pool *pool; |
875 | 0 |
876 | 0 | for (i = 0; i < MAX_POOLS_PER_DOMAIN; i++) |
877 | 0 | { |
878 | 0 | if ( (pool = client->pools[i]) == NULL ) |
879 | 0 | continue; |
880 | 0 | pool_flush(pool, client->cli_id); |
881 | 0 | client->pools[i] = NULL; |
882 | 0 | client->info.nr_pools--; |
883 | 0 | } |
884 | 0 | client_free(client); |
885 | 0 | } |
886 | | |
887 | | static bool client_over_quota(const struct client *client) |
888 | 0 | { |
889 | 0 | int total = _atomic_read(tmem_global.client_weight_total); |
890 | 0 |
891 | 0 | ASSERT(client != NULL); |
892 | 0 | if ( (total == 0) || (client->info.weight == 0) || |
893 | 0 | (client->eph_count == 0) ) |
894 | 0 | return false; |
895 | 0 |
896 | 0 | return (((tmem_global.eph_count * 100L) / client->eph_count) > |
897 | 0 | ((total * 100L) / client->info.weight)); |
898 | 0 | } |
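| | /* |
| |  * Editor's worked example (hypothetical numbers): with total weight |
| |  * 200, client weight 100, tmem_global.eph_count == 1000 and |
| |  * client->eph_count == 400, the comparison above evaluates as |
| |  *     (1000 * 100) / 400 = 250  >  (200 * 100) / 100 = 200 |
| |  * so client_over_quota() returns true. |
| |  */ |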
899 | | |
900 | | /************ MEMORY REVOCATION ROUTINES *******************************/ |
901 | | |
902 | | static bool tmem_try_to_evict_pgp(struct tmem_page_descriptor *pgp, |
903 | | bool *hold_pool_rwlock) |
904 | 0 | { |
905 | 0 | struct tmem_object_root *obj = pgp->us.obj; |
906 | 0 | struct tmem_pool *pool = obj->pool; |
907 | 0 |
908 | 0 | if ( pool->is_dying ) |
909 | 0 | return false; |
910 | 0 | if ( spin_trylock(&obj->obj_spinlock) ) |
911 | 0 | { |
912 | 0 | if ( obj->pgp_count > 1 ) |
913 | 0 | return true; |
914 | 0 | if ( write_trylock(&pool->pool_rwlock) ) |
915 | 0 | { |
916 | 0 | *hold_pool_rwlock = 1; |
917 | 0 | return true; |
918 | 0 | } |
919 | 0 | spin_unlock(&obj->obj_spinlock); |
920 | 0 | } |
921 | 0 | return false; |
922 | 0 | } |
923 | | |
924 | | int tmem_evict(void) |
925 | 0 | { |
926 | 0 | struct client *client = current->domain->tmem_client; |
927 | 0 | struct tmem_page_descriptor *pgp = NULL, *pgp_del; |
928 | 0 | struct tmem_object_root *obj; |
929 | 0 | struct tmem_pool *pool; |
930 | 0 | int ret = 0; |
931 | 0 | bool hold_pool_rwlock = false; |
932 | 0 |
933 | 0 | tmem_stats.evict_attempts++; |
934 | 0 | spin_lock(&eph_lists_spinlock); |
935 | 0 | if ( (client != NULL) && client_over_quota(client) && |
936 | 0 | !list_empty(&client->ephemeral_page_list) ) |
937 | 0 | { |
938 | 0 | list_for_each_entry(pgp, &client->ephemeral_page_list, us.client_eph_pages) |
939 | 0 | if ( tmem_try_to_evict_pgp(pgp, &hold_pool_rwlock) ) |
940 | 0 | goto found; |
941 | 0 | } |
942 | 0 | else if ( !list_empty(&tmem_global.ephemeral_page_list) ) |
943 | 0 | { |
944 | 0 | list_for_each_entry(pgp, &tmem_global.ephemeral_page_list, global_eph_pages) |
945 | 0 | if ( tmem_try_to_evict_pgp(pgp, &hold_pool_rwlock) ) |
946 | 0 | { |
947 | 0 | client = pgp->us.obj->pool->client; |
948 | 0 | goto found; |
949 | 0 | } |
950 | 0 | } |
951 | 0 | /* The global ephemeral page list is empty, so we bail out. */ |
952 | 0 | spin_unlock(&eph_lists_spinlock); |
953 | 0 | goto out; |
954 | 0 |
955 | 0 | found: |
956 | 0 | /* Delist. */ |
957 | 0 | list_del_init(&pgp->us.client_eph_pages); |
958 | 0 | client->eph_count--; |
959 | 0 | list_del_init(&pgp->global_eph_pages); |
960 | 0 | tmem_global.eph_count--; |
961 | 0 | ASSERT(tmem_global.eph_count >= 0); |
962 | 0 | ASSERT(client->eph_count >= 0); |
963 | 0 | spin_unlock(&eph_lists_spinlock); |
964 | 0 |
965 | 0 | ASSERT(pgp != NULL); |
966 | 0 | obj = pgp->us.obj; |
967 | 0 | ASSERT(obj != NULL); |
968 | 0 | ASSERT(obj->pool != NULL); |
969 | 0 | pool = obj->pool; |
970 | 0 |
971 | 0 | ASSERT_SPINLOCK(&obj->obj_spinlock); |
972 | 0 | pgp_del = pgp_delete_from_obj(obj, pgp->index); |
973 | 0 | ASSERT(pgp_del == pgp); |
974 | 0 |
975 | 0 | /* pgp is already delisted, so call pgp_free() directly. */ |
976 | 0 | pgp_free(pgp); |
977 | 0 | if ( obj->pgp_count == 0 ) |
978 | 0 | { |
979 | 0 | ASSERT_WRITELOCK(&pool->pool_rwlock); |
980 | 0 | obj_free(obj); |
981 | 0 | } |
982 | 0 | else |
983 | 0 | spin_unlock(&obj->obj_spinlock); |
984 | 0 | if ( hold_pool_rwlock ) |
985 | 0 | write_unlock(&pool->pool_rwlock); |
986 | 0 | tmem_stats.evicted_pgs++; |
987 | 0 | ret = 1; |
988 | 0 | out: |
989 | 0 | return ret; |
990 | 0 | } |
991 | | |
992 | | |
993 | | /* |
994 | | * Under certain conditions (e.g. if each client is putting pages for exactly |
995 | | * one object), once locks are held, freeing up memory may |
996 | | * result in livelocks and very long "put" times, so we try to ensure there |
997 | | * is a minimum amount of memory (1MB) available BEFORE any data structure |
998 | | * locks are held. |
999 | | */ |
1000 | | static inline bool tmem_ensure_avail_pages(void) |
1001 | 0 | { |
1002 | 0 | int failed_evict = 10; |
1003 | 0 | unsigned long free_mem; |
1004 | 0 |
1005 | 0 | do { |
1006 | 0 | free_mem = (tmem_page_list_pages + total_free_pages()) |
1007 | 0 | >> (20 - PAGE_SHIFT); |
1008 | 0 | if ( free_mem ) |
1009 | 0 | return true; |
1010 | 0 | if ( !tmem_evict() ) |
1011 | 0 | failed_evict--; |
1012 | 0 | } while ( failed_evict > 0 ); |
1013 | 0 |
1014 | 0 | return false; |
1015 | 0 | } |
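| | /* |
| |  * Editor's note (units; assuming PAGE_SHIFT == 12): the shift by |
| |  * (20 - PAGE_SHIFT) == 8 converts a count of 4K pages into MB, so |
| |  * free_mem above is nonzero once at least 256 pages (1MB) are free. |
| |  */ |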
1016 | | |
1017 | | /************ TMEM CORE OPERATIONS ************************************/ |
1018 | | |
1019 | | static int do_tmem_put_compress(struct tmem_page_descriptor *pgp, xen_pfn_t cmfn, |
1020 | | tmem_cli_va_param_t clibuf) |
1021 | 0 | { |
1022 | 0 | void *dst, *p; |
1023 | 0 | size_t size; |
1024 | 0 | int ret = 0; |
1025 | 0 |
1026 | 0 | ASSERT(pgp != NULL); |
1027 | 0 | ASSERT(pgp->us.obj != NULL); |
1028 | 0 | ASSERT_SPINLOCK(&pgp->us.obj->obj_spinlock); |
1029 | 0 | ASSERT(pgp->us.obj->pool != NULL); |
1030 | 0 | ASSERT(pgp->us.obj->pool->client != NULL); |
1031 | 0 |
1032 | 0 | if ( pgp->pfp != NULL ) |
1033 | 0 | pgp_free_data(pgp, pgp->us.obj->pool); |
1034 | 0 | ret = tmem_compress_from_client(cmfn, &dst, &size, clibuf); |
1035 | 0 | if ( ret <= 0 ) |
1036 | 0 | goto out; |
1037 | 0 | else if ( (size == 0) || (size >= tmem_mempool_maxalloc) ) { |
1038 | 0 | ret = 0; |
1039 | 0 | goto out; |
1040 | 0 | } else if ( (p = tmem_malloc(size,pgp->us.obj->pool)) == NULL ) { |
1041 | 0 | ret = -ENOMEM; |
1042 | 0 | goto out; |
1043 | 0 | } else { |
1044 | 0 | memcpy(p,dst,size); |
1045 | 0 | pgp->cdata = p; |
1046 | 0 | } |
1047 | 0 | pgp->size = size; |
1048 | 0 | pgp->us.obj->pool->client->compressed_pages++; |
1049 | 0 | pgp->us.obj->pool->client->compressed_sum_size += size; |
1050 | 0 | ret = 1; |
1051 | 0 |
1052 | 0 | out: |
1053 | 0 | return ret; |
1054 | 0 | } |
1055 | | |
1056 | | static int do_tmem_dup_put(struct tmem_page_descriptor *pgp, xen_pfn_t cmfn, |
1057 | | tmem_cli_va_param_t clibuf) |
1058 | 0 | { |
1059 | 0 | struct tmem_pool *pool; |
1060 | 0 | struct tmem_object_root *obj; |
1061 | 0 | struct client *client; |
1062 | 0 | struct tmem_page_descriptor *pgpfound = NULL; |
1063 | 0 | int ret; |
1064 | 0 |
1065 | 0 | ASSERT(pgp != NULL); |
1066 | 0 | ASSERT(pgp->pfp != NULL); |
1067 | 0 | ASSERT(pgp->size != -1); |
1068 | 0 | obj = pgp->us.obj; |
1069 | 0 | ASSERT_SPINLOCK(&obj->obj_spinlock); |
1070 | 0 | ASSERT(obj != NULL); |
1071 | 0 | pool = obj->pool; |
1072 | 0 | ASSERT(pool != NULL); |
1073 | 0 | client = pool->client; |
1074 | 0 | if ( client->info.flags.u.migrating ) |
1075 | 0 | goto failed_dup; /* No dups allowed when migrating. */ |
1076 | 0 | /* Can we successfully manipulate pgp to change out the data? */ |
1077 | 0 | if ( client->info.flags.u.compress && pgp->size != 0 ) |
1078 | 0 | { |
1079 | 0 | ret = do_tmem_put_compress(pgp, cmfn, clibuf); |
1080 | 0 | if ( ret == 1 ) |
1081 | 0 | goto done; |
1082 | 0 | else if ( ret == 0 ) |
1083 | 0 | goto copy_uncompressed; |
1084 | 0 | else if ( ret == -ENOMEM ) |
1085 | 0 | goto failed_dup; |
1086 | 0 | else if ( ret == -EFAULT ) |
1087 | 0 | goto bad_copy; |
1088 | 0 | } |
1089 | 0 |
1090 | 0 | copy_uncompressed: |
1091 | 0 | if ( pgp->pfp ) |
1092 | 0 | pgp_free_data(pgp, pool); |
1093 | 0 | if ( ( pgp->pfp = tmem_alloc_page(pool) ) == NULL ) |
1094 | 0 | goto failed_dup; |
1095 | 0 | pgp->size = 0; |
1096 | 0 | ret = tmem_copy_from_client(pgp->pfp, cmfn, tmem_cli_buf_null); |
1097 | 0 | if ( ret < 0 ) |
1098 | 0 | goto bad_copy; |
1099 | 0 |
1100 | 0 | done: |
1101 | 0 | /* Successfully replaced data, clean up and return success. */ |
1102 | 0 | if ( is_shared(pool) ) |
1103 | 0 | obj->last_client = client->cli_id; |
1104 | 0 | spin_unlock(&obj->obj_spinlock); |
1105 | 0 | pool->dup_puts_replaced++; |
1106 | 0 | pool->good_puts++; |
1107 | 0 | if ( is_persistent(pool) ) |
1108 | 0 | client->succ_pers_puts++; |
1109 | 0 | return 1; |
1110 | 0 |
1111 | 0 | bad_copy: |
1112 | 0 | tmem_stats.failed_copies++; |
1113 | 0 | goto cleanup; |
1114 | 0 |
1115 | 0 | failed_dup: |
1116 | 0 | /* |
1117 | 0 | * Couldn't change out the data, flush the old data and return |
1118 | 0 | * -ENOSPC instead of -ENOMEM to differentiate failed _dup_ put. |
1119 | 0 | */ |
1120 | 0 | ret = -ENOSPC; |
1121 | 0 | cleanup: |
1122 | 0 | pgpfound = pgp_delete_from_obj(obj, pgp->index); |
1123 | 0 | ASSERT(pgpfound == pgp); |
1124 | 0 | pgp_delist_free(pgpfound); |
1125 | 0 | if ( obj->pgp_count == 0 ) |
1126 | 0 | { |
1127 | 0 | write_lock(&pool->pool_rwlock); |
1128 | 0 | obj_free(obj); |
1129 | 0 | write_unlock(&pool->pool_rwlock); |
1130 | 0 | } else { |
1131 | 0 | spin_unlock(&obj->obj_spinlock); |
1132 | 0 | } |
1133 | 0 | pool->dup_puts_flushed++; |
1134 | 0 | return ret; |
1135 | 0 | } |
1136 | | |
1137 | | static int do_tmem_put(struct tmem_pool *pool, |
1138 | | struct xen_tmem_oid *oidp, uint32_t index, |
1139 | | xen_pfn_t cmfn, tmem_cli_va_param_t clibuf) |
1140 | 0 | { |
1141 | 0 | struct tmem_object_root *obj = NULL; |
1142 | 0 | struct tmem_page_descriptor *pgp = NULL; |
1143 | 0 | struct client *client; |
1144 | 0 | int ret, newobj = 0; |
1145 | 0 |
1146 | 0 | ASSERT(pool != NULL); |
1147 | 0 | client = pool->client; |
1148 | 0 | ASSERT(client != NULL); |
1149 | 0 | ret = client->info.flags.u.frozen ? -EFROZEN : -ENOMEM; |
1150 | 0 | pool->puts++; |
1151 | 0 |
1152 | 0 | refind: |
1153 | 0 | /* Does the page already exist (dup)? If so, handle it specially. */ |
1154 | 0 | if ( (obj = obj_find(pool, oidp)) != NULL ) |
1155 | 0 | { |
1156 | 0 | if ((pgp = pgp_lookup_in_obj(obj, index)) != NULL) |
1157 | 0 | { |
1158 | 0 | return do_tmem_dup_put(pgp, cmfn, clibuf); |
1159 | 0 | } |
1160 | 0 | else |
1161 | 0 | { |
1162 | 0 | /* No puts allowed into a frozen pool (except dup puts). */ |
1163 | 0 | if ( client->info.flags.u.frozen ) |
1164 | 0 | goto unlock_obj; |
1165 | 0 | } |
1166 | 0 | } |
1167 | 0 | else |
1168 | 0 | { |
1169 | 0 | /* No puts allowed into a frozen pool (except dup puts). */ |
1170 | 0 | if ( client->info.flags.u.frozen ) |
1171 | 0 | return ret; |
1172 | 0 | if ( (obj = obj_alloc(pool, oidp)) == NULL ) |
1173 | 0 | return -ENOMEM; |
1174 | 0 |
1175 | 0 | write_lock(&pool->pool_rwlock); |
1176 | 0 | /* |
1177 | 0 | * Parallel callers may have already allocated an obj and inserted it |
1178 | 0 | * into obj_rb_root before us. |
1179 | 0 | */ |
1180 | 0 | if ( !obj_rb_insert(&pool->obj_rb_root[oid_hash(oidp)], obj) ) |
1181 | 0 | { |
1182 | 0 | tmem_free(obj, pool); |
1183 | 0 | write_unlock(&pool->pool_rwlock); |
1184 | 0 | goto refind; |
1185 | 0 | } |
1186 | 0 |
1187 | 0 | spin_lock(&obj->obj_spinlock); |
1188 | 0 | newobj = 1; |
1189 | 0 | write_unlock(&pool->pool_rwlock); |
1190 | 0 | } |
1191 | 0 |
1192 | 0 | /* By the time we arrive here, we hold a spinlocked obj for use. */ |
1193 | 0 | ASSERT_SPINLOCK(&obj->obj_spinlock); |
1194 | 0 | if ( (pgp = pgp_alloc(obj)) == NULL ) |
1195 | 0 | goto unlock_obj; |
1196 | 0 |
1197 | 0 | ret = pgp_add_to_obj(obj, index, pgp); |
1198 | 0 | if ( ret == -ENOMEM ) |
1199 | 0 | /* Warning: may result in partially built radix tree ("stump"). */ |
1200 | 0 | goto free_pgp; |
1201 | 0 |
1202 | 0 | pgp->index = index; |
1203 | 0 | pgp->size = 0; |
1204 | 0 |
1205 | 0 | if ( client->info.flags.u.compress ) |
1206 | 0 | { |
1207 | 0 | ASSERT(pgp->pfp == NULL); |
1208 | 0 | ret = do_tmem_put_compress(pgp, cmfn, clibuf); |
1209 | 0 | if ( ret == 1 ) |
1210 | 0 | goto insert_page; |
1211 | 0 | if ( ret == -ENOMEM ) |
1212 | 0 | { |
1213 | 0 | client->compress_nomem++; |
1214 | 0 | goto del_pgp_from_obj; |
1215 | 0 | } |
1216 | 0 | if ( ret == 0 ) |
1217 | 0 | { |
1218 | 0 | client->compress_poor++; |
1219 | 0 | goto copy_uncompressed; |
1220 | 0 | } |
1221 | 0 | if ( ret == -EFAULT ) |
1222 | 0 | goto bad_copy; |
1223 | 0 | } |
1224 | 0 |
1225 | 0 | copy_uncompressed: |
1226 | 0 | if ( ( pgp->pfp = tmem_alloc_page(pool) ) == NULL ) |
1227 | 0 | { |
1228 | 0 | ret = -ENOMEM; |
1229 | 0 | goto del_pgp_from_obj; |
1230 | 0 | } |
1231 | 0 | ret = tmem_copy_from_client(pgp->pfp, cmfn, clibuf); |
1232 | 0 | if ( ret < 0 ) |
1233 | 0 | goto bad_copy; |
1234 | 0 |
1235 | 0 | insert_page: |
1236 | 0 | if ( !is_persistent(pool) ) |
1237 | 0 | { |
1238 | 0 | spin_lock(&eph_lists_spinlock); |
1239 | 0 | list_add_tail(&pgp->global_eph_pages, &tmem_global.ephemeral_page_list); |
1240 | 0 | if (++tmem_global.eph_count > tmem_stats.global_eph_count_max) |
1241 | 0 | tmem_stats.global_eph_count_max = tmem_global.eph_count; |
1242 | 0 | list_add_tail(&pgp->us.client_eph_pages, |
1243 | 0 | &client->ephemeral_page_list); |
1244 | 0 | if (++client->eph_count > client->eph_count_max) |
1245 | 0 | client->eph_count_max = client->eph_count; |
1246 | 0 | spin_unlock(&eph_lists_spinlock); |
1247 | 0 | } |
1248 | 0 | else |
1249 | 0 | { /* is_persistent. */ |
1250 | 0 | spin_lock(&pers_lists_spinlock); |
1251 | 0 | list_add_tail(&pgp->us.pool_pers_pages, |
1252 | 0 | &pool->persistent_page_list); |
1253 | 0 | spin_unlock(&pers_lists_spinlock); |
1254 | 0 | } |
1255 | 0 |
1256 | 0 | if ( is_shared(pool) ) |
1257 | 0 | obj->last_client = client->cli_id; |
1258 | 0 |
1259 | 0 | /* Release the obj spinlock. */ |
1260 | 0 | spin_unlock(&obj->obj_spinlock); |
1261 | 0 | pool->good_puts++; |
1262 | 0 |
1263 | 0 | if ( is_persistent(pool) ) |
1264 | 0 | client->succ_pers_puts++; |
1265 | 0 | else |
1266 | 0 | tmem_stats.tot_good_eph_puts++; |
1267 | 0 | return 1; |
1268 | 0 |
1269 | 0 | bad_copy: |
1270 | 0 | tmem_stats.failed_copies++; |
1271 | 0 |
1272 | 0 | del_pgp_from_obj: |
1273 | 0 | ASSERT((obj != NULL) && (pgp != NULL) && (pgp->index != -1)); |
1274 | 0 | pgp_delete_from_obj(obj, pgp->index); |
1275 | 0 |
1276 | 0 | free_pgp: |
1277 | 0 | pgp_free(pgp); |
1278 | 0 | unlock_obj: |
1279 | 0 | if ( newobj ) |
1280 | 0 | { |
1281 | 0 | write_lock(&pool->pool_rwlock); |
1282 | 0 | obj_free(obj); |
1283 | 0 | write_unlock(&pool->pool_rwlock); |
1284 | 0 | } |
1285 | 0 | else |
1286 | 0 | { |
1287 | 0 | spin_unlock(&obj->obj_spinlock); |
1288 | 0 | } |
1289 | 0 | pool->no_mem_puts++; |
1290 | 0 | return ret; |
1291 | 0 | } |
1292 | | |
1293 | | static int do_tmem_get(struct tmem_pool *pool, |
1294 | | struct xen_tmem_oid *oidp, uint32_t index, |
1295 | | xen_pfn_t cmfn, tmem_cli_va_param_t clibuf) |
1296 | 0 | { |
1297 | 0 | struct tmem_object_root *obj; |
1298 | 0 | struct tmem_page_descriptor *pgp; |
1299 | 0 | struct client *client = pool->client; |
1300 | 0 | int rc; |
1301 | 0 |
1302 | 0 | if ( !_atomic_read(pool->pgp_count) ) |
1303 | 0 | return -EEMPTY; |
1304 | 0 |
1305 | 0 | pool->gets++; |
1306 | 0 | obj = obj_find(pool,oidp); |
1307 | 0 | if ( obj == NULL ) |
1308 | 0 | return 0; |
1309 | 0 |
1310 | 0 | ASSERT_SPINLOCK(&obj->obj_spinlock); |
1311 | 0 | if (is_shared(pool) || is_persistent(pool) ) |
1312 | 0 | pgp = pgp_lookup_in_obj(obj, index); |
1313 | 0 | else |
1314 | 0 | pgp = pgp_delete_from_obj(obj, index); |
1315 | 0 | if ( pgp == NULL ) |
1316 | 0 | { |
1317 | 0 | spin_unlock(&obj->obj_spinlock); |
1318 | 0 | return 0; |
1319 | 0 | } |
1320 | 0 | ASSERT(pgp->size != -1); |
1321 | 0 | if ( pgp->size != 0 ) |
1322 | 0 | { |
1323 | 0 | rc = tmem_decompress_to_client(cmfn, pgp->cdata, pgp->size, clibuf); |
1324 | 0 | } |
1325 | 0 | else |
1326 | 0 | rc = tmem_copy_to_client(cmfn, pgp->pfp, clibuf); |
1327 | 0 | if ( rc <= 0 ) |
1328 | 0 | goto bad_copy; |
1329 | 0 |
1330 | 0 | if ( !is_persistent(pool) ) |
1331 | 0 | { |
1332 | 0 | if ( !is_shared(pool) ) |
1333 | 0 | { |
1334 | 0 | pgp_delist_free(pgp); |
1335 | 0 | if ( obj->pgp_count == 0 ) |
1336 | 0 | { |
1337 | 0 | write_lock(&pool->pool_rwlock); |
1338 | 0 | obj_free(obj); |
1339 | 0 | obj = NULL; |
1340 | 0 | write_unlock(&pool->pool_rwlock); |
1341 | 0 | } |
1342 | 0 | } else { |
1343 | 0 | spin_lock(&eph_lists_spinlock); |
1344 | 0 | list_del(&pgp->global_eph_pages); |
1345 | 0 | list_add_tail(&pgp->global_eph_pages,&tmem_global.ephemeral_page_list); |
1346 | 0 | list_del(&pgp->us.client_eph_pages); |
1347 | 0 | list_add_tail(&pgp->us.client_eph_pages,&client->ephemeral_page_list); |
1348 | 0 | spin_unlock(&eph_lists_spinlock); |
1349 | 0 | obj->last_client = current->domain->domain_id; |
1350 | 0 | } |
1351 | 0 | } |
1352 | 0 | if ( obj != NULL ) |
1353 | 0 | { |
1354 | 0 | spin_unlock(&obj->obj_spinlock); |
1355 | 0 | } |
1356 | 0 | pool->found_gets++; |
1357 | 0 | if ( is_persistent(pool) ) |
1358 | 0 | client->succ_pers_gets++; |
1359 | 0 | else |
1360 | 0 | client->succ_eph_gets++; |
1361 | 0 | return 1; |
1362 | 0 |
1363 | 0 | bad_copy: |
1364 | 0 | spin_unlock(&obj->obj_spinlock); |
1365 | 0 | tmem_stats.failed_copies++; |
1366 | 0 | return rc; |
1367 | 0 | } |
1368 | | |
1369 | | static int do_tmem_flush_page(struct tmem_pool *pool, |
1370 | | struct xen_tmem_oid *oidp, uint32_t index) |
1371 | 0 | { |
1372 | 0 | struct tmem_object_root *obj; |
1373 | 0 | struct tmem_page_descriptor *pgp; |
1374 | 0 |
1375 | 0 | pool->flushs++; |
1376 | 0 | obj = obj_find(pool,oidp); |
1377 | 0 | if ( obj == NULL ) |
1378 | 0 | goto out; |
1379 | 0 | pgp = pgp_delete_from_obj(obj, index); |
1380 | 0 | if ( pgp == NULL ) |
1381 | 0 | { |
1382 | 0 | spin_unlock(&obj->obj_spinlock); |
1383 | 0 | goto out; |
1384 | 0 | } |
1385 | 0 | pgp_delist_free(pgp); |
1386 | 0 | if ( obj->pgp_count == 0 ) |
1387 | 0 | { |
1388 | 0 | write_lock(&pool->pool_rwlock); |
1389 | 0 | obj_free(obj); |
1390 | 0 | write_unlock(&pool->pool_rwlock); |
1391 | 0 | } else { |
1392 | 0 | spin_unlock(&obj->obj_spinlock); |
1393 | 0 | } |
1394 | 0 | pool->flushs_found++; |
1395 | 0 |
1396 | 0 | out: |
1397 | 0 | if ( pool->client->info.flags.u.frozen ) |
1398 | 0 | return -EFROZEN; |
1399 | 0 | else |
1400 | 0 | return 1; |
1401 | 0 | } |
1402 | | |
1403 | | static int do_tmem_flush_object(struct tmem_pool *pool, |
1404 | | struct xen_tmem_oid *oidp) |
1405 | 0 | { |
1406 | 0 | struct tmem_object_root *obj; |
1407 | 0 |
1408 | 0 | pool->flush_objs++; |
1409 | 0 | obj = obj_find(pool,oidp); |
1410 | 0 | if ( obj == NULL ) |
1411 | 0 | goto out; |
1412 | 0 | write_lock(&pool->pool_rwlock); |
1413 | 0 | obj_destroy(obj); |
1414 | 0 | pool->flush_objs_found++; |
1415 | 0 | write_unlock(&pool->pool_rwlock); |
1416 | 0 |
1417 | 0 | out: |
1418 | 0 | if ( pool->client->info.flags.u.frozen ) |
1419 | 0 | return -EFROZEN; |
1420 | 0 | else |
1421 | 0 | return 1; |
1422 | 0 | } |
1423 | | |
1424 | | static int do_tmem_destroy_pool(uint32_t pool_id) |
1425 | 0 | { |
1426 | 0 | struct client *client = current->domain->tmem_client; |
1427 | 0 | struct tmem_pool *pool; |
1428 | 0 |
1429 | 0 | if ( pool_id >= MAX_POOLS_PER_DOMAIN ) |
1430 | 0 | return 0; |
1431 | 0 | if ( (pool = client->pools[pool_id]) == NULL ) |
1432 | 0 | return 0; |
1433 | 0 | client->pools[pool_id] = NULL; |
1434 | 0 | pool_flush(pool, client->cli_id); |
1435 | 0 | client->info.nr_pools--; |
1436 | 0 | return 1; |
1437 | 0 | } |
1438 | | |
1439 | | int do_tmem_new_pool(domid_t this_cli_id, |
1440 | | uint32_t d_poolid, uint32_t flags, |
1441 | | uint64_t uuid_lo, uint64_t uuid_hi) |
1442 | 0 | { |
1443 | 0 | struct client *client; |
1444 | 0 | domid_t cli_id; |
1445 | 0 | int persistent = flags & TMEM_POOL_PERSIST; |
1446 | 0 | int shared = flags & TMEM_POOL_SHARED; |
1447 | 0 | int pagebits = (flags >> TMEM_POOL_PAGESIZE_SHIFT) |
1448 | 0 | & TMEM_POOL_PAGESIZE_MASK; |
1449 | 0 | int specversion = (flags >> TMEM_POOL_VERSION_SHIFT) |
1450 | 0 | & TMEM_POOL_VERSION_MASK; |
1451 | 0 | struct tmem_pool *pool, *shpool; |
1452 | 0 | int i, first_unused_s_poolid; |
1453 | 0 |
1454 | 0 | if ( this_cli_id == TMEM_CLI_ID_NULL ) |
1455 | 0 | cli_id = current->domain->domain_id; |
1456 | 0 | else |
1457 | 0 | cli_id = this_cli_id; |
1458 | 0 | tmem_client_info("tmem: allocating %s-%s tmem pool for %s=%d...", |
1459 | 0 | persistent ? "persistent" : "ephemeral" , |
1460 | 0 | shared ? "shared" : "private", tmem_cli_id_str, cli_id); |
1461 | 0 | if ( specversion != TMEM_SPEC_VERSION ) |
1462 | 0 | { |
1463 | 0 | tmem_client_err("failed... unsupported spec version\n"); |
1464 | 0 | return -EPERM; |
1465 | 0 | } |
1466 | 0 | if ( shared && persistent ) |
1467 | 0 | { |
1468 | 0 | tmem_client_err("failed... unable to create a shared-persistent pool\n"); |
1469 | 0 | return -EPERM; |
1470 | 0 | } |
1471 | 0 | if ( pagebits != (PAGE_SHIFT - 12) ) |
1472 | 0 | { |
1473 | 0 | tmem_client_err("failed... unsupported pagesize %d\n", |
1474 | 0 | 1 << (pagebits + 12)); |
1475 | 0 | return -EPERM; |
1476 | 0 | } |
1477 | 0 | if ( flags & TMEM_POOL_PRECOMPRESSED ) |
1478 | 0 | { |
1479 | 0 | tmem_client_err("failed... precompression flag set but unsupported\n"); |
1480 | 0 | return -EPERM; |
1481 | 0 | } |
1482 | 0 | if ( flags & TMEM_POOL_RESERVED_BITS ) |
1483 | 0 | { |
1484 | 0 | tmem_client_err("failed... reserved bits must be zero\n"); |
1485 | 0 | return -EPERM; |
1486 | 0 | } |
1487 | 0 | if ( this_cli_id != TMEM_CLI_ID_NULL ) |
1488 | 0 | { |
1489 | 0 | if ( (client = tmem_client_from_cli_id(this_cli_id)) == NULL |
1490 | 0 | || d_poolid >= MAX_POOLS_PER_DOMAIN |
1491 | 0 | || client->pools[d_poolid] != NULL ) |
1492 | 0 | return -EPERM; |
1493 | 0 | } |
1494 | 0 | else |
1495 | 0 | { |
1496 | 0 | client = current->domain->tmem_client; |
1497 | 0 | ASSERT(client != NULL); |
1498 | 0 | for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ ) |
1499 | 0 | if ( client->pools[d_poolid] == NULL ) |
1500 | 0 | break; |
1501 | 0 | if ( d_poolid >= MAX_POOLS_PER_DOMAIN ) |
1502 | 0 | { |
1503 | 0 | tmem_client_err("failed... no more pool slots available for this %s\n", |
1504 | 0 | tmem_client_str); |
1505 | 0 | return -EPERM; |
1506 | 0 | } |
1507 | 0 | } |
1508 | 0 |
1509 | 0 | if ( (pool = pool_alloc()) == NULL ) |
1510 | 0 | { |
1511 | 0 | tmem_client_err("failed... out of memory\n"); |
1512 | 0 | return -ENOMEM; |
1513 | 0 | } |
1514 | 0 | client->pools[d_poolid] = pool; |
1515 | 0 | pool->client = client; |
1516 | 0 | pool->pool_id = d_poolid; |
1517 | 0 | pool->shared = shared; |
1518 | 0 | pool->persistent = persistent; |
1519 | 0 | pool->uuid[0] = uuid_lo; |
1520 | 0 | pool->uuid[1] = uuid_hi; |
1521 | 0 |
1522 | 0 | /* |
1523 | 0 | * A pool has already been created by the time we arrive here, but |
1524 | 0 | * shared pools need some special processing. |
1525 | 0 | */ |
1526 | 0 | if ( shared ) |
1527 | 0 | { |
1528 | 0 | if ( uuid_lo == -1L && uuid_hi == -1L ) |
1529 | 0 | { |
1530 | 0 | tmem_client_info("Invalid uuid, creating a non-shared pool instead!\n"); |
1531 | 0 | pool->shared = 0; |
1532 | 0 | goto out; |
1533 | 0 | } |
1534 | 0 | if ( !tmem_global.shared_auth ) |
1535 | 0 | { |
1536 | 0 | for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) |
1537 | 0 | if ( (client->shared_auth_uuid[i][0] == uuid_lo) && |
1538 | 0 | (client->shared_auth_uuid[i][1] == uuid_hi) ) |
1539 | 0 | break; |
1540 | 0 | if ( i == MAX_GLOBAL_SHARED_POOLS ) |
1541 | 0 | { |
1542 | 0 | tmem_client_info("Shared auth failed, creating a non-shared pool instead!\n"); |
1543 | 0 | pool->shared = 0; |
1544 | 0 | goto out; |
1545 | 0 | } |
1546 | 0 | } |
1547 | 0 |
1548 | 0 | /* |
1549 | 0 | * Authorization okay; match a global shared pool or use the newly allocated |
1550 | 0 | * one. |
1551 | 0 | */ |
1552 | 0 | first_unused_s_poolid = MAX_GLOBAL_SHARED_POOLS; |
1553 | 0 | for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++ ) |
1554 | 0 | { |
1555 | 0 | if ( (shpool = tmem_global.shared_pools[i]) != NULL ) |
1556 | 0 | { |
1557 | 0 | if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi ) |
1558 | 0 | { |
1559 | 0 | /* Succeeded in matching a global shared pool. */ |
1560 | 0 | tmem_client_info("(matches shared pool uuid=%"PRIx64".%"PRIx64") pool_id=%d\n", |
1561 | 0 | uuid_hi, uuid_lo, d_poolid); |
1562 | 0 | client->pools[d_poolid] = shpool; |
1563 | 0 | if ( !shared_pool_join(shpool, client) ) |
1564 | 0 | { |
1565 | 0 | pool_free(pool); |
1566 | 0 | goto out; |
1567 | 0 | } |
1568 | 0 | else |
1569 | 0 | goto fail; |
1570 | 0 | } |
1571 | 0 | } |
1572 | 0 | else |
1573 | 0 | { |
1574 | 0 | if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS ) |
1575 | 0 | first_unused_s_poolid = i; |
1576 | 0 | } |
1577 | 0 | } |
1578 | 0 |
|
1579 | 0 | /* No match was found; fail if there is no free global slot either. */
1580 | 0 | if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS ) |
1581 | 0 | { |
1582 | 0 | tmem_client_warn("tmem: failed... no global shared pool slots available\n"); |
1583 | 0 | goto fail; |
1584 | 0 | } |
1585 | 0 | /* Otherwise install the new pool in the free global slot. */
1586 | 0 | else |
1587 | 0 | { |
1588 | 0 | INIT_LIST_HEAD(&pool->share_list); |
1589 | 0 | pool->shared_count = 0; |
1590 | 0 | if ( shared_pool_join(pool, client) ) |
1591 | 0 | goto fail; |
1592 | 0 | tmem_global.shared_pools[first_unused_s_poolid] = pool; |
1593 | 0 | } |
1594 | 0 | } |
1595 | 0 |
|
1596 | 0 | out: |
1597 | 0 | tmem_client_info("pool_id=%d\n", d_poolid); |
1598 | 0 | client->info.nr_pools++; |
1599 | 0 | return d_poolid; |
1600 | 0 |
|
1601 | 0 | fail: |
1602 | 0 | pool_free(pool); |
1603 | 0 | return -EPERM; |
1604 | 0 | } |
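/*
 * Editor's sketch, not part of the original source: how a client might
 * assemble the flags word that do_tmem_new_pool() decodes above.  The
 * TMEM_POOL_* shift names are assumed to follow public/tmem.h; pagebits
 * encodes log2(pagesize) - 12, which is why the diagnostic above prints
 * "1 << (pagebits + 12)".
 */
#if 0 /* Illustrative only. */
static uint32_t example_pool_flags(bool persistent, bool shared)
{
    uint32_t flags = 0;

    if ( persistent )
        flags |= TMEM_POOL_PERSIST;
    if ( shared )
        flags |= TMEM_POOL_SHARED;
    /* pagebits == 0 selects 4KiB pages: 1 << (0 + 12) == PAGE_SIZE. */
    flags |= 0 << TMEM_POOL_PAGESIZE_SHIFT;
    flags |= TMEM_SPEC_VERSION << TMEM_POOL_VERSION_SHIFT;
    return flags;
}
#endif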
1605 | | |
1606 | | /************ TMEM CONTROL OPERATIONS ************************************/ |
1607 | | |
1608 | | int tmemc_shared_pool_auth(domid_t cli_id, uint64_t uuid_lo, |
1609 | | uint64_t uuid_hi, bool auth) |
1610 | 0 | { |
1611 | 0 | struct client *client; |
1612 | 0 | int i, free = -1; |
1613 | 0 |
|
1614 | 0 | if ( cli_id == TMEM_CLI_ID_NULL ) |
1615 | 0 | { |
1616 | 0 | tmem_global.shared_auth = auth; |
1617 | 0 | return 1; |
1618 | 0 | } |
1619 | 0 | client = tmem_client_from_cli_id(cli_id); |
1620 | 0 | if ( client == NULL ) |
1621 | 0 | return -EINVAL; |
1622 | 0 |
|
1623 | 0 | for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++ )
1624 | 0 | { |
1625 | 0 | if ( auth == 0 ) |
1626 | 0 | { |
1627 | 0 | if ( (client->shared_auth_uuid[i][0] == uuid_lo) && |
1628 | 0 | (client->shared_auth_uuid[i][1] == uuid_hi) ) |
1629 | 0 | { |
1630 | 0 | client->shared_auth_uuid[i][0] = -1L; |
1631 | 0 | client->shared_auth_uuid[i][1] = -1L; |
1632 | 0 | return 1; |
1633 | 0 | } |
1634 | 0 | } |
1635 | 0 | else |
1636 | 0 | { |
1637 | 0 | if ( (client->shared_auth_uuid[i][0] == -1L) && |
1638 | 0 | (client->shared_auth_uuid[i][1] == -1L) ) |
1639 | 0 | { |
1640 | 0 | free = i; |
1641 | 0 | break; |
1642 | 0 | } |
1643 | 0 | } |
1644 | 0 | } |
1645 | 0 | if ( auth == 0 ) |
1646 | 0 | return 0; |
1647 | 0 | else if ( free == -1) |
1648 | 0 | return -ENOMEM; |
1649 | 0 | else |
1650 | 0 | { |
1651 | 0 | client->shared_auth_uuid[free][0] = uuid_lo; |
1652 | 0 | client->shared_auth_uuid[free][1] = uuid_hi; |
1653 | 0 | return 1; |
1654 | 0 | } |
1655 | 0 | } |
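/*
 * Editor's note (sketch, not original source): the per-client auth table
 * uses the UUID pair (-1, -1) as its "free slot" sentinel, which is also
 * why do_tmem_new_pool() above refuses a shared pool whose UUID is -1/-1.
 * A toolstack would authorize a UUID before the guest creates the pool,
 * e.g. (domain id 5 is hypothetical):
 */
#if 0 /* Illustrative only. */
    /* Allow domain 5 to join the shared pool identified by uuid_lo/hi. */
    tmemc_shared_pool_auth(5, uuid_lo, uuid_hi, true);
    /* Revoke it later; the slot reverts to the -1/-1 sentinel. */
    tmemc_shared_pool_auth(5, uuid_lo, uuid_hi, false);
#endif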
1656 | | |
1657 | | static int tmemc_save_subop(int cli_id, uint32_t pool_id, |
1658 | | uint32_t subop, tmem_cli_va_param_t buf, uint32_t arg) |
1659 | 0 | { |
1660 | 0 | struct client *client = tmem_client_from_cli_id(cli_id); |
1661 | 0 | uint32_t p; |
1662 | 0 | struct tmem_page_descriptor *pgp, *pgp2; |
1663 | 0 | int rc = -ENOENT; |
1664 | 0 |
|
1665 | 0 | switch ( subop )
1666 | 0 | { |
1667 | 0 | case XEN_SYSCTL_TMEM_OP_SAVE_BEGIN: |
1668 | 0 | if ( client == NULL ) |
1669 | 0 | break; |
1670 | 0 | for ( p = 0; p < MAX_POOLS_PER_DOMAIN; p++ )
1671 | 0 | if ( client->pools[p] != NULL ) |
1672 | 0 | break; |
1673 | 0 |
|
1674 | 0 | if ( p == MAX_POOLS_PER_DOMAIN ) |
1675 | 0 | break; |
1676 | 0 |
|
1677 | 0 | client->was_frozen = client->info.flags.u.frozen; |
1678 | 0 | client->info.flags.u.frozen = 1; |
1679 | 0 | if ( arg != 0 ) |
1680 | 0 | client->info.flags.u.migrating = 1; |
1681 | 0 | rc = 0; |
1682 | 0 | break; |
1683 | 0 | case XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN: |
1684 | 0 | if ( client == NULL ) |
1685 | 0 | rc = client_create(cli_id) ? 0 : -ENOMEM; |
1686 | 0 | else |
1687 | 0 | rc = -EEXIST; |
1688 | 0 | break; |
1689 | 0 | case XEN_SYSCTL_TMEM_OP_SAVE_END: |
1690 | 0 | if ( client == NULL ) |
1691 | 0 | break; |
1692 | 0 | client->info.flags.u.migrating = 0; |
1693 | 0 | if ( !list_empty(&client->persistent_invalidated_list) ) |
1694 | 0 | list_for_each_entry_safe(pgp, pgp2,
1695 | 0 | &client->persistent_invalidated_list, client_inv_pages) |
1696 | 0 | __pgp_free(pgp, client->pools[pgp->pool_id]); |
1697 | 0 | client->info.flags.u.frozen = client->was_frozen; |
1698 | 0 | rc = 0; |
1699 | 0 | break; |
1700 | 0 | } |
1701 | 0 | return rc; |
1702 | 0 | } |
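/*
 * Editor's sketch (assumed flow, not original source): the save-side
 * ordering a migration caller is expected to follow.  SAVE_BEGIN freezes
 * the client (arg != 0 also marks it migrating), the GET_NEXT_* subops
 * drain pages, and SAVE_END restores the pre-save frozen state and frees
 * the persistent-invalidated list.
 */
#if 0 /* Illustrative only. */
    tmemc_save_subop(cli_id, 0, XEN_SYSCTL_TMEM_OP_SAVE_BEGIN, buf, 1);
    /* A negative return signals end-of-list or an error. */
    while ( tmemc_save_get_next_page(cli_id, pool_id, buf, bufsize) >= 0 )
        ; /* Emit each handle + page into the migration stream. */
    tmemc_save_subop(cli_id, 0, XEN_SYSCTL_TMEM_OP_SAVE_END, buf, 0);
#endif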
1703 | | |
1704 | | static int tmemc_save_get_next_page(int cli_id, uint32_t pool_id, |
1705 | | tmem_cli_va_param_t buf, uint32_t bufsize) |
1706 | 0 | { |
1707 | 0 | struct client *client = tmem_client_from_cli_id(cli_id); |
1708 | 0 | struct tmem_pool *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) |
1709 | 0 | ? NULL : client->pools[pool_id]; |
1710 | 0 | struct tmem_page_descriptor *pgp; |
1711 | 0 | struct xen_tmem_oid *oid; |
1712 | 0 | int ret = 0; |
1713 | 0 | struct tmem_handle h; |
1714 | 0 |
|
1715 | 0 | if ( pool == NULL || !is_persistent(pool) ) |
1716 | 0 | return -1; |
1717 | 0 |
|
1718 | 0 | if ( bufsize < PAGE_SIZE + sizeof(struct tmem_handle) ) |
1719 | 0 | return -ENOMEM; |
1720 | 0 |
|
1721 | 0 | spin_lock(&pers_lists_spinlock); |
1722 | 0 | if ( list_empty(&pool->persistent_page_list) ) |
1723 | 0 | { |
1724 | 0 | ret = -1; |
1725 | 0 | goto out; |
1726 | 0 | } |
1727 | 0 | /* Note: pool->cur_pgp is the pgp last returned by get_next_page. */ |
1728 | 0 | if ( pool->cur_pgp == NULL ) |
1729 | 0 | { |
1730 | 0 | /* Process the first one. */ |
1731 | 0 | pool->cur_pgp = pgp = list_entry((&pool->persistent_page_list)->next, |
1732 | 0 | struct tmem_page_descriptor, us.pool_pers_pages);
1733 | 0 | } else if ( list_is_last(&pool->cur_pgp->us.pool_pers_pages, |
1734 | 0 | &pool->persistent_page_list) ) |
1735 | 0 | { |
1736 | 0 | /* Already processed the last one in the list. */ |
1737 | 0 | ret = -1; |
1738 | 0 | goto out; |
1739 | 0 | } |
1740 | 0 | pgp = list_entry((&pool->cur_pgp->us.pool_pers_pages)->next, |
1741 | 0 | struct tmem_page_descriptor, us.pool_pers_pages);
1742 | 0 | pool->cur_pgp = pgp; |
1743 | 0 | oid = &pgp->us.obj->oid; |
1744 | 0 | h.pool_id = pool_id; |
1745 | 0 | BUILD_BUG_ON(sizeof(h.oid) != sizeof(*oid)); |
1746 | 0 | memcpy(&(h.oid), oid, sizeof(h.oid)); |
1747 | 0 | h.index = pgp->index; |
1748 | 0 | if ( copy_to_guest(guest_handle_cast(buf, void), &h, 1) ) |
1749 | 0 | { |
1750 | 0 | ret = -EFAULT; |
1751 | 0 | goto out; |
1752 | 0 | } |
1753 | 0 | guest_handle_add_offset(buf, sizeof(h)); |
1754 | 0 | ret = do_tmem_get(pool, oid, pgp->index, 0, buf); |
1755 | 0 |
|
1756 | 0 | out: |
1757 | 0 | spin_unlock(&pers_lists_spinlock); |
1758 | 0 | return ret; |
1759 | 0 | } |
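/*
 * Editor's note (sketch): each successful call above fills the caller's
 * buffer with a struct tmem_handle header followed by the page payload,
 * hence the "PAGE_SIZE + sizeof(struct tmem_handle)" minimum-size check.
 * The record a consumer would parse looks like (hypothetical layout):
 */
#if 0 /* Illustrative only. */
struct saved_page_record {
    struct tmem_handle h;    /* pool_id, oid, index. */
    uint8_t data[PAGE_SIZE]; /* Page contents written by do_tmem_get(). */
};
#endif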
1760 | | |
1761 | | static int tmemc_save_get_next_inv(int cli_id, tmem_cli_va_param_t buf, |
1762 | | uint32_t bufsize) |
1763 | 0 | { |
1764 | 0 | struct client *client = tmem_client_from_cli_id(cli_id); |
1765 | 0 | struct tmem_page_descriptor *pgp; |
1766 | 0 | struct tmem_handle h; |
1767 | 0 | int ret = 0; |
1768 | 0 |
|
1769 | 0 | if ( client == NULL ) |
1770 | 0 | return 0; |
1771 | 0 | if ( bufsize < sizeof(struct tmem_handle) ) |
1772 | 0 | return 0; |
1773 | 0 | spin_lock(&pers_lists_spinlock); |
1774 | 0 | if ( list_empty(&client->persistent_invalidated_list) ) |
1775 | 0 | goto out; |
1776 | 0 | if ( client->cur_pgp == NULL ) |
1777 | 0 | { |
1778 | 0 | pgp = list_entry((&client->persistent_invalidated_list)->next, |
1779 | 0 | struct tmem_page_descriptor, client_inv_pages);
1780 | 0 | client->cur_pgp = pgp; |
1781 | 0 | } else if ( list_is_last(&client->cur_pgp->client_inv_pages, |
1782 | 0 | &client->persistent_invalidated_list) ) |
1783 | 0 | { |
1784 | 0 | client->cur_pgp = NULL; |
1785 | 0 | ret = 0; |
1786 | 0 | goto out; |
1787 | 0 | } else { |
1788 | 0 | pgp = list_entry((&client->cur_pgp->client_inv_pages)->next, |
1789 | 0 | struct tmem_page_descriptor, client_inv_pages);
1790 | 0 | client->cur_pgp = pgp; |
1791 | 0 | } |
1792 | 0 | h.pool_id = pgp->pool_id; |
1793 | 0 | BUILD_BUG_ON(sizeof(h.oid) != sizeof(pgp->inv_oid)); |
1794 | 0 | memcpy(&(h.oid), &(pgp->inv_oid), sizeof(h.oid)); |
1795 | 0 | h.index = pgp->index; |
1796 | 0 | ret = 1; |
1797 | 0 | if ( copy_to_guest(guest_handle_cast(buf, void), &h, 1) ) |
1798 | 0 | ret = -EFAULT; |
1799 | 0 | out: |
1800 | 0 | spin_unlock(&pers_lists_spinlock); |
1801 | 0 | return ret; |
1802 | 0 | } |
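/*
 * Editor's note (sketch): both iterators above keep a cursor (pool->cur_pgp
 * or client->cur_pgp) into a list protected by pers_lists_spinlock, but
 * their end-of-list conventions differ: get_next_page leaves the cursor on
 * the last element and returns -1 thereafter, while get_next_inv resets the
 * cursor to NULL and returns 0, allowing a second full pass.  A caller loop
 * might look like:
 */
#if 0 /* Illustrative only. */
    while ( tmemc_save_get_next_inv(cli_id, buf, bufsize) == 1 )
        ; /* Record the invalidated (pool_id, oid, index) handle. */
#endif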
1803 | | |
1804 | | static int tmemc_restore_put_page(int cli_id, uint32_t pool_id, |
1805 | | struct xen_tmem_oid *oidp, |
1806 | | uint32_t index, tmem_cli_va_param_t buf, |
1807 | | uint32_t bufsize) |
1808 | 0 | { |
1809 | 0 | struct client *client = tmem_client_from_cli_id(cli_id); |
1810 | 0 | struct tmem_pool *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) |
1811 | 0 | ? NULL : client->pools[pool_id]; |
1812 | 0 |
|
1813 | 0 | if ( pool == NULL ) |
1814 | 0 | return -1; |
1815 | 0 | if ( bufsize != PAGE_SIZE ) {
1816 | 0 | tmem_client_err("tmem: %s: invalid bufsize %d (expected %ld)\n",
1817 | 0 | __func__, bufsize, PAGE_SIZE); |
1818 | 0 | return -EINVAL; |
1819 | 0 | } |
1820 | 0 | return do_tmem_put(pool, oidp, index, 0, buf); |
1821 | 0 | } |
1822 | | |
1823 | | static int tmemc_restore_flush_page(int cli_id, uint32_t pool_id, |
1824 | | struct xen_tmem_oid *oidp, |
1825 | | uint32_t index) |
1826 | 0 | { |
1827 | 0 | struct client *client = tmem_client_from_cli_id(cli_id); |
1828 | 0 | struct tmem_pool *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) |
1829 | 0 | ? NULL : client->pools[pool_id]; |
1830 | 0 |
|
1831 | 0 | if ( pool == NULL ) |
1832 | 0 | return -1; |
1833 | 0 | return do_tmem_flush_page(pool, oidp, index);
1834 | 0 | } |
1835 | | |
1836 | | int do_tmem_control(struct xen_sysctl_tmem_op *op) |
1837 | 0 | { |
1838 | 0 | int ret; |
1839 | 0 | uint32_t pool_id = op->pool_id; |
1840 | 0 | uint32_t cmd = op->cmd; |
1841 | 0 | struct xen_tmem_oid *oidp = &op->oid; |
1842 | 0 |
|
1843 | 0 | ASSERT(rw_is_write_locked(&tmem_rwlock)); |
1844 | 0 |
|
1845 | 0 | switch ( cmd )
1846 | 0 | { |
1847 | 0 | case XEN_SYSCTL_TMEM_OP_SAVE_BEGIN: |
1848 | 0 | case XEN_SYSCTL_TMEM_OP_RESTORE_BEGIN: |
1849 | 0 | case XEN_SYSCTL_TMEM_OP_SAVE_END: |
1850 | 0 | ret = tmemc_save_subop(op->cli_id, pool_id, cmd, |
1851 | 0 | guest_handle_cast(op->u.buf, char), op->arg); |
1852 | 0 | break; |
1853 | 0 | case XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_PAGE: |
1854 | 0 | ret = tmemc_save_get_next_page(op->cli_id, pool_id, |
1855 | 0 | guest_handle_cast(op->u.buf, char), op->len); |
1856 | 0 | break; |
1857 | 0 | case XEN_SYSCTL_TMEM_OP_SAVE_GET_NEXT_INV: |
1858 | 0 | ret = tmemc_save_get_next_inv(op->cli_id, |
1859 | 0 | guest_handle_cast(op->u.buf, char), op->len); |
1860 | 0 | break; |
1861 | 0 | case XEN_SYSCTL_TMEM_OP_RESTORE_PUT_PAGE: |
1862 | 0 | ret = tmemc_restore_put_page(op->cli_id, pool_id, oidp, op->arg, |
1863 | 0 | guest_handle_cast(op->u.buf, char), op->len); |
1864 | 0 | break; |
1865 | 0 | case XEN_SYSCTL_TMEM_OP_RESTORE_FLUSH_PAGE: |
1866 | 0 | ret = tmemc_restore_flush_page(op->cli_id, pool_id, oidp, op->arg); |
1867 | 0 | break; |
1868 | 0 | default: |
1869 | 0 | ret = -1; |
1870 | 0 | } |
1871 | 0 |
|
1872 | 0 | return ret; |
1873 | 0 | } |
1874 | | |
1875 | | /************ EXPORTed FUNCTIONS **************************************/ |
1876 | | |
1877 | | long do_tmem_op(tmem_cli_op_t uops) |
1878 | 0 | { |
1879 | 0 | struct tmem_op op; |
1880 | 0 | struct client *client = current->domain->tmem_client; |
1881 | 0 | struct tmem_pool *pool = NULL; |
1882 | 0 | struct xen_tmem_oid *oidp; |
1883 | 0 | int rc = 0; |
1884 | 0 |
|
1885 | 0 | if ( !tmem_initialized ) |
1886 | 0 | return -ENODEV; |
1887 | 0 |
|
1888 | 0 | if ( xsm_tmem_op(XSM_HOOK) ) |
1889 | 0 | return -EPERM; |
1890 | 0 |
|
1891 | 0 | tmem_stats.total_tmem_ops++; |
1892 | 0 |
|
1893 | 0 | if ( client != NULL && client->domain->is_dying ) |
1894 | 0 | { |
1895 | 0 | tmem_stats.errored_tmem_ops++; |
1896 | 0 | return -ENODEV; |
1897 | 0 | } |
1898 | 0 |
|
1899 | 0 | if ( unlikely(tmem_get_tmemop_from_client(&op, uops) != 0) ) |
1900 | 0 | { |
1901 | 0 | tmem_client_err("tmem: can't get tmem struct from %s\n", tmem_client_str); |
1902 | 0 | tmem_stats.errored_tmem_ops++; |
1903 | 0 | return -EFAULT; |
1904 | 0 | } |
1905 | 0 |
|
1906 | 0 | /* Acquire write lock for all commands at first. */ |
1907 | 0 | write_lock(&tmem_rwlock); |
1908 | 0 |
|
1909 | 0 | switch ( op.cmd ) |
1910 | 0 | { |
1911 | 0 | case TMEM_CONTROL: |
1912 | 0 | case TMEM_RESTORE_NEW: |
1913 | 0 | case TMEM_AUTH: |
1914 | 0 | rc = -EOPNOTSUPP; |
1915 | 0 | break; |
1916 | 0 |
|
1917 | 0 | default: |
1918 | 0 | /* |
1919 | 0 | * For other commands, create per-client tmem structure dynamically on |
1920 | 0 | * first use by client. |
1921 | 0 | */ |
1922 | 0 | if ( client == NULL ) |
1923 | 0 | { |
1924 | 0 | if ( (client = client_create(current->domain->domain_id)) == NULL ) |
1925 | 0 | { |
1926 | 0 | tmem_client_err("tmem: can't create tmem structure for %s\n", |
1927 | 0 | tmem_client_str); |
1928 | 0 | rc = -ENOMEM; |
1929 | 0 | goto out; |
1930 | 0 | } |
1931 | 0 | } |
1932 | 0 |
|
1933 | 0 | if ( op.cmd == TMEM_NEW_POOL || op.cmd == TMEM_DESTROY_POOL ) |
1934 | 0 | { |
1935 | 0 | if ( op.cmd == TMEM_NEW_POOL ) |
1936 | 0 | rc = do_tmem_new_pool(TMEM_CLI_ID_NULL, 0, op.u.creat.flags, |
1937 | 0 | op.u.creat.uuid[0], op.u.creat.uuid[1]); |
1938 | 0 | else |
1939 | 0 | rc = do_tmem_destroy_pool(op.pool_id); |
1940 | 0 | } |
1941 | 0 | else |
1942 | 0 | { |
1943 | 0 | if ( ((uint32_t)op.pool_id >= MAX_POOLS_PER_DOMAIN) || |
1944 | 0 | ((pool = client->pools[op.pool_id]) == NULL) ) |
1945 | 0 | { |
1946 | 0 | tmem_client_err("tmem: operation requested on uncreated pool\n"); |
1947 | 0 | rc = -ENODEV; |
1948 | 0 | goto out; |
1949 | 0 | } |
1950 | 0 | /* Commands that only need read lock. */ |
1951 | 0 | write_unlock(&tmem_rwlock); |
1952 | 0 | read_lock(&tmem_rwlock); |
1953 | 0 |
|
1954 | 0 | oidp = &op.u.gen.oid; |
1955 | 0 | switch ( op.cmd ) |
1956 | 0 | { |
1957 | 0 | case TMEM_NEW_POOL: |
1958 | 0 | case TMEM_DESTROY_POOL: |
1959 | 0 | BUG(); /* Done earlier. */ |
1960 | 0 | break; |
1961 | 0 | case TMEM_PUT_PAGE: |
1962 | 0 | if ( tmem_ensure_avail_pages() )
1963 | 0 | rc = do_tmem_put(pool, oidp, op.u.gen.index, op.u.gen.cmfn, |
1964 | 0 | tmem_cli_buf_null); |
1965 | 0 | else |
1966 | 0 | rc = -ENOMEM; |
1967 | 0 | break; |
1968 | 0 | case TMEM_GET_PAGE: |
1969 | 0 | rc = do_tmem_get(pool, oidp, op.u.gen.index, op.u.gen.cmfn, |
1970 | 0 | tmem_cli_buf_null); |
1971 | 0 | break; |
1972 | 0 | case TMEM_FLUSH_PAGE: |
1973 | 0 | rc = do_tmem_flush_page(pool, oidp, op.u.gen.index); |
1974 | 0 | break; |
1975 | 0 | case TMEM_FLUSH_OBJECT: |
1976 | 0 | rc = do_tmem_flush_object(pool, oidp); |
1977 | 0 | break; |
1978 | 0 | default: |
1979 | 0 | tmem_client_warn("tmem: op %d not implemented\n", op.cmd); |
1980 | 0 | rc = -ENOSYS; |
1981 | 0 | break; |
1982 | 0 | } |
1983 | 0 | read_unlock(&tmem_rwlock); |
1984 | 0 | if ( rc < 0 ) |
1985 | 0 | tmem_stats.errored_tmem_ops++; |
1986 | 0 | return rc; |
1987 | 0 | } |
1988 | 0 | break; |
1989 | 0 |
|
1990 | 0 | } |
1991 | 0 | out: |
1992 | 0 | write_unlock(&tmem_rwlock); |
1993 | 0 | if ( rc < 0 ) |
1994 | 0 | tmem_stats.errored_tmem_ops++; |
1995 | 0 | return rc; |
1996 | 0 | } |
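/*
 * Editor's note (sketch): do_tmem_op() takes tmem_rwlock for writing up
 * front because pool creation/destruction and client creation mutate
 * client state.  For the data-path commands it then releases the write
 * lock and re-acquires the lock for reading; note this is a release and
 * re-acquire, not an atomic downgrade.  The pattern, in outline:
 */
#if 0 /* Illustrative only. */
    write_lock(&tmem_rwlock);
    /* ... create client / look up pool under the write lock ... */
    write_unlock(&tmem_rwlock);
    read_lock(&tmem_rwlock);
    /* ... put/get/flush on the pool under the read lock ... */
    read_unlock(&tmem_rwlock);
#endif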
1997 | | |
1998 | | /* This should be called when the host is destroying a client (domain). */ |
1999 | | void tmem_destroy(void *v) |
2000 | 0 | { |
2001 | 0 | struct client *client = (struct client *)v; |
2002 | 0 |
|
2003 | 0 | if ( client == NULL ) |
2004 | 0 | return; |
2005 | 0 |
|
2006 | 0 | if ( !client->domain->is_dying ) |
2007 | 0 | { |
2008 | 0 | printk("tmem: tmem_destroy can only destroy a dying client\n");
2009 | 0 | return; |
2010 | 0 | } |
2011 | 0 |
|
2012 | 0 | write_lock(&tmem_rwlock); |
2013 | 0 |
|
2014 | 0 | printk("tmem: flushing tmem pools for %s=%d\n", |
2015 | 0 | tmem_cli_id_str, client->cli_id); |
2016 | 0 | client_flush(client); |
2017 | 0 |
|
2018 | 0 | write_unlock(&tmem_rwlock); |
2019 | 0 | } |
2020 | | |
2021 | | #define MAX_EVICTS 10 /* Should be variable or set via XEN_SYSCTL_TMEM_OP_ ?? */ |
2022 | | void *tmem_relinquish_pages(unsigned int order, unsigned int memflags) |
2023 | 0 | { |
2024 | 0 | struct page_info *pfp; |
2025 | 0 | unsigned long evicts_per_relinq = 0; |
2026 | 0 | int max_evictions = MAX_EVICTS;
2027 | 0 |
|
2028 | 0 | if ( !tmem_enabled() || !tmem_freeable_pages() )
2029 | 0 | return NULL; |
2030 | 0 |
|
2031 | 0 | tmem_stats.relinq_attempts++; |
2032 | 0 | if ( order > 0 ) |
2033 | 0 | { |
2034 | 0 | #ifndef NDEBUG |
2035 | 0 | printk("tmem_relinquish_pages: failing order=%d\n", order);
2036 | 0 | #endif |
2037 | 0 | return NULL; |
2038 | 0 | } |
2039 | 0 |
|
2040 | 0 | while ( (pfp = tmem_page_list_get()) == NULL ) |
2041 | 0 | { |
2042 | 0 | if ( (max_evictions-- <= 0) || !tmem_evict() )
2043 | 0 | break; |
2044 | 0 | evicts_per_relinq++; |
2045 | 0 | } |
2046 | 0 | if ( evicts_per_relinq > tmem_stats.max_evicts_per_relinq ) |
2047 | 0 | tmem_stats.max_evicts_per_relinq = evicts_per_relinq; |
2048 | 0 | if ( pfp != NULL ) |
2049 | 0 | { |
2050 | 0 | if ( !(memflags & MEMF_tmem) ) |
2051 | 0 | scrub_one_page(pfp); |
2052 | 0 | tmem_stats.relinq_pgs++; |
2053 | 0 | } |
2054 | 0 |
|
2055 | 0 | return pfp; |
2056 | 0 | } |
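/*
 * Editor's note (sketch): tmem_relinquish_pages() is a fallback for the
 * page allocator under memory pressure; it only serves order-0 requests,
 * evicting up to MAX_EVICTS ephemeral pages to refill the free list.  A
 * hypothetical caller:
 */
#if 0 /* Illustrative only. */
    struct page_info *pg = tmem_relinquish_pages(0, memflags);
    if ( pg != NULL )
        /* Page was reclaimed from tmem (scrubbed unless MEMF_tmem). */;
#endif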
2057 | | |
2058 | | unsigned long tmem_freeable_pages(void) |
2059 | 86.5k | { |
2060 | 86.5k | if ( !tmem_enabled() ) |
2061 | 86.5k | return 0; |
2062 | 86.5k | |
2063 | 0 | return tmem_page_list_pages + _atomic_read(freeable_page_count); |
2064 | 86.5k | } |
2065 | | |
2066 | | /* Called at hypervisor startup. */ |
2067 | | static int __init init_tmem(void) |
2068 | 1 | { |
2069 | 1 | if ( !tmem_enabled() ) |
2070 | 1 | return 0; |
2071 | 1 | |
2072 | 0 | if ( !tmem_mempool_init() ) |
2073 | 0 | return 0; |
2074 | 0 |
|
2075 | 0 | if ( tmem_init() ) |
2076 | 0 | { |
2077 | 0 | printk("tmem: initialized comp=%d\n", tmem_compression_enabled()); |
2078 | 0 | tmem_initialized = 1; |
2079 | 0 | } |
2080 | 0 | else |
2081 | 0 | printk("tmem: initialization FAILED\n"); |
2082 | 0 |
|
2083 | 0 | return 0; |
2084 | 0 | } |
2085 | | __initcall(init_tmem); |
2086 | | |
2087 | | /* |
2088 | | * Local variables: |
2089 | | * mode: C |
2090 | | * c-file-style: "BSD" |
2091 | | * c-basic-offset: 4 |
2092 | | * tab-width: 4 |
2093 | | * indent-tabs-mode: nil |
2094 | | * End: |
2095 | | */ |