debuggers.hg
annotate xen/common/tmem.c @ 20964:a3fa6d444b25
Fix domain reference leaks
Besides two unlikely/rarely hit ones in x86 code, the main offender
was tmh_client_from_cli_id(), which didn't even have a counterpart
(although it had a comment correctly saying that it causes d->refcnt to
get incremented). Unfortunately(?) this required a bit of code
restructuring (as I needed to change the code anyway, I also fixed
a couple of missing bounds checks which would sooner or later be
reported as security vulnerabilities), so I would hope Dan could give
it his blessing before it gets applied.
Signed-off-by: Jan Beulich <jbeulich@novell.com>
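
To make the bug class concrete: the sketch below shows the unbalanced-reference pattern the patch fixes, with a matching release on every path. The helper names are hypothetical stand-ins for illustration, not the actual Xen/tmem API (the real lookup is tmh_client_from_cli_id(), whose missing counterpart this changeset adds).

```c
/* Illustrative sketch only: get_domain_by_cli_id()/put_domain_ref()
 * are invented names, not the actual Xen helpers.  The point is the
 * pairing: a lookup that bumps d->refcnt needs a release on every
 * path, which tmh_client_from_cli_id() previously lacked. */
struct domain;
extern struct domain *get_domain_by_cli_id(int cli_id); /* d->refcnt++ */
extern void put_domain_ref(struct domain *d);           /* d->refcnt-- */

static int with_client(int cli_id)
{
    struct domain *d = get_domain_by_cli_id(cli_id);

    if ( d == NULL )
        return -1;
    /* ... operate on the client ... */
    put_domain_ref(d);   /* the counterpart that was missing */
    return 0;
}
```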
| author | Keir Fraser <keir.fraser@citrix.com> |
|---|---|
| date | Wed Feb 10 09:18:43 2010 +0000 (2010-02-10) |
| parents | 277bfc2d47b1 |
| children | 87f1e5b7660b |

rev | line source |
---|---|
keir@19684 | 1 /****************************************************************************** |
keir@19684 | 2 * tmem.c |
keir@19684 | 3 * |
keir@19684 | 4 * Transcendent memory |
keir@19684 | 5 * |
keir@19684 | 6 * Copyright (c) 2009, Dan Magenheimer, Oracle Corp. |
keir@19684 | 7 */ |
keir@19684 | 8 |
keir@19684 | 9 /* TODO list: 090129 |
keir@19684 | 10 - improve on reclamation policy |
keir@19684 | 11 - use different tlsf pools for each client (maybe each pool) |
keir@19684 | 12 - implement page accounting and minimal QoS limits |
keir@19684 | 13 - test shared access more completely (need pv cluster fs) |
keir@19684 | 14 - add feedback-driven compression (not for persistent pools though!) |
keir@19684 | 15 - add data-structure total bytes overhead stats |
keir@19684 | 16 */ |
keir@19684 | 17 |
keir@19684 | 18 #ifdef __XEN__ |
keir@19684 | 19 #include <xen/tmem_xen.h> /* host-specific (eg Xen) code goes here */ |
keir@19684 | 20 #endif |
keir@19684 | 21 |
keir@19684 | 22 #include <xen/tmem.h> |
keir@19684 | 23 #include <xen/rbtree.h> |
keir@19684 | 24 #include <xen/radix-tree.h> |
keir@19684 | 25 #include <xen/list.h> |
keir@19684 | 26 |
keir@19684 | 27 #define EXPORT /* indicates code other modules are dependent upon */ |
keir@19684 | 28 #define FORWARD |
keir@19684 | 29 |
keir@20067 | 30 #define TMEM_SPEC_VERSION 0 |
keir@20067 | 31 |
keir@19684 | 32 /************ INTERFACE TO TMEM HOST-DEPENDENT (tmh) CODE ************/ |
keir@19684 | 33 |
keir@19684 | 34 #define CLI_ID_NULL TMH_CLI_ID_NULL |
keir@19684 | 35 #define cli_id_str tmh_cli_id_str |
keir@19684 | 36 #define client_str tmh_client_str |
keir@19684 | 37 |
keir@19684 | 38 /************ DEBUG and STATISTICS (+ some compression testing) *******/ |
keir@19684 | 39 |
keir@19684 | 40 #ifndef NDEBUG |
keir@19684 | 41 #define SENTINELS |
keir@19684 | 42 #define NOINLINE noinline |
keir@19684 | 43 #else |
keir@19684 | 44 #define NOINLINE |
keir@19684 | 45 #endif |
keir@19684 | 46 |
keir@19684 | 47 #ifdef SENTINELS |
keir@19684 | 48 #define DECL_SENTINEL unsigned long sentinel; |
keir@19684 | 49 #define SET_SENTINEL(_x,_y) _x->sentinel = _y##_SENTINEL |
keir@19684 | 50 #define INVERT_SENTINEL(_x,_y) _x->sentinel = ~_y##_SENTINEL |
keir@19684 | 51 #define ASSERT_SENTINEL(_x,_y) \ |
keir@19684 | 52 ASSERT(_x->sentinel != ~_y##_SENTINEL);ASSERT(_x->sentinel == _y##_SENTINEL) |
keir@19684 | 53 #ifdef __i386__ |
keir@19684 | 54 #define POOL_SENTINEL 0x87658765 |
keir@19684 | 55 #define OBJ_SENTINEL 0x12345678 |
keir@19684 | 56 #define OBJNODE_SENTINEL 0xfedcba09 |
keir@19684 | 57 #define PGD_SENTINEL 0x43214321 |
keir@19684 | 58 #else |
keir@19684 | 59 #define POOL_SENTINEL 0x8765876587658765 |
keir@19684 | 60 #define OBJ_SENTINEL 0x1234567812345678 |
keir@19684 | 61 #define OBJNODE_SENTINEL 0xfedcba0987654321 |
keir@19684 | 62 #define PGD_SENTINEL 0x4321432143214321 |
keir@19684 | 63 #endif |
keir@19684 | 64 #else |
keir@19684 | 65 #define DECL_SENTINEL |
keir@19684 | 66 #define SET_SENTINEL(_x,_y) do { } while (0) |
keir@19684 | 67 #define ASSERT_SENTINEL(_x,_y) do { } while (0) |
keir@19684 | 68 #define INVERT_SENTINEL(_x,_y) do { } while (0) |
keir@19684 | 69 #endif |
keir@19684 | 70 |
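
A minimal usage sketch of the sentinel machinery above; struct widget and WIDGET_SENTINEL are invented for illustration, mirroring how pool_t, obj_t, objnode_t and pgp_t use DECL_SENTINEL later in this file:

```c
/* Illustrative only: WIDGET_SENTINEL/struct widget are hypothetical. */
#define WIDGET_SENTINEL 0x57575757UL

struct widget {
    DECL_SENTINEL        /* unsigned long sentinel; (debug builds only) */
    int payload;
};

static void widget_example(struct widget *w)
{
    SET_SENTINEL(w, WIDGET);    /* w->sentinel = WIDGET_SENTINEL */
    ASSERT_SENTINEL(w, WIDGET); /* trips on an uninitialized/corrupted w */
    INVERT_SENTINEL(w, WIDGET); /* poison on free: later ASSERTs catch
                                 * use-after-free of this structure */
}
```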
keir@19684 | 71 /* global statistics (none need to be locked) */ |
keir@19684 | 72 static unsigned long total_tmem_ops = 0; |
keir@19684 | 73 static unsigned long errored_tmem_ops = 0; |
keir@19684 | 74 static unsigned long total_flush_pool = 0; |
keir@19684 | 75 static unsigned long alloc_failed = 0, alloc_page_failed = 0; |
keir@19684 | 76 static unsigned long evicted_pgs = 0, evict_attempts = 0; |
keir@19684 | 77 static unsigned long relinq_pgs = 0, relinq_attempts = 0; |
keir@19684 | 78 static unsigned long max_evicts_per_relinq = 0; |
keir@19684 | 79 static unsigned long low_on_memory = 0; |
keir@19684 | 80 static int global_obj_count_max = 0; |
keir@19684 | 81 static int global_pgp_count_max = 0; |
keir@19684 | 82 static int global_page_count_max = 0; |
keir@19684 | 83 static int global_rtree_node_count_max = 0; |
keir@19684 | 84 static long global_eph_count_max = 0; |
keir@19684 | 85 static unsigned long failed_copies; |
keir@19684 | 86 |
keir@19684 | 87 DECL_CYC_COUNTER(succ_get); |
keir@19684 | 88 DECL_CYC_COUNTER(succ_put); |
keir@19684 | 89 DECL_CYC_COUNTER(non_succ_get); |
keir@19684 | 90 DECL_CYC_COUNTER(non_succ_put); |
keir@19684 | 91 DECL_CYC_COUNTER(flush); |
keir@19684 | 92 DECL_CYC_COUNTER(flush_obj); |
keir@19684 | 93 #ifdef COMPARE_COPY_PAGE_SSE2 |
keir@19684 | 94 EXTERN_CYC_COUNTER(pg_copy1); |
keir@19684 | 95 EXTERN_CYC_COUNTER(pg_copy2); |
keir@19684 | 96 EXTERN_CYC_COUNTER(pg_copy3); |
keir@19684 | 97 EXTERN_CYC_COUNTER(pg_copy4); |
keir@19684 | 98 #else |
keir@19684 | 99 EXTERN_CYC_COUNTER(pg_copy); |
keir@19684 | 100 #endif |
keir@19684 | 101 DECL_CYC_COUNTER(compress); |
keir@19684 | 102 DECL_CYC_COUNTER(decompress); |
keir@19684 | 103 |
keir@19684 | 104 /************ CORE DATA STRUCTURES ************************************/ |
keir@19684 | 105 |
keir@19684 | 106 #define MAX_POOLS_PER_DOMAIN 16 |
keir@19684 | 107 #define MAX_GLOBAL_SHARED_POOLS 16 |
keir@19684 | 108 |
keir@19684 | 109 struct tm_pool; |
keir@20067 | 110 struct tmem_page_descriptor; |
keir@19684 | 111 struct client { |
keir@19684 | 112 struct list_head client_list; |
keir@19684 | 113 struct tm_pool *pools[MAX_POOLS_PER_DOMAIN]; |
keir@19684 | 114 tmh_client_t *tmh; |
keir@19684 | 115 struct list_head ephemeral_page_list; |
keir@19684 | 116 long eph_count, eph_count_max; |
keir@19684 | 117 cli_id_t cli_id; |
keir@19684 | 118 uint32_t weight; |
keir@19684 | 119 uint32_t cap; |
keir@19684 | 120 bool_t compress; |
keir@19684 | 121 bool_t frozen; |
keir@20067 | 122 bool_t shared_auth_required; |
keir@20067 | 123 /* for save/restore/migration */ |
keir@20067 | 124 bool_t live_migrating; |
keir@20067 | 125 bool_t was_frozen; |
keir@20067 | 126 struct list_head persistent_invalidated_list; |
keir@20067 | 127 struct tmem_page_descriptor *cur_pgp; |
keir@20067 | 128 /* statistics collection */ |
keir@19684 | 129 unsigned long compress_poor, compress_nomem; |
keir@19684 | 130 unsigned long compressed_pages; |
keir@19684 | 131 uint64_t compressed_sum_size; |
keir@19897 | 132 uint64_t total_cycles; |
keir@19897 | 133 unsigned long succ_pers_puts, succ_eph_gets, succ_pers_gets; |
keir@20067 | 134 /* shared pool authentication */ |
keir@20067 | 135 uint64_t shared_auth_uuid[MAX_GLOBAL_SHARED_POOLS][2]; |
keir@19684 | 136 }; |
keir@19684 | 137 typedef struct client client_t; |
keir@19684 | 138 |
keir@19684 | 139 struct share_list { |
keir@19684 | 140 struct list_head share_list; |
keir@19684 | 141 client_t *client; |
keir@19684 | 142 }; |
keir@19684 | 143 typedef struct share_list sharelist_t; |
keir@19684 | 144 |
keir@19684 | 145 #define OBJ_HASH_BUCKETS 256 /* must be power of two */ |
keir@19684 | 146 #define OBJ_HASH_BUCKETS_MASK (OBJ_HASH_BUCKETS-1) |
keir@19684 | 147 #define OBJ_HASH(_oid) (tmh_hash(_oid, BITS_PER_LONG) & OBJ_HASH_BUCKETS_MASK) |
keir@19684 | 148 |
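
A quick note on the power-of-two requirement above: masking with OBJ_HASH_BUCKETS-1 is an exact substitute for reduction modulo OBJ_HASH_BUCKETS only when the bucket count is a power of two. A tiny sketch, with a hypothetical hash value:

```c
/* With OBJ_HASH_BUCKETS == 256, the mask equals "% 256". */
static unsigned int bucket_of(unsigned long h)
{
    return h & OBJ_HASH_BUCKETS_MASK;  /* e.g. 0x1234 -> 0x34 == 0x1234 % 256 */
}
```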
keir@19684 | 149 struct tm_pool { |
keir@19684 | 150 bool_t shared; |
keir@19684 | 151 bool_t persistent; |
keir@20535 | 152 bool_t is_dying; |
keir@20067 | 153 int pageshift; /* 0 == 2**12 */ |
keir@19684 | 154 struct list_head pool_list; /* FIXME do we need this anymore? */ |
keir@19684 | 155 client_t *client; |
keir@19684 | 156 uint64_t uuid[2]; /* 0 for private, non-zero for shared */ |
keir@19684 | 157 uint32_t pool_id; |
keir@19684 | 158 rwlock_t pool_rwlock; |
keir@19684 | 159 struct rb_root obj_rb_root[OBJ_HASH_BUCKETS]; /* protected by pool_rwlock */ |
keir@19684 | 160 struct list_head share_list; /* valid if shared */ |
keir@19684 | 161 int shared_count; /* valid if shared */ |
keir@20067 | 162 /* for save/restore/migration */ |
keir@20067 | 163 struct list_head persistent_page_list; |
keir@20067 | 164 struct tmem_page_descriptor *cur_pgp; |
keir@20067 | 165 /* statistics collection */ |
keir@19684 | 166 atomic_t pgp_count; |
keir@19684 | 167 int pgp_count_max; |
keir@19684 | 168 long obj_count; /* atomicity depends on pool_rwlock held for write */ |
keir@19684 | 169 long obj_count_max; |
keir@19684 | 170 unsigned long objnode_count, objnode_count_max; |
keir@19684 | 171 uint64_t sum_life_cycles; |
keir@19684 | 172 uint64_t sum_evicted_cycles; |
keir@19684 | 173 unsigned long puts, good_puts, no_mem_puts; |
keir@19684 | 174 unsigned long dup_puts_flushed, dup_puts_replaced; |
keir@19684 | 175 unsigned long gets, found_gets; |
keir@19684 | 176 unsigned long flushs, flushs_found; |
keir@19684 | 177 unsigned long flush_objs, flush_objs_found; |
keir@20067 | 178 DECL_SENTINEL |
keir@19684 | 179 }; |
keir@19684 | 180 typedef struct tm_pool pool_t; |
keir@19684 | 181 |
keir@19684 | 182 #define is_persistent(_p) (_p->persistent) |
keir@19684 | 183 #define is_ephemeral(_p) (!(_p->persistent)) |
keir@19684 | 184 #define is_shared(_p) (_p->shared) |
keir@19684 | 185 #define is_private(_p) (!(_p->shared)) |
keir@19684 | 186 |
keir@19684 | 187 struct tmem_object_root { |
keir@19684 | 188 DECL_SENTINEL |
keir@19684 | 189 uint64_t oid; |
keir@19684 | 190 struct rb_node rb_tree_node; /* protected by pool->pool_rwlock */ |
keir@19684 | 191 unsigned long objnode_count; /* atomicity depends on obj_spinlock */ |
keir@19684 | 192 long pgp_count; /* atomicity depends on obj_spinlock */ |
keir@19684 | 193 struct radix_tree_root tree_root; /* tree of pages within object */ |
keir@19684 | 194 pool_t *pool; |
keir@19684 | 195 cli_id_t last_client; |
keir@19684 | 196 spinlock_t obj_spinlock; |
keir@19684 | 197 bool_t no_evict; /* if globally locked, pseudo-locks against eviction */ |
keir@19684 | 198 }; |
keir@19684 | 199 typedef struct tmem_object_root obj_t; |
keir@19684 | 200 |
keir@19684 | 201 typedef struct radix_tree_node rtn_t; |
keir@19684 | 202 struct tmem_object_node { |
keir@19684 | 203 obj_t *obj; |
keir@19684 | 204 DECL_SENTINEL |
keir@19684 | 205 rtn_t rtn; |
keir@19684 | 206 }; |
keir@19684 | 207 typedef struct tmem_object_node objnode_t; |
keir@19684 | 208 |
keir@19684 | 209 struct tmem_page_descriptor { |
keir@20067 | 210 union { |
keir@20067 | 211 struct list_head global_eph_pages; |
keir@20067 | 212 struct list_head client_inv_pages; |
keir@20067 | 213 }; |
keir@20067 | 214 union { |
keir@20067 | 215 struct list_head client_eph_pages; |
keir@20067 | 216 struct list_head pool_pers_pages; |
keir@20067 | 217 }; |
keir@20067 | 218 union { |
keir@20067 | 219 obj_t *obj; |
keir@20067 | 220 uint64_t inv_oid; /* used for invalid list only */ |
keir@20067 | 221 }; |
keir@19684 | 222 uint32_t index; |
keir@20067 | 223 size_t size; /* 0 == PAGE_SIZE (pfp), -1 == data invalid, |
keir@20067 | 224 else compressed data (cdata) */ |
keir@19684 | 225 union { |
keir@19684 | 226 pfp_t *pfp; /* page frame pointer */ |
keir@19684 | 227 char *cdata; /* compressed data */ |
keir@19684 | 228 }; |
keir@20067 | 229 union { |
keir@20067 | 230 uint64_t timestamp; |
keir@20067 | 231 uint32_t pool_id; /* used for invalid list only */ |
keir@20067 | 232 }; |
keir@19684 | 233 DECL_SENTINEL |
keir@19684 | 234 }; |
keir@19684 | 235 typedef struct tmem_page_descriptor pgp_t; |
keir@19684 | 236 |
keir@19684 | 237 static LIST_HEAD(global_ephemeral_page_list); /* all pages in ephemeral pools */ |
keir@19684 | 238 |
keir@19684 | 239 static LIST_HEAD(global_client_list); |
keir@19684 | 240 static LIST_HEAD(global_pool_list); |
keir@19684 | 241 |
keir@19684 | 242 static pool_t *global_shared_pools[MAX_GLOBAL_SHARED_POOLS] = { 0 }; |
keir@20067 | 243 static bool_t global_shared_auth = 0; |
keir@19684 | 244 static atomic_t client_weight_total = ATOMIC_INIT(0); |
keir@19684 | 245 static int tmem_initialized = 0; |
keir@19684 | 246 |
keir@19684 | 247 /************ CONCURRENCY ***********************************************/ |
keir@19684 | 248 |
keir@19684 | 249 EXPORT DEFINE_SPINLOCK(tmem_spinlock); /* used iff tmh_lock_all */ |
keir@19684 | 250 EXPORT DEFINE_RWLOCK(tmem_rwlock); /* used iff !tmh_lock_all */ |
keir@19684 | 251 static DEFINE_SPINLOCK(eph_lists_spinlock); /* protects global AND clients */ |
keir@20067 | 252 static DEFINE_SPINLOCK(pers_lists_spinlock); |
keir@19684 | 253 |
keir@19684 | 254 #define tmem_spin_lock(_l) do {if (!tmh_lock_all) spin_lock(_l);}while(0) |
keir@19684 | 255 #define tmem_spin_unlock(_l) do {if (!tmh_lock_all) spin_unlock(_l);}while(0) |
keir@19684 | 256 #define tmem_read_lock(_l) do {if (!tmh_lock_all) read_lock(_l);}while(0) |
keir@19684 | 257 #define tmem_read_unlock(_l) do {if (!tmh_lock_all) read_unlock(_l);}while(0) |
keir@19684 | 258 #define tmem_write_lock(_l) do {if (!tmh_lock_all) write_lock(_l);}while(0) |
keir@19684 | 259 #define tmem_write_unlock(_l) do {if (!tmh_lock_all) write_unlock(_l);}while(0) |
keir@19684 | 260 #define tmem_write_trylock(_l) ((tmh_lock_all)?1:write_trylock(_l)) |
keir@19684 | 261 #define tmem_spin_trylock(_l) (tmh_lock_all?1:spin_trylock(_l)) |
keir@19684 | 262 |
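
These wrappers give tmem two locking modes: under tmh_lock_all the entry path holds the single tmem_spinlock and every fine-grained operation compiles to a no-op; otherwise each lock is taken for real. A hypothetical caller sketch (not actual Xen code) showing the shape:

```c
/* Hypothetical caller.  Under tmh_lock_all the wrappers below are
 * no-ops (the one big tmem_spinlock is already held); otherwise the
 * pool rwlock and object spinlock are taken individually. */
static void locked_walk(rwlock_t *pool_rwlock, spinlock_t *obj_lock)
{
    tmem_read_lock(pool_rwlock);  /* real read_lock() iff !tmh_lock_all */
    tmem_spin_lock(obj_lock);
    /* ... touch the object ... */
    tmem_spin_unlock(obj_lock);
    tmem_read_unlock(pool_rwlock);
}
```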
keir@19684 | 263 #define ASSERT_SPINLOCK(_l) ASSERT(tmh_lock_all || spin_is_locked(_l)) |
keir@19684 | 264 #define ASSERT_WRITELOCK(_l) ASSERT(tmh_lock_all || rw_is_write_locked(_l)) |
keir@19684 | 265 |
keir@19684 | 266 /* global counters (should use long_atomic_t access) */ |
keir@19684 | 267 static long global_eph_count = 0; /* atomicity depends on eph_lists_spinlock */ |
keir@19684 | 268 static atomic_t global_obj_count = ATOMIC_INIT(0); |
keir@19684 | 269 static atomic_t global_pgp_count = ATOMIC_INIT(0); |
keir@19684 | 270 static atomic_t global_page_count = ATOMIC_INIT(0); |
keir@19684 | 271 static atomic_t global_rtree_node_count = ATOMIC_INIT(0); |
keir@19684 | 272 |
keir@19684 | 273 #define atomic_inc_and_max(_c) do { \ |
keir@19684 | 274 atomic_inc(&_c); \ |
keir@19684 | 275 if ( _atomic_read(_c) > _c##_max ) \ |
keir@19684 | 276 _c##_max = _atomic_read(_c); \ |
keir@19684 | 277 } while (0) |
keir@19684 | 278 |
keir@19684 | 279 #define atomic_dec_and_assert(_c) do { \ |
keir@19684 | 280 atomic_dec(&_c); \ |
keir@19684 | 281 ASSERT(_atomic_read(_c) >= 0); \ |
keir@19684 | 282 } while (0) |
keir@19684 | 283 |
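
Since the ## token-pasting is easy to misread: atomic_inc_and_max(global_page_count) requires a companion variable literally named global_page_count_max (declared in the statistics block above), and expands to:

```c
/* Mechanical expansion of atomic_inc_and_max(global_page_count);
 * the _c##_max pasting is why every counter above has a *_max twin. */
static void expansion_example(void)
{
    do {
        atomic_inc(&global_page_count);
        if ( _atomic_read(global_page_count) > global_page_count_max )
            global_page_count_max = _atomic_read(global_page_count);
    } while (0);
}
```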
keir@19684 | 284 |
keir@19684 | 285 /************ MEMORY ALLOCATION INTERFACE *****************************/ |
keir@19684 | 286 |
keir@19684 | 287 #define tmem_malloc(_type,_pool) \ |
keir@19684 | 288 _tmem_malloc(sizeof(_type), __alignof__(_type), _pool) |
keir@19684 | 289 |
keir@19684 | 290 #define tmem_malloc_bytes(_size,_pool) \ |
keir@19684 | 291 _tmem_malloc(_size, 1, _pool) |
keir@19684 | 292 |
keir@19684 | 293 static NOINLINE void *_tmem_malloc(size_t size, size_t align, pool_t *pool) |
keir@19684 | 294 { |
keir@19684 | 295 void *v; |
keir@19684 | 296 |
keir@19684 | 297 if ( (pool != NULL) && is_persistent(pool) ) |
keir@19684 | 298 v = tmh_alloc_subpage_thispool(pool,size,align); |
keir@19684 | 299 else |
keir@19684 | 300 v = tmh_alloc_subpage(pool, size, align); |
keir@19684 | 301 if ( v == NULL ) |
keir@19684 | 302 alloc_failed++; |
keir@19684 | 303 return v; |
keir@19684 | 304 } |
keir@19684 | 305 |
keir@19684 | 306 static NOINLINE void tmem_free(void *p, size_t size, pool_t *pool) |
keir@19684 | 307 { |
keir@19684 | 308 if ( pool == NULL || !is_persistent(pool) ) |
keir@19684 | 309 tmh_free_subpage(p,size); |
keir@19684 | 310 else |
keir@19684 | 311 tmh_free_subpage_thispool(pool,p,size); |
keir@19684 | 312 } |
keir@19684 | 313 |
keir@19684 | 314 static NOINLINE pfp_t *tmem_page_alloc(pool_t *pool) |
keir@19684 | 315 { |
keir@19684 | 316 pfp_t *pfp = NULL; |
keir@19684 | 317 |
keir@19684 | 318 if ( pool != NULL && is_persistent(pool) ) |
keir@19684 | 319 pfp = tmh_alloc_page_thispool(pool); |
keir@19684 | 320 else |
keir@19684 | 321 pfp = tmh_alloc_page(pool,0); |
keir@19684 | 322 if ( pfp == NULL ) |
keir@19684 | 323 alloc_page_failed++; |
keir@19684 | 324 else |
keir@19684 | 325 atomic_inc_and_max(global_page_count); |
keir@19684 | 326 return pfp; |
keir@19684 | 327 } |
keir@19684 | 328 |
keir@19684 | 329 static NOINLINE void tmem_page_free(pool_t *pool, pfp_t *pfp) |
keir@19684 | 330 { |
keir@19684 | 331 ASSERT(pfp); |
keir@19684 | 332 if ( pool == NULL || !is_persistent(pool) ) |
keir@19684 | 333 tmh_free_page(pfp); |
keir@19684 | 334 else |
keir@19684 | 335 tmh_free_page_thispool(pool,pfp); |
keir@19684 | 336 atomic_dec_and_assert(global_page_count); |
keir@19684 | 337 } |
keir@19684 | 338 |
keir@19684 | 339 /************ PAGE DESCRIPTOR MANIPULATION ROUTINES *******************/ |
keir@19684 | 340 |
keir@19684 | 341 /* allocate a pgp_t and associate it with an object */ |
keir@19684 | 342 static NOINLINE pgp_t *pgp_alloc(obj_t *obj) |
keir@19684 | 343 { |
keir@19684 | 344 pgp_t *pgp; |
keir@19684 | 345 pool_t *pool; |
keir@19684 | 346 |
keir@19684 | 347 ASSERT(obj != NULL); |
keir@19684 | 348 ASSERT(obj->pool != NULL); |
keir@19684 | 349 pool = obj->pool; |
keir@19684 | 350 if ( (pgp = tmem_malloc(pgp_t, pool)) == NULL ) |
keir@19684 | 351 return NULL; |
keir@19684 | 352 pgp->obj = obj; |
keir@19684 | 353 INIT_LIST_HEAD(&pgp->global_eph_pages); |
keir@19684 | 354 INIT_LIST_HEAD(&pgp->client_eph_pages); |
keir@19684 | 355 pgp->pfp = NULL; |
keir@19684 | 356 pgp->size = -1; |
keir@19684 | 357 pgp->index = -1; |
keir@19684 | 358 pgp->timestamp = get_cycles(); |
keir@19684 | 359 SET_SENTINEL(pgp,PGD); |
keir@19684 | 360 atomic_inc_and_max(global_pgp_count); |
keir@19684 | 361 atomic_inc_and_max(pool->pgp_count); |
keir@19684 | 362 return pgp; |
keir@19684 | 363 } |
keir@19684 | 364 |
keir@19684 | 365 static pgp_t *pgp_lookup_in_obj(obj_t *obj, uint32_t index) |
keir@19684 | 366 { |
keir@19684 | 367 ASSERT(obj != NULL); |
keir@19684 | 368 ASSERT_SPINLOCK(&obj->obj_spinlock); |
keir@19684 | 369 ASSERT_SENTINEL(obj,OBJ); |
keir@19684 | 370 ASSERT(obj->pool != NULL); |
keir@19684 | 371 ASSERT_SENTINEL(obj->pool,POOL); |
keir@19684 | 372 return radix_tree_lookup(&obj->tree_root, index); |
keir@19684 | 373 } |
keir@19684 | 374 |
keir@19684 | 375 static NOINLINE void pgp_free_data(pgp_t *pgp, pool_t *pool) |
keir@19684 | 376 { |
keir@19684 | 377 if ( pgp->pfp == NULL ) |
keir@19684 | 378 return; |
keir@19684 | 379 if ( !pgp->size ) |
keir@19684 | 380 tmem_page_free(pgp->obj->pool,pgp->pfp); |
keir@19684 | 381 else |
keir@19684 | 382 { |
keir@19684 | 383 tmem_free(pgp->cdata,pgp->size,pool); |
keir@19684 | 384 if ( pool != NULL ) |
keir@19684 | 385 { |
keir@19684 | 386 pool->client->compressed_pages--; |
keir@19684 | 387 pool->client->compressed_sum_size -= pgp->size; |
keir@19684 | 388 } |
keir@19684 | 389 } |
keir@19684 | 390 pgp->pfp = NULL; |
keir@19684 | 391 pgp->size = -1; |
keir@19684 | 392 } |
keir@19684 | 393 |
keir@19684 | 394 static NOINLINE void pgp_free(pgp_t *pgp, int from_delete) |
keir@19684 | 395 { |
keir@19684 | 396 pool_t *pool = NULL; |
keir@19684 | 397 |
keir@19684 | 398 ASSERT_SENTINEL(pgp,PGD); |
keir@19684 | 399 ASSERT(pgp->obj != NULL); |
keir@19684 | 400 ASSERT_SENTINEL(pgp->obj,OBJ); |
keir@19684 | 401 ASSERT_SENTINEL(pgp->obj->pool,POOL); |
keir@20067 | 402 ASSERT(pgp->obj->pool->client != NULL); |
keir@19684 | 403 if ( from_delete ) |
keir@19684 | 404 ASSERT(pgp_lookup_in_obj(pgp->obj,pgp->index) == NULL); |
keir@19684 | 405 ASSERT(pgp->obj->pool != NULL); |
keir@19684 | 406 pool = pgp->obj->pool; |
keir@20067 | 407 if ( is_ephemeral(pool) ) |
keir@20067 | 408 { |
keir@20067 | 409 ASSERT(list_empty(&pgp->global_eph_pages)); |
keir@20067 | 410 ASSERT(list_empty(&pgp->client_eph_pages)); |
keir@20067 | 411 } |
keir@19684 | 412 pgp_free_data(pgp, pool); |
keir@20067 | 413 atomic_dec_and_assert(global_pgp_count); |
keir@20067 | 414 atomic_dec_and_assert(pool->pgp_count); |
keir@20067 | 415 pgp->size = -1; |
keir@20067 | 416 if ( is_persistent(pool) && pool->client->live_migrating ) |
keir@20067 | 417 { |
keir@20067 | 418 pgp->inv_oid = pgp->obj->oid; |
keir@20067 | 419 pgp->pool_id = pool->pool_id; |
keir@20067 | 420 return; |
keir@20067 | 421 } |
keir@19684 | 422 INVERT_SENTINEL(pgp,PGD); |
keir@19684 | 423 pgp->obj = NULL; |
keir@19684 | 424 pgp->index = -1; |
keir@20067 | 425 tmem_free(pgp,sizeof(pgp_t),pool); |
keir@20067 | 426 } |
keir@20067 | 427 |
keir@20067 | 428 static NOINLINE void pgp_free_from_inv_list(client_t *client, pgp_t *pgp) |
keir@20067 | 429 { |
keir@20067 | 430 pool_t *pool = client->pools[pgp->pool_id]; |
keir@20067 | 431 |
keir@20067 | 432 ASSERT_SENTINEL(pool,POOL); |
keir@20067 | 433 ASSERT_SENTINEL(pgp,PGD); |
keir@20067 | 434 INVERT_SENTINEL(pgp,PGD); |
keir@20067 | 435 pgp->obj = NULL; |
keir@20067 | 436 pgp->index = -1; |
keir@19684 | 437 tmem_free(pgp,sizeof(pgp_t),pool); |
keir@19684 | 438 } |
keir@19684 | 439 |
keir@19684 | 440 /* remove the page from appropriate lists but not from parent object */ |
keir@19684 | 441 static void pgp_delist(pgp_t *pgp, bool_t no_eph_lock) |
keir@19684 | 442 { |
keir@20067 | 443 client_t *client; |
keir@20067 | 444 |
keir@19684 | 445 ASSERT(pgp != NULL); |
keir@19684 | 446 ASSERT(pgp->obj != NULL); |
keir@19684 | 447 ASSERT(pgp->obj->pool != NULL); |
keir@20067 | 448 client = pgp->obj->pool->client; |
keir@20067 | 449 ASSERT(client != NULL); |
keir@19684 | 450 if ( is_ephemeral(pgp->obj->pool) ) |
keir@19684 | 451 { |
keir@19684 | 452 if ( !no_eph_lock ) |
keir@19684 | 453 tmem_spin_lock(&eph_lists_spinlock); |
keir@19684 | 454 if ( !list_empty(&pgp->client_eph_pages) ) |
keir@20067 | 455 client->eph_count--; |
keir@20067 | 456 ASSERT(client->eph_count >= 0); |
keir@19684 | 457 list_del_init(&pgp->client_eph_pages); |
keir@19684 | 458 if ( !list_empty(&pgp->global_eph_pages) ) |
keir@19684 | 459 global_eph_count--; |
keir@19684 | 460 ASSERT(global_eph_count >= 0); |
keir@19684 | 461 list_del_init(&pgp->global_eph_pages); |
keir@19684 | 462 if ( !no_eph_lock ) |
keir@19684 | 463 tmem_spin_unlock(&eph_lists_spinlock); |
keir@20067 | 464 } else { |
keir@20067 | 465 if ( client->live_migrating ) |
keir@20067 | 466 { |
keir@20067 | 467 tmem_spin_lock(&pers_lists_spinlock); |
keir@20067 | 468 list_add_tail(&pgp->client_inv_pages, |
keir@20067 | 469 &client->persistent_invalidated_list); |
keir@20067 | 470 if ( pgp != pgp->obj->pool->cur_pgp ) |
keir@20067 | 471 list_del_init(&pgp->pool_pers_pages); |
keir@20067 | 472 tmem_spin_unlock(&pers_lists_spinlock); |
keir@20067 | 473 } else { |
keir@20067 | 474 tmem_spin_lock(&pers_lists_spinlock); |
keir@20067 | 475 list_del_init(&pgp->pool_pers_pages); |
keir@20067 | 476 tmem_spin_unlock(&pers_lists_spinlock); |
keir@20067 | 477 } |
keir@19684 | 478 } |
keir@19684 | 479 } |
keir@19684 | 480 |
keir@19684 | 481 /* remove page from lists (but not from parent object) and free it */ |
keir@19684 | 482 static NOINLINE void pgp_delete(pgp_t *pgp, bool_t no_eph_lock) |
keir@19684 | 483 { |
keir@19684 | 484 uint64_t life; |
keir@19684 | 485 |
keir@19684 | 486 ASSERT(pgp != NULL); |
keir@19684 | 487 ASSERT(pgp->obj != NULL); |
keir@19684 | 488 ASSERT(pgp->obj->pool != NULL); |
keir@19684 | 489 life = get_cycles() - pgp->timestamp; |
keir@19684 | 490 pgp->obj->pool->sum_life_cycles += life; |
keir@19684 | 491 pgp_delist(pgp, no_eph_lock); |
keir@19684 | 492 pgp_free(pgp,1); |
keir@19684 | 493 } |
keir@19684 | 494 |
keir@19684 | 495 /* called only indirectly by radix_tree_destroy */ |
keir@19684 | 496 static NOINLINE void pgp_destroy(void *v) |
keir@19684 | 497 { |
keir@19684 | 498 pgp_t *pgp = (pgp_t *)v; |
keir@19684 | 499 |
keir@19684 | 500 ASSERT_SPINLOCK(&pgp->obj->obj_spinlock); |
keir@19684 | 501 pgp_delist(pgp,0); |
keir@19684 | 502 ASSERT(pgp->obj != NULL); |
keir@19684 | 503 pgp->obj->pgp_count--; |
keir@19684 | 504 ASSERT(pgp->obj->pgp_count >= 0); |
keir@19684 | 505 pgp_free(pgp,0); |
keir@19684 | 506 } |
keir@19684 | 507 |
keir@19684 | 508 FORWARD static rtn_t *rtn_alloc(void *arg); |
keir@19684 | 509 FORWARD static void rtn_free(rtn_t *rtn); |
keir@19684 | 510 |
keir@19684 | 511 static int pgp_add_to_obj(obj_t *obj, uint32_t index, pgp_t *pgp) |
keir@19684 | 512 { |
keir@19684 | 513 int ret; |
keir@19684 | 514 |
keir@19684 | 515 ASSERT_SPINLOCK(&obj->obj_spinlock); |
keir@19684 | 516 ret = radix_tree_insert(&obj->tree_root, index, pgp, rtn_alloc, obj); |
keir@19684 | 517 if ( !ret ) |
keir@19684 | 518 obj->pgp_count++; |
keir@19684 | 519 return ret; |
keir@19684 | 520 } |
keir@19684 | 521 |
keir@19684 | 522 static NOINLINE pgp_t *pgp_delete_from_obj(obj_t *obj, uint32_t index) |
keir@19684 | 523 { |
keir@19684 | 524 pgp_t *pgp; |
keir@19684 | 525 |
keir@19684 | 526 ASSERT(obj != NULL); |
keir@19684 | 527 ASSERT_SPINLOCK(&obj->obj_spinlock); |
keir@19684 | 528 ASSERT_SENTINEL(obj,OBJ); |
keir@19684 | 529 ASSERT(obj->pool != NULL); |
keir@19684 | 530 ASSERT_SENTINEL(obj->pool,POOL); |
keir@19684 | 531 pgp = radix_tree_delete(&obj->tree_root, index, rtn_free); |
keir@19684 | 532 if ( pgp != NULL ) |
keir@19684 | 533 obj->pgp_count--; |
keir@19684 | 534 ASSERT(obj->pgp_count >= 0); |
keir@19684 | 535 |
keir@19684 | 536 return pgp; |
keir@19684 | 537 } |
keir@19684 | 538 |
keir@19684 | 539 /************ RADIX TREE NODE MANIPULATION ROUTINES *******************/ |
keir@19684 | 540 |
keir@19684 | 541 /* called only indirectly from radix_tree_insert */ |
keir@19684 | 542 static NOINLINE rtn_t *rtn_alloc(void *arg) |
keir@19684 | 543 { |
keir@19684 | 544 objnode_t *objnode; |
keir@19684 | 545 obj_t *obj = (obj_t *)arg; |
keir@19684 | 546 |
keir@19684 | 547 ASSERT_SENTINEL(obj,OBJ); |
keir@19684 | 548 ASSERT(obj->pool != NULL); |
keir@19684 | 549 ASSERT_SENTINEL(obj->pool,POOL); |
keir@19684 | 550 objnode = tmem_malloc(objnode_t,obj->pool); |
keir@19684 | 551 if (objnode == NULL) |
keir@19684 | 552 return NULL; |
keir@19684 | 553 objnode->obj = obj; |
keir@19684 | 554 SET_SENTINEL(objnode,OBJNODE); |
keir@19684 | 555 memset(&objnode->rtn, 0, sizeof(rtn_t)); |
keir@19684 | 556 if (++obj->pool->objnode_count > obj->pool->objnode_count_max) |
keir@19684 | 557 obj->pool->objnode_count_max = obj->pool->objnode_count; |
keir@19684 | 558 atomic_inc_and_max(global_rtree_node_count); |
keir@19684 | 559 obj->objnode_count++; |
keir@19684 | 560 return &objnode->rtn; |
keir@19684 | 561 } |
keir@19684 | 562 |
keir@19684 | 563 /* called only indirectly from radix_tree_delete/destroy */ |
keir@19684 | 564 static void rtn_free(rtn_t *rtn) |
keir@19684 | 565 { |
keir@19684 | 566 pool_t *pool; |
keir@19684 | 567 objnode_t *objnode; |
keir@19684 | 568 int i; |
keir@19684 | 569 |
keir@19684 | 570 ASSERT(rtn != NULL); |
keir@19684 | 571 for (i = 0; i < RADIX_TREE_MAP_SIZE; i++) |
keir@19684 | 572 ASSERT(rtn->slots[i] == NULL); |
keir@19684 | 573 objnode = container_of(rtn,objnode_t,rtn); |
keir@19684 | 574 ASSERT_SENTINEL(objnode,OBJNODE); |
keir@19684 | 575 INVERT_SENTINEL(objnode,OBJNODE); |
keir@19684 | 576 ASSERT(objnode->obj != NULL); |
keir@19684 | 577 ASSERT_SPINLOCK(&objnode->obj->obj_spinlock); |
keir@19684 | 578 ASSERT_SENTINEL(objnode->obj,OBJ); |
keir@19684 | 579 pool = objnode->obj->pool; |
keir@19684 | 580 ASSERT(pool != NULL); |
keir@19684 | 581 ASSERT_SENTINEL(pool,POOL); |
keir@19684 | 582 pool->objnode_count--; |
keir@19684 | 583 objnode->obj->objnode_count--; |
keir@19684 | 584 objnode->obj = NULL; |
keir@19684 | 585 tmem_free(objnode,sizeof(objnode_t),pool); |
keir@19684 | 586 atomic_dec_and_assert(global_rtree_node_count); |
keir@19684 | 587 } |
keir@19684 | 588 |
keir@19684 | 589 /************ POOL OBJECT COLLECTION MANIPULATION ROUTINES *******************/ |
keir@19684 | 590 |
keir@19684 | 591 /* searches for object==oid in pool, returns locked object if found */ |
keir@19684 | 592 static NOINLINE obj_t * obj_find(pool_t *pool, uint64_t oid) |
keir@19684 | 593 { |
keir@19684 | 594 struct rb_node *node; |
keir@19684 | 595 obj_t *obj; |
keir@19684 | 596 |
keir@19684 | 597 restart_find: |
keir@19684 | 598 tmem_read_lock(&pool->pool_rwlock); |
keir@19684 | 599 node = pool->obj_rb_root[OBJ_HASH(oid)].rb_node; |
keir@19684 | 600 while ( node ) |
keir@19684 | 601 { |
keir@19684 | 602 obj = container_of(node, obj_t, rb_tree_node); |
keir@19684 | 603 if ( obj->oid == oid ) |
keir@19684 | 604 { |
keir@19684 | 605 if ( tmh_lock_all ) |
keir@19684 | 606 obj->no_evict = 1; |
keir@19684 | 607 else |
keir@19684 | 608 { |
keir@19684 | 609 if ( !tmem_spin_trylock(&obj->obj_spinlock) ) |
keir@19684 | 610 { |
keir@19684 | 611 tmem_read_unlock(&pool->pool_rwlock); |
keir@19684 | 612 goto restart_find; |
keir@19684 | 613 } |
keir@19684 | 614 tmem_read_unlock(&pool->pool_rwlock); |
keir@19684 | 615 } |
keir@19684 | 616 return obj; |
keir@19684 | 617 } |
keir@19684 | 618 else if ( oid < obj->oid ) |
keir@19684 | 619 node = node->rb_left; |
keir@19684 | 620 else |
keir@19684 | 621 node = node->rb_right; |
keir@19684 | 622 } |
keir@19684 | 623 tmem_read_unlock(&pool->pool_rwlock); |
keir@19684 | 624 return NULL; |
keir@19684 | 625 } |
keir@19684 | 626 |
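
obj_find()'s contract is easy to miss: on success the object comes back with its spinlock held (or pinned via no_evict under tmh_lock_all), and the caller owns the unlock. A hypothetical caller, following the pattern the real call sites such as do_tmem_dup_put() use:

```c
/* Hypothetical caller showing obj_find()'s locking contract. */
static void obj_find_usage(pool_t *pool, uint64_t oid)
{
    obj_t *obj = obj_find(pool, oid);  /* locked (or no_evict-pinned) */

    if ( obj == NULL )
        return;
    /* ... safe to walk obj->tree_root here ... */
    obj->no_evict = 0;                 /* re-expose to eviction */
    tmem_spin_unlock(&obj->obj_spinlock);
}
```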
keir@19684 | 627 /* free an object that has no more pgps in it */ |
keir@19684 | 628 static NOINLINE void obj_free(obj_t *obj, int no_rebalance) |
keir@19684 | 629 { |
keir@19684 | 630 pool_t *pool; |
keir@19684 | 631 uint64_t old_oid; |
keir@19684 | 632 |
keir@19684 | 633 ASSERT_SPINLOCK(&obj->obj_spinlock); |
keir@19684 | 634 ASSERT(obj != NULL); |
keir@19684 | 635 ASSERT_SENTINEL(obj,OBJ); |
keir@19684 | 636 ASSERT(obj->pgp_count == 0); |
keir@19684 | 637 pool = obj->pool; |
keir@19684 | 638 ASSERT(pool != NULL); |
keir@20067 | 639 ASSERT(pool->client != NULL); |
keir@19684 | 640 ASSERT_WRITELOCK(&pool->pool_rwlock); |
keir@19684 | 641 if ( obj->tree_root.rnode != NULL ) /* may be a "stump" with no leaves */ |
keir@19684 | 642 radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free); |
keir@19684 | 643 ASSERT((long)obj->objnode_count == 0); |
keir@19684 | 644 ASSERT(obj->tree_root.rnode == NULL); |
keir@19684 | 645 pool->obj_count--; |
keir@19684 | 646 ASSERT(pool->obj_count >= 0); |
keir@19684 | 647 INVERT_SENTINEL(obj,OBJ); |
keir@19684 | 648 obj->pool = NULL; |
keir@19684 | 649 old_oid = obj->oid; |
keir@19684 | 650 obj->oid = -1; |
keir@19684 | 651 obj->last_client = CLI_ID_NULL; |
keir@19684 | 652 atomic_dec_and_assert(global_obj_count); |
keir@19684 | 653 /* use no_rebalance only if all objects are being destroyed anyway */ |
keir@19684 | 654 if ( !no_rebalance ) |
keir@19684 | 655 rb_erase(&obj->rb_tree_node,&pool->obj_rb_root[OBJ_HASH(old_oid)]); |
keir@19684 | 656 tmem_free(obj,sizeof(obj_t),pool); |
keir@19684 | 657 } |
keir@19684 | 658 |
keir@19684 | 659 static NOINLINE int obj_rb_insert(struct rb_root *root, obj_t *obj) |
keir@19684 | 660 { |
keir@19684 | 661 struct rb_node **new, *parent = NULL; |
keir@19684 | 662 obj_t *this; |
keir@19684 | 663 |
keir@19684 | 664 new = &(root->rb_node); |
keir@19684 | 665 while ( *new ) |
keir@19684 | 666 { |
keir@19684 | 667 this = container_of(*new, obj_t, rb_tree_node); |
keir@19684 | 668 parent = *new; |
keir@19684 | 669 if ( obj->oid < this->oid ) |
keir@19684 | 670 new = &((*new)->rb_left); |
keir@19684 | 671 else if ( obj->oid > this->oid ) |
keir@19684 | 672 new = &((*new)->rb_right); |
keir@19684 | 673 else |
keir@19684 | 674 return 0; |
keir@19684 | 675 } |
keir@19684 | 676 rb_link_node(&obj->rb_tree_node, parent, new); |
keir@19684 | 677 rb_insert_color(&obj->rb_tree_node, root); |
keir@19684 | 678 return 1; |
keir@19684 | 679 } |
keir@19684 | 680 |
keir@19684 | 681 /* |
keir@19684 | 682 * allocate, initialize, and insert a tmem_object_root |
keir@19684 | 683 * (should be called only if find failed) |
keir@19684 | 684 */ |
keir@19684 | 685 static NOINLINE obj_t * obj_new(pool_t *pool, uint64_t oid) |
keir@19684 | 686 { |
keir@19684 | 687 obj_t *obj; |
keir@19684 | 688 |
keir@19684 | 689 ASSERT(pool != NULL); |
keir@19684 | 690 ASSERT_WRITELOCK(&pool->pool_rwlock); |
keir@19684 | 691 if ( (obj = tmem_malloc(obj_t,pool)) == NULL ) |
keir@19684 | 692 return NULL; |
keir@19684 | 693 pool->obj_count++; |
keir@19684 | 694 if (pool->obj_count > pool->obj_count_max) |
keir@19684 | 695 pool->obj_count_max = pool->obj_count; |
keir@19684 | 696 atomic_inc_and_max(global_obj_count); |
keir@19684 | 697 INIT_RADIX_TREE(&obj->tree_root,0); |
keir@19684 | 698 spin_lock_init(&obj->obj_spinlock); |
keir@19684 | 699 obj->pool = pool; |
keir@19684 | 700 obj->oid = oid; |
keir@19684 | 701 obj->objnode_count = 0; |
keir@19684 | 702 obj->pgp_count = 0; |
keir@19684 | 703 obj->last_client = CLI_ID_NULL; |
keir@19684 | 704 SET_SENTINEL(obj,OBJ); |
keir@19684 | 705 tmem_spin_lock(&obj->obj_spinlock); |
keir@19684 | 706 obj_rb_insert(&pool->obj_rb_root[OBJ_HASH(oid)], obj); |
keir@19684 | 707 obj->no_evict = 1; |
keir@19684 | 708 ASSERT_SPINLOCK(&obj->obj_spinlock); |
keir@19684 | 709 return obj; |
keir@19684 | 710 } |
keir@19684 | 711 |
keir@19684 | 712 /* free an object after destroying any pgps in it */ |
keir@19734 | 713 static NOINLINE void obj_destroy(obj_t *obj, int no_rebalance) |
keir@19684 | 714 { |
keir@19684 | 715 ASSERT_WRITELOCK(&obj->pool->pool_rwlock); |
keir@19684 | 716 radix_tree_destroy(&obj->tree_root, pgp_destroy, rtn_free); |
keir@19734 | 717 obj_free(obj,no_rebalance); |
keir@19684 | 718 } |
keir@19684 | 719 |
keir@19734 | 720 /* destroys all objs in a pool, or only if obj->last_client matches cli_id */ |
keir@19734 | 721 static void pool_destroy_objs(pool_t *pool, bool_t selective, cli_id_t cli_id) |
keir@19684 | 722 { |
keir@19684 | 723 struct rb_node *node; |
keir@19684 | 724 obj_t *obj; |
keir@19684 | 725 int i; |
keir@19684 | 726 |
keir@19684 | 727 tmem_write_lock(&pool->pool_rwlock); |
keir@20535 | 728 pool->is_dying = 1; |
keir@19684 | 729 for (i = 0; i < OBJ_HASH_BUCKETS; i++) |
keir@19684 | 730 { |
keir@19684 | 731 node = rb_first(&pool->obj_rb_root[i]); |
keir@19684 | 732 while ( node != NULL ) |
keir@19684 | 733 { |
keir@19684 | 734 obj = container_of(node, obj_t, rb_tree_node); |
keir@19684 | 735 tmem_spin_lock(&obj->obj_spinlock); |
keir@19684 | 736 node = rb_next(node); |
keir@19734 | 737 ASSERT(obj->no_evict == 0); |
keir@19734 | 738 if ( !selective ) |
keir@20535 | 739 /* FIXME: should be obj,1 but walking/erasing rbtree is racy */ |
keir@20535 | 740 obj_destroy(obj,0); |
keir@19734 | 741 else if ( obj->last_client == cli_id ) |
keir@19734 | 742 obj_destroy(obj,0); |
keir@19684 | 743 else |
keir@19684 | 744 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 745 } |
keir@19684 | 746 } |
keir@19684 | 747 tmem_write_unlock(&pool->pool_rwlock); |
keir@19684 | 748 } |
keir@19684 | 749 |
keir@19684 | 750 |
keir@19684 | 751 /************ POOL MANIPULATION ROUTINES ******************************/ |
keir@19684 | 752 |
keir@19684 | 753 static pool_t * pool_alloc(void) |
keir@19684 | 754 { |
keir@19684 | 755 pool_t *pool; |
keir@19684 | 756 int i; |
keir@19684 | 757 |
keir@20079 | 758 if ( (pool = tmh_alloc_infra(sizeof(pool_t),__alignof__(pool_t))) == NULL ) |
keir@19684 | 759 return NULL; |
keir@19684 | 760 for (i = 0; i < OBJ_HASH_BUCKETS; i++) |
keir@19684 | 761 pool->obj_rb_root[i] = RB_ROOT; |
keir@19684 | 762 INIT_LIST_HEAD(&pool->pool_list); |
keir@20067 | 763 INIT_LIST_HEAD(&pool->persistent_page_list); |
keir@20067 | 764 pool->cur_pgp = NULL; |
keir@19684 | 765 rwlock_init(&pool->pool_rwlock); |
keir@19684 | 766 pool->pgp_count_max = pool->obj_count_max = 0; |
keir@19684 | 767 pool->objnode_count = pool->objnode_count_max = 0; |
keir@19684 | 768 atomic_set(&pool->pgp_count,0); |
keir@20067 | 769 pool->obj_count = 0; pool->shared_count = 0; |
keir@20067 | 770 pool->pageshift = PAGE_SHIFT - 12; |
keir@19684 | 771 pool->good_puts = pool->puts = pool->dup_puts_flushed = 0; |
keir@19684 | 772 pool->dup_puts_replaced = pool->no_mem_puts = 0; |
keir@19684 | 773 pool->found_gets = pool->gets = 0; |
keir@19684 | 774 pool->flushs_found = pool->flushs = 0; |
keir@19684 | 775 pool->flush_objs_found = pool->flush_objs = 0; |
keir@20535 | 776 pool->is_dying = 0; |
keir@19684 | 777 SET_SENTINEL(pool,POOL); |
keir@19684 | 778 return pool; |
keir@19684 | 779 } |
keir@19684 | 780 |
keir@19684 | 781 static NOINLINE void pool_free(pool_t *pool) |
keir@19684 | 782 { |
keir@19684 | 783 ASSERT_SENTINEL(pool,POOL); |
keir@19684 | 784 INVERT_SENTINEL(pool,POOL); |
keir@19684 | 785 pool->client = NULL; |
keir@19684 | 786 list_del(&pool->pool_list); |
keir@20079 | 787 tmh_free_infra(pool); |
keir@19684 | 788 } |
keir@19684 | 789 |
keir@19684 | 790 /* register new_client as a user of this shared pool and return new |
keir@19684 | 791 total number of registered users */ |
keir@19684 | 792 static int shared_pool_join(pool_t *pool, client_t *new_client) |
keir@19684 | 793 { |
keir@19684 | 794 sharelist_t *sl; |
keir@19684 | 795 |
keir@19684 | 796 ASSERT(is_shared(pool)); |
keir@19684 | 797 if ( (sl = tmem_malloc(sharelist_t,NULL)) == NULL ) |
keir@19684 | 798 return -1; |
keir@19684 | 799 sl->client = new_client; |
keir@19684 | 800 list_add_tail(&sl->share_list, &pool->share_list); |
keir@19734 | 801 if ( new_client->cli_id != pool->client->cli_id ) |
keir@19734 | 802 printk("adding new %s %d to shared pool owned by %s %d\n", |
keir@19734 | 803 client_str, new_client->cli_id, client_str, pool->client->cli_id); |
keir@19684 | 804 return ++pool->shared_count; |
keir@19684 | 805 } |
keir@19684 | 806 |
keir@19684 | 807 /* reassign "ownership" of the pool to another client that shares this pool */ |
keir@19684 | 808 static NOINLINE void shared_pool_reassign(pool_t *pool) |
keir@19684 | 809 { |
keir@19684 | 810 sharelist_t *sl; |
keir@19684 | 811 int poolid; |
keir@19684 | 812 client_t *old_client = pool->client, *new_client; |
keir@19684 | 813 |
keir@19684 | 814 ASSERT(is_shared(pool)); |
keir@19684 | 815 if ( list_empty(&pool->share_list) ) |
keir@19684 | 816 { |
keir@19684 | 817 ASSERT(pool->shared_count == 0); |
keir@19684 | 818 return; |
keir@19684 | 819 } |
keir@19684 | 820 old_client->pools[pool->pool_id] = NULL; |
keir@19684 | 821 sl = list_entry(pool->share_list.next, sharelist_t, share_list); |
keir@19684 | 822 ASSERT(sl->client != old_client); |
keir@19684 | 823 pool->client = new_client = sl->client; |
keir@19684 | 824 for (poolid = 0; poolid < MAX_POOLS_PER_DOMAIN; poolid++) |
keir@19684 | 825 if (new_client->pools[poolid] == pool) |
keir@19684 | 826 break; |
keir@19684 | 827 ASSERT(poolid != MAX_POOLS_PER_DOMAIN); |
keir@19734 | 828 new_client->eph_count += _atomic_read(pool->pgp_count); |
keir@19734 | 829 old_client->eph_count -= _atomic_read(pool->pgp_count); |
keir@19734 | 830 list_splice_init(&old_client->ephemeral_page_list, |
keir@19734 | 831 &new_client->ephemeral_page_list); |
keir@19684 | 832 printk("reassigned shared pool from %s=%d to %s=%d pool_id=%d\n", |
keir@19684 | 833 cli_id_str, old_client->cli_id, cli_id_str, new_client->cli_id, poolid); |
keir@19684 | 834 pool->pool_id = poolid; |
keir@19684 | 835 } |
keir@19684 | 836 |
keir@19684 | 837 /* destroy all objects with last_client same as passed cli_id, |
keir@19684 | 838 remove pool's cli_id from list of sharers of this pool */ |
keir@19684 | 839 static NOINLINE int shared_pool_quit(pool_t *pool, cli_id_t cli_id) |
keir@19684 | 840 { |
keir@19684 | 841 sharelist_t *sl; |
keir@19684 | 842 int s_poolid; |
keir@19684 | 843 |
keir@19684 | 844 ASSERT(is_shared(pool)); |
keir@19684 | 845 ASSERT(pool->client != NULL); |
keir@19684 | 846 |
keir@19734 | 847 ASSERT_WRITELOCK(&tmem_rwlock); |
keir@19734 | 848 pool_destroy_objs(pool,1,cli_id); |
keir@19684 | 849 list_for_each_entry(sl,&pool->share_list, share_list) |
keir@19684 | 850 { |
keir@19684 | 851 if (sl->client->cli_id != cli_id) |
keir@19684 | 852 continue; |
keir@19684 | 853 list_del(&sl->share_list); |
keir@19684 | 854 tmem_free(sl,sizeof(sharelist_t),pool); |
keir@19684 | 855 --pool->shared_count; |
keir@19684 | 856 if (pool->client->cli_id == cli_id) |
keir@19684 | 857 shared_pool_reassign(pool); |
keir@19684 | 858 if (pool->shared_count) |
keir@19684 | 859 return pool->shared_count; |
keir@19684 | 860 for (s_poolid = 0; s_poolid < MAX_GLOBAL_SHARED_POOLS; s_poolid++) |
keir@19684 | 861 if ( (global_shared_pools[s_poolid]) == pool ) |
keir@19684 | 862 { |
keir@19684 | 863 global_shared_pools[s_poolid] = NULL; |
keir@19684 | 864 break; |
keir@19684 | 865 } |
keir@19684 | 866 return 0; |
keir@19684 | 867 } |
keir@19684 | 868 printk("tmem: no match unsharing pool, %s=%d\n", |
keir@19684 | 869 cli_id_str,pool->client->cli_id); |
keir@19684 | 870 return -1; |
keir@19684 | 871 } |
keir@19684 | 872 |
keir@19684 | 873 /* flush all data (owned by cli_id) from a pool and, optionally, free it */ |
keir@19684 | 874 static void pool_flush(pool_t *pool, cli_id_t cli_id, bool_t destroy) |
keir@19684 | 875 { |
keir@19684 | 876 ASSERT(pool != NULL); |
keir@19684 | 877 if ( (is_shared(pool)) && (shared_pool_quit(pool,cli_id) > 0) ) |
keir@19684 | 878 { |
keir@19734 | 879 printk("tmem: %s=%d no longer using shared pool %d owned by %s=%d\n", |
keir@19734 | 880 cli_id_str, cli_id, pool->pool_id, cli_id_str,pool->client->cli_id); |
keir@19684 | 881 return; |
keir@19684 | 882 } |
keir@19684 | 883 printk("%s %s-%s tmem pool ",destroy?"destroying":"flushing", |
keir@19684 | 884 is_persistent(pool) ? "persistent" : "ephemeral" , |
keir@19684 | 885 is_shared(pool) ? "shared" : "private"); |
keir@19684 | 886 printk("%s=%d pool_id=%d\n", cli_id_str,pool->client->cli_id,pool->pool_id); |
keir@20067 | 887 if ( pool->client->live_migrating ) |
keir@20067 | 888 { |
keir@20067 | 889 printk("can't %s pool while %s is live-migrating\n", |
keir@20067 | 890 destroy?"destroy":"flush", client_str); |
keir@20067 | 891 return; |
keir@20067 | 892 } |
keir@19734 | 893 pool_destroy_objs(pool,0,CLI_ID_NULL); |
keir@19684 | 894 if ( destroy ) |
keir@19684 | 895 { |
keir@19684 | 896 pool->client->pools[pool->pool_id] = NULL; |
keir@19684 | 897 pool_free(pool); |
keir@19684 | 898 } |
keir@19684 | 899 } |
keir@19684 | 900 |
keir@19684 | 901 /************ CLIENT MANIPULATION OPERATIONS **************************/ |
keir@19684 | 902 |
keir@20067 | 903 static client_t *client_create(cli_id_t cli_id) |
keir@19684 | 904 { |
keir@20079 | 905 client_t *client = tmh_alloc_infra(sizeof(client_t),__alignof__(client_t)); |
keir@20067 | 906 int i; |
keir@19684 | 907 |
keir@19684 | 908 printk("tmem: initializing tmem capability for %s=%d...",cli_id_str,cli_id); |
keir@19684 | 909 if ( client == NULL ) |
keir@19684 | 910 { |
keir@19684 | 911 printk("failed... out of memory\n"); |
keir@19684 | 912 return NULL; |
keir@19684 | 913 } |
keir@19684 | 914 memset(client,0,sizeof(client_t)); |
keir@20964 | 915 if ( (client->tmh = tmh_client_init(cli_id)) == NULL ) |
keir@19684 | 916 { |
keir@19684 | 917 printk("failed... can't allocate host-dependent part of client\n"); |
keir@19684 | 918 if ( client ) |
keir@20079 | 919 tmh_free_infra(client); |
keir@19684 | 920 return NULL; |
keir@19684 | 921 } |
keir@20964 | 922 tmh_set_client_from_id(client, client->tmh, cli_id); |
keir@19684 | 923 client->cli_id = cli_id; |
keir@19684 | 924 #ifdef __i386__ |
keir@19684 | 925 client->compress = 0; |
keir@19684 | 926 #else |
keir@19684 | 927 client->compress = tmh_compression_enabled(); |
keir@19684 | 928 #endif |
keir@20067 | 929 client->shared_auth_required = tmh_shared_auth(); |
keir@20067 | 930 for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) |
keir@20067 | 931 client->shared_auth_uuid[i][0] = |
keir@20067 | 932 client->shared_auth_uuid[i][1] = -1L; |
keir@20067 | 933 client->frozen = 0; client->live_migrating = 0; |
keir@20067 | 934 client->weight = 0; client->cap = 0; |
keir@19684 | 935 list_add_tail(&client->client_list, &global_client_list); |
keir@19684 | 936 INIT_LIST_HEAD(&client->ephemeral_page_list); |
keir@20067 | 937 INIT_LIST_HEAD(&client->persistent_invalidated_list); |
keir@20067 | 938 client->cur_pgp = NULL; |
keir@19684 | 939 client->eph_count = client->eph_count_max = 0; |
keir@19897 | 940 client->total_cycles = 0; client->succ_pers_puts = 0; |
keir@19897 | 941 client->succ_eph_gets = 0; client->succ_pers_gets = 0; |
keir@19684 | 942 printk("ok\n"); |
keir@19684 | 943 return client; |
keir@19684 | 944 } |
keir@19684 | 945 |
keir@19684 | 946 static void client_free(client_t *client) |
keir@19684 | 947 { |
keir@19684 | 948 list_del(&client->client_list); |
keir@19684 | 949 tmh_client_destroy(client->tmh); |
keir@20507 | 950 tmh_free_infra(client); |
keir@19684 | 951 } |
keir@19684 | 952 |
keir@19684 | 953 /* flush all data from a client and, optionally, free it */ |
keir@19684 | 954 static void client_flush(client_t *client, bool_t destroy) |
keir@19684 | 955 { |
keir@19684 | 956 int i; |
keir@19684 | 957 pool_t *pool; |
keir@19684 | 958 |
keir@19684 | 959 for (i = 0; i < MAX_POOLS_PER_DOMAIN; i++) |
keir@19684 | 960 { |
keir@19684 | 961 if ( (pool = client->pools[i]) == NULL ) |
keir@19684 | 962 continue; |
keir@19684 | 963 pool_flush(pool,client->cli_id,destroy); |
keir@19684 | 964 if ( destroy ) |
keir@19684 | 965 client->pools[i] = NULL; |
keir@19684 | 966 } |
keir@19684 | 967 if ( destroy ) |
keir@19684 | 968 client_free(client); |
keir@19684 | 969 } |
keir@19684 | 970 |
keir@19684 | 971 static bool_t client_over_quota(client_t *client) |
keir@19684 | 972 { |
keir@19684 | 973 int total = _atomic_read(client_weight_total); |
keir@19684 | 974 |
keir@19684 | 975 ASSERT(client != NULL); |
keir@19684 | 976 if ( (total == 0) || (client->weight == 0) || |
keir@19684 | 977 (client->eph_count == 0) ) |
keir@19684 | 978 return 0; |
keir@19684 | 979 return ( ((global_eph_count*100L) / client->eph_count ) > |
keir@19684 | 980 ((total*100L) / client->weight) ); |
keir@19684 | 981 } |
keir@19684 | 982 |
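
The inequality above reads more easily after cross-multiplying; a worked example with invented numbers:

```c
/* Ignoring integer truncation, the test above rearranges to
 *   global_eph_count * client->weight  >  total * client->eph_count.
 * Worked numbers: total == 100, weight == 25, global_eph_count == 1000:
 *   eph_count == 200: 1000*100/200 == 500 > 100*100/25 == 400 -> true
 *   eph_count == 400: 1000*100/400 == 250 > 400               -> false
 */
```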
keir@20067 | 983 static void client_freeze(client_t *client, int freeze) |
keir@20067 | 984 { |
keir@20067 | 985 client->frozen = freeze; |
keir@20067 | 986 } |
keir@20067 | 987 |
keir@19684 | 988 /************ MEMORY REVOCATION ROUTINES *******************************/ |
keir@19684 | 989 |
keir@19684 | 990 static int tmem_evict(void) |
keir@19684 | 991 { |
keir@19684 | 992 client_t *client = tmh_client_from_current(); |
keir@19684 | 993 pgp_t *pgp = NULL, *pgp_del; |
keir@19684 | 994 obj_t *obj; |
keir@19684 | 995 pool_t *pool; |
keir@19684 | 996 int ret = 0; |
keir@19684 | 997 bool_t hold_pool_rwlock = 0; |
keir@19684 | 998 |
keir@19684 | 999 evict_attempts++; |
keir@19684 | 1000 tmem_spin_lock(&eph_lists_spinlock); |
keir@19684 | 1001 if ( (client != NULL) && client_over_quota(client) && |
keir@19684 | 1002 !list_empty(&client->ephemeral_page_list) ) |
keir@19684 | 1003 { |
keir@19684 | 1004 list_for_each_entry(pgp,&client->ephemeral_page_list,client_eph_pages) |
keir@19684 | 1005 { |
keir@19684 | 1006 obj = pgp->obj; |
keir@19684 | 1007 pool = obj->pool; |
keir@20535 | 1008 if ( pool->is_dying ) |
keir@20535 | 1009 continue; |
keir@19684 | 1010 if ( tmh_lock_all && !obj->no_evict ) |
keir@19684 | 1011 goto found; |
keir@19684 | 1012 if ( tmem_spin_trylock(&obj->obj_spinlock) ) |
keir@19684 | 1013 { |
keir@19684 | 1014 if ( obj->pgp_count > 1 ) |
keir@19684 | 1015 goto found; |
keir@19684 | 1016 if ( tmem_write_trylock(&pool->pool_rwlock) ) |
keir@19684 | 1017 { |
keir@19684 | 1018 hold_pool_rwlock = 1; |
keir@19684 | 1019 goto found; |
keir@19684 | 1020 } |
keir@19684 | 1021 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1022 } |
keir@19684 | 1023 } |
keir@19684 | 1024 } else if ( list_empty(&global_ephemeral_page_list) ) { |
keir@19684 | 1025 goto out; |
keir@19684 | 1026 } else { |
keir@19684 | 1027 list_for_each_entry(pgp,&global_ephemeral_page_list,global_eph_pages) |
keir@19684 | 1028 { |
keir@19684 | 1029 obj = pgp->obj; |
keir@19684 | 1030 pool = obj->pool; |
keir@20535 | 1031 if ( pool->is_dying ) |
keir@20535 | 1032 continue; |
keir@19684 | 1033 if ( tmh_lock_all && !obj->no_evict ) |
keir@19684 | 1034 goto found; |
keir@19684 | 1035 if ( tmem_spin_trylock(&obj->obj_spinlock) ) |
keir@19684 | 1036 { |
keir@19684 | 1037 if ( obj->pgp_count > 1 ) |
keir@19684 | 1038 goto found; |
keir@19684 | 1039 if ( tmem_write_trylock(&pool->pool_rwlock) ) |
keir@19684 | 1040 { |
keir@19684 | 1041 hold_pool_rwlock = 1; |
keir@19684 | 1042 goto found; |
keir@19684 | 1043 } |
keir@19684 | 1044 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1045 } |
keir@19684 | 1046 } |
keir@19684 | 1047 } |
keir@19684 | 1048 |
keir@19684 | 1049 ret = 0; |
keir@19684 | 1050 goto out; |
keir@19684 | 1051 |
keir@19684 | 1052 found: |
keir@19684 | 1053 ASSERT(pgp != NULL); |
keir@19684 | 1054 ASSERT_SENTINEL(pgp,PGD); |
keir@19684 | 1055 obj = pgp->obj; |
keir@19684 | 1056 ASSERT(obj != NULL); |
keir@19684 | 1057 ASSERT(obj->no_evict == 0); |
keir@19684 | 1058 ASSERT(obj->pool != NULL); |
keir@19684 | 1059 ASSERT_SENTINEL(obj,OBJ); |
keir@19684 | 1060 |
keir@19684 | 1061 ASSERT_SPINLOCK(&obj->obj_spinlock); |
keir@19684 | 1062 pgp_del = pgp_delete_from_obj(obj, pgp->index); |
keir@19684 | 1063 ASSERT(pgp_del == pgp); |
keir@19684 | 1064 pgp_delete(pgp,1); |
keir@19684 | 1065 if ( obj->pgp_count == 0 ) |
keir@19684 | 1066 { |
keir@19684 | 1067 ASSERT_WRITELOCK(&pool->pool_rwlock); |
keir@19684 | 1068 obj_free(obj,0); |
keir@19684 | 1069 } |
keir@19684 | 1070 else |
keir@19684 | 1071 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1072 if ( hold_pool_rwlock ) |
keir@19684 | 1073 tmem_write_unlock(&pool->pool_rwlock); |
keir@19684 | 1074 evicted_pgs++; |
keir@19684 | 1075 ret = 1; |
keir@19684 | 1076 |
keir@19684 | 1077 out: |
keir@19684 | 1078 tmem_spin_unlock(&eph_lists_spinlock); |
keir@19684 | 1079 return ret; |
keir@19684 | 1080 } |
keir@19684 | 1081 |
keir@19684 | 1082 static unsigned long tmem_relinquish_npages(unsigned long n) |
keir@19684 | 1083 { |
keir@19684 | 1084 unsigned long avail_pages = 0; |
keir@19684 | 1085 |
keir@19684 | 1086 while ( (avail_pages = tmh_avail_pages()) < n ) |
keir@19684 | 1087 { |
keir@19684 | 1088 if ( !tmem_evict() ) |
keir@19684 | 1089 break; |
keir@19684 | 1090 } |
keir@19684 | 1091 if ( avail_pages ) |
keir@19684 | 1092 tmh_release_avail_pages_to_host(); |
keir@19684 | 1093 return avail_pages; |
keir@19684 | 1094 } |
keir@19684 | 1095 |
keir@20648 | 1096 /* Under certain conditions (e.g. if each client is putting pages for exactly |
keir@20648 | 1097 * one object), once locks are held, freeing up memory may |
keir@20648 | 1098 * result in livelocks and very long "put" times, so we try to ensure there |
keir@20648 | 1099 * is a minimum amount of memory (1MB) available BEFORE any data structure |
keir@20648 | 1100 * locks are held */ |
keir@20648 | 1101 static inline void tmem_ensure_avail_pages(void) |
keir@20648 | 1102 { |
keir@20648 | 1103 int failed_evict = 10; |
keir@20648 | 1104 |
keir@20648 | 1105 while ( !tmh_free_mb() ) |
keir@20648 | 1106 { |
keir@20648 | 1107 if ( tmem_evict() ) |
keir@20648 | 1108 continue; |
keir@20648 | 1109 else if ( failed_evict-- <= 0 ) |
keir@20648 | 1110 break; |
keir@20648 | 1111 } |
keir@20648 | 1112 } |
keir@20648 | 1113 |
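
Per the comment above, the point is ordering: callers are expected to pre-free memory before any data-structure lock is taken. A hypothetical entry path (the real tmem dispatcher is outside this excerpt):

```c
/* Hypothetical entry path; the key point is calling
 * tmem_ensure_avail_pages() BEFORE any lock is held. */
static void tmem_op_entry_sketch(void)
{
    tmem_ensure_avail_pages();      /* evict until ~1MB is free */
    tmem_write_lock(&tmem_rwlock);  /* only now take locks */
    /* ... perform the put ... */
    tmem_write_unlock(&tmem_rwlock);
}
```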
keir@19684 | 1114 /************ TMEM CORE OPERATIONS ************************************/ |
keir@19684 | 1115 |
keir@20067 | 1116 static NOINLINE int do_tmem_put_compress(pgp_t *pgp, tmem_cli_mfn_t cmfn, |
keir@20067 | 1117 void *cva) |
keir@19684 | 1118 { |
keir@19684 | 1119 void *dst, *p; |
keir@19684 | 1120 size_t size; |
keir@19684 | 1121 int ret = 0; |
keir@19684 | 1122 DECL_LOCAL_CYC_COUNTER(compress); |
keir@19684 | 1123 |
keir@19684 | 1124 ASSERT(pgp != NULL); |
keir@19684 | 1125 ASSERT(pgp->obj != NULL); |
keir@19684 | 1126 ASSERT_SPINLOCK(&pgp->obj->obj_spinlock); |
keir@19684 | 1127 ASSERT(pgp->obj->pool != NULL); |
keir@19684 | 1128 ASSERT(pgp->obj->pool->client != NULL); |
keir@19684 | 1129 #ifdef __i386__ |
keir@19684 | 1130 return -ENOMEM; |
keir@19684 | 1131 #endif |
keir@19684 | 1132 if ( pgp->pfp != NULL ) |
keir@19684 | 1133 pgp_free_data(pgp, pgp->obj->pool); /* FIXME... is this right? */ |
keir@19684 | 1134 START_CYC_COUNTER(compress); |
keir@20067 | 1135 ret = tmh_compress_from_client(cmfn, &dst, &size, cva); |
keir@19684 | 1136 if ( (ret == -EFAULT) || (ret == 0) ) |
keir@19684 | 1137 goto out; |
keir@19684 | 1138 else if ( (size == 0) || (size >= tmem_subpage_maxsize()) ) |
keir@19684 | 1139 ret = 0; |
keir@19684 | 1140 else if ( (p = tmem_malloc_bytes(size,pgp->obj->pool)) == NULL ) |
keir@19684 | 1141 ret = -ENOMEM; |
keir@19684 | 1142 else |
keir@19684 | 1143 { |
keir@19684 | 1144 memcpy(p,dst,size); |
keir@19684 | 1145 pgp->cdata = p; |
keir@19684 | 1146 pgp->size = size; |
keir@19684 | 1147 pgp->obj->pool->client->compressed_pages++; |
keir@19684 | 1148 pgp->obj->pool->client->compressed_sum_size += size; |
keir@19684 | 1149 ret = 1; |
keir@19684 | 1150 } |
keir@19684 | 1151 |
keir@19684 | 1152 out: |
keir@19684 | 1153 END_CYC_COUNTER(compress); |
keir@19684 | 1154 return ret; |
keir@19684 | 1155 } |
keir@19684 | 1156 |
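
do_tmem_put_compress() has a four-way return contract that both callers below dispatch on. A condensed sketch of that dispatch (the real callers use gotos to their copy_uncompressed/failure labels instead):

```c
/* Condensed sketch of the return contract consumed by the callers. */
static int put_compress_dispatch(pgp_t *pgp, tmem_cli_mfn_t cmfn, void *cva)
{
    switch ( do_tmem_put_compress(pgp, cmfn, cva) )
    {
    case 1:       return 1;        /* stored compressed in pgp->cdata */
    case 0:       break;           /* not worthwhile: copy uncompressed */
    case -ENOMEM: return -ENOMEM;  /* allocation failed */
    case -EFAULT: return -EFAULT;  /* bad client mfn */
    }
    /* ... fall back to tmem_page_alloc() + tmh_copy_from_client() ... */
    return 0;
}
```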
keir@19684 | 1157 static NOINLINE int do_tmem_dup_put(pgp_t *pgp, tmem_cli_mfn_t cmfn, |
keir@20067 | 1158 uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cva) |
keir@19684 | 1159 { |
keir@19684 | 1160 pool_t *pool; |
keir@19684 | 1161 obj_t *obj; |
keir@19684 | 1162 client_t *client; |
keir@19684 | 1163 pgp_t *pgpfound = NULL; |
keir@19684 | 1164 int ret; |
keir@19684 | 1165 |
keir@19684 | 1166 ASSERT(pgp != NULL); |
keir@19684 | 1167 ASSERT(pgp->pfp != NULL); |
keir@19684 | 1168 ASSERT(pgp->size != -1); |
keir@19684 | 1169 obj = pgp->obj; |
keir@19684 | 1170 ASSERT_SPINLOCK(&obj->obj_spinlock); |
keir@19684 | 1171 ASSERT(obj != NULL); |
keir@19684 | 1172 pool = obj->pool; |
keir@19684 | 1173 ASSERT(pool != NULL); |
keir@19684 | 1174 client = pool->client; |
keir@20067 | 1175 if ( client->live_migrating ) |
keir@20067 | 1176 goto failed_dup; /* no dups allowed when migrating */ |
keir@20067 | 1177 /* can we successfully manipulate pgp to change out the data? */ |
keir@20067 | 1178 if ( len != 0 && client->compress && pgp->size != 0 ) |
keir@19684 | 1179 { |
keir@20067 | 1180 ret = do_tmem_put_compress(pgp,cmfn,cva); |
keir@19684 | 1181 if ( ret == 1 ) |
keir@19684 | 1182 goto done; |
keir@19684 | 1183 else if ( ret == 0 ) |
keir@19684 | 1184 goto copy_uncompressed; |
keir@19684 | 1185 else if ( ret == -ENOMEM ) |
keir@19684 | 1186 goto failed_dup; |
keir@19684 | 1187 else if ( ret == -EFAULT ) |
keir@19684 | 1188 goto bad_copy; |
keir@19684 | 1189 } |
keir@19684 | 1190 |
keir@19684 | 1191 copy_uncompressed: |
keir@19684 | 1192 if ( pgp->pfp ) |
keir@19684 | 1193 pgp_free_data(pgp, pool); |
keir@19684 | 1194 if ( ( pgp->pfp = tmem_page_alloc(pool) ) == NULL ) |
keir@19684 | 1195 goto failed_dup; |
keir@19684 | 1196 /* tmh_copy_from_client properly handles len==0 and offsets != 0 */ |
keir@20067 | 1197 ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,0); |
keir@19684 | 1198 if ( ret == -EFAULT ) |
keir@19684 | 1199 goto bad_copy; |
keir@19684 | 1200 pgp->size = 0; |
keir@19684 | 1201 |
keir@19684 | 1202 done: |
keir@19684 | 1203 /* successfully replaced data, clean up and return success */ |
keir@19684 | 1204 if ( is_shared(pool) ) |
keir@19684 | 1205 obj->last_client = client->cli_id; |
keir@19684 | 1206 obj->no_evict = 0; |
keir@19684 | 1207 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1208 pool->dup_puts_replaced++; |
keir@19684 | 1209 pool->good_puts++; |
keir@19897 | 1210 if ( is_persistent(pool) ) |
keir@19897 | 1211 client->succ_pers_puts++; |
keir@19684 | 1212 return 1; |
keir@19684 | 1213 |
keir@19684 | 1214 bad_copy: |
keir@19684 | 1215 /* this should only happen if the client passed a bad mfn */ |
keir@19684 | 1216 failed_copies++; |
keir@19684 | 1217 ASSERT(0); |
keir@19684 | 1218 return -EFAULT; |
keir@19684 | 1219 |
keir@19684 | 1220 failed_dup: |
keir@19684 | 1221 /* couldn't change out the data, flush the old data and return |
keir@19684 | 1222 * -ENOSPC instead of -ENOMEM to differentiate failed _dup_ put */ |
keir@19684 | 1223 pgpfound = pgp_delete_from_obj(obj, pgp->index); |
keir@19684 | 1224 ASSERT(pgpfound == pgp); |
keir@19684 | 1225 pgp_delete(pgpfound,0); |
keir@19684 | 1226 if ( obj->pgp_count == 0 ) |
keir@19684 | 1227 { |
keir@19684 | 1228 tmem_write_lock(&pool->pool_rwlock); |
keir@19684 | 1229 obj_free(obj,0); |
keir@19684 | 1230 tmem_write_unlock(&pool->pool_rwlock); |
keir@19684 | 1231 } else { |
keir@19684 | 1232 obj->no_evict = 0; |
keir@19684 | 1233 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1234 } |
keir@19684 | 1235 pool->dup_puts_flushed++; |
keir@19684 | 1236 return -ENOSPC; |
keir@19684 | 1237 } |
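The -ENOSPC vs -ENOMEM distinction above matters to callers: a failed dup put also flushes the previously stored copy. A minimal caller-side sketch of that contract (tmem_op_put() and note_page_absent() are hypothetical guest-side helpers, not part of this file):

    /* sketch only; tmem_op_put() and note_page_absent() are hypothetical */
    int rc = tmem_op_put(pool_id, oid, index, pfn);
    if ( rc == -ENOSPC )
        /* dup put failed AND the stale copy was flushed, so a later
         * get must not expect to find the old data */
        note_page_absent(oid, index);
    else if ( rc == -ENOMEM )
        /* a first-time put failed: nothing was stored, nothing was lost */
        ;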
keir@19684 | 1238 |
keir@19684 | 1239 |
keir@20067 | 1240 static NOINLINE int do_tmem_put(pool_t *pool, |
keir@20067 | 1241 uint64_t oid, uint32_t index, |
keir@19684 | 1242 tmem_cli_mfn_t cmfn, uint32_t tmem_offset, |
keir@20067 | 1243 uint32_t pfn_offset, uint32_t len, void *cva) |
keir@19684 | 1244 { |
keir@19684 | 1245 obj_t *obj = NULL, *objfound = NULL, *objnew = NULL; |
keir@19684 | 1246 pgp_t *pgp = NULL, *pgpdel = NULL; |
keir@19684 | 1247 client_t *client = pool->client; |
keir@19684 | 1248 int ret = client->frozen ? -EFROZEN : -ENOMEM; |
keir@19684 | 1249 |
keir@19684 | 1250 ASSERT(pool != NULL); |
keir@19684 | 1251 pool->puts++; |
keir@19684 | 1252 /* does page already exist (dup)? if so, handle specially */ |
keir@19684 | 1253 if ( (obj = objfound = obj_find(pool,oid)) != NULL ) |
keir@19684 | 1254 { |
keir@19684 | 1255 ASSERT_SPINLOCK(&objfound->obj_spinlock); |
keir@19684 | 1256 if ((pgp = pgp_lookup_in_obj(objfound, index)) != NULL) |
keir@20067 | 1257 return do_tmem_dup_put(pgp,cmfn,tmem_offset,pfn_offset,len,cva); |
keir@19684 | 1258 } |
keir@19684 | 1259 |
keir@19684 | 1260 /* no puts allowed into a frozen pool (except dup puts) */ |
keir@19684 | 1261 if ( client->frozen ) |
keir@19684 | 1262 goto free; |
keir@19684 | 1263 |
keir@19684 | 1264 if ( objfound == NULL ) |
keir@19684 | 1265 { |
keir@19684 | 1266 tmem_write_lock(&pool->pool_rwlock); |
keir@19684 | 1267 if ( (obj = objnew = obj_new(pool,oid)) == NULL ) |
keir@19684 | 1268 { |
keir@19684 | 1269 tmem_write_unlock(&pool->pool_rwlock); |
keir@19684 | 1270 return -ENOMEM; |
keir@19684 | 1271 } |
keir@19684 | 1272 ASSERT_SPINLOCK(&objnew->obj_spinlock); |
keir@19684 | 1273 tmem_write_unlock(&pool->pool_rwlock); |
keir@19684 | 1274 } |
keir@19684 | 1275 |
keir@19684 | 1276 ASSERT((obj != NULL)&&((objnew==obj)||(objfound==obj))&&(objnew!=objfound)); |
keir@19684 | 1277 ASSERT_SPINLOCK(&obj->obj_spinlock); |
keir@19684 | 1278 if ( (pgp = pgp_alloc(obj)) == NULL ) |
keir@19684 | 1279 goto free; |
keir@19684 | 1280 |
keir@19684 | 1281 ret = pgp_add_to_obj(obj, index, pgp); |
keir@19684 | 1282 if ( ret == -ENOMEM ) |
keir@19684 | 1283 /* warning, may result in partially built radix tree ("stump") */ |
keir@19684 | 1284 goto free; |
keir@19684 | 1285 ASSERT(ret != -EEXIST); |
keir@19684 | 1286 pgp->index = index; |
keir@19684 | 1287 |
keir@20067 | 1288 if ( len != 0 && client->compress ) |
keir@19684 | 1289 { |
keir@19684 | 1290 ASSERT(pgp->pfp == NULL); |
keir@20067 | 1291 ret = do_tmem_put_compress(pgp,cmfn,cva); |
keir@19684 | 1292 if ( ret == 1 ) |
keir@19684 | 1293 goto insert_page; |
keir@19684 | 1294 if ( ret == -ENOMEM ) |
keir@19684 | 1295 { |
keir@19684 | 1296 client->compress_nomem++; |
keir@19684 | 1297 goto delete_and_free; |
keir@19684 | 1298 } |
keir@19684 | 1299 if ( ret == 0 ) |
keir@19684 | 1300 { |
keir@19684 | 1301 client->compress_poor++; |
keir@19684 | 1302 goto copy_uncompressed; |
keir@19684 | 1303 } |
keir@19684 | 1304 if ( ret == -EFAULT ) |
keir@19684 | 1305 goto bad_copy; |
keir@19684 | 1306 } |
keir@19684 | 1307 |
keir@19684 | 1308 copy_uncompressed: |
keir@19684 | 1309 if ( ( pgp->pfp = tmem_page_alloc(pool) ) == NULL ) |
keir@19684 | 1310 { |
keir@19684 | 1311 ret = -ENOMEM; |
keir@19684 | 1312 goto delete_and_free; |
keir@19684 | 1313 } |
keir@19684 | 1314 /* tmh_copy_from_client properly handles len==0 (TMEM_NEW_PAGE) */ |
keir@20067 | 1315 ret = tmh_copy_from_client(pgp->pfp,cmfn,tmem_offset,pfn_offset,len,cva); |
keir@19684 | 1316 if ( ret == -EFAULT ) |
keir@19684 | 1317 goto bad_copy; |
keir@19684 | 1318 pgp->size = 0; |
keir@19684 | 1319 |
keir@19684 | 1320 insert_page: |
keir@19684 | 1321 if ( is_ephemeral(pool) ) |
keir@19684 | 1322 { |
keir@19684 | 1323 tmem_spin_lock(&eph_lists_spinlock); |
keir@19684 | 1324 list_add_tail(&pgp->global_eph_pages, |
keir@19684 | 1325 &global_ephemeral_page_list); |
keir@19684 | 1326 if (++global_eph_count > global_eph_count_max) |
keir@19684 | 1327 global_eph_count_max = global_eph_count; |
keir@19684 | 1328 list_add_tail(&pgp->client_eph_pages, |
keir@19684 | 1329 &client->ephemeral_page_list); |
keir@19684 | 1330 if (++client->eph_count > client->eph_count_max) |
keir@19684 | 1331 client->eph_count_max = client->eph_count; |
keir@19684 | 1332 tmem_spin_unlock(&eph_lists_spinlock); |
keir@20067 | 1333 } else { /* is_persistent */ |
keir@20067 | 1334 tmem_spin_lock(&pers_lists_spinlock); |
keir@20067 | 1335 list_add_tail(&pgp->pool_pers_pages, |
keir@20067 | 1336 &pool->persistent_page_list); |
keir@20067 | 1337 tmem_spin_unlock(&pers_lists_spinlock); |
keir@19684 | 1338 } |
keir@19684 | 1339 ASSERT( ((objnew==obj)||(objfound==obj)) && (objnew!=objfound)); |
keir@19684 | 1340 if ( is_shared(pool) ) |
keir@19684 | 1341 obj->last_client = client->cli_id; |
keir@19684 | 1342 obj->no_evict = 0; |
keir@19684 | 1343 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1344 pool->good_puts++; |
keir@19897 | 1345 if ( is_persistent(pool) ) |
keir@19897 | 1346 client->succ_pers_puts++; |
keir@19684 | 1347 return 1; |
keir@19684 | 1348 |
keir@19684 | 1349 delete_and_free: |
keir@19684 | 1350 ASSERT((obj != NULL) && (pgp != NULL) && (pgp->index != -1)); |
keir@19684 | 1351 pgpdel = pgp_delete_from_obj(obj, pgp->index); |
keir@19684 | 1352 ASSERT(pgp == pgpdel); |
keir@19684 | 1353 |
keir@19684 | 1354 free: |
keir@19684 | 1355 if ( pgp ) |
keir@19684 | 1356 pgp_delete(pgp,0); |
keir@19684 | 1357 if ( objfound ) |
keir@19684 | 1358 { |
keir@19684 | 1359 objfound->no_evict = 0; |
keir@19684 | 1360 tmem_spin_unlock(&objfound->obj_spinlock); |
keir@19684 | 1361 } |
keir@19684 | 1362 if ( objnew ) |
keir@19684 | 1363 { |
keir@19684 | 1364 tmem_write_lock(&pool->pool_rwlock); |
keir@19684 | 1365 obj_free(objnew,0); |
keir@19684 | 1366 tmem_write_unlock(&pool->pool_rwlock); |
keir@19684 | 1367 } |
keir@19684 | 1368 pool->no_mem_puts++; |
keir@19684 | 1369 return ret; |
keir@19684 | 1370 |
keir@19684 | 1371 bad_copy: |
keir@19684 | 1372 /* this should only happen if the client passed a bad mfn */ |
keir@19684 | 1373 failed_copies++; |
keir@19684 | 1374 ASSERT(0); |
keir@19684 | 1375 goto delete_and_free; |
keir@19684 | 1376 } |
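For reference, the do_tmem_put_compress() return values that both put paths dispatch on, restated in one place (taken directly from the branches above):

    /* ret from do_tmem_put_compress():
     *    1        stored compressed                -> insert_page / done
     *    0        compressed poorly, not worth it  -> copy_uncompressed
     *   -ENOMEM   no room for the compressed copy  -> delete_and_free / failed_dup
     *   -EFAULT   bad client mfn                   -> bad_copy
     */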
keir@19684 | 1377 |
keir@19684 | 1378 static NOINLINE int do_tmem_get(pool_t *pool, uint64_t oid, uint32_t index, |
keir@19684 | 1379 tmem_cli_mfn_t cmfn, uint32_t tmem_offset, |
keir@20067 | 1380 uint32_t pfn_offset, uint32_t len, void *cva) |
keir@19684 | 1381 { |
keir@19684 | 1382 obj_t *obj; |
keir@19684 | 1383 pgp_t *pgp; |
keir@19684 | 1384 client_t *client = pool->client; |
keir@19684 | 1385 DECL_LOCAL_CYC_COUNTER(decompress); |
keir@19684 | 1386 |
keir@19684 | 1387 if ( !_atomic_read(pool->pgp_count) ) |
keir@19684 | 1388 return -EEMPTY; |
keir@19684 | 1389 |
keir@19684 | 1390 pool->gets++; |
keir@19684 | 1391 obj = obj_find(pool,oid); |
keir@19684 | 1392 if ( obj == NULL ) |
keir@19684 | 1393 return 0; |
keir@19684 | 1394 |
keir@19684 | 1395 ASSERT_SPINLOCK(&obj->obj_spinlock); |
keir@19684 | 1396 if ( is_shared(pool) || is_persistent(pool) ) |
keir@19684 | 1397 pgp = pgp_lookup_in_obj(obj, index); |
keir@19684 | 1398 else |
keir@19684 | 1399 pgp = pgp_delete_from_obj(obj, index); |
keir@19684 | 1400 if ( pgp == NULL ) |
keir@19684 | 1401 { |
keir@19684 | 1402 obj->no_evict = 0; |
keir@19684 | 1403 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1404 return 0; |
keir@19684 | 1405 } |
keir@19684 | 1406 ASSERT(pgp->size != -1); |
keir@19684 | 1407 if ( pgp->size != 0 ) |
keir@19684 | 1408 { |
keir@19684 | 1409 START_CYC_COUNTER(decompress); |
keir@20067 | 1410 if ( tmh_decompress_to_client(cmfn, pgp->cdata, |
keir@20067 | 1411 pgp->size, cva) == -EFAULT ) |
keir@19684 | 1412 goto bad_copy; |
keir@19684 | 1413 END_CYC_COUNTER(decompress); |
keir@19684 | 1414 } |
keir@19684 | 1415 else if ( tmh_copy_to_client(cmfn, pgp->pfp, tmem_offset, |
keir@20067 | 1416 pfn_offset, len, cva) == -EFAULT) |
keir@19684 | 1417 goto bad_copy; |
keir@19684 | 1418 if ( is_ephemeral(pool) ) |
keir@19684 | 1419 { |
keir@19684 | 1420 if ( is_private(pool) ) |
keir@19684 | 1421 { |
keir@19684 | 1422 pgp_delete(pgp,0); |
keir@19684 | 1423 if ( obj->pgp_count == 0 ) |
keir@19684 | 1424 { |
keir@19684 | 1425 tmem_write_lock(&pool->pool_rwlock); |
keir@19684 | 1426 obj_free(obj,0); |
keir@19684 | 1427 obj = NULL; |
keir@19684 | 1428 tmem_write_unlock(&pool->pool_rwlock); |
keir@19684 | 1429 } |
keir@19684 | 1430 } else { |
keir@19684 | 1431 tmem_spin_lock(&eph_lists_spinlock); |
keir@19684 | 1432 list_del(&pgp->global_eph_pages); |
keir@19684 | 1433 list_add_tail(&pgp->global_eph_pages,&global_ephemeral_page_list); |
keir@19684 | 1434 list_del(&pgp->client_eph_pages); |
keir@19684 | 1435 list_add_tail(&pgp->client_eph_pages,&client->ephemeral_page_list); |
keir@19684 | 1436 tmem_spin_unlock(&eph_lists_spinlock); |
keir@19684 | 1437 ASSERT(obj != NULL); |
keir@19684 | 1438 obj->last_client = tmh_get_cli_id_from_current(); |
keir@19684 | 1439 } |
keir@19684 | 1440 } |
keir@19684 | 1441 if ( obj != NULL ) |
keir@19684 | 1442 { |
keir@19684 | 1443 obj->no_evict = 0; |
keir@19684 | 1444 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1445 } |
keir@19684 | 1446 pool->found_gets++; |
keir@19897 | 1447 if ( is_ephemeral(pool) ) |
keir@19897 | 1448 client->succ_eph_gets++; |
keir@19897 | 1449 else |
keir@19897 | 1450 client->succ_pers_gets++; |
keir@19684 | 1451 return 1; |
keir@19684 | 1452 |
keir@19684 | 1453 bad_copy: |
keir@19684 | 1454 /* this should only happen if the client passed a bad mfn */ |
keir@19684 | 1455 failed_copies++; |
keir@19684 | 1456 ASSERT(0); |
keir@19684 | obj->no_evict = 0; |
keir@19684 | tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1457 return -EFAULT; |
keir@19684 | 1458 |
keir@19684 | 1459 } |
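A note on the get semantics implemented above, derived from the lookup-vs-delete split near the top of do_tmem_get():

    /* private + ephemeral  : pgp_delete_from_obj() - a successful get is
     *                        destructive, the page leaves tmem ("exclusive" get);
     * shared or persistent : pgp_lookup_in_obj()   - the page stays, and a
     *                        shared-ephemeral hit is refreshed to the tail
     *                        of both LRU lists under eph_lists_spinlock. */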
keir@19684 | 1460 |
keir@19684 | 1461 static NOINLINE int do_tmem_flush_page(pool_t *pool, uint64_t oid, uint32_t index) |
keir@19684 | 1462 { |
keir@19684 | 1463 obj_t *obj; |
keir@19684 | 1464 pgp_t *pgp; |
keir@19684 | 1465 |
keir@19684 | 1466 pool->flushs++; |
keir@19684 | 1467 obj = obj_find(pool,oid); |
keir@19684 | 1468 if ( obj == NULL ) |
keir@19684 | 1469 goto out; |
keir@19684 | 1470 pgp = pgp_delete_from_obj(obj, index); |
keir@19684 | 1471 if ( pgp == NULL ) |
keir@19684 | 1472 { |
keir@19684 | 1473 obj->no_evict = 0; |
keir@19684 | 1474 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1475 goto out; |
keir@19684 | 1476 } |
keir@19684 | 1477 pgp_delete(pgp,0); |
keir@19684 | 1478 if ( obj->pgp_count == 0 ) |
keir@19684 | 1479 { |
keir@19684 | 1480 tmem_write_lock(&pool->pool_rwlock); |
keir@19684 | 1481 obj_free(obj,0); |
keir@19684 | 1482 tmem_write_unlock(&pool->pool_rwlock); |
keir@19684 | 1483 } else { |
keir@19684 | 1484 obj->no_evict = 0; |
keir@19684 | 1485 tmem_spin_unlock(&obj->obj_spinlock); |
keir@19684 | 1486 } |
keir@19684 | 1487 pool->flushs_found++; |
keir@19684 | 1488 |
keir@19684 | 1489 out: |
keir@19684 | 1490 if ( pool->client->frozen ) |
keir@19684 | 1491 return -EFROZEN; |
keir@19684 | 1492 else |
keir@19684 | 1493 return 1; |
keir@19684 | 1494 } |
keir@19684 | 1495 |
keir@19684 | 1496 static NOINLINE int do_tmem_flush_object(pool_t *pool, uint64_t oid) |
keir@19684 | 1497 { |
keir@19684 | 1498 obj_t *obj; |
keir@19684 | 1499 |
keir@19684 | 1500 pool->flush_objs++; |
keir@19684 | 1501 obj = obj_find(pool,oid); |
keir@19684 | 1502 if ( obj == NULL ) |
keir@19684 | 1503 goto out; |
keir@19684 | 1504 tmem_write_lock(&pool->pool_rwlock); |
keir@19734 | 1505 obj_destroy(obj,0); |
keir@19684 | 1506 pool->flush_objs_found++; |
keir@19684 | 1507 tmem_write_unlock(&pool->pool_rwlock); |
keir@19684 | 1508 |
keir@19684 | 1509 out: |
keir@19684 | 1510 if ( pool->client->frozen ) |
keir@19684 | 1511 return -EFROZEN; |
keir@19684 | 1512 else |
keir@19684 | 1513 return 1; |
keir@19684 | 1514 } |
keir@19684 | 1515 |
keir@19684 | 1516 static NOINLINE int do_tmem_destroy_pool(uint32_t pool_id) |
keir@19684 | 1517 { |
keir@19684 | 1518 client_t *client = tmh_client_from_current(); |
keir@19684 | 1519 pool_t *pool; |
keir@19684 | 1520 |
keir@19684 | 1521 if ( client->pools == NULL ) |
keir@19684 | 1522 return 0; |
keir@19684 | 1523 if ( (pool = client->pools[pool_id]) == NULL ) |
keir@19684 | 1524 return 0; |
keir@19684 | 1525 client->pools[pool_id] = NULL; |
keir@19684 | 1526 pool_flush(pool,client->cli_id,1); |
keir@19684 | 1527 return 1; |
keir@19684 | 1528 } |
keir@19684 | 1529 |
keir@20067 | 1530 static NOINLINE int do_tmem_new_pool(cli_id_t this_cli_id, |
keir@20964 | 1531 uint32_t d_poolid, uint32_t flags, |
keir@20067 | 1532 uint64_t uuid_lo, uint64_t uuid_hi) |
keir@19684 | 1533 { |
keir@20067 | 1534 client_t *client; |
keir@20067 | 1535 cli_id_t cli_id; |
keir@19684 | 1536 int persistent = flags & TMEM_POOL_PERSIST; |
keir@19684 | 1537 int shared = flags & TMEM_POOL_SHARED; |
keir@19684 | 1538 int pagebits = (flags >> TMEM_POOL_PAGESIZE_SHIFT) |
keir@19684 | 1539 & TMEM_POOL_PAGESIZE_MASK; |
keir@19684 | 1540 int specversion = (flags >> TMEM_POOL_VERSION_SHIFT) |
keir@19684 | 1541 & TMEM_POOL_VERSION_MASK; |
keir@19684 | 1542 pool_t *pool, *shpool; |
keir@20964 | 1543 int s_poolid, first_unused_s_poolid; |
keir@20067 | 1544 int i; |
keir@19684 | 1545 |
keir@20067 | 1546 if ( this_cli_id == CLI_ID_NULL ) |
keir@20067 | 1547 cli_id = tmh_get_cli_id_from_current(); |
keir@20964 | 1548 else |
keir@20067 | 1549 cli_id = this_cli_id; |
keir@19684 | 1550 printk("tmem: allocating %s-%s tmem pool for %s=%d...", |
keir@19684 | 1551 persistent ? "persistent" : "ephemeral" , |
keir@19684 | 1552 shared ? "shared" : "private", cli_id_str, cli_id); |
keir@20067 | 1553 if ( specversion != TMEM_SPEC_VERSION ) |
keir@19684 | 1554 { |
keir@19684 | 1555 printk("failed... unsupported spec version\n"); |
keir@19684 | 1556 return -EPERM; |
keir@19684 | 1557 } |
keir@19684 | 1558 if ( pagebits != (PAGE_SHIFT - 12) ) |
keir@19684 | 1559 { |
keir@19684 | 1560 printk("failed... unsupported pagesize %d\n",1<<(pagebits+12)); |
keir@19684 | 1561 return -EPERM; |
keir@19684 | 1562 } |
keir@19684 | 1563 if ( (pool = pool_alloc()) == NULL ) |
keir@19684 | 1564 { |
keir@19684 | 1565 printk("failed... out of memory\n"); |
keir@19684 | 1566 return -ENOMEM; |
keir@19684 | 1567 } |
keir@20067 | 1568 if ( this_cli_id != CLI_ID_NULL ) |
keir@20067 | 1569 { |
keir@20964 | 1570 if ( (client = tmh_client_from_cli_id(this_cli_id)) == NULL |
keir@20964 | 1571 || d_poolid >= MAX_POOLS_PER_DOMAIN |
keir@20964 | 1572 || client->pools[d_poolid] != NULL ) |
keir@20964 | 1573 goto fail; |
keir@20067 | 1574 } |
keir@20964 | 1575 else |
keir@19684 | 1576 { |
keir@20964 | 1577 client = tmh_client_from_current(); |
keir@20964 | 1578 ASSERT(client != NULL); |
keir@20964 | 1579 for ( d_poolid = 0; d_poolid < MAX_POOLS_PER_DOMAIN; d_poolid++ ) |
keir@20964 | 1580 if ( client->pools[d_poolid] == NULL ) |
keir@20964 | 1581 break; |
keir@20964 | 1582 if ( d_poolid >= MAX_POOLS_PER_DOMAIN ) |
keir@20964 | 1583 { |
keir@20964 | 1584 printk("failed... no more pool slots available for this %s\n", |
keir@20964 | 1585 client_str); |
keir@20964 | 1586 goto fail; |
keir@20964 | 1587 } |
keir@19684 | 1588 } |
keir@20067 | 1589 if ( shared ) |
keir@20067 | 1590 { |
keir@20067 | 1591 if ( uuid_lo == -1L && uuid_hi == -1L ) |
keir@20067 | 1592 shared = 0; |
keir@20067 | 1593 if ( client->shared_auth_required && !global_shared_auth ) |
keir@20067 | 1594 { |
keir@20067 | 1595 for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) |
keir@20067 | 1596 if ( (client->shared_auth_uuid[i][0] == uuid_lo) && |
keir@20067 | 1597 (client->shared_auth_uuid[i][1] == uuid_hi) ) |
keir@20067 | 1598 break; |
keir@20067 | 1599 if ( i == MAX_GLOBAL_SHARED_POOLS ) |
keir@20067 | 1600 shared = 0; |
keir@20067 | 1601 } |
keir@20067 | 1602 } |
keir@19684 | 1603 pool->shared = shared; |
keir@19684 | 1604 pool->client = client; |
keir@19684 | 1605 if ( shared ) |
keir@19684 | 1606 { |
keir@19684 | 1607 first_unused_s_poolid = MAX_GLOBAL_SHARED_POOLS; |
keir@19684 | 1608 for ( s_poolid = 0; s_poolid < MAX_GLOBAL_SHARED_POOLS; s_poolid++ ) |
keir@19684 | 1609 { |
keir@19684 | 1610 if ( (shpool = global_shared_pools[s_poolid]) != NULL ) |
keir@19684 | 1611 { |
keir@19684 | 1612 if ( shpool->uuid[0] == uuid_lo && shpool->uuid[1] == uuid_hi ) |
keir@19684 | 1613 { |
keir@19734 | 1614 printk("(matches shared pool uuid=%"PRIx64".%"PRIx64") ", |
keir@19684 | 1615 uuid_hi, uuid_lo); |
keir@19684 | 1616 printk("pool_id=%d\n",d_poolid); |
keir@19684 | 1617 client->pools[d_poolid] = global_shared_pools[s_poolid]; |
keir@19684 | 1618 shared_pool_join(global_shared_pools[s_poolid], client); |
keir@19684 | 1619 pool_free(pool); |
keir@20964 | 1620 if ( this_cli_id != CLI_ID_NULL ) |
keir@20964 | 1621 tmh_client_put(client->tmh); |
keir@19684 | 1622 return d_poolid; |
keir@19684 | 1623 } |
keir@19684 | 1624 } |
keir@19684 | 1625 else if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS ) |
keir@19684 | 1626 first_unused_s_poolid = s_poolid; |
keir@19684 | 1627 } |
keir@19684 | 1628 if ( first_unused_s_poolid == MAX_GLOBAL_SHARED_POOLS ) |
keir@19684 | 1629 { |
keir@19684 | 1630 printk("tmem: failed... no global shared pool slots available\n"); |
keir@19684 | 1631 goto fail; |
keir@19684 | 1632 } |
keir@19684 | 1633 else |
keir@19684 | 1634 { |
keir@19684 | 1635 INIT_LIST_HEAD(&pool->share_list); |
keir@19684 | 1636 pool->shared_count = 0; |
keir@19684 | 1637 global_shared_pools[first_unused_s_poolid] = pool; |
keir@19684 | 1638 (void)shared_pool_join(pool,client); |
keir@19684 | 1639 } |
keir@19684 | 1640 } |
keir@19684 | 1641 client->pools[d_poolid] = pool; |
keir@20964 | 1642 if ( this_cli_id != CLI_ID_NULL ) |
keir@20964 | 1643 tmh_client_put(client->tmh); |
keir@19684 | 1644 list_add_tail(&pool->pool_list, &global_pool_list); |
keir@19684 | 1645 pool->pool_id = d_poolid; |
keir@19684 | 1646 pool->persistent = persistent; |
keir@19684 | 1647 pool->uuid[0] = uuid_lo; pool->uuid[1] = uuid_hi; |
keir@19684 | 1648 printk("pool_id=%d\n",d_poolid); |
keir@19684 | 1649 return d_poolid; |
keir@19684 | 1650 |
keir@19684 | 1651 fail: |
keir@19684 | 1652 pool_free(pool); |
keir@20964 | 1653 if ( this_cli_id != CLI_ID_NULL && client != NULL ) |
keir@20964 | 1654 tmh_client_put(client->tmh); |
keir@19684 | 1655 return -EPERM; |
keir@19684 | 1656 } |
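As a sketch of the other side of the flags decoding at the top of do_tmem_new_pool(), this is how a client could assemble an acceptable flags word (constant names as tested above; where they are defined is outside this file):

    /* sketch: flags for a persistent, private pool on a 4KiB-page host,
     * where pagebits must equal PAGE_SHIFT - 12 (i.e. 0 here) */
    uint32_t flags = TMEM_POOL_PERSIST
                   | (TMEM_SPEC_VERSION << TMEM_POOL_VERSION_SHIFT)
                   | ((PAGE_SHIFT - 12) << TMEM_POOL_PAGESIZE_SHIFT);
    /* a shared pool would also OR in TMEM_POOL_SHARED and supply a
     * (uuid_lo, uuid_hi) pair that the target client has authorized */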
keir@19684 | 1657 |
keir@19684 | 1658 /************ TMEM CONTROL OPERATIONS ************************************/ |
keir@19684 | 1659 |
keir@19684 | 1660 /* freeze/thaw all pools belonging to client cli_id (all domains if -1) */ |
keir@20067 | 1661 static int tmemc_freeze_pools(cli_id_t cli_id, int arg) |
keir@19684 | 1662 { |
keir@19684 | 1663 client_t *client; |
keir@19684 | 1664 bool_t freeze = (arg == TMEMC_FREEZE) ? 1 : 0; |
keir@19684 | 1665 bool_t destroy = (arg == TMEMC_DESTROY) ? 1 : 0; |
keir@19684 | 1666 char *s; |
keir@19684 | 1667 |
keir@19684 | 1668 s = destroy ? "destroyed" : ( freeze ? "frozen" : "thawed" ); |
keir@19684 | 1669 if ( cli_id == CLI_ID_NULL ) |
keir@19684 | 1670 { |
keir@19684 | 1671 list_for_each_entry(client,&global_client_list,client_list) |
keir@20067 | 1672 client_freeze(client,freeze); |
keir@19734 | 1673 printk("tmem: all pools %s for all %ss\n",s,client_str); |
keir@19684 | 1674 } |
keir@19684 | 1675 else |
keir@19684 | 1676 { |
keir@19684 | 1677 if ( (client = tmh_client_from_cli_id(cli_id)) == NULL) |
keir@19684 | 1678 return -1; |
keir@20067 | 1679 client_freeze(client,freeze); |
keir@20964 | 1680 tmh_client_put(client->tmh); |
keir@19684 | 1681 printk("tmem: all pools %s for %s=%d\n",s,cli_id_str,cli_id); |
keir@19684 | 1682 } |
keir@19684 | 1683 return 0; |
keir@19684 | 1684 } |
keir@19684 | 1685 |
keir@20067 | 1686 static int tmemc_flush_mem(cli_id_t cli_id, uint32_t kb) |
keir@19684 | 1687 { |
keir@19684 | 1688 uint32_t npages, flushed_pages, flushed_kb; |
keir@19684 | 1689 |
keir@19684 | 1690 if ( cli_id != CLI_ID_NULL ) |
keir@19684 | 1691 { |
keir@19684 | 1692 printk("tmem: %s-specific flush not supported yet, use --all\n", |
keir@19684 | 1693 client_str); |
keir@19684 | 1694 return -1; |
keir@19684 | 1695 } |
keir@19684 | 1696 /* convert kb to pages, rounding up if necessary */ |
keir@19684 | 1697 npages = (kb + ((1 << (PAGE_SHIFT-10))-1)) >> (PAGE_SHIFT-10); |
keir@19684 | 1698 flushed_pages = tmem_relinquish_npages(npages); |
keir@19684 | 1699 flushed_kb = flushed_pages << (PAGE_SHIFT-10); |
keir@19684 | 1700 return flushed_kb; |
keir@19684 | 1701 } |
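With 4KiB pages (PAGE_SHIFT == 12) the kb-to-pages conversion above reduces to a round-up by 4; a quick worked instance:

    /* PAGE_SHIFT-10 == 2, so npages = (kb + 3) >> 2:
     *   kb = 1  -> npages = 1   (rounded up to one whole page)
     *   kb = 4  -> npages = 1
     *   kb = 9  -> npages = 3
     * and flushed_kb = flushed_pages << 2 converts back exactly */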
keir@19684 | 1702 |
keir@19684 | 1703 /* |
keir@19684 | 1704 * These tmemc_list* routines output lots of stats in a format that is |
keir@19684 | 1705 * intended to be program-parseable, not human-readable. Further, by |
keir@19684 | 1706 * tying each group of stats to a line format indicator (e.g. G= for |
keir@19684 | 1707 * global stats) and each individual stat to a two-letter specifier |
keir@19684 | 1708 * (e.g. Ec:nnnnn in the G= line says there are nnnnn pages in the |
keir@19684 | 1709 * global ephemeral pool), it should allow the stats reported to be |
keir@19684 | 1710 * forward and backwards compatible as tmem evolves. |
keir@19684 | 1711 */ |
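For concreteness, the output looks like the following (field values invented for illustration; the field names come from the format strings in the routines below):

    G=Tt:1024,Te:2,Cf:0,Af:0,Pf:0,Ta:8192,Lm:0,Et:0,Ea:0,Rt:0,Ra:0,Rx:0,Fp:0
    C=CI:1,ww:128,ca:0,co:0,fr:0,Tc:5000,Ge:10,Pp:4,Gp:2
    P=CI:1,PI:0,PT:EP,U0:0,U1:0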
keir@19684 | 1712 #define BSIZE 1024 |
keir@19684 | 1713 |
keir@19684 | 1714 static int tmemc_list_client(client_t *c, tmem_cli_va_t buf, int off, |
keir@19684 | 1715 uint32_t len, bool_t use_long) |
keir@19684 | 1716 { |
keir@19684 | 1717 char info[BSIZE]; |
keir@19684 | 1718 int i, n = 0, sum = 0; |
keir@19684 | 1719 pool_t *p; |
keir@19684 | 1720 bool_t s; |
keir@19684 | 1721 |
keir@19897 | 1722 n = scnprintf(info,BSIZE,"C=CI:%d,ww:%d,ca:%d,co:%d,fr:%d," |
keir@19897 | 1723 "Tc:%"PRIu64",Ge:%ld,Pp:%ld,Gp:%ld%c", |
keir@19897 | 1724 c->cli_id, c->weight, c->cap, c->compress, c->frozen, |
keir@19897 | 1725 c->total_cycles, c->succ_eph_gets, c->succ_pers_puts, c->succ_pers_gets, |
keir@19897 | 1726 use_long ? ',' : '\n'); |
keir@19684 | 1727 if (use_long) |
keir@19684 | 1728 n += scnprintf(info+n,BSIZE-n, |
keir@19687 | 1729 "Ec:%ld,Em:%ld,cp:%ld,cb:%"PRId64",cn:%ld,cm:%ld\n", |
keir@19684 | 1730 c->eph_count, c->eph_count_max, |
keir@19687 | 1731 c->compressed_pages, c->compressed_sum_size, |
keir@19684 | 1732 c->compress_poor, c->compress_nomem); |
keir@19684 | 1733 tmh_copy_to_client_buf_offset(buf,off+sum,info,n+1); |
keir@19684 | 1734 sum += n; |
keir@19684 | 1735 for ( i = 0; i < MAX_POOLS_PER_DOMAIN; i++ ) |
keir@19684 | 1736 { |
keir@19684 | 1737 if ( (p = c->pools[i]) == NULL ) |
keir@19684 | 1738 continue; |
keir@19684 | 1739 s = is_shared(p); |
keir@19687 | 1740 n = scnprintf(info,BSIZE,"P=CI:%d,PI:%d," |
keir@19687 | 1741 "PT:%c%c,U0:%"PRIx64",U1:%"PRIx64"%c", |
keir@19687 | 1742 c->cli_id, p->pool_id, |
keir@19687 | 1743 is_persistent(p) ? 'P' : 'E', s ? 'S' : 'P', |
keir@19687 | 1744 (uint64_t)(s ? p->uuid[0] : 0), |
keir@19687 | 1745 (uint64_t)(s ? p->uuid[1] : 0LL), |
keir@19687 | 1746 use_long ? ',' : '\n'); |
keir@19684 | 1747 if (use_long) |
keir@19684 | 1748 n += scnprintf(info+n,BSIZE-n, |
keir@19684 | 1749 "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu," |
keir@19684 | 1750 "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu," |
keir@19684 | 1751 "fs:%lu,ft:%lu,os:%lu,ot:%lu\n", |
keir@19684 | 1752 _atomic_read(p->pgp_count), p->pgp_count_max, |
keir@19684 | 1753 p->obj_count, p->obj_count_max, |
keir@19684 | 1754 p->objnode_count, p->objnode_count_max, |
keir@19684 | 1755 p->good_puts, p->puts,p->dup_puts_flushed, p->dup_puts_replaced, |
keir@19684 | 1756 p->no_mem_puts, |
keir@19684 | 1757 p->found_gets, p->gets, |
keir@19684 | 1758 p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs); |
keir@19684 | 1759 if ( sum + n >= len ) |
keir@19684 | 1760 return sum; |
keir@19684 | 1761 tmh_copy_to_client_buf_offset(buf,off+sum,info,n+1); |
keir@19684 | 1762 sum += n; |
keir@19684 | 1763 } |
keir@19684 | 1764 return sum; |
keir@19684 | 1765 } |
keir@19684 | 1766 |
keir@19684 | 1767 static int tmemc_list_shared(tmem_cli_va_t buf, int off, uint32_t len, |
keir@19684 | 1768 bool_t use_long) |
keir@19684 | 1769 { |
keir@19684 | 1770 char info[BSIZE]; |
keir@19684 | 1771 int i, n = 0, sum = 0; |
keir@19684 | 1772 pool_t *p; |
keir@19684 | 1773 sharelist_t *sl; |
keir@19684 | 1774 |
keir@19684 | 1775 for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++ ) |
keir@19684 | 1776 { |
keir@19684 | 1777 if ( (p = global_shared_pools[i]) == NULL ) |
keir@19684 | 1778 continue; |
keir@19687 | 1779 n = scnprintf(info,BSIZE,"S=SI:%d,PT:%c%c,U0:%"PRIx64",U1:%"PRIx64, |
keir@19687 | 1780 i, is_persistent(p) ? 'P' : 'E', |
keir@19687 | 1781 is_shared(p) ? 'S' : 'P', |
keir@19687 | 1782 p->uuid[0], p->uuid[1]); |
keir@19684 | 1783 list_for_each_entry(sl,&p->share_list, share_list) |
keir@19684 | 1784 n += scnprintf(info+n,BSIZE-n,",SC:%d",sl->client->cli_id); |
keir@19684 | 1785 n += scnprintf(info+n,BSIZE-n,"%c", use_long ? ',' : '\n'); |
keir@19684 | 1786 if (use_long) |
keir@19684 | 1787 n += scnprintf(info+n,BSIZE-n, |
keir@19684 | 1788 "Pc:%d,Pm:%d,Oc:%ld,Om:%ld,Nc:%lu,Nm:%lu," |
keir@19684 | 1789 "ps:%lu,pt:%lu,pd:%lu,pr:%lu,px:%lu,gs:%lu,gt:%lu," |
keir@19684 | 1790 "fs:%lu,ft:%lu,os:%lu,ot:%lu\n", |
keir@19684 | 1791 _atomic_read(p->pgp_count), p->pgp_count_max, |
keir@19684 | 1792 p->obj_count, p->obj_count_max, |
keir@19684 | 1793 p->objnode_count, p->objnode_count_max, |
keir@19684 | 1794 p->good_puts, p->puts,p->dup_puts_flushed, p->dup_puts_replaced, |
keir@19684 | 1795 p->no_mem_puts, |
keir@19684 | 1796 p->found_gets, p->gets, |
keir@19684 | 1797 p->flushs_found, p->flushs, p->flush_objs_found, p->flush_objs); |
keir@19684 | 1798 if ( sum + n >= len ) |
keir@19684 | 1799 return sum; |
keir@19684 | 1800 tmh_copy_to_client_buf_offset(buf,off+sum,info,n+1); |
keir@19684 | 1801 sum += n; |
keir@19684 | 1802 } |
keir@19684 | 1803 return sum; |
keir@19684 | 1804 } |
keir@19684 | 1805 |
keir@19684 | 1806 #ifdef TMEM_PERF |
keir@19684 | 1807 static int tmemc_list_global_perf(tmem_cli_va_t buf, int off, uint32_t len, |
keir@19684 | 1808 bool_t use_long) |
keir@19684 | 1809 { |
keir@19684 | 1810 char info[BSIZE]; |
keir@19684 | 1811 int n = 0, sum = 0; |
keir@19684 | 1812 |
keir@19684 | 1813 n = scnprintf(info+n,BSIZE-n,"T="); |
keir@19684 | 1814 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,succ_get,"G"); |
keir@19684 | 1815 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,succ_put,"P"); |
keir@19684 | 1816 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,non_succ_get,"g"); |
keir@19684 | 1817 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,non_succ_put,"p"); |
keir@19684 | 1818 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,flush,"F"); |
keir@19684 | 1819 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,flush_obj,"O"); |
keir@19684 | 1820 #ifdef COMPARE_COPY_PAGE_SSE2 |
keir@19684 | 1821 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,pg_copy1,"1"); |
keir@19684 | 1822 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,pg_copy2,"2"); |
keir@19684 | 1823 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,pg_copy3,"3"); |
keir@19684 | 1824 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,pg_copy4,"4"); |
keir@19684 | 1825 #else |
keir@19684 | 1826 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,pg_copy,"C"); |
keir@19684 | 1827 #endif |
keir@19684 | 1828 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,compress,"c"); |
keir@19684 | 1829 n += SCNPRINTF_CYC_COUNTER(info+n,BSIZE-n,decompress,"d"); |
keir@19684 | 1830 n--; /* overwrite trailing comma */ |
keir@19684 | 1831 n += scnprintf(info+n,BSIZE-n,"\n"); |
keir@19684 | 1832 if ( sum + n >= len ) |
keir@19684 | 1833 return sum; |
keir@19684 | 1834 tmh_copy_to_client_buf_offset(buf,off+sum,info,n+1); |
keir@19684 | 1835 sum += n; |
keir@19684 | 1836 return sum; |
keir@19684 | 1837 } |
keir@19684 | 1838 #else |
keir@19684 | 1839 #define tmemc_list_global_perf(_buf,_off,_len,_use) (0) |
keir@19684 | 1840 #endif |
keir@19684 | 1841 |
keir@19684 | 1842 static int tmemc_list_global(tmem_cli_va_t buf, int off, uint32_t len, |
keir@19684 | 1843 bool_t use_long) |
keir@19684 | 1844 { |
keir@19684 | 1845 char info[BSIZE]; |
keir@19684 | 1846 int n = 0, sum = 0; |
keir@19684 | 1847 |
keir@19684 | 1848 n += scnprintf(info,BSIZE,"G=" |
keir@19684 | 1849 "Tt:%lu,Te:%lu,Cf:%lu,Af:%lu,Pf:%lu,Ta:%lu," |
keir@19684 | 1850 "Lm:%lu,Et:%lu,Ea:%lu,Rt:%lu,Ra:%lu,Rx:%lu,Fp:%lu%c", |
keir@19684 | 1851 total_tmem_ops, errored_tmem_ops, failed_copies, |
keir@19684 | 1852 alloc_failed, alloc_page_failed, tmh_avail_pages(), |
keir@19684 | 1853 low_on_memory, evicted_pgs, |
keir@19684 | 1854 evict_attempts, relinq_pgs, relinq_attempts, max_evicts_per_relinq, |
keir@19684 | 1855 total_flush_pool, use_long ? ',' : '\n'); |
keir@19684 | 1856 if (use_long) |
keir@19684 | 1857 n += scnprintf(info+n,BSIZE-n, |
keir@19684 | 1858 "Ec:%ld,Em:%ld,Oc:%d,Om:%d,Nc:%d,Nm:%d,Pc:%d,Pm:%d\n", |
keir@19684 | 1859 global_eph_count, global_eph_count_max, |
keir@19684 | 1860 _atomic_read(global_obj_count), global_obj_count_max, |
keir@19684 | 1861 _atomic_read(global_rtree_node_count), global_rtree_node_count_max, |
keir@19684 | 1862 _atomic_read(global_pgp_count), global_pgp_count_max); |
keir@19684 | 1863 if ( sum + n >= len ) |
keir@19684 | 1864 return sum; |
keir@19684 | 1865 tmh_copy_to_client_buf_offset(buf,off+sum,info,n+1); |
keir@19684 | 1866 sum += n; |
keir@19684 | 1867 return sum; |
keir@19684 | 1868 } |
keir@19684 | 1869 |
keir@20067 | 1870 static int tmemc_list(cli_id_t cli_id, tmem_cli_va_t buf, uint32_t len, |
keir@19684 | 1871 bool_t use_long) |
keir@19684 | 1872 { |
keir@19684 | 1873 client_t *client; |
keir@19684 | 1874 int off = 0; |
keir@19684 | 1875 |
keir@19684 | 1876 if ( cli_id == CLI_ID_NULL ) { |
keir@19684 | 1877 off = tmemc_list_global(buf,0,len,use_long); |
keir@19684 | 1878 off += tmemc_list_shared(buf,off,len-off,use_long); |
keir@19684 | 1879 list_for_each_entry(client,&global_client_list,client_list) |
keir@19684 | 1880 off += tmemc_list_client(client, buf, off, len-off, use_long); |
keir@19684 | 1881 off += tmemc_list_global_perf(buf,off,len-off,use_long); |
keir@19684 | 1882 } |
keir@19684 | 1883 else if ( (client = tmh_client_from_cli_id(cli_id)) == NULL) |
keir@19684 | 1884 return -1; |
keir@20964 | 1885 else { |
keir@19684 | 1886 off = tmemc_list_client(client, buf, 0, len, use_long); |
keir@20964 | 1887 tmh_client_put(client->tmh); |
keir@20964 | 1888 } |
keir@19684 | 1889 |
keir@19684 | 1890 return 0; |
keir@19684 | 1891 } |
keir@19684 | 1892 |
keir@19684 | 1893 static int tmemc_set_var_one(client_t *client, uint32_t subop, uint32_t arg1) |
keir@19684 | 1894 { |
keir@19684 | 1895 cli_id_t cli_id = client->cli_id; |
keir@19684 | 1896 uint32_t old_weight; |
keir@19684 | 1897 |
keir@19684 | 1898 switch (subop) |
keir@19684 | 1899 { |
keir@19684 | 1900 case TMEMC_SET_WEIGHT: |
keir@19684 | 1901 old_weight = client->weight; |
keir@19684 | 1902 client->weight = arg1; |
keir@19684 | 1903 printk("tmem: weight set to %d for %s=%d\n",arg1,cli_id_str,cli_id); |
keir@19684 | 1904 atomic_sub(old_weight,&client_weight_total); |
keir@19684 | 1905 atomic_add(client->weight,&client_weight_total); |
keir@19684 | 1906 break; |
keir@19684 | 1907 case TMEMC_SET_CAP: |
keir@19684 | 1908 client->cap = arg1; |
keir@19684 | 1909 printk("tmem: cap set to %d for %s=%d\n",arg1,cli_id_str,cli_id); |
keir@19684 | 1910 break; |
keir@19684 | 1911 case TMEMC_SET_COMPRESS: |
keir@20067 | 1912 #ifdef __i386__ |
keir@20067 | 1913 return -1; |
keir@20067 | 1914 #endif |
keir@19684 | 1915 client->compress = arg1 ? 1 : 0; |
keir@19684 | 1916 printk("tmem: compression %s for %s=%d\n", |
keir@19684 | 1917 arg1 ? "enabled" : "disabled",cli_id_str,cli_id); |
keir@19684 | 1918 break; |
keir@19684 | 1919 default: |
keir@19684 | 1920 printk("tmem: unknown subop %d for tmemc_set_var\n",subop); |
keir@19684 | 1921 return -1; |
keir@19684 | 1922 } |
keir@19684 | 1923 return 0; |
keir@19684 | 1924 } |
keir@19684 | 1925 |
keir@20067 | 1926 static int tmemc_set_var(cli_id_t cli_id, uint32_t subop, uint32_t arg1) |
keir@19684 | 1927 { |
keir@19684 | 1928 client_t *client; |
keir@19684 | 1929 |
keir@19684 | 1930 if ( cli_id == CLI_ID_NULL ) |
keir@19684 | 1931 list_for_each_entry(client,&global_client_list,client_list) |
keir@19684 | 1932 tmemc_set_var_one(client, subop, arg1); |
keir@19684 | 1933 else if ( (client = tmh_client_from_cli_id(cli_id)) == NULL) |
keir@19684 | 1934 return -1; |
keir@19684 | 1935 else |
keir@20964 | 1936 { |
keir@20964 | 1937 tmemc_set_var_one(client, subop, arg1); |
keir@20964 | 1938 tmh_client_put(client->tmh); |
keir@20964 | 1939 } |
keir@19684 | 1940 return 0; |
keir@19684 | 1941 } |
keir@19684 | 1942 |
keir@20067 | 1943 static NOINLINE int tmemc_shared_pool_auth(cli_id_t cli_id, uint64_t uuid_lo, |
keir@20067 | 1944 uint64_t uuid_hi, bool_t auth) |
keir@20067 | 1945 { |
keir@20067 | 1946 client_t *client; |
keir@20067 | 1947 int i, free = -1; |
keir@20067 | 1948 |
keir@20067 | 1949 if ( cli_id == CLI_ID_NULL ) |
keir@20067 | 1950 { |
keir@20067 | 1951 global_shared_auth = auth; |
keir@20067 | 1952 return 1; |
keir@20067 | 1953 } |
keir@20067 | 1954 client = tmh_client_from_cli_id(cli_id); |
keir@20964 | 1955 if ( client == NULL ) |
keir@20964 | 1956 return -EINVAL; |
keir@20067 | 1957 for ( i = 0; i < MAX_GLOBAL_SHARED_POOLS; i++) |
keir@20067 | 1958 { |
keir@20067 | 1959 if ( (client->shared_auth_uuid[i][0] == uuid_lo) && |
keir@20067 | 1960 (client->shared_auth_uuid[i][1] == uuid_hi) ) |
keir@20067 | 1961 { |
keir@20067 | 1962 if ( auth == 0 ) |
keir@20067 | 1963 client->shared_auth_uuid[i][0] = |
keir@20067 | 1964 client->shared_auth_uuid[i][1] = -1L; |
keir@20964 | 1965 tmh_client_put(client->tmh); |
keir@20067 | 1966 return 1; |
keir@20067 | 1967 } |
keir@20067 | 1968 if ( (auth == 1) && (client->shared_auth_uuid[i][0] == -1L) && |
keir@20067 | 1969 (client->shared_auth_uuid[i][1] == -1L) && (free == -1) ) |
keir@20067 | 1970 free = i; |
keir@20067 | 1971 } |
keir@20067 | 1972 if ( auth == 0 ) |
keir@20964 | 1973 { |
keir@20964 | 1974 tmh_client_put(client->tmh); |
keir@20067 | 1975 return 0; |
keir@20964 | 1976 } |
keir@20067 | 1977 if ( auth == 1 && free == -1 ) |
keir@20964 | { |
keir@20964 | tmh_client_put(client->tmh); |
keir@20067 | 1978 return -ENOMEM; |
keir@20964 | } |
keir@20067 | 1979 client->shared_auth_uuid[free][0] = uuid_lo; |
keir@20067 | 1980 client->shared_auth_uuid[free][1] = uuid_hi; |
keir@20964 | 1981 tmh_client_put(client->tmh); |
keir@20067 | 1982 return 1; |
keir@20067 | 1983 } |
keir@20067 | 1984 |
keir@20067 | 1985 static NOINLINE int tmemc_save_subop(int cli_id, uint32_t pool_id, |
keir@20067 | 1986 uint32_t subop, tmem_cli_va_t buf, uint32_t arg1) |
keir@20067 | 1987 { |
keir@20067 | 1988 client_t *client = tmh_client_from_cli_id(cli_id); |
keir@20964 | 1989 pool_t *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) |
keir@20964 | 1990 ? NULL : client->pools[pool_id]; |
keir@20067 | 1991 uint32_t p; |
keir@20067 | 1992 uint64_t *uuid; |
keir@20067 | 1993 pgp_t *pgp, *pgp2; |
keir@20964 | 1994 int rc = -1; |
keir@20067 | 1995 |
keir@20067 | 1996 switch(subop) |
keir@20067 | 1997 { |
keir@20067 | 1998 case TMEMC_SAVE_BEGIN: |
keir@20067 | 1999 if ( client == NULL ) |
keir@20067 | 2000 return 0; |
keir@20067 | 2001 for (p = 0; p < MAX_POOLS_PER_DOMAIN; p++) |
keir@20067 | 2002 if ( client->pools[p] != NULL ) |
keir@20067 | 2003 break; |
keir@20067 | 2004 if ( p == MAX_POOLS_PER_DOMAIN ) |
keir@20964 | 2005 { |
keir@20964 | 2006 rc = 0; |
keir@20964 | 2007 break; |
keir@20964 | 2008 } |
keir@20067 | 2009 client->was_frozen = client->frozen; |
keir@20067 | 2010 client->frozen = 1; |
keir@20067 | 2011 if ( arg1 != 0 ) |
keir@20067 | 2012 client->live_migrating = 1; |
keir@20964 | 2013 rc = 1; |
keir@20964 | 2014 break; |
keir@20067 | 2015 case TMEMC_RESTORE_BEGIN: |
keir@20964 | 2016 if ( client == NULL && (client = client_create(cli_id)) != NULL ) |
keir@20964 | 2017 return 1; |
keir@20964 | 2018 break; |
keir@20067 | 2019 case TMEMC_SAVE_GET_VERSION: |
keir@20964 | 2020 rc = TMEM_SPEC_VERSION; |
keir@20964 | 2021 break; |
keir@20067 | 2022 case TMEMC_SAVE_GET_MAXPOOLS: |
keir@20964 | 2023 rc = MAX_POOLS_PER_DOMAIN; |
keir@20964 | 2024 break; |
keir@20067 | 2025 case TMEMC_SAVE_GET_CLIENT_WEIGHT: |
keir@20964 | if ( client == NULL ) |
keir@20964 | break; |
keir@20964 | 2026 rc = client->weight == -1 ? -2 : client->weight; |
keir@20964 | 2027 break; |
keir@20067 | 2028 case TMEMC_SAVE_GET_CLIENT_CAP: |
keir@20964 | if ( client == NULL ) |
keir@20964 | break; |
keir@20964 | 2029 rc = client->cap == -1 ? -2 : client->cap; |
keir@20964 | 2030 break; |
keir@20067 | 2031 case TMEMC_SAVE_GET_CLIENT_FLAGS: |
keir@20964 | if ( client == NULL ) |
keir@20964 | break; |
keir@20964 | 2032 rc = (client->compress ? TMEM_CLIENT_COMPRESS : 0 ) | |
keir@20964 | 2033 (client->was_frozen ? TMEM_CLIENT_FROZEN : 0 ); |
keir@20964 | 2034 break; |
keir@20067 | 2035 case TMEMC_SAVE_GET_POOL_FLAGS: |
keir@20067 | 2036 if ( pool == NULL ) |
keir@20964 | 2037 break; |
keir@20964 | 2038 rc = (pool->persistent ? TMEM_POOL_PERSIST : 0) | |
keir@20964 | 2039 (pool->shared ? TMEM_POOL_SHARED : 0) | |
keir@20964 | 2040 (pool->pageshift << TMEM_POOL_PAGESIZE_SHIFT); |
keir@20964 | 2041 break; |
keir@20067 | 2042 case TMEMC_SAVE_GET_POOL_NPAGES: |
keir@20067 | 2043 if ( pool == NULL ) |
keir@20964 | 2044 break; |
keir@20964 | 2045 rc = _atomic_read(pool->pgp_count); |
keir@20964 | 2046 break; |
keir@20067 | 2047 case TMEMC_SAVE_GET_POOL_UUID: |
keir@20067 | 2048 if ( pool == NULL ) |
keir@20964 | 2049 break; |
keir@20067 | 2050 uuid = (uint64_t *)buf.p; |
keir@20067 | 2051 *uuid++ = pool->uuid[0]; |
keir@20067 | 2052 *uuid = pool->uuid[1]; |
keir@20964 | 2053 rc = 0; |
keir@20964 | break; |
keir@20067 | 2054 case TMEMC_SAVE_END: |
keir@20964 | if ( client == NULL ) |
keir@20964 | break; |
keir@20067 | 2055 client->live_migrating = 0; |
keir@20067 | 2056 if ( !list_empty(&client->persistent_invalidated_list) ) |
keir@20067 | 2057 list_for_each_entry_safe(pgp,pgp2, |
keir@20067 | 2058 &client->persistent_invalidated_list, client_inv_pages) |
keir@20067 | 2059 pgp_free_from_inv_list(client,pgp); |
keir@20067 | 2060 client->frozen = client->was_frozen; |
keir@20964 | 2061 rc = 0; |
keir@20067 | 2062 } |
keir@20964 | 2063 if ( client ) |
keir@20964 | 2064 tmh_client_put(client->tmh); |
keir@20964 | 2065 return rc; |
keir@20067 | 2066 } |
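The save-side subops above are intended to be driven in order by a host save tool; a plausible sequence (an assumption inferred from the subop names and the frozen/live_migrating handling, not a documented contract):

    /* plausible save-driver sequence (sketch):
     *   TMEMC_SAVE_BEGIN          (arg1 != 0 for live migration; freezes client)
     *   TMEMC_SAVE_GET_VERSION / GET_MAXPOOLS / GET_CLIENT_*    (metadata)
     *   per pool: GET_POOL_FLAGS / GET_POOL_NPAGES / GET_POOL_UUID
     *   loop TMEMC_SAVE_GET_NEXT_PAGE until it returns -1
     *   if live: loop TMEMC_SAVE_GET_NEXT_INV for pages invalidated meanwhile
     *   TMEMC_SAVE_END            (unfreezes, drops the invalidated list)
     */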
keir@20067 | 2067 |
keir@20067 | 2068 static NOINLINE int tmemc_save_get_next_page(int cli_id, int pool_id, |
keir@20067 | 2069 tmem_cli_va_t buf, uint32_t bufsize) |
keir@20067 | 2070 { |
keir@20067 | 2071 client_t *client = tmh_client_from_cli_id(cli_id); |
keir@20964 | 2072 pool_t *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) |
keir@20964 | 2073 ? NULL : client->pools[pool_id]; |
keir@20067 | 2074 pgp_t *pgp; |
keir@20067 | 2075 int ret = 0; |
keir@20067 | 2076 struct tmem_handle *h; |
keir@20067 | 2077 unsigned int pagesize; |
keir@20067 | 2078 |
keir@20964 | if ( client == NULL ) |
keir@20964 | return -1; |
keir@20964 | 2079 if ( pool == NULL || is_ephemeral(pool) ) |
keir@20964 | 2080 { |
keir@20964 | 2081 tmh_client_put(client->tmh); |
keir@20067 | 2082 return -1; |
keir@20964 | 2083 } |
keir@20964 | pagesize = 1 << (pool->pageshift+12); |
keir@20067 | 2084 if ( bufsize < pagesize + sizeof(struct tmem_handle) ) |
keir@20964 | 2085 { |
keir@20964 | 2086 tmh_client_put(client->tmh); |
keir@20067 | 2087 return -ENOMEM; |
keir@20964 | 2088 } |
keir@20067 | 2089 |
keir@20067 | 2090 tmem_spin_lock(&pers_lists_spinlock); |
keir@20067 | 2091 if ( list_empty(&pool->persistent_page_list) ) |
keir@20067 | 2092 { |
keir@20067 | 2093 ret = -1; |
keir@20067 | 2094 goto out; |
keir@20067 | 2095 } |
keir@20067 | 2096 /* note: pool->cur_pgp is the pgp last returned by get_next_page */ |
keir@20067 | 2097 if ( pool->cur_pgp == NULL ) |
keir@20067 | 2098 { |
keir@20067 | 2099 /* process the first one */ |
keir@20067 | 2100 pool->cur_pgp = pgp = list_entry((&pool->persistent_page_list)->next, |
keir@20067 | 2101 pgp_t,pool_pers_pages); |
keir@20067 | 2102 } else if ( list_is_last(&pool->cur_pgp->pool_pers_pages, |
keir@20067 | 2103 &pool->persistent_page_list) ) |
keir@20067 | 2104 { |
keir@20067 | 2105 /* already processed the last one in the list */ |
keir@20067 | 2106 ret = -1; |
keir@20067 | 2107 goto out; |
keir@20067 | 2108 } |
keir@20067 | 2109 pgp = list_entry((&pool->cur_pgp->pool_pers_pages)->next, |
keir@20067 | 2110 pgp_t,pool_pers_pages); |
keir@20067 | 2111 pool->cur_pgp = pgp; |
keir@20067 | 2112 h = (struct tmem_handle *)buf.p; |
keir@20067 | 2113 h->oid = pgp->obj->oid; |
keir@20067 | 2114 h->index = pgp->index; |
keir@20067 | 2115 buf.p = (void *)(h+1); |
keir@20067 | 2116 ret = do_tmem_get(pool, h->oid, h->index,0,0,0,pagesize,buf.p); |
keir@20067 | 2117 |
keir@20067 | 2118 out: |
keir@20067 | 2119 tmem_spin_unlock(&pers_lists_spinlock); |
keir@20964 | 2120 tmh_client_put(client->tmh); |
keir@20067 | 2121 return ret; |
keir@20067 | 2122 } |
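The buffer filled above is a struct tmem_handle immediately followed by the page payload; a sketch of how a consumer of this interface would walk it, assuming that layout (restore_one_page() is a hypothetical helper):

    /* layout produced above:
     *   [ struct tmem_handle (oid, index) ][ pagesize bytes of page data ] */
    struct tmem_handle *h = (struct tmem_handle *)buf;
    void *page_data = (void *)(h + 1);   /* starts right after the handle */
    restore_one_page(h->oid, h->index, page_data);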
keir@20067 | 2123 |
keir@20067 | 2124 static NOINLINE int tmemc_save_get_next_inv(int cli_id, tmem_cli_va_t buf, |
keir@20067 | 2125 uint32_t bufsize) |
keir@20067 | 2126 { |
keir@20067 | 2127 client_t *client = tmh_client_from_cli_id(cli_id); |
keir@20067 | 2128 pgp_t *pgp; |
keir@20067 | 2129 struct tmem_handle *h; |
keir@20067 | 2130 int ret = 0; |
keir@20067 | 2131 |
keir@20067 | 2132 if ( client == NULL ) |
keir@20067 | 2133 return 0; |
keir@20067 | 2134 if ( bufsize < sizeof(struct tmem_handle) ) |
keir@20964 | 2135 { |
keir@20964 | 2136 tmh_client_put(client->tmh); |
keir@20067 | 2137 return 0; |
keir@20964 | 2138 } |
keir@20067 | 2139 tmem_spin_lock(&pers_lists_spinlock); |
keir@20067 | 2140 if ( list_empty(&client->persistent_invalidated_list) ) |
keir@20067 | 2141 goto out; |
keir@20067 | 2142 if ( client->cur_pgp == NULL ) |
keir@20067 | 2143 { |
keir@20067 | 2144 pgp = list_entry((&client->persistent_invalidated_list)->next, |
keir@20067 | 2145 pgp_t,client_inv_pages); |
keir@20067 | 2146 client->cur_pgp = pgp; |
keir@20067 | 2147 } else if ( list_is_last(&client->cur_pgp->client_inv_pages, |
keir@20067 | 2148 &client->persistent_invalidated_list) ) |
keir@20067 | 2149 { |
keir@20067 | 2150 client->cur_pgp = NULL; |
keir@20067 | 2151 ret = 0; |
keir@20067 | 2152 goto out; |
keir@20067 | 2153 } else { |
keir@20067 | 2154 pgp = list_entry((&client->cur_pgp->client_inv_pages)->next, |
keir@20067 | 2155 pgp_t,client_inv_pages); |
keir@20067 | 2156 client->cur_pgp = pgp; |
keir@20067 | 2157 } |
keir@20067 | 2158 h = (struct tmem_handle *)buf.p; |
keir@20067 | 2159 h->pool_id = pgp->pool_id; |
keir@20067 | 2160 h->oid = pgp->inv_oid; |
keir@20067 | 2161 h->index = pgp->index; |
keir@20067 | 2162 ret = 1; |
keir@20067 | 2163 out: |
keir@20067 | 2164 tmem_spin_unlock(&pers_lists_spinlock); |
keir@20964 | 2165 tmh_client_put(client->tmh); |
keir@20067 | 2166 return ret; |
keir@20067 | 2167 } |
keir@20067 | 2168 |
keir@20067 | 2169 static int tmemc_restore_put_page(int cli_id, int pool_id, uint64_t oid, |
keir@20067 | 2170 uint32_t index, tmem_cli_va_t buf, uint32_t bufsize) |
keir@20067 | 2171 { |
keir@20067 | 2172 client_t *client = tmh_client_from_cli_id(cli_id); |
keir@20964 | 2173 pool_t *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) |
keir@20964 | 2174 ? NULL : client->pools[pool_id]; |
keir@20964 | 2175 int rc = pool ? do_tmem_put(pool,oid,index,0,0,0,bufsize,buf.p) : -1; |
keir@20067 | 2176 |
keir@20964 | 2177 if ( client ) |
keir@20964 | 2178 tmh_client_put(client->tmh); |
keir@20964 | 2179 return rc; |
keir@20067 | 2180 } |
keir@20067 | 2181 |
keir@20067 | 2182 static int tmemc_restore_flush_page(int cli_id, int pool_id, uint64_t oid, |
keir@20067 | 2183 uint32_t index) |
keir@20067 | 2184 { |
keir@20067 | 2185 client_t *client = tmh_client_from_cli_id(cli_id); |
keir@20964 | 2186 pool_t *pool = (client == NULL || pool_id >= MAX_POOLS_PER_DOMAIN) |
keir@20964 | 2187 ? NULL : client->pools[pool_id]; |
keir@20964 | 2188 int rc = pool ? do_tmem_flush_page(pool, oid, index) : -1; |
keir@20067 | 2189 |
keir@20964 | 2190 if ( client ) |
keir@20964 | 2191 tmh_client_put(client->tmh); |
keir@20964 | 2192 return rc; |
keir@20067 | 2193 } |
keir@20067 | 2194 |
keir@20067 | 2195 static NOINLINE int do_tmem_control(struct tmem_op *op) |
keir@19684 | 2196 { |
keir@19684 | 2197 int ret; |
keir@20067 | 2198 uint32_t pool_id = op->pool_id; |
keir@20067 | 2199 uint32_t subop = op->u.ctrl.subop; |
keir@19684 | 2200 |
keir@19684 | 2201 if (!tmh_current_is_privileged()) |
keir@19684 | 2202 { |
keir@19684 | 2203 /* don't fail... mystery: sometimes dom0 fails here */ |
keir@19684 | 2204 /* return -EPERM; */ |
keir@19684 | 2205 } |
keir@19684 | 2206 switch(subop) |
keir@19684 | 2207 { |
keir@19684 | 2208 case TMEMC_THAW: |
keir@19684 | 2209 case TMEMC_FREEZE: |
keir@19684 | 2210 case TMEMC_DESTROY: |
keir@20067 | 2211 ret = tmemc_freeze_pools(op->u.ctrl.cli_id,subop); |
keir@19684 | 2212 break; |
keir@19684 | 2213 case TMEMC_FLUSH: |
keir@20067 | 2214 ret = tmemc_flush_mem(op->u.ctrl.cli_id,op->u.ctrl.arg1); |
keir@19684 | 2215 break; |
keir@19684 | 2216 case TMEMC_LIST: |
keir@20067 | 2217 ret = tmemc_list(op->u.ctrl.cli_id,op->u.ctrl.buf, |
keir@20067 | 2218 op->u.ctrl.arg1,op->u.ctrl.arg2); |
keir@19684 | 2219 break; |
keir@19684 | 2220 case TMEMC_SET_WEIGHT: |
keir@19684 | 2221 case TMEMC_SET_CAP: |
keir@19684 | 2222 case TMEMC_SET_COMPRESS: |
keir@20067 | 2223 ret = tmemc_set_var(op->u.ctrl.cli_id,subop,op->u.ctrl.arg1); |
keir@20067 | 2224 break; |
keir@20079 | 2225 case TMEMC_QUERY_FREEABLE_MB: |
keir@20812 | 2226 ret = tmh_freeable_pages() >> (20 - PAGE_SHIFT); |
keir@20079 | 2227 break; |
keir@20067 | 2228 case TMEMC_SAVE_BEGIN: |
keir@20067 | 2229 case TMEMC_RESTORE_BEGIN: |
keir@20067 | 2230 case TMEMC_SAVE_GET_VERSION: |
keir@20067 | 2231 case TMEMC_SAVE_GET_MAXPOOLS: |
keir@20067 | 2232 case TMEMC_SAVE_GET_CLIENT_WEIGHT: |
keir@20067 | 2233 case TMEMC_SAVE_GET_CLIENT_CAP: |
keir@20067 | 2234 case TMEMC_SAVE_GET_CLIENT_FLAGS: |
keir@20067 | 2235 case TMEMC_SAVE_GET_POOL_FLAGS: |
keir@20067 | 2236 case TMEMC_SAVE_GET_POOL_NPAGES: |
keir@20067 | 2237 case TMEMC_SAVE_GET_POOL_UUID: |
keir@20067 | 2238 case TMEMC_SAVE_END: |
keir@20067 | 2239 ret = tmemc_save_subop(op->u.ctrl.cli_id,pool_id,subop, |
keir@20067 | 2240 op->u.ctrl.buf,op->u.ctrl.arg1); |
keir@20067 | 2241 break; |
keir@20067 | 2242 case TMEMC_SAVE_GET_NEXT_PAGE: |
keir@20067 | 2243 ret = tmemc_save_get_next_page(op->u.ctrl.cli_id, pool_id, |
keir@20067 | 2244 op->u.ctrl.buf, op->u.ctrl.arg1); |
keir@20067 | 2245 break; |
keir@20067 | 2246 case TMEMC_SAVE_GET_NEXT_INV: |
keir@20067 | 2247 ret = tmemc_save_get_next_inv(op->u.ctrl.cli_id, op->u.ctrl.buf, |
keir@20067 | 2248 op->u.ctrl.arg1); |
keir@20067 | 2249 break; |
keir@20067 | 2250 case TMEMC_RESTORE_PUT_PAGE: |
keir@20067 | 2251 ret = tmemc_restore_put_page(op->u.ctrl.cli_id,pool_id, |
keir@20067 | 2252 op->u.ctrl.arg3, op->u.ctrl.arg2, |
keir@20067 | 2253 op->u.ctrl.buf, op->u.ctrl.arg1); |
keir@20067 | 2254 break; |
keir@20067 | 2255 case TMEMC_RESTORE_FLUSH_PAGE: |
keir@20067 | 2256 ret = tmemc_restore_flush_page(op->u.ctrl.cli_id,pool_id, |
keir@20067 | 2257 op->u.ctrl.arg3, op->u.ctrl.arg2); |
keir@19684 | 2258 break; |
keir@19684 | 2259 default: |
keir@19684 | 2260 ret = -1; |
keir@19684 | 2261 } |
keir@19684 | 2262 return ret; |
keir@19684 | 2263 } |
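A sketch of a privileged control invocation as routed above (how the op struct reaches the hypervisor is tool-side plumbing outside this file; guest_buf and buf_len are placeholders):

    /* sketch: request the program-parseable stats dump for all clients */
    struct tmem_op op = { 0 };
    op.cmd = TMEM_CONTROL;
    op.u.ctrl.subop = TMEMC_LIST;
    op.u.ctrl.cli_id = CLI_ID_NULL;   /* all clients */
    op.u.ctrl.buf = guest_buf;        /* where the stats text lands */
    op.u.ctrl.arg1 = buf_len;
    op.u.ctrl.arg2 = 1;               /* use_long */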
keir@19684 | 2264 |
keir@19684 | 2265 /************ EXPORTed FUNCTIONS **************************************/ |
keir@19684 | 2266 |
keir@19684 | 2267 EXPORT long do_tmem_op(tmem_cli_op_t uops) |
keir@19684 | 2268 { |
keir@19684 | 2269 struct tmem_op op; |
keir@19684 | 2270 client_t *client = tmh_client_from_current(); |
keir@19684 | 2271 pool_t *pool = NULL; |
keir@19684 | 2272 int rc = 0; |
keir@19684 | 2273 bool_t succ_get = 0, succ_put = 0; |
keir@19684 | 2274 bool_t non_succ_get = 0, non_succ_put = 0; |
keir@19684 | 2275 bool_t flush = 0, flush_obj = 0; |
keir@19684 | 2276 bool_t tmem_write_lock_set = 0, tmem_read_lock_set = 0; |
keir@19684 | 2277 DECL_LOCAL_CYC_COUNTER(succ_get); |
keir@19684 | 2278 DECL_LOCAL_CYC_COUNTER(succ_put); |
keir@19684 | 2279 DECL_LOCAL_CYC_COUNTER(non_succ_get); |
keir@19684 | 2280 DECL_LOCAL_CYC_COUNTER(non_succ_put); |
keir@19684 | 2281 DECL_LOCAL_CYC_COUNTER(flush); |
keir@19684 | 2282 DECL_LOCAL_CYC_COUNTER(flush_obj); |
keir@19684 | 2283 |
keir@19684 | 2284 if ( !tmem_initialized ) |
keir@19684 | 2285 return -ENODEV; |
keir@19684 | 2286 |
keir@19684 | 2287 total_tmem_ops++; |
keir@19684 | 2288 |
keir@19684 | 2289 if ( tmh_lock_all ) |
keir@19684 | 2290 { |
keir@19684 | 2291 if ( tmh_lock_all > 1 ) |
keir@19684 | 2292 spin_lock_irq(&tmem_spinlock); |
keir@19684 | 2293 else |
keir@19684 | 2294 spin_lock(&tmem_spinlock); |
keir@19684 | 2295 } |
keir@19684 | 2296 |
keir@19684 | 2297 START_CYC_COUNTER(succ_get); |
keir@19684 | 2298 DUP_START_CYC_COUNTER(succ_put,succ_get); |
keir@19684 | 2299 DUP_START_CYC_COUNTER(non_succ_get,succ_get); |
keir@19684 | 2300 DUP_START_CYC_COUNTER(non_succ_put,succ_get); |
keir@19684 | 2301 DUP_START_CYC_COUNTER(flush,succ_get); |
keir@19684 | 2302 DUP_START_CYC_COUNTER(flush_obj,succ_get); |
keir@19684 | 2303 |
keir@20499 | 2304 if ( client != NULL && tmh_client_is_dying(client) ) |
keir@20499 | 2305 { |
keir@20499 | 2306 rc = -ENODEV; |
keir@20499 | 2307 goto out; |
keir@20499 | 2308 } |
keir@20499 | 2309 |
keir@19684 | 2310 if ( unlikely(tmh_get_tmemop_from_client(&op, uops) != 0) ) |
keir@19684 | 2311 { |
keir@19684 | 2312 printk("tmem: can't get tmem struct from %s\n",client_str); |
keir@19684 | 2313 rc = -EFAULT; |
keir@19684 | 2314 goto out; |
keir@19684 | 2315 } |
keir@19684 | 2316 |
keir@19684 | 2317 if ( op.cmd == TMEM_CONTROL ) |
keir@19684 | 2318 { |
keir@19684 | 2319 tmem_write_lock(&tmem_rwlock); |
keir@19684 | 2320 tmem_write_lock_set = 1; |
keir@20067 | 2321 rc = do_tmem_control(&op); |
keir@20067 | 2322 goto out; |
keir@20067 | 2323 } else if ( op.cmd == TMEM_AUTH ) { |
keir@20067 | 2324 tmem_write_lock(&tmem_rwlock); |
keir@20067 | 2325 tmem_write_lock_set = 1; |
keir@20067 | 2326 rc = tmemc_shared_pool_auth(op.u.new.arg1,op.u.new.uuid[0], |
keir@20067 | 2327 op.u.new.uuid[1],op.u.new.flags); |
keir@20067 | 2328 goto out; |
keir@20067 | 2329 } else if ( op.cmd == TMEM_RESTORE_NEW ) { |
keir@20067 | 2330 tmem_write_lock(&tmem_rwlock); |
keir@20067 | 2331 tmem_write_lock_set = 1; |
keir@20067 | 2332 rc = do_tmem_new_pool(op.u.new.arg1, op.pool_id, op.u.new.flags, |
keir@20067 | 2333 op.u.new.uuid[0], op.u.new.uuid[1]); |
keir@19684 | 2334 goto out; |
keir@19684 | 2335 } |
keir@19684 | 2336 |
keir@19684 | 2337 /* create per-client tmem structure dynamically on first use by client */ |
keir@19684 | 2338 if ( client == NULL ) |
keir@19684 | 2339 { |
keir@19684 | 2340 tmem_write_lock(&tmem_rwlock); |
keir@19684 | 2341 tmem_write_lock_set = 1; |
keir@20067 | 2342 if ( (client = client_create(tmh_get_cli_id_from_current())) == NULL ) |
keir@19684 | 2343 { |
keir@19684 | 2344 printk("tmem: can't create tmem structure for %s\n",client_str); |
keir@19684 | 2345 rc = -ENOMEM; |
keir@19684 | 2346 goto out; |
keir@19684 | 2347 } |
keir@19684 | 2348 } |
keir@19684 | 2349 |
keir@19734 | 2350 if ( op.cmd == TMEM_NEW_POOL || op.cmd == TMEM_DESTROY_POOL ) |
keir@19684 | 2351 { |
keir@19684 | 2352 if ( !tmem_write_lock_set ) |
keir@19684 | 2353 { |
keir@19684 | 2354 tmem_write_lock(&tmem_rwlock); |
keir@19684 | 2355 tmem_write_lock_set = 1; |
keir@19684 | 2356 } |
keir@19684 | 2357 } |
keir@19684 | 2358 else |
keir@19684 | 2359 { |
keir@19684 | 2360 if ( !tmem_write_lock_set ) |
keir@19684 | 2361 { |
keir@19684 | 2362 tmem_read_lock(&tmem_rwlock); |
keir@19684 | 2363 tmem_read_lock_set = 1; |
keir@19684 | 2364 } |
keir@19684 | 2365 if ( ((uint32_t)op.pool_id >= MAX_POOLS_PER_DOMAIN) || |
keir@19684 | 2366 ((pool = client->pools[op.pool_id]) == NULL) ) |
keir@19684 | 2367 { |
keir@19684 | 2368 rc = -ENODEV; |
keir@19684 | 2369 printk("tmem: operation requested on uncreated pool\n"); |
keir@19684 | 2370 goto out; |
keir@19684 | 2371 } |
keir@19684 | 2372 ASSERT_SENTINEL(pool,POOL); |
keir@19684 | 2373 } |
keir@19684 | 2374 |
keir@19684 | 2375 switch ( op.cmd ) |
keir@19684 | 2376 { |
keir@19684 | 2377 case TMEM_NEW_POOL: |
keir@20067 | 2378 rc = do_tmem_new_pool(CLI_ID_NULL, 0, op.u.new.flags, |
keir@19809 | 2379 op.u.new.uuid[0], op.u.new.uuid[1]); |
keir@19684 | 2380 break; |
keir@19684 | 2381 case TMEM_NEW_PAGE: |
keir@20648 | 2382 tmem_ensure_avail_pages(); |
keir@20067 | 2383 rc = do_tmem_put(pool, op.u.gen.object, |
keir@20067 | 2384 op.u.gen.index, op.u.gen.cmfn, 0, 0, 0, NULL); |
keir@19684 | 2385 break; |
keir@19684 | 2386 case TMEM_PUT_PAGE: |
keir@20648 | 2387 tmem_ensure_avail_pages(); |
keir@20067 | 2388 rc = do_tmem_put(pool, op.u.gen.object, |
keir@20067 | 2389 op.u.gen.index, op.u.gen.cmfn, 0, 0, PAGE_SIZE, NULL); |
keir@19684 | 2390 if (rc == 1) succ_put = 1; |
keir@19684 | 2391 else non_succ_put = 1; |
keir@19684 | 2392 break; |
keir@19684 | 2393 case TMEM_GET_PAGE: |
keir@19809 | 2394 rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn, |
keir@20067 | 2395 0, 0, PAGE_SIZE, 0); |
keir@19684 | 2396 if (rc == 1) succ_get = 1; |
keir@19684 | 2397 else non_succ_get = 1; |
keir@19684 | 2398 break; |
keir@19684 | 2399 case TMEM_FLUSH_PAGE: |
keir@19684 | 2400 flush = 1; |
keir@19809 | 2401 rc = do_tmem_flush_page(pool, op.u.gen.object, op.u.gen.index); |
keir@19684 | 2402 break; |
keir@19684 | 2403 case TMEM_FLUSH_OBJECT: |
keir@19809 | 2404 rc = do_tmem_flush_object(pool, op.u.gen.object); |
keir@19684 | 2405 flush_obj = 1; |
keir@19684 | 2406 break; |
keir@19684 | 2407 case TMEM_DESTROY_POOL: |
keir@19684 | 2408 flush = 1; |
keir@19684 | 2409 rc = do_tmem_destroy_pool(op.pool_id); |
keir@19684 | 2410 break; |
keir@19684 | 2411 case TMEM_READ: |
keir@19809 | 2412 rc = do_tmem_get(pool, op.u.gen.object, op.u.gen.index, op.u.gen.cmfn, |
keir@19809 | 2413 op.u.gen.tmem_offset, op.u.gen.pfn_offset, |
keir@20067 | 2414 op.u.gen.len,0); |
keir@19684 | 2415 break; |
keir@19684 | 2416 case TMEM_WRITE: |
keir@20067 | 2417 rc = do_tmem_put(pool, op.u.gen.object, |
keir@20067 | 2418 op.u.gen.index, op.u.gen.cmfn, |
keir@19809 | 2419 op.u.gen.tmem_offset, op.u.gen.pfn_offset, |
keir@20067 | 2420 op.u.gen.len, NULL); |
keir@19684 | 2421 break; |
keir@19684 | 2422 case TMEM_XCHG: |
keir@19684 | 2423 /* need to hold global lock to ensure xchg is atomic */ |
keir@19684 | 2424 printk("tmem_xchg op not implemented yet\n"); |
keir@19684 | 2425 rc = 0; |
keir@19684 | 2426 break; |
keir@19684 | 2427 default: |
keir@19684 | 2428 printk("tmem: op %d not implemented\n", op.cmd); |
keir@19684 | 2429 rc = 0; |
keir@19684 | 2430 break; |
keir@19684 | 2431 } |
keir@19684 | 2432 |
keir@19684 | 2433 out: |
keir@19684 | 2434 if ( rc < 0 ) |
keir@19684 | 2435 errored_tmem_ops++; |
keir@19684 | 2436 if ( succ_get ) |
keir@19897 | 2437 END_CYC_COUNTER_CLI(succ_get,client); |
keir@19684 | 2438 else if ( succ_put ) |
keir@19897 | 2439 END_CYC_COUNTER_CLI(succ_put,client); |
keir@19684 | 2440 else if ( non_succ_get ) |
keir@19897 | 2441 END_CYC_COUNTER_CLI(non_succ_get,client); |
keir@19684 | 2442 else if ( non_succ_put ) |
keir@19897 | 2443 END_CYC_COUNTER_CLI(non_succ_put,client); |
keir@19684 | 2444 else if ( flush ) |
keir@19897 | 2445 END_CYC_COUNTER_CLI(flush,client); |
keir@19897 | 2446 else if ( flush_obj ) |
keir@19897 | 2447 END_CYC_COUNTER_CLI(flush_obj,client); |
keir@19684 | 2448 |
keir@19684 | 2449 if ( tmh_lock_all ) |
keir@19684 | 2450 { |
keir@19684 | 2451 if ( tmh_lock_all > 1 ) |
keir@19684 | 2452 spin_unlock_irq(&tmem_spinlock); |
keir@19684 | 2453 else |
keir@19684 | 2454 spin_unlock(&tmem_spinlock); |
keir@19684 | 2455 } else { |
keir@19684 | 2456 if ( tmem_write_lock_set ) |
keir@19684 | 2457 write_unlock(&tmem_rwlock); |
keir@19684 | 2458 else if ( tmem_read_lock_set ) |
keir@19684 | 2459 read_unlock(&tmem_rwlock); |
keir@19684 | 2460 else |
keir@19684 | 2461 ASSERT(0); |
keir@19684 | 2462 } |
keir@19684 | 2463 |
keir@19684 | 2464 return rc; |
keir@19684 | 2465 } |
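And the equivalent sketch for a data-path request, a TMEM_PUT_PAGE as dispatched above (field names as used in the switch; mfn is the client machine frame holding the page to store):

    /* sketch: a client building a TMEM_PUT_PAGE op as consumed above */
    struct tmem_op op = { 0 };
    op.cmd = TMEM_PUT_PAGE;
    op.pool_id = pool_id;        /* returned earlier by TMEM_NEW_POOL */
    op.u.gen.object = oid;
    op.u.gen.index = index;
    op.u.gen.cmfn = mfn;         /* client mfn of the page to store */
    /* the op then reaches do_tmem_op() via tmh_get_tmemop_from_client() */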
keir@19684 | 2466 |
keir@19684 | 2467 /* this should be called when the host is destroying a client */ |
keir@19684 | 2468 EXPORT void tmem_destroy(void *v) |
keir@19684 | 2469 { |
keir@19684 | 2470 client_t *client = (client_t *)v; |
keir@19684 | 2471 |
keir@19724 | 2472 if ( client == NULL ) |
keir@19724 | 2473 return; |
keir@19724 | 2474 |
keir@20499 | 2475 if ( !tmh_client_is_dying(client) ) |
keir@20499 | 2476 { |
keir@20499 | 2477 printk("tmem: tmem_destroy can only destroy dying client\n"); |
keir@20499 | 2478 return; |
keir@20499 | 2479 } |
keir@20499 | 2480 |
keir@19684 | 2481 if ( tmh_lock_all ) |
keir@19684 | 2482 spin_lock(&tmem_spinlock); |
keir@19684 | 2483 else |
keir@19684 | 2484 write_lock(&tmem_rwlock); |
keir@19684 | 2485 |
keir@19724 | 2486 printk("tmem: flushing tmem pools for %s=%d\n", |
keir@19724 | 2487 cli_id_str, client->cli_id); |
keir@19724 | 2488 client_flush(client, 1); |
keir@19684 | 2489 |
keir@19684 | 2490 if ( tmh_lock_all ) |
keir@19684 | 2491 spin_unlock(&tmem_spinlock); |
keir@19684 | 2492 else |
keir@19684 | 2493 write_unlock(&tmem_rwlock); |
keir@19684 | 2494 } |
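A hedged sketch of the expected call site follows: the host invokes tmem_destroy() late in client (domain) teardown, after the client is already marked dying so the tmh_client_is_dying() check above passes. The surrounding function name and the d->tmem access are illustrative assumptions about host code, not taken from this file.

/* Hedged sketch of a host-side call site; names are illustrative. */
static void host_complete_client_destroy(struct domain *d)
{
    /* by this point the domain is dying, so the check above is satisfied */
    tmem_destroy(d->tmem);   /* NULL-tolerant: no-op if no tmem client */
    d->tmem = NULL;
}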
keir@19684 | 2495 |
keir@19684 | 2496 /* freezing all pools guarantees that no additional memory will be consumed; each call toggles all pools between frozen and thawed */ |
keir@19684 | 2497 EXPORT void tmem_freeze_all(unsigned char key) |
keir@19684 | 2498 { |
keir@19684 | 2499 static int freeze = 0; |
keir@19684 | 2500 |
keir@19684 | 2501 if ( tmh_lock_all ) |
keir@19684 | 2502 spin_lock(&tmem_spinlock); |
keir@19684 | 2503 else |
keir@19684 | 2504 write_lock(&tmem_rwlock); |
keir@19684 | 2505 |
keir@19684 | 2506 freeze = !freeze; |
keir@19684 | 2507 tmemc_freeze_pools(CLI_ID_NULL, freeze); |
keir@19684 | 2508 |
keir@19684 | 2509 if ( tmh_lock_all ) |
keir@19684 | 2510 spin_unlock(&tmem_spinlock); |
keir@19684 | 2511 else |
keir@19684 | 2512 write_unlock(&tmem_rwlock); |
keir@19684 | 2513 } |
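Because `freeze` is a static toggle, successive calls alternate between freezing and thawing every pool; the unused key argument exists only so host code can wire the function up directly as a debug-key handler. A minimal usage sketch (the wrapper is hypothetical):

/* Hedged usage sketch: successive calls toggle the global freeze state. */
static void demo_freeze_toggle(void)   /* hypothetical wrapper */
{
    tmem_freeze_all(0);   /* 1st call: every pool frozen; puts consume no memory */
    tmem_freeze_all(0);   /* 2nd call: every pool thawed again */
}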
keir@19684 | 2514 |
keir@19684 | 2515 #define MAX_EVICTS 10 /* TODO: make tunable, e.g. via a TMEMC_ control op */ |
keir@19684 | 2516 |
keir@19684 | 2517 EXPORT void *tmem_relinquish_pages(unsigned int order, unsigned int memflags) |
keir@19684 | 2518 { |
keir@19684 | 2519 pfp_t *pfp; |
keir@19684 | 2520 unsigned long evicts_per_relinq = 0; |
keir@19684 | 2521 int max_evictions = MAX_EVICTS; |
keir@19684 | 2522 |
keir@20812 | 2523 if ( !tmh_enabled() || !tmh_freeable_pages() ) |
keir@19684 | 2524 return NULL; |
keir@19684 | 2525 #ifdef __i386__ |
keir@19684 | 2526 return NULL; |
keir@19684 | 2527 #endif |
keir@19684 | 2528 |
keir@19684 | 2529 relinq_attempts++; |
keir@19684 | 2530 if ( order > 0 ) |
keir@19684 | 2531 { |
keir@19684 | 2532 printk("tmem_relinquish_pages: failing order=%d\n", order); |
keir@19684 | 2533 return NULL; |
keir@19684 | 2534 } |
keir@19684 | 2535 |
keir@19684 | 2536 if ( tmh_called_from_tmem(memflags) ) |
keir@19684 | 2537 { |
keir@19684 | 2538 if ( tmh_lock_all ) |
keir@19684 | 2539 spin_lock(&tmem_spinlock); |
keir@19684 | 2540 else |
keir@19684 | 2541 read_lock(&tmem_rwlock); |
keir@19684 | 2542 } |
keir@19684 | 2543 |
keir@19684 | 2544 while ( (pfp = tmh_alloc_page(NULL, 1)) == NULL ) |
keir@19684 | 2545 { |
keir@19684 | 2546 if ( (max_evictions-- <= 0) || !tmem_evict() ) |
keir@19684 | 2547 break; |
keir@19684 | 2548 evicts_per_relinq++; |
keir@19684 | 2549 } |
keir@19684 | 2550 if ( evicts_per_relinq > max_evicts_per_relinq ) |
keir@19684 | 2551 max_evicts_per_relinq = evicts_per_relinq; |
keir@19684 | 2552 tmh_scrub_page(pfp, memflags); |
keir@19684 | 2553 if ( pfp != NULL ) |
keir@19684 | 2554 relinq_pgs++; |
keir@19684 | 2555 |
keir@19684 | 2556 if ( tmh_called_from_tmem(memflags) ) |
keir@19684 | 2557 { |
keir@19684 | 2558 if ( tmh_lock_all ) |
keir@19684 | 2559 spin_unlock(&tmem_spinlock); |
keir@19684 | 2560 else |
keir@19684 | 2561 read_unlock(&tmem_rwlock); |
keir@19684 | 2562 } |
keir@19684 | 2563 |
keir@19684 | 2564 return pfp; |
keir@19684 | 2565 } |
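This function is an order-0-only fallback for the host allocator: it evicts up to MAX_EVICTS ephemeral pages until tmh_alloc_page() succeeds. A hedged sketch of the intended caller follows; the allocator name and flow are illustrative assumptions, not taken from this file.

/* Hedged sketch: host allocator falling back to tmem on order-0 failure. */
static void *host_alloc_with_tmem_fallback(unsigned int memflags)
{
    void *page = host_alloc_heap_page(0, memflags);  /* hypothetical */

    if ( page == NULL )
        page = tmem_relinquish_pages(0, memflags);   /* order > 0 would fail */
    return page;
}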
keir@19684 | 2566 |
keir@19684 | 2567 /* called at hypervisor startup */ |
keir@19684 | 2568 EXPORT void init_tmem(void) |
keir@19684 | 2569 { |
keir@19684 | 2570 if ( !tmh_enabled() ) |
keir@19684 | 2571 return; |
keir@19684 | 2572 |
keir@19684 | 2573 radix_tree_init(); |
keir@19684 | 2574 if ( tmh_init() ) |
keir@19684 | 2575 { |
keir@19684 | 2576 printk("tmem: initialized comp=%d global-lock=%d\n", |
keir@19684 | 2577 tmh_compression_enabled(), tmh_lock_all); |
keir@19684 | 2578 tmem_initialized = 1; |
keir@19684 | 2579 } |
keir@19684 | 2580 else |
keir@19684 | 2581 printk("tmem: initialization FAILED\n"); |
keir@19684 | 2582 } |
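Boot-time usage is a single call from host startup code once its allocators are available; tmem stays inert unless tmh_enabled() reports that it was switched on. A hedged sketch (the hook name is an assumption):

/* Hedged sketch: one-shot initialization from host startup code. */
static void host_late_boot(void)   /* hypothetical hook */
{
    init_tmem();   /* safe no-op when tmh_enabled() is false */
    /* tmem_initialized (set above) records whether tmh_init() succeeded */
}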
keir@19684 | 2583 |
keir@19684 | 2584 /* |
keir@19684 | 2585 * Local variables: |
keir@19684 | 2586 * mode: C |
keir@19684 | 2587 * c-set-style: "BSD" |
keir@19684 | 2588 * c-basic-offset: 4 |
keir@19684 | 2589 * tab-width: 4 |
keir@19684 | 2590 * indent-tabs-mode: nil |
keir@19684 | 2591 * End: |
keir@19684 | 2592 */ |