debuggers.hg

annotate xen/include/xen/tmem_xen.h @ 20964:a3fa6d444b25

Fix domain reference leaks

Besides two unlikely/rarely hit ones in the x86 code, the main offender
was tmh_client_from_cli_id(), which didn't even have a counterpart
(although it had a comment correctly saying that it causes d->refcnt to
get incremented). Unfortunately(?) this required a bit of code
restructuring (and since I needed to change the code anyway, I also
fixed a couple of missing bounds checks which would sooner or later
have been reported as security vulnerabilities), so I would hope Dan
could give it his blessing before it gets applied.

Signed-off-by: Jan Beulich <jbeulich@novell.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Feb 10 09:18:43 2010 +0000 (2010-02-10)
parents 277bfc2d47b1
children 61372a4f4e76
/******************************************************************************
 * tmem_xen.h
 *
 * Xen-specific Transcendent memory
 *
 * Copyright (c) 2009, Dan Magenheimer, Oracle Corp.
 */

#ifndef __XEN_TMEM_XEN_H__
#define __XEN_TMEM_XEN_H__

#include <xen/config.h>
#include <xen/mm.h>           /* heap alloc/free */
#include <xen/xmalloc.h>      /* xmalloc/xfree */
#include <xen/sched.h>        /* struct domain */
#include <xen/guest_access.h> /* copy_from_guest */
#include <xen/hash.h>         /* hash_long */
#include <public/tmem.h>
#ifdef CONFIG_COMPAT
#include <compat/tmem.h>
#endif

struct tmem_host_dependent_client {
    struct domain *domain;
    struct xmem_pool *persistent_pool;
};
typedef struct tmem_host_dependent_client tmh_client_t;

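/*
 * IS_PAGE_ALIGNED rounds an address up to the next page boundary and
 * checks that this leaves it unchanged; IS_VALID_PAGE checks that a
 * page_info refers to a machine frame number the allocator considers
 * valid.
 */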
#define IS_PAGE_ALIGNED(addr) \
    ((void *)((((unsigned long)addr + (PAGE_SIZE - 1)) & PAGE_MASK)) == addr)
#define IS_VALID_PAGE(_pi) ( mfn_valid(page_to_mfn(_pi)) )

extern struct xmem_pool *tmh_mempool;
extern unsigned int tmh_mempool_maxalloc;
extern struct page_list_head tmh_page_list;
extern spinlock_t tmh_page_list_lock;
extern unsigned long tmh_page_list_pages;
extern atomic_t freeable_page_count;

extern spinlock_t tmem_lock;
extern spinlock_t tmem_spinlock;
extern rwlock_t tmem_rwlock;

extern void tmh_copy_page(char *to, char *from);
extern int tmh_init(void);
#define tmh_hash hash_long

extern void tmh_release_avail_pages_to_host(void);
extern void tmh_scrub_page(struct page_info *pi, unsigned int memflags);

extern int opt_tmem_compress;
static inline int tmh_compression_enabled(void)
{
    return opt_tmem_compress;
}

extern int opt_tmem_shared_auth;
static inline int tmh_shared_auth(void)
{
    return opt_tmem_shared_auth;
}

extern int opt_tmem;
static inline int tmh_enabled(void)
{
    return opt_tmem;
}

extern int opt_tmem_lock;

extern int opt_tmem_flush_dups;

/*
 * Memory free page list management
 */

static inline struct page_info *tmh_page_list_get(void)
{
    struct page_info *pi;

    spin_lock(&tmh_page_list_lock);
    if ( (pi = page_list_remove_head(&tmh_page_list)) != NULL )
        tmh_page_list_pages--;
    spin_unlock(&tmh_page_list_lock);
    ASSERT((pi == NULL) || IS_VALID_PAGE(pi));
    return pi;
}

static inline void tmh_page_list_put(struct page_info *pi)
{
    ASSERT(IS_VALID_PAGE(pi));
    spin_lock(&tmh_page_list_lock);
    page_list_add(pi, &tmh_page_list);
    tmh_page_list_pages++;
    spin_unlock(&tmh_page_list_lock);
}

static inline unsigned long tmh_avail_pages(void)
{
    return tmh_page_list_pages;
}
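
/*
 * A minimal usage sketch (illustration only; consume_page() is a
 * hypothetical consumer): a page taken off the free list must either be
 * handed on or returned via tmh_page_list_put(), since both paths adjust
 * tmh_page_list_pages under the lock.
 */
#if 0
static void example_page_list_usage(void)
{
    struct page_info *pi = tmh_page_list_get();

    if ( pi == NULL )
        return;                 /* free list empty */
    if ( consume_page(pi) != 0 )
        tmh_page_list_put(pi);  /* return the page on failure */
}
#endif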

/*
 * Memory allocation for persistent data
 */

static inline bool_t domain_fully_allocated(struct domain *d)
{
    return ( d->tot_pages >= d->max_pages );
}
#define tmh_client_memory_fully_allocated(_pool) \
    domain_fully_allocated(_pool->client->tmh->domain)

static inline void *_tmh_alloc_subpage_thispool(struct xmem_pool *cmem_mempool,
                                                size_t size, size_t align)
{
#if 0
    if ( d->tot_pages >= d->max_pages )
        return NULL;
#endif
#ifdef __i386__
    return _xmalloc(size, align);
#else
    ASSERT( size < tmh_mempool_maxalloc );
    if ( cmem_mempool == NULL )
        return NULL;
    return xmem_pool_alloc(size, cmem_mempool);
#endif
}
#define tmh_alloc_subpage_thispool(_pool, _s, _a) \
    _tmh_alloc_subpage_thispool(_pool->client->tmh->persistent_pool, \
                                _s, _a)

static inline void _tmh_free_subpage_thispool(struct xmem_pool *cmem_mempool,
                                              void *ptr, size_t size)
{
#ifdef __i386__
    xfree(ptr);
#else
    ASSERT( size < tmh_mempool_maxalloc );
    ASSERT( cmem_mempool != NULL );
    xmem_pool_free(ptr, cmem_mempool);
#endif
}
#define tmh_free_subpage_thispool(_pool, _p, _s) \
    _tmh_free_subpage_thispool(_pool->client->tmh->persistent_pool, _p, _s)

static inline struct page_info *_tmh_alloc_page_thispool(struct domain *d)
{
    struct page_info *pi;

    /* Note that this tot_pages check is not protected by d->page_alloc_lock,
     * so it may race and periodically fail in donate_page() or
     * alloc_domheap_pages().  That's OK; neither failure is a problem,
     * though both are chatty if log_lvl is set. */
    if ( d->tot_pages >= d->max_pages )
        return NULL;

    if ( tmh_page_list_pages )
    {
        if ( (pi = tmh_page_list_get()) != NULL )
        {
            if ( donate_page(d, pi, 0) == 0 )
                goto out;
            else
                tmh_page_list_put(pi);
        }
    }

    pi = alloc_domheap_pages(d, 0, MEMF_tmem);

out:
    ASSERT((pi == NULL) || IS_VALID_PAGE(pi));
    return pi;
}
#define tmh_alloc_page_thispool(_pool) \
    _tmh_alloc_page_thispool(_pool->client->tmh->domain)

static inline void _tmh_free_page_thispool(struct page_info *pi)
{
    struct domain *d = page_get_owner(pi);

    ASSERT(IS_VALID_PAGE(pi));
    if ( (d == NULL) || steal_page(d, pi, 0) == 0 )
        tmh_page_list_put(pi);
    else
    {
        scrub_one_page(pi);
        ASSERT((pi->count_info & ~(PGC_allocated | 1)) == 0);
        free_domheap_pages(pi, 0);
    }
}
#define tmh_free_page_thispool(_pool, _pg) \
    _tmh_free_page_thispool(_pg)
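
/*
 * Sketch of the intended alloc/free pairing for persistent data
 * (illustration only; 'struct tmem_pool' stands in for the pool type
 * defined in tmem.c, and error handling is elided): objects must be
 * freed back to the pool they came from so the owning domain's page
 * accounting stays balanced.
 */
#if 0
static int example_persistent_use(struct tmem_pool *pool)
{
    void *obj = tmh_alloc_subpage_thispool(pool, 128, sizeof(void *));

    if ( obj == NULL )
        return -ENOMEM;
    /* ... fill in persistent data ... */
    tmh_free_subpage_thispool(pool, obj, 128);
    return 0;
}
#endif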

/*
 * Memory allocation for ephemeral (non-persistent) data
 */

static inline void *tmh_alloc_subpage(void *pool, size_t size,
                                      size_t align)
{
#ifdef __i386__
    ASSERT( size < PAGE_SIZE );
    return _xmalloc(size, align);
#else
    ASSERT( size < tmh_mempool_maxalloc );
    ASSERT( tmh_mempool != NULL );
    return xmem_pool_alloc(size, tmh_mempool);
#endif
}

static inline void tmh_free_subpage(void *ptr, size_t size)
{
#ifdef __i386__
    ASSERT( size < PAGE_SIZE );
    xfree(ptr);
#else
    ASSERT( size < tmh_mempool_maxalloc );
    xmem_pool_free(ptr, tmh_mempool);
#endif
}

static inline struct page_info *tmh_alloc_page(void *pool, int no_heap)
{
    struct page_info *pi = tmh_page_list_get();

    if ( pi == NULL && !no_heap )
        pi = alloc_domheap_pages(NULL, 0, MEMF_tmem);
    ASSERT((pi == NULL) || IS_VALID_PAGE(pi));
    if ( pi != NULL && !no_heap )
        atomic_inc(&freeable_page_count);
    return pi;
}
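
/*
 * freeable_page_count tracks ephemeral pages handed out above with
 * no_heap clear; such pages can be given back to the host on demand.
 * tmh_free_page() drops the count again, and it feeds
 * tmh_freeable_pages() below.
 */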

static inline void tmh_free_page(struct page_info *pi)
{
    ASSERT(IS_VALID_PAGE(pi));
    tmh_page_list_put(pi);
    atomic_dec(&freeable_page_count);
}

static inline unsigned int tmem_subpage_maxsize(void)
{
    return tmh_mempool_maxalloc;
}

static inline unsigned long tmh_freeable_pages(void)
{
    return tmh_avail_pages() + _atomic_read(freeable_page_count);
}

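/*
 * The shift converts a page count to MiB: with 4 KiB pages (PAGE_SHIFT
 * == 12), ">> (20 - 12)" divides the page count by 256, the number of
 * pages per MiB.
 */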
static inline unsigned long tmh_free_mb(void)
{
    return (tmh_avail_pages() + total_free_pages()) >> (20 - PAGE_SHIFT);
}

/*
 * Memory allocation for "infrastructure" data
 */

static inline void *tmh_alloc_infra(size_t size, size_t align)
{
    return _xmalloc(size, align);
}

static inline void tmh_free_infra(void *p)
{
    xfree(p);
}

#define tmh_lock_all  opt_tmem_lock
#define tmh_flush_dups  opt_tmem_flush_dups
#define tmh_called_from_tmem(_memflags) (_memflags & MEMF_tmem)

/* "Client" (== domain) abstraction */

struct client;
typedef domid_t cli_id_t;
typedef struct domain tmh_cli_ptr_t;
typedef struct page_info pfp_t;

extern tmh_client_t *tmh_client_init(cli_id_t);
extern void tmh_client_destroy(tmh_client_t *);

/* this appears to be unreliable when a domain is being shut down */
static inline struct client *tmh_client_from_cli_id(cli_id_t cli_id)
{
    struct domain *d = get_domain_by_id(cli_id); /* incs d->refcnt! */
    if ( d == NULL )
        return NULL;
    return (struct client *)(d->tmem);
}

static inline void tmh_client_put(tmh_client_t *tmh)
{
    put_domain(tmh->domain);
}
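
/*
 * Minimal sketch of the pairing this changeset establishes (the caller
 * and use_client() are hypothetical): a successful lookup through
 * tmh_client_from_cli_id() holds a domain reference that must be dropped
 * with tmh_client_put() once the client pointer is no longer needed.
 */
#if 0
static void example_client_lookup(cli_id_t cli_id)
{
    struct client *client = tmh_client_from_cli_id(cli_id);

    if ( client == NULL )
        return;
    use_client(client);          /* hypothetical */
    tmh_client_put(client->tmh); /* balances the get_domain_by_id() */
}
#endif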

static inline struct client *tmh_client_from_current(void)
{
    return (struct client *)(current->domain->tmem);
}

#define tmh_client_is_dying(_client) (!!_client->tmh->domain->is_dying)

static inline cli_id_t tmh_get_cli_id_from_current(void)
{
    return current->domain->domain_id;
}

static inline tmh_cli_ptr_t *tmh_get_cli_ptr_from_current(void)
{
    return current->domain;
}

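/*
 * Note: get_domain_by_id() below also takes a domain reference; it is
 * retained in tmh->domain for the client's lifetime and presumably
 * dropped through tmh_client_put() when the client is torn down.
 */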
static inline void tmh_set_client_from_id(struct client *client,
                                          tmh_client_t *tmh, cli_id_t cli_id)
{
    struct domain *d = get_domain_by_id(cli_id);
    d->tmem = client;
    tmh->domain = d;
}

static inline bool_t tmh_current_is_privileged(void)
{
    return IS_PRIV(current->domain);
}

/* these typedefs are in the public/tmem.h interface
typedef XEN_GUEST_HANDLE(void) cli_mfn_t;
typedef XEN_GUEST_HANDLE(char) cli_va_t;
*/
typedef XEN_GUEST_HANDLE(tmem_op_t) tmem_cli_op_t;

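/*
 * Fetch a tmem_op from the guest, translating from the 32-bit compat
 * layout when the caller is a 32-on-64 PV guest: the union member to
 * convert is chosen from cop.cmd, and XLAT_tmem_op() rewrites the
 * structure into native format.
 */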
static inline int tmh_get_tmemop_from_client(tmem_op_t *op, tmem_cli_op_t uops)
{
#ifdef CONFIG_COMPAT
    if ( is_pv_32on64_vcpu(current) )
    {
        int rc;
        enum XLAT_tmem_op_u u;
        tmem_op_compat_t cop;

        rc = copy_from_guest(&cop, guest_handle_cast(uops, void), 1);
        if ( rc )
            return rc;
        switch ( cop.cmd )
        {
        case TMEM_NEW_POOL:    u = XLAT_tmem_op_u_new;  break;
        case TMEM_CONTROL:     u = XLAT_tmem_op_u_ctrl; break;
        case TMEM_AUTH:        u = XLAT_tmem_op_u_new;  break;
        case TMEM_RESTORE_NEW: u = XLAT_tmem_op_u_new;  break;
        default:               u = XLAT_tmem_op_u_gen;  break;
        }
#define XLAT_tmem_op_HNDL_u_ctrl_buf(_d_, _s_) \
        guest_from_compat_handle((_d_)->u.ctrl.buf, (_s_)->u.ctrl.buf)
        XLAT_tmem_op(op, &cop);
#undef XLAT_tmem_op_HNDL_u_ctrl_buf
        return 0;
    }
#endif
    return copy_from_guest(op, uops, 1);
}

static inline void tmh_copy_to_client_buf_offset(tmem_cli_va_t clibuf, int off,
                                                 char *tmembuf, int len)
{
    copy_to_guest_offset(clibuf, off, tmembuf, len);
}

#define TMH_CLI_ID_NULL ((cli_id_t)((domid_t)-1L))

#define tmh_cli_id_str "domid"
#define tmh_client_str "domain"

extern int tmh_decompress_to_client(tmem_cli_mfn_t, void *, size_t, void *);

extern int tmh_compress_from_client(tmem_cli_mfn_t, void **, size_t *, void *);

extern int tmh_copy_from_client(pfp_t *pfp,
    tmem_cli_mfn_t cmfn, uint32_t tmem_offset,
    uint32_t pfn_offset, uint32_t len, void *cva);

extern int tmh_copy_to_client(tmem_cli_mfn_t cmfn, pfp_t *pfp,
    uint32_t tmem_offset, uint32_t pfn_offset, uint32_t len, void *cva);


#define TMEM_PERF
#ifdef TMEM_PERF
#define DECL_CYC_COUNTER(x) \
    uint64_t x##_sum_cycles = 0, x##_count = 0; \
    uint32_t x##_min_cycles = 0x7fffffff, x##_max_cycles = 0;
#define EXTERN_CYC_COUNTER(x) \
    extern uint64_t x##_sum_cycles, x##_count; \
    extern uint32_t x##_min_cycles, x##_max_cycles;
#define DECL_LOCAL_CYC_COUNTER(x) \
    int64_t x##_start = 0
#define START_CYC_COUNTER(x) x##_start = get_cycles()
#define DUP_START_CYC_COUNTER(x,y) x##_start = y##_start
/* The following might race, but since it's advisory only, we don't care. */
#define END_CYC_COUNTER(x) \
    do { \
      x##_start = get_cycles() - x##_start; \
      if (x##_start > 0 && x##_start < 1000000000) { \
        x##_sum_cycles += x##_start; x##_count++; \
        if ((uint32_t)x##_start < x##_min_cycles) x##_min_cycles = x##_start; \
        if ((uint32_t)x##_start > x##_max_cycles) x##_max_cycles = x##_start; \
      } \
    } while (0)
#define END_CYC_COUNTER_CLI(x,y) \
    do { \
      x##_start = get_cycles() - x##_start; \
      if (x##_start > 0 && x##_start < 1000000000) { \
        x##_sum_cycles += x##_start; x##_count++; \
        if ((uint32_t)x##_start < x##_min_cycles) x##_min_cycles = x##_start; \
        if ((uint32_t)x##_start > x##_max_cycles) x##_max_cycles = x##_start; \
        y->total_cycles += x##_start; \
      } \
    } while (0)
#define RESET_CYC_COUNTER(x) { x##_sum_cycles = 0, x##_count = 0; \
    x##_min_cycles = 0x7fffffff, x##_max_cycles = 0; }
#define SCNPRINTF_CYC_COUNTER(buf,size,x,tag) \
    scnprintf(buf,size, \
    tag"n:%"PRIu64","tag"t:%"PRIu64","tag"x:%"PRId32","tag"m:%"PRId32",", \
    x##_count,x##_sum_cycles,x##_max_cycles,x##_min_cycles)
#else
#define DECL_CYC_COUNTER(x)
#define EXTERN_CYC_COUNTER(x) \
    extern uint64_t x##_sum_cycles, x##_count; \
    extern uint32_t x##_min_cycles, x##_max_cycles;
#define DECL_LOCAL_CYC_COUNTER(x) do { } while (0)
#define START_CYC_COUNTER(x) do { } while (0)
#define DUP_START_CYC_COUNTER(x,y) do { } while (0)
#define END_CYC_COUNTER(x) do { } while (0)
#define END_CYC_COUNTER_CLI(x,y) do { } while (0)
#define SCNPRINTF_CYC_COUNTER(buf,size,x,tag) (0)
#define RESET_CYC_COUNTER(x) do { } while (0)
#endif
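
/*
 * Typical cycle-counter usage (sketch only; my_op is a hypothetical
 * counter name): declare a global counter with DECL_CYC_COUNTER, a local
 * start variable with DECL_LOCAL_CYC_COUNTER, and bracket the measured
 * region with START_CYC_COUNTER/END_CYC_COUNTER.
 */
#if 0
DECL_CYC_COUNTER(my_op);

static void example_timed_op(void)
{
    DECL_LOCAL_CYC_COUNTER(my_op);

    START_CYC_COUNTER(my_op);
    /* ... work being measured ... */
    END_CYC_COUNTER(my_op);
}
#endif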

#endif /* __XEN_TMEM_XEN_H__ */