Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/mm/mem_sharing.c
Line | Count | Source
1
/******************************************************************************
2
 * arch/x86/mm/mem_sharing.c
3
 *
4
 * Memory sharing support.
5
 *
6
 * Copyright (c) 2011 GridCentric, Inc. (Adin Scannell & Andres Lagar-Cavilla)
7
 * Copyright (c) 2009 Citrix Systems, Inc. (Grzegorz Milos)
8
 *
9
 * This program is free software; you can redistribute it and/or modify
10
 * it under the terms of the GNU General Public License as published by
11
 * the Free Software Foundation; either version 2 of the License, or
12
 * (at your option) any later version.
13
 *
14
 * This program is distributed in the hope that it will be useful,
15
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17
 * GNU General Public License for more details.
18
 *
19
 * You should have received a copy of the GNU General Public License
20
 * along with this program; If not, see <http://www.gnu.org/licenses/>.
21
 */
22
23
#include <xen/types.h>
24
#include <xen/domain_page.h>
25
#include <xen/spinlock.h>
26
#include <xen/rwlock.h>
27
#include <xen/mm.h>
28
#include <xen/grant_table.h>
29
#include <xen/sched.h>
30
#include <xen/rcupdate.h>
31
#include <xen/guest_access.h>
32
#include <xen/vm_event.h>
33
#include <asm/page.h>
34
#include <asm/string.h>
35
#include <asm/p2m.h>
36
#include <asm/altp2m.h>
37
#include <asm/atomic.h>
38
#include <asm/event.h>
39
#include <xsm/xsm.h>
40
41
#include "mm-locks.h"
42
43
static shr_handle_t next_handle = 1;
44
45
typedef struct pg_lock_data {
46
    int mm_unlock_level;
47
    unsigned short recurse_count;
48
} pg_lock_data_t;
49
50
static DEFINE_PER_CPU(pg_lock_data_t, __pld);
51
52
#define MEM_SHARING_DEBUG(_f, _a...)                                  \
53
0
    debugtrace_printk("mem_sharing_debug: %s(): " _f, __func__, ##_a)
54
55
/* Reverse map defines */
56
0
#define RMAP_HASHTAB_ORDER  0
57
#define RMAP_HASHTAB_SIZE   \
58
0
        ((PAGE_SIZE << RMAP_HASHTAB_ORDER) / sizeof(struct list_head))
59
#define RMAP_USES_HASHTAB(page) \
60
0
        ((page)->sharing->hash_table.flag == NULL)
61
0
#define RMAP_HEAVY_SHARED_PAGE   RMAP_HASHTAB_SIZE
62
/* A bit of hysteresis. We don't want to be mutating between list and hash
63
 * table constantly. */
64
0
#define RMAP_LIGHT_SHARED_PAGE   (RMAP_HEAVY_SHARED_PAGE >> 2)
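For concreteness, a worked calculation of the thresholds above (assuming a 4 KiB PAGE_SIZE and a 16-byte struct list_head, as on x86-64):

    RMAP_HASHTAB_SIZE      = (4096 << 0) / 16 = 256 buckets
    RMAP_HEAVY_SHARED_PAGE = 256           (rmap_add switches the list to a hash table at >= 256 entries)
    RMAP_LIGHT_SHARED_PAGE = 256 >> 2 = 64 (rmap_del switches back to a list at <= 64 entries)

The gap between 64 and 256 is the hysteresis the comment above refers to: a page whose sharing count hovers near one threshold does not bounce back and forth between the two representations.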
65
66
#if MEM_SHARING_AUDIT
67
68
static struct list_head shr_audit_list;
69
static spinlock_t shr_audit_lock;
70
static DEFINE_RCU_READ_LOCK(shr_audit_read_lock);
71
72
/* RCU delayed free of audit list entry */
73
static void _free_pg_shared_info(struct rcu_head *head)
74
0
{
75
0
    xfree(container_of(head, struct page_sharing_info, rcu_head));
76
0
}
77
78
static inline void audit_add_list(struct page_info *page)
79
0
{
80
0
    INIT_LIST_HEAD(&page->sharing->entry);
81
0
    spin_lock(&shr_audit_lock);
82
0
    list_add_rcu(&page->sharing->entry, &shr_audit_list);
83
0
    spin_unlock(&shr_audit_lock);
84
0
}
85
86
/* Removes from the audit list and cleans up the page sharing metadata. */
87
static inline void page_sharing_dispose(struct page_info *page)
88
0
{
89
0
    /* Unlikely given our thresholds, but we should be careful. */
90
0
    if ( unlikely(RMAP_USES_HASHTAB(page)) )
91
0
        free_xenheap_pages(page->sharing->hash_table.bucket, 
92
0
                            RMAP_HASHTAB_ORDER);
93
0
94
0
    spin_lock(&shr_audit_lock);
95
0
    list_del_rcu(&page->sharing->entry);
96
0
    spin_unlock(&shr_audit_lock);
97
0
    INIT_RCU_HEAD(&page->sharing->rcu_head);
98
0
    call_rcu(&page->sharing->rcu_head, _free_pg_shared_info);
99
0
}
100
101
#else
102
103
#define audit_add_list(p)  ((void)0)
104
static inline void page_sharing_dispose(struct page_info *page)
105
{
106
    /* Unlikely given our thresholds, but we should be careful. */
107
    if ( unlikely(RMAP_USES_HASHTAB(page)) )
108
        free_xenheap_pages(page->sharing->hash_table.bucket, 
109
                            RMAP_HASHTAB_ORDER);
110
    xfree(page->sharing);
111
}
112
113
#endif /* MEM_SHARING_AUDIT */
114
115
static inline int mem_sharing_page_lock(struct page_info *pg)
116
0
{
117
0
    int rc;
118
0
    pg_lock_data_t *pld = &(this_cpu(__pld));
119
0
120
0
    page_sharing_mm_pre_lock();
121
0
    rc = page_lock(pg);
122
0
    if ( rc )
123
0
    {
124
0
        preempt_disable();
125
0
        page_sharing_mm_post_lock(&pld->mm_unlock_level, 
126
0
                                  &pld->recurse_count);
127
0
    }
128
0
    return rc;
129
0
}
130
131
static inline void mem_sharing_page_unlock(struct page_info *pg)
132
0
{
133
0
    pg_lock_data_t *pld = &(this_cpu(__pld));
134
0
135
0
    page_sharing_mm_unlock(pld->mm_unlock_level, 
136
0
                           &pld->recurse_count);
137
0
    preempt_enable();
138
0
    page_unlock(pg);
139
0
}
140
141
static inline shr_handle_t get_next_handle(void)
142
0
{
143
0
    /* Get the next handle get_page style */ 
144
0
    uint64_t x, y = next_handle;
145
0
    do {
146
0
        x = y;
147
0
    }
148
0
    while ( (y = cmpxchg(&next_handle, x, x + 1)) != x );
149
0
    return x + 1;
150
0
}
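A minimal worked trace of the handle allocation above, assuming no concurrent caller and that cmpxchg() returns the previous value of its target: with next_handle == 7, the loop reads y = 7, sets x = 7, and cmpxchg(&next_handle, 7, 8) stores 8 and returns 7, which matches x, so the loop exits and the function returns the new handle 8. Under contention the returned old value differs from x and the loop simply retries with the fresh value.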
151
152
#define mem_sharing_enabled(d) \
153
0
    (is_hvm_domain(d) && (d)->arch.hvm_domain.mem_sharing_enabled)
154
155
#undef mfn_to_page
156
0
#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
157
#undef page_to_mfn
158
0
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
159
160
static atomic_t nr_saved_mfns   = ATOMIC_INIT(0); 
161
static atomic_t nr_shared_mfns  = ATOMIC_INIT(0);
162
163
/** Reverse map **/
164
/* Every shared frame keeps a reverse map (rmap) of <domain, gfn> tuples that
165
 * this shared frame backs. For pages with a low degree of sharing, a O(n)
166
 * search linked list is good enough. For pages with higher degree of sharing,
167
 * we use a hash table instead. */
168
169
typedef struct gfn_info
170
{
171
    unsigned long gfn;
172
    domid_t domain; 
173
    struct list_head list;
174
} gfn_info_t;
175
176
static inline void
177
rmap_init(struct page_info *page)
178
0
{
179
0
    /* We always start off as a doubly linked list. */
180
0
    INIT_LIST_HEAD(&page->sharing->gfns);
181
0
}
182
183
/* Exceedingly simple "hash function" */
184
#define HASH(domain, gfn)       \
185
0
    (((gfn) + (domain)) % RMAP_HASHTAB_SIZE)
186
187
/* Conversions. Tuned by the thresholds. Should only happen twice 
188
 * (once each) during the lifetime of a shared page */
189
static inline int
190
rmap_list_to_hash_table(struct page_info *page)
191
0
{
192
0
    unsigned int i;
193
0
    struct list_head *pos, *tmp, *b =
194
0
        alloc_xenheap_pages(RMAP_HASHTAB_ORDER, 0);
195
0
196
0
    if ( b == NULL )
197
0
        return -ENOMEM;
198
0
199
0
    for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
200
0
        INIT_LIST_HEAD(b + i);
201
0
202
0
    list_for_each_safe(pos, tmp, &page->sharing->gfns)
203
0
    {
204
0
        gfn_info_t *gfn_info = list_entry(pos, gfn_info_t, list);
205
0
        struct list_head *bucket = b + HASH(gfn_info->domain, gfn_info->gfn);
206
0
        list_del(pos);
207
0
        list_add(pos, bucket);
208
0
    }
209
0
210
0
    page->sharing->hash_table.bucket = b;
211
0
    page->sharing->hash_table.flag   = NULL;
212
0
213
0
    return 0;
214
0
}
215
216
static inline void
217
rmap_hash_table_to_list(struct page_info *page)
218
0
{
219
0
    unsigned int i;
220
0
    struct list_head *bucket = page->sharing->hash_table.bucket;
221
0
222
0
    INIT_LIST_HEAD(&page->sharing->gfns);
223
0
224
0
    for ( i = 0; i < RMAP_HASHTAB_SIZE; i++ )
225
0
    {
226
0
        struct list_head *pos, *tmp, *head = bucket + i;
227
0
        list_for_each_safe(pos, tmp, head)
228
0
        {
229
0
            list_del(pos);
230
0
            list_add(pos, &page->sharing->gfns);
231
0
        }
232
0
    }
233
0
234
0
    free_xenheap_pages(bucket, RMAP_HASHTAB_ORDER);
235
0
}
236
237
/* Generic accessors to the rmap */
238
static inline unsigned long
239
rmap_count(struct page_info *pg)
240
0
{
241
0
    unsigned long count;
242
0
    unsigned long t = read_atomic(&pg->u.inuse.type_info);
243
0
    count = t & PGT_count_mask;
244
0
    if ( t & PGT_locked )
245
0
        count--;
246
0
    return count;
247
0
}
248
249
/* The page type count is always decreased after removing from the rmap.
250
 * Use a convert flag to avoid mutating the rmap if in the middle of an 
251
 * iterator, or if the page will soon be destroyed anyway. */
252
static inline void
253
rmap_del(gfn_info_t *gfn_info, struct page_info *page, int convert)
254
0
{
255
0
    if ( RMAP_USES_HASHTAB(page) && convert &&
256
0
         (rmap_count(page) <= RMAP_LIGHT_SHARED_PAGE) )
257
0
        rmap_hash_table_to_list(page);
258
0
259
0
    /* Regardless of rmap type, same removal operation */
260
0
    list_del(&gfn_info->list);
261
0
}
262
263
/* The page type count is always increased before adding to the rmap. */
264
static inline void
265
rmap_add(gfn_info_t *gfn_info, struct page_info *page)
266
0
{
267
0
    struct list_head *head;
268
0
269
0
    if ( !RMAP_USES_HASHTAB(page) &&
270
0
         (rmap_count(page) >= RMAP_HEAVY_SHARED_PAGE) )
271
0
        /* The conversion may fail with ENOMEM. We'll be less efficient,
272
0
         * but no reason to panic. */
273
0
        (void)rmap_list_to_hash_table(page);
274
0
275
0
    head = (RMAP_USES_HASHTAB(page)) ?
276
0
        page->sharing->hash_table.bucket + 
277
0
                            HASH(gfn_info->domain, gfn_info->gfn) :
278
0
        &page->sharing->gfns;
279
0
280
0
    INIT_LIST_HEAD(&gfn_info->list);
281
0
    list_add(&gfn_info->list, head);
282
0
}
283
284
static inline gfn_info_t *
285
rmap_retrieve(uint16_t domain_id, unsigned long gfn, 
286
                            struct page_info *page)
287
0
{
288
0
    gfn_info_t *gfn_info;
289
0
    struct list_head *le, *head;
290
0
291
0
    head = (RMAP_USES_HASHTAB(page)) ?
292
0
        page->sharing->hash_table.bucket + HASH(domain_id, gfn) :
293
0
        &page->sharing->gfns;
294
0
295
0
    list_for_each(le, head)
296
0
    {
297
0
        gfn_info = list_entry(le, gfn_info_t, list);
298
0
        if ( (gfn_info->gfn == gfn) && (gfn_info->domain == domain_id) )
299
0
            return gfn_info;
300
0
    }
301
0
302
0
    /* Nothing was found */
303
0
    return NULL;
304
0
}
305
306
/* Returns true if the rmap has only one entry. O(1) complexity. */
307
static inline int rmap_has_one_entry(struct page_info *page)
308
0
{
309
0
    return (rmap_count(page) == 1);
310
0
}
311
312
/* Returns true if the rmap has any entries. O(1) complexity. */
313
static inline int rmap_has_entries(struct page_info *page)
314
0
{
315
0
    return (rmap_count(page) != 0);
316
0
}
317
318
/* The iterator hides the details of how the rmap is implemented. This
319
 * involves splitting the list_for_each_safe macro into two steps. */
320
struct rmap_iterator {
321
    struct list_head *curr;
322
    struct list_head *next;
323
    unsigned int bucket;
324
};
325
326
static inline void
327
rmap_seed_iterator(struct page_info *page, struct rmap_iterator *ri)
328
0
{
329
0
    ri->curr = (RMAP_USES_HASHTAB(page)) ?
330
0
                page->sharing->hash_table.bucket :
331
0
                &page->sharing->gfns;
332
0
    ri->next = ri->curr->next; 
333
0
    ri->bucket = 0;
334
0
}
335
336
static inline gfn_info_t *
337
rmap_iterate(struct page_info *page, struct rmap_iterator *ri)
338
0
{
339
0
    struct list_head *head = (RMAP_USES_HASHTAB(page)) ?
340
0
                page->sharing->hash_table.bucket + ri->bucket :
341
0
                &page->sharing->gfns;
342
0
343
0
retry:
344
0
    if ( ri->next == head)
345
0
    {
346
0
        if ( RMAP_USES_HASHTAB(page) )
347
0
        {
348
0
            ri->bucket++;
349
0
            if ( ri->bucket >= RMAP_HASHTAB_SIZE )
350
0
                /* No more hash table buckets */
351
0
                return NULL;
352
0
            head = page->sharing->hash_table.bucket + ri->bucket;
353
0
            ri->curr = head;
354
0
            ri->next = ri->curr->next;
355
0
            goto retry;
356
0
        } else
357
0
            /* List exhausted */
358
0
            return NULL;
359
0
    }
360
0
361
0
    ri->curr = ri->next;
362
0
    ri->next = ri->curr->next;
363
0
364
0
    return list_entry(ri->curr, gfn_info_t, list);
365
0
}
366
367
static inline gfn_info_t *mem_sharing_gfn_alloc(struct page_info *page,
368
                                                struct domain *d,
369
                                                unsigned long gfn)
370
0
{
371
0
    gfn_info_t *gfn_info = xmalloc(gfn_info_t);
372
0
373
0
    if ( gfn_info == NULL )
374
0
        return NULL; 
375
0
376
0
    gfn_info->gfn = gfn;
377
0
    gfn_info->domain = d->domain_id;
378
0
379
0
    rmap_add(gfn_info, page);
380
0
381
0
    /* Increment our number of shared pages. */
382
0
    atomic_inc(&d->shr_pages);
383
0
384
0
    return gfn_info;
385
0
}
386
387
static inline void mem_sharing_gfn_destroy(struct page_info *page,
388
                                           struct domain *d,
389
                                           gfn_info_t *gfn_info)
390
0
{
391
0
    /* Decrement the number of pages. */
392
0
    atomic_dec(&d->shr_pages);
393
0
394
0
    /* Free the gfn_info structure. */
395
0
    rmap_del(gfn_info, page, 1);
396
0
    xfree(gfn_info);
397
0
}
398
399
static struct page_info* mem_sharing_lookup(unsigned long mfn)
400
0
{
401
0
    if ( mfn_valid(_mfn(mfn)) )
402
0
    {
403
0
        struct page_info* page = mfn_to_page(_mfn(mfn));
404
0
        if ( page_get_owner(page) == dom_cow )
405
0
        {
406
0
            /* Count has to be at least two, because we're called
407
0
             * with the mfn locked (1) and this is supposed to be 
408
0
             * a shared page (1). */
409
0
            unsigned long t = read_atomic(&page->u.inuse.type_info);
410
0
            ASSERT((t & PGT_type_mask) == PGT_shared_page);
411
0
            ASSERT((t & PGT_count_mask) >= 2);
412
0
            ASSERT(get_gpfn_from_mfn(mfn) == SHARED_M2P_ENTRY); 
413
0
            return page;
414
0
        }
415
0
    }
416
0
417
0
    return NULL;
418
0
}
419
420
static int audit(void)
421
0
{
422
0
#if MEM_SHARING_AUDIT
423
0
    int errors = 0;
424
0
    unsigned long count_expected;
425
0
    unsigned long count_found = 0;
426
0
    struct list_head *ae;
427
0
428
0
    count_expected = atomic_read(&nr_shared_mfns);
429
0
430
0
    rcu_read_lock(&shr_audit_read_lock);
431
0
432
0
    list_for_each_rcu(ae, &shr_audit_list)
433
0
    {
434
0
        struct page_sharing_info *pg_shared_info;
435
0
        unsigned long nr_gfns = 0;
436
0
        struct page_info *pg;
437
0
        mfn_t mfn;
438
0
        gfn_info_t *g;
439
0
        struct rmap_iterator ri;
440
0
441
0
        pg_shared_info = list_entry(ae, struct page_sharing_info, entry);
442
0
        pg = pg_shared_info->pg;
443
0
        mfn = page_to_mfn(pg);
444
0
445
0
        /* If we can't lock it, it's definitely not a shared page */
446
0
        if ( !mem_sharing_page_lock(pg) )
447
0
        {
448
0
           MEM_SHARING_DEBUG("mfn %lx in audit list, but cannot be locked (%lx)!\n",
449
0
                              mfn_x(mfn), pg->u.inuse.type_info);
450
0
           errors++;
451
0
           continue;
452
0
        }
453
0
454
0
        /* Check if the MFN has correct type, owner and handle. */ 
455
0
        if ( (pg->u.inuse.type_info & PGT_type_mask) != PGT_shared_page )
456
0
        {
457
0
           MEM_SHARING_DEBUG("mfn %lx in audit list, but not PGT_shared_page (%lx)!\n",
458
0
                              mfn_x(mfn), pg->u.inuse.type_info & PGT_type_mask);
459
0
           errors++;
460
0
           continue;
461
0
        }
462
0
463
0
        /* Check the page owner. */
464
0
        if ( page_get_owner(pg) != dom_cow )
465
0
        {
466
0
           MEM_SHARING_DEBUG("mfn %lx shared, but wrong owner (%hu)!\n",
467
0
                             mfn_x(mfn), page_get_owner(pg)->domain_id);
468
0
           errors++;
469
0
        }
470
0
471
0
        /* Check the m2p entry */
472
0
        if ( get_gpfn_from_mfn(mfn_x(mfn)) != SHARED_M2P_ENTRY )
473
0
        {
474
0
           MEM_SHARING_DEBUG("mfn %lx shared, but wrong m2p entry (%lx)!\n",
475
0
                             mfn_x(mfn), get_gpfn_from_mfn(mfn_x(mfn)));
476
0
           errors++;
477
0
        }
478
0
479
0
        /* Check we have a list */
480
0
        if ( (!pg->sharing) || !rmap_has_entries(pg) )
481
0
        {
482
0
           MEM_SHARING_DEBUG("mfn %lx shared, but empty gfn list!\n",
483
0
                             mfn_x(mfn));
484
0
           errors++;
485
0
           continue;
486
0
        }
487
0
488
0
        /* We've found a page that is shared */
489
0
        count_found++;
490
0
491
0
        /* Check if all GFNs map to the MFN, and the p2m types */
492
0
        rmap_seed_iterator(pg, &ri);
493
0
        while ( (g = rmap_iterate(pg, &ri)) != NULL )
494
0
        {
495
0
            struct domain *d;
496
0
            p2m_type_t t;
497
0
            mfn_t o_mfn;
498
0
499
0
            d = get_domain_by_id(g->domain);
500
0
            if ( d == NULL )
501
0
            {
502
0
                MEM_SHARING_DEBUG("Unknown dom: %hu, for PFN=%lx, MFN=%lx\n",
503
0
                                  g->domain, g->gfn, mfn_x(mfn));
504
0
                errors++;
505
0
                continue;
506
0
            }
507
0
            o_mfn = get_gfn_query_unlocked(d, g->gfn, &t); 
508
0
            if ( mfn_x(o_mfn) != mfn_x(mfn) )
509
0
            {
510
0
                MEM_SHARING_DEBUG("Incorrect P2M for d=%hu, PFN=%lx."
511
0
                                  "Expecting MFN=%lx, got %lx\n",
512
0
                                  g->domain, g->gfn, mfn_x(mfn), mfn_x(o_mfn));
513
0
                errors++;
514
0
            }
515
0
            if ( t != p2m_ram_shared )
516
0
            {
517
0
                MEM_SHARING_DEBUG("Incorrect P2M type for d=%hu, PFN=%lx MFN=%lx."
518
0
                                  "Expecting t=%d, got %d\n",
519
0
                                  g->domain, g->gfn, mfn_x(mfn), p2m_ram_shared, t);
520
0
                errors++;
521
0
            }
522
0
            put_domain(d);
523
0
            nr_gfns++;
524
0
        }
525
0
        /* The type count has an extra ref because we have locked the page */
526
0
        if ( (nr_gfns + 1) != (pg->u.inuse.type_info & PGT_count_mask) )
527
0
        {
528
0
            MEM_SHARING_DEBUG("Mismatched counts for MFN=%lx."
529
0
                              "nr_gfns in list %lu, in type_info %lx\n",
530
0
                              mfn_x(mfn), nr_gfns, 
531
0
                              (pg->u.inuse.type_info & PGT_count_mask));
532
0
            errors++;
533
0
        }
534
0
535
0
        mem_sharing_page_unlock(pg);
536
0
    }
537
0
538
0
    rcu_read_unlock(&shr_audit_read_lock);
539
0
540
0
    if ( count_found != count_expected )
541
0
    {
542
0
        MEM_SHARING_DEBUG("Expected %ld shared mfns, found %ld.",
543
0
                          count_expected, count_found);
544
0
        errors++;
545
0
    }
546
0
547
0
    return errors;
548
0
#else
549
    return -EOPNOTSUPP;
550
#endif
551
0
}
552
553
int mem_sharing_notify_enomem(struct domain *d, unsigned long gfn,
554
                                bool_t allow_sleep) 
555
0
{
556
0
    struct vcpu *v = current;
557
0
    int rc;
558
0
    vm_event_request_t req = {
559
0
        .reason = VM_EVENT_REASON_MEM_SHARING,
560
0
        .vcpu_id = v->vcpu_id,
561
0
        .u.mem_sharing.gfn = gfn,
562
0
        .u.mem_sharing.p2mt = p2m_ram_shared
563
0
    };
564
0
565
0
    if ( (rc = __vm_event_claim_slot(d, 
566
0
                        d->vm_event_share, allow_sleep)) < 0 )
567
0
        return rc;
568
0
569
0
    if ( v->domain == d )
570
0
    {
571
0
        req.flags = VM_EVENT_FLAG_VCPU_PAUSED;
572
0
        vm_event_vcpu_pause(v);
573
0
    }
574
0
575
0
    vm_event_put_request(d, d->vm_event_share, &req);
576
0
577
0
    return 0;
578
0
}
579
580
unsigned int mem_sharing_get_nr_saved_mfns(void)
581
0
{
582
0
    return ((unsigned int)atomic_read(&nr_saved_mfns));
583
0
}
584
585
unsigned int mem_sharing_get_nr_shared_mfns(void)
586
0
{
587
0
    return (unsigned int)atomic_read(&nr_shared_mfns);
588
0
}
589
590
/* Functions that change a page's type and ownership */
591
static int page_make_sharable(struct domain *d, 
592
                       struct page_info *page, 
593
                       int expected_refcnt)
594
0
{
595
0
    bool_t drop_dom_ref;
596
0
597
0
    spin_lock(&d->page_alloc_lock);
598
0
599
0
    if ( d->is_dying )
600
0
    {
601
0
        spin_unlock(&d->page_alloc_lock);
602
0
        return -EBUSY;
603
0
    }
604
0
605
0
    /* Change page type and count atomically */
606
0
    if ( !get_page_and_type(page, d, PGT_shared_page) )
607
0
    {
608
0
        spin_unlock(&d->page_alloc_lock);
609
0
        return -EINVAL;
610
0
    }
611
0
612
0
    /* Check it wasn't already sharable and undo if it was */
613
0
    if ( (page->u.inuse.type_info & PGT_count_mask) != 1 )
614
0
    {
615
0
        spin_unlock(&d->page_alloc_lock);
616
0
        put_page_and_type(page);
617
0
        return -EEXIST;
618
0
    }
619
0
620
0
    /* Check if the ref count is 2. The first from PGC_allocated, and
621
0
     * the second from get_page_and_type at the top of this function */
622
0
    if ( page->count_info != (PGC_allocated | (2 + expected_refcnt)) )
623
0
    {
624
0
        spin_unlock(&d->page_alloc_lock);
625
0
        /* Return type count back to zero */
626
0
        put_page_and_type(page);
627
0
        return -E2BIG;
628
0
    }
629
0
630
0
    page_set_owner(page, dom_cow);
631
0
    drop_dom_ref = !domain_adjust_tot_pages(d, -1);
632
0
    page_list_del(page, &d->page_list);
633
0
    spin_unlock(&d->page_alloc_lock);
634
0
635
0
    if ( drop_dom_ref )
636
0
        put_domain(d);
637
0
    return 0;
638
0
}
639
640
static int page_make_private(struct domain *d, struct page_info *page)
641
0
{
642
0
    unsigned long expected_type;
643
0
644
0
    if ( !get_page(page, dom_cow) )
645
0
        return -EINVAL;
646
0
    
647
0
    spin_lock(&d->page_alloc_lock);
648
0
649
0
    if ( d->is_dying )
650
0
    {
651
0
        spin_unlock(&d->page_alloc_lock);
652
0
        put_page(page);
653
0
        return -EBUSY;
654
0
    }
655
0
656
0
    /* We can only change the type if count is one */
657
0
    /* Because we are locking pages individually, we need to drop
658
0
     * the lock here, while the page is typed. We cannot risk the 
659
0
     * race of page_unlock and then put_page_type. */
660
0
    expected_type = (PGT_shared_page | PGT_validated | PGT_locked | 2);
661
0
    if ( page->u.inuse.type_info != expected_type )
662
0
    {
663
0
        spin_unlock(&d->page_alloc_lock);
664
0
        put_page(page);
665
0
        return -EEXIST;
666
0
    }
667
0
668
0
    /* Drop the final typecount */
669
0
    put_page_and_type(page);
670
0
671
0
    /* Now that we've dropped the type, we can unlock */
672
0
    mem_sharing_page_unlock(page);
673
0
674
0
    /* Change the owner */
675
0
    ASSERT(page_get_owner(page) == dom_cow);
676
0
    page_set_owner(page, d);
677
0
678
0
    if ( domain_adjust_tot_pages(d, 1) == 1 )
679
0
        get_knownalive_domain(d);
680
0
    page_list_add_tail(page, &d->page_list);
681
0
    spin_unlock(&d->page_alloc_lock);
682
0
683
0
    put_page(page);
684
0
685
0
    return 0;
686
0
}
687
688
static inline struct page_info *__grab_shared_page(mfn_t mfn)
689
0
{
690
0
    struct page_info *pg = NULL;
691
0
692
0
    if ( !mfn_valid(mfn) )
693
0
        return NULL;
694
0
    pg = mfn_to_page(mfn);
695
0
696
0
    /* If the page is not validated we can't lock it, and if it's  
697
0
     * not validated it's obviously not shared. */
698
0
    if ( !mem_sharing_page_lock(pg) )
699
0
        return NULL;
700
0
701
0
    if ( mem_sharing_lookup(mfn_x(mfn)) == NULL )
702
0
    {
703
0
        mem_sharing_page_unlock(pg);
704
0
        return NULL;
705
0
    }
706
0
707
0
    return pg;
708
0
}
709
710
static int debug_mfn(mfn_t mfn)
711
0
{
712
0
    struct page_info *page;
713
0
    int num_refs;
714
0
715
0
    if ( (page = __grab_shared_page(mfn)) == NULL)
716
0
    {
717
0
        gdprintk(XENLOG_ERR, "Invalid MFN=%lx\n", mfn_x(mfn));
718
0
        return -EINVAL;
719
0
    }
720
0
721
0
    MEM_SHARING_DEBUG( 
722
0
            "Debug page: MFN=%lx is ci=%lx, ti=%lx, owner_id=%d\n",
723
0
            mfn_x(page_to_mfn(page)), 
724
0
            page->count_info, 
725
0
            page->u.inuse.type_info,
726
0
            page_get_owner(page)->domain_id);
727
0
728
0
    /* -1 because the page is locked and that's an additional type ref */
729
0
    num_refs = ((int) (page->u.inuse.type_info & PGT_count_mask)) - 1;
730
0
    mem_sharing_page_unlock(page);
731
0
    return num_refs;
732
0
}
733
734
static int debug_gfn(struct domain *d, gfn_t gfn)
735
0
{
736
0
    p2m_type_t p2mt;
737
0
    mfn_t mfn;
738
0
    int num_refs;
739
0
740
0
    mfn = get_gfn_query(d, gfn_x(gfn), &p2mt);
741
0
742
0
    MEM_SHARING_DEBUG("Debug for dom%d, gfn=%" PRI_gfn "\n", 
743
0
                      d->domain_id, gfn_x(gfn));
744
0
    num_refs = debug_mfn(mfn);
745
0
    put_gfn(d, gfn_x(gfn));
746
0
747
0
    return num_refs;
748
0
}
749
750
static int debug_gref(struct domain *d, grant_ref_t ref)
751
0
{
752
0
    int rc;
753
0
    uint16_t status;
754
0
    gfn_t gfn;
755
0
756
0
    rc = mem_sharing_gref_to_gfn(d->grant_table, ref, &gfn, &status);
757
0
    if ( rc )
758
0
    {
759
0
        MEM_SHARING_DEBUG("Asked to debug [dom=%d,gref=%u]: error %d.\n",
760
0
                          d->domain_id, ref, rc);
761
0
        return rc;
762
0
    }
763
0
    
764
0
    MEM_SHARING_DEBUG(
765
0
            "==> Grant [dom=%d,ref=%d], status=%x. ", 
766
0
            d->domain_id, ref, status);
767
0
768
0
    return debug_gfn(d, gfn);
769
0
}
770
771
static int nominate_page(struct domain *d, gfn_t gfn,
772
                         int expected_refcnt, shr_handle_t *phandle)
773
0
{
774
0
    struct p2m_domain *hp2m = p2m_get_hostp2m(d);
775
0
    p2m_type_t p2mt;
776
0
    p2m_access_t p2ma;
777
0
    mfn_t mfn;
778
0
    struct page_info *page = NULL; /* gcc... */
779
0
    int ret;
780
0
781
0
    *phandle = 0UL;
782
0
783
0
    mfn = get_gfn_type_access(hp2m, gfn_x(gfn), &p2mt, &p2ma, 0, NULL);
784
0
785
0
    /* Check if mfn is valid */
786
0
    ret = -EINVAL;
787
0
    if ( !mfn_valid(mfn) )
788
0
        goto out;
789
0
790
0
    /* Return the handle if the page is already shared */
791
0
    if ( p2m_is_shared(p2mt) ) {
792
0
        struct page_info *pg = __grab_shared_page(mfn);
793
0
        if ( !pg )
794
0
        {
795
0
            gprintk(XENLOG_ERR,
796
0
                    "Shared p2m entry gfn %" PRI_gfn ", but could not grab mfn %" PRI_mfn " dom%d\n",
797
0
                    gfn_x(gfn), mfn_x(mfn), d->domain_id);
798
0
            BUG();
799
0
        }
800
0
        *phandle = pg->sharing->handle;
801
0
        ret = 0;
802
0
        mem_sharing_page_unlock(pg);
803
0
        goto out;
804
0
    }
805
0
806
0
    /* Check p2m type */
807
0
    if ( !p2m_is_sharable(p2mt) )
808
0
        goto out;
809
0
810
0
    /* Check if there are mem_access/remapped altp2m entries for this page */
811
0
    if ( altp2m_active(d) )
812
0
    {
813
0
        unsigned int i;
814
0
        struct p2m_domain *ap2m;
815
0
        mfn_t amfn;
816
0
        p2m_type_t ap2mt;
817
0
        p2m_access_t ap2ma;
818
0
819
0
        altp2m_list_lock(d);
820
0
821
0
        for ( i = 0; i < MAX_ALTP2M; i++ )
822
0
        {
823
0
            ap2m = d->arch.altp2m_p2m[i];
824
0
            if ( !ap2m )
825
0
                continue;
826
0
827
0
            amfn = get_gfn_type_access(ap2m, gfn_x(gfn), &ap2mt, &ap2ma, 0, NULL);
828
0
            if ( mfn_valid(amfn) && (!mfn_eq(amfn, mfn) || ap2ma != p2ma) )
829
0
            {
830
0
                altp2m_list_unlock(d);
831
0
                goto out;
832
0
            }
833
0
        }
834
0
835
0
        altp2m_list_unlock(d);
836
0
    }
837
0
838
0
    /* Try to convert the mfn to the sharable type */
839
0
    page = mfn_to_page(mfn);
840
0
    ret = page_make_sharable(d, page, expected_refcnt); 
841
0
    if ( ret ) 
842
0
        goto out;
843
0
844
0
    /* Now that the page is validated, we can lock it. There is no 
845
0
     * race because we're holding the p2m entry, so no one else 
846
0
     * could be nominating this gfn */
847
0
    ret = -ENOENT;
848
0
    if ( !mem_sharing_page_lock(page) )
849
0
        goto out;
850
0
851
0
    /* Initialize the shared state */
852
0
    ret = -ENOMEM;
853
0
    if ( (page->sharing = 
854
0
            xmalloc(struct page_sharing_info)) == NULL )
855
0
    {
856
0
        /* Making a page private atomically unlocks it */
857
0
        BUG_ON(page_make_private(d, page) != 0);
858
0
        goto out;
859
0
    }
860
0
    page->sharing->pg = page;
861
0
    rmap_init(page);
862
0
863
0
    /* Create the handle */
864
0
    page->sharing->handle = get_next_handle();  
865
0
866
0
    /* Create the local gfn info */
867
0
    if ( mem_sharing_gfn_alloc(page, d, gfn_x(gfn)) == NULL )
868
0
    {
869
0
        xfree(page->sharing);
870
0
        page->sharing = NULL;
871
0
        BUG_ON(page_make_private(d, page) != 0);
872
0
        goto out;
873
0
    }
874
0
875
0
    /* Change the p2m type, should never fail with p2m locked. */
876
0
    BUG_ON(p2m_change_type_one(d, gfn_x(gfn), p2mt, p2m_ram_shared));
877
0
878
0
    /* Account for this page. */
879
0
    atomic_inc(&nr_shared_mfns);
880
0
881
0
    /* Update m2p entry to SHARED_M2P_ENTRY */
882
0
    set_gpfn_from_mfn(mfn_x(mfn), SHARED_M2P_ENTRY);
883
0
884
0
    *phandle = page->sharing->handle;
885
0
    audit_add_list(page);
886
0
    mem_sharing_page_unlock(page);
887
0
    ret = 0;
888
0
889
0
out:
890
0
    put_gfn(d, gfn_x(gfn));
891
0
    return ret;
892
0
}
893
894
static int share_pages(struct domain *sd, gfn_t sgfn, shr_handle_t sh,
895
                       struct domain *cd, gfn_t cgfn, shr_handle_t ch)
896
0
{
897
0
    struct page_info *spage, *cpage, *firstpg, *secondpg;
898
0
    gfn_info_t *gfn;
899
0
    struct domain *d;
900
0
    int ret = -EINVAL;
901
0
    mfn_t smfn, cmfn;
902
0
    p2m_type_t smfn_type, cmfn_type;
903
0
    struct two_gfns tg;
904
0
    struct rmap_iterator ri;
905
0
906
0
    get_two_gfns(sd, gfn_x(sgfn), &smfn_type, NULL, &smfn,
907
0
                 cd, gfn_x(cgfn), &cmfn_type, NULL, &cmfn,
908
0
                 0, &tg);
909
0
910
0
    /* This tricky business is to avoid two callers deadlocking if 
911
0
     * grabbing pages in opposite client/source order */
912
0
    if( mfn_x(smfn) == mfn_x(cmfn) )
913
0
    {
914
0
        /* The pages are already the same.  We could return some
915
0
         * kind of error here, but no matter how you look at it,
916
0
         * the pages are already 'shared'.  It possibly represents
917
0
         * a big problem somewhere else, but as far as sharing is
918
0
         * concerned: great success! */
919
0
        ret = 0;
920
0
        goto err_out;
921
0
    }
922
0
    else if ( mfn_x(smfn) < mfn_x(cmfn) )
923
0
    {
924
0
        ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
925
0
        spage = firstpg = __grab_shared_page(smfn);
926
0
        if ( spage == NULL )
927
0
            goto err_out;
928
0
929
0
        ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
930
0
        cpage = secondpg = __grab_shared_page(cmfn);
931
0
        if ( cpage == NULL )
932
0
        {
933
0
            mem_sharing_page_unlock(spage);
934
0
            goto err_out;
935
0
        }
936
0
    } else {
937
0
        ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
938
0
        cpage = firstpg = __grab_shared_page(cmfn);
939
0
        if ( cpage == NULL )
940
0
            goto err_out;
941
0
942
0
        ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
943
0
        spage = secondpg = __grab_shared_page(smfn);
944
0
        if ( spage == NULL )
945
0
        {
946
0
            mem_sharing_page_unlock(cpage);
947
0
            goto err_out;
948
0
        }
949
0
    }
950
0
951
0
    ASSERT(smfn_type == p2m_ram_shared);
952
0
    ASSERT(cmfn_type == p2m_ram_shared);
953
0
954
0
    /* Check that the handles match */
955
0
    if ( spage->sharing->handle != sh )
956
0
    {
957
0
        ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
958
0
        mem_sharing_page_unlock(secondpg);
959
0
        mem_sharing_page_unlock(firstpg);
960
0
        goto err_out;
961
0
    }
962
0
    if ( cpage->sharing->handle != ch )
963
0
    {
964
0
        ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
965
0
        mem_sharing_page_unlock(secondpg);
966
0
        mem_sharing_page_unlock(firstpg);
967
0
        goto err_out;
968
0
    }
969
0
970
0
    /* Merge the lists together */
971
0
    rmap_seed_iterator(cpage, &ri);
972
0
    while ( (gfn = rmap_iterate(cpage, &ri)) != NULL)
973
0
    {
974
0
        /* Get the source page and type, this should never fail: 
975
0
         * we are under shr lock, and got a successful lookup */
976
0
        BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
977
0
        /* Move the gfn_info from client list to source list.
978
0
         * Don't change the type of rmap for the client page. */
979
0
        rmap_del(gfn, cpage, 0);
980
0
        rmap_add(gfn, spage);
981
0
        put_page_and_type(cpage);
982
0
        d = get_domain_by_id(gfn->domain);
983
0
        BUG_ON(!d);
984
0
        BUG_ON(set_shared_p2m_entry(d, gfn->gfn, smfn));
985
0
        put_domain(d);
986
0
    }
987
0
    ASSERT(list_empty(&cpage->sharing->gfns));
988
0
989
0
    /* Clear the rest of the shared state */
990
0
    page_sharing_dispose(cpage);
991
0
    cpage->sharing = NULL;
992
0
993
0
    mem_sharing_page_unlock(secondpg);
994
0
    mem_sharing_page_unlock(firstpg);
995
0
996
0
    /* Free the client page */
997
0
    if(test_and_clear_bit(_PGC_allocated, &cpage->count_info))
998
0
        put_page(cpage);
999
0
1000
0
    /* We managed to free a domain page. */
1001
0
    atomic_dec(&nr_shared_mfns);
1002
0
    atomic_inc(&nr_saved_mfns);
1003
0
    ret = 0;
1004
0
    
1005
0
err_out:
1006
0
    put_two_gfns(&tg);
1007
0
    return ret;
1008
0
}
1009
1010
int mem_sharing_add_to_physmap(struct domain *sd, unsigned long sgfn, shr_handle_t sh,
1011
                            struct domain *cd, unsigned long cgfn) 
1012
0
{
1013
0
    struct page_info *spage;
1014
0
    int ret = -EINVAL;
1015
0
    mfn_t smfn, cmfn;
1016
0
    p2m_type_t smfn_type, cmfn_type;
1017
0
    struct gfn_info *gfn_info;
1018
0
    struct p2m_domain *p2m = p2m_get_hostp2m(cd);
1019
0
    p2m_access_t a;
1020
0
    struct two_gfns tg;
1021
0
1022
0
    get_two_gfns(sd, sgfn, &smfn_type, NULL, &smfn,
1023
0
                 cd, cgfn, &cmfn_type, &a, &cmfn,
1024
0
                 0, &tg);
1025
0
1026
0
    /* Get the source shared page, check and lock */
1027
0
    ret = XENMEM_SHARING_OP_S_HANDLE_INVALID;
1028
0
    spage = __grab_shared_page(smfn);
1029
0
    if ( spage == NULL )
1030
0
        goto err_out;
1031
0
    ASSERT(smfn_type == p2m_ram_shared);
1032
0
1033
0
    /* Check that the handles match */
1034
0
    if ( spage->sharing->handle != sh )
1035
0
        goto err_unlock;
1036
0
1037
0
    /* Make sure the target page is a hole in the physmap. These are typically
1038
0
     * p2m_mmio_dm, but also accept p2m_invalid and paged out pages. See the
1039
0
     * definition of p2m_is_hole in p2m.h. */
1040
0
    if ( !p2m_is_hole(cmfn_type) )
1041
0
    {
1042
0
        ret = XENMEM_SHARING_OP_C_HANDLE_INVALID;
1043
0
        goto err_unlock;
1044
0
    }
1045
0
1046
0
    /* This is simpler than regular sharing */
1047
0
    BUG_ON(!get_page_and_type(spage, dom_cow, PGT_shared_page));
1048
0
    if ( (gfn_info = mem_sharing_gfn_alloc(spage, cd, cgfn)) == NULL )
1049
0
    {
1050
0
        put_page_and_type(spage);
1051
0
        ret = -ENOMEM;
1052
0
        goto err_unlock;
1053
0
    }
1054
0
1055
0
    ret = p2m_set_entry(p2m, _gfn(cgfn), smfn, PAGE_ORDER_4K,
1056
0
                        p2m_ram_shared, a);
1057
0
1058
0
    /* Tempted to turn this into an assert */
1059
0
    if ( ret )
1060
0
    {
1061
0
        mem_sharing_gfn_destroy(spage, cd, gfn_info);
1062
0
        put_page_and_type(spage);
1063
0
    } else {
1064
0
        /* There is a chance we're plugging a hole where a paged out page was */
1065
0
        if ( p2m_is_paging(cmfn_type) && (cmfn_type != p2m_ram_paging_out) )
1066
0
        {
1067
0
            atomic_dec(&cd->paged_pages);
1068
0
            /* Further, there is a chance this was a valid page. Don't leak it. */
1069
0
            if ( mfn_valid(cmfn) )
1070
0
            {
1071
0
                struct page_info *cpage = mfn_to_page(cmfn);
1072
0
                ASSERT(cpage != NULL);
1073
0
                if ( test_and_clear_bit(_PGC_allocated, &cpage->count_info) )
1074
0
                    put_page(cpage);
1075
0
            }
1076
0
        }
1077
0
    }
1078
0
1079
0
    atomic_inc(&nr_saved_mfns);
1080
0
1081
0
err_unlock:
1082
0
    mem_sharing_page_unlock(spage);
1083
0
err_out:
1084
0
    put_two_gfns(&tg);
1085
0
    return ret;
1086
0
}
1087
1088
1089
/* A note on the rationale for unshare error handling:
1090
 *  1. Unshare can only fail with ENOMEM. Any other error conditions BUG_ON()'s
1091
 *  2. We notify a potential dom0 helper through a vm_event ring. But we
1092
 *     allow the notification to not go to sleep. If the event ring is full 
1093
 *     of ENOMEM warnings, then the helper is already on the ball.
1094
 *  3. We cannot go to sleep until the unshare is resolved, because we might
1095
 *     be buried deep into locks (e.g. something -> copy_to_user -> __hvm_copy) 
1096
 *  4. So, we make sure we:
1097
 *     4.1. return an error
1098
 *     4.2. do not corrupt shared memory
1099
 *     4.3. do not corrupt guest memory
1100
 *     4.4. let the guest deal with it if the error propagation will reach it
1101
 */
1102
int __mem_sharing_unshare_page(struct domain *d,
1103
                             unsigned long gfn, 
1104
                             uint16_t flags)
1105
0
{
1106
0
    p2m_type_t p2mt;
1107
0
    mfn_t mfn;
1108
0
    struct page_info *page, *old_page;
1109
0
    int last_gfn;
1110
0
    gfn_info_t *gfn_info = NULL;
1111
0
   
1112
0
    mfn = get_gfn(d, gfn, &p2mt);
1113
0
    
1114
0
    /* Has someone already unshared it? */
1115
0
    if ( !p2m_is_shared(p2mt) ) {
1116
0
        put_gfn(d, gfn);
1117
0
        return 0;
1118
0
    }
1119
0
1120
0
    page = __grab_shared_page(mfn);
1121
0
    if ( page == NULL )
1122
0
    {
1123
0
        gdprintk(XENLOG_ERR, "Domain p2m is shared, but page is not: "
1124
0
                                "%lx\n", gfn);
1125
0
        BUG();
1126
0
    }
1127
0
1128
0
    gfn_info = rmap_retrieve(d->domain_id, gfn, page);
1129
0
    if ( unlikely(gfn_info == NULL) )
1130
0
    {
1131
0
        gdprintk(XENLOG_ERR, "Could not find gfn_info for shared gfn: "
1132
0
                                "%lx\n", gfn);
1133
0
        BUG();
1134
0
    }
1135
0
1136
0
    /* Do the accounting first. If anything fails below, we have bigger
1137
0
     * fish to fry. First, remove the gfn from the list. */
1138
0
    last_gfn = rmap_has_one_entry(page);
1139
0
    if ( last_gfn )
1140
0
    {
1141
0
        /* Clean up shared state. Get rid of the <domid, gfn> tuple
1142
0
         * before destroying the rmap. */
1143
0
        mem_sharing_gfn_destroy(page, d, gfn_info);
1144
0
        page_sharing_dispose(page);
1145
0
        page->sharing = NULL;
1146
0
        atomic_dec(&nr_shared_mfns);
1147
0
    }
1148
0
    else
1149
0
        atomic_dec(&nr_saved_mfns);
1150
0
1151
0
    /* If the GFN is getting destroyed drop the references to MFN 
1152
0
     * (possibly freeing the page), and exit early */
1153
0
    if ( flags & MEM_SHARING_DESTROY_GFN )
1154
0
    {
1155
0
        if ( !last_gfn )
1156
0
            mem_sharing_gfn_destroy(page, d, gfn_info);
1157
0
        put_page_and_type(page);
1158
0
        mem_sharing_page_unlock(page);
1159
0
        if ( last_gfn && 
1160
0
            test_and_clear_bit(_PGC_allocated, &page->count_info) ) 
1161
0
            put_page(page);
1162
0
        put_gfn(d, gfn);
1163
0
1164
0
        return 0;
1165
0
    }
1166
0
 
1167
0
    if ( last_gfn )
1168
0
    {
1169
0
        /* Making a page private atomically unlocks it */
1170
0
        BUG_ON(page_make_private(d, page) != 0);
1171
0
        goto private_page_found;
1172
0
    }
1173
0
1174
0
    old_page = page;
1175
0
    page = alloc_domheap_page(d, 0);
1176
0
    if ( !page ) 
1177
0
    {
1178
0
        /* Undo dec of nr_saved_mfns, as the retry will decrease again. */
1179
0
        atomic_inc(&nr_saved_mfns);
1180
0
        mem_sharing_page_unlock(old_page);
1181
0
        put_gfn(d, gfn);
1182
0
        /* Caller is responsible for placing an event
1183
0
         * in the ring */
1184
0
        return -ENOMEM;
1185
0
    }
1186
0
1187
0
    copy_domain_page(page_to_mfn(page), page_to_mfn(old_page));
1188
0
1189
0
    BUG_ON(set_shared_p2m_entry(d, gfn, page_to_mfn(page)));
1190
0
    mem_sharing_gfn_destroy(old_page, d, gfn_info);
1191
0
    mem_sharing_page_unlock(old_page);
1192
0
    put_page_and_type(old_page);
1193
0
1194
0
private_page_found:    
1195
0
    if ( p2m_change_type_one(d, gfn, p2m_ram_shared, p2m_ram_rw) )
1196
0
    {
1197
0
        gdprintk(XENLOG_ERR, "Could not change p2m type d %hu gfn %lx.\n", 
1198
0
                                d->domain_id, gfn);
1199
0
        BUG();
1200
0
    }
1201
0
1202
0
    /* Update m2p entry */
1203
0
    set_gpfn_from_mfn(mfn_x(page_to_mfn(page)), gfn);
1204
0
1205
0
    /* Now that the gfn<->mfn map is properly established,
1206
0
     * marking dirty is feasible */
1207
0
    paging_mark_dirty(d, page_to_mfn(page));
1208
0
    /* We do not need to unlock a private page */
1209
0
    put_gfn(d, gfn);
1210
0
    return 0;
1211
0
}
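To illustrate the error-handling rationale spelled out before __mem_sharing_unshare_page(): a hypothetical caller on a fault path (not part of this file; names and context are illustrative only) would post a vm_event notification on -ENOMEM rather than sleeping, and let the access be retried later:

    /* Hypothetical caller sketch -- illustrative only. */
    rc = __mem_sharing_unshare_page(d, gfn, 0);
    if ( rc == -ENOMEM )
        /* Cannot sleep this deep in the fault path; just queue the event. */
        mem_sharing_notify_enomem(d, gfn, 0 /* allow_sleep */);

mem_sharing_notify_enomem() pauses the faulting vcpu (when it belongs to d) and places the request on d->vm_event_share, matching point 2 of the rationale above.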
1212
1213
int relinquish_shared_pages(struct domain *d)
1214
0
{
1215
0
    int rc = 0;
1216
0
    struct p2m_domain *p2m = p2m_get_hostp2m(d);
1217
0
    unsigned long gfn, count = 0;
1218
0
1219
0
    if ( p2m == NULL )
1220
0
        return 0;
1221
0
1222
0
    p2m_lock(p2m);
1223
0
    for ( gfn = p2m->next_shared_gfn_to_relinquish;
1224
0
          gfn <= p2m->max_mapped_pfn; gfn++ )
1225
0
    {
1226
0
        p2m_access_t a;
1227
0
        p2m_type_t t;
1228
0
        mfn_t mfn;
1229
0
        int set_rc;
1230
0
1231
0
        if ( atomic_read(&d->shr_pages) == 0 )
1232
0
            break;
1233
0
        mfn = p2m->get_entry(p2m, _gfn(gfn), &t, &a, 0, NULL, NULL);
1234
0
        if ( mfn_valid(mfn) && (t == p2m_ram_shared) )
1235
0
        {
1236
0
            /* Does not fail with ENOMEM given the DESTROY flag */
1237
0
            BUG_ON(__mem_sharing_unshare_page(d, gfn, 
1238
0
                    MEM_SHARING_DESTROY_GFN));
1239
0
            /* Clear out the p2m entry so no one else may try to
1240
0
             * unshare.  Must succeed: we just read the old entry and
1241
0
             * we hold the p2m lock. */
1242
0
            set_rc = p2m->set_entry(p2m, _gfn(gfn), _mfn(0), PAGE_ORDER_4K,
1243
0
                                    p2m_invalid, p2m_access_rwx, -1);
1244
0
            ASSERT(set_rc == 0);
1245
0
            count += 0x10;
1246
0
        }
1247
0
        else
1248
0
            ++count;
1249
0
1250
0
        /* Preempt every 2MiB (shared) or 32MiB (unshared) - arbitrary. */
1251
0
        if ( count >= 0x2000 )
1252
0
        {
1253
0
            if ( hypercall_preempt_check() )
1254
0
            {
1255
0
                p2m->next_shared_gfn_to_relinquish = gfn + 1;
1256
0
                rc = -ERESTART;
1257
0
                break;
1258
0
            }
1259
0
            count = 0;
1260
0
        }
1261
0
    }
1262
0
1263
0
    p2m_unlock(p2m);
1264
0
    return rc;
1265
0
}
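A quick check of the preemption weights used in relinquish_shared_pages() (assuming 4 KiB pages): a shared gfn adds 0x10 to count and any other gfn adds 1, so the 0x2000 threshold is reached after 0x2000 / 0x10 = 512 shared pages (512 * 4 KiB = 2 MiB) or after 0x2000 = 8192 unshared pages (8192 * 4 KiB = 32 MiB), which is exactly the "2MiB (shared) or 32MiB (unshared)" interval the in-line comment describes.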
1266
1267
static int range_share(struct domain *d, struct domain *cd,
1268
                       struct mem_sharing_op_range *range)
1269
0
{
1270
0
    int rc = 0;
1271
0
    shr_handle_t sh, ch;
1272
0
    unsigned long start = range->opaque ?: range->first_gfn;
1273
0
1274
0
    while ( range->last_gfn >= start )
1275
0
    {
1276
0
        /*
1277
0
         * We only break out if we run out of memory as individual pages may
1278
0
         * legitimately be unsharable and we just want to skip over those.
1279
0
         */
1280
0
        rc = nominate_page(d, _gfn(start), 0, &sh);
1281
0
        if ( rc == -ENOMEM )
1282
0
            break;
1283
0
1284
0
        if ( !rc )
1285
0
        {
1286
0
            rc = nominate_page(cd, _gfn(start), 0, &ch);
1287
0
            if ( rc == -ENOMEM )
1288
0
                break;
1289
0
1290
0
            if ( !rc )
1291
0
            {
1292
0
                /* If we get here this should be guaranteed to succeed. */
1293
0
                rc = share_pages(d, _gfn(start), sh, cd, _gfn(start), ch);
1294
0
                ASSERT(!rc);
1295
0
            }
1296
0
        }
1297
0
1298
0
        /* Check for continuation if it's not the last iteration. */
1299
0
        if ( range->last_gfn >= ++start && hypercall_preempt_check() )
1300
0
        {
1301
0
            rc = 1;
1302
0
            break;
1303
0
        }
1304
0
    }
1305
0
1306
0
    range->opaque = start;
1307
0
1308
0
    /*
1309
0
     * The last page may fail with -EINVAL, and for range sharing we don't
1310
0
     * care about that.
1311
0
     */
1312
0
    if ( range->last_gfn < start && rc == -EINVAL )
1313
0
        rc = 0;
1314
0
1315
0
    return rc;
1316
0
}
1317
1318
int mem_sharing_memop(XEN_GUEST_HANDLE_PARAM(xen_mem_sharing_op_t) arg)
1319
0
{
1320
0
    int rc;
1321
0
    xen_mem_sharing_op_t mso;
1322
0
    struct domain *d;
1323
0
1324
0
    rc = -EFAULT;
1325
0
    if ( copy_from_guest(&mso, arg, 1) )
1326
0
        return rc;
1327
0
1328
0
    if ( mso.op == XENMEM_sharing_op_audit )
1329
0
        return audit();
1330
0
1331
0
    rc = rcu_lock_live_remote_domain_by_id(mso.domain, &d);
1332
0
    if ( rc )
1333
0
        return rc;
1334
0
1335
0
    rc = xsm_mem_sharing(XSM_DM_PRIV, d);
1336
0
    if ( rc )
1337
0
        goto out;
1338
0
1339
0
    /* Only HAP is supported */
1340
0
    rc = -ENODEV;
1341
0
    if ( !hap_enabled(d) || !d->arch.hvm_domain.mem_sharing_enabled )
1342
0
        goto out;
1343
0
1344
0
    switch ( mso.op )
1345
0
    {
1346
0
        case XENMEM_sharing_op_nominate_gfn:
1347
0
        {
1348
0
            shr_handle_t handle;
1349
0
1350
0
            rc = -EINVAL;
1351
0
            if ( !mem_sharing_enabled(d) )
1352
0
                goto out;
1353
0
1354
0
            rc = nominate_page(d, _gfn(mso.u.nominate.u.gfn), 0, &handle);
1355
0
            mso.u.nominate.handle = handle;
1356
0
        }
1357
0
        break;
1358
0
1359
0
        case XENMEM_sharing_op_nominate_gref:
1360
0
        {
1361
0
            grant_ref_t gref = mso.u.nominate.u.grant_ref;
1362
0
            gfn_t gfn;
1363
0
            shr_handle_t handle;
1364
0
1365
0
            rc = -EINVAL;
1366
0
            if ( !mem_sharing_enabled(d) )
1367
0
                goto out;
1368
0
            rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &gfn, NULL);
1369
0
            if ( rc < 0 )
1370
0
                goto out;
1371
0
1372
0
            rc = nominate_page(d, gfn, 3, &handle);
1373
0
            mso.u.nominate.handle = handle;
1374
0
        }
1375
0
        break;
1376
0
1377
0
        case XENMEM_sharing_op_share:
1378
0
        {
1379
0
            gfn_t sgfn, cgfn;
1380
0
            struct domain *cd;
1381
0
            shr_handle_t sh, ch;
1382
0
1383
0
            rc = -EINVAL;
1384
0
            if ( !mem_sharing_enabled(d) )
1385
0
                goto out;
1386
0
1387
0
            rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain,
1388
0
                                                   &cd);
1389
0
            if ( rc )
1390
0
                goto out;
1391
0
1392
0
            rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op);
1393
0
            if ( rc )
1394
0
            {
1395
0
                rcu_unlock_domain(cd);
1396
0
                goto out;
1397
0
            }
1398
0
1399
0
            if ( !mem_sharing_enabled(cd) )
1400
0
            {
1401
0
                rcu_unlock_domain(cd);
1402
0
                rc = -EINVAL;
1403
0
                goto out;
1404
0
            }
1405
0
1406
0
            if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) )
1407
0
            {
1408
0
                grant_ref_t gref = (grant_ref_t) 
1409
0
                                    (XENMEM_SHARING_OP_FIELD_GET_GREF(
1410
0
                                        mso.u.share.source_gfn));
1411
0
                rc = mem_sharing_gref_to_gfn(d->grant_table, gref, &sgfn,
1412
0
                                             NULL);
1413
0
                if ( rc < 0 )
1414
0
                {
1415
0
                    rcu_unlock_domain(cd);
1416
0
                    goto out;
1417
0
                }
1418
0
            }
1419
0
            else
1420
0
                sgfn = _gfn(mso.u.share.source_gfn);
1421
0
1422
0
            if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.client_gfn) )
1423
0
            {
1424
0
                grant_ref_t gref = (grant_ref_t) 
1425
0
                                    (XENMEM_SHARING_OP_FIELD_GET_GREF(
1426
0
                                        mso.u.share.client_gfn));
1427
0
                rc = mem_sharing_gref_to_gfn(cd->grant_table, gref, &cgfn,
1428
0
                                             NULL);
1429
0
                if ( rc < 0 )
1430
0
                {
1431
0
                    rcu_unlock_domain(cd);
1432
0
                    goto out;
1433
0
                }
1434
0
            }
1435
0
            else
1436
0
                cgfn = _gfn(mso.u.share.client_gfn);
1437
0
1438
0
            sh = mso.u.share.source_handle;
1439
0
            ch = mso.u.share.client_handle;
1440
0
1441
0
            rc = share_pages(d, sgfn, sh, cd, cgfn, ch);
1442
0
1443
0
            rcu_unlock_domain(cd);
1444
0
        }
1445
0
        break;
1446
0
1447
0
        case XENMEM_sharing_op_add_physmap:
1448
0
        {
1449
0
            unsigned long sgfn, cgfn;
1450
0
            struct domain *cd;
1451
0
            shr_handle_t sh;
1452
0
1453
0
            rc = -EINVAL;
1454
0
            if ( !mem_sharing_enabled(d) )
1455
0
                goto out;
1456
0
1457
0
            rc = rcu_lock_live_remote_domain_by_id(mso.u.share.client_domain,
1458
0
                                                   &cd);
1459
0
            if ( rc )
1460
0
                goto out;
1461
0
1462
0
            rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd, mso.op);
1463
0
            if ( rc )
1464
0
            {
1465
0
                rcu_unlock_domain(cd);
1466
0
                goto out;
1467
0
            }
1468
0
1469
0
            if ( !mem_sharing_enabled(cd) )
1470
0
            {
1471
0
                rcu_unlock_domain(cd);
1472
0
                rc = -EINVAL;
1473
0
                goto out;
1474
0
            }
1475
0
1476
0
            if ( XENMEM_SHARING_OP_FIELD_IS_GREF(mso.u.share.source_gfn) )
1477
0
            {
1478
0
                /* Cannot add a gref to the physmap */
1479
0
                rcu_unlock_domain(cd);
1480
0
                rc = -EINVAL;
1481
0
                goto out;
1482
0
            }
1483
0
1484
0
            sgfn    = mso.u.share.source_gfn;
1485
0
            sh      = mso.u.share.source_handle;
1486
0
            cgfn    = mso.u.share.client_gfn;
1487
0
1488
0
            rc = mem_sharing_add_to_physmap(d, sgfn, sh, cd, cgfn); 
1489
0
1490
0
            rcu_unlock_domain(cd);
1491
0
        }
1492
0
        break;
1493
0
1494
0
        case XENMEM_sharing_op_range_share:
1495
0
        {
1496
0
            unsigned long max_sgfn, max_cgfn;
1497
0
            struct domain *cd;
1498
0
1499
0
            rc = -EINVAL;
1500
0
            if ( mso.u.range._pad[0] || mso.u.range._pad[1] ||
1501
0
                 mso.u.range._pad[2] )
1502
0
                 goto out;
1503
0
1504
0
            /*
1505
0
             * We use opaque for the hypercall continuation value.
1506
0
             * Ideally the user sets this to 0 in the beginning but
1507
0
             * there is no good way of enforcing that here, so we just check
1508
0
             * that it's at least in range.
1509
0
             */
1510
0
            if ( mso.u.range.opaque &&
1511
0
                 (mso.u.range.opaque < mso.u.range.first_gfn ||
1512
0
                  mso.u.range.opaque > mso.u.range.last_gfn) )
1513
0
                goto out;
1514
0
1515
0
            if ( !mem_sharing_enabled(d) )
1516
0
                goto out;
1517
0
1518
0
            rc = rcu_lock_live_remote_domain_by_id(mso.u.range.client_domain,
1519
0
                                                   &cd);
1520
0
            if ( rc )
1521
0
                goto out;
1522
0
1523
0
            /*
1524
0
             * We reuse XENMEM_sharing_op_share XSM check here as this is
1525
0
             * essentially the same concept repeated over multiple pages.
1526
0
             */
1527
0
            rc = xsm_mem_sharing_op(XSM_DM_PRIV, d, cd,
1528
0
                                    XENMEM_sharing_op_share);
1529
0
            if ( rc )
1530
0
            {
1531
0
                rcu_unlock_domain(cd);
1532
0
                goto out;
1533
0
            }
1534
0
1535
0
            if ( !mem_sharing_enabled(cd) )
1536
0
            {
1537
0
                rcu_unlock_domain(cd);
1538
0
                rc = -EINVAL;
1539
0
                goto out;
1540
0
            }
1541
0
1542
0
            /*
1543
0
             * Sanity check only, the client should keep the domains paused for
1544
0
             * the duration of this op.
1545
0
             */
1546
0
            if ( !atomic_read(&d->pause_count) ||
1547
0
                 !atomic_read(&cd->pause_count) )
1548
0
            {
1549
0
                rcu_unlock_domain(cd);
1550
0
                rc = -EINVAL;
1551
0
                goto out;
1552
0
            }
1553
0
1554
0
            max_sgfn = domain_get_maximum_gpfn(d);
1555
0
            max_cgfn = domain_get_maximum_gpfn(cd);
1556
0
1557
0
            if ( max_sgfn < mso.u.range.first_gfn ||
1558
0
                 max_sgfn < mso.u.range.last_gfn ||
1559
0
                 max_cgfn < mso.u.range.first_gfn ||
1560
0
                 max_cgfn < mso.u.range.last_gfn )
1561
0
            {
1562
0
                rcu_unlock_domain(cd);
1563
0
                rc = -EINVAL;
1564
0
                goto out;
1565
0
            }
1566
0
1567
0
            rc = range_share(d, cd, &mso.u.range);
1568
0
            rcu_unlock_domain(cd);
1569
0
1570
0
            if ( rc > 0 )
1571
0
            {
1572
0
                if ( __copy_to_guest(arg, &mso, 1) )
1573
0
                    rc = -EFAULT;
1574
0
                else
1575
0
                    rc = hypercall_create_continuation(__HYPERVISOR_memory_op,
1576
0
                                                       "lh", XENMEM_sharing_op,
1577
0
                                                       arg);
1578
0
            }
1579
0
            else
1580
0
                mso.u.range.opaque = 0;
1581
0
        }
1582
0
        break;
1583
0
1584
0
        case XENMEM_sharing_op_debug_gfn:
1585
0
            rc = debug_gfn(d, _gfn(mso.u.debug.u.gfn));
1586
0
            break;
1587
0
1588
0
        case XENMEM_sharing_op_debug_gref:
1589
0
            rc = debug_gref(d, mso.u.debug.u.gref);
1590
0
            break;
1591
0
1592
0
        default:
1593
0
            rc = -ENOSYS;
1594
0
            break;
1595
0
    }
1596
0
1597
0
    if ( !rc && __copy_to_guest(arg, &mso, 1) )
1598
0
        rc = -EFAULT;
1599
0
1600
0
out:
1601
0
    rcu_unlock_domain(d);
1602
0
    return rc;
1603
0
}
1604
1605
int mem_sharing_domctl(struct domain *d, struct xen_domctl_mem_sharing_op *mec)
1606
0
{
1607
0
    int rc;
1608
0
1609
0
    /* Only HAP is supported */
1610
0
    if ( !hap_enabled(d) )
1611
0
         return -ENODEV;
1612
0
1613
0
    switch(mec->op)
1614
0
    {
1615
0
        case XEN_DOMCTL_MEM_SHARING_CONTROL:
1616
0
        {
1617
0
            rc = 0;
1618
0
            if ( unlikely(need_iommu(d) && mec->u.enable) )
1619
0
                rc = -EXDEV;
1620
0
            else
1621
0
                d->arch.hvm_domain.mem_sharing_enabled = mec->u.enable;
1622
0
        }
1623
0
        break;
1624
0
1625
0
        default:
1626
0
            rc = -ENOSYS;
1627
0
    }
1628
0
1629
0
    return rc;
1630
0
}
1631
1632
void __init mem_sharing_init(void)
1633
1
{
1634
1
    printk("Initing memory sharing.\n");
1635
1
#if MEM_SHARING_AUDIT
1636
1
    spin_lock_init(&shr_audit_lock);
1637
1
    INIT_LIST_HEAD(&shr_audit_list);
1638
1
#endif
1639
1
}
1640