Coverage Report

Created: 2017-10-25 09:10

/root/src/xen/xen/arch/x86/x86_64/mm.c
Line
Count
Source
1
/******************************************************************************
2
 * arch/x86/x86_64/mm.c
3
 * 
4
 * Modifications to Linux original are copyright (c) 2004, K A Fraser. This 
5
 * program is free software; you can redistribute it and/or modify it under 
6
 * the terms of the GNU General Public License as published by the Free 
7
 * Software Foundation; either version 2 of the License, or (at your option) 
8
 * any later version.
9
 * 
10
 * This program is distributed in the hope that it will be useful, but WITHOUT 
11
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
12
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
13
 * more details.
14
 * 
15
 * You should have received a copy of the GNU General Public License along 
16
 * with this program; If not, see <http://www.gnu.org/licenses/>.
17
 */
18
19
asm(".file \"" __FILE__ "\"");
20
21
#include <xen/lib.h>
22
#include <xen/init.h>
23
#include <xen/mm.h>
24
#include <xen/sched.h>
25
#include <xen/numa.h>
26
#include <xen/nodemask.h>
27
#include <xen/guest_access.h>
28
#include <xen/hypercall.h>
29
#include <xen/mem_access.h>
30
#include <asm/current.h>
31
#include <asm/asm_defns.h>
32
#include <asm/page.h>
33
#include <asm/flushtlb.h>
34
#include <asm/fixmap.h>
35
#include <asm/hypercall.h>
36
#include <asm/msr.h>
37
#include <asm/setup.h>
38
#include <asm/numa.h>
39
#include <asm/mem_paging.h>
40
#include <asm/mem_sharing.h>
41
#include <public/memory.h>
42
43
unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
44
45
l2_pgentry_t *compat_idle_pg_table_l2;
46
47
void *do_page_walk(struct vcpu *v, unsigned long addr)
48
0
{
49
0
    unsigned long mfn = pagetable_get_pfn(v->arch.guest_table);
50
0
    l4_pgentry_t l4e, *l4t;
51
0
    l3_pgentry_t l3e, *l3t;
52
0
    l2_pgentry_t l2e, *l2t;
53
0
    l1_pgentry_t l1e, *l1t;
54
0
55
0
    if ( !is_pv_vcpu(v) || !is_canonical_address(addr) )
56
0
        return NULL;
57
0
58
0
    l4t = map_domain_page(_mfn(mfn));
59
0
    l4e = l4t[l4_table_offset(addr)];
60
0
    unmap_domain_page(l4t);
61
0
    if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) )
62
0
        return NULL;
63
0
64
0
    l3t = map_l3t_from_l4e(l4e);
65
0
    l3e = l3t[l3_table_offset(addr)];
66
0
    unmap_domain_page(l3t);
67
0
    mfn = l3e_get_pfn(l3e);
68
0
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
69
0
        return NULL;
70
0
    if ( (l3e_get_flags(l3e) & _PAGE_PSE) )
71
0
    {
72
0
        mfn += PFN_DOWN(addr & ((1UL << L3_PAGETABLE_SHIFT) - 1));
73
0
        goto ret;
74
0
    }
75
0
76
0
    l2t = map_domain_page(_mfn(mfn));
77
0
    l2e = l2t[l2_table_offset(addr)];
78
0
    unmap_domain_page(l2t);
79
0
    mfn = l2e_get_pfn(l2e);
80
0
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
81
0
        return NULL;
82
0
    if ( (l2e_get_flags(l2e) & _PAGE_PSE) )
83
0
    {
84
0
        mfn += PFN_DOWN(addr & ((1UL << L2_PAGETABLE_SHIFT) - 1));
85
0
        goto ret;
86
0
    }
87
0
88
0
    l1t = map_domain_page(_mfn(mfn));
89
0
    l1e = l1t[l1_table_offset(addr)];
90
0
    unmap_domain_page(l1t);
91
0
    mfn = l1e_get_pfn(l1e);
92
0
    if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) )
93
0
        return NULL;
94
0
95
0
 ret:
96
0
    return map_domain_page(_mfn(mfn)) + (addr & ~PAGE_MASK);
97
0
}
98
99
/*
100
 * Allocate page table pages for m2p table
101
 */
102
struct mem_hotadd_info
103
{
104
    unsigned long spfn;
105
    unsigned long epfn;
106
    unsigned long cur;
107
};
108
109
static int hotadd_mem_valid(unsigned long pfn, struct mem_hotadd_info *info)
110
0
{
111
0
    return (pfn < info->epfn && pfn >= info->spfn);
112
0
}
113
114
static unsigned long alloc_hotadd_mfn(struct mem_hotadd_info *info)
115
0
{
116
0
    unsigned mfn;
117
0
118
0
    ASSERT((info->cur + ( 1UL << PAGETABLE_ORDER) < info->epfn) &&
119
0
            info->cur >= info->spfn);
120
0
121
0
    mfn = info->cur;
122
0
    info->cur += (1UL << PAGETABLE_ORDER);
123
0
    return mfn;
124
0
}
125
126
0
#define M2P_NO_MAPPED   0
127
0
#define M2P_2M_MAPPED   1
128
0
#define M2P_1G_MAPPED   2
129
static int m2p_mapped(unsigned long spfn)
130
0
{
131
0
    unsigned long va;
132
0
    l3_pgentry_t *l3_ro_mpt;
133
0
    l2_pgentry_t *l2_ro_mpt;
134
0
135
0
    va = RO_MPT_VIRT_START + spfn * sizeof(*machine_to_phys_mapping);
136
0
    l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(va)]);
137
0
138
0
    switch ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
139
0
             (_PAGE_PRESENT |_PAGE_PSE))
140
0
    {
141
0
        case _PAGE_PSE|_PAGE_PRESENT:
142
0
            return M2P_1G_MAPPED;
143
0
        /* Check for next level */
144
0
        case _PAGE_PRESENT:
145
0
            break;
146
0
        default:
147
0
            return M2P_NO_MAPPED;
148
0
    }
149
0
    l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
150
0
151
0
    if (l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT)
152
0
        return M2P_2M_MAPPED;
153
0
154
0
    return M2P_NO_MAPPED;
155
0
}
156
157
static int share_hotadd_m2p_table(struct mem_hotadd_info *info)
158
0
{
159
0
    unsigned long i, n, v, m2p_start_mfn = 0;
160
0
    l3_pgentry_t l3e;
161
0
    l2_pgentry_t l2e;
162
0
163
0
    /* M2P table is mappable read-only by privileged domains. */
164
0
    for ( v  = RDWR_MPT_VIRT_START;
165
0
          v != RDWR_MPT_VIRT_END;
166
0
          v += n << PAGE_SHIFT )
167
0
    {
168
0
        n = L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES;
169
0
        l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
170
0
            l3_table_offset(v)];
171
0
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
172
0
            continue;
173
0
        if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
174
0
        {
175
0
            n = L1_PAGETABLE_ENTRIES;
176
0
            l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
177
0
            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
178
0
                continue;
179
0
            m2p_start_mfn = l2e_get_pfn(l2e);
180
0
        }
181
0
        else
182
0
            continue;
183
0
184
0
        for ( i = 0; i < n; i++ )
185
0
        {
186
0
            struct page_info *page = mfn_to_page(m2p_start_mfn + i);
187
0
            if (hotadd_mem_valid(m2p_start_mfn + i, info))
188
0
                share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
189
0
        }
190
0
    }
191
0
192
0
    for ( v  = RDWR_COMPAT_MPT_VIRT_START;
193
0
          v != RDWR_COMPAT_MPT_VIRT_END;
194
0
          v += 1 << L2_PAGETABLE_SHIFT )
195
0
    {
196
0
        l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
197
0
            l3_table_offset(v)];
198
0
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
199
0
            continue;
200
0
        l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
201
0
        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
202
0
            continue;
203
0
        m2p_start_mfn = l2e_get_pfn(l2e);
204
0
205
0
        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
206
0
        {
207
0
            struct page_info *page = mfn_to_page(m2p_start_mfn + i);
208
0
            if (hotadd_mem_valid(m2p_start_mfn + i, info))
209
0
                share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
210
0
        }
211
0
    }
212
0
    return 0;
213
0
}
214
215
static void destroy_compat_m2p_mapping(struct mem_hotadd_info *info)
216
0
{
217
0
    unsigned long i, va, rwva, pt_pfn;
218
0
    unsigned long smap = info->spfn, emap = info->spfn;
219
0
220
0
    l3_pgentry_t *l3_ro_mpt;
221
0
    l2_pgentry_t *l2_ro_mpt;
222
0
223
0
    if ( smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) )
224
0
        return;
225
0
226
0
    if ( emap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) )
227
0
        emap = (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2;
228
0
229
0
    l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
230
0
231
0
    ASSERT(l3e_get_flags(l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)]) & _PAGE_PRESENT);
232
0
233
0
    l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)]);
234
0
235
0
    for ( i = smap; i < emap; )
236
0
    {
237
0
        va = HIRO_COMPAT_MPT_VIRT_START +
238
0
              i * sizeof(*compat_machine_to_phys_mapping);
239
0
        rwva = RDWR_COMPAT_MPT_VIRT_START +
240
0
             i * sizeof(*compat_machine_to_phys_mapping);
241
0
        if ( l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT )
242
0
        {
243
0
            pt_pfn = l2e_get_pfn(l2_ro_mpt[l2_table_offset(va)]);
244
0
            if ( hotadd_mem_valid(pt_pfn, info) )
245
0
            {
246
0
                destroy_xen_mappings(rwva, rwva +
247
0
                        (1UL << L2_PAGETABLE_SHIFT));
248
0
                l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_empty());
249
0
            }
250
0
        }
251
0
252
0
        i += 1UL << (L2_PAGETABLE_SHIFT - 2);
253
0
    }
254
0
255
0
    return;
256
0
}
257
258
static void destroy_m2p_mapping(struct mem_hotadd_info *info)
259
0
{
260
0
    l3_pgentry_t *l3_ro_mpt;
261
0
    unsigned long i, va, rwva;
262
0
    unsigned long smap = info->spfn, emap = info->epfn;
263
0
264
0
    l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]);
265
0
266
0
    /*
267
0
     * No need to clean m2p structures that existed before the hotplug
268
0
     */
269
0
    for (i = smap; i < emap;)
270
0
    {
271
0
        unsigned long pt_pfn;
272
0
        l2_pgentry_t *l2_ro_mpt;
273
0
274
0
        va = RO_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping);
275
0
        rwva = RDWR_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping);
276
0
277
0
        /* 1G mapping should not be created by mem hotadd */
278
0
        if (!(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PRESENT) ||
279
0
            (l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PSE))
280
0
        {
281
0
            i = ( i & ~((1UL << (L3_PAGETABLE_SHIFT - 3)) - 1)) +
282
0
                (1UL << (L3_PAGETABLE_SHIFT - 3) );
283
0
            continue;
284
0
        }
285
0
286
0
        l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
287
0
        if (!(l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT))
288
0
        {
289
0
            i = ( i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) +
290
0
                    (1UL << (L2_PAGETABLE_SHIFT - 3)) ;
291
0
            continue;
292
0
        }
293
0
294
0
        pt_pfn = l2e_get_pfn(l2_ro_mpt[l2_table_offset(va)]);
295
0
        if ( hotadd_mem_valid(pt_pfn, info) )
296
0
        {
297
0
            destroy_xen_mappings(rwva, rwva + (1UL << L2_PAGETABLE_SHIFT));
298
0
299
0
            l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
300
0
            l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_empty());
301
0
        }
302
0
        i = ( i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) +
303
0
              (1UL << (L2_PAGETABLE_SHIFT - 3));
304
0
    }
305
0
306
0
    destroy_compat_m2p_mapping(info);
307
0
308
0
    /* Brute-Force flush all TLB */
309
0
    flush_tlb_all();
310
0
    return;
311
0
}
312
313
/*
314
 * Allocate and map the compatibility mode machine-to-phys table.
315
 * spfn/epfn: the pfn range to be set up
316
 * free_s/free_e: the pfn range that is still free
317
 */
318
static int setup_compat_m2p_table(struct mem_hotadd_info *info)
319
0
{
320
0
    unsigned long i, va, smap, emap, rwva, epfn = info->epfn, mfn;
321
0
    unsigned int n;
322
0
    l3_pgentry_t *l3_ro_mpt = NULL;
323
0
    l2_pgentry_t *l2_ro_mpt = NULL;
324
0
    int err = 0;
325
0
326
0
    smap = info->spfn & (~((1UL << (L2_PAGETABLE_SHIFT - 2)) -1));
327
0
328
0
    /*
329
0
     * Notice: For hot-added memory, only range below m2p_compat_vstart
330
0
     * will be filled up (assuming memory is discontinuous when booting).
331
0
     */
332
0
    if   ((smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2)) )
333
0
        return 0;
334
0
335
0
    if ( epfn > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) )
336
0
        epfn = (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2;
337
0
338
0
    emap = ( (epfn + ((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1 )) &
339
0
                ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1) );
340
0
341
0
    va = HIRO_COMPAT_MPT_VIRT_START +
342
0
         smap * sizeof(*compat_machine_to_phys_mapping);
343
0
    l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(va)]);
344
0
345
0
    ASSERT(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PRESENT);
346
0
347
0
    l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]);
348
0
349
0
#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int))
350
0
#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
351
0
             sizeof(*compat_machine_to_phys_mapping))
352
0
    BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \
353
0
                 sizeof(*compat_machine_to_phys_mapping));
354
0
355
0
    for ( i = smap; i < emap; i += (1UL << (L2_PAGETABLE_SHIFT - 2)) )
356
0
    {
357
0
        va = HIRO_COMPAT_MPT_VIRT_START +
358
0
              i * sizeof(*compat_machine_to_phys_mapping);
359
0
360
0
        rwva = RDWR_COMPAT_MPT_VIRT_START +
361
0
                i * sizeof(*compat_machine_to_phys_mapping);
362
0
363
0
        if (l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT)
364
0
            continue;
365
0
366
0
        for ( n = 0; n < CNT; ++n)
367
0
            if ( mfn_valid(_mfn(i + n * PDX_GROUP_COUNT)) )
368
0
                break;
369
0
        if ( n == CNT )
370
0
            continue;
371
0
372
0
        mfn = alloc_hotadd_mfn(info);
373
0
        err = map_pages_to_xen(rwva, mfn, 1UL << PAGETABLE_ORDER,
374
0
                               PAGE_HYPERVISOR);
375
0
        if ( err )
376
0
            break;
377
0
        /* Fill with INVALID_M2P_ENTRY. */
378
0
        memset((void *)rwva, 0xFF, 1UL << L2_PAGETABLE_SHIFT);
379
0
        /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */
380
0
        l2e_write(&l2_ro_mpt[l2_table_offset(va)],
381
0
                  l2e_from_pfn(mfn, _PAGE_PSE|_PAGE_PRESENT));
382
0
    }
383
0
#undef CNT
384
0
#undef MFN
385
0
    return err;
386
0
}
387
388
/*
389
 * Allocate and map the machine-to-phys table.
390
 * The L3 for the RO/RDWR MPT and the L2 for the compat MPT should already be set up
391
 */
392
static int setup_m2p_table(struct mem_hotadd_info *info)
393
0
{
394
0
    unsigned long i, va, smap, emap;
395
0
    unsigned int n;
396
0
    l2_pgentry_t *l2_ro_mpt = NULL;
397
0
    l3_pgentry_t *l3_ro_mpt = NULL;
398
0
    int ret = 0;
399
0
400
0
    ASSERT(l4e_get_flags(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)])
401
0
            & _PAGE_PRESENT);
402
0
    l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]);
403
0
404
0
    smap = (info->spfn & (~((1UL << (L2_PAGETABLE_SHIFT - 3)) -1)));
405
0
    emap = ((info->epfn + ((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1 )) &
406
0
                ~((1UL << (L2_PAGETABLE_SHIFT - 3)) -1));
407
0
408
0
    va = RO_MPT_VIRT_START + smap * sizeof(*machine_to_phys_mapping);
409
0
410
0
#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
411
0
#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
412
0
             sizeof(*machine_to_phys_mapping))
413
0
414
0
    BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \
415
0
                 sizeof(*machine_to_phys_mapping));
416
0
417
0
    i = smap;
418
0
    while ( i < emap )
419
0
    {
420
0
        switch ( m2p_mapped(i) )
421
0
        {
422
0
        case M2P_1G_MAPPED:
423
0
            i = ( i & ~((1UL << (L3_PAGETABLE_SHIFT - 3)) - 1)) +
424
0
                (1UL << (L3_PAGETABLE_SHIFT - 3));
425
0
            continue;
426
0
        case M2P_2M_MAPPED:
427
0
            i = (i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) +
428
0
                (1UL << (L2_PAGETABLE_SHIFT - 3));
429
0
            continue;
430
0
        default:
431
0
            break;
432
0
        }
433
0
434
0
        va = RO_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping);
435
0
436
0
        for ( n = 0; n < CNT; ++n)
437
0
            if ( mfn_valid(_mfn(i + n * PDX_GROUP_COUNT)) )
438
0
                break;
439
0
        if ( n < CNT )
440
0
        {
441
0
            unsigned long mfn = alloc_hotadd_mfn(info);
442
0
443
0
            ret = map_pages_to_xen(
444
0
                        RDWR_MPT_VIRT_START + i * sizeof(unsigned long),
445
0
                        mfn, 1UL << PAGETABLE_ORDER,
446
0
                        PAGE_HYPERVISOR);
447
0
            if ( ret )
448
0
                goto error;
449
0
            /* Fill with INVALID_M2P_ENTRY. */
450
0
            memset((void *)(RDWR_MPT_VIRT_START + i * sizeof(unsigned long)),
451
0
                   0xFF, 1UL << L2_PAGETABLE_SHIFT);
452
0
453
0
            ASSERT(!(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
454
0
                  _PAGE_PSE));
455
0
            if ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) &
456
0
              _PAGE_PRESENT )
457
0
                l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]) +
458
0
                  l2_table_offset(va);
459
0
            else
460
0
            {
461
0
                l2_ro_mpt = alloc_xen_pagetable();
462
0
                if ( !l2_ro_mpt )
463
0
                {
464
0
                    ret = -ENOMEM;
465
0
                    goto error;
466
0
                }
467
0
468
0
                clear_page(l2_ro_mpt);
469
0
                l3e_write(&l3_ro_mpt[l3_table_offset(va)],
470
0
                          l3e_from_paddr(__pa(l2_ro_mpt),
471
0
                                         __PAGE_HYPERVISOR_RO | _PAGE_USER));
472
0
                l2_ro_mpt += l2_table_offset(va);
473
0
            }
474
0
475
0
            /* NB. Cannot be GLOBAL: guest user mode should not see it. */
476
0
            l2e_write(l2_ro_mpt, l2e_from_pfn(mfn,
477
0
                   /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
478
0
        }
479
0
        if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
480
0
            l2_ro_mpt = NULL;
481
0
        i += ( 1UL << (L2_PAGETABLE_SHIFT - 3));
482
0
    }
483
0
#undef CNT
484
0
#undef MFN
485
0
486
0
    ret = setup_compat_m2p_table(info);
487
0
error:
488
0
    return ret;
489
0
}
490
491
void __init paging_init(void)
492
1
{
493
1
    unsigned long i, mpt_size, va;
494
1
    unsigned int n, memflags;
495
1
    l3_pgentry_t *l3_ro_mpt;
496
1
    l2_pgentry_t *l2_ro_mpt = NULL;
497
1
    struct page_info *l1_pg;
498
1
499
1
    /*
500
1
     * We set up the L3s for the 1:1 mapping if the host supports memory hotplug,
501
1
     * to avoid syncing the 1:1 mapping in the page fault handler
502
1
     */
503
1
    for ( va = DIRECTMAP_VIRT_START;
504
1
          va < DIRECTMAP_VIRT_END && (void *)va < __va(mem_hotplug);
505
0
          va += (1UL << L4_PAGETABLE_SHIFT) )
506
0
    {
507
0
        if ( !(l4e_get_flags(idle_pg_table[l4_table_offset(va)]) &
508
0
              _PAGE_PRESENT) )
509
0
        {
510
0
            l3_pgentry_t *pl3t = alloc_xen_pagetable();
511
0
512
0
            if ( !pl3t )
513
0
                goto nomem;
514
0
            clear_page(pl3t);
515
0
            l4e_write(&idle_pg_table[l4_table_offset(va)],
516
0
                      l4e_from_paddr(__pa(pl3t), __PAGE_HYPERVISOR_RW));
517
0
        }
518
0
    }
519
1
520
1
    /* Create user-accessible L2 directory to map the MPT for guests. */
521
1
    if ( (l3_ro_mpt = alloc_xen_pagetable()) == NULL )
522
0
        goto nomem;
523
1
    clear_page(l3_ro_mpt);
524
1
    l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)],
525
1
              l4e_from_paddr(__pa(l3_ro_mpt), __PAGE_HYPERVISOR_RO | _PAGE_USER));
526
1
527
1
    /*
528
1
     * Allocate and map the machine-to-phys table.
529
1
     * This also ensures L3 is present for fixmaps.
530
1
     */
531
1
    mpt_size  = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
532
1
    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
533
1
#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
534
40
#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
535
40
             sizeof(*machine_to_phys_mapping))
536
1
    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
537
1
                 sizeof(*machine_to_phys_mapping));
538
19
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
539
18
    {
540
18
        BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
541
18
        va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
542
18
        memflags = MEMF_node(phys_to_nid(i <<
543
18
            (L2_PAGETABLE_SHIFT - 3 + PAGE_SHIFT)));
544
18
545
18
        if ( cpu_has_page1gb &&
546
18
             !((unsigned long)l2_ro_mpt & ~PAGE_MASK) &&
547
1
             (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) )
548
0
        {
549
0
            unsigned int k, holes;
550
0
551
0
            for ( holes = k = 0; k < 1 << PAGETABLE_ORDER; ++k)
552
0
            {
553
0
                for ( n = 0; n < CNT; ++n)
554
0
                    if ( mfn_valid(_mfn(MFN(i + k) + n * PDX_GROUP_COUNT)) )
555
0
                        break;
556
0
                if ( n == CNT )
557
0
                    ++holes;
558
0
            }
559
0
            if ( k == holes )
560
0
            {
561
0
                i += (1UL << PAGETABLE_ORDER) - 1;
562
0
                continue;
563
0
            }
564
0
            if ( holes == 0 &&
565
0
                 (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
566
0
                                              memflags)) != NULL )
567
0
            {
568
0
                map_pages_to_xen(
569
0
                    RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
570
0
                    page_to_mfn(l1_pg),
571
0
                    1UL << (2 * PAGETABLE_ORDER),
572
0
                    PAGE_HYPERVISOR);
573
0
                memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
574
0
                       0x77, 1UL << L3_PAGETABLE_SHIFT);
575
0
576
0
                ASSERT(!l2_table_offset(va));
577
0
                /* NB. Cannot be GLOBAL: guest user mode should not see it. */
578
0
                l3e_write(&l3_ro_mpt[l3_table_offset(va)],
579
0
                    l3e_from_page(l1_pg,
580
0
                        /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
581
0
                i += (1UL << PAGETABLE_ORDER) - 1;
582
0
                continue;
583
0
            }
584
0
        }
585
18
586
22
        for ( n = 0; n < CNT; ++n)
587
21
            if ( mfn_valid(_mfn(MFN(i) + n * PDX_GROUP_COUNT)) )
588
17
                break;
589
18
        if ( n == CNT )
590
1
            l1_pg = NULL;
591
17
        else if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
592
17
                                               memflags)) == NULL )
593
0
            goto nomem;
594
17
        else
595
17
        {
596
17
            map_pages_to_xen(
597
17
                RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
598
17
                page_to_mfn(l1_pg),
599
17
                1UL << PAGETABLE_ORDER,
600
17
                PAGE_HYPERVISOR);
601
17
            /* Fill with INVALID_M2P_ENTRY. */
602
17
            memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
603
17
                   0xFF, 1UL << L2_PAGETABLE_SHIFT);
604
17
        }
605
18
        if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
606
1
        {
607
1
            if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL )
608
0
                goto nomem;
609
1
            clear_page(l2_ro_mpt);
610
1
            l3e_write(&l3_ro_mpt[l3_table_offset(va)],
611
1
                      l3e_from_paddr(__pa(l2_ro_mpt),
612
1
                                     __PAGE_HYPERVISOR_RO | _PAGE_USER));
613
1
            ASSERT(!l2_table_offset(va));
614
1
        }
615
18
        /* NB. Cannot be GLOBAL: guest user mode should not see it. */
616
18
        if ( l1_pg )
617
18
            l2e_write(l2_ro_mpt, l2e_from_page(
618
18
                l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
619
18
        l2_ro_mpt++;
620
18
    }
621
1
#undef CNT
622
1
#undef MFN
623
1
624
1
    /* Create user-accessible L2 directory to map the MPT for compat guests. */
625
1
    BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) !=
626
1
                 l4_table_offset(HIRO_COMPAT_MPT_VIRT_START));
627
1
    l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(
628
1
        HIRO_COMPAT_MPT_VIRT_START)]);
629
1
    if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL )
630
0
        goto nomem;
631
1
    compat_idle_pg_table_l2 = l2_ro_mpt;
632
1
    clear_page(l2_ro_mpt);
633
1
    l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)],
634
1
              l3e_from_paddr(__pa(l2_ro_mpt), __PAGE_HYPERVISOR_RO));
635
1
    l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START);
636
1
    /* Allocate and map the compatibility mode machine-to-phys table. */
637
1
    mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1));
638
1
    if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START )
639
0
        mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START;
640
1
    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
641
1
    if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END )
642
1
        m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
643
1
#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int))
644
18
#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
645
18
             sizeof(*compat_machine_to_phys_mapping))
646
1
    BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \
647
1
                 sizeof(*compat_machine_to_phys_mapping));
648
10
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++, l2_ro_mpt++ )
649
9
    {
650
9
        memflags = MEMF_node(phys_to_nid(i <<
651
9
            (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT)));
652
9
        for ( n = 0; n < CNT; ++n)
653
9
            if ( mfn_valid(_mfn(MFN(i) + n * PDX_GROUP_COUNT)) )
654
9
                break;
655
9
        if ( n == CNT )
656
0
            continue;
657
9
        if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
658
9
                                               memflags)) == NULL )
659
0
            goto nomem;
660
9
        map_pages_to_xen(
661
9
            RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
662
9
            page_to_mfn(l1_pg),
663
9
            1UL << PAGETABLE_ORDER,
664
9
            PAGE_HYPERVISOR);
665
9
        memset((void *)(RDWR_COMPAT_MPT_VIRT_START +
666
9
                        (i << L2_PAGETABLE_SHIFT)),
667
9
               0x55,
668
9
               1UL << L2_PAGETABLE_SHIFT);
669
9
        /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */
670
9
        l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT));
671
9
    }
672
1
#undef CNT
673
1
#undef MFN
674
1
675
1
    machine_to_phys_mapping_valid = 1;
676
1
677
1
    /* Set up linear page table mapping. */
678
1
    l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)],
679
1
              l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR_RW));
680
1
    return;
681
1
682
0
 nomem:
683
0
    panic("Not enough memory for m2p table");
684
0
}
685
686
void __init zap_low_mappings(void)
687
1
{
688
1
    BUG_ON(num_online_cpus() != 1);
689
1
690
1
    /* Remove aliased mapping of first 1:1 PML4 entry. */
691
1
    l4e_write(&idle_pg_table[0], l4e_empty());
692
1
    flush_local(FLUSH_TLB_GLOBAL);
693
1
694
1
    /* Replace with mapping of the boot trampoline only. */
695
1
    map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT,
696
1
                     PFN_UP(trampoline_end - trampoline_start),
697
1
                     __PAGE_HYPERVISOR);
698
1
}
699
700
int setup_compat_arg_xlat(struct vcpu *v)
701
12
{
702
12
    return create_perdomain_mapping(v->domain, ARG_XLAT_START(v),
703
12
                                    PFN_UP(COMPAT_ARG_XLAT_SIZE),
704
12
                                    NULL, NIL(struct page_info *));
705
12
}
706
707
void free_compat_arg_xlat(struct vcpu *v)
708
0
{
709
0
    destroy_perdomain_mapping(v->domain, ARG_XLAT_START(v),
710
0
                              PFN_UP(COMPAT_ARG_XLAT_SIZE));
711
0
}
712
713
static void cleanup_frame_table(struct mem_hotadd_info *info)
714
0
{
715
0
    unsigned long sva, eva;
716
0
    l3_pgentry_t l3e;
717
0
    l2_pgentry_t l2e;
718
0
    unsigned long spfn, epfn;
719
0
720
0
    spfn = info->spfn;
721
0
    epfn = info->epfn;
722
0
723
0
    sva = (unsigned long)pdx_to_page(pfn_to_pdx(spfn));
724
0
    eva = (unsigned long)pdx_to_page(pfn_to_pdx(epfn));
725
0
726
0
    /* Initialize all pages */
727
0
    memset(mfn_to_page(spfn), -1,
728
0
           (unsigned long)mfn_to_page(epfn) - (unsigned long)mfn_to_page(spfn));
729
0
730
0
    while (sva < eva)
731
0
    {
732
0
        l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(sva)])[
733
0
          l3_table_offset(sva)];
734
0
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ||
735
0
             (l3e_get_flags(l3e) & _PAGE_PSE) )
736
0
        {
737
0
            sva = (sva & ~((1UL << L3_PAGETABLE_SHIFT) - 1)) +
738
0
                    (1UL << L3_PAGETABLE_SHIFT);
739
0
            continue;
740
0
        }
741
0
742
0
        l2e = l3e_to_l2e(l3e)[l2_table_offset(sva)];
743
0
        ASSERT(l2e_get_flags(l2e) & _PAGE_PRESENT);
744
0
745
0
        if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) ==
746
0
              (_PAGE_PSE | _PAGE_PRESENT) )
747
0
        {
748
0
            if (hotadd_mem_valid(l2e_get_pfn(l2e), info))
749
0
                destroy_xen_mappings(sva & ~((1UL << L2_PAGETABLE_SHIFT) - 1),
750
0
                         ((sva & ~((1UL << L2_PAGETABLE_SHIFT) -1 )) +
751
0
                            (1UL << L2_PAGETABLE_SHIFT) - 1));
752
0
753
0
            sva = (sva & ~((1UL << L2_PAGETABLE_SHIFT) -1 )) +
754
0
                  (1UL << L2_PAGETABLE_SHIFT);
755
0
            continue;
756
0
        }
757
0
758
0
        ASSERT(l1e_get_flags(l2e_to_l1e(l2e)[l1_table_offset(sva)]) &
759
0
                _PAGE_PRESENT);
760
0
         sva = (sva & ~((1UL << PAGE_SHIFT) - 1)) +
761
0
                    (1UL << PAGE_SHIFT);
762
0
    }
763
0
764
0
    /* Brute-Force flush all TLB */
765
0
    flush_tlb_all();
766
0
}
767
768
static int setup_frametable_chunk(void *start, void *end,
769
                                  struct mem_hotadd_info *info)
770
0
{
771
0
    unsigned long s = (unsigned long)start;
772
0
    unsigned long e = (unsigned long)end;
773
0
    unsigned long mfn;
774
0
    int err;
775
0
776
0
    ASSERT(!(s & ((1 << L2_PAGETABLE_SHIFT) - 1)));
777
0
    ASSERT(!(e & ((1 << L2_PAGETABLE_SHIFT) - 1)));
778
0
779
0
    for ( ; s < e; s += (1UL << L2_PAGETABLE_SHIFT))
780
0
    {
781
0
        mfn = alloc_hotadd_mfn(info);
782
0
        err = map_pages_to_xen(s, mfn, 1UL << PAGETABLE_ORDER,
783
0
                               PAGE_HYPERVISOR);
784
0
        if ( err )
785
0
            return err;
786
0
    }
787
0
    memset(start, -1, s - (unsigned long)start);
788
0
789
0
    return 0;
790
0
}
791
792
static int extend_frame_table(struct mem_hotadd_info *info)
793
0
{
794
0
    unsigned long cidx, nidx, eidx, spfn, epfn;
795
0
796
0
    spfn = info->spfn;
797
0
    epfn = info->epfn;
798
0
799
0
    eidx = (pfn_to_pdx(epfn) + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT;
800
0
    nidx = cidx = pfn_to_pdx(spfn)/PDX_GROUP_COUNT;
801
0
802
0
    ASSERT( pfn_to_pdx(epfn) <= (DIRECTMAP_SIZE >> PAGE_SHIFT) &&
803
0
            pfn_to_pdx(epfn) <= FRAMETABLE_NR );
804
0
805
0
    if ( test_bit(cidx, pdx_group_valid) )
806
0
        cidx = find_next_zero_bit(pdx_group_valid, eidx, cidx);
807
0
808
0
    if ( cidx >= eidx )
809
0
        return 0;
810
0
811
0
    while ( cidx < eidx )
812
0
    {
813
0
        int err;
814
0
815
0
        nidx = find_next_bit(pdx_group_valid, eidx, cidx);
816
0
        if ( nidx >= eidx )
817
0
            nidx = eidx;
818
0
        err = setup_frametable_chunk(pdx_to_page(cidx * PDX_GROUP_COUNT ),
819
0
                                     pdx_to_page(nidx * PDX_GROUP_COUNT),
820
0
                                     info);
821
0
        if ( err )
822
0
            return err;
823
0
824
0
        cidx = find_next_zero_bit(pdx_group_valid, eidx, nidx);
825
0
    }
826
0
827
0
    memset(mfn_to_page(spfn), 0,
828
0
           (unsigned long)mfn_to_page(epfn) - (unsigned long)mfn_to_page(spfn));
829
0
    return 0;
830
0
}
831
832
void __init subarch_init_memory(void)
833
1
{
834
1
    unsigned long i, n, v, m2p_start_mfn;
835
1
    l3_pgentry_t l3e;
836
1
    l2_pgentry_t l2e;
837
1
838
1
    BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
839
1
    BUILD_BUG_ON(RDWR_MPT_VIRT_END   & ((1UL << L3_PAGETABLE_SHIFT) - 1));
840
1
    /* M2P table is mappable read-only by privileged domains. */
841
1
    for ( v  = RDWR_MPT_VIRT_START;
842
768
          v != RDWR_MPT_VIRT_END;
843
767
          v += n << PAGE_SHIFT )
844
767
    {
845
767
        n = L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES;
846
767
        l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
847
767
            l3_table_offset(v)];
848
767
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
849
255
            continue;
850
512
        if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
851
512
        {
852
512
            n = L1_PAGETABLE_ENTRIES;
853
512
            l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
854
512
            if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
855
495
                continue;
856
17
            m2p_start_mfn = l2e_get_pfn(l2e);
857
17
        }
858
512
        else
859
0
        {
860
0
            m2p_start_mfn = l3e_get_pfn(l3e);
861
0
        }
862
512
863
8.72k
        for ( i = 0; i < n; i++ )
864
8.70k
        {
865
8.70k
            struct page_info *page = mfn_to_page(m2p_start_mfn + i);
866
8.70k
            share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
867
8.70k
        }
868
17
    }
869
1
870
1
    for ( v  = RDWR_COMPAT_MPT_VIRT_START;
871
513
          v != RDWR_COMPAT_MPT_VIRT_END;
872
512
          v += 1 << L2_PAGETABLE_SHIFT )
873
512
    {
874
512
        l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
875
512
            l3_table_offset(v)];
876
512
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
877
0
            continue;
878
512
        l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
879
512
        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
880
503
            continue;
881
9
        m2p_start_mfn = l2e_get_pfn(l2e);
882
9
883
4.61k
        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
884
4.60k
        {
885
4.60k
            struct page_info *page = mfn_to_page(m2p_start_mfn + i);
886
4.60k
            share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
887
4.60k
        }
888
9
    }
889
1
890
1
    /* Mark all of the direct map NX if hardware supports it. */
891
1
    if ( !cpu_has_nx )
892
0
        return;
893
1
894
1
    for ( i = l4_table_offset(DIRECTMAP_VIRT_START);
895
250
          i < l4_table_offset(DIRECTMAP_VIRT_END); ++i )
896
249
    {
897
249
        l4_pgentry_t l4e = idle_pg_table[i];
898
249
899
249
        if ( l4e_get_flags(l4e) & _PAGE_PRESENT )
900
1
        {
901
1
            l4e_add_flags(l4e, _PAGE_NX_BIT);
902
1
            idle_pg_table[i] = l4e;
903
1
        }
904
249
    }
905
1
}
906
907
long subarch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg)
908
0
{
909
0
    struct xen_machphys_mfn_list xmml;
910
0
    l3_pgentry_t l3e;
911
0
    l2_pgentry_t l2e;
912
0
    unsigned long v, limit;
913
0
    xen_pfn_t mfn, last_mfn;
914
0
    unsigned int i;
915
0
    long rc = 0;
916
0
917
0
    switch ( cmd )
918
0
    {
919
0
    case XENMEM_machphys_mfn_list:
920
0
        if ( copy_from_guest(&xmml, arg, 1) )
921
0
            return -EFAULT;
922
0
923
0
        BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
924
0
        BUILD_BUG_ON(RDWR_MPT_VIRT_END   & ((1UL << L3_PAGETABLE_SHIFT) - 1));
925
0
        for ( i = 0, v = RDWR_MPT_VIRT_START, last_mfn = 0;
926
0
              (i != xmml.max_extents) &&
927
0
              (v < (unsigned long)(machine_to_phys_mapping + max_page));
928
0
              i++, v += 1UL << L2_PAGETABLE_SHIFT )
929
0
        {
930
0
            l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
931
0
                l3_table_offset(v)];
932
0
            if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
933
0
                mfn = last_mfn;
934
0
            else if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
935
0
            {
936
0
                l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
937
0
                if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
938
0
                    mfn = l2e_get_pfn(l2e);
939
0
                else
940
0
                    mfn = last_mfn;
941
0
            }
942
0
            else
943
0
            {
944
0
                mfn = l3e_get_pfn(l3e)
945
0
                    + (l2_table_offset(v) << PAGETABLE_ORDER);
946
0
            }
947
0
            ASSERT(mfn);
948
0
            if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
949
0
                return -EFAULT;
950
0
            last_mfn = mfn;
951
0
        }
952
0
953
0
        xmml.nr_extents = i;
954
0
        if ( __copy_to_guest(arg, &xmml, 1) )
955
0
            return -EFAULT;
956
0
957
0
        break;
958
0
959
0
    case XENMEM_machphys_compat_mfn_list:
960
0
        if ( copy_from_guest(&xmml, arg, 1) )
961
0
            return -EFAULT;
962
0
963
0
        limit = (unsigned long)(compat_machine_to_phys_mapping + max_page);
964
0
        if ( limit > RDWR_COMPAT_MPT_VIRT_END )
965
0
            limit = RDWR_COMPAT_MPT_VIRT_END;
966
0
        for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START, last_mfn = 0;
967
0
              (i != xmml.max_extents) && (v < limit);
968
0
              i++, v += 1 << L2_PAGETABLE_SHIFT )
969
0
        {
970
0
            l2e = compat_idle_pg_table_l2[l2_table_offset(v)];
971
0
            if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
972
0
                mfn = l2e_get_pfn(l2e);
973
0
            else
974
0
                mfn = last_mfn;
975
0
            ASSERT(mfn);
976
0
            if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
977
0
                return -EFAULT;
978
0
            last_mfn = mfn;
979
0
        }
980
0
981
0
        xmml.nr_extents = i;
982
0
        if ( __copy_to_guest(arg, &xmml, 1) )
983
0
            rc = -EFAULT;
984
0
985
0
        break;
986
0
987
0
    case XENMEM_get_sharing_freed_pages:
988
0
        return mem_sharing_get_nr_saved_mfns();
989
0
990
0
    case XENMEM_get_sharing_shared_pages:
991
0
        return mem_sharing_get_nr_shared_mfns();
992
0
993
0
    case XENMEM_paging_op:
994
0
        return mem_paging_memop(guest_handle_cast(arg, xen_mem_paging_op_t));
995
0
996
0
    case XENMEM_sharing_op:
997
0
        return mem_sharing_memop(guest_handle_cast(arg, xen_mem_sharing_op_t));
998
0
999
0
    default:
1000
0
        rc = -ENOSYS;
1001
0
        break;
1002
0
    }
1003
0
1004
0
    return rc;
1005
0
}
1006
1007
long do_stack_switch(unsigned long ss, unsigned long esp)
1008
0
{
1009
0
    fixup_guest_stack_selector(current->domain, ss);
1010
0
    current->arch.pv_vcpu.kernel_ss = ss;
1011
0
    current->arch.pv_vcpu.kernel_sp = esp;
1012
0
    return 0;
1013
0
}
1014
1015
long do_set_segment_base(unsigned int which, unsigned long base)
1016
0
{
1017
0
    struct vcpu *v = current;
1018
0
    long ret = 0;
1019
0
1020
0
    if ( is_pv_32bit_vcpu(v) )
1021
0
        return -ENOSYS; /* x86/64 only. */
1022
0
1023
0
    switch ( which )
1024
0
    {
1025
0
    case SEGBASE_FS:
1026
0
        if ( is_canonical_address(base) )
1027
0
        {
1028
0
            wrfsbase(base);
1029
0
            v->arch.pv_vcpu.fs_base = base;
1030
0
        }
1031
0
        else
1032
0
            ret = -EINVAL;
1033
0
        break;
1034
0
1035
0
    case SEGBASE_GS_USER:
1036
0
        if ( is_canonical_address(base) )
1037
0
        {
1038
0
            wrmsrl(MSR_SHADOW_GS_BASE, base);
1039
0
            v->arch.pv_vcpu.gs_base_user = base;
1040
0
        }
1041
0
        else
1042
0
            ret = -EINVAL;
1043
0
        break;
1044
0
1045
0
    case SEGBASE_GS_KERNEL:
1046
0
        if ( is_canonical_address(base) )
1047
0
        {
1048
0
            wrgsbase(base);
1049
0
            v->arch.pv_vcpu.gs_base_kernel = base;
1050
0
        }
1051
0
        else
1052
0
            ret = -EINVAL;
1053
0
        break;
1054
0
1055
0
    case SEGBASE_GS_USER_SEL:
1056
0
        __asm__ __volatile__ (
1057
0
            "     swapgs              \n"
1058
0
            "1:   movl %k0,%%gs       \n"
1059
0
            "    "safe_swapgs"        \n"
1060
0
            ".section .fixup,\"ax\"   \n"
1061
0
            "2:   xorl %k0,%k0        \n"
1062
0
            "     jmp  1b             \n"
1063
0
            ".previous                \n"
1064
0
            _ASM_EXTABLE(1b, 2b)
1065
0
            : : "r" (base&0xffff) );
1066
0
        break;
1067
0
1068
0
    default:
1069
0
        ret = -EINVAL;
1070
0
        break;
1071
0
    }
1072
0
1073
0
    return ret;
1074
0
}
1075
1076
1077
/* Returns TRUE if given descriptor is valid for GDT or LDT. */
1078
int check_descriptor(const struct domain *dom, struct desc_struct *d)
1079
0
{
1080
0
    u32 a = d->a, b = d->b;
1081
0
    u16 cs;
1082
0
    unsigned int dpl;
1083
0
1084
0
    /* A not-present descriptor will always fault, so is safe. */
1085
0
    if ( !(b & _SEGMENT_P) ) 
1086
0
        return 1;
1087
0
1088
0
    /* Check and fix up the DPL. */
1089
0
    dpl = (b >> 13) & 3;
1090
0
    __fixup_guest_selector(dom, dpl);
1091
0
    b = (b & ~_SEGMENT_DPL) | (dpl << 13);
1092
0
1093
0
    /* All code and data segments are okay. No base/limit checking. */
1094
0
    if ( (b & _SEGMENT_S) )
1095
0
    {
1096
0
        if ( is_pv_32bit_domain(dom) )
1097
0
        {
1098
0
            unsigned long base, limit;
1099
0
1100
0
            if ( b & _SEGMENT_L )
1101
0
                goto bad;
1102
0
1103
0
            /*
1104
0
             * Older PAE Linux guests use segments which are limited to
1105
0
             * 0xf6800000. Extend these to allow access to the larger read-only
1106
0
             * M2P table available in 32on64 mode.
1107
0
             */
1108
0
            base = (b & (0xff << 24)) | ((b & 0xff) << 16) | (a >> 16);
1109
0
1110
0
            limit = (b & 0xf0000) | (a & 0xffff);
1111
0
            limit++; /* We add one because limit is inclusive. */
1112
0
1113
0
            if ( (b & _SEGMENT_G) )
1114
0
                limit <<= 12;
1115
0
1116
0
            if ( (base == 0) && (limit > HYPERVISOR_COMPAT_VIRT_START(dom)) )
1117
0
            {
1118
0
                a |= 0x0000ffff;
1119
0
                b |= 0x000f0000;
1120
0
            }
1121
0
        }
1122
0
1123
0
        goto good;
1124
0
    }
1125
0
1126
0
    /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
1127
0
    if ( (b & _SEGMENT_TYPE) == 0x000 )
1128
0
        return 1;
1129
0
1130
0
    /* Everything but a call gate is discarded here. */
1131
0
    if ( (b & _SEGMENT_TYPE) != 0xc00 )
1132
0
        goto bad;
1133
0
1134
0
    /* Validate the target code selector. */
1135
0
    cs = a >> 16;
1136
0
    if ( !guest_gate_selector_okay(dom, cs) )
1137
0
        goto bad;
1138
0
    /*
1139
0
     * Force DPL to zero, causing a GP fault with its error code indicating
1140
0
     * the gate in use, allowing emulation. This is necessary because with
1141
0
     * native guests (kernel in ring 3) call gates cannot be used directly
1142
0
     * to transition from user to kernel mode (and whether a gate is used
1143
0
     * to enter the kernel can only be determined when the gate is being
1144
0
     * used), and with compat guests call gates cannot be used at all as
1145
0
     * there are only 64-bit ones.
1146
0
     * Store the original DPL in the selector's RPL field.
1147
0
     */
1148
0
    b &= ~_SEGMENT_DPL;
1149
0
    cs = (cs & ~3) | dpl;
1150
0
    a = (a & 0xffffU) | (cs << 16);
1151
0
1152
0
    /* Reserved bits must be zero. */
1153
0
    if ( b & (is_pv_32bit_domain(dom) ? 0xe0 : 0xff) )
1154
0
        goto bad;
1155
0
        
1156
0
 good:
1157
0
    d->a = a;
1158
0
    d->b = b;
1159
0
    return 1;
1160
0
 bad:
1161
0
    return 0;
1162
0
}
1163
1164
int pagefault_by_memadd(unsigned long addr, struct cpu_user_regs *regs)
1165
0
{
1166
0
    struct domain *d = current->domain;
1167
0
1168
0
    return mem_hotplug && guest_mode(regs) && is_pv_32bit_domain(d) &&
1169
0
           (addr >= HYPERVISOR_COMPAT_VIRT_START(d)) &&
1170
0
           (addr < MACH2PHYS_COMPAT_VIRT_END);
1171
0
}
1172
1173
int handle_memadd_fault(unsigned long addr, struct cpu_user_regs *regs)
1174
0
{
1175
0
    struct domain *d = current->domain;
1176
0
    l4_pgentry_t *pl4e = NULL;
1177
0
    l4_pgentry_t l4e;
1178
0
    l3_pgentry_t  *pl3e = NULL;
1179
0
    l3_pgentry_t l3e;
1180
0
    l2_pgentry_t *pl2e = NULL;
1181
0
    l2_pgentry_t l2e, idle_l2e;
1182
0
    unsigned long mfn, idle_index;
1183
0
    int ret = 0;
1184
0
1185
0
    if (!is_pv_32bit_domain(d))
1186
0
        return 0;
1187
0
1188
0
    if ( (addr < HYPERVISOR_COMPAT_VIRT_START(d)) ||
1189
0
         (addr >= MACH2PHYS_COMPAT_VIRT_END) )
1190
0
        return 0;
1191
0
1192
0
    mfn = (read_cr3()) >> PAGE_SHIFT;
1193
0
1194
0
    pl4e = map_domain_page(_mfn(mfn));
1195
0
1196
0
    l4e = pl4e[0];
1197
0
1198
0
    if (!(l4e_get_flags(l4e) & _PAGE_PRESENT))
1199
0
        goto unmap;
1200
0
1201
0
    mfn = l4e_get_pfn(l4e);
1202
0
    /* We don't need to get the page type here since it is the current CR3 */
1203
0
    pl3e = map_domain_page(_mfn(mfn));
1204
0
1205
0
    l3e = pl3e[3];
1206
0
1207
0
    if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
1208
0
        goto unmap;
1209
0
1210
0
    mfn = l3e_get_pfn(l3e);
1211
0
    pl2e = map_domain_page(_mfn(mfn));
1212
0
1213
0
    l2e = pl2e[l2_table_offset(addr)];
1214
0
1215
0
    if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT))
1216
0
        goto unmap;
1217
0
1218
0
    idle_index = (l2_table_offset(addr) -
1219
0
                        COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d))/
1220
0
                  sizeof(l2_pgentry_t);
1221
0
    idle_l2e = compat_idle_pg_table_l2[idle_index];
1222
0
    if (!(l2e_get_flags(idle_l2e) & _PAGE_PRESENT))
1223
0
        goto unmap;
1224
0
1225
0
    memcpy(&pl2e[l2_table_offset(addr)],
1226
0
            &compat_idle_pg_table_l2[idle_index],
1227
0
            sizeof(l2_pgentry_t));
1228
0
1229
0
    ret = EXCRET_fault_fixed;
1230
0
1231
0
unmap:
1232
0
    if ( pl4e )
1233
0
        unmap_domain_page(pl4e);
1234
0
    if ( pl3e )
1235
0
        unmap_domain_page(pl3e);
1236
0
    if ( pl2e )
1237
0
        unmap_domain_page(pl2e);
1238
0
1239
0
    return ret;
1240
0
}
1241
1242
void domain_set_alloc_bitsize(struct domain *d)
1243
0
{
1244
0
    if ( !is_pv_32bit_domain(d) ||
1245
0
         (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) ||
1246
0
         d->arch.physaddr_bitsize > 0 )
1247
0
        return;
1248
0
    d->arch.physaddr_bitsize =
1249
0
        /* 2^n entries can be contained in guest's p2m mapping space */
1250
0
        fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1
1251
0
        /* 2^n pages -> 2^(n+PAGE_SHIFT) bits */
1252
0
        + PAGE_SHIFT;
1253
0
}
1254
1255
unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits)
1256
43.2k
{
1257
43.2k
    if ( (d == NULL) || (d->arch.physaddr_bitsize == 0) )
1258
43.2k
        return bits;
1259
0
    return min(d->arch.physaddr_bitsize, bits);
1260
43.2k
}
1261
1262
static int transfer_pages_to_heap(struct mem_hotadd_info *info)
1263
0
{
1264
0
    unsigned long i;
1265
0
    struct page_info *pg;
1266
0
1267
0
    /*
1268
0
     * Mark the allocated pages before putting free pages into the buddy allocator,
1269
0
     * to avoid merging in free_heap_pages
1270
0
     */
1271
0
    for (i = info->spfn; i < info->cur; i++)
1272
0
    {
1273
0
        pg = mfn_to_page(i);
1274
0
        pg->count_info = PGC_state_inuse;
1275
0
    }
1276
0
1277
0
    init_domheap_pages(pfn_to_paddr(info->cur), pfn_to_paddr(info->epfn));
1278
0
1279
0
    return 0;
1280
0
}
1281
1282
static int mem_hotadd_check(unsigned long spfn, unsigned long epfn)
1283
0
{
1284
0
    unsigned long s, e, length, sidx, eidx;
1285
0
1286
0
    if ( (spfn >= epfn) )
1287
0
        return 0;
1288
0
1289
0
    if (pfn_to_pdx(epfn) > FRAMETABLE_NR)
1290
0
        return 0;
1291
0
1292
0
    if ( (spfn | epfn) & ((1UL << PAGETABLE_ORDER) - 1) )
1293
0
        return 0;
1294
0
1295
0
    if ( (spfn | epfn) & pfn_hole_mask )
1296
0
        return 0;
1297
0
1298
0
    /* Make sure the new range is not present now */
1299
0
    sidx = ((pfn_to_pdx(spfn) + PDX_GROUP_COUNT - 1)  & ~(PDX_GROUP_COUNT - 1))
1300
0
            / PDX_GROUP_COUNT;
1301
0
    eidx = (pfn_to_pdx(epfn - 1) & ~(PDX_GROUP_COUNT - 1)) / PDX_GROUP_COUNT;
1302
0
    if (sidx >= eidx)
1303
0
        return 0;
1304
0
1305
0
    s = find_next_zero_bit(pdx_group_valid, eidx, sidx);
1306
0
    if ( s > eidx )
1307
0
        return 0;
1308
0
    e = find_next_bit(pdx_group_valid, eidx, s);
1309
0
    if ( e < eidx )
1310
0
        return 0;
1311
0
1312
0
    /* Calculate the maximum required m2p/compat m2p/frametable pages */
1313
0
    s = (spfn & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1));
1314
0
    e = (epfn + (1UL << (L2_PAGETABLE_SHIFT - 3)) - 1) &
1315
0
            ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1);
1316
0
1317
0
    length = (e - s) * sizeof(unsigned long);
1318
0
1319
0
    s = (spfn & ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1));
1320
0
    e = (epfn + (1UL << (L2_PAGETABLE_SHIFT - 2)) - 1) &
1321
0
            ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1);
1322
0
1323
0
    e = min_t(unsigned long, e,
1324
0
            (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2);
1325
0
1326
0
    if ( e > s )
1327
0
        length += (e -s) * sizeof(unsigned int);
1328
0
1329
0
    s = pfn_to_pdx(spfn) & ~(PDX_GROUP_COUNT - 1);
1330
0
    e = ( pfn_to_pdx(epfn) + (PDX_GROUP_COUNT - 1) ) & ~(PDX_GROUP_COUNT - 1);
1331
0
1332
0
    length += (e - s) * sizeof(struct page_info);
1333
0
1334
0
    if ((length >> PAGE_SHIFT) > (epfn - spfn))
1335
0
        return 0;
1336
0
1337
0
    return 1;
1338
0
}
1339
1340
/*
1341
 * Be somewhat paranoid about memory allocation failures here, since
1342
 * lack of memory may be the reason for the memory add
1343
 */
1344
int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm)
1345
0
{
1346
0
    struct mem_hotadd_info info;
1347
0
    int ret;
1348
0
    nodeid_t node;
1349
0
    unsigned long old_max = max_page, old_total = total_pages;
1350
0
    unsigned long old_node_start, old_node_span, orig_online;
1351
0
    unsigned long i;
1352
0
1353
0
    dprintk(XENLOG_INFO, "memory_add %lx ~ %lx with pxm %x\n", spfn, epfn, pxm);
1354
0
1355
0
    if ( !mem_hotadd_check(spfn, epfn) )
1356
0
        return -EINVAL;
1357
0
1358
0
    if ( (node = setup_node(pxm)) == NUMA_NO_NODE )
1359
0
        return -EINVAL;
1360
0
1361
0
    if ( !valid_numa_range(spfn << PAGE_SHIFT, epfn << PAGE_SHIFT, node) )
1362
0
    {
1363
0
        printk(XENLOG_WARNING
1364
0
               "pfn range %lx..%lx PXM %x node %x is not NUMA-valid\n",
1365
0
               spfn, epfn, pxm, node);
1366
0
        return -EINVAL;
1367
0
    }
1368
0
1369
0
    i = virt_to_mfn(HYPERVISOR_VIRT_END - 1) + 1;
1370
0
    if ( spfn < i )
1371
0
    {
1372
0
        ret = map_pages_to_xen((unsigned long)mfn_to_virt(spfn), spfn,
1373
0
                               min(epfn, i) - spfn, PAGE_HYPERVISOR);
1374
0
        if ( ret )
1375
0
            goto destroy_directmap;
1376
0
    }
1377
0
    if ( i < epfn )
1378
0
    {
1379
0
        if ( i < spfn )
1380
0
            i = spfn;
1381
0
        ret = map_pages_to_xen((unsigned long)mfn_to_virt(i), i,
1382
0
                               epfn - i, __PAGE_HYPERVISOR_RW);
1383
0
        if ( ret )
1384
0
            goto destroy_directmap;
1385
0
    }
1386
0
1387
0
    old_node_start = node_start_pfn(node);
1388
0
    old_node_span = node_spanned_pages(node);
1389
0
    orig_online = node_online(node);
1390
0
1391
0
    if ( !orig_online )
1392
0
    {
1393
0
        dprintk(XENLOG_WARNING, "node %x pxm %x is not online\n",node, pxm);
1394
0
        NODE_DATA(node)->node_start_pfn = spfn;
1395
0
        NODE_DATA(node)->node_spanned_pages =
1396
0
                epfn - node_start_pfn(node);
1397
0
        node_set_online(node);
1398
0
    }
1399
0
    else
1400
0
    {
1401
0
        if (node_start_pfn(node) > spfn)
1402
0
            NODE_DATA(node)->node_start_pfn = spfn;
1403
0
        if (node_end_pfn(node) < epfn)
1404
0
            NODE_DATA(node)->node_spanned_pages = epfn - node_start_pfn(node);
1405
0
    }
1406
0
1407
0
    info.spfn = spfn;
1408
0
    info.epfn = epfn;
1409
0
    info.cur = spfn;
1410
0
1411
0
    ret = extend_frame_table(&info);
1412
0
    if (ret)
1413
0
        goto destroy_frametable;
1414
0
1415
0
    /* Set max_page, as setup_m2p_table will use it. */
1416
0
    if (max_page < epfn)
1417
0
    {
1418
0
        max_page = epfn;
1419
0
        max_pdx = pfn_to_pdx(max_page - 1) + 1;
1420
0
    }
1421
0
    total_pages += epfn - spfn;
1422
0
1423
0
    set_pdx_range(spfn, epfn);
1424
0
    ret = setup_m2p_table(&info);
1425
0
1426
0
    if ( ret )
1427
0
        goto destroy_m2p;
1428
0
1429
0
    if ( iommu_enabled && !iommu_passthrough && !need_iommu(hardware_domain) )
1430
0
    {
1431
0
        for ( i = spfn; i < epfn; i++ )
1432
0
            if ( iommu_map_page(hardware_domain, i, i, IOMMUF_readable|IOMMUF_writable) )
1433
0
                break;
1434
0
        if ( i != epfn )
1435
0
        {
1436
0
            while (i-- > old_max)
1437
0
                /* If statement to satisfy __must_check. */
1438
0
                if ( iommu_unmap_page(hardware_domain, i) )
1439
0
                    continue;
1440
0
1441
0
            goto destroy_m2p;
1442
0
        }
1443
0
    }
1444
0
1445
0
    /* We can't revert any more */
1446
0
    share_hotadd_m2p_table(&info);
1447
0
    transfer_pages_to_heap(&info);
1448
0
1449
0
    return 0;
1450
0
1451
0
destroy_m2p:
1452
0
    destroy_m2p_mapping(&info);
1453
0
    max_page = old_max;
1454
0
    total_pages = old_total;
1455
0
    max_pdx = pfn_to_pdx(max_page - 1) + 1;
1456
0
destroy_frametable:
1457
0
    cleanup_frame_table(&info);
1458
0
    if ( !orig_online )
1459
0
        node_set_offline(node);
1460
0
    NODE_DATA(node)->node_start_pfn = old_node_start;
1461
0
    NODE_DATA(node)->node_spanned_pages = old_node_span;
1462
0
 destroy_directmap:
1463
0
    destroy_xen_mappings((unsigned long)mfn_to_virt(spfn),
1464
0
                         (unsigned long)mfn_to_virt(epfn));
1465
0
1466
0
    return ret;
1467
0
}
1468
1469
#include "compat/mm.c"
1470
1471
/*
1472
 * Local variables:
1473
 * mode: C
1474
 * c-file-style: "BSD"
1475
 * c-basic-offset: 4
1476
 * tab-width: 4
1477
 * indent-tabs-mode: nil
1478
 * End:
1479
 */