/root/src/xen/xen/arch/x86/x86_64/mm.c
Line | Count | Source |
1 | | /****************************************************************************** |
2 | | * arch/x86/x86_64/mm.c |
3 | | * |
4 | | * Modifications to Linux original are copyright (c) 2004, K A Fraser. This |
5 | | * program is free software; you can redistribute it and/or modify it under |
6 | | * the terms of the GNU General Public License as published by the Free |
7 | | * Software Foundation; either version 2 of the License, or (at your option) |
8 | | * any later version. |
9 | | * |
10 | | * This program is distributed in the hope that it will be useful, but WITHOUT |
11 | | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
12 | | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for |
13 | | * more details. |
14 | | * |
15 | | * You should have received a copy of the GNU General Public License along |
16 | | * with this program; If not, see <http://www.gnu.org/licenses/>. |
17 | | */ |
18 | | |
19 | | asm(".file \"" __FILE__ "\""); |
20 | | |
21 | | #include <xen/lib.h> |
22 | | #include <xen/init.h> |
23 | | #include <xen/mm.h> |
24 | | #include <xen/sched.h> |
25 | | #include <xen/numa.h> |
26 | | #include <xen/nodemask.h> |
27 | | #include <xen/guest_access.h> |
28 | | #include <xen/hypercall.h> |
29 | | #include <xen/mem_access.h> |
30 | | #include <asm/current.h> |
31 | | #include <asm/asm_defns.h> |
32 | | #include <asm/page.h> |
33 | | #include <asm/flushtlb.h> |
34 | | #include <asm/fixmap.h> |
35 | | #include <asm/hypercall.h> |
36 | | #include <asm/msr.h> |
37 | | #include <asm/setup.h> |
38 | | #include <asm/numa.h> |
39 | | #include <asm/mem_paging.h> |
40 | | #include <asm/mem_sharing.h> |
41 | | #include <public/memory.h> |
42 | | |
43 | | unsigned int __read_mostly m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START; |
44 | | |
45 | | l2_pgentry_t *compat_idle_pg_table_l2; |
46 | | |
47 | | void *do_page_walk(struct vcpu *v, unsigned long addr) |
48 | 0 | { |
49 | 0 | unsigned long mfn = pagetable_get_pfn(v->arch.guest_table); |
50 | 0 | l4_pgentry_t l4e, *l4t; |
51 | 0 | l3_pgentry_t l3e, *l3t; |
52 | 0 | l2_pgentry_t l2e, *l2t; |
53 | 0 | l1_pgentry_t l1e, *l1t; |
54 | 0 |
|
55 | 0 | if ( !is_pv_vcpu(v) || !is_canonical_address(addr) ) |
56 | 0 | return NULL; |
57 | 0 |
|
58 | 0 | l4t = map_domain_page(_mfn(mfn)); |
59 | 0 | l4e = l4t[l4_table_offset(addr)]; |
60 | 0 | unmap_domain_page(l4t); |
61 | 0 | if ( !(l4e_get_flags(l4e) & _PAGE_PRESENT) ) |
62 | 0 | return NULL; |
63 | 0 |
|
64 | 0 | l3t = map_l3t_from_l4e(l4e); |
65 | 0 | l3e = l3t[l3_table_offset(addr)]; |
66 | 0 | unmap_domain_page(l3t); |
67 | 0 | mfn = l3e_get_pfn(l3e); |
68 | 0 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) ) |
69 | 0 | return NULL; |
70 | 0 | if ( (l3e_get_flags(l3e) & _PAGE_PSE) ) |
71 | 0 | { |
72 | 0 | mfn += PFN_DOWN(addr & ((1UL << L3_PAGETABLE_SHIFT) - 1)); |
73 | 0 | goto ret; |
74 | 0 | } |
75 | 0 |
|
76 | 0 | l2t = map_domain_page(_mfn(mfn)); |
77 | 0 | l2e = l2t[l2_table_offset(addr)]; |
78 | 0 | unmap_domain_page(l2t); |
79 | 0 | mfn = l2e_get_pfn(l2e); |
80 | 0 | if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) ) |
81 | 0 | return NULL; |
82 | 0 | if ( (l2e_get_flags(l2e) & _PAGE_PSE) ) |
83 | 0 | { |
84 | 0 | mfn += PFN_DOWN(addr & ((1UL << L2_PAGETABLE_SHIFT) - 1)); |
85 | 0 | goto ret; |
86 | 0 | } |
87 | 0 |
|
88 | 0 | l1t = map_domain_page(_mfn(mfn)); |
89 | 0 | l1e = l1t[l1_table_offset(addr)]; |
90 | 0 | unmap_domain_page(l1t); |
91 | 0 | mfn = l1e_get_pfn(l1e); |
92 | 0 | if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || !mfn_valid(_mfn(mfn)) ) |
93 | 0 | return NULL; |
94 | 0 |
|
95 | 0 | ret: |
96 | 0 | return map_domain_page(_mfn(mfn)) + (addr & ~PAGE_MASK); |
97 | 0 | } |
98 | | |
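
do_page_walk() above is the standard 4-level x86-64 software walk: each level extracts a 9-bit index from the virtual address, and the _PAGE_PSE cases stop early at a 1G (L3) or 2M (L2) superpage, folding the remaining address bits into the MFN. A minimal standalone sketch of that index/offset arithmetic (user-space C; the constants mirror x86-64 paging and the VA is just an example):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT       12
    #define PAGETABLE_ORDER  9
    #define L2_SHIFT         (PAGE_SHIFT + PAGETABLE_ORDER)  /* 2M   */
    #define L3_SHIFT         (L2_SHIFT + PAGETABLE_ORDER)    /* 1G   */
    #define L4_SHIFT         (L3_SHIFT + PAGETABLE_ORDER)    /* 512G */
    #define IDX(va, shift)   (((va) >> (shift)) & ((1ULL << PAGETABLE_ORDER) - 1))

    int main(void)
    {
        uint64_t va = 0xffff830012345678ULL;   /* arbitrary example VA */

        /* The per-level table indices the walk uses to pick an entry. */
        printf("l4=%llu l3=%llu l2=%llu l1=%llu page-off=%#llx\n",
               (unsigned long long)IDX(va, L4_SHIFT),
               (unsigned long long)IDX(va, L3_SHIFT),
               (unsigned long long)IDX(va, L2_SHIFT),
               (unsigned long long)IDX(va, PAGE_SHIFT),
               (unsigned long long)(va & ((1ULL << PAGE_SHIFT) - 1)));

        /* 1G superpage case: the low 30 bits select the 4K page inside it,
         * matching "mfn += PFN_DOWN(addr & ((1UL << L3_PAGETABLE_SHIFT) - 1))". */
        printf("pfn offset within the 1G page = %llu\n",
               (unsigned long long)((va & ((1ULL << L3_SHIFT) - 1)) >> PAGE_SHIFT));
        return 0;
    }
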
99 | | /* |
100 | | * Allocate page table pages for the m2p table |
101 | | */ |
102 | | struct mem_hotadd_info |
103 | | { |
104 | | unsigned long spfn; |
105 | | unsigned long epfn; |
106 | | unsigned long cur; |
107 | | }; |
108 | | |
109 | | static int hotadd_mem_valid(unsigned long pfn, struct mem_hotadd_info *info) |
110 | 0 | { |
111 | 0 | return (pfn < info->epfn && pfn >= info->spfn); |
112 | 0 | } |
113 | | |
114 | | static unsigned long alloc_hotadd_mfn(struct mem_hotadd_info *info) |
115 | 0 | { |
116 | 0 | unsigned mfn; |
117 | 0 |
|
118 | 0 | ASSERT((info->cur + ( 1UL << PAGETABLE_ORDER) < info->epfn) && |
119 | 0 | info->cur >= info->spfn); |
120 | 0 |
|
121 | 0 | mfn = info->cur; |
122 | 0 | info->cur += (1UL << PAGETABLE_ORDER); |
123 | 0 | return mfn; |
124 | 0 | } |
125 | | |
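
alloc_hotadd_mfn() above is a simple bump allocator: page-table pages for the new M2P and frame-table mappings are carved, 2^PAGETABLE_ORDER pages at a time, from the front of the hot-added range itself, and whatever remains is handed to the heap later by transfer_pages_to_heap(). A minimal user-space model of the same pattern (the pfn values are made up for illustration):

    #include <assert.h>
    #include <stdio.h>

    #define PAGETABLE_ORDER 9   /* 512 x 4K pages = one 2MB chunk */

    struct range { unsigned long spfn, epfn, cur; };

    static unsigned long bump_alloc(struct range *r)
    {
        unsigned long mfn = r->cur;

        /* Same invariant the ASSERT() above enforces. */
        assert(r->cur >= r->spfn &&
               r->cur + (1UL << PAGETABLE_ORDER) < r->epfn);
        r->cur += 1UL << PAGETABLE_ORDER;
        return mfn;
    }

    int main(void)
    {
        struct range r = { .spfn = 0x100000, .epfn = 0x140000, .cur = 0x100000 };
        unsigned long a = bump_alloc(&r);
        unsigned long b = bump_alloc(&r);

        printf("chunks at pfn %#lx and %#lx; %lu pages left for the heap\n",
               a, b, r.epfn - r.cur);
        return 0;
    }
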
126 | 0 | #define M2P_NO_MAPPED 0 |
127 | 0 | #define M2P_2M_MAPPED 1 |
128 | 0 | #define M2P_1G_MAPPED 2 |
129 | | static int m2p_mapped(unsigned long spfn) |
130 | 0 | { |
131 | 0 | unsigned long va; |
132 | 0 | l3_pgentry_t *l3_ro_mpt; |
133 | 0 | l2_pgentry_t *l2_ro_mpt; |
134 | 0 |
|
135 | 0 | va = RO_MPT_VIRT_START + spfn * sizeof(*machine_to_phys_mapping); |
136 | 0 | l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(va)]); |
137 | 0 |
|
138 | 0 | switch ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & |
139 | 0 | (_PAGE_PRESENT |_PAGE_PSE)) |
140 | 0 | { |
141 | 0 | case _PAGE_PSE|_PAGE_PRESENT: |
142 | 0 | return M2P_1G_MAPPED; |
143 | 0 | /* Check for next level */ |
144 | 0 | case _PAGE_PRESENT: |
145 | 0 | break; |
146 | 0 | default: |
147 | 0 | return M2P_NO_MAPPED; |
148 | 0 | } |
149 | 0 | l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]); |
150 | 0 |
|
151 | 0 | if (l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT) |
152 | 0 | return M2P_2M_MAPPED; |
153 | 0 |
|
154 | 0 | return M2P_NO_MAPPED; |
155 | 0 | } |
156 | | |
157 | | static int share_hotadd_m2p_table(struct mem_hotadd_info *info) |
158 | 0 | { |
159 | 0 | unsigned long i, n, v, m2p_start_mfn = 0; |
160 | 0 | l3_pgentry_t l3e; |
161 | 0 | l2_pgentry_t l2e; |
162 | 0 |
|
163 | 0 | /* M2P table is mappable read-only by privileged domains. */ |
164 | 0 | for ( v = RDWR_MPT_VIRT_START; |
165 | 0 | v != RDWR_MPT_VIRT_END; |
166 | 0 | v += n << PAGE_SHIFT ) |
167 | 0 | { |
168 | 0 | n = L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES; |
169 | 0 | l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[ |
170 | 0 | l3_table_offset(v)]; |
171 | 0 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) |
172 | 0 | continue; |
173 | 0 | if ( !(l3e_get_flags(l3e) & _PAGE_PSE) ) |
174 | 0 | { |
175 | 0 | n = L1_PAGETABLE_ENTRIES; |
176 | 0 | l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; |
177 | 0 | if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) |
178 | 0 | continue; |
179 | 0 | m2p_start_mfn = l2e_get_pfn(l2e); |
180 | 0 | } |
181 | 0 | else |
182 | 0 | continue; |
183 | 0 |
|
184 | 0 | for ( i = 0; i < n; i++ ) |
185 | 0 | { |
186 | 0 | struct page_info *page = mfn_to_page(m2p_start_mfn + i); |
187 | 0 | if (hotadd_mem_valid(m2p_start_mfn + i, info)) |
188 | 0 | share_xen_page_with_privileged_guests(page, XENSHARE_readonly); |
189 | 0 | } |
190 | 0 | } |
191 | 0 |
|
192 | 0 | for ( v = RDWR_COMPAT_MPT_VIRT_START; |
193 | 0 | v != RDWR_COMPAT_MPT_VIRT_END; |
194 | 0 | v += 1 << L2_PAGETABLE_SHIFT ) |
195 | 0 | { |
196 | 0 | l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[ |
197 | 0 | l3_table_offset(v)]; |
198 | 0 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) |
199 | 0 | continue; |
200 | 0 | l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; |
201 | 0 | if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) |
202 | 0 | continue; |
203 | 0 | m2p_start_mfn = l2e_get_pfn(l2e); |
204 | 0 |
|
205 | 0 | for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
206 | 0 | { |
207 | 0 | struct page_info *page = mfn_to_page(m2p_start_mfn + i); |
208 | 0 | if (hotadd_mem_valid(m2p_start_mfn + i, info)) |
209 | 0 | share_xen_page_with_privileged_guests(page, XENSHARE_readonly); |
210 | 0 | } |
211 | 0 | } |
212 | 0 | return 0; |
213 | 0 | } |
214 | | |
215 | | static void destroy_compat_m2p_mapping(struct mem_hotadd_info *info) |
216 | 0 | { |
217 | 0 | unsigned long i, va, rwva, pt_pfn; |
218 | 0 | unsigned long smap = info->spfn, emap = info->spfn; |
219 | 0 |
|
220 | 0 | l3_pgentry_t *l3_ro_mpt; |
221 | 0 | l2_pgentry_t *l2_ro_mpt; |
222 | 0 |
|
223 | 0 | if ( smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) ) |
224 | 0 | return; |
225 | 0 |
|
226 | 0 | if ( emap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) ) |
227 | 0 | emap = (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2; |
228 | 0 |
|
229 | 0 | l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)]); |
230 | 0 |
|
231 | 0 | ASSERT(l3e_get_flags(l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)]) & _PAGE_PRESENT); |
232 | 0 |
|
233 | 0 | l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)]); |
234 | 0 |
|
235 | 0 | for ( i = smap; i < emap; ) |
236 | 0 | { |
237 | 0 | va = HIRO_COMPAT_MPT_VIRT_START + |
238 | 0 | i * sizeof(*compat_machine_to_phys_mapping); |
239 | 0 | rwva = RDWR_COMPAT_MPT_VIRT_START + |
240 | 0 | i * sizeof(*compat_machine_to_phys_mapping); |
241 | 0 | if ( l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT ) |
242 | 0 | { |
243 | 0 | pt_pfn = l2e_get_pfn(l2_ro_mpt[l2_table_offset(va)]); |
244 | 0 | if ( hotadd_mem_valid(pt_pfn, info) ) |
245 | 0 | { |
246 | 0 | destroy_xen_mappings(rwva, rwva + |
247 | 0 | (1UL << L2_PAGETABLE_SHIFT)); |
248 | 0 | l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_empty()); |
249 | 0 | } |
250 | 0 | } |
251 | 0 |
|
252 | 0 | i += 1UL << (L2_PAGETABLE_SHIFT - 2); |
253 | 0 | } |
254 | 0 |
|
255 | 0 | return; |
256 | 0 | } |
257 | | |
258 | | static void destroy_m2p_mapping(struct mem_hotadd_info *info) |
259 | 0 | { |
260 | 0 | l3_pgentry_t *l3_ro_mpt; |
261 | 0 | unsigned long i, va, rwva; |
262 | 0 | unsigned long smap = info->spfn, emap = info->epfn; |
263 | 0 |
|
264 | 0 | l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]); |
265 | 0 |
|
266 | 0 | /* |
267 | 0 | * No need to clean up M2P entries that existed before the hotplug |
268 | 0 | */ |
269 | 0 | for (i = smap; i < emap;) |
270 | 0 | { |
271 | 0 | unsigned long pt_pfn; |
272 | 0 | l2_pgentry_t *l2_ro_mpt; |
273 | 0 |
|
274 | 0 | va = RO_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping); |
275 | 0 | rwva = RDWR_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping); |
276 | 0 |
|
277 | 0 | /* 1G mappings are never created by memory hotadd */ |
278 | 0 | if (!(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PRESENT) || |
279 | 0 | (l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PSE)) |
280 | 0 | { |
281 | 0 | i = ( i & ~((1UL << (L3_PAGETABLE_SHIFT - 3)) - 1)) + |
282 | 0 | (1UL << (L3_PAGETABLE_SHIFT - 3) ); |
283 | 0 | continue; |
284 | 0 | } |
285 | 0 |
|
286 | 0 | l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]); |
287 | 0 | if (!(l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT)) |
288 | 0 | { |
289 | 0 | i = ( i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) + |
290 | 0 | (1UL << (L2_PAGETABLE_SHIFT - 3)) ; |
291 | 0 | continue; |
292 | 0 | } |
293 | 0 |
|
294 | 0 | pt_pfn = l2e_get_pfn(l2_ro_mpt[l2_table_offset(va)]); |
295 | 0 | if ( hotadd_mem_valid(pt_pfn, info) ) |
296 | 0 | { |
297 | 0 | destroy_xen_mappings(rwva, rwva + (1UL << L2_PAGETABLE_SHIFT)); |
298 | 0 |
|
299 | 0 | l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]); |
300 | 0 | l2e_write(&l2_ro_mpt[l2_table_offset(va)], l2e_empty()); |
301 | 0 | } |
302 | 0 | i = ( i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) + |
303 | 0 | (1UL << (L2_PAGETABLE_SHIFT - 3)); |
304 | 0 | } |
305 | 0 |
|
306 | 0 | destroy_compat_m2p_mapping(info); |
307 | 0 |
|
308 | 0 | /* Brute-force flush of all TLBs. */ |
309 | 0 | flush_tlb_all(); |
310 | 0 | return; |
311 | 0 | } |
312 | | |
313 | | /* |
314 | | * Allocate and map the compatibility mode machine-to-phys table. |
315 | | * spfn/epfn: the pfn range to be set up |
316 | | * free_s/free_e: the pfn ranges that are still free |
317 | | */ |
318 | | static int setup_compat_m2p_table(struct mem_hotadd_info *info) |
319 | 0 | { |
320 | 0 | unsigned long i, va, smap, emap, rwva, epfn = info->epfn, mfn; |
321 | 0 | unsigned int n; |
322 | 0 | l3_pgentry_t *l3_ro_mpt = NULL; |
323 | 0 | l2_pgentry_t *l2_ro_mpt = NULL; |
324 | 0 | int err = 0; |
325 | 0 |
|
326 | 0 | smap = info->spfn & (~((1UL << (L2_PAGETABLE_SHIFT - 2)) -1)); |
327 | 0 |
|
328 | 0 | /* |
329 | 0 | * Notice: For hot-added memory, only the range below m2p_compat_vstart |
330 | 0 | * will be filled up (assuming memory is discontinuous when booting). |
331 | 0 | */ |
332 | 0 | if ((smap > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2)) ) |
333 | 0 | return 0; |
334 | 0 |
|
335 | 0 | if ( epfn > ((RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2) ) |
336 | 0 | epfn = (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2; |
337 | 0 |
|
338 | 0 | emap = ( (epfn + ((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1 )) & |
339 | 0 | ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1) ); |
340 | 0 |
|
341 | 0 | va = HIRO_COMPAT_MPT_VIRT_START + |
342 | 0 | smap * sizeof(*compat_machine_to_phys_mapping); |
343 | 0 | l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(va)]); |
344 | 0 |
|
345 | 0 | ASSERT(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & _PAGE_PRESENT); |
346 | 0 |
|
347 | 0 | l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]); |
348 | 0 |
|
349 | 0 | #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int)) |
350 | 0 | #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \ |
351 | 0 | sizeof(*compat_machine_to_phys_mapping)) |
352 | 0 | BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \ |
353 | 0 | sizeof(*compat_machine_to_phys_mapping)); |
354 | 0 |
|
355 | 0 | for ( i = smap; i < emap; i += (1UL << (L2_PAGETABLE_SHIFT - 2)) ) |
356 | 0 | { |
357 | 0 | va = HIRO_COMPAT_MPT_VIRT_START + |
358 | 0 | i * sizeof(*compat_machine_to_phys_mapping); |
359 | 0 |
|
360 | 0 | rwva = RDWR_COMPAT_MPT_VIRT_START + |
361 | 0 | i * sizeof(*compat_machine_to_phys_mapping); |
362 | 0 |
|
363 | 0 | if (l2e_get_flags(l2_ro_mpt[l2_table_offset(va)]) & _PAGE_PRESENT) |
364 | 0 | continue; |
365 | 0 |
|
366 | 0 | for ( n = 0; n < CNT; ++n) |
367 | 0 | if ( mfn_valid(_mfn(i + n * PDX_GROUP_COUNT)) ) |
368 | 0 | break; |
369 | 0 | if ( n == CNT ) |
370 | 0 | continue; |
371 | 0 |
|
372 | 0 | mfn = alloc_hotadd_mfn(info); |
373 | 0 | err = map_pages_to_xen(rwva, mfn, 1UL << PAGETABLE_ORDER, |
374 | 0 | PAGE_HYPERVISOR); |
375 | 0 | if ( err ) |
376 | 0 | break; |
377 | 0 | /* Fill with INVALID_M2P_ENTRY. */ |
378 | 0 | memset((void *)rwva, 0xFF, 1UL << L2_PAGETABLE_SHIFT); |
379 | 0 | /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */ |
380 | 0 | l2e_write(&l2_ro_mpt[l2_table_offset(va)], |
381 | 0 | l2e_from_pfn(mfn, _PAGE_PSE|_PAGE_PRESENT)); |
382 | 0 | } |
383 | 0 | #undef CNT |
384 | 0 | #undef MFN |
385 | 0 | return err; |
386 | 0 | } |
387 | | |
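
The recurring ">> 2" above converts the byte size of the compat M2P window into a pfn count: compat entries are 32-bit, so a window of S bytes can describe S/4 frames, and likewise one 2MB mapping holds 2^(L2_PAGETABLE_SHIFT - 2) entries. A worked example (the 1GB window size is an assumption for illustration, not the real layout constant):

    #include <stdio.h>

    #define L2_PAGETABLE_SHIFT 21            /* one 2MB superpage       */
    #define COMPAT_WINDOW      (1UL << 30)   /* assumed 1GB RDWR window */

    int main(void)
    {
        unsigned long max_pfn     = COMPAT_WINDOW >> 2;   /* 4 bytes per entry */
        unsigned long pfns_per_2m = 1UL << (L2_PAGETABLE_SHIFT - 2);

        printf("window covers pfns below %#lx\n", max_pfn);
        printf("one 2MB mapping holds %lu compat entries\n", pfns_per_2m);
        return 0;
    }
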
388 | | /* |
389 | | * Allocate and map the machine-to-phys table. |
390 | | * The L3 for the RO/RW MPT and the L2 for the compat MPT should already be set up. |
391 | | */ |
392 | | static int setup_m2p_table(struct mem_hotadd_info *info) |
393 | 0 | { |
394 | 0 | unsigned long i, va, smap, emap; |
395 | 0 | unsigned int n; |
396 | 0 | l2_pgentry_t *l2_ro_mpt = NULL; |
397 | 0 | l3_pgentry_t *l3_ro_mpt = NULL; |
398 | 0 | int ret = 0; |
399 | 0 |
|
400 | 0 | ASSERT(l4e_get_flags(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]) |
401 | 0 | & _PAGE_PRESENT); |
402 | 0 | l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)]); |
403 | 0 |
|
404 | 0 | smap = (info->spfn & (~((1UL << (L2_PAGETABLE_SHIFT - 3)) -1))); |
405 | 0 | emap = ((info->epfn + ((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1 )) & |
406 | 0 | ~((1UL << (L2_PAGETABLE_SHIFT - 3)) -1)); |
407 | 0 |
|
408 | 0 | va = RO_MPT_VIRT_START + smap * sizeof(*machine_to_phys_mapping); |
409 | 0 |
|
410 | 0 | #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long)) |
411 | 0 | #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \ |
412 | 0 | sizeof(*machine_to_phys_mapping)) |
413 | 0 |
|
414 | 0 | BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \ |
415 | 0 | sizeof(*machine_to_phys_mapping)); |
416 | 0 |
|
417 | 0 | i = smap; |
418 | 0 | while ( i < emap ) |
419 | 0 | { |
420 | 0 | switch ( m2p_mapped(i) ) |
421 | 0 | { |
422 | 0 | case M2P_1G_MAPPED: |
423 | 0 | i = ( i & ~((1UL << (L3_PAGETABLE_SHIFT - 3)) - 1)) + |
424 | 0 | (1UL << (L3_PAGETABLE_SHIFT - 3)); |
425 | 0 | continue; |
426 | 0 | case M2P_2M_MAPPED: |
427 | 0 | i = (i & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)) + |
428 | 0 | (1UL << (L2_PAGETABLE_SHIFT - 3)); |
429 | 0 | continue; |
430 | 0 | default: |
431 | 0 | break; |
432 | 0 | } |
433 | 0 |
|
434 | 0 | va = RO_MPT_VIRT_START + i * sizeof(*machine_to_phys_mapping); |
435 | 0 |
|
436 | 0 | for ( n = 0; n < CNT; ++n) |
437 | 0 | if ( mfn_valid(_mfn(i + n * PDX_GROUP_COUNT)) ) |
438 | 0 | break; |
439 | 0 | if ( n < CNT ) |
440 | 0 | { |
441 | 0 | unsigned long mfn = alloc_hotadd_mfn(info); |
442 | 0 |
|
443 | 0 | ret = map_pages_to_xen( |
444 | 0 | RDWR_MPT_VIRT_START + i * sizeof(unsigned long), |
445 | 0 | mfn, 1UL << PAGETABLE_ORDER, |
446 | 0 | PAGE_HYPERVISOR); |
447 | 0 | if ( ret ) |
448 | 0 | goto error; |
449 | 0 | /* Fill with INVALID_M2P_ENTRY. */ |
450 | 0 | memset((void *)(RDWR_MPT_VIRT_START + i * sizeof(unsigned long)), |
451 | 0 | 0xFF, 1UL << L2_PAGETABLE_SHIFT); |
452 | 0 |
|
453 | 0 | ASSERT(!(l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & |
454 | 0 | _PAGE_PSE)); |
455 | 0 | if ( l3e_get_flags(l3_ro_mpt[l3_table_offset(va)]) & |
456 | 0 | _PAGE_PRESENT ) |
457 | 0 | l2_ro_mpt = l3e_to_l2e(l3_ro_mpt[l3_table_offset(va)]) + |
458 | 0 | l2_table_offset(va); |
459 | 0 | else |
460 | 0 | { |
461 | 0 | l2_ro_mpt = alloc_xen_pagetable(); |
462 | 0 | if ( !l2_ro_mpt ) |
463 | 0 | { |
464 | 0 | ret = -ENOMEM; |
465 | 0 | goto error; |
466 | 0 | } |
467 | 0 |
|
468 | 0 | clear_page(l2_ro_mpt); |
469 | 0 | l3e_write(&l3_ro_mpt[l3_table_offset(va)], |
470 | 0 | l3e_from_paddr(__pa(l2_ro_mpt), |
471 | 0 | __PAGE_HYPERVISOR_RO | _PAGE_USER)); |
472 | 0 | l2_ro_mpt += l2_table_offset(va); |
473 | 0 | } |
474 | 0 |
|
475 | 0 | /* NB. Cannot be GLOBAL: guest user mode should not see it. */ |
476 | 0 | l2e_write(l2_ro_mpt, l2e_from_pfn(mfn, |
477 | 0 | /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); |
478 | 0 | } |
479 | 0 | if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) ) |
480 | 0 | l2_ro_mpt = NULL; |
481 | 0 | i += ( 1UL << (L2_PAGETABLE_SHIFT - 3)); |
482 | 0 | } |
483 | 0 | #undef CNT |
484 | 0 | #undef MFN |
485 | 0 |
|
486 | 0 | ret = setup_compat_m2p_table(info); |
487 | 0 | error: |
488 | 0 | return ret; |
489 | 0 | } |
490 | | |
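
The native M2P uses 8-byte entries, hence the "- 3" in the shifts above: one 2MB table page maps 2^(21-3) = 262144 frames, and smap/emap round the hot-added pfn range out to that granularity. A standalone model of the rounding (the pfn values are arbitrary examples):

    #include <stdio.h>

    #define GRAN (1UL << (21 - 3))   /* pfns covered by 2MB of 8-byte entries */

    int main(void)
    {
        unsigned long spfn = 0x12345, epfn = 0x54321;
        unsigned long smap = spfn & ~(GRAN - 1);               /* round down */
        unsigned long emap = (epfn + GRAN - 1) & ~(GRAN - 1);  /* round up   */

        printf("range [%#lx, %#lx) widened to [%#lx, %#lx)\n",
               spfn, epfn, smap, emap);
        return 0;
    }
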
491 | | void __init paging_init(void) |
492 | 1 | { |
493 | 1 | unsigned long i, mpt_size, va; |
494 | 1 | unsigned int n, memflags; |
495 | 1 | l3_pgentry_t *l3_ro_mpt; |
496 | 1 | l2_pgentry_t *l2_ro_mpt = NULL; |
497 | 1 | struct page_info *l1_pg; |
498 | 1 | |
499 | 1 | /* |
500 | 1 | * We set up the L3s for the 1:1 mapping if the host supports memory hotplug, |
501 | 1 | * to avoid syncing the 1:1 mapping in the page fault handler. |
502 | 1 | */ |
503 | 1 | for ( va = DIRECTMAP_VIRT_START; |
504 | 1 | va < DIRECTMAP_VIRT_END && (void *)va < __va(mem_hotplug); |
505 | 0 | va += (1UL << L4_PAGETABLE_SHIFT) ) |
506 | 0 | { |
507 | 0 | if ( !(l4e_get_flags(idle_pg_table[l4_table_offset(va)]) & |
508 | 0 | _PAGE_PRESENT) ) |
509 | 0 | { |
510 | 0 | l3_pgentry_t *pl3t = alloc_xen_pagetable(); |
511 | 0 |
|
512 | 0 | if ( !pl3t ) |
513 | 0 | goto nomem; |
514 | 0 | clear_page(pl3t); |
515 | 0 | l4e_write(&idle_pg_table[l4_table_offset(va)], |
516 | 0 | l4e_from_paddr(__pa(pl3t), __PAGE_HYPERVISOR_RW)); |
517 | 0 | } |
518 | 0 | } |
519 | 1 | |
520 | 1 | /* Create user-accessible L2 directory to map the MPT for guests. */ |
521 | 1 | if ( (l3_ro_mpt = alloc_xen_pagetable()) == NULL ) |
522 | 0 | goto nomem; |
523 | 1 | clear_page(l3_ro_mpt); |
524 | 1 | l4e_write(&idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)], |
525 | 1 | l4e_from_paddr(__pa(l3_ro_mpt), __PAGE_HYPERVISOR_RO | _PAGE_USER)); |
526 | 1 | |
527 | 1 | /* |
528 | 1 | * Allocate and map the machine-to-phys table. |
529 | 1 | * This also ensures L3 is present for fixmaps. |
530 | 1 | */ |
531 | 1 | mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1; |
532 | 1 | mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); |
533 | 1 | #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long)) |
534 | 40 | #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \ |
535 | 40 | sizeof(*machine_to_phys_mapping)) |
536 | 1 | BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \ |
537 | 1 | sizeof(*machine_to_phys_mapping)); |
538 | 19 | for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ ) |
539 | 18 | { |
540 | 18 | BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1)); |
541 | 18 | va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT); |
542 | 18 | memflags = MEMF_node(phys_to_nid(i << |
543 | 18 | (L2_PAGETABLE_SHIFT - 3 + PAGE_SHIFT))); |
544 | 18 | |
545 | 18 | if ( cpu_has_page1gb && |
546 | 18 | !((unsigned long)l2_ro_mpt & ~PAGE_MASK) && |
547 | 1 | (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) ) |
548 | 0 | { |
549 | 0 | unsigned int k, holes; |
550 | 0 |
|
551 | 0 | for ( holes = k = 0; k < 1 << PAGETABLE_ORDER; ++k) |
552 | 0 | { |
553 | 0 | for ( n = 0; n < CNT; ++n) |
554 | 0 | if ( mfn_valid(_mfn(MFN(i + k) + n * PDX_GROUP_COUNT)) ) |
555 | 0 | break; |
556 | 0 | if ( n == CNT ) |
557 | 0 | ++holes; |
558 | 0 | } |
559 | 0 | if ( k == holes ) |
560 | 0 | { |
561 | 0 | i += (1UL << PAGETABLE_ORDER) - 1; |
562 | 0 | continue; |
563 | 0 | } |
564 | 0 | if ( holes == 0 && |
565 | 0 | (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER, |
566 | 0 | memflags)) != NULL ) |
567 | 0 | { |
568 | 0 | map_pages_to_xen( |
569 | 0 | RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), |
570 | 0 | page_to_mfn(l1_pg), |
571 | 0 | 1UL << (2 * PAGETABLE_ORDER), |
572 | 0 | PAGE_HYPERVISOR); |
573 | 0 | memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), |
574 | 0 | 0x77, 1UL << L3_PAGETABLE_SHIFT); |
575 | 0 |
|
576 | 0 | ASSERT(!l2_table_offset(va)); |
577 | 0 | /* NB. Cannot be GLOBAL: guest user mode should not see it. */ |
578 | 0 | l3e_write(&l3_ro_mpt[l3_table_offset(va)], |
579 | 0 | l3e_from_page(l1_pg, |
580 | 0 | /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); |
581 | 0 | i += (1UL << PAGETABLE_ORDER) - 1; |
582 | 0 | continue; |
583 | 0 | } |
584 | 0 | } |
585 | 18 | |
586 | 22 | for ( n = 0; n < CNT; ++n) |
587 | 21 | if ( mfn_valid(_mfn(MFN(i) + n * PDX_GROUP_COUNT)) ) |
588 | 17 | break; |
589 | 18 | if ( n == CNT ) |
590 | 1 | l1_pg = NULL; |
591 | 17 | else if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, |
592 | 17 | memflags)) == NULL ) |
593 | 0 | goto nomem; |
594 | 17 | else |
595 | 17 | { |
596 | 17 | map_pages_to_xen( |
597 | 17 | RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), |
598 | 17 | page_to_mfn(l1_pg), |
599 | 17 | 1UL << PAGETABLE_ORDER, |
600 | 17 | PAGE_HYPERVISOR); |
601 | 17 | /* Fill with INVALID_M2P_ENTRY. */ |
602 | 17 | memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), |
603 | 17 | 0xFF, 1UL << L2_PAGETABLE_SHIFT); |
604 | 17 | } |
605 | 18 | if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) ) |
606 | 1 | { |
607 | 1 | if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL ) |
608 | 0 | goto nomem; |
609 | 1 | clear_page(l2_ro_mpt); |
610 | 1 | l3e_write(&l3_ro_mpt[l3_table_offset(va)], |
611 | 1 | l3e_from_paddr(__pa(l2_ro_mpt), |
612 | 1 | __PAGE_HYPERVISOR_RO | _PAGE_USER)); |
613 | 1 | ASSERT(!l2_table_offset(va)); |
614 | 1 | } |
615 | 18 | /* NB. Cannot be GLOBAL: guest user mode should not see it. */ |
616 | 18 | if ( l1_pg ) |
617 | 18 | l2e_write(l2_ro_mpt, l2e_from_page( |
618 | 18 | l1_pg, /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT)); |
619 | 18 | l2_ro_mpt++; |
620 | 18 | } |
621 | 1 | #undef CNT |
622 | 1 | #undef MFN |
623 | 1 | |
624 | 1 | /* Create user-accessible L2 directory to map the MPT for compat guests. */ |
625 | 1 | BUILD_BUG_ON(l4_table_offset(RDWR_MPT_VIRT_START) != |
626 | 1 | l4_table_offset(HIRO_COMPAT_MPT_VIRT_START)); |
627 | 1 | l3_ro_mpt = l4e_to_l3e(idle_pg_table[l4_table_offset( |
628 | 1 | HIRO_COMPAT_MPT_VIRT_START)]); |
629 | 1 | if ( (l2_ro_mpt = alloc_xen_pagetable()) == NULL ) |
630 | 0 | goto nomem; |
631 | 1 | compat_idle_pg_table_l2 = l2_ro_mpt; |
632 | 1 | clear_page(l2_ro_mpt); |
633 | 1 | l3e_write(&l3_ro_mpt[l3_table_offset(HIRO_COMPAT_MPT_VIRT_START)], |
634 | 1 | l3e_from_paddr(__pa(l2_ro_mpt), __PAGE_HYPERVISOR_RO)); |
635 | 1 | l2_ro_mpt += l2_table_offset(HIRO_COMPAT_MPT_VIRT_START); |
636 | 1 | /* Allocate and map the compatibility mode machine-to-phys table. */ |
637 | 1 | mpt_size = (mpt_size >> 1) + (1UL << (L2_PAGETABLE_SHIFT - 1)); |
638 | 1 | if ( mpt_size > RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START ) |
639 | 0 | mpt_size = RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START; |
640 | 1 | mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL); |
641 | 1 | if ( (m2p_compat_vstart + mpt_size) < MACH2PHYS_COMPAT_VIRT_END ) |
642 | 1 | m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size; |
643 | 1 | #define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned int)) |
644 | 18 | #define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \ |
645 | 18 | sizeof(*compat_machine_to_phys_mapping)) |
646 | 1 | BUILD_BUG_ON((sizeof(*frame_table) & ~sizeof(*frame_table)) % \ |
647 | 1 | sizeof(*compat_machine_to_phys_mapping)); |
648 | 10 | for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++, l2_ro_mpt++ ) |
649 | 9 | { |
650 | 9 | memflags = MEMF_node(phys_to_nid(i << |
651 | 9 | (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT))); |
652 | 9 | for ( n = 0; n < CNT; ++n) |
653 | 9 | if ( mfn_valid(_mfn(MFN(i) + n * PDX_GROUP_COUNT)) ) |
654 | 9 | break; |
655 | 9 | if ( n == CNT ) |
656 | 0 | continue; |
657 | 9 | if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, |
658 | 9 | memflags)) == NULL ) |
659 | 0 | goto nomem; |
660 | 9 | map_pages_to_xen( |
661 | 9 | RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT), |
662 | 9 | page_to_mfn(l1_pg), |
663 | 9 | 1UL << PAGETABLE_ORDER, |
664 | 9 | PAGE_HYPERVISOR); |
665 | 9 | memset((void *)(RDWR_COMPAT_MPT_VIRT_START + |
666 | 9 | (i << L2_PAGETABLE_SHIFT)), |
667 | 9 | 0x55, |
668 | 9 | 1UL << L2_PAGETABLE_SHIFT); |
669 | 9 | /* NB. Cannot be GLOBAL as the ptes get copied into per-VM space. */ |
670 | 9 | l2e_write(l2_ro_mpt, l2e_from_page(l1_pg, _PAGE_PSE|_PAGE_PRESENT)); |
671 | 9 | } |
672 | 1 | #undef CNT |
673 | 1 | #undef MFN |
674 | 1 | |
675 | 1 | machine_to_phys_mapping_valid = 1; |
676 | 1 | |
677 | 1 | /* Set up linear page table mapping. */ |
678 | 1 | l4e_write(&idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)], |
679 | 1 | l4e_from_paddr(__pa(idle_pg_table), __PAGE_HYPERVISOR_RW)); |
680 | 1 | return; |
681 | 1 | |
682 | 0 | nomem: |
683 | 0 | panic("Not enough memory for m2p table"); |
684 | 0 | } |
685 | | |
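
The sizing logic near the top of paging_init() computes the native M2P footprint as max_page entries of BYTES_PER_LONG bytes, rounded up to whole 2MB mappings; the compat table is then half of that (4-byte entries), rounded the same way. A worked example with an assumed max_page (the value is illustrative only):

    #include <stdio.h>

    #define L2_SHIFT       21
    #define BYTES_PER_LONG 8

    int main(void)
    {
        unsigned long max_page = 0x240000;   /* assumed: 9GB worth of 4K frames */
        unsigned long mpt_size = max_page * BYTES_PER_LONG
                                 + (1UL << L2_SHIFT) - 1;

        mpt_size &= ~((1UL << L2_SHIFT) - 1);
        printf("native M2P: %#lx bytes = %lu 2MB mappings\n",
               mpt_size, mpt_size >> L2_SHIFT);
        /* Compat table: half the size, rounded up to a 2MB boundary. */
        printf("compat M2P: about %#lx bytes\n",
               ((mpt_size >> 1) + (1UL << (L2_SHIFT - 1)))
               & ~((1UL << L2_SHIFT) - 1));
        return 0;
    }
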
686 | | void __init zap_low_mappings(void) |
687 | 1 | { |
688 | 1 | BUG_ON(num_online_cpus() != 1); |
689 | 1 | |
690 | 1 | /* Remove aliased mapping of first 1:1 PML4 entry. */ |
691 | 1 | l4e_write(&idle_pg_table[0], l4e_empty()); |
692 | 1 | flush_local(FLUSH_TLB_GLOBAL); |
693 | 1 | |
694 | 1 | /* Replace with mapping of the boot trampoline only. */ |
695 | 1 | map_pages_to_xen(trampoline_phys, trampoline_phys >> PAGE_SHIFT, |
696 | 1 | PFN_UP(trampoline_end - trampoline_start), |
697 | 1 | __PAGE_HYPERVISOR); |
698 | 1 | } |
699 | | |
700 | | int setup_compat_arg_xlat(struct vcpu *v) |
701 | 12 | { |
702 | 12 | return create_perdomain_mapping(v->domain, ARG_XLAT_START(v), |
703 | 12 | PFN_UP(COMPAT_ARG_XLAT_SIZE), |
704 | 12 | NULL, NIL(struct page_info *)); |
705 | 12 | } |
706 | | |
707 | | void free_compat_arg_xlat(struct vcpu *v) |
708 | 0 | { |
709 | 0 | destroy_perdomain_mapping(v->domain, ARG_XLAT_START(v), |
710 | 0 | PFN_UP(COMPAT_ARG_XLAT_SIZE)); |
711 | 0 | } |
712 | | |
713 | | static void cleanup_frame_table(struct mem_hotadd_info *info) |
714 | 0 | { |
715 | 0 | unsigned long sva, eva; |
716 | 0 | l3_pgentry_t l3e; |
717 | 0 | l2_pgentry_t l2e; |
718 | 0 | unsigned long spfn, epfn; |
719 | 0 |
|
720 | 0 | spfn = info->spfn; |
721 | 0 | epfn = info->epfn; |
722 | 0 |
|
723 | 0 | sva = (unsigned long)pdx_to_page(pfn_to_pdx(spfn)); |
724 | 0 | eva = (unsigned long)pdx_to_page(pfn_to_pdx(epfn)); |
725 | 0 |
|
726 | 0 | /* Initialize all pages. */ |
727 | 0 | memset(mfn_to_page(spfn), -1, |
728 | 0 | (unsigned long)mfn_to_page(epfn) - (unsigned long)mfn_to_page(spfn)); |
729 | 0 |
|
730 | 0 | while (sva < eva) |
731 | 0 | { |
732 | 0 | l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(sva)])[ |
733 | 0 | l3_table_offset(sva)]; |
734 | 0 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || |
735 | 0 | (l3e_get_flags(l3e) & _PAGE_PSE) ) |
736 | 0 | { |
737 | 0 | sva = (sva & ~((1UL << L3_PAGETABLE_SHIFT) - 1)) + |
738 | 0 | (1UL << L3_PAGETABLE_SHIFT); |
739 | 0 | continue; |
740 | 0 | } |
741 | 0 |
|
742 | 0 | l2e = l3e_to_l2e(l3e)[l2_table_offset(sva)]; |
743 | 0 | ASSERT(l2e_get_flags(l2e) & _PAGE_PRESENT); |
744 | 0 |
|
745 | 0 | if ( (l2e_get_flags(l2e) & (_PAGE_PRESENT | _PAGE_PSE)) == |
746 | 0 | (_PAGE_PSE | _PAGE_PRESENT) ) |
747 | 0 | { |
748 | 0 | if (hotadd_mem_valid(l2e_get_pfn(l2e), info)) |
749 | 0 | destroy_xen_mappings(sva & ~((1UL << L2_PAGETABLE_SHIFT) - 1), |
750 | 0 | ((sva & ~((1UL << L2_PAGETABLE_SHIFT) -1 )) + |
751 | 0 | (1UL << L2_PAGETABLE_SHIFT) - 1)); |
752 | 0 |
|
753 | 0 | sva = (sva & ~((1UL << L2_PAGETABLE_SHIFT) -1 )) + |
754 | 0 | (1UL << L2_PAGETABLE_SHIFT); |
755 | 0 | continue; |
756 | 0 | } |
757 | 0 |
|
758 | 0 | ASSERT(l1e_get_flags(l2e_to_l1e(l2e)[l1_table_offset(sva)]) & |
759 | 0 | _PAGE_PRESENT); |
760 | 0 | sva = (sva & ~((1UL << PAGE_SHIFT) - 1)) + |
761 | 0 | (1UL << PAGE_SHIFT); |
762 | 0 | } |
763 | 0 |
|
764 | 0 | /* Brute-force flush of all TLBs. */ |
765 | 0 | flush_tlb_all(); |
766 | 0 | } |
767 | | |
768 | | static int setup_frametable_chunk(void *start, void *end, |
769 | | struct mem_hotadd_info *info) |
770 | 0 | { |
771 | 0 | unsigned long s = (unsigned long)start; |
772 | 0 | unsigned long e = (unsigned long)end; |
773 | 0 | unsigned long mfn; |
774 | 0 | int err; |
775 | 0 |
|
776 | 0 | ASSERT(!(s & ((1 << L2_PAGETABLE_SHIFT) - 1))); |
777 | 0 | ASSERT(!(e & ((1 << L2_PAGETABLE_SHIFT) - 1))); |
778 | 0 |
|
779 | 0 | for ( ; s < e; s += (1UL << L2_PAGETABLE_SHIFT)) |
780 | 0 | { |
781 | 0 | mfn = alloc_hotadd_mfn(info); |
782 | 0 | err = map_pages_to_xen(s, mfn, 1UL << PAGETABLE_ORDER, |
783 | 0 | PAGE_HYPERVISOR); |
784 | 0 | if ( err ) |
785 | 0 | return err; |
786 | 0 | } |
787 | 0 | memset(start, -1, s - (unsigned long)start); |
788 | 0 |
|
789 | 0 | return 0; |
790 | 0 | } |
791 | | |
792 | | static int extend_frame_table(struct mem_hotadd_info *info) |
793 | 0 | { |
794 | 0 | unsigned long cidx, nidx, eidx, spfn, epfn; |
795 | 0 |
|
796 | 0 | spfn = info->spfn; |
797 | 0 | epfn = info->epfn; |
798 | 0 |
|
799 | 0 | eidx = (pfn_to_pdx(epfn) + PDX_GROUP_COUNT - 1) / PDX_GROUP_COUNT; |
800 | 0 | nidx = cidx = pfn_to_pdx(spfn)/PDX_GROUP_COUNT; |
801 | 0 |
|
802 | 0 | ASSERT( pfn_to_pdx(epfn) <= (DIRECTMAP_SIZE >> PAGE_SHIFT) && |
803 | 0 | pfn_to_pdx(epfn) <= FRAMETABLE_NR ); |
804 | 0 |
|
805 | 0 | if ( test_bit(cidx, pdx_group_valid) ) |
806 | 0 | cidx = find_next_zero_bit(pdx_group_valid, eidx, cidx); |
807 | 0 |
|
808 | 0 | if ( cidx >= eidx ) |
809 | 0 | return 0; |
810 | 0 |
|
811 | 0 | while ( cidx < eidx ) |
812 | 0 | { |
813 | 0 | int err; |
814 | 0 |
|
815 | 0 | nidx = find_next_bit(pdx_group_valid, eidx, cidx); |
816 | 0 | if ( nidx >= eidx ) |
817 | 0 | nidx = eidx; |
818 | 0 | err = setup_frametable_chunk(pdx_to_page(cidx * PDX_GROUP_COUNT ), |
819 | 0 | pdx_to_page(nidx * PDX_GROUP_COUNT), |
820 | 0 | info); |
821 | 0 | if ( err ) |
822 | 0 | return err; |
823 | 0 |
|
824 | 0 | cidx = find_next_zero_bit(pdx_group_valid, eidx, nidx); |
825 | 0 | } |
826 | 0 |
|
827 | 0 | memset(mfn_to_page(spfn), 0, |
828 | 0 | (unsigned long)mfn_to_page(epfn) - (unsigned long)mfn_to_page(spfn)); |
829 | 0 | return 0; |
830 | 0 | } |
831 | | |
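
extend_frame_table() above alternates between find_next_zero_bit() and find_next_bit() on pdx_group_valid so that each run of not-yet-backed groups is mapped exactly once. A sketch of that run-walking pattern, where next_with_val() is a byte-array stand-in for Xen's bitops (not the real find_next_*bit implementations):

    #include <stdio.h>

    static unsigned long next_with_val(const unsigned char *bm, unsigned long size,
                                       unsigned long start, unsigned char val)
    {
        while ( start < size && bm[start] != val )
            start++;
        return start;
    }

    int main(void)
    {
        /* 1 = pdx group already backed by frame table, 0 = needs setup. */
        unsigned char valid[] = { 1, 1, 0, 0, 1, 0, 0, 0, 1, 1 };
        unsigned long cidx = 0, eidx = sizeof(valid), nidx;

        cidx = next_with_val(valid, eidx, cidx, 0);
        while ( cidx < eidx )
        {
            nidx = next_with_val(valid, eidx, cidx, 1);
            printf("set up groups [%lu, %lu)\n", cidx, nidx);   /* [2,4), [5,8) */
            cidx = next_with_val(valid, eidx, nidx, 0);
        }
        return 0;
    }
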
832 | | void __init subarch_init_memory(void) |
833 | 1 | { |
834 | 1 | unsigned long i, n, v, m2p_start_mfn; |
835 | 1 | l3_pgentry_t l3e; |
836 | 1 | l2_pgentry_t l2e; |
837 | 1 | |
838 | 1 | BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1)); |
839 | 1 | BUILD_BUG_ON(RDWR_MPT_VIRT_END & ((1UL << L3_PAGETABLE_SHIFT) - 1)); |
840 | 1 | /* M2P table is mappable read-only by privileged domains. */ |
841 | 1 | for ( v = RDWR_MPT_VIRT_START; |
842 | 768 | v != RDWR_MPT_VIRT_END; |
843 | 767 | v += n << PAGE_SHIFT ) |
844 | 767 | { |
845 | 767 | n = L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES; |
846 | 767 | l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[ |
847 | 767 | l3_table_offset(v)]; |
848 | 767 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) |
849 | 255 | continue; |
850 | 512 | if ( !(l3e_get_flags(l3e) & _PAGE_PSE) ) |
851 | 512 | { |
852 | 512 | n = L1_PAGETABLE_ENTRIES; |
853 | 512 | l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; |
854 | 512 | if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) |
855 | 495 | continue; |
856 | 17 | m2p_start_mfn = l2e_get_pfn(l2e); |
857 | 17 | } |
858 | 512 | else |
859 | 0 | { |
860 | 0 | m2p_start_mfn = l3e_get_pfn(l3e); |
861 | 0 | } |
862 | 512 | |
863 | 8.72k | for ( i = 0; i < n; i++ ) |
864 | 8.70k | { |
865 | 8.70k | struct page_info *page = mfn_to_page(m2p_start_mfn + i); |
866 | 8.70k | share_xen_page_with_privileged_guests(page, XENSHARE_readonly); |
867 | 8.70k | } |
868 | 17 | } |
869 | 1 | |
870 | 1 | for ( v = RDWR_COMPAT_MPT_VIRT_START; |
871 | 513 | v != RDWR_COMPAT_MPT_VIRT_END; |
872 | 512 | v += 1 << L2_PAGETABLE_SHIFT ) |
873 | 512 | { |
874 | 512 | l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[ |
875 | 512 | l3_table_offset(v)]; |
876 | 512 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) |
877 | 0 | continue; |
878 | 512 | l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; |
879 | 512 | if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) ) |
880 | 503 | continue; |
881 | 9 | m2p_start_mfn = l2e_get_pfn(l2e); |
882 | 9 | |
883 | 4.61k | for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ ) |
884 | 4.60k | { |
885 | 4.60k | struct page_info *page = mfn_to_page(m2p_start_mfn + i); |
886 | 4.60k | share_xen_page_with_privileged_guests(page, XENSHARE_readonly); |
887 | 4.60k | } |
888 | 9 | } |
889 | 1 | |
890 | 1 | /* Mark all of the direct map NX if the hardware supports it. */ |
891 | 1 | if ( !cpu_has_nx ) |
892 | 0 | return; |
893 | 1 | |
894 | 1 | for ( i = l4_table_offset(DIRECTMAP_VIRT_START); |
895 | 250 | i < l4_table_offset(DIRECTMAP_VIRT_END); ++i ) |
896 | 249 | { |
897 | 249 | l4_pgentry_t l4e = idle_pg_table[i]; |
898 | 249 | |
899 | 249 | if ( l4e_get_flags(l4e) & _PAGE_PRESENT ) |
900 | 1 | { |
901 | 1 | l4e_add_flags(l4e, _PAGE_NX_BIT); |
902 | 1 | idle_pg_table[i] = l4e; |
903 | 1 | } |
904 | 249 | } |
905 | 1 | } |
906 | | |
907 | | long subarch_memory_op(unsigned long cmd, XEN_GUEST_HANDLE_PARAM(void) arg) |
908 | 0 | { |
909 | 0 | struct xen_machphys_mfn_list xmml; |
910 | 0 | l3_pgentry_t l3e; |
911 | 0 | l2_pgentry_t l2e; |
912 | 0 | unsigned long v, limit; |
913 | 0 | xen_pfn_t mfn, last_mfn; |
914 | 0 | unsigned int i; |
915 | 0 | long rc = 0; |
916 | 0 |
|
917 | 0 | switch ( cmd ) |
918 | 0 | { |
919 | 0 | case XENMEM_machphys_mfn_list: |
920 | 0 | if ( copy_from_guest(&xmml, arg, 1) ) |
921 | 0 | return -EFAULT; |
922 | 0 |
|
923 | 0 | BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1)); |
924 | 0 | BUILD_BUG_ON(RDWR_MPT_VIRT_END & ((1UL << L3_PAGETABLE_SHIFT) - 1)); |
925 | 0 | for ( i = 0, v = RDWR_MPT_VIRT_START, last_mfn = 0; |
926 | 0 | (i != xmml.max_extents) && |
927 | 0 | (v < (unsigned long)(machine_to_phys_mapping + max_page)); |
928 | 0 | i++, v += 1UL << L2_PAGETABLE_SHIFT ) |
929 | 0 | { |
930 | 0 | l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[ |
931 | 0 | l3_table_offset(v)]; |
932 | 0 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) |
933 | 0 | mfn = last_mfn; |
934 | 0 | else if ( !(l3e_get_flags(l3e) & _PAGE_PSE) ) |
935 | 0 | { |
936 | 0 | l2e = l3e_to_l2e(l3e)[l2_table_offset(v)]; |
937 | 0 | if ( l2e_get_flags(l2e) & _PAGE_PRESENT ) |
938 | 0 | mfn = l2e_get_pfn(l2e); |
939 | 0 | else |
940 | 0 | mfn = last_mfn; |
941 | 0 | } |
942 | 0 | else |
943 | 0 | { |
944 | 0 | mfn = l3e_get_pfn(l3e) |
945 | 0 | + (l2_table_offset(v) << PAGETABLE_ORDER); |
946 | 0 | } |
947 | 0 | ASSERT(mfn); |
948 | 0 | if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) ) |
949 | 0 | return -EFAULT; |
950 | 0 | last_mfn = mfn; |
951 | 0 | } |
952 | 0 |
|
953 | 0 | xmml.nr_extents = i; |
954 | 0 | if ( __copy_to_guest(arg, &xmml, 1) ) |
955 | 0 | return -EFAULT; |
956 | 0 |
|
957 | 0 | break; |
958 | 0 |
|
959 | 0 | case XENMEM_machphys_compat_mfn_list: |
960 | 0 | if ( copy_from_guest(&xmml, arg, 1) ) |
961 | 0 | return -EFAULT; |
962 | 0 |
|
963 | 0 | limit = (unsigned long)(compat_machine_to_phys_mapping + max_page); |
964 | 0 | if ( limit > RDWR_COMPAT_MPT_VIRT_END ) |
965 | 0 | limit = RDWR_COMPAT_MPT_VIRT_END; |
966 | 0 | for ( i = 0, v = RDWR_COMPAT_MPT_VIRT_START, last_mfn = 0; |
967 | 0 | (i != xmml.max_extents) && (v < limit); |
968 | 0 | i++, v += 1 << L2_PAGETABLE_SHIFT ) |
969 | 0 | { |
970 | 0 | l2e = compat_idle_pg_table_l2[l2_table_offset(v)]; |
971 | 0 | if ( l2e_get_flags(l2e) & _PAGE_PRESENT ) |
972 | 0 | mfn = l2e_get_pfn(l2e); |
973 | 0 | else |
974 | 0 | mfn = last_mfn; |
975 | 0 | ASSERT(mfn); |
976 | 0 | if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) ) |
977 | 0 | return -EFAULT; |
978 | 0 | last_mfn = mfn; |
979 | 0 | } |
980 | 0 |
|
981 | 0 | xmml.nr_extents = i; |
982 | 0 | if ( __copy_to_guest(arg, &xmml, 1) ) |
983 | 0 | rc = -EFAULT; |
984 | 0 |
|
985 | 0 | break; |
986 | 0 |
|
987 | 0 | case XENMEM_get_sharing_freed_pages: |
988 | 0 | return mem_sharing_get_nr_saved_mfns(); |
989 | 0 |
|
990 | 0 | case XENMEM_get_sharing_shared_pages: |
991 | 0 | return mem_sharing_get_nr_shared_mfns(); |
992 | 0 |
|
993 | 0 | case XENMEM_paging_op: |
994 | 0 | return mem_paging_memop(guest_handle_cast(arg, xen_mem_paging_op_t)); |
995 | 0 |
|
996 | 0 | case XENMEM_sharing_op: |
997 | 0 | return mem_sharing_memop(guest_handle_cast(arg, xen_mem_sharing_op_t)); |
998 | 0 |
|
999 | 0 | default: |
1000 | 0 | rc = -ENOSYS; |
1001 | 0 | break; |
1002 | 0 | } |
1003 | 0 |
|
1004 | 0 | return rc; |
1005 | 0 | } |
1006 | | |
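
Both XENMEM_machphys_mfn_list paths above emit one MFN per 2MB chunk of the M2P and, where a chunk is unmapped, repeat the previous MFN so the extent list handed back to the guest stays dense. A toy model of that last_mfn fallback (the array contents are invented; 0 stands in for a hole):

    #include <stdio.h>

    int main(void)
    {
        /* One entry per 2MB chunk; 0 marks an unmapped chunk in this model. */
        unsigned long chunk_mfn[] = { 0x1000, 0, 0, 0x4000, 0x5000 };
        unsigned long i, mfn, last_mfn = 0;

        for ( i = 0; i < sizeof(chunk_mfn) / sizeof(chunk_mfn[0]); i++ )
        {
            mfn = chunk_mfn[i] ? chunk_mfn[i] : last_mfn;
            printf("extent %lu -> mfn %#lx\n", i, mfn);
            last_mfn = mfn;
        }
        return 0;
    }
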
1007 | | long do_stack_switch(unsigned long ss, unsigned long esp) |
1008 | 0 | { |
1009 | 0 | fixup_guest_stack_selector(current->domain, ss); |
1010 | 0 | current->arch.pv_vcpu.kernel_ss = ss; |
1011 | 0 | current->arch.pv_vcpu.kernel_sp = esp; |
1012 | 0 | return 0; |
1013 | 0 | } |
1014 | | |
1015 | | long do_set_segment_base(unsigned int which, unsigned long base) |
1016 | 0 | { |
1017 | 0 | struct vcpu *v = current; |
1018 | 0 | long ret = 0; |
1019 | 0 |
|
1020 | 0 | if ( is_pv_32bit_vcpu(v) ) |
1021 | 0 | return -ENOSYS; /* x86/64 only. */ |
1022 | 0 |
|
1023 | 0 | switch ( which ) |
1024 | 0 | { |
1025 | 0 | case SEGBASE_FS: |
1026 | 0 | if ( is_canonical_address(base) ) |
1027 | 0 | { |
1028 | 0 | wrfsbase(base); |
1029 | 0 | v->arch.pv_vcpu.fs_base = base; |
1030 | 0 | } |
1031 | 0 | else |
1032 | 0 | ret = -EINVAL; |
1033 | 0 | break; |
1034 | 0 |
|
1035 | 0 | case SEGBASE_GS_USER: |
1036 | 0 | if ( is_canonical_address(base) ) |
1037 | 0 | { |
1038 | 0 | wrmsrl(MSR_SHADOW_GS_BASE, base); |
1039 | 0 | v->arch.pv_vcpu.gs_base_user = base; |
1040 | 0 | } |
1041 | 0 | else |
1042 | 0 | ret = -EINVAL; |
1043 | 0 | break; |
1044 | 0 |
|
1045 | 0 | case SEGBASE_GS_KERNEL: |
1046 | 0 | if ( is_canonical_address(base) ) |
1047 | 0 | { |
1048 | 0 | wrgsbase(base); |
1049 | 0 | v->arch.pv_vcpu.gs_base_kernel = base; |
1050 | 0 | } |
1051 | 0 | else |
1052 | 0 | ret = -EINVAL; |
1053 | 0 | break; |
1054 | 0 |
|
1055 | 0 | case SEGBASE_GS_USER_SEL: |
1056 | 0 | __asm__ __volatile__ ( |
1057 | 0 | " swapgs \n" |
1058 | 0 | "1: movl %k0,%%gs \n" |
1059 | 0 | " "safe_swapgs" \n" |
1060 | 0 | ".section .fixup,\"ax\" \n" |
1061 | 0 | "2: xorl %k0,%k0 \n" |
1062 | 0 | " jmp 1b \n" |
1063 | 0 | ".previous \n" |
1064 | 0 | _ASM_EXTABLE(1b, 2b) |
1065 | 0 | : : "r" (base&0xffff) ); |
1066 | 0 | break; |
1067 | 0 |
|
1068 | 0 | default: |
1069 | 0 | ret = -EINVAL; |
1070 | 0 | break; |
1071 | 0 | } |
1072 | 0 |
|
1073 | 0 | return ret; |
1074 | 0 | } |
1075 | | |
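
Every SEGBASE_* case above gates the new base on is_canonical_address(): on x86-64 with 48-bit virtual addresses, bits 63..47 must all equal bit 47. A standalone version of that predicate (48-bit VA width is assumed here; it is not taken from this file):

    #include <stdint.h>
    #include <stdio.h>

    static int is_canonical(uint64_t addr)
    {
        /* Arithmetic shift from bit 47: all-zeroes or all-ones is canonical. */
        return ((int64_t)addr >> 47) == 0 || ((int64_t)addr >> 47) == -1;
    }

    int main(void)
    {
        printf("%d %d\n",
               is_canonical(0x00007fffffffffffULL),    /* 1: highest user VA */
               is_canonical(0x0000800000000000ULL));   /* 0: non-canonical   */
        return 0;
    }
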
1076 | | |
1077 | | /* Returns TRUE if given descriptor is valid for GDT or LDT. */ |
1078 | | int check_descriptor(const struct domain *dom, struct desc_struct *d) |
1079 | 0 | { |
1080 | 0 | u32 a = d->a, b = d->b; |
1081 | 0 | u16 cs; |
1082 | 0 | unsigned int dpl; |
1083 | 0 |
|
1084 | 0 | /* A not-present descriptor will always fault, so is safe. */ |
1085 | 0 | if ( !(b & _SEGMENT_P) ) |
1086 | 0 | return 1; |
1087 | 0 |
|
1088 | 0 | /* Check and fix up the DPL. */ |
1089 | 0 | dpl = (b >> 13) & 3; |
1090 | 0 | __fixup_guest_selector(dom, dpl); |
1091 | 0 | b = (b & ~_SEGMENT_DPL) | (dpl << 13); |
1092 | 0 |
|
1093 | 0 | /* All code and data segments are okay. No base/limit checking. */ |
1094 | 0 | if ( (b & _SEGMENT_S) ) |
1095 | 0 | { |
1096 | 0 | if ( is_pv_32bit_domain(dom) ) |
1097 | 0 | { |
1098 | 0 | unsigned long base, limit; |
1099 | 0 |
|
1100 | 0 | if ( b & _SEGMENT_L ) |
1101 | 0 | goto bad; |
1102 | 0 |
|
1103 | 0 | /* |
1104 | 0 | * Older PAE Linux guests use segments which are limited to |
1105 | 0 | * 0xf6800000. Extend these to allow access to the larger read-only |
1106 | 0 | * M2P table available in 32on64 mode. |
1107 | 0 | */ |
1108 | 0 | base = (b & (0xff << 24)) | ((b & 0xff) << 16) | (a >> 16); |
1109 | 0 |
|
1110 | 0 | limit = (b & 0xf0000) | (a & 0xffff); |
1111 | 0 | limit++; /* We add one because limit is inclusive. */ |
1112 | 0 |
|
1113 | 0 | if ( (b & _SEGMENT_G) ) |
1114 | 0 | limit <<= 12; |
1115 | 0 |
|
1116 | 0 | if ( (base == 0) && (limit > HYPERVISOR_COMPAT_VIRT_START(dom)) ) |
1117 | 0 | { |
1118 | 0 | a |= 0x0000ffff; |
1119 | 0 | b |= 0x000f0000; |
1120 | 0 | } |
1121 | 0 | } |
1122 | 0 |
|
1123 | 0 | goto good; |
1124 | 0 | } |
1125 | 0 |
|
1126 | 0 | /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */ |
1127 | 0 | if ( (b & _SEGMENT_TYPE) == 0x000 ) |
1128 | 0 | return 1; |
1129 | 0 |
|
1130 | 0 | /* Everything but a call gate is discarded here. */ |
1131 | 0 | if ( (b & _SEGMENT_TYPE) != 0xc00 ) |
1132 | 0 | goto bad; |
1133 | 0 |
|
1134 | 0 | /* Validate the target code selector. */ |
1135 | 0 | cs = a >> 16; |
1136 | 0 | if ( !guest_gate_selector_okay(dom, cs) ) |
1137 | 0 | goto bad; |
1138 | 0 | /* |
1139 | 0 | * Force DPL to zero, causing a GP fault with its error code indicating |
1140 | 0 | * the gate in use, allowing emulation. This is necessary because with |
1141 | 0 | * native guests (kernel in ring 3) call gates cannot be used directly |
1142 | 0 | * to transition from user to kernel mode (and whether a gate is used |
1143 | 0 | * to enter the kernel can only be determined when the gate is being |
1144 | 0 | * used), and with compat guests call gates cannot be used at all as |
1145 | 0 | * there are only 64-bit ones. |
1146 | 0 | * Store the original DPL in the selector's RPL field. |
1147 | 0 | */ |
1148 | 0 | b &= ~_SEGMENT_DPL; |
1149 | 0 | cs = (cs & ~3) | dpl; |
1150 | 0 | a = (a & 0xffffU) | (cs << 16); |
1151 | 0 |
|
1152 | 0 | /* Reserved bits must be zero. */ |
1153 | 0 | if ( b & (is_pv_32bit_domain(dom) ? 0xe0 : 0xff) ) |
1154 | 0 | goto bad; |
1155 | 0 | |
1156 | 0 | good: |
1157 | 0 | d->a = a; |
1158 | 0 | d->b = b; |
1159 | 0 | return 1; |
1160 | 0 | bad: |
1161 | 0 | return 0; |
1162 | 0 | } |
1163 | | |
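
The 32-bit-guest path of check_descriptor() above reassembles base and limit from the two descriptor words, d->a (low) and d->b (high). The same decoding as a standalone program, fed a conventional flat 4GB code segment (the example descriptor value is an assumption, not taken from a real guest):

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint32_t a = 0x0000ffff, b = 0x00cf9a00;  /* flat 4GB code segment */
        uint32_t base  = (b & (0xffu << 24)) | ((b & 0xffu) << 16) | (a >> 16);
        uint64_t limit = ((b & 0xf0000) | (a & 0xffff)) + 1ULL;  /* inclusive */

        if ( b & (1u << 23) )   /* G bit: limit counted in 4K pages */
            limit <<= 12;
        printf("base=%#x limit=%#llx\n", base, (unsigned long long)limit);
        return 0;
    }
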
1164 | | int pagefault_by_memadd(unsigned long addr, struct cpu_user_regs *regs) |
1165 | 0 | { |
1166 | 0 | struct domain *d = current->domain; |
1167 | 0 |
|
1168 | 0 | return mem_hotplug && guest_mode(regs) && is_pv_32bit_domain(d) && |
1169 | 0 | (addr >= HYPERVISOR_COMPAT_VIRT_START(d)) && |
1170 | 0 | (addr < MACH2PHYS_COMPAT_VIRT_END); |
1171 | 0 | } |
1172 | | |
1173 | | int handle_memadd_fault(unsigned long addr, struct cpu_user_regs *regs) |
1174 | 0 | { |
1175 | 0 | struct domain *d = current->domain; |
1176 | 0 | l4_pgentry_t *pl4e = NULL; |
1177 | 0 | l4_pgentry_t l4e; |
1178 | 0 | l3_pgentry_t *pl3e = NULL; |
1179 | 0 | l3_pgentry_t l3e; |
1180 | 0 | l2_pgentry_t *pl2e = NULL; |
1181 | 0 | l2_pgentry_t l2e, idle_l2e; |
1182 | 0 | unsigned long mfn, idle_index; |
1183 | 0 | int ret = 0; |
1184 | 0 |
|
1185 | 0 | if (!is_pv_32bit_domain(d)) |
1186 | 0 | return 0; |
1187 | 0 |
|
1188 | 0 | if ( (addr < HYPERVISOR_COMPAT_VIRT_START(d)) || |
1189 | 0 | (addr >= MACH2PHYS_COMPAT_VIRT_END) ) |
1190 | 0 | return 0; |
1191 | 0 |
|
1192 | 0 | mfn = (read_cr3()) >> PAGE_SHIFT; |
1193 | 0 |
|
1194 | 0 | pl4e = map_domain_page(_mfn(mfn)); |
1195 | 0 |
|
1196 | 0 | l4e = pl4e[0]; |
1197 | 0 |
|
1198 | 0 | if (!(l4e_get_flags(l4e) & _PAGE_PRESENT)) |
1199 | 0 | goto unmap; |
1200 | 0 |
|
1201 | 0 | mfn = l4e_get_pfn(l4e); |
1202 | 0 | /* No need to get the page type here since this is the current CR3. */ |
1203 | 0 | pl3e = map_domain_page(_mfn(mfn)); |
1204 | 0 |
|
1205 | 0 | l3e = pl3e[3]; |
1206 | 0 |
|
1207 | 0 | if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) ) |
1208 | 0 | goto unmap; |
1209 | 0 |
|
1210 | 0 | mfn = l3e_get_pfn(l3e); |
1211 | 0 | pl2e = map_domain_page(_mfn(mfn)); |
1212 | 0 |
|
1213 | 0 | l2e = pl2e[l2_table_offset(addr)]; |
1214 | 0 |
|
1215 | 0 | if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT)) |
1216 | 0 | goto unmap; |
1217 | 0 |
|
1218 | 0 | idle_index = (l2_table_offset(addr) - |
1219 | 0 | COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(d))/ |
1220 | 0 | sizeof(l2_pgentry_t); |
1221 | 0 | idle_l2e = compat_idle_pg_table_l2[idle_index]; |
1222 | 0 | if (!(l2e_get_flags(idle_l2e) & _PAGE_PRESENT)) |
1223 | 0 | goto unmap; |
1224 | 0 |
|
1225 | 0 | memcpy(&pl2e[l2_table_offset(addr)], |
1226 | 0 | &compat_idle_pg_table_l2[idle_index], |
1227 | 0 | sizeof(l2_pgentry_t)); |
1228 | 0 |
|
1229 | 0 | ret = EXCRET_fault_fixed; |
1230 | 0 |
|
1231 | 0 | unmap: |
1232 | 0 | if ( pl4e ) |
1233 | 0 | unmap_domain_page(pl4e); |
1234 | 0 | if ( pl3e ) |
1235 | 0 | unmap_domain_page(pl3e); |
1236 | 0 | if ( pl2e ) |
1237 | 0 | unmap_domain_page(pl2e); |
1238 | 0 |
|
1239 | 0 | return ret; |
1240 | 0 | } |
1241 | | |
1242 | | void domain_set_alloc_bitsize(struct domain *d) |
1243 | 0 | { |
1244 | 0 | if ( !is_pv_32bit_domain(d) || |
1245 | 0 | (MACH2PHYS_COMPAT_NR_ENTRIES(d) >= max_page) || |
1246 | 0 | d->arch.physaddr_bitsize > 0 ) |
1247 | 0 | return; |
1248 | 0 | d->arch.physaddr_bitsize = |
1249 | 0 | /* 2^n entries can be contained in guest's p2m mapping space */ |
1250 | 0 | fls(MACH2PHYS_COMPAT_NR_ENTRIES(d)) - 1 |
1251 | 0 | /* 2^n pages -> 2^(n+PAGE_SHIFT) bits */ |
1252 | 0 | + PAGE_SHIFT; |
1253 | 0 | } |
1254 | | |
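
domain_set_alloc_bitsize() above turns "the guest's compat M2P window holds 2^n entries" into an address-width clamp: fls(entries) - 1 recovers n in pages, and adding PAGE_SHIFT converts pages to address bits. A standalone model with a toy fls (the entry count is an assumed example):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    static int fls_(unsigned long x)   /* 1-based index of the highest set bit */
    {
        int r = 0;

        while ( x )
        {
            r++;
            x >>= 1;
        }
        return r;
    }

    int main(void)
    {
        unsigned long nr_entries = 1UL << 20;   /* assumed 2^20 mappable frames */

        printf("physaddr_bitsize = %d\n", fls_(nr_entries) - 1 + PAGE_SHIFT);
        return 0;
    }
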
1255 | | unsigned int domain_clamp_alloc_bitsize(struct domain *d, unsigned int bits) |
1256 | 43.2k | { |
1257 | 43.2k | if ( (d == NULL) || (d->arch.physaddr_bitsize == 0) ) |
1258 | 43.2k | return bits; |
1259 | 0 | return min(d->arch.physaddr_bitsize, bits); |
1260 | 43.2k | } |
1261 | | |
1262 | | static int transfer_pages_to_heap(struct mem_hotadd_info *info) |
1263 | 0 | { |
1264 | 0 | unsigned long i; |
1265 | 0 | struct page_info *pg; |
1266 | 0 |
|
1267 | 0 | /* |
1268 | 0 | * Mark the allocated pages before handing the free pages to the buddy |
1269 | 0 | * allocator, to avoid merging in free_heap_pages. |
1270 | 0 | */ |
1271 | 0 | for (i = info->spfn; i < info->cur; i++) |
1272 | 0 | { |
1273 | 0 | pg = mfn_to_page(i); |
1274 | 0 | pg->count_info = PGC_state_inuse; |
1275 | 0 | } |
1276 | 0 |
|
1277 | 0 | init_domheap_pages(pfn_to_paddr(info->cur), pfn_to_paddr(info->epfn)); |
1278 | 0 |
|
1279 | 0 | return 0; |
1280 | 0 | } |
1281 | | |
1282 | | static int mem_hotadd_check(unsigned long spfn, unsigned long epfn) |
1283 | 0 | { |
1284 | 0 | unsigned long s, e, length, sidx, eidx; |
1285 | 0 |
|
1286 | 0 | if ( (spfn >= epfn) ) |
1287 | 0 | return 0; |
1288 | 0 |
|
1289 | 0 | if (pfn_to_pdx(epfn) > FRAMETABLE_NR) |
1290 | 0 | return 0; |
1291 | 0 |
|
1292 | 0 | if ( (spfn | epfn) & ((1UL << PAGETABLE_ORDER) - 1) ) |
1293 | 0 | return 0; |
1294 | 0 |
|
1295 | 0 | if ( (spfn | epfn) & pfn_hole_mask ) |
1296 | 0 | return 0; |
1297 | 0 |
|
1298 | 0 | /* Make sure the new range is not already present. */ |
1299 | 0 | sidx = ((pfn_to_pdx(spfn) + PDX_GROUP_COUNT - 1) & ~(PDX_GROUP_COUNT - 1)) |
1300 | 0 | / PDX_GROUP_COUNT; |
1301 | 0 | eidx = (pfn_to_pdx(epfn - 1) & ~(PDX_GROUP_COUNT - 1)) / PDX_GROUP_COUNT; |
1302 | 0 | if (sidx >= eidx) |
1303 | 0 | return 0; |
1304 | 0 |
|
1305 | 0 | s = find_next_zero_bit(pdx_group_valid, eidx, sidx); |
1306 | 0 | if ( s > eidx ) |
1307 | 0 | return 0; |
1308 | 0 | e = find_next_bit(pdx_group_valid, eidx, s); |
1309 | 0 | if ( e < eidx ) |
1310 | 0 | return 0; |
1311 | 0 |
|
1312 | 0 | /* Calculate the maximum number of m2p/compat m2p/frametable pages required. */ |
1313 | 0 | s = (spfn & ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1)); |
1314 | 0 | e = (epfn + (1UL << (L2_PAGETABLE_SHIFT - 3)) - 1) & |
1315 | 0 | ~((1UL << (L2_PAGETABLE_SHIFT - 3)) - 1); |
1316 | 0 |
|
1317 | 0 | length = (e - s) * sizeof(unsigned long); |
1318 | 0 |
|
1319 | 0 | s = (spfn & ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1)); |
1320 | 0 | e = (epfn + (1UL << (L2_PAGETABLE_SHIFT - 2)) - 1) & |
1321 | 0 | ~((1UL << (L2_PAGETABLE_SHIFT - 2)) - 1); |
1322 | 0 |
|
1323 | 0 | e = min_t(unsigned long, e, |
1324 | 0 | (RDWR_COMPAT_MPT_VIRT_END - RDWR_COMPAT_MPT_VIRT_START) >> 2); |
1325 | 0 |
|
1326 | 0 | if ( e > s ) |
1327 | 0 | length += (e -s) * sizeof(unsigned int); |
1328 | 0 |
|
1329 | 0 | s = pfn_to_pdx(spfn) & ~(PDX_GROUP_COUNT - 1); |
1330 | 0 | e = ( pfn_to_pdx(epfn) + (PDX_GROUP_COUNT - 1) ) & ~(PDX_GROUP_COUNT - 1); |
1331 | 0 |
|
1332 | 0 | length += (e - s) * sizeof(struct page_info); |
1333 | 0 |
|
1334 | 0 | if ((length >> PAGE_SHIFT) > (epfn - spfn)) |
1335 | 0 | return 0; |
1336 | 0 |
|
1337 | 0 | return 1; |
1338 | 0 | } |
1339 | | |
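
The tail of mem_hotadd_check() above verifies that the hot-added range can host its own metadata: native M2P (8 bytes/frame), compat M2P (4 bytes/frame, clamped to its window) and the frame table (one struct page_info per frame) must together fit inside the added pages. A rough model that ignores the alignment round-ups and assumes a 64-byte page_info (both are assumptions for illustration):

    #include <stdio.h>

    #define PAGE_SHIFT 12

    int main(void)
    {
        unsigned long spfn = 0x100000, epfn = 0x140000;  /* example 1GB range */
        unsigned long pfns = epfn - spfn;
        unsigned long bytes = pfns * 8      /* native M2P entries */
                            + pfns * 4      /* compat M2P entries */
                            + pfns * 64;    /* assumed page_info  */

        printf("metadata needs %lu pages out of %lu added -> %s\n",
               bytes >> PAGE_SHIFT, pfns,
               (bytes >> PAGE_SHIFT) > pfns ? "reject" : "ok");
        return 0;
    }
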
1340 | | /* |
1341 | | * Be a bit paranoid about memory allocation failures, since such a |
1342 | | * failure may be the very reason for the memory add. |
1343 | | */ |
1344 | | int memory_add(unsigned long spfn, unsigned long epfn, unsigned int pxm) |
1345 | 0 | { |
1346 | 0 | struct mem_hotadd_info info; |
1347 | 0 | int ret; |
1348 | 0 | nodeid_t node; |
1349 | 0 | unsigned long old_max = max_page, old_total = total_pages; |
1350 | 0 | unsigned long old_node_start, old_node_span, orig_online; |
1351 | 0 | unsigned long i; |
1352 | 0 |
|
1353 | 0 | dprintk(XENLOG_INFO, "memory_add %lx ~ %lx with pxm %x\n", spfn, epfn, pxm); |
1354 | 0 |
|
1355 | 0 | if ( !mem_hotadd_check(spfn, epfn) ) |
1356 | 0 | return -EINVAL; |
1357 | 0 |
|
1358 | 0 | if ( (node = setup_node(pxm)) == NUMA_NO_NODE ) |
1359 | 0 | return -EINVAL; |
1360 | 0 |
|
1361 | 0 | if ( !valid_numa_range(spfn << PAGE_SHIFT, epfn << PAGE_SHIFT, node) ) |
1362 | 0 | { |
1363 | 0 | printk(XENLOG_WARNING |
1364 | 0 | "pfn range %lx..%lx PXM %x node %x is not NUMA-valid\n", |
1365 | 0 | spfn, epfn, pxm, node); |
1366 | 0 | return -EINVAL; |
1367 | 0 | } |
1368 | 0 |
|
1369 | 0 | i = virt_to_mfn(HYPERVISOR_VIRT_END - 1) + 1; |
1370 | 0 | if ( spfn < i ) |
1371 | 0 | { |
1372 | 0 | ret = map_pages_to_xen((unsigned long)mfn_to_virt(spfn), spfn, |
1373 | 0 | min(epfn, i) - spfn, PAGE_HYPERVISOR); |
1374 | 0 | if ( ret ) |
1375 | 0 | goto destroy_directmap; |
1376 | 0 | } |
1377 | 0 | if ( i < epfn ) |
1378 | 0 | { |
1379 | 0 | if ( i < spfn ) |
1380 | 0 | i = spfn; |
1381 | 0 | ret = map_pages_to_xen((unsigned long)mfn_to_virt(i), i, |
1382 | 0 | epfn - i, __PAGE_HYPERVISOR_RW); |
1383 | 0 | if ( ret ) |
1384 | 0 | goto destroy_directmap; |
1385 | 0 | } |
1386 | 0 |
|
1387 | 0 | old_node_start = node_start_pfn(node); |
1388 | 0 | old_node_span = node_spanned_pages(node); |
1389 | 0 | orig_online = node_online(node); |
1390 | 0 |
|
1391 | 0 | if ( !orig_online ) |
1392 | 0 | { |
1393 | 0 | dprintk(XENLOG_WARNING, "node %x pxm %x is not online\n",node, pxm); |
1394 | 0 | NODE_DATA(node)->node_start_pfn = spfn; |
1395 | 0 | NODE_DATA(node)->node_spanned_pages = |
1396 | 0 | epfn - node_start_pfn(node); |
1397 | 0 | node_set_online(node); |
1398 | 0 | } |
1399 | 0 | else |
1400 | 0 | { |
1401 | 0 | if (node_start_pfn(node) > spfn) |
1402 | 0 | NODE_DATA(node)->node_start_pfn = spfn; |
1403 | 0 | if (node_end_pfn(node) < epfn) |
1404 | 0 | NODE_DATA(node)->node_spanned_pages = epfn - node_start_pfn(node); |
1405 | 0 | } |
1406 | 0 |
|
1407 | 0 | info.spfn = spfn; |
1408 | 0 | info.epfn = epfn; |
1409 | 0 | info.cur = spfn; |
1410 | 0 |
|
1411 | 0 | ret = extend_frame_table(&info); |
1412 | 0 | if (ret) |
1413 | 0 | goto destroy_frametable; |
1414 | 0 |
|
1415 | 0 | /* Set max_page, as setup_m2p_table will use it. */ |
1416 | 0 | if (max_page < epfn) |
1417 | 0 | { |
1418 | 0 | max_page = epfn; |
1419 | 0 | max_pdx = pfn_to_pdx(max_page - 1) + 1; |
1420 | 0 | } |
1421 | 0 | total_pages += epfn - spfn; |
1422 | 0 |
|
1423 | 0 | set_pdx_range(spfn, epfn); |
1424 | 0 | ret = setup_m2p_table(&info); |
1425 | 0 |
|
1426 | 0 | if ( ret ) |
1427 | 0 | goto destroy_m2p; |
1428 | 0 |
|
1429 | 0 | if ( iommu_enabled && !iommu_passthrough && !need_iommu(hardware_domain) ) |
1430 | 0 | { |
1431 | 0 | for ( i = spfn; i < epfn; i++ ) |
1432 | 0 | if ( iommu_map_page(hardware_domain, i, i, IOMMUF_readable|IOMMUF_writable) ) |
1433 | 0 | break; |
1434 | 0 | if ( i != epfn ) |
1435 | 0 | { |
1436 | 0 | while (i-- > old_max) |
1437 | 0 | /* If statement to satisfy __must_check. */ |
1438 | 0 | if ( iommu_unmap_page(hardware_domain, i) ) |
1439 | 0 | continue; |
1440 | 0 |
|
1441 | 0 | goto destroy_m2p; |
1442 | 0 | } |
1443 | 0 | } |
1444 | 0 |
|
1445 | 0 | /* Past this point we can no longer revert. */ |
1446 | 0 | share_hotadd_m2p_table(&info); |
1447 | 0 | transfer_pages_to_heap(&info); |
1448 | 0 |
|
1449 | 0 | return 0; |
1450 | 0 |
|
1451 | 0 | destroy_m2p: |
1452 | 0 | destroy_m2p_mapping(&info); |
1453 | 0 | max_page = old_max; |
1454 | 0 | total_pages = old_total; |
1455 | 0 | max_pdx = pfn_to_pdx(max_page - 1) + 1; |
1456 | 0 | destroy_frametable: |
1457 | 0 | cleanup_frame_table(&info); |
1458 | 0 | if ( !orig_online ) |
1459 | 0 | node_set_offline(node); |
1460 | 0 | NODE_DATA(node)->node_start_pfn = old_node_start; |
1461 | 0 | NODE_DATA(node)->node_spanned_pages = old_node_span; |
1462 | 0 | destroy_directmap: |
1463 | 0 | destroy_xen_mappings((unsigned long)mfn_to_virt(spfn), |
1464 | 0 | (unsigned long)mfn_to_virt(epfn)); |
1465 | 0 |
|
1466 | 0 | return ret; |
1467 | 0 | } |
1468 | | |
1469 | | #include "compat/mm.c" |
1470 | | |
1471 | | /* |
1472 | | * Local variables: |
1473 | | * mode: C |
1474 | | * c-file-style: "BSD" |
1475 | | * c-basic-offset: 4 |
1476 | | * tab-width: 4 |
1477 | | * indent-tabs-mode: nil |
1478 | | * End: |
1479 | | */ |