debuggers.hg

view xen/arch/x86/x86_64/mm.c @ 3668:d55d523078f7

bitkeeper revision 1.1159.212.77 (4202221693AFbvFZWeMHHIjQfbzTIQ)

More x86_64 progress. Many more gaps filled in. Next step is DOM0
construction.
Signed-off-by: keir.fraser@cl.cam.ac.uk
author kaf24@scramble.cl.cam.ac.uk
date Thu Feb 03 13:07:34 2005 +0000 (2005-02-03)
parents fec8b1778268
children 677cb76cff18
line source

/******************************************************************************
 * arch/x86/x86_64/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/domain_page.h>

/* Allocate a page from the boot allocator before the Xen heap is
 * initialised, or from the Xen heap thereafter. */
void *safe_page_alloc(void)
{
    extern int early_boot;
    if ( early_boot )
        return __va(alloc_boot_pages(PAGE_SIZE, PAGE_SIZE));
    return (void *)alloc_xenheap_page();
}

/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
int map_pages(
    pagetable_t *pt,
    unsigned long v,
    unsigned long p,
    unsigned long s,
    unsigned long flags)
{
    l4_pgentry_t *pl4e;
    l3_pgentry_t *pl3e;
    l2_pgentry_t *pl2e;
    l1_pgentry_t *pl1e;
    void         *newpg;

    while ( s != 0 )
    {
        pl4e = &pt[l4_table_offset(v)];
        if ( !(l4_pgentry_val(*pl4e) & _PAGE_PRESENT) )
        {
            newpg = safe_page_alloc();
            clear_page(newpg);
            *pl4e = mk_l4_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
        }

        pl3e = l4_pgentry_to_l3(*pl4e) + l3_table_offset(v);
        if ( !(l3_pgentry_val(*pl3e) & _PAGE_PRESENT) )
        {
            newpg = safe_page_alloc();
            clear_page(newpg);
            *pl3e = mk_l3_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
        }

        pl2e = l3_pgentry_to_l2(*pl3e) + l2_table_offset(v);

        if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
        {
            /* Super-page mapping. */
            if ( (l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
                __flush_tlb_pge();
            *pl2e = mk_l2_pgentry(p|flags|_PAGE_PSE);

            v += 1 << L2_PAGETABLE_SHIFT;
            p += 1 << L2_PAGETABLE_SHIFT;
            s -= 1 << L2_PAGETABLE_SHIFT;
        }
        else
        {
            /* Normal page mapping. */
            if ( !(l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
            {
                newpg = safe_page_alloc();
                clear_page(newpg);
                *pl2e = mk_l2_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
            }
            pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(v);
            if ( (l1_pgentry_val(*pl1e) & _PAGE_PRESENT) )
                __flush_tlb_one(v);
            *pl1e = mk_l1_pgentry(p|flags);

            v += 1 << L1_PAGETABLE_SHIFT;
            p += 1 << L1_PAGETABLE_SHIFT;
            s -= 1 << L1_PAGETABLE_SHIFT;
        }
    }

    return 0;
}
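
/*
 * Hypothetical usage sketch (not part of the original file): map a 2MB
 * physical region into the idle page table.  The physical and virtual
 * addresses below are made-up example values; because the size, virtual
 * address and physical address are all 2MB-aligned, map_pages() takes the
 * super-page (PSE) path above rather than filling an L1 table.
 */
#if 0
static void example_map_region(void)
{
    unsigned long example_phys = 0x40000000UL;                /* made up */
    unsigned long example_virt = PAGE_OFFSET + example_phys;  /* made up */
    map_pages(idle_pg_table, example_virt, example_phys,
              1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR);
}
#endif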

void __set_fixmap(
    enum fixed_addresses idx, unsigned long p, unsigned long flags)
{
    if ( unlikely(idx >= __end_of_fixed_addresses) )
        BUG();
    map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags);
}
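
/*
 * Hypothetical usage sketch (not part of the original file): FIX_EXAMPLE is
 * a made-up stand-in for a real member of enum fixed_addresses.  A caller
 * maps a physical frame at its fixed virtual slot and then accesses it
 * through fix_to_virt().
 */
#if 0
static void example_use_fixmap(unsigned long some_phys_addr)
{
    __set_fixmap(FIX_EXAMPLE, some_phys_addr, PAGE_HYPERVISOR);
    *(volatile unsigned long *)fix_to_virt(FIX_EXAMPLE) = 0;
}
#endif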

void __init paging_init(void)
{
    void *newpt;
    unsigned long i, p, max;

    /* Map all of physical memory. */
    max = (max_page + (1UL << L2_PAGETABLE_SHIFT) - 1UL) &
        ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
    map_pages(idle_pg_table, PAGE_OFFSET, 0, max, PAGE_HYPERVISOR);

    /*
     * Allocate and map the machine-to-phys table.
     * This also ensures L3 is present for ioremap().
     */
    for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
    {
        p = alloc_boot_pages(1UL << L2_PAGETABLE_SHIFT,
                             1UL << L2_PAGETABLE_SHIFT);
        if ( p == 0 )
            panic("Not enough memory for m2p table\n");
        map_pages(idle_pg_table, RDWR_MPT_VIRT_START + i*8, p,
                  1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR);
        memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55,
               1UL << L2_PAGETABLE_SHIFT);
    }

    /* Create read-only mapping of MPT for guest-OS use. */
    newpt = (void *)alloc_xenheap_page();
    clear_page(newpt);
    idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] =
        mk_l4_pgentry((__pa(newpt) | __PAGE_HYPERVISOR | _PAGE_USER) &
                      ~_PAGE_RW);
    /* Copy the L3 mappings from the RDWR_MPT area. */
    p  = l4_pgentry_val(idle_pg_table[l4_table_offset(RDWR_MPT_VIRT_START)]);
    p &= PAGE_MASK;
    p += l3_table_offset(RDWR_MPT_VIRT_START) * sizeof(l3_pgentry_t);
    newpt = (void *)((unsigned long)newpt +
                     (l3_table_offset(RO_MPT_VIRT_START) *
                      sizeof(l3_pgentry_t)));
    memcpy(newpt, __va(p),
           (RDWR_MPT_VIRT_END - RDWR_MPT_VIRT_START) >> L3_PAGETABLE_SHIFT);

    /* Set up linear page table mapping. */
    idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
        mk_l4_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
}
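
/*
 * Hypothetical illustration (not part of the original file): the table
 * mapped at RDWR_MPT_VIRT_START above is indexed by machine frame number,
 * one 8-byte entry per frame, which is why paging_init() advances the
 * virtual address by i*8.  Recording the pseudo-physical frame number for
 * machine frame @mfn would look roughly like this.
 */
#if 0
static void example_m2p_update(unsigned long mfn, unsigned long pfn)
{
    ((unsigned long *)RDWR_MPT_VIRT_START)[mfn] = pfn;
}
#endif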

/* Drop the boot-time identity mapping of low memory from the idle L4. */
void __init zap_low_mappings(void)
{
    idle_pg_table[0] = mk_l4_pgentry(0);
    flush_tlb_all_pge();
}

void subarch_init_memory(struct domain *dom_xen)
{
    unsigned long i, v, m2p_start_mfn;
    l3_pgentry_t l3e;
    l2_pgentry_t l2e;

    /* M2P table is mappable read-only by privileged domains. */
    for ( v  = RDWR_MPT_VIRT_START;
          v != RDWR_MPT_VIRT_END;
          v += 1 << L2_PAGETABLE_SHIFT )
    {
        l3e = l4_pgentry_to_l3(idle_pg_table[l4_table_offset(v)])[
            l3_table_offset(v)];
        if ( !(l3_pgentry_val(l3e) & _PAGE_PRESENT) )
            continue;
        l2e = l3_pgentry_to_l2(l3e)[l2_table_offset(v)];
        if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
            continue;
        m2p_start_mfn = l2_pgentry_to_pagenr(l2e);

        for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
        {
            frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
            /* Type as a GDT page so that non-privileged domains can only
             * map it read-only. */
            frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
            frame_table[m2p_start_mfn+i].u.inuse.domain = dom_xen;
        }
    }
}

/*
 * Allows shooting down of borrowed page-table use on specific CPUs.
 * Specifically, we borrow page tables when running the idle domain.
 */
static void __synchronise_pagetables(void *mask)
{
    struct exec_domain *ed = current;
    if ( ((unsigned long)mask & (1 << ed->processor)) &&
         is_idle_task(ed->domain) )
        write_ptbase(&ed->mm);
}

void synchronise_pagetables(unsigned long cpu_mask)
{
    __synchronise_pagetables((void *)cpu_mask);
    smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
}

long do_stack_switch(unsigned long ss, unsigned long esp)
{
    if ( (ss & 3) != 3 )
        return -EPERM;
    current->thread.guestos_ss = ss;
    current->thread.guestos_sp = esp;
    return 0;
}

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(unsigned long *d)
{
    unsigned long base, limit, a = d[0], b = d[1];

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /*
     * We don't allow a DPL of zero. There is no legitimate reason for
     * specifying DPL==0, and it gets rather dangerous if we also accept call
     * gates (consider a call gate pointing at another guestos descriptor with
     * DPL 0 -- this would get the OS ring-0 privileges).
     */
    if ( (b & _SEGMENT_DPL) == 0 )
        goto bad;

    if ( !(b & _SEGMENT_S) )
    {
        /*
         * System segment:
         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
         *  2. Don't allow TSS descriptors or task gates as we don't
         *     virtualise x86 tasks.
         *  3. Don't allow LDT descriptors because they're unnecessary and
         *     I'm uneasy about allowing an LDT page to contain LDT
         *     descriptors. In any case, Xen automatically creates the
         *     required descriptor when reloading the LDT register.
         *  4. We allow call gates but they must not jump to a private segment.
         */

        /* Disallow everything but call gates. */
        if ( (b & _SEGMENT_TYPE) != 0xc00 )
            goto bad;

#if 0
        /* Can't allow far jump to a Xen-private segment. */
        if ( !VALID_CODESEL(a>>16) )
            goto bad;
#endif

        /* Reserved bits must be zero. */
        if ( (b & 0xe0) != 0 )
            goto bad;

        /* No base/limit check is needed for a call gate. */
        goto good;
    }

    /* Check that base is at least a page away from Xen-private area. */
    base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
        goto bad;

    /* Check and truncate the limit if necessary. */
    limit = (b&0xf0000) | (a&0xffff);
    limit++; /* We add one because limit is inclusive. */
    if ( (b & _SEGMENT_G) )
        limit <<= 12;

    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    {
        /*
         * Grows-down limit check.
         * NB. limit == 0xFFFFF provides no access      (if G=1).
         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
         */
        if ( (base + limit) > base )
        {
            limit = -(base & PAGE_MASK);
            goto truncate;
        }
    }
    else
    {
        /*
         * Grows-up limit check.
         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
         *     limit == 0x00000 provides 4kB access (if G=1).
         */
        if ( ((base + limit) <= base) ||
             ((base + limit) > PAGE_OFFSET) )
        {
            limit = PAGE_OFFSET - base;
        truncate:
            if ( !(b & _SEGMENT_G) )
                goto bad; /* too dangerous; too hard to work out... */
            limit = (limit >> 12) - 1;
            d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
            d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
        }
    }

 good:
    return 1;
 bad:
    return 0;
}
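
/*
 * Hypothetical worked example (not part of the original file): a flat 4GB
 * ring-1 data segment encodes as d[0] = 0x0000ffff, d[1] = 0x00cfb200
 * (base 0, limit 0xfffff, G=1, S=1, DPL=1, read/write data type).
 * check_descriptor() accepts it, whereas the ring-0 variant 0x00cf9200
 * fails the DPL==0 test above.
 */
#if 0
static void example_check_descriptor(void)
{
    unsigned long flat_ring1_data[2] = { 0x0000ffff, 0x00cfb200 };
    unsigned long flat_ring0_data[2] = { 0x0000ffff, 0x00cf9200 };
    ASSERT(check_descriptor(flat_ring1_data) == 1);
    ASSERT(check_descriptor(flat_ring0_data) == 0);
}
#endif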

void destroy_gdt(struct exec_domain *ed)
{
    int i;
    unsigned long pfn;

    for ( i = 0; i < 16; i++ )
    {
        if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[i])) != 0 )
            put_page_and_type(&frame_table[pfn]);
        ed->mm.perdomain_ptes[i] = mk_l1_pgentry(0);
    }
}

long set_gdt(struct exec_domain *ed,
             unsigned long *frames,
             unsigned int entries)
{
    struct domain *d = ed->domain;
    /* NB. There are 512 8-byte entries per GDT page. */
    int i = 0, nr_pages = (entries + 511) / 512;
    struct desc_struct *vgdt;
    unsigned long pfn;

    /* Check the first page in the new GDT. */
    if ( (pfn = frames[0]) >= max_page )
        goto fail;

    /* The first page is special because Xen owns a range of entries in it. */
    if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
    {
        /* GDT checks failed: try zapping the Xen reserved entries. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_writable_page) )
            goto fail;
        vgdt = map_domain_mem(pfn << PAGE_SHIFT);
        memset(vgdt + FIRST_RESERVED_GDT_ENTRY, 0,
               NR_RESERVED_GDT_ENTRIES*8);
        unmap_domain_mem(vgdt);
        put_page_and_type(&frame_table[pfn]);

        /* Okay, we zapped the entries. Now try the GDT checks again. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;
    }

    /* Check the remaining pages in the new GDT. */
    for ( i = 1; i < nr_pages; i++ )
        if ( ((pfn = frames[i]) >= max_page) ||
             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;

    /* Copy reserved GDT entries to the new GDT. */
    vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
    memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
           gdt_table + FIRST_RESERVED_GDT_ENTRY,
           NR_RESERVED_GDT_ENTRIES*8);
    unmap_domain_mem(vgdt);

    /* Tear down the old GDT. */
    destroy_gdt(ed);

    /* Install the new GDT. */
    for ( i = 0; i < nr_pages; i++ )
        ed->mm.perdomain_ptes[i] =
            mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    SET_GDT_ADDRESS(ed, GDT_VIRT_START(ed));
    SET_GDT_ENTRIES(ed, entries);

    return 0;

 fail:
    while ( i-- > 0 )
        put_page_and_type(&frame_table[frames[i]]);
    return -EINVAL;
}

long do_set_gdt(unsigned long *frame_list, unsigned int entries)
{
    int nr_pages = (entries + 511) / 512;
    unsigned long frames[16];
    long ret;

    if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
        return -EINVAL;

    if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
        return -EFAULT;

    if ( (ret = set_gdt(current, frames, entries)) == 0 )
    {
        local_flush_tlb();
        __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
    }

    return ret;
}
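
/*
 * Worked arithmetic note (not part of the original file): with 512
 * eight-byte descriptors per 4kB page, a request of entries = 2048 needs
 * nr_pages = (2048 + 511) / 512 = 4 frames in frame_list, and the maximum
 * of 8192 entries needs (8192 + 511) / 512 = 16 frames, matching the size
 * of the frames[] array above.
 */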

long do_update_descriptor(
    unsigned long pa, unsigned long word1, unsigned long word2)
{
    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
    struct pfn_info *page;
    long ret = -EINVAL;

    d[0] = word1;
    d[1] = word2;

    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) )
        return -EINVAL;

    page = &frame_table[pfn];
    if ( unlikely(!get_page(page, current->domain)) )
        return -EINVAL;

    /* Check if the given frame is in use in an unsafe context. */
    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_gdt_page:
        /* Disallow updates of Xen-reserved descriptors in the current GDT. */
        if ( (l1_pgentry_to_pagenr(current->mm.perdomain_ptes[0]) == pfn) &&
             (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
             (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
            goto out;
        if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
            goto out;
        break;
    case PGT_ldt_page:
        if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
            goto out;
        break;
    default:
        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
            goto out;
        break;
    }

    /* All is good so make the update. */
    gdt_pent = map_domain_mem(pa);
    memcpy(gdt_pent, d, 8);
    unmap_domain_mem(gdt_pent);

    put_page_type(page);

    ret = 0; /* success */

 out:
    put_page(page);
    return ret;
}
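
/*
 * Hypothetical usage sketch (not part of the original file): update one
 * descriptor in a GDT frame.  gdt_mfn and the descriptor words are made-up
 * example values (the words are the flat ring-1 data segment from the
 * check_descriptor() example); the machine address is the frame base plus
 * entry * 8, which satisfies the (pa & 7) alignment check above.
 */
#if 0
static long example_update_descriptor(unsigned long gdt_mfn, int entry)
{
    return do_update_descriptor((gdt_mfn << PAGE_SHIFT) + entry * 8,
                                0x0000ffff, 0x00cfb200);
}
#endif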

#ifdef MEMORY_GUARD

#if 1

void *memguard_init(void *heap_start) { return heap_start; }
void memguard_guard_range(void *p, unsigned long l) {}
void memguard_unguard_range(void *p, unsigned long l) {}

#else

void *memguard_init(void *heap_start)
{
    l1_pgentry_t *l1;
    int i, j;

    /* Round the allocation pointer up to a page boundary. */
    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
                          PAGE_MASK);

    /* Memory guarding is incompatible with super pages. */
    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
    {
        l1 = (l1_pgentry_t *)heap_start;
        heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
        for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
            l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
                                  (j << L1_PAGETABLE_SHIFT) |
                                  __PAGE_HYPERVISOR);
        idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
            mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
    }

    return heap_start;
}

static void __memguard_change_range(void *p, unsigned long l, int guard)
{
    l1_pgentry_t *l1;
    l2_pgentry_t *l2;
    unsigned long _p = (unsigned long)p;
    unsigned long _l = (unsigned long)l;

    /* Ensure we are dealing with a page-aligned whole number of pages. */
    ASSERT((_p&PAGE_MASK) != 0);
    ASSERT((_l&PAGE_MASK) != 0);
    ASSERT((_p&~PAGE_MASK) == 0);
    ASSERT((_l&~PAGE_MASK) == 0);

    while ( _l != 0 )
    {
        l2 = &idle_pg_table[l2_table_offset(_p)];
        l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
        if ( guard )
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
        else
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
        _p += PAGE_SIZE;
        _l -= PAGE_SIZE;
    }
}

void memguard_guard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 1);
    local_flush_tlb();
}

void memguard_unguard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 0);
}

#endif

#endif
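
/*
 * Hypothetical usage sketch (not part of the original file): with the
 * non-stub implementation selected above, a caller can temporarily make a
 * page-aligned heap region non-present to catch stray accesses, and restore
 * it afterwards.  some_heap_page is a made-up example pointer.
 */
#if 0
static void example_guard_page(void *some_heap_page)
{
    memguard_guard_range(some_heap_page, PAGE_SIZE);
    /* ... any access to the page now faults ... */
    memguard_unguard_range(some_heap_page, PAGE_SIZE);
}
#endif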