debuggers.hg

view xen/arch/x86/x86_64/mm.c @ 3632:fec8b1778268

bitkeeper revision 1.1159.212.60 (41febc4bKKSkh9u-Zes9v2CmBuLZxA)

More bootstrap fixes for x86/64. Next thing to do is sort out the IDT and
get traps.c working; then we can get rid of a bunch of dummy labels from
end of boot/x86_64.S. We're also going to need some kind of entry.S before
we can safely enable interrupts. Also bear in mind that not all of physical
RAM may be mapped (only first 1GB) and no m2p table is yet allocated or
mapped. Plenty to be done!
author kaf24@viper.(none)
date Mon Jan 31 23:16:27 2005 +0000 (2005-01-31)
parents c754bd0be650
children d55d523078f7
line source
/******************************************************************************
 * arch/x86/x86_64/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/domain_page.h>

unsigned long m2p_start_mfn; /* XXX Kill this (in 32-bit code also). */
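
/* Allocate a fresh page: from the boot allocator while still in early boot,
 * otherwise from the Xen heap. */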
void *safe_page_alloc(void)
{
    extern int early_boot;
    if ( early_boot )
        return __va(alloc_boot_pages(PAGE_SIZE, PAGE_SIZE));
    return (void *)alloc_xenheap_page();
}
/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
int map_pages(
    pagetable_t *pt,
    unsigned long v,
    unsigned long p,
    unsigned long s,
    unsigned long flags)
{
    l4_pgentry_t *pl4e;
    l3_pgentry_t *pl3e;
    l2_pgentry_t *pl2e;
    l1_pgentry_t *pl1e;
    void *newpg;

    while ( s != 0 )
    {
        pl4e = &pt[l4_table_offset(v)];
        if ( !(l4_pgentry_val(*pl4e) & _PAGE_PRESENT) )
        {
            newpg = safe_page_alloc();
            clear_page(newpg);
            *pl4e = mk_l4_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
        }

        pl3e = l4_pgentry_to_l3(*pl4e) + l3_table_offset(v);
        if ( !(l3_pgentry_val(*pl3e) & _PAGE_PRESENT) )
        {
            newpg = safe_page_alloc();
            clear_page(newpg);
            *pl3e = mk_l3_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
        }

        pl2e = l3_pgentry_to_l2(*pl3e) + l2_table_offset(v);

        if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
        {
            /* Super-page mapping. */
            if ( (l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
                __flush_tlb_pge();
            *pl2e = mk_l2_pgentry(p|flags|_PAGE_PSE);

            v += 1 << L2_PAGETABLE_SHIFT;
            p += 1 << L2_PAGETABLE_SHIFT;
            s -= 1 << L2_PAGETABLE_SHIFT;
        }
        else
        {
            /* Normal page mapping. */
            if ( !(l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
            {
                newpg = safe_page_alloc();
                clear_page(newpg);
                *pl2e = mk_l2_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
            }
            pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(v);
            if ( (l1_pgentry_val(*pl1e) & _PAGE_PRESENT) )
                __flush_tlb_one(v);
            *pl1e = mk_l1_pgentry(p|flags);

            v += 1 << L1_PAGETABLE_SHIFT;
            p += 1 << L1_PAGETABLE_SHIFT;
            s -= 1 << L1_PAGETABLE_SHIFT;
        }
    }

    return 0;
}
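
/* Map the physical page at @p into the fixmap slot @idx of the idle page
 * table, with attributes @flags. */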
void __set_fixmap(
    enum fixed_addresses idx, unsigned long p, unsigned long flags)
{
    if ( unlikely(idx >= __end_of_fixed_addresses) )
        BUG();
    map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags);
}
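
/*
 * Set up the idle page table: an L4 slot for ioremap(), a read-only MPT
 * mapping for guest-OS use, and the linear page-table mapping. The M2P
 * table itself is still a TODO at this stage.
 */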
void __init paging_init(void)
{
    void *newpt;

    /* Allocate and map the machine-to-phys table. */
    /* XXX TODO XXX */

    /* Create page table for ioremap(). */
    newpt = (void *)alloc_xenheap_page();
    clear_page(newpt);
    idle_pg_table[IOREMAP_VIRT_START >> L4_PAGETABLE_SHIFT] =
        mk_l4_pgentry(__pa(newpt) | __PAGE_HYPERVISOR);

    /* Create read-only mapping of MPT for guest-OS use. */
    newpt = (void *)alloc_xenheap_page();
    clear_page(newpt);
    idle_pg_table[RO_MPT_VIRT_START >> L4_PAGETABLE_SHIFT] =
        mk_l4_pgentry((__pa(newpt) | __PAGE_HYPERVISOR | _PAGE_USER) &
                      ~_PAGE_RW);
    /* XXX TODO: Copy appropriate L3 entries from RDWR_MPT_VIRT_START XXX */

    /* Set up linear page table mapping. */
    idle_pg_table[LINEAR_PT_VIRT_START >> L4_PAGETABLE_SHIFT] =
        mk_l4_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
}
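
/* Drop the low-memory mappings used during boot by clearing L4 slot 0. */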
void __init zap_low_mappings(void)
{
    idle_pg_table[0] = mk_l4_pgentry(0);
}

/*
 * Allows shooting down of borrowed page-table use on specific CPUs.
 * Specifically, we borrow page tables when running the idle domain.
 */
static void __synchronise_pagetables(void *mask)
{
    struct exec_domain *ed = current;
    if ( ((unsigned long)mask & (1 << ed->processor)) &&
         is_idle_task(ed->domain) )
        write_ptbase(&ed->mm);
}
void synchronise_pagetables(unsigned long cpu_mask)
{
    __synchronise_pagetables((void *)cpu_mask);
    smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
}
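
/* Stack-switch hypercall. The body is compiled out (#if 0) for now, so the
 * call simply returns success. */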
long do_stack_switch(unsigned long ss, unsigned long esp)
{
#if 0
    int nr = smp_processor_id();
    struct tss_struct *t = &init_tss[nr];

    /* We need to do this check as we load and use SS on guest's behalf. */
    if ( (ss & 3) == 0 )
        return -EPERM;

    current->thread.guestos_ss = ss;
    current->thread.guestos_sp = esp;
    t->ss1 = ss;
    t->esp1 = esp;
#endif
    return 0;
}

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(unsigned long *d)
{
    unsigned long base, limit, a = d[0], b = d[1];

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /*
     * We don't allow a DPL of zero. There is no legitimate reason for
     * specifying DPL==0, and it gets rather dangerous if we also accept call
     * gates (consider a call gate pointing at another guestos descriptor with
     * DPL 0 -- this would get the OS ring-0 privileges).
     */
    if ( (b & _SEGMENT_DPL) == 0 )
        goto bad;

    if ( !(b & _SEGMENT_S) )
    {
        /*
         * System segment:
         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
         *  2. Don't allow TSS descriptors or task gates as we don't
         *     virtualise x86 tasks.
         *  3. Don't allow LDT descriptors because they're unnecessary and
         *     I'm uneasy about allowing an LDT page to contain LDT
         *     descriptors. In any case, Xen automatically creates the
         *     required descriptor when reloading the LDT register.
         *  4. We allow call gates but they must not jump to a private segment.
         */

        /* Disallow everything but call gates. */
        if ( (b & _SEGMENT_TYPE) != 0xc00 )
            goto bad;

#if 0
        /* Can't allow far jump to a Xen-private segment. */
        if ( !VALID_CODESEL(a>>16) )
            goto bad;
#endif

        /* Reserved bits must be zero. */
        if ( (b & 0xe0) != 0 )
            goto bad;

        /* No base/limit check is needed for a call gate. */
        goto good;
    }

    /* Check that base is at least a page away from Xen-private area. */
    base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
        goto bad;

    /* Check and truncate the limit if necessary. */
    limit = (b&0xf0000) | (a&0xffff);
    limit++; /* We add one because limit is inclusive. */
    if ( (b & _SEGMENT_G) )
        limit <<= 12;

    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    {
        /*
         * Grows-down limit check.
         * NB. limit == 0xFFFFF provides no access      (if G=1).
         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
         */
        if ( (base + limit) > base )
        {
            limit = -(base & PAGE_MASK);
            goto truncate;
        }
    }
    else
    {
        /*
         * Grows-up limit check.
         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
         *     limit == 0x00000 provides 4kB access (if G=1).
         */
        if ( ((base + limit) <= base) ||
             ((base + limit) > PAGE_OFFSET) )
        {
            limit = PAGE_OFFSET - base;
        truncate:
            if ( !(b & _SEGMENT_G) )
                goto bad; /* too dangerous; too hard to work out... */
            limit = (limit >> 12) - 1;
            d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
            d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
        }
    }

 good:
    return 1;
 bad:
    return 0;
}
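
/* Drop the references on the frames backing @ed's GDT and clear its
 * per-domain GDT mapping slots. */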
void destroy_gdt(struct exec_domain *ed)
{
    int i;
    unsigned long pfn;

    for ( i = 0; i < 16; i++ )
    {
        if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[i])) != 0 )
            put_page_and_type(&frame_table[pfn]);
        ed->mm.perdomain_ptes[i] = mk_l1_pgentry(0);
    }
}
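
/* Validate and install a new GDT built from the given page frames,
 * preserving Xen's reserved entries in the first page. */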
long set_gdt(struct exec_domain *ed,
             unsigned long *frames,
             unsigned int entries)
{
    struct domain *d = ed->domain;
    /* NB. There are 512 8-byte entries per GDT page. */
    int i = 0, nr_pages = (entries + 511) / 512;
    struct desc_struct *vgdt;
    unsigned long pfn;

    /* Check the first page in the new GDT. */
    if ( (pfn = frames[0]) >= max_page )
        goto fail;

    /* The first page is special because Xen owns a range of entries in it. */
    if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
    {
        /* GDT checks failed: try zapping the Xen reserved entries. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_writable_page) )
            goto fail;
        vgdt = map_domain_mem(pfn << PAGE_SHIFT);
        memset(vgdt + FIRST_RESERVED_GDT_ENTRY, 0,
               NR_RESERVED_GDT_ENTRIES*8);
        unmap_domain_mem(vgdt);
        put_page_and_type(&frame_table[pfn]);

        /* Okay, we zapped the entries. Now try the GDT checks again. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;
    }

    /* Check the remaining pages in the new GDT. */
    for ( i = 1; i < nr_pages; i++ )
        if ( ((pfn = frames[i]) >= max_page) ||
             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;

    /* Copy reserved GDT entries to the new GDT. */
    vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
    memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
           gdt_table + FIRST_RESERVED_GDT_ENTRY,
           NR_RESERVED_GDT_ENTRIES*8);
    unmap_domain_mem(vgdt);

    /* Tear down the old GDT. */
    destroy_gdt(ed);

    /* Install the new GDT. */
    for ( i = 0; i < nr_pages; i++ )
        ed->mm.perdomain_ptes[i] =
            mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    SET_GDT_ADDRESS(ed, GDT_VIRT_START(ed));
    SET_GDT_ENTRIES(ed, entries);

    return 0;

 fail:
    while ( i-- > 0 )
        put_page_and_type(&frame_table[frames[i]]);
    return -EINVAL;
}
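
/* GDT-switch hypercall: copy the frame list from guest memory, install it
 * via set_gdt(), and reload GDTR on success. */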
long do_set_gdt(unsigned long *frame_list, unsigned int entries)
{
    int nr_pages = (entries + 511) / 512;
    unsigned long frames[16];
    long ret;

    if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
        return -EINVAL;

    if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
        return -EFAULT;

    if ( (ret = set_gdt(current, frames, entries)) == 0 )
    {
        local_flush_tlb();
        __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
    }

    return ret;
}
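
/* Hypercall to update a single descriptor at machine address @pa with the
 * words @word1/@word2, subject to descriptor and page-type checks. */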
long do_update_descriptor(
    unsigned long pa, unsigned long word1, unsigned long word2)
{
    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
    struct pfn_info *page;
    long ret = -EINVAL;

    d[0] = word1;
    d[1] = word2;

    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) )
        return -EINVAL;

    page = &frame_table[pfn];
    if ( unlikely(!get_page(page, current->domain)) )
        return -EINVAL;

    /* Check if the given frame is in use in an unsafe context. */
    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_gdt_page:
        /* Disallow updates of Xen-reserved descriptors in the current GDT. */
        if ( (l1_pgentry_to_pagenr(current->mm.perdomain_ptes[0]) == pfn) &&
             (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
             (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
            goto out;
        if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
            goto out;
        break;
    case PGT_ldt_page:
        if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
            goto out;
        break;
    default:
        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
            goto out;
        break;
    }

    /* All is good so make the update. */
    gdt_pent = map_domain_mem(pa);
    memcpy(gdt_pent, d, 8);
    unmap_domain_mem(gdt_pent);

    put_page_type(page);

    ret = 0; /* success */

 out:
    put_page(page);
    return ret;
}

#ifdef MEMORY_GUARD
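
/* The full memory-guard implementation below is currently compiled out:
 * the '#if 1' arm provides no-op stubs. */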
#if 1

void *memguard_init(void *heap_start) { return heap_start; }
void memguard_guard_range(void *p, unsigned long l) {}
void memguard_unguard_range(void *p, unsigned long l) {}

#else

void *memguard_init(void *heap_start)
{
    l1_pgentry_t *l1;
    int i, j;

    /* Round the allocation pointer up to a page boundary. */
    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
                          PAGE_MASK);

    /* Memory guarding is incompatible with super pages. */
    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
    {
        l1 = (l1_pgentry_t *)heap_start;
        heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
        for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
            l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
                                  (j << L1_PAGETABLE_SHIFT) |
                                  __PAGE_HYPERVISOR);
        idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
            mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
    }

    return heap_start;
}
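
/* Set or clear _PAGE_PRESENT on every L1 entry covering [@p, @p+@l), so
 * that accesses to a guarded range fault. */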
static void __memguard_change_range(void *p, unsigned long l, int guard)
{
    l1_pgentry_t *l1;
    l2_pgentry_t *l2;
    unsigned long _p = (unsigned long)p;
    unsigned long _l = (unsigned long)l;

    /* Ensure we are dealing with a page-aligned whole number of pages. */
    ASSERT((_p&PAGE_MASK) != 0);
    ASSERT((_l&PAGE_MASK) != 0);
    ASSERT((_p&~PAGE_MASK) == 0);
    ASSERT((_l&~PAGE_MASK) == 0);

    while ( _l != 0 )
    {
        l2 = &idle_pg_table[l2_table_offset(_p)];
        l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
        if ( guard )
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
        else
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
        _p += PAGE_SIZE;
        _l -= PAGE_SIZE;
    }
}

void memguard_guard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 1);
    local_flush_tlb();
}

void memguard_unguard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 0);
}

#endif

#endif