debuggers.hg: view xen/arch/x86/x86_32/mm.c @ 3632:fec8b1778268

bitkeeper revision 1.1159.212.60 (41febc4bKKSkh9u-Zes9v2CmBuLZxA)

More bootstrap fixes for x86/64. Next thing to do is sort out the IDT and
get traps.c working; then we can get rid of a bunch of dummy labels from
end of boot/x86_64.S. We're also going to need some kind of entry.S before
we can safely enable interrupts. Also bear in mind that not all of physical
RAM may be mapped (only first 1GB) and no m2p table is yet allocated or
mapped. Plenty to be done!
author kaf24@viper.(none)
date Mon Jan 31 23:16:27 2005 +0000 (2005-01-31)
parents 2c56c6b39a48
children d55d523078f7
line source
/******************************************************************************
 * arch/x86/x86_32/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/domain_page.h>

unsigned long m2p_start_mfn;

/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
int map_pages(
    pagetable_t *pt,
    unsigned long v,
    unsigned long p,
    unsigned long s,
    unsigned long flags)
{
    l2_pgentry_t *pl2e;
    l1_pgentry_t *pl1e;
    void         *newpg;

    while ( s != 0 )
    {
        pl2e = &pt[l2_table_offset(v)];

        if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
        {
            /* Super-page mapping. */
            if ( (l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
                __flush_tlb_pge();
            *pl2e = mk_l2_pgentry(p|flags|_PAGE_PSE);

            v += 1 << L2_PAGETABLE_SHIFT;
            p += 1 << L2_PAGETABLE_SHIFT;
            s -= 1 << L2_PAGETABLE_SHIFT;
        }
        else
        {
            /* Normal page mapping. */
            if ( !(l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
            {
                newpg = (void *)alloc_xenheap_page();
                clear_page(newpg);
                *pl2e = mk_l2_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
            }
            pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(v);
            if ( (l1_pgentry_val(*pl1e) & _PAGE_PRESENT) )
                __flush_tlb_one(v);
            *pl1e = mk_l1_pgentry(p|flags);

            v += 1 << L1_PAGETABLE_SHIFT;
            p += 1 << L1_PAGETABLE_SHIFT;
            s -= 1 << L1_PAGETABLE_SHIFT;
        }
    }

    return 0;
}
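
/*
 * Illustrative sketch, not part of this changeset: map_pages() above takes
 * the 4MB super-page path only when the virtual address, the physical
 * address and the remaining size are all aligned to the L2 granularity.
 * A standalone version of that predicate, assuming the 22-bit
 * L2_PAGETABLE_SHIFT of the two-level (non-PAE) x86_32 layout:
 */
static inline int would_use_superpage(
    unsigned long v, unsigned long p, unsigned long s)
{
    const unsigned long align = (1UL << 22) - 1;  /* 4MB - 1 */
    /* One PSE entry can cover the extent only if all three share alignment. */
    return ((v | p | s) & align) == 0;
}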

void __set_fixmap(
    enum fixed_addresses idx, unsigned long p, unsigned long flags)
{
    if ( unlikely(idx >= __end_of_fixed_addresses) )
        BUG();
    map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags);
}


void __init paging_init(void)
{
    void *ioremap_pt;
    unsigned long v, l2e;
    struct pfn_info *pg;

    /* Allocate and map the machine-to-phys table. */
    if ( (pg = alloc_domheap_pages(NULL, 10)) == NULL )
        panic("Not enough memory to bootstrap Xen.\n");
    m2p_start_mfn = page_to_pfn(pg);
    idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(page_to_phys(pg) | __PAGE_HYPERVISOR | _PAGE_PSE);

    /* Xen 4MB mappings can all be GLOBAL. */
    if ( cpu_has_pge )
    {
        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
        {
            l2e = l2_pgentry_val(idle_pg_table[v >> L2_PAGETABLE_SHIFT]);
            if ( l2e & _PAGE_PSE )
                l2e |= _PAGE_GLOBAL;
            idle_pg_table[v >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(l2e);
        }
    }

    /* Create page table for ioremap(). */
    ioremap_pt = (void *)alloc_xenheap_page();
    clear_page(ioremap_pt);
    idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);

    /* Create read-only mapping of MPT for guest-OS use. */
    idle_pg_table[RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(l2_pgentry_val(
            idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT]) &
                      ~_PAGE_RW);

    /* Set up mapping cache for domain pages. */
    mapcache = (unsigned long *)alloc_xenheap_page();
    clear_page(mapcache);
    idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);

    /* Set up linear page table mapping. */
    idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
}
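
/*
 * Illustrative sketch, not part of this changeset: paging_init() above
 * installs each special region by writing the single idle_pg_table slot that
 * covers it, i.e. the slot selected by the top ten bits of the virtual
 * address. Making the index computation explicit, again assuming
 * L2_PAGETABLE_SHIFT == 22 on two-level x86_32:
 */
static inline unsigned int l2_slot_of(unsigned long va)
{
    /* Each L2 slot maps a 4MB window; a 4kB L2 table holds 1024 such slots. */
    return (unsigned int)(va >> 22);
}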

void __init zap_low_mappings(void)
{
    int i;
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
        idle_pg_table[i] = mk_l2_pgentry(0);
    flush_tlb_all_pge();
}


/*
 * Allows shooting down of borrowed page-table use on specific CPUs.
 * Specifically, we borrow page tables when running the idle domain.
 */
static void __synchronise_pagetables(void *mask)
{
    struct exec_domain *ed = current;
    if ( ((unsigned long)mask & (1 << ed->processor)) &&
         is_idle_task(ed->domain) )
        write_ptbase(&ed->mm);
}
void synchronise_pagetables(unsigned long cpu_mask)
{
    __synchronise_pagetables((void *)cpu_mask);
    smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
}

long do_stack_switch(unsigned long ss, unsigned long esp)
{
    int nr = smp_processor_id();
    struct tss_struct *t = &init_tss[nr];

    /* We need to do this check as we load and use SS on guest's behalf. */
    if ( (ss & 3) == 0 )
        return -EPERM;

    current->thread.guestos_ss = ss;
    current->thread.guestos_sp = esp;
    t->ss1  = ss;
    t->esp1 = esp;

    return 0;
}
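
/*
 * Illustrative sketch, not part of this changeset: the (ss & 3) == 0 test in
 * do_stack_switch() rejects any selector whose requested privilege level
 * (RPL) is ring 0, because Xen later loads SS with this value on the guest's
 * behalf. The check in isolation:
 */
static inline int selector_rpl(unsigned long sel)
{
    return (int)(sel & 3);  /* the low two bits of a selector are its RPL */
}
/* A guest-supplied stack selector is acceptable only if selector_rpl() != 0. */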

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(unsigned long *d)
{
    unsigned long base, limit, a = d[0], b = d[1];

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /*
     * We don't allow a DPL of zero. There is no legitimate reason for
     * specifying DPL==0, and it gets rather dangerous if we also accept call
     * gates (consider a call gate pointing at another guestos descriptor with
     * DPL 0 -- this would get the OS ring-0 privileges).
     */
    if ( (b & _SEGMENT_DPL) == 0 )
        goto bad;

    if ( !(b & _SEGMENT_S) )
    {
        /*
         * System segment:
         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
         *  2. Don't allow TSS descriptors or task gates as we don't
         *     virtualise x86 tasks.
         *  3. Don't allow LDT descriptors because they're unnecessary and
         *     I'm uneasy about allowing an LDT page to contain LDT
         *     descriptors. In any case, Xen automatically creates the
         *     required descriptor when reloading the LDT register.
         *  4. We allow call gates but they must not jump to a private segment.
         */

        /* Disallow everything but call gates. */
        if ( (b & _SEGMENT_TYPE) != 0xc00 )
            goto bad;

        /* Can't allow far jump to a Xen-private segment. */
        if ( !VALID_CODESEL(a>>16) )
            goto bad;

        /* Reserved bits must be zero. */
        if ( (b & 0xe0) != 0 )
            goto bad;

        /* No base/limit check is needed for a call gate. */
        goto good;
    }

    /* Check that base is at least a page away from Xen-private area. */
    base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
        goto bad;

    /* Check and truncate the limit if necessary. */
    limit = (b&0xf0000) | (a&0xffff);
    limit++; /* We add one because limit is inclusive. */
    if ( (b & _SEGMENT_G) )
        limit <<= 12;

    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    {
        /*
         * Grows-down limit check.
         * NB. limit == 0xFFFFF provides no access (if G=1).
         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
         */
        if ( (base + limit) > base )
        {
            limit = -(base & PAGE_MASK);
            goto truncate;
        }
    }
    else
    {
        /*
         * Grows-up limit check.
         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
         *     limit == 0x00000 provides 4kB access (if G=1).
         */
        if ( ((base + limit) <= base) ||
             ((base + limit) > PAGE_OFFSET) )
        {
            limit = PAGE_OFFSET - base;
        truncate:
            if ( !(b & _SEGMENT_G) )
                goto bad; /* too dangerous; too hard to work out... */
            limit = (limit >> 12) - 1;
            d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
            d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
        }
    }

 good:
    return 1;
 bad:
    return 0;
}
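
/*
 * Illustrative sketch, not part of this changeset: the base/limit extraction
 * in check_descriptor() follows the architectural x86 descriptor layout, in
 * which the 32-bit base and 20-bit limit are scattered across the two
 * descriptor words (a = low word, b = high word). Pulled out into helpers
 * for clarity, assuming the granularity flag _SEGMENT_G corresponds to bit
 * 23 of the high word, as in the architectural encoding:
 */
static inline unsigned long seg_base(unsigned long a, unsigned long b)
{
    return (b & 0xff000000UL) | ((b & 0x000000ffUL) << 16) | (a >> 16);
}
static inline unsigned long seg_size(unsigned long a, unsigned long b)
{
    unsigned long limit = (b & 0xf0000UL) | (a & 0xffffUL);
    /* The raw limit is inclusive, hence +1; with G set it is in 4kB units. */
    return (b & (1UL << 23)) ? ((limit + 1) << 12) : (limit + 1);
}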

void destroy_gdt(struct exec_domain *ed)
{
    int i;
    unsigned long pfn;

    for ( i = 0; i < 16; i++ )
    {
        if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[i])) != 0 )
            put_page_and_type(&frame_table[pfn]);
        ed->mm.perdomain_ptes[i] = mk_l1_pgentry(0);
    }
}


long set_gdt(struct exec_domain *ed,
             unsigned long *frames,
             unsigned int entries)
{
    struct domain *d = ed->domain;
    /* NB. There are 512 8-byte entries per GDT page. */
    int i = 0, nr_pages = (entries + 511) / 512;
    struct desc_struct *vgdt;
    unsigned long pfn;

    /* Check the first page in the new GDT. */
    if ( (pfn = frames[0]) >= max_page )
        goto fail;

    /* The first page is special because Xen owns a range of entries in it. */
    if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
    {
        /* GDT checks failed: try zapping the Xen reserved entries. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_writable_page) )
            goto fail;
        vgdt = map_domain_mem(pfn << PAGE_SHIFT);
        memset(vgdt + FIRST_RESERVED_GDT_ENTRY, 0,
               NR_RESERVED_GDT_ENTRIES*8);
        unmap_domain_mem(vgdt);
        put_page_and_type(&frame_table[pfn]);

        /* Okay, we zapped the entries. Now try the GDT checks again. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;
    }

    /* Check the remaining pages in the new GDT. */
    for ( i = 1; i < nr_pages; i++ )
        if ( ((pfn = frames[i]) >= max_page) ||
             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;

    /* Copy reserved GDT entries to the new GDT. */
    vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
    memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
           gdt_table + FIRST_RESERVED_GDT_ENTRY,
           NR_RESERVED_GDT_ENTRIES*8);
    unmap_domain_mem(vgdt);

    /* Tear down the old GDT. */
    destroy_gdt(ed);

    /* Install the new GDT. */
    for ( i = 0; i < nr_pages; i++ )
        ed->mm.perdomain_ptes[i] =
            mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    SET_GDT_ADDRESS(ed, GDT_VIRT_START(ed));
    SET_GDT_ENTRIES(ed, entries);

    return 0;

 fail:
    while ( i-- > 0 )
        put_page_and_type(&frame_table[frames[i]]);
    return -EINVAL;
}
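
/*
 * Illustrative sketch, not part of this changeset: set_gdt() sizes the frame
 * list from the entry count using the fact that a 4kB page holds 512 8-byte
 * descriptors, so the page count is a simple round-up division:
 */
static inline unsigned int gdt_nr_pages(unsigned int entries)
{
    return (entries + 511) / 512;  /* ceil(entries / 512) */
}
/* E.g. 513 entries need 2 frames; the 8192-entry maximum needs all 16. */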

long do_set_gdt(unsigned long *frame_list, unsigned int entries)
{
    int nr_pages = (entries + 511) / 512;
    unsigned long frames[16];
    long ret;

    if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
        return -EINVAL;

    if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
        return -EFAULT;

    LOCK_BIGLOCK(current->domain);

    if ( (ret = set_gdt(current, frames, entries)) == 0 )
    {
        local_flush_tlb();
        __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
    }

    UNLOCK_BIGLOCK(current->domain);

    return ret;
}


long do_update_descriptor(
    unsigned long pa, unsigned long word1, unsigned long word2)
{
    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
    struct pfn_info *page;
    struct exec_domain *ed;
    long ret = -EINVAL;

    d[0] = word1;
    d[1] = word2;

    LOCK_BIGLOCK(current->domain);

    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) ) {
        UNLOCK_BIGLOCK(current->domain);
        return -EINVAL;
    }

    page = &frame_table[pfn];
    if ( unlikely(!get_page(page, current->domain)) ) {
        UNLOCK_BIGLOCK(current->domain);
        return -EINVAL;
    }

    /* Check if the given frame is in use in an unsafe context. */
    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_gdt_page:
        /* Disallow updates of Xen-reserved descriptors in the current GDT. */
        for_each_exec_domain(current->domain, ed) {
            if ( (l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[0]) == pfn) &&
                 (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
                 (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
                goto out;
        }
        if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
            goto out;
        break;
    case PGT_ldt_page:
        if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
            goto out;
        break;
    default:
        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
            goto out;
        break;
    }

    /* All is good so make the update. */
    gdt_pent = map_domain_mem(pa);
    memcpy(gdt_pent, d, 8);
    unmap_domain_mem(gdt_pent);

    put_page_type(page);

    ret = 0; /* success */

 out:
    put_page(page);

    UNLOCK_BIGLOCK(current->domain);

    return ret;
}
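
/*
 * Illustrative sketch, not part of this changeset: the reserved-entry test
 * in do_update_descriptor() turns the byte offset of the descriptor within
 * its frame into an entry index by dividing by the 8-byte descriptor size:
 */
static inline unsigned int desc_index_in_frame(unsigned long pa)
{
    return (unsigned int)((pa & (PAGE_SIZE - 1)) >> 3);
}
/*
 * The hypercall refuses the update when this index lies within
 * [FIRST_RESERVED_GDT_ENTRY, LAST_RESERVED_GDT_ENTRY] for a frame that is
 * mapped as the calling domain's live GDT.
 */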

#ifdef MEMORY_GUARD

void *memguard_init(void *heap_start)
{
    l1_pgentry_t *l1;
    int i, j;

    /* Round the allocation pointer up to a page boundary. */
    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
                          PAGE_MASK);

    /* Memory guarding is incompatible with super pages. */
    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
    {
        l1 = (l1_pgentry_t *)heap_start;
        heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
        for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
            l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
                                  (j << L1_PAGETABLE_SHIFT) |
                                  __PAGE_HYPERVISOR);
        idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
            mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
    }

    return heap_start;
}

static void __memguard_change_range(void *p, unsigned long l, int guard)
{
    l1_pgentry_t *l1;
    l2_pgentry_t *l2;
    unsigned long _p = (unsigned long)p;
    unsigned long _l = (unsigned long)l;

    /* Ensure we are dealing with a page-aligned whole number of pages. */
    ASSERT((_p&PAGE_MASK) != 0);
    ASSERT((_l&PAGE_MASK) != 0);
    ASSERT((_p&~PAGE_MASK) == 0);
    ASSERT((_l&~PAGE_MASK) == 0);

    while ( _l != 0 )
    {
        l2 = &idle_pg_table[l2_table_offset(_p)];
        l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
        if ( guard )
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
        else
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
        _p += PAGE_SIZE;
        _l -= PAGE_SIZE;
    }
}

void memguard_guard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 1);
    local_flush_tlb();
}
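
/*
 * Note that only the guard path flushes the TLB: guarding must purge any
 * cached translations so that stale present entries cannot bypass the guard,
 * whereas unguarding (below) needs no flush because x86 TLBs do not cache
 * not-present entries -- the next access simply re-walks the page tables and
 * picks up the restored mapping.
 */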

void memguard_unguard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 0);
}

#endif