
view xen/arch/x86/x86_32/mm.c @ 3668:d55d523078f7

bitkeeper revision 1.1159.212.77 (4202221693AFbvFZWeMHHIjQfbzTIQ)

More x86_64 progress. Many more gaps filled in. Next step is DOM0
construction.
Signed-off-by: keir.fraser@cl.cam.ac.uk
author    kaf24@scramble.cl.cam.ac.uk
date      Thu Feb 03 13:07:34 2005 +0000 (2005-02-03)
parents   fec8b1778268
children  677cb76cff18

/******************************************************************************
 * arch/x86/x86_32/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/domain_page.h>
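
/*
 * Note: map_pages() consumes the range in 4MB superpage steps whenever the
 * virtual address, physical address and remaining size are all 4MB-aligned
 * (the (s|v|p) alignment test below); otherwise it falls back to 4kB
 * mappings, allocating an L1 table from the Xen heap on demand. A TLB entry
 * is flushed only when an existing present mapping is being replaced.
 */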
/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
int map_pages(
    pagetable_t *pt,
    unsigned long v,
    unsigned long p,
    unsigned long s,
    unsigned long flags)
{
    l2_pgentry_t *pl2e;
    l1_pgentry_t *pl1e;
    void         *newpg;

    while ( s != 0 )
    {
        pl2e = &pt[l2_table_offset(v)];

        if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
        {
            /* Super-page mapping. */
            if ( (l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
                __flush_tlb_pge();
            *pl2e = mk_l2_pgentry(p|flags|_PAGE_PSE);

            v += 1 << L2_PAGETABLE_SHIFT;
            p += 1 << L2_PAGETABLE_SHIFT;
            s -= 1 << L2_PAGETABLE_SHIFT;
        }
        else
        {
            /* Normal page mapping. */
            if ( !(l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
            {
                newpg = (void *)alloc_xenheap_page();
                clear_page(newpg);
                *pl2e = mk_l2_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
            }
            pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(v);
            if ( (l1_pgentry_val(*pl1e) & _PAGE_PRESENT) )
                __flush_tlb_one(v);
            *pl1e = mk_l1_pgentry(p|flags);

            v += 1 << L1_PAGETABLE_SHIFT;
            p += 1 << L1_PAGETABLE_SHIFT;
            s -= 1 << L1_PAGETABLE_SHIFT;
        }
    }

    return 0;
}
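
/*
 * Install a single-page mapping for a compile-time fixmap slot by reusing
 * map_pages() on the idle page table. Out-of-range indices are a bug.
 */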
void __set_fixmap(
    enum fixed_addresses idx, unsigned long p, unsigned long flags)
{
    if ( unlikely(idx >= __end_of_fixed_addresses) )
        BUG();
    map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags);
}
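
/*
 * Boot-time construction of the Xen-private virtual regions: the read-write
 * machine-to-phys (M2P) table (a 4MB superpage of domain-heap memory), its
 * read-only alias for guest use, the L1 table backing ioremap(), the
 * map_domain_mem() mapping cache, and the linear page-table self-mapping.
 * If the CPU supports PGE, Xen's 4MB mappings are additionally marked
 * GLOBAL so they survive cr3 reloads.
 */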
void __init paging_init(void)
{
    void *ioremap_pt;
    unsigned long v, l2e;
    struct pfn_info *pg;

    /* Allocate and map the machine-to-phys table. */
    if ( (pg = alloc_domheap_pages(NULL, 10)) == NULL )
        panic("Not enough memory to bootstrap Xen.\n");
    idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)] =
        mk_l2_pgentry(page_to_phys(pg) | __PAGE_HYPERVISOR | _PAGE_PSE);
    memset((void *)RDWR_MPT_VIRT_START, 0x55, 4UL << 20);

    /* Xen 4MB mappings can all be GLOBAL. */
    if ( cpu_has_pge )
    {
        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
        {
            l2e = l2_pgentry_val(idle_pg_table[l2_table_offset(v)]);
            if ( l2e & _PAGE_PSE )
                l2e |= _PAGE_GLOBAL;
            idle_pg_table[v >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(l2e);
        }
    }

    /* Create page table for ioremap(). */
    ioremap_pt = (void *)alloc_xenheap_page();
    clear_page(ioremap_pt);
    idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)] =
        mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);

    /* Create read-only mapping of MPT for guest-OS use. */
    idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] =
        mk_l2_pgentry(l2_pgentry_val(
            idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]) & ~_PAGE_RW);

    /* Set up mapping cache for domain pages. */
    mapcache = (unsigned long *)alloc_xenheap_page();
    clear_page(mapcache);
    idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)] =
        mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);

    /* Set up linear page table mapping. */
    idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)] =
        mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
}
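
/*
 * Remove the low (guest-range) boot mappings from the idle page table by
 * clearing every L2 entry below the hypervisor area, then flush the TLB,
 * including global entries, so no stale low-memory translations remain.
 */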
void __init zap_low_mappings(void)
{
    int i;
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
        idle_pg_table[i] = mk_l2_pgentry(0);
    flush_tlb_all_pge();
}
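
/*
 * Subarch-specific memory setup: sanity-check the pfn_info layout that the
 * atomic 64-bit count/domain updates rely on, then assign the 1024 frames
 * backing the M2P table to the special 'dom_xen' domain, typed so that
 * guests can only ever map them read-only.
 */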
void subarch_init_memory(struct domain *dom_xen)
{
    unsigned long i, m2p_start_mfn;

    /*
     * We are rather picky about the layout of 'struct pfn_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them. Also, just for sanity, we assert the size
     * of the structure here.
     */
    if ( (offsetof(struct pfn_info, u.inuse.domain) !=
          (offsetof(struct pfn_info, count_info) + sizeof(u32))) ||
         (sizeof(struct pfn_info) != 24) )
    {
        printk("Weird pfn_info layout (%ld,%ld,%d)\n",
               offsetof(struct pfn_info, count_info),
               offsetof(struct pfn_info, u.inuse.domain),
               sizeof(struct pfn_info));
        for ( ; ; ) ;
    }

    /* M2P table is mappable read-only by privileged domains. */
    m2p_start_mfn = l2_pgentry_to_pagenr(
        idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]);
    for ( i = 0; i < 1024; i++ )
    {
        frame_table[m2p_start_mfn+i].count_info        = PGC_allocated | 1;
        /* Typed as a GDT page to ensure it can only be mapped read-only
           by non-privileged domains. */
        frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
        frame_table[m2p_start_mfn+i].u.inuse.domain    = dom_xen;
    }
}

/*
 * Allows shooting down of borrowed page-table use on specific CPUs.
 * Specifically, we borrow page tables when running the idle domain.
 */
static void __synchronise_pagetables(void *mask)
{
    struct exec_domain *ed = current;
    if ( ((unsigned long)mask & (1 << ed->processor)) &&
         is_idle_task(ed->domain) )
        write_ptbase(&ed->mm);
}
void synchronise_pagetables(unsigned long cpu_mask)
{
    __synchronise_pagetables((void *)cpu_mask);
    smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
}
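
/*
 * Hypercall handler: a guest OS switches its kernel stack by recording the
 * new SS:ESP in its thread state and in the ring-1 fields of this CPU's
 * TSS. Ring-0 selectors are refused, since the segment is loaded and used
 * on the guest's behalf.
 */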
long do_stack_switch(unsigned long ss, unsigned long esp)
{
    int nr = smp_processor_id();
    struct tss_struct *t = &init_tss[nr];

    /* We need to do this check as we load and use SS on guest's behalf. */
    if ( (ss & 3) == 0 )
        return -EPERM;

    current->thread.guestos_ss = ss;
    current->thread.guestos_sp = esp;
    t->ss1  = ss;
    t->esp1 = esp;

    return 0;
}
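
/*
 * For reference, with a = d[0] (low word) and b = d[1] (high word) of an
 * x86 descriptor: base = b[31:24] : b[7:0] : a[31:16] and
 * limit = b[19:16] : a[15:0], with the granularity bit (G) scaling the
 * limit in units of 4kB. The checks below reconstruct base and limit in
 * exactly this way.
 */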
/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(unsigned long *d)
{
    unsigned long base, limit, a = d[0], b = d[1];

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /*
     * We don't allow a DPL of zero. There is no legitimate reason for
     * specifying DPL==0, and it gets rather dangerous if we also accept call
     * gates (consider a call gate pointing at another guestos descriptor with
     * DPL 0 -- this would get the OS ring-0 privileges).
     */
    if ( (b & _SEGMENT_DPL) == 0 )
        goto bad;

    if ( !(b & _SEGMENT_S) )
    {
        /*
         * System segment:
         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
         *  2. Don't allow TSS descriptors or task gates as we don't
         *     virtualise x86 tasks.
         *  3. Don't allow LDT descriptors because they're unnecessary and
         *     I'm uneasy about allowing an LDT page to contain LDT
         *     descriptors. In any case, Xen automatically creates the
         *     required descriptor when reloading the LDT register.
         *  4. We allow call gates but they must not jump to a private segment.
         */

        /* Disallow everything but call gates. */
        if ( (b & _SEGMENT_TYPE) != 0xc00 )
            goto bad;

        /* Can't allow far jump to a Xen-private segment. */
        if ( !VALID_CODESEL(a>>16) )
            goto bad;

        /* Reserved bits must be zero. */
        if ( (b & 0xe0) != 0 )
            goto bad;

        /* No base/limit check is needed for a call gate. */
        goto good;
    }

    /* Check that base is at least a page away from Xen-private area. */
    base  = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
        goto bad;

    /* Check and truncate the limit if necessary. */
    limit = (b&0xf0000) | (a&0xffff);
    limit++; /* We add one because limit is inclusive. */
    if ( (b & _SEGMENT_G) )
        limit <<= 12;

    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    {
        /*
         * Grows-down limit check.
         * NB. limit == 0xFFFFF provides no access      (if G=1).
         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
         */
        if ( (base + limit) > base )
        {
            limit = -(base & PAGE_MASK);
            goto truncate;
        }
    }
    else
    {
        /*
         * Grows-up limit check.
         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
         *     limit == 0x00000 provides 4kB access (if G=1).
         */
        if ( ((base + limit) <= base) ||
             ((base + limit) > PAGE_OFFSET) )
        {
            limit = PAGE_OFFSET - base;
        truncate:
            if ( !(b & _SEGMENT_G) )
                goto bad; /* too dangerous; too hard to work out... */
            limit = (limit >> 12) - 1;
            d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
            d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
        }
    }

 good:
    return 1;
 bad:
    return 0;
}
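
/*
 * Drop the type and general references taken on each GDT frame when it was
 * installed, and clear the corresponding per-domain mapping slots (up to 16
 * L1 entries, i.e. a GDT of at most 8192 entries).
 */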
void destroy_gdt(struct exec_domain *ed)
{
    int i;
    unsigned long pfn;

    for ( i = 0; i < 16; i++ )
    {
        if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[i])) != 0 )
            put_page_and_type(&frame_table[pfn]);
        ed->mm.perdomain_ptes[i] = mk_l1_pgentry(0);
    }
}
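
/*
 * Validate and install a guest-supplied GDT. Every frame must belong to the
 * domain and pass the PGT_gdt_page type check; if the first frame initially
 * fails that check, its Xen-reserved slots are zeroed and the check retried,
 * and Xen's reserved descriptors are afterwards copied back in from the
 * master gdt_table. Only then is the old GDT torn down and the new frames
 * wired into the per-domain mapping area. On failure, every reference taken
 * so far is dropped.
 */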
long set_gdt(struct exec_domain *ed,
             unsigned long *frames,
             unsigned int entries)
{
    struct domain *d = ed->domain;
    /* NB. There are 512 8-byte entries per GDT page. */
    int i = 0, nr_pages = (entries + 511) / 512;
    struct desc_struct *vgdt;
    unsigned long pfn;

    /* Check the first page in the new GDT. */
    if ( (pfn = frames[0]) >= max_page )
        goto fail;

    /* The first page is special because Xen owns a range of entries in it. */
    if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
    {
        /* GDT checks failed: try zapping the Xen reserved entries. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_writable_page) )
            goto fail;
        vgdt = map_domain_mem(pfn << PAGE_SHIFT);
        memset(vgdt + FIRST_RESERVED_GDT_ENTRY, 0,
               NR_RESERVED_GDT_ENTRIES*8);
        unmap_domain_mem(vgdt);
        put_page_and_type(&frame_table[pfn]);

        /* Okay, we zapped the entries. Now try the GDT checks again. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;
    }

    /* Check the remaining pages in the new GDT. */
    for ( i = 1; i < nr_pages; i++ )
        if ( ((pfn = frames[i]) >= max_page) ||
             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;

    /* Copy reserved GDT entries to the new GDT. */
    vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
    memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
           gdt_table + FIRST_RESERVED_GDT_ENTRY,
           NR_RESERVED_GDT_ENTRIES*8);
    unmap_domain_mem(vgdt);

    /* Tear down the old GDT. */
    destroy_gdt(ed);

    /* Install the new GDT. */
    for ( i = 0; i < nr_pages; i++ )
        ed->mm.perdomain_ptes[i] =
            mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    SET_GDT_ADDRESS(ed, GDT_VIRT_START(ed));
    SET_GDT_ENTRIES(ed, entries);

    return 0;

 fail:
    while ( i-- > 0 )
        put_page_and_type(&frame_table[frames[i]]);
    return -EINVAL;
}
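
/*
 * Hypercall wrapper around set_gdt(): bounds-check the requested number of
 * entries, copy the frame list in from guest memory, and, while holding the
 * domain's big lock, install the new GDT, flush the local TLB and reload
 * GDTR so the new table takes effect immediately.
 */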
long do_set_gdt(unsigned long *frame_list, unsigned int entries)
{
    int nr_pages = (entries + 511) / 512;
    unsigned long frames[16];
    long ret;

    if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
        return -EINVAL;

    if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
        return -EFAULT;

    LOCK_BIGLOCK(current->domain);

    if ( (ret = set_gdt(current, frames, entries)) == 0 )
    {
        local_flush_tlb();
        __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
    }

    UNLOCK_BIGLOCK(current->domain);

    return ret;
}
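
/*
 * Hypercall handler for updating a single 8-byte descriptor in place at
 * machine address @pa. The descriptor must be 8-byte aligned, must pass
 * check_descriptor(), and must live in a frame owned by the caller; updates
 * that would touch Xen-reserved slots of an installed GDT are refused. The
 * frame is re-validated against its current type (GDT, LDT or plain
 * writable) before the 8 bytes are copied in.
 */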
long do_update_descriptor(
    unsigned long pa, unsigned long word1, unsigned long word2)
{
    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
    struct pfn_info *page;
    struct exec_domain *ed;
    long ret = -EINVAL;

    d[0] = word1;
    d[1] = word2;

    LOCK_BIGLOCK(current->domain);

    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) ) {
        UNLOCK_BIGLOCK(current->domain);
        return -EINVAL;
    }

    page = &frame_table[pfn];
    if ( unlikely(!get_page(page, current->domain)) ) {
        UNLOCK_BIGLOCK(current->domain);
        return -EINVAL;
    }

    /* Check if the given frame is in use in an unsafe context. */
    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_gdt_page:
        /* Disallow updates of Xen-reserved descriptors in the current GDT. */
        for_each_exec_domain(current->domain, ed) {
            if ( (l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[0]) == pfn) &&
                 (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
                 (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
                goto out;
        }
        if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
            goto out;
        break;
    case PGT_ldt_page:
        if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
            goto out;
        break;
    default:
        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
            goto out;
        break;
    }

    /* All is good so make the update. */
    gdt_pent = map_domain_mem(pa);
    memcpy(gdt_pent, d, 8);
    unmap_domain_mem(gdt_pent);

    put_page_type(page);

    ret = 0; /* success */

 out:
    put_page(page);

    UNLOCK_BIGLOCK(current->domain);

    return ret;
}

#ifdef MEMORY_GUARD
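
/*
 * Rebuild Xen's direct mapping with 4kB pages so that individual heap pages
 * can later be guarded (unmapped) and unguarded. The L1 tables themselves
 * are carved out of the start of the Xen heap, which is why the caller gets
 * back an advanced heap_start pointer.
 */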
void *memguard_init(void *heap_start)
{
    l1_pgentry_t *l1;
    int i, j;

    /* Round the allocation pointer up to a page boundary. */
    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
                          PAGE_MASK);

    /* Memory guarding is incompatible with super pages. */
    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
    {
        l1 = (l1_pgentry_t *)heap_start;
        heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
        for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
            l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
                                  (j << L1_PAGETABLE_SHIFT) |
                                  __PAGE_HYPERVISOR);
        idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
            mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
    }

    return heap_start;
}
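
/*
 * Set or clear _PAGE_PRESENT on every 4kB mapping in the page-aligned range
 * [p, p+l). Guarding clears _PAGE_PRESENT so stray accesses fault, and
 * memguard_guard_range() then flushes the local TLB to evict any cached
 * translations. Unguarding needs no flush, since not-present entries are
 * never held in the TLB.
 */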
static void __memguard_change_range(void *p, unsigned long l, int guard)
{
    l1_pgentry_t *l1;
    l2_pgentry_t *l2;
    unsigned long _p = (unsigned long)p;
    unsigned long _l = (unsigned long)l;

    /* Ensure we are dealing with a page-aligned whole number of pages. */
    ASSERT((_p&PAGE_MASK) != 0);
    ASSERT((_l&PAGE_MASK) != 0);
    ASSERT((_p&~PAGE_MASK) == 0);
    ASSERT((_l&~PAGE_MASK) == 0);

    while ( _l != 0 )
    {
        l2 = &idle_pg_table[l2_table_offset(_p)];
        l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
        if ( guard )
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
        else
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
        _p += PAGE_SIZE;
        _l -= PAGE_SIZE;
    }
}

void memguard_guard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 1);
    local_flush_tlb();
}

void memguard_unguard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 0);
}

#endif