debuggers.hg: xen/arch/x86/x86_32/mm.c @ 3726:88957a238191

bitkeeper revision 1.1159.1.544 (4207248crq3YxiyLWjUehtHv_Yd3tg)

Merge tempest.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into  tempest.cl.cam.ac.uk:/local/scratch/smh22/xen-unstable.bk

author    smh22@tempest.cl.cam.ac.uk
date      Mon Feb 07 08:19:24 2005 +0000
parents   bbe8541361dd 253e8e10e986
children  f5f2757b3aa2
/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/******************************************************************************
 * arch/x86/x86_32/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/domain_page.h>

/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
int map_pages(
    pagetable_t *pt,
    unsigned long v,
    unsigned long p,
    unsigned long s,
    unsigned long flags)
{
    l2_pgentry_t *pl2e;
    l1_pgentry_t *pl1e;
    void *newpg;

    while ( s != 0 )
    {
        pl2e = &pt[l2_table_offset(v)];

        if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
        {
            /* Super-page mapping. */
            if ( (l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
                __flush_tlb_pge();
            *pl2e = mk_l2_pgentry(p|flags|_PAGE_PSE);

            v += 1 << L2_PAGETABLE_SHIFT;
            p += 1 << L2_PAGETABLE_SHIFT;
            s -= 1 << L2_PAGETABLE_SHIFT;
        }
        else
        {
            /* Normal page mapping. */
            if ( !(l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
            {
                newpg = (void *)alloc_xenheap_page();
                clear_page(newpg);
                *pl2e = mk_l2_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
            }
            pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(v);
            if ( (l1_pgentry_val(*pl1e) & _PAGE_PRESENT) )
                __flush_tlb_one(v);
            *pl1e = mk_l1_pgentry(p|flags);

            v += 1 << L1_PAGETABLE_SHIFT;
            p += 1 << L1_PAGETABLE_SHIFT;
            s -= 1 << L1_PAGETABLE_SHIFT;
        }
    }

    return 0;
}
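
/*
 * NB. map_pages() installs a 4MB superpage only when the virtual address,
 * physical address and remaining size are all 4MB-aligned (the (s|v|p) test
 * above); otherwise it demand-allocates an L1 table from the Xen heap and
 * installs individual 4kB entries. A typical 4kB-path caller is
 * __set_fixmap() below, which maps a single PAGE_SIZE region. If the entry
 * being replaced is already present, the relevant TLB entries are flushed
 * before the new entry is written.
 */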

void __set_fixmap(
    enum fixed_addresses idx, unsigned long p, unsigned long flags)
{
    if ( unlikely(idx >= __end_of_fixed_addresses) )
        BUG();
    map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags);
}

void __init paging_init(void)
{
    void *ioremap_pt;
    unsigned long v, l2e;
    struct pfn_info *pg;

    /* Allocate and map the machine-to-phys table. */
    if ( (pg = alloc_domheap_pages(NULL, 10)) == NULL )
        panic("Not enough memory to bootstrap Xen.\n");
    idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)] =
        mk_l2_pgentry(page_to_phys(pg) | __PAGE_HYPERVISOR | _PAGE_PSE);
    memset((void *)RDWR_MPT_VIRT_START, 0x55, 4UL << 20);

    /* Xen 4MB mappings can all be GLOBAL. */
    if ( cpu_has_pge )
    {
        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
        {
            l2e = l2_pgentry_val(idle_pg_table[l2_table_offset(v)]);
            if ( l2e & _PAGE_PSE )
                l2e |= _PAGE_GLOBAL;
            idle_pg_table[v >> L2_PAGETABLE_SHIFT] = mk_l2_pgentry(l2e);
        }
    }

    /* Create page table for ioremap(). */
    ioremap_pt = (void *)alloc_xenheap_page();
    clear_page(ioremap_pt);
    idle_pg_table[l2_table_offset(IOREMAP_VIRT_START)] =
        mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);

    /* Create read-only mapping of MPT for guest-OS use. */
    idle_pg_table[l2_table_offset(RO_MPT_VIRT_START)] =
        mk_l2_pgentry(l2_pgentry_val(
            idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]) & ~_PAGE_RW);

    /* Set up mapping cache for domain pages. */
    mapcache = (unsigned long *)alloc_xenheap_page();
    clear_page(mapcache);
    idle_pg_table[l2_table_offset(MAPCACHE_VIRT_START)] =
        mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);

    /* Set up linear page table mapping. */
    idle_pg_table[l2_table_offset(LINEAR_PT_VIRT_START)] =
        mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
}
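
/*
 * Layout established above: the machine-to-physical (M2P) table occupies a
 * single 4MB superpage (an order-10 domheap allocation, i.e. 2^10 = 1024
 * frames, matching the 4UL<<20 memset), mapped read-write at
 * RDWR_MPT_VIRT_START and aliased read-only at RO_MPT_VIRT_START for guest
 * use. Freshly cleared L1 tables back IOREMAP_VIRT_START and
 * MAPCACHE_VIRT_START, and pointing the LINEAR_PT_VIRT_START slot at
 * idle_pg_table itself creates the usual recursive (linear) page-table
 * mapping. Note that the PGE loop relies on 'v' wrapping to zero after the
 * final 4MB step in order to terminate.
 */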

void __init zap_low_mappings(void)
{
    int i;
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
        idle_pg_table[i] = mk_l2_pgentry(0);
    flush_tlb_all_pge();
}
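
/*
 * zap_low_mappings() clears every L2 slot below HYPERVISOR_VIRT_START (the
 * DOMAIN_ENTRIES_PER_L2_PAGETABLE guest-visible slots), discarding whatever
 * boot-time low mappings remain there once Xen runs entirely from its high
 * virtual addresses; the PGE-aware flush ensures no stale global TLB
 * entries for those ranges survive.
 */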

void subarch_init_memory(struct domain *dom_xen)
{
    unsigned long i, m2p_start_mfn;

    /*
     * We are rather picky about the layout of 'struct pfn_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them. Also, just for sanity, we assert the size
     * of the structure here.
     */
    if ( (offsetof(struct pfn_info, u.inuse._domain) !=
          (offsetof(struct pfn_info, count_info) + sizeof(u32))) ||
         (sizeof(struct pfn_info) != 24) )
    {
        printk("Weird pfn_info layout (%ld,%ld,%d)\n",
               offsetof(struct pfn_info, count_info),
               offsetof(struct pfn_info, u.inuse._domain),
               sizeof(struct pfn_info));
        for ( ; ; ) ;
    }

    /* M2P table is mappable read-only by privileged domains. */
    m2p_start_mfn = l2_pgentry_to_pagenr(
        idle_pg_table[l2_table_offset(RDWR_MPT_VIRT_START)]);
    for ( i = 0; i < 1024; i++ )
    {
        frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
        /* gdt to make sure it's only mapped read-only by non-privileged
           domains. */
        frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
        page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
    }
}
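
/*
 * The loop above covers exactly 1024 frames -- the 4MB M2P superpage
 * allocated in paging_init(). Typing each frame as PGT_gdt_page (a type
 * that never permits writable guest mappings) is what enforces the
 * read-only property mentioned in the comments, and ownership is assigned
 * to the special dom_xen domain so that, per the comment above, privileged
 * domains may map the table read-only.
 */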

/*
 * Allows shooting down of borrowed page-table use on specific CPUs.
 * Specifically, we borrow page tables when running the idle domain.
 */
static void __synchronise_pagetables(void *mask)
{
    struct exec_domain *ed = current;
    if ( ((unsigned long)mask & (1 << ed->processor)) &&
         is_idle_task(ed->domain) )
        write_ptbase(ed);
}
void synchronise_pagetables(unsigned long cpu_mask)
{
    __synchronise_pagetables((void *)cpu_mask);
    smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
}
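
/*
 * synchronise_pagetables() runs the handler locally and then, via
 * smp_call_function(..., 1, 1), synchronously on every other CPU; each CPU
 * named in cpu_mask that is currently executing the idle domain reloads its
 * page-table base, dropping any borrowed page tables it was still using.
 */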

long do_stack_switch(unsigned long ss, unsigned long esp)
{
    int nr = smp_processor_id();
    struct tss_struct *t = &init_tss[nr];

    /* We need to do this check as we load and use SS on guest's behalf. */
    if ( (ss & 3) == 0 )
        return -EPERM;

    current->arch.guestos_ss = ss;
    current->arch.guestos_sp = esp;
    t->ss1 = ss;
    t->esp1 = esp;

    return 0;
}
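
/*
 * do_stack_switch() records the guest kernel's ring-1 stack. The (ss & 3)
 * check rejects selectors with RPL 0, since Xen loads this SS on the
 * guest's behalf; ss1/esp1 in the TSS are the values the CPU loads on a
 * ring-3 to ring-1 transition, so updating them switches the stack the
 * guest OS receives on its next kernel entry.
 */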

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(unsigned long *d)
{
    unsigned long base, limit, a = d[0], b = d[1];

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /*
     * We don't allow a DPL of zero. There is no legitimate reason for
     * specifying DPL==0, and it gets rather dangerous if we also accept call
     * gates (consider a call gate pointing at another guestos descriptor with
     * DPL 0 -- this would get the OS ring-0 privileges).
     */
    if ( (b & _SEGMENT_DPL) == 0 )
        goto bad;

    if ( !(b & _SEGMENT_S) )
    {
        /*
         * System segment:
         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
         *  2. Don't allow TSS descriptors or task gates as we don't
         *     virtualise x86 tasks.
         *  3. Don't allow LDT descriptors because they're unnecessary and
         *     I'm uneasy about allowing an LDT page to contain LDT
         *     descriptors. In any case, Xen automatically creates the
         *     required descriptor when reloading the LDT register.
         *  4. We allow call gates but they must not jump to a private segment.
         */

        /* Disallow everything but call gates. */
        if ( (b & _SEGMENT_TYPE) != 0xc00 )
            goto bad;

        /* Can't allow far jump to a Xen-private segment. */
        if ( !VALID_CODESEL(a>>16) )
            goto bad;

        /* Reserved bits must be zero. */
        if ( (b & 0xe0) != 0 )
            goto bad;

        /* No base/limit check is needed for a call gate. */
        goto good;
    }

    /* Check that base is at least a page away from Xen-private area. */
    base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
        goto bad;

    /* Check and truncate the limit if necessary. */
    limit = (b&0xf0000) | (a&0xffff);
    limit++; /* We add one because limit is inclusive. */
    if ( (b & _SEGMENT_G) )
        limit <<= 12;

    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    {
        /*
         * Grows-down limit check.
         * NB. limit == 0xFFFFF provides no access      (if G=1).
         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
         */
        if ( (base + limit) > base )
        {
            limit = -(base & PAGE_MASK);
            goto truncate;
        }
    }
    else
    {
        /*
         * Grows-up limit check.
         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
         *     limit == 0x00000 provides 4kB access (if G=1).
         */
        if ( ((base + limit) <= base) ||
             ((base + limit) > PAGE_OFFSET) )
        {
            limit = PAGE_OFFSET - base;
        truncate:
            if ( !(b & _SEGMENT_G) )
                goto bad; /* too dangerous; too hard to work out... */
            limit = (limit >> 12) - 1;
            d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
            d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
        }
    }

 good:
    return 1;
 bad:
    return 0;
}
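
/*
 * Worked example of the truncation path (illustrative only; the concrete
 * values are not taken from this file): a flat grows-up data segment with
 * base 0, limit field 0xFFFFF and G=1 yields limit = (0xFFFFF+1) << 12,
 * which wraps to 0 in 32 bits, so the grows-up check fails and limit is
 * clipped to PAGE_OFFSET - base. The rewritten limit field becomes
 * (PAGE_OFFSET >> 12) - 1 with G still set, i.e. the segment now covers
 * [base, PAGE_OFFSET) and can no longer reach the Xen-private area.
 */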

void destroy_gdt(struct exec_domain *ed)
{
    int i;
    unsigned long pfn;

    for ( i = 0; i < 16; i++ )
    {
        if ( (pfn = l1_pgentry_to_pagenr(ed->arch.perdomain_ptes[i])) != 0 )
            put_page_and_type(&frame_table[pfn]);
        ed->arch.perdomain_ptes[i] = mk_l1_pgentry(0);
    }
}
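
/*
 * The 16 iterations above cover the full per-domain GDT mapping: the
 * architectural maximum of 8192 descriptors * 8 bytes = 64kB = 16 pages,
 * matching the frames[16] array and the 512-entries-per-page arithmetic
 * used by set_gdt()/do_set_gdt() below.
 */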

long set_gdt(struct exec_domain *ed,
             unsigned long *frames,
             unsigned int entries)
{
    struct domain *d = ed->domain;
    /* NB. There are 512 8-byte entries per GDT page. */
    int i = 0, nr_pages = (entries + 511) / 512;
    struct desc_struct *vgdt;
    unsigned long pfn;

    /* Check the first page in the new GDT. */
    if ( (pfn = frames[0]) >= max_page )
        goto fail;

    /* The first page is special because Xen owns a range of entries in it. */
    if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
    {
        /* GDT checks failed: try zapping the Xen reserved entries. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_writable_page) )
            goto fail;
        vgdt = map_domain_mem(pfn << PAGE_SHIFT);
        memset(vgdt + FIRST_RESERVED_GDT_ENTRY, 0,
               NR_RESERVED_GDT_ENTRIES*8);
        unmap_domain_mem(vgdt);
        put_page_and_type(&frame_table[pfn]);

        /* Okay, we zapped the entries. Now try the GDT checks again. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;
    }

    /* Check the remaining pages in the new GDT. */
    for ( i = 1; i < nr_pages; i++ )
        if ( ((pfn = frames[i]) >= max_page) ||
             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;

    /* Copy reserved GDT entries to the new GDT. */
    vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
    memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
           gdt_table + FIRST_RESERVED_GDT_ENTRY,
           NR_RESERVED_GDT_ENTRIES*8);
    unmap_domain_mem(vgdt);

    /* Tear down the old GDT. */
    destroy_gdt(ed);

    /* Install the new GDT. */
    for ( i = 0; i < nr_pages; i++ )
        ed->arch.perdomain_ptes[i] =
            mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    SET_GDT_ADDRESS(ed, GDT_VIRT_START(ed));
    SET_GDT_ENTRIES(ed, entries);

    return 0;

 fail:
    while ( i-- > 0 )
        put_page_and_type(&frame_table[frames[i]]);
    return -EINVAL;
}
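
/*
 * The first-frame dance above handles the case where the page fails
 * PGT_gdt_page validation, presumably because it still contains stale
 * contents in the Xen-reserved descriptor slots: the page is briefly taken
 * as PGT_writable_page, the reserved slots are zeroed, the reference is
 * dropped, and the gdt_page validation is retried. Once every frame
 * validates, Xen's own reserved descriptors are copied in from gdt_table
 * and the frames are wired into the per-domain PTEs backing GDT_VIRT_START.
 */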

long do_set_gdt(unsigned long *frame_list, unsigned int entries)
{
    int nr_pages = (entries + 511) / 512;
    unsigned long frames[16];
    long ret;

    if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
        return -EINVAL;

    if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
        return -EFAULT;

    LOCK_BIGLOCK(current->domain);

    if ( (ret = set_gdt(current, frames, entries)) == 0 )
    {
        local_flush_tlb();
        __asm__ __volatile__ ("lgdt %0" : "=m" (*current->arch.gdt));
    }

    UNLOCK_BIGLOCK(current->domain);

    return ret;
}
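
/*
 * Note the bounds check above: entries must exceed LAST_RESERVED_GDT_ENTRY
 * (the new GDT must at least reach past Xen's reserved slots) and may not
 * exceed the architectural maximum of 8192, so nr_pages is at most 16 and
 * the on-stack frames[16] array is exactly large enough for the
 * copy_from_user(). The GDT register is then reloaded with lgdt so the CPU
 * picks up the freshly installed per-domain GDT mapping.
 */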

long do_update_descriptor(
    unsigned long pa, unsigned long word1, unsigned long word2)
{
    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
    struct pfn_info *page;
    struct exec_domain *ed;
    long ret = -EINVAL;

    d[0] = word1;
    d[1] = word2;

    LOCK_BIGLOCK(current->domain);

    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) ) {
        UNLOCK_BIGLOCK(current->domain);
        return -EINVAL;
    }

    page = &frame_table[pfn];
    if ( unlikely(!get_page(page, current->domain)) ) {
        UNLOCK_BIGLOCK(current->domain);
        return -EINVAL;
    }

    /* Check if the given frame is in use in an unsafe context. */
    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_gdt_page:
        /* Disallow updates of Xen-reserved descriptors in the current GDT. */
        for_each_exec_domain(current->domain, ed) {
            if ( (l1_pgentry_to_pagenr(ed->arch.perdomain_ptes[0]) == pfn) &&
                 (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
                 (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
                goto out;
        }
        if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
            goto out;
        break;
    case PGT_ldt_page:
        if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
            goto out;
        break;
    default:
        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
            goto out;
        break;
    }

    /* All is good so make the update. */
    gdt_pent = map_domain_mem(pa);
    memcpy(gdt_pent, d, 8);
    unmap_domain_mem(gdt_pent);

    put_page_type(page);

    ret = 0; /* success */

 out:
    put_page(page);

    UNLOCK_BIGLOCK(current->domain);

    return ret;
}
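
/*
 * do_update_descriptor() lets a guest rewrite one 8-byte descriptor in
 * place. The update is permitted only if the machine address is
 * descriptor-aligned, check_descriptor() accepts the new contents, the
 * frame belongs to the calling domain, and the frame's current type (GDT
 * page, LDT page, or plain writable) can be re-taken; writes that would
 * land in the Xen-reserved slots of a live GDT are rejected outright.
 */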

#ifdef MEMORY_GUARD

void *memguard_init(void *heap_start)
{
    l1_pgentry_t *l1;
    int i, j;

    /* Round the allocation pointer up to a page boundary. */
    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
                          PAGE_MASK);

    /* Memory guarding is incompatible with super pages. */
    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
    {
        l1 = (l1_pgentry_t *)heap_start;
        heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
        for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
            l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
                                  (j << L1_PAGETABLE_SHIFT) |
                                  __PAGE_HYPERVISOR);
        idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
            mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
    }

    return heap_start;
}
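
/*
 * Memory guarding works by clearing _PAGE_PRESENT on individual 4kB PTEs
 * (see __memguard_change_range() below), which is impossible while the
 * region is covered by 4MB superpages. memguard_init() therefore carves one
 * L1 table out of the heap for each 4MB of memory below xenheap_phys_end,
 * installs ordinary 4kB mappings for that range at PAGE_OFFSET, and returns
 * the advanced heap pointer to the caller.
 */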

static void __memguard_change_range(void *p, unsigned long l, int guard)
{
    l1_pgentry_t *l1;
    l2_pgentry_t *l2;
    unsigned long _p = (unsigned long)p;
    unsigned long _l = (unsigned long)l;

    /* Ensure we are dealing with a page-aligned whole number of pages. */
    ASSERT((_p&PAGE_MASK) != 0);
    ASSERT((_l&PAGE_MASK) != 0);
    ASSERT((_p&~PAGE_MASK) == 0);
    ASSERT((_l&~PAGE_MASK) == 0);

    while ( _l != 0 )
    {
        l2 = &idle_pg_table[l2_table_offset(_p)];
        l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
        if ( guard )
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
        else
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
        _p += PAGE_SIZE;
        _l -= PAGE_SIZE;
    }
}

void memguard_guard_stack(void *p)
{
    memguard_guard_range(p, PAGE_SIZE);
}

void memguard_guard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 1);
    local_flush_tlb();
}

void memguard_unguard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 0);
}
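
/*
 * Guarding clears _PAGE_PRESENT so any access to the range faults;
 * memguard_guard_range() must flush the TLB so that stale translations
 * cannot bypass the guard. Unguarding needs no flush because not-present
 * entries are never cached in the TLB -- the next access simply walks the
 * page tables and finds the restored mapping.
 */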

#endif