view xen/arch/x86/x86_32/mm.c @ 3349:c754bd0be650 (debuggers.hg)

bitkeeper revision 1.1159.1.496 (41c85faeMBUejFtICiJueb_Xdh8yJA)

Priv-op emulation in Xen, for RDMSR/WRMSR/WBINVD. Cleaned up Linux
a bit as a result.
author kaf24@scramble.cl.cam.ac.uk
date Tue Dec 21 17:38:54 2004 +0000 (2004-12-21)
parents 2bcd45440fd9
children d1e0d9a8fde0

/******************************************************************************
 * arch/x86/x86_32/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/domain_page.h>

static inline void set_pte_phys(unsigned long vaddr,
                                l1_pgentry_t entry)
{
    l2_pgentry_t *l2ent;
    l1_pgentry_t *l1ent;

    l2ent = &idle_pg_table[l2_table_offset(vaddr)];
    l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
    *l1ent = entry;

    /* It's enough to flush this one mapping. */
    __flush_tlb_one(vaddr);
}

void __set_fixmap(enum fixed_addresses idx,
                  l1_pgentry_t entry)
{
    unsigned long address = fix_to_virt(idx);

    if ( likely(idx < __end_of_fixed_addresses) )
        set_pte_phys(address, entry);
    else
        printk("Invalid __set_fixmap\n");
}

void __init paging_init(void)
{
    void *ioremap_pt;
    int i;

    /* Xen heap mappings can be GLOBAL. */
    if ( cpu_has_pge )
    {
        for ( i = 0; i < DIRECTMAP_PHYS_END; i += (1 << L2_PAGETABLE_SHIFT) )
            ((unsigned long *)idle_pg_table)
                [(i + PAGE_OFFSET) >> L2_PAGETABLE_SHIFT] |= _PAGE_GLOBAL;
    }

    /* Create page table for ioremap(). */
    ioremap_pt = (void *)alloc_xenheap_page();
    clear_page(ioremap_pt);
    idle_pg_table[IOREMAP_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(ioremap_pt) | __PAGE_HYPERVISOR);

    /* Create read-only mapping of MPT for guest-OS use. */
    idle_pg_table[RO_MPT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(l2_pgentry_val(
            idle_pg_table[RDWR_MPT_VIRT_START >> L2_PAGETABLE_SHIFT]) &
                      ~_PAGE_RW);

    /* Set up mapping cache for domain pages. */
    mapcache = (unsigned long *)alloc_xenheap_page();
    clear_page(mapcache);
    idle_pg_table[MAPCACHE_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(mapcache) | __PAGE_HYPERVISOR);

    /* Set up linear page table mapping. */
    idle_pg_table[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT] =
        mk_l2_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
}
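
/*
 * NB. The LINEAR_PT_VIRT_START entry installed above maps idle_pg_table into
 * itself (the recursive page-table trick): every page-table page thereby
 * appears at a fixed virtual address, so individual page-table entries can be
 * read and written through the linear mapping without an explicit
 * map_domain_mem() call.
 */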

void __init zap_low_mappings(void)
{
    int i;
    for ( i = 0; i < DOMAIN_ENTRIES_PER_L2_PAGETABLE; i++ )
        idle_pg_table[i] = mk_l2_pgentry(0);
    flush_tlb_all_pge();
}
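
/*
 * NB. The loop above clears every L2 entry below HYPERVISOR_VIRT_START, i.e.
 * the portion of the address space that will belong to guests, removing the
 * low mappings that are only needed during early boot. flush_tlb_all_pge()
 * is used so that even GLOBAL TLB entries are discarded.
 */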

/*
 * Allows shooting down of borrowed page-table use on specific CPUs.
 * Specifically, we borrow page tables when running the idle domain.
 */
static void __synchronise_pagetables(void *mask)
{
    struct exec_domain *ed = current;
    if ( ((unsigned long)mask & (1 << ed->processor)) &&
         is_idle_task(ed->domain) )
        write_ptbase(&ed->mm);
}
void synchronise_pagetables(unsigned long cpu_mask)
{
    __synchronise_pagetables((void *)cpu_mask);
    smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
}
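
/*
 * NB. do_stack_switch() below lets the guest kernel nominate the stack used
 * when control enters ring 1: ss1/esp1 are the ring-1 stack fields of this
 * CPU's TSS, and the values are also recorded per exec_domain so they can be
 * reinstated on a context switch. The (ss & 3) == 0 check rejects selectors
 * with RPL 0, since Xen loads this SS on the guest's behalf and must never
 * hand out a ring-0 stack segment.
 */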

long do_stack_switch(unsigned long ss, unsigned long esp)
{
    int nr = smp_processor_id();
    struct tss_struct *t = &init_tss[nr];

    /* We need to do this check as we load and use SS on guest's behalf. */
    if ( (ss & 3) == 0 )
        return -EPERM;

    current->thread.guestos_ss = ss;
    current->thread.guestos_sp = esp;
    t->ss1  = ss;
    t->esp1 = esp;

    return 0;
}
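
/*
 * NB. Layout of the descriptor fields used below (standard IA-32 format):
 *   d[0] ('a'): limit[15:0] in bits 15:0, base[15:0] in bits 31:16.
 *   d[1] ('b'): base[23:16] in bits 7:0, type in bits 11:8, S in bit 12,
 *               DPL in bits 14:13, P in bit 15, limit[19:16] in bits 19:16,
 *               G in bit 23, base[31:24] in bits 31:24.
 * For a call gate (S=0, type 0xC) bits 31:16 of d[0] instead hold the target
 * code-segment selector, which is what VALID_CODESEL(a>>16) inspects.
 */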

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(unsigned long *d)
{
    unsigned long base, limit, a = d[0], b = d[1];

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /*
     * We don't allow a DPL of zero. There is no legitimate reason for
     * specifying DPL==0, and it gets rather dangerous if we also accept call
     * gates (consider a call gate pointing at another guestos descriptor with
     * DPL 0 -- this would get the OS ring-0 privileges).
     */
    if ( (b & _SEGMENT_DPL) == 0 )
        goto bad;

    if ( !(b & _SEGMENT_S) )
    {
        /*
         * System segment:
         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
         *  2. Don't allow TSS descriptors or task gates as we don't
         *     virtualise x86 tasks.
         *  3. Don't allow LDT descriptors because they're unnecessary and
         *     I'm uneasy about allowing an LDT page to contain LDT
         *     descriptors. In any case, Xen automatically creates the
         *     required descriptor when reloading the LDT register.
         *  4. We allow call gates but they must not jump to a private segment.
         */

        /* Disallow everything but call gates. */
        if ( (b & _SEGMENT_TYPE) != 0xc00 )
            goto bad;

        /* Can't allow far jump to a Xen-private segment. */
        if ( !VALID_CODESEL(a>>16) )
            goto bad;

        /* Reserved bits must be zero. */
        if ( (b & 0xe0) != 0 )
            goto bad;

        /* No base/limit check is needed for a call gate. */
        goto good;
    }

    /* Check that base is at least a page away from Xen-private area. */
    base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
        goto bad;

    /* Check and truncate the limit if necessary. */
    limit = (b&0xf0000) | (a&0xffff);
    limit++; /* We add one because limit is inclusive. */
    if ( (b & _SEGMENT_G) )
        limit <<= 12;

    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    {
        /*
         * Grows-down limit check.
         * NB. limit == 0xFFFFF provides no access      (if G=1).
         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
         */
        if ( (base + limit) > base )
        {
            limit = -(base & PAGE_MASK);
            goto truncate;
        }
    }
    else
    {
        /*
         * Grows-up limit check.
         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
         *     limit == 0x00000 provides 4kB access (if G=1).
         */
        if ( ((base + limit) <= base) ||
             ((base + limit) > PAGE_OFFSET) )
        {
            limit = PAGE_OFFSET - base;
        truncate:
            if ( !(b & _SEGMENT_G) )
                goto bad; /* too dangerous; too hard to work out... */
            limit = (limit >> 12) - 1;
            d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
            d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
        }
    }

 good:
    return 1;
 bad:
    return 0;
}
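
/*
 * NB. Worked example of the truncation above: a flat data segment with
 * base=0, limit=0xFFFFF, G=1 expands to limit = 0x100000 << 12, which wraps
 * to 0 in 32-bit arithmetic. The grows-up test (base + limit) <= base then
 * fires, limit becomes PAGE_OFFSET, and the stored limit is rewritten as
 * (PAGE_OFFSET >> 12) - 1: the segment is silently clipped to end just below
 * the Xen-private area instead of being rejected outright.
 */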

void destroy_gdt(struct exec_domain *ed)
{
    int i;
    unsigned long pfn;

    for ( i = 0; i < 16; i++ )
    {
        if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[i])) != 0 )
            put_page_and_type(&frame_table[pfn]);
        ed->mm.perdomain_ptes[i] = mk_l1_pgentry(0);
    }
}
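
/*
 * NB. A 4kB frame holds 4096/8 = 512 descriptors and the x86 GDT is capped at
 * 8192 entries (a 16-bit limit of 64kB), so a GDT spans at most 16 frames.
 * That is why destroy_gdt() above walks 16 perdomain_ptes slots and
 * do_set_gdt() below accepts at most 16 frame numbers.
 */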

long set_gdt(struct exec_domain *ed,
             unsigned long *frames,
             unsigned int entries)
{
    struct domain *d = ed->domain;
    /* NB. There are 512 8-byte entries per GDT page. */
    int i = 0, nr_pages = (entries + 511) / 512;
    struct desc_struct *vgdt;
    unsigned long pfn;

    /* Check the first page in the new GDT. */
    if ( (pfn = frames[0]) >= max_page )
        goto fail;

    /* The first page is special because Xen owns a range of entries in it. */
    if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
    {
        /* GDT checks failed: try zapping the Xen reserved entries. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_writable_page) )
            goto fail;
        vgdt = map_domain_mem(pfn << PAGE_SHIFT);
        memset(vgdt + FIRST_RESERVED_GDT_ENTRY, 0,
               NR_RESERVED_GDT_ENTRIES*8);
        unmap_domain_mem(vgdt);
        put_page_and_type(&frame_table[pfn]);

        /* Okay, we zapped the entries. Now try the GDT checks again. */
        if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;
    }

    /* Check the remaining pages in the new GDT. */
    for ( i = 1; i < nr_pages; i++ )
        if ( ((pfn = frames[i]) >= max_page) ||
             !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
            goto fail;

    /* Copy reserved GDT entries to the new GDT. */
    vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
    memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
           gdt_table + FIRST_RESERVED_GDT_ENTRY,
           NR_RESERVED_GDT_ENTRIES*8);
    unmap_domain_mem(vgdt);

    /* Tear down the old GDT. */
    destroy_gdt(ed);

    /* Install the new GDT. */
    for ( i = 0; i < nr_pages; i++ )
        ed->mm.perdomain_ptes[i] =
            mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);

    SET_GDT_ADDRESS(ed, GDT_VIRT_START(ed));
    SET_GDT_ENTRIES(ed, entries);

    return 0;

 fail:
    while ( i-- > 0 )
        put_page_and_type(&frame_table[frames[i]]);
    return -EINVAL;
}

long do_set_gdt(unsigned long *frame_list, unsigned int entries)
{
    int nr_pages = (entries + 511) / 512;
    unsigned long frames[16];
    long ret;

    if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
        return -EINVAL;

    if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
        return -EFAULT;

    LOCK_BIGLOCK(current->domain);

    if ( (ret = set_gdt(current, frames, entries)) == 0 )
    {
        local_flush_tlb();
        __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
    }

    UNLOCK_BIGLOCK(current->domain);

    return ret;
}
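
/*
 * NB. In do_update_descriptor() below, 'pa' is the machine address of the
 * descriptor being written. It must be 8-byte aligned because descriptors are
 * 8 bytes, and ((pa & (PAGE_SIZE-1)) >> 3) is the descriptor's index within
 * its frame, which is how writes to the Xen-reserved GDT slots are detected
 * and refused.
 */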

long do_update_descriptor(
    unsigned long pa, unsigned long word1, unsigned long word2)
{
    unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
    struct pfn_info *page;
    struct exec_domain *ed;
    long ret = -EINVAL;

    d[0] = word1;
    d[1] = word2;

    LOCK_BIGLOCK(current->domain);

    if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) ) {
        UNLOCK_BIGLOCK(current->domain);
        return -EINVAL;
    }

    page = &frame_table[pfn];
    if ( unlikely(!get_page(page, current->domain)) ) {
        UNLOCK_BIGLOCK(current->domain);
        return -EINVAL;
    }

    /* Check if the given frame is in use in an unsafe context. */
    switch ( page->u.inuse.type_info & PGT_type_mask )
    {
    case PGT_gdt_page:
        /* Disallow updates of Xen-reserved descriptors in the current GDT. */
        for_each_exec_domain(current->domain, ed) {
            if ( (l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[0]) == pfn) &&
                 (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
                 (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
                goto out;
        }
        if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
            goto out;
        break;
    case PGT_ldt_page:
        if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
            goto out;
        break;
    default:
        if ( unlikely(!get_page_type(page, PGT_writable_page)) )
            goto out;
        break;
    }

    /* All is good so make the update. */
    gdt_pent = map_domain_mem(pa);
    memcpy(gdt_pent, d, 8);
    unmap_domain_mem(gdt_pent);

    put_page_type(page);

    ret = 0; /* success */

 out:
    put_page(page);

    UNLOCK_BIGLOCK(current->domain);

    return ret;
}
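
/*
 * NB. MEMORY_GUARD support: memguard_init() rebuilds the Xen heap's direct
 * mapping from 4kB L1 entries instead of superpages, so that
 * memguard_guard_range() can later clear _PAGE_PRESENT on individual pages.
 * Any access to a guarded range then faults, catching stray reads and writes
 * to memory that is meant to be off limits.
 */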

#ifdef MEMORY_GUARD

void *memguard_init(void *heap_start)
{
    l1_pgentry_t *l1;
    int i, j;

    /* Round the allocation pointer up to a page boundary. */
    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
                          PAGE_MASK);

    /* Memory guarding is incompatible with super pages. */
    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
    {
        l1 = (l1_pgentry_t *)heap_start;
        heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
        for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
            l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
                                  (j << L1_PAGETABLE_SHIFT) |
                                  __PAGE_HYPERVISOR);
        idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
            mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
    }

    return heap_start;
}

static void __memguard_change_range(void *p, unsigned long l, int guard)
{
    l1_pgentry_t *l1;
    l2_pgentry_t *l2;
    unsigned long _p = (unsigned long)p;
    unsigned long _l = (unsigned long)l;

    /* Ensure we are dealing with a page-aligned whole number of pages. */
    ASSERT((_p&PAGE_MASK) != 0);
    ASSERT((_l&PAGE_MASK) != 0);
    ASSERT((_p&~PAGE_MASK) == 0);
    ASSERT((_l&~PAGE_MASK) == 0);

    while ( _l != 0 )
    {
        l2 = &idle_pg_table[l2_table_offset(_p)];
        l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
        if ( guard )
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
        else
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
        _p += PAGE_SIZE;
        _l -= PAGE_SIZE;
    }
}

void memguard_guard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 1);
    local_flush_tlb();
}

void memguard_unguard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 0);
}

#endif