debuggers.hg: view of xen/arch/x86/x86_64/mm.c @ 3336:2711f7eb364c

bitkeeper revision 1.1159.1.490 (41c1bb05aOZv3pnPk-NIbxvGZzv5BQ)

page.h, mm.c:
    More cleaning.

author   kaf24@pb001.cl.cam.ac.uk
date     Thu Dec 16 16:42:45 2004 +0000 (2004-12-16)
parents  dda5ab69e74a
children c754bd0be650
1 /******************************************************************************
2 * arch/x86/x86_64/mm.c
3 *
4 * Modifications to Linux original are copyright (c) 2004, K A Fraser
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 */
21 #include <xen/config.h>
22 #include <xen/lib.h>
23 #include <xen/init.h>
24 #include <xen/mm.h>
25 #include <asm/page.h>
26 #include <asm/flushtlb.h>
27 #include <asm/fixmap.h>
28 #include <asm/domain_page.h>
30 static inline void set_pte_phys(unsigned long vaddr,
31 l1_pgentry_t entry)
32 {
33 l4_pgentry_t *l4ent;
34 l3_pgentry_t *l3ent;
35 l2_pgentry_t *l2ent;
36 l1_pgentry_t *l1ent;
38 l4ent = &idle_pg_table[l4_table_offset(vaddr)];
39 l3ent = l4_pgentry_to_l3(*l4ent) + l3_table_offset(vaddr);
40 l2ent = l3_pgentry_to_l2(*l3ent) + l2_table_offset(vaddr);
41 l1ent = l2_pgentry_to_l1(*l2ent) + l1_table_offset(vaddr);
42 *l1ent = entry;
44 /* It's enough to flush this one mapping. */
45 __flush_tlb_one(vaddr);
46 }
49 void __set_fixmap(enum fixed_addresses idx,
50 l1_pgentry_t entry)
51 {
52 unsigned long address = fix_to_virt(idx);
54 if ( likely(idx < __end_of_fixed_addresses) )
55 set_pte_phys(address, entry);
56 else
57 printk("Invalid __set_fixmap\n");
58 }
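
Editorial note: set_pte_phys() assumes every intermediate table covering the fixmap region already exists in idle_pg_table; it only rewrites the final l1 entry and flushes that single mapping. A minimal usage sketch of the public wrapper follows, under the assumption of a hypothetical fixmap slot FIX_EXAMPLE (real indices come from asm/fixmap.h):

    /* Editorial sketch, not part of this revision: map a page-aligned
     * physical frame at a hypothetical fixmap slot. */
    static void example_map_fixmap(unsigned long phys)
    {
        __set_fixmap(FIX_EXAMPLE, mk_l1_pgentry(phys | __PAGE_HYPERVISOR));
    }
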
61 void __init paging_init(void)
62 {
63 /* Set up linear page table mapping. */
64 idle_pg_table[LINEAR_PT_VIRT_START >> L4_PAGETABLE_SHIFT] =
65 mk_l4_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
66 }
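
Editorial note: the single assignment in paging_init() installs the classic self-referencing (linear) page-table trick: one L4 slot maps the L4 back onto itself, so every l1 entry of the current address space appears as a flat array inside a 512GB virtual window. A sketch of the resulting lookup, assuming 4kB pages and that LINEAR_PT_VIRT_START names the base of that window (the helper below is illustrative only):

    /* Editorial sketch: with the self-map in place, the l1 entry covering a
     * virtual address va lives at a computable virtual address; bits 47:12
     * of va select the entry within the linear window. */
    static l1_pgentry_t example_linear_lookup(unsigned long va)
    {
        l1_pgentry_t *linear_pt = (l1_pgentry_t *)LINEAR_PT_VIRT_START;
        unsigned long idx = (va >> PAGE_SHIFT) & ((1UL << 36) - 1);
        return linear_pt[idx];
    }
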
68 void __init zap_low_mappings(void)
69 {
70 idle_pg_table[0] = mk_l4_pgentry(0);
71 }
74 /*
75 * Allows shooting down of borrowed page-table use on specific CPUs.
76 * Specifically, we borrow page tables when running the idle domain.
77 */
78 static void __synchronise_pagetables(void *mask)
79 {
80 struct exec_domain *ed = current;
81 if ( ((unsigned long)mask & (1 << ed->processor)) &&
82 is_idle_task(ed->domain) )
83 write_ptbase(&ed->mm);
84 }
85 void synchronise_pagetables(unsigned long cpu_mask)
86 {
87 __synchronise_pagetables((void *)cpu_mask);
88 smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
89 }
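
Editorial note: the CPU mask is smuggled through the void * argument of smp_call_function(), and the trailing (1, 1) arguments are the usual retry and wait flags of that Linux-derived interface. A usage sketch (the chosen CPU numbers are arbitrary):

    /* Editorial sketch: make CPUs 0 and 2 drop any borrowed idle-domain page
     * tables before those tables are reused or torn down. */
    static void example_shootdown(void)
    {
        synchronise_pagetables((1UL << 0) | (1UL << 2));
    }
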
91 long do_stack_switch(unsigned long ss, unsigned long esp)
92 {
93 #if 0
94 int nr = smp_processor_id();
95 struct tss_struct *t = &init_tss[nr];
97 /* We need to do this check as we load and use SS on guest's behalf. */
98 if ( (ss & 3) == 0 )
99 return -EPERM;
101 current->thread.guestos_ss = ss;
102 current->thread.guestos_sp = esp;
103 t->ss1 = ss;
104 t->esp1 = esp;
105 #endif
106 return 0;
107 }
110 /* Returns TRUE if given descriptor is valid for GDT or LDT. */
111 int check_descriptor(unsigned long *d)
112 {
113 unsigned long base, limit, a = d[0], b = d[1];
115 /* A not-present descriptor will always fault, so is safe. */
116 if ( !(b & _SEGMENT_P) )
117 goto good;
119 /*
120 * We don't allow a DPL of zero. There is no legitimate reason for
121 * specifying DPL==0, and it gets rather dangerous if we also accept call
122 * gates (consider a call gate pointing at another guestos descriptor with
123 * DPL 0 -- this would give the guest OS ring-0 privileges).
124 */
125 if ( (b & _SEGMENT_DPL) == 0 )
126 goto bad;
128 if ( !(b & _SEGMENT_S) )
129 {
130 /*
131 * System segment:
132 * 1. Don't allow interrupt or trap gates as they belong in the IDT.
133 * 2. Don't allow TSS descriptors or task gates as we don't
134 * virtualise x86 tasks.
135 * 3. Don't allow LDT descriptors because they're unnecessary and
136 * I'm uneasy about allowing an LDT page to contain LDT
137 * descriptors. In any case, Xen automatically creates the
138 * required descriptor when reloading the LDT register.
139 * 4. We allow call gates but they must not jump to a private segment.
140 */
142 /* Disallow everything but call gates. */
143 if ( (b & _SEGMENT_TYPE) != 0xc00 )
144 goto bad;
146 #if 0
147 /* Can't allow far jump to a Xen-private segment. */
148 if ( !VALID_CODESEL(a>>16) )
149 goto bad;
150 #endif
152 /* Reserved bits must be zero. */
153 if ( (b & 0xe0) != 0 )
154 goto bad;
156 /* No base/limit check is needed for a call gate. */
157 goto good;
158 }
160 /* Check that base is at least a page away from Xen-private area. */
161 base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
162 if ( base >= (PAGE_OFFSET - PAGE_SIZE) )
163 goto bad;
165 /* Check and truncate the limit if necessary. */
166 limit = (b&0xf0000) | (a&0xffff);
167 limit++; /* We add one because limit is inclusive. */
168 if ( (b & _SEGMENT_G) )
169 limit <<= 12;
171 if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
172 {
173 /*
174 * Grows-down limit check.
175 * NB. limit == 0xFFFFF provides no access (if G=1).
176 * limit == 0x00000 provides 4GB-4kB access (if G=1).
177 */
178 if ( (base + limit) > base )
179 {
180 limit = -(base & PAGE_MASK);
181 goto truncate;
182 }
183 }
184 else
185 {
186 /*
187 * Grows-up limit check.
188 * NB. limit == 0xFFFFF provides 4GB access (if G=1).
189 * limit == 0x00000 provides 4kB access (if G=1).
190 */
191 if ( ((base + limit) <= base) ||
192 ((base + limit) > PAGE_OFFSET) )
193 {
194 limit = PAGE_OFFSET - base;
195 truncate:
196 if ( !(b & _SEGMENT_G) )
197 goto bad; /* too dangerous; too hard to work out... */
198 limit = (limit >> 12) - 1;
199 d[0] &= ~0x0ffff; d[0] |= limit & 0x0ffff;
200 d[1] &= ~0xf0000; d[1] |= limit & 0xf0000;
201 }
202 }
204 good:
205 return 1;
206 bad:
207 return 0;
208 }
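
Editorial note: the base/limit extraction above follows the standard x86 descriptor layout, base = b[31:24]:b[7:0]:a[31:16] and limit = b[19:16]:a[15:0], with the limit scaled by 4kB when the granularity bit is set. A worked trace of the grows-up path, using an illustrative flat ring-1 code segment (all values chosen for the example):

    /* Editorial worked example: decode one descriptor the same way the
     * function above does. */
    static void example_decode(void)
    {
        unsigned long a = 0x0000ffff;   /* limit[15:0] = 0xffff, base[15:0] = 0 */
        unsigned long b = 0x00cfba00;   /* P=1 DPL=1 S=1 code, G=1, limit[19:16] = 0xf */
        unsigned long base, limit;

        base  = (b & (0xffUL << 24)) | ((b & 0xff) << 16) | (a >> 16); /* 0 */
        limit = (b & 0xf0000) | (a & 0xffff);                          /* 0xfffff */
        limit++;                                                       /* inclusive -> 0x100000 */
        limit <<= 12;                                                  /* G=1 -> 4GB */

        /* If base + limit exceeded PAGE_OFFSET, check_descriptor() would
         * truncate the limit to PAGE_OFFSET - base and rewrite d[0]/d[1];
         * otherwise the descriptor is accepted as-is.  Had the access byte
         * been 0x9a (DPL 0), the DPL check would have rejected it outright. */
        (void)base; (void)limit;
    }
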
211 void destroy_gdt(struct exec_domain *ed)
212 {
213 int i;
214 unsigned long pfn;
216 for ( i = 0; i < 16; i++ )
217 {
218 if ( (pfn = l1_pgentry_to_pagenr(ed->mm.perdomain_ptes[i])) != 0 )
219 put_page_and_type(&frame_table[pfn]);
220 ed->mm.perdomain_ptes[i] = mk_l1_pgentry(0);
221 }
222 }
225 long set_gdt(struct exec_domain *ed,
226 unsigned long *frames,
227 unsigned int entries)
228 {
229 struct domain *d = ed->domain;
230 /* NB. There are 512 8-byte entries per GDT page. */
231 int i = 0, nr_pages = (entries + 511) / 512;
232 struct desc_struct *vgdt;
233 unsigned long pfn;
235 /* Check the first page in the new GDT. */
236 if ( (pfn = frames[0]) >= max_page )
237 goto fail;
239 /* The first page is special because Xen owns a range of entries in it. */
240 if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
241 {
242 /* GDT checks failed: try zapping the Xen reserved entries. */
243 if ( !get_page_and_type(&frame_table[pfn], d, PGT_writable_page) )
244 goto fail;
245 vgdt = map_domain_mem(pfn << PAGE_SHIFT);
246 memset(vgdt + FIRST_RESERVED_GDT_ENTRY, 0,
247 NR_RESERVED_GDT_ENTRIES*8);
248 unmap_domain_mem(vgdt);
249 put_page_and_type(&frame_table[pfn]);
251 /* Okay, we zapped the entries. Now try the GDT checks again. */
252 if ( !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
253 goto fail;
254 }
256 /* Check the remaining pages in the new GDT. */
257 for ( i = 1; i < nr_pages; i++ )
258 if ( ((pfn = frames[i]) >= max_page) ||
259 !get_page_and_type(&frame_table[pfn], d, PGT_gdt_page) )
260 goto fail;
262 /* Copy reserved GDT entries to the new GDT. */
263 vgdt = map_domain_mem(frames[0] << PAGE_SHIFT);
264 memcpy(vgdt + FIRST_RESERVED_GDT_ENTRY,
265 gdt_table + FIRST_RESERVED_GDT_ENTRY,
266 NR_RESERVED_GDT_ENTRIES*8);
267 unmap_domain_mem(vgdt);
269 /* Tear down the old GDT. */
270 destroy_gdt(ed);
272 /* Install the new GDT. */
273 for ( i = 0; i < nr_pages; i++ )
274 ed->mm.perdomain_ptes[i] =
275 mk_l1_pgentry((frames[i] << PAGE_SHIFT) | __PAGE_HYPERVISOR);
277 SET_GDT_ADDRESS(ed, GDT_VIRT_START(ed));
278 SET_GDT_ENTRIES(ed, entries);
280 return 0;
282 fail:
283 while ( i-- > 0 )
284 put_page_and_type(&frame_table[frames[i]]);
285 return -EINVAL;
286 }
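
Editorial note: the sizing in set_gdt() rests on the "512 8-byte entries per GDT page" comment: PAGE_SIZE / 8 = 512 descriptors per frame, so the architectural maximum of 8192 entries needs at most 16 frames, which is exactly why do_set_gdt() below uses a frames[16] array. The rounding, spelled out with illustrative values:

    /* Editorial worked example: the page arithmetic behind nr_pages and
     * frames[16]. */
    static void example_gdt_sizing(void)
    {
        unsigned int entries   = 520;                   /* just over one page's worth */
        unsigned int nr_pages  = (entries + 511) / 512; /* = 2 frames */
        unsigned int max_pages = (8192 + 511) / 512;    /* = 16, hence frames[16] */

        /* The first frame must also leave room for Xen's own descriptors in
         * [FIRST_RESERVED_GDT_ENTRY, LAST_RESERVED_GDT_ENTRY]; if the type
         * check fails, set_gdt() zeroes that range and tries again. */
        (void)nr_pages; (void)max_pages;
    }
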
289 long do_set_gdt(unsigned long *frame_list, unsigned int entries)
290 {
291 int nr_pages = (entries + 511) / 512;
292 unsigned long frames[16];
293 long ret;
295 if ( (entries <= LAST_RESERVED_GDT_ENTRY) || (entries > 8192) )
296 return -EINVAL;
298 if ( copy_from_user(frames, frame_list, nr_pages * sizeof(unsigned long)) )
299 return -EFAULT;
301 if ( (ret = set_gdt(current, frames, entries)) == 0 )
302 {
303 local_flush_tlb();
304 __asm__ __volatile__ ("lgdt %0" : "=m" (*current->mm.gdt));
305 }
307 return ret;
308 }
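
Editorial note: after set_gdt() has repointed the per-domain l1 entries at the new frames, the TLB flush discards stale translations for GDT_VIRT_START and the lgdt instruction reloads GDTR from the pseudo-descriptor kept in current->mm.gdt. That operand has the standard x86 shape sketched below (the struct name is illustrative; Xen's own definition lives elsewhere):

    /* Editorial sketch: the memory operand consumed by "lgdt %0" is a
     * 16-bit limit followed by the linear base address of descriptor 0. */
    struct example_gdt_ptr {
        unsigned short limit;  /* size of the GDT in bytes, minus one */
        unsigned long  base;   /* linear address of the first descriptor */
    } __attribute__((__packed__));
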
311 long do_update_descriptor(
312 unsigned long pa, unsigned long word1, unsigned long word2)
313 {
314 unsigned long *gdt_pent, pfn = pa >> PAGE_SHIFT, d[2];
315 struct pfn_info *page;
316 long ret = -EINVAL;
318 d[0] = word1;
319 d[1] = word2;
321 if ( (pa & 7) || (pfn >= max_page) || !check_descriptor(d) )
322 return -EINVAL;
324 page = &frame_table[pfn];
325 if ( unlikely(!get_page(page, current->domain)) )
326 return -EINVAL;
328 /* Check if the given frame is in use in an unsafe context. */
329 switch ( page->u.inuse.type_info & PGT_type_mask )
330 {
331 case PGT_gdt_page:
332 /* Disallow updates of Xen-reserved descriptors in the current GDT. */
333 if ( (l1_pgentry_to_pagenr(current->mm.perdomain_ptes[0]) == pfn) &&
334 (((pa&(PAGE_SIZE-1))>>3) >= FIRST_RESERVED_GDT_ENTRY) &&
335 (((pa&(PAGE_SIZE-1))>>3) <= LAST_RESERVED_GDT_ENTRY) )
336 goto out;
337 if ( unlikely(!get_page_type(page, PGT_gdt_page)) )
338 goto out;
339 break;
340 case PGT_ldt_page:
341 if ( unlikely(!get_page_type(page, PGT_ldt_page)) )
342 goto out;
343 break;
344 default:
345 if ( unlikely(!get_page_type(page, PGT_writable_page)) )
346 goto out;
347 break;
348 }
350 /* All is good so make the update. */
351 gdt_pent = map_domain_mem(pa);
352 memcpy(gdt_pent, d, 8);
353 unmap_domain_mem(gdt_pent);
355 put_page_type(page);
357 ret = 0; /* success */
359 out:
360 put_page(page);
361 return ret;
362 }
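
Editorial note: do_update_descriptor() insists on an 8-byte-aligned physical address and derives both the frame and the descriptor slot from it; the slot index is what the Xen-reserved-range test above compares. A worked example with an arbitrary address:

    /* Editorial worked example: how pa is sliced into frame and entry index. */
    static void example_descriptor_index(void)
    {
        unsigned long pa  = 0x42f80;                      /* pa & 7 == 0: aligned */
        unsigned long pfn = pa >> PAGE_SHIFT;             /* frame 0x42 */
        unsigned long idx = (pa & (PAGE_SIZE - 1)) >> 3;  /* byte 0xf80 -> entry 496 */

        /* If frame 0x42 were the first page of the current GDT and idx fell
         * inside the reserved range, the update would be refused. */
        (void)pfn; (void)idx;
    }
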
364 #ifdef MEMORY_GUARD
366 void *memguard_init(void *heap_start)
367 {
368 l1_pgentry_t *l1;
369 int i, j;
371 /* Round the allocation pointer up to a page boundary. */
372 heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
373 PAGE_MASK);
375 /* Memory guarding is incompatible with super pages. */
376 for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
377 {
378 l1 = (l1_pgentry_t *)heap_start;
379 heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);
380 for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
381 l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
382 (j << L1_PAGETABLE_SHIFT) |
383 __PAGE_HYPERVISOR);
384 idle_pg_table[i] = idle_pg_table[i + l2_table_offset(PAGE_OFFSET)] =
385 mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
386 }
388 return heap_start;
389 }
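
Editorial note: memory guarding needs 4kB granularity, so memguard_init() shatters each large identity mapping into an explicit l1 table carved out of the heap, one page of l1 entries per L2 slot. A worked example of the loop's address arithmetic, assuming L2_PAGETABLE_SHIFT == 21 and L1_PAGETABLE_SHIFT == 12 here (so one l1 table covers 2MB):

    /* Editorial worked example: the identity-map entry built for i = 3, j = 5
     * under the assumptions above. */
    static void example_memguard_entry(void)
    {
        unsigned long i = 3, j = 5;
        unsigned long phys = (i << 21) | (j << 12);  /* 3*2MB + 5*4kB = 0x605000 */

        /* l1[j] maps this address one 4kB page at a time, so a later
         * memguard_guard_range() can clear _PAGE_PRESENT on just that page. */
        (void)phys;
    }
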
391 static void __memguard_change_range(void *p, unsigned long l, int guard)
392 {
393 l1_pgentry_t *l1;
394 l2_pgentry_t *l2;
395 unsigned long _p = (unsigned long)p;
396 unsigned long _l = (unsigned long)l;
398 /* Ensure we are dealing with a page-aligned whole number of pages. */
399 ASSERT((_p&PAGE_MASK) != 0);
400 ASSERT((_l&PAGE_MASK) != 0);
401 ASSERT((_p&~PAGE_MASK) == 0);
402 ASSERT((_l&~PAGE_MASK) == 0);
404 while ( _l != 0 )
405 {
406 l2 = &idle_pg_table[l2_table_offset(_p)];
407 l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
408 if ( guard )
409 *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
410 else
411 *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
412 _p += PAGE_SIZE;
413 _l -= PAGE_SIZE;
414 }
415 }
417 void memguard_guard_range(void *p, unsigned long l)
418 {
419 __memguard_change_range(p, l, 1);
420 local_flush_tlb();
421 }
423 void memguard_unguard_range(void *p, unsigned long l)
424 {
425 __memguard_change_range(p, l, 0);
426 }
428 int memguard_is_guarded(void *p)
429 {
430 l1_pgentry_t *l1;
431 l2_pgentry_t *l2;
432 unsigned long _p = (unsigned long)p;
433 l2 = &idle_pg_table[l2_table_offset(_p)];
434 l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
435 return !(l1_pgentry_val(*l1) & _PAGE_PRESENT);
436 }
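
Editorial note: only the guard direction flushes the TLB, because live translations must be revoked immediately; unguarding needs no flush since x86 TLBs do not cache not-present entries. A usage sketch of the API (the retired buffer and its owner are hypothetical; p and the length must be page-aligned, per the ASSERTs above):

    /* Editorial sketch: fence off a retired, page-aligned region so any
     * use-after-free faults immediately instead of silently corrupting it. */
    static void example_retire_region(void *p, unsigned long bytes)
    {
        memguard_guard_range(p, bytes);

        /* ... later, before handing the memory out again ... */
        memguard_unguard_range(p, bytes);
    }
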
438 #endif