
xen/arch/x86/x86_64/mm.c @ 4629:6375127fdf23

bitkeeper revision 1.1311.1.1 (426641eeBv97w6sl983zxeR4Dc3Utg)

Clean up page table handling. Add macros to access page table
entries, and fix up plenty of places in the code to use the page
table types instead of "unsigned long".

Signed-off-by: Gerd Knorr <kraxel@bytesex.org>
Signed-off-by: michael.fetterman@cl.cam.ac.uk
author mafetter@fleming.research
date Wed Apr 20 11:50:06 2005 +0000 (2005-04-20)
parents 3f2415a328b7
children 1803018b3b05
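The cleanup described above is visible throughout the listing: each page-table
level gets its own entry type (l1_pgentry_t through l4_pgentry_t), and entries
are built and inspected only through accessors such as l1e_create_phys(),
l1e_get_flags() and l4e_remove_flags(). As a rough, illustrative sketch of the
idea (simplified stand-in definitions, not the real Xen macros):

/* Illustrative only: a level-1 entry becomes a distinct wrapper type, so the
 * compiler rejects a bare unsigned long (or an entry from another level)
 * being stored into an L1 slot by mistake. */
typedef struct { unsigned long l1; } l1_pgentry_t;

#define PAGE_SHIFT      12          /* illustrative value */
#define _PAGE_PRESENT   0x001UL     /* illustrative value */

/* Build an entry from a physical address plus flag bits. */
#define l1e_create_phys(pa, flags) \
    ((l1_pgentry_t) { ((pa) & ~0xfffUL) | (flags) })
/* Inspect the flag bits or the frame number of an entry. */
#define l1e_get_flags(e)  ((e).l1 & 0xfffUL)
#define l1e_get_pfn(e)    ((e).l1 >> PAGE_SHIFT)

/* Before the cleanup an entry was a raw number:
 *     unsigned long *pt;  pt[i] = pa | flags;
 * afterwards the same store must go through the typed constructor:
 *     l1_pgentry_t *pt;   pt[i] = l1e_create_phys(pa, _PAGE_PRESENT);
 */

The accessor names above match the ones used in the file below; the
definitions themselves are simplified for illustration.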
/******************************************************************************
 * arch/x86/x86_64/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <asm/asm_defns.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/msr.h>

static void *safe_page_alloc(void)
{
    extern int early_boot;
    if ( early_boot )
    {
        unsigned long p = alloc_boot_pages(PAGE_SIZE, PAGE_SIZE);
        if ( p == 0 )
            goto oom;
        return phys_to_virt(p);
    }
    else
    {
        struct pfn_info *pg = alloc_domheap_page(NULL);
        if ( pg == NULL )
            goto oom;
        return page_to_virt(pg);
    }
 oom:
    panic("Out of memory");
    return NULL;
}

/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
#define __PTE_MASK (~(_PAGE_GLOBAL|_PAGE_DIRTY|_PAGE_PCD|_PAGE_PWT))
int map_pages(
    root_pgentry_t *pt,
    unsigned long v,
    unsigned long p,
    unsigned long s,
    unsigned long flags)
{
    l4_pgentry_t *pl4e;
    l3_pgentry_t *pl3e;
    l2_pgentry_t *pl2e;
    l1_pgentry_t *pl1e;
    void         *newpg;

    while ( s != 0 )
    {
        pl4e = &pt[l4_table_offset(v)];
        if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) )
        {
            newpg = safe_page_alloc();
            clear_page(newpg);
            *pl4e = l4e_create_phys(__pa(newpg), flags & __PTE_MASK);
        }

        pl3e = l4e_to_l3e(*pl4e) + l3_table_offset(v);
        if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) )
        {
            newpg = safe_page_alloc();
            clear_page(newpg);
            *pl3e = l3e_create_phys(__pa(newpg), flags & __PTE_MASK);
        }

        pl2e = l3e_to_l2e(*pl3e) + l2_table_offset(v);

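        /* A 2MB superpage can be used only when the virtual address, the
         * physical address and the remaining size are all 2MB-aligned. */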
        if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
        {
            /* Super-page mapping. */
            if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
                local_flush_tlb_pge();
            *pl2e = l2e_create_phys(p, flags|_PAGE_PSE);

            v += 1 << L2_PAGETABLE_SHIFT;
            p += 1 << L2_PAGETABLE_SHIFT;
            s -= 1 << L2_PAGETABLE_SHIFT;
        }
        else
        {
            /* Normal page mapping. */
            if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) )
            {
                newpg = safe_page_alloc();
                clear_page(newpg);
                *pl2e = l2e_create_phys(__pa(newpg), flags & __PTE_MASK);
            }
            pl1e = l2e_to_l1e(*pl2e) + l1_table_offset(v);
            if ( (l1e_get_flags(*pl1e) & _PAGE_PRESENT) )
                local_flush_tlb_one(v);
            *pl1e = l1e_create_phys(p, flags);

            v += 1 << L1_PAGETABLE_SHIFT;
            p += 1 << L1_PAGETABLE_SHIFT;
            s -= 1 << L1_PAGETABLE_SHIFT;
        }
    }

    return 0;
}

void __set_fixmap(
    enum fixed_addresses idx, unsigned long p, unsigned long flags)
{
    if ( unlikely(idx >= __end_of_fixed_addresses) )
        BUG();
    map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags);
}


void __init paging_init(void)
{
    unsigned long i, p, max;
    l3_pgentry_t *l3rw, *l3ro;
    struct pfn_info *pg;

    /* Map all of physical memory. */
    max = ((max_page + L1_PAGETABLE_ENTRIES - 1) &
           ~(L1_PAGETABLE_ENTRIES - 1)) << PAGE_SHIFT;
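    /* max is max_page rounded up to a 2MB boundary, expressed in bytes, so
     * that the direct mapping of physical memory at PAGE_OFFSET below can be
     * built entirely from superpages by map_pages(). */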
    map_pages(idle_pg_table, PAGE_OFFSET, 0, max, PAGE_HYPERVISOR);

    /*
     * Allocate and map the machine-to-phys table.
     * This also ensures L3 is present for ioremap().
     */
    for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
    {
        pg = alloc_domheap_pages(
            NULL, L2_PAGETABLE_SHIFT - L1_PAGETABLE_SHIFT);
        if ( pg == NULL )
            panic("Not enough memory for m2p table\n");
        p = page_to_phys(pg);
        map_pages(idle_pg_table, RDWR_MPT_VIRT_START + i*8, p,
                  1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR | _PAGE_USER);
        memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55,
               1UL << L2_PAGETABLE_SHIFT);
    }

    /*
     * Above we mapped the M2P table as user-accessible and read-writable.
     * Fix security by denying user access at the top level of the page table.
     */
    l4e_remove_flags(&idle_pg_table[l4_table_offset(RDWR_MPT_VIRT_START)],
                     _PAGE_USER);

    /* Create read-only mapping of MPT for guest-OS use. */
    l3ro = (l3_pgentry_t *)alloc_xenheap_page();
    clear_page(l3ro);
    idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] =
        l4e_create_phys(__pa(l3ro),
                        (__PAGE_HYPERVISOR | _PAGE_USER) & ~_PAGE_RW);

    /* Copy the L3 mappings from the RDWR_MPT area. */
    l3rw = l4e_to_l3e(
        idle_pg_table[l4_table_offset(RDWR_MPT_VIRT_START)]);
    l3rw += l3_table_offset(RDWR_MPT_VIRT_START);
    l3ro += l3_table_offset(RO_MPT_VIRT_START);
    memcpy(l3ro, l3rw,
           (RDWR_MPT_VIRT_END - RDWR_MPT_VIRT_START) >> L3_PAGETABLE_SHIFT);

    /* Set up linear page table mapping. */
    idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
        l4e_create_phys(__pa(idle_pg_table), __PAGE_HYPERVISOR);
}

void __init zap_low_mappings(void)
{
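    /* Drop the boot-time identity mapping of low physical memory (L4 slot 0). */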
    idle_pg_table[0] = l4e_empty();
    flush_tlb_all_pge();
}

void subarch_init_memory(struct domain *dom_xen)
{
    unsigned long i, v, m2p_start_mfn;
    l3_pgentry_t l3e;
    l2_pgentry_t l2e;

    /*
     * We are rather picky about the layout of 'struct pfn_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them.
     */
    if ( (offsetof(struct pfn_info, u.inuse._domain) !=
          (offsetof(struct pfn_info, count_info) + sizeof(u32))) )
    {
        printk("Weird pfn_info layout (%ld,%ld,%d)\n",
               offsetof(struct pfn_info, count_info),
               offsetof(struct pfn_info, u.inuse._domain),
               sizeof(struct pfn_info));
        for ( ; ; ) ;
    }

    /* M2P table is mappable read-only by privileged domains. */
    for ( v  = RDWR_MPT_VIRT_START;
          v != RDWR_MPT_VIRT_END;
          v += 1 << L2_PAGETABLE_SHIFT )
    {
        l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
            l3_table_offset(v)];
        if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
            continue;
        l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
            continue;
        m2p_start_mfn = l2e_get_pfn(l2e);

        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
        {
            frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
            /* Use the GDT page type so that the page can only be mapped
               read-only by non-privileged domains. */
            frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
            page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
        }
    }
}

long do_stack_switch(unsigned long ss, unsigned long esp)
{
    if ( (ss & 3) != 3 )
        return -EPERM;
    current->arch.kernel_ss = ss;
    current->arch.kernel_sp = esp;
    return 0;
}

long do_set_segment_base(unsigned int which, unsigned long base)
{
    struct exec_domain *ed = current;
    long ret = 0;

    switch ( which )
    {
    case SEGBASE_FS:
        ed->arch.user_ctxt.fs_base = base;
        if ( wrmsr_user(MSR_FS_BASE, base, base>>32) )
            ret = -EFAULT;
        break;

    case SEGBASE_GS_USER:
        ed->arch.user_ctxt.gs_base_user = base;
        if ( wrmsr_user(MSR_SHADOW_GS_BASE, base, base>>32) )
            ret = -EFAULT;
        break;

    case SEGBASE_GS_KERNEL:
        ed->arch.user_ctxt.gs_base_kernel = base;
        if ( wrmsr_user(MSR_GS_BASE, base, base>>32) )
            ret = -EFAULT;
        break;

    case SEGBASE_GS_USER_SEL:
        __asm__ __volatile__ (
            "     swapgs              \n"
            "1:   movl %k0,%%gs       \n"
            "    "safe_swapgs"        \n"
            ".section .fixup,\"ax\"   \n"
            "2:   xorl %k0,%k0        \n"
            "     jmp  1b             \n"
            ".previous                \n"
            ".section __ex_table,\"a\"\n"
            "     .align 8            \n"
            "     .quad 1b,2b         \n"
            ".previous                "
            : : "r" (base&0xffff) );
        break;

    default:
        ret = -EINVAL;
        break;
    }

    return ret;
}


/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(struct desc_struct *d)
{
    u32 a = d->a, b = d->b;

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /* The guest can only safely be executed in ring 3. */
    if ( (b & _SEGMENT_DPL) != _SEGMENT_DPL )
        goto bad;

    /* All code and data segments are okay. No base/limit checking. */
    if ( (b & _SEGMENT_S) )
        goto good;

    /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
    if ( (b & _SEGMENT_TYPE) == 0x000 )
        goto good;

    /* Everything but a call gate is discarded here. */
    if ( (b & _SEGMENT_TYPE) != 0xc00 )
        goto bad;

    /* Can't allow far jump to a Xen-private segment. */
    if ( !VALID_CODESEL(a>>16) )
        goto bad;

    /* Reserved bits must be zero. */
    if ( (b & 0xe0) != 0 )
        goto bad;

 good:
    return 1;
 bad:
    return 0;
}


#ifdef MEMORY_GUARD

#define ALLOC_PT(_level)                                              \
do {                                                                  \
    (_level) = (_level ## _pgentry_t *)heap_start;                    \
    heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE);     \
    clear_page(_level);                                               \
} while ( 0 )
void *memguard_init(void *heap_start)
{
    l1_pgentry_t *l1 = NULL;
    l2_pgentry_t *l2 = NULL;
    l3_pgentry_t *l3 = NULL;
    l4_pgentry_t *l4 = &idle_pg_table[l4_table_offset(PAGE_OFFSET)];
    unsigned long i, j;

    /* Round the allocation pointer up to a page boundary. */
    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
                          PAGE_MASK);

    /* Memory guarding is incompatible with super pages. */
    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
    {
        ALLOC_PT(l1);
        for ( j = 0; j < L1_PAGETABLE_ENTRIES; j++ )
            l1[j] = l1e_create_phys((i << L2_PAGETABLE_SHIFT) |
                                    (j << L1_PAGETABLE_SHIFT),
                                    __PAGE_HYPERVISOR);
        if ( !((unsigned long)l2 & (PAGE_SIZE-1)) )
        {
            ALLOC_PT(l2);
            if ( !((unsigned long)l3 & (PAGE_SIZE-1)) )
            {
                ALLOC_PT(l3);
                *l4++ = l4e_create_phys(virt_to_phys(l3), __PAGE_HYPERVISOR);
            }
            *l3++ = l3e_create_phys(virt_to_phys(l2), __PAGE_HYPERVISOR);
        }
        *l2++ = l2e_create_phys(virt_to_phys(l1), __PAGE_HYPERVISOR);
    }

    return heap_start;
}

static void __memguard_change_range(void *p, unsigned long l, int guard)
{
    l1_pgentry_t *l1;
    l2_pgentry_t *l2;
    l3_pgentry_t *l3;
    l4_pgentry_t *l4;
    unsigned long _p = (unsigned long)p;
    unsigned long _l = (unsigned long)l;

    /* Ensure we are dealing with a page-aligned whole number of pages. */
    ASSERT((_p&PAGE_MASK) != 0);
    ASSERT((_l&PAGE_MASK) != 0);
    ASSERT((_p&~PAGE_MASK) == 0);
    ASSERT((_l&~PAGE_MASK) == 0);

    while ( _l != 0 )
    {
        l4 = &idle_pg_table[l4_table_offset(_p)];
        l3 = l4e_to_l3e(*l4) + l3_table_offset(_p);
        l2 = l3e_to_l2e(*l3) + l2_table_offset(_p);
        l1 = l2e_to_l1e(*l2) + l1_table_offset(_p);
        if ( guard )
            l1e_remove_flags(l1, _PAGE_PRESENT);
        else
            l1e_add_flags(l1, _PAGE_PRESENT);
        _p += PAGE_SIZE;
        _l -= PAGE_SIZE;
    }
}

void memguard_guard_stack(void *p)
{
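    /* Skip the lowest page of the allocation, then clear _PAGE_PRESENT on
     * the two pages above it so that any access into them faults at once. */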
    p = (void *)((unsigned long)p + PAGE_SIZE);
    memguard_guard_range(p, 2 * PAGE_SIZE);
}

void memguard_guard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 1);
    local_flush_tlb();
}

void memguard_unguard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 0);
}

#endif

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */