xen/arch/x86/x86_64/mm.c @ 3766:89e86842952a (debuggers.hg)
bitkeeper revision 1.1159.212.132 (4208e2acn2x2RLZsxZIR12IGEO1b3A)

Merge scramble.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
  into scramble.cl.cam.ac.uk:/local/scratch/kaf24/xen-unstable.bk

author    kaf24@scramble.cl.cam.ac.uk
date      Tue Feb 08 16:02:52 2005 +0000 (2005-02-08)
parents   23e7cf28ddb3 4dfebfdc7933
children  f5f2757b3aa2

/* -*-  Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/******************************************************************************
 * arch/x86/x86_64/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/domain_page.h>
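
/*
 * Page allocation helper for page-table construction: while early_boot is
 * set the Xen heap allocator is not yet available, so pages come from the
 * boot allocator instead.
 */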
void *safe_page_alloc(void)
{
    extern int early_boot;
    if ( early_boot )
        return __va(alloc_boot_pages(PAGE_SIZE, PAGE_SIZE));
    return (void *)alloc_xenheap_page();
}

/* Map physical byte range (@p, @p+@s) at virt address @v in pagetable @pt. */
int map_pages(
    pagetable_t *pt,
    unsigned long v,
    unsigned long p,
    unsigned long s,
    unsigned long flags)
{
    l4_pgentry_t *pl4e;
    l3_pgentry_t *pl3e;
    l2_pgentry_t *pl2e;
    l1_pgentry_t *pl1e;
    void         *newpg;

    while ( s != 0 )
    {
        pl4e = &pt[l4_table_offset(v)];
        if ( !(l4_pgentry_val(*pl4e) & _PAGE_PRESENT) )
        {
            newpg = safe_page_alloc();
            clear_page(newpg);
            *pl4e = mk_l4_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
        }

        pl3e = l4_pgentry_to_l3(*pl4e) + l3_table_offset(v);
        if ( !(l3_pgentry_val(*pl3e) & _PAGE_PRESENT) )
        {
            newpg = safe_page_alloc();
            clear_page(newpg);
            *pl3e = mk_l3_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
        }

        pl2e = l3_pgentry_to_l2(*pl3e) + l2_table_offset(v);

        if ( ((s|v|p) & ((1<<L2_PAGETABLE_SHIFT)-1)) == 0 )
        {
            /* Super-page mapping. */
            if ( (l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
                __flush_tlb_pge();
            *pl2e = mk_l2_pgentry(p|flags|_PAGE_PSE);

            v += 1 << L2_PAGETABLE_SHIFT;
            p += 1 << L2_PAGETABLE_SHIFT;
            s -= 1 << L2_PAGETABLE_SHIFT;
        }
        else
        {
            /* Normal page mapping. */
            if ( !(l2_pgentry_val(*pl2e) & _PAGE_PRESENT) )
            {
                newpg = safe_page_alloc();
                clear_page(newpg);
                *pl2e = mk_l2_pgentry(__pa(newpg) | __PAGE_HYPERVISOR);
            }
            pl1e = l2_pgentry_to_l1(*pl2e) + l1_table_offset(v);
            if ( (l1_pgentry_val(*pl1e) & _PAGE_PRESENT) )
                __flush_tlb_one(v);
            *pl1e = mk_l1_pgentry(p|flags);

            v += 1 << L1_PAGETABLE_SHIFT;
            p += 1 << L1_PAGETABLE_SHIFT;
            s -= 1 << L1_PAGETABLE_SHIFT;
        }
    }

    return 0;
}
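
/*
 * Illustrative usage sketch (not part of the original file): map_pages()
 * installs a 2MB super-page whenever the virtual address, physical address
 * and remaining size are all 2MB-aligned, and otherwise falls back to 4kB
 * mappings. The addresses below are made up purely for illustration.
 */
#if 0
static void example_mappings(void)
{
    /* Hypothetical addresses, chosen only for illustration. */
    unsigned long virt = 0xffff848000000000UL;   /* 2MB-aligned */
    unsigned long phys = 0x40000000UL;           /* 2MB-aligned (1GB) */

    /* v, p and size all 2MB-aligned: one PSE (super-page) L2 entry. */
    map_pages(idle_pg_table, virt, phys,
              1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR);

    /* Size of one page only: the 4kB path runs, allocating any missing
     * intermediate page tables via safe_page_alloc(). */
    map_pages(idle_pg_table, virt + (1UL << L2_PAGETABLE_SHIFT),
              phys + (1UL << L2_PAGETABLE_SHIFT),
              PAGE_SIZE, PAGE_HYPERVISOR);
}
#endif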

void __set_fixmap(
    enum fixed_addresses idx, unsigned long p, unsigned long flags)
{
    if ( unlikely(idx >= __end_of_fixed_addresses) )
        BUG();
    map_pages(idle_pg_table, fix_to_virt(idx), p, PAGE_SIZE, flags);
}
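
/*
 * Illustrative sketch (not part of the original file): a fixmap slot maps
 * one machine page at a compile-time-known virtual address. FIX_EXAMPLE is
 * a hypothetical enum fixed_addresses value and the machine address is an
 * arbitrary example.
 */
#if 0
static void example_fixmap(void)
{
    __set_fixmap(FIX_EXAMPLE, 0xfee00000UL, PAGE_HYPERVISOR);
}
#endif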

void __init paging_init(void)
{
    void *newpt;
    unsigned long i, p, max;

    /* Map all of physical memory. */
    max = ((max_page + ENTRIES_PER_L1_PAGETABLE - 1) &
           ~(ENTRIES_PER_L1_PAGETABLE - 1)) << PAGE_SHIFT;
    map_pages(idle_pg_table, PAGE_OFFSET, 0, max, PAGE_HYPERVISOR);

    /*
     * Allocate and map the machine-to-phys table.
     * This also ensures L3 is present for ioremap().
     */
    for ( i = 0; i < max_page; i += ((1UL << L2_PAGETABLE_SHIFT) / 8) )
    {
        p = alloc_boot_pages(1UL << L2_PAGETABLE_SHIFT,
                             1UL << L2_PAGETABLE_SHIFT);
        if ( p == 0 )
            panic("Not enough memory for m2p table\n");
        map_pages(idle_pg_table, RDWR_MPT_VIRT_START + i*8, p,
                  1UL << L2_PAGETABLE_SHIFT, PAGE_HYPERVISOR);
        memset((void *)(RDWR_MPT_VIRT_START + i*8), 0x55,
               1UL << L2_PAGETABLE_SHIFT);
    }

    /* Create read-only mapping of MPT for guest-OS use. */
    newpt = (void *)alloc_xenheap_page();
    clear_page(newpt);
    idle_pg_table[l4_table_offset(RO_MPT_VIRT_START)] =
        mk_l4_pgentry((__pa(newpt) | __PAGE_HYPERVISOR | _PAGE_USER) &
                      ~_PAGE_RW);
    /* Copy the L3 mappings from the RDWR_MPT area. */
    p = l4_pgentry_val(idle_pg_table[l4_table_offset(RDWR_MPT_VIRT_START)]);
    p &= PAGE_MASK;
    p += l3_table_offset(RDWR_MPT_VIRT_START) * sizeof(l3_pgentry_t);
    newpt = (void *)((unsigned long)newpt +
                     (l3_table_offset(RO_MPT_VIRT_START) *
                      sizeof(l3_pgentry_t)));
    memcpy(newpt, __va(p),
           (RDWR_MPT_VIRT_END - RDWR_MPT_VIRT_START) >> L3_PAGETABLE_SHIFT);

    /* Set up linear page table mapping. */
    idle_pg_table[l4_table_offset(LINEAR_PT_VIRT_START)] =
        mk_l4_pgentry(__pa(idle_pg_table) | __PAGE_HYPERVISOR);
}
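
/*
 * Sizing note on the machine-to-phys (M2P) table built above: each M2P
 * entry is 8 bytes, so one 2MB (1 << L2_PAGETABLE_SHIFT) chunk of table
 * covers (1 << L2_PAGETABLE_SHIFT) / 8 machine frames, i.e. 262144 frames
 * or 1GB of machine memory with 4kB pages. That is why the loop advances
 * i by that amount per chunk and maps each chunk at byte offset i*8 from
 * RDWR_MPT_VIRT_START. Illustrative arithmetic only:
 */
#if 0
static void m2p_sizing_example(void)
{
    unsigned long frames_per_chunk = (1UL << L2_PAGETABLE_SHIFT) / 8;
    unsigned long bytes_covered    = frames_per_chunk << PAGE_SHIFT;
    ASSERT(frames_per_chunk == 262144UL);      /* 2MB / 8 bytes per entry */
    ASSERT(bytes_covered    == (1UL << 30));   /* 1GB of memory per chunk */
}
#endif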

void __init zap_low_mappings(void)
{
    idle_pg_table[0] = mk_l4_pgentry(0);
    flush_tlb_all_pge();
}

void subarch_init_memory(struct domain *dom_xen)
{
    unsigned long i, v, m2p_start_mfn;
    l3_pgentry_t l3e;
    l2_pgentry_t l2e;

    /*
     * We are rather picky about the layout of 'struct pfn_info'. The
     * count_info and domain fields must be adjacent, as we perform atomic
     * 64-bit operations on them.
     */
    if ( (offsetof(struct pfn_info, u.inuse._domain) !=
          (offsetof(struct pfn_info, count_info) + sizeof(u32))) )
    {
        printk("Weird pfn_info layout (%ld,%ld,%d)\n",
               offsetof(struct pfn_info, count_info),
               offsetof(struct pfn_info, u.inuse._domain),
               sizeof(struct pfn_info));
        for ( ; ; ) ;
    }

    /* M2P table is mappable read-only by privileged domains. */
    for ( v  = RDWR_MPT_VIRT_START;
          v != RDWR_MPT_VIRT_END;
          v += 1 << L2_PAGETABLE_SHIFT )
    {
        l3e = l4_pgentry_to_l3(idle_pg_table[l4_table_offset(v)])[
            l3_table_offset(v)];
        if ( !(l3_pgentry_val(l3e) & _PAGE_PRESENT) )
            continue;
        l2e = l3_pgentry_to_l2(l3e)[l2_table_offset(v)];
        if ( !(l2_pgentry_val(l2e) & _PAGE_PRESENT) )
            continue;
        m2p_start_mfn = l2_pgentry_to_pfn(l2e);

        for ( i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++ )
        {
            frame_table[m2p_start_mfn+i].count_info = PGC_allocated | 1;
            /* Type the frames as GDT pages so that non-privileged domains
             * can only ever map them read-only. */
            frame_table[m2p_start_mfn+i].u.inuse.type_info = PGT_gdt_page | 1;
            page_set_owner(&frame_table[m2p_start_mfn+i], dom_xen);
        }
    }
}
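
/*
 * The run-time layout check above could equally be expressed at compile
 * time. A minimal sketch using a hypothetical negative-array-size
 * assertion (LAYOUT_ASSERT is not a macro provided by this tree):
 */
#if 0
#define LAYOUT_ASSERT(cond) \
    typedef char layout_assert_failed[(cond) ? 1 : -1]

LAYOUT_ASSERT(offsetof(struct pfn_info, u.inuse._domain) ==
              offsetof(struct pfn_info, count_info) + sizeof(u32));
#endif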

/*
 * Allows shooting down of borrowed page-table use on specific CPUs.
 * Specifically, we borrow page tables when running the idle domain.
 */
static void __synchronise_pagetables(void *mask)
{
    struct exec_domain *ed = current;
    if ( ((unsigned long)mask & (1 << ed->processor)) &&
         is_idle_task(ed->domain) )
        write_ptbase(ed);
}

void synchronise_pagetables(unsigned long cpu_mask)
{
    /* smp_call_function() does not run the handler on the local CPU, so
     * handle the local case directly before notifying the others. */
    __synchronise_pagetables((void *)cpu_mask);
    smp_call_function(__synchronise_pagetables, (void *)cpu_mask, 1, 1);
}

long do_stack_switch(unsigned long ss, unsigned long esp)
{
    if ( (ss & 3) != 3 )
        return -EPERM;
    current->arch.guestos_ss = ss;
    current->arch.guestos_sp = esp;
    return 0;
}

/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(struct desc_struct *d)
{
    u32 a = d->a, b = d->b;

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /* The guest can only safely be executed in ring 3. */
    if ( (b & _SEGMENT_DPL) != 3 )
        goto bad;

    /* Any code or data segment is okay. No base/limit checking. */
    if ( (b & _SEGMENT_S) )
        goto good;

    /* Invalid type 0 is harmless. It is used for 2nd half of a call gate. */
    if ( (b & _SEGMENT_TYPE) == 0x000 )
        goto good;

    /* Everything but a call gate is discarded here. */
    if ( (b & _SEGMENT_TYPE) != 0xc00 )
        goto bad;

    /* Can't allow far jump to a Xen-private segment. */
    if ( !VALID_CODESEL(a>>16) )
        goto bad;

    /* Reserved bits must be zero. */
    if ( (b & 0xe0) != 0 )
        goto bad;

 good:
    return 1;
 bad:
    return 0;
}
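
/*
 * Illustrative example (not part of the original file): a conventional flat
 * 4GB ring-3 code segment is present (P=1), has DPL 3 and has the S bit set,
 * so check_descriptor() accepts it on the "code or data segment" path. The
 * descriptor words below use the standard flat user-code encoding.
 */
#if 0
static void example_descriptor_check(void)
{
    struct desc_struct d;
    d.a = 0x0000ffff;   /* limit 15:0, base 15:0 */
    d.b = 0x00cffa00;   /* P=1, DPL=3, S=1, type=0xA (code, readable) */
    ASSERT(check_descriptor(&d) == 1);
}
#endif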

#ifdef MEMORY_GUARD

#define ALLOC_PT(_level)                                          \
do {                                                              \
    (_level) = (_level ## _pgentry_t *)heap_start;                \
    heap_start = (void *)((unsigned long)heap_start + PAGE_SIZE); \
    clear_page(_level);                                           \
} while ( 0 )

void *memguard_init(void *heap_start)
{
    l1_pgentry_t *l1 = NULL;
    l2_pgentry_t *l2 = NULL;
    l3_pgentry_t *l3 = NULL;
    l4_pgentry_t *l4 = &idle_pg_table[l4_table_offset(PAGE_OFFSET)];
    unsigned long i, j;

    /* Round the allocation pointer up to a page boundary. */
    heap_start = (void *)(((unsigned long)heap_start + (PAGE_SIZE-1)) &
                          PAGE_MASK);

    /*
     * Memory guarding is incompatible with super pages: the Xen heap must
     * be mapped with 4kB L1 entries so that individual pages can later be
     * marked not-present.
     */
    for ( i = 0; i < (xenheap_phys_end >> L2_PAGETABLE_SHIFT); i++ )
    {
        ALLOC_PT(l1);
        for ( j = 0; j < ENTRIES_PER_L1_PAGETABLE; j++ )
            l1[j] = mk_l1_pgentry((i << L2_PAGETABLE_SHIFT) |
                                  (j << L1_PAGETABLE_SHIFT) |
                                  __PAGE_HYPERVISOR);
        if ( !((unsigned long)l2 & (PAGE_SIZE-1)) )
        {
            ALLOC_PT(l2);
            if ( !((unsigned long)l3 & (PAGE_SIZE-1)) )
            {
                ALLOC_PT(l3);
                *l4++ = mk_l4_pgentry(virt_to_phys(l3) | __PAGE_HYPERVISOR);
            }
            *l3++ = mk_l3_pgentry(virt_to_phys(l2) | __PAGE_HYPERVISOR);
        }
        *l2++ = mk_l2_pgentry(virt_to_phys(l1) | __PAGE_HYPERVISOR);
    }

    return heap_start;
}

static void __memguard_change_range(void *p, unsigned long l, int guard)
{
    l1_pgentry_t *l1;
    l2_pgentry_t *l2;
    l3_pgentry_t *l3;
    l4_pgentry_t *l4;
    unsigned long _p = (unsigned long)p;
    unsigned long _l = (unsigned long)l;

    /* Ensure we are dealing with a page-aligned whole number of pages. */
    ASSERT((_p&PAGE_MASK) != 0);
    ASSERT((_l&PAGE_MASK) != 0);
    ASSERT((_p&~PAGE_MASK) == 0);
    ASSERT((_l&~PAGE_MASK) == 0);

    while ( _l != 0 )
    {
        l4 = &idle_pg_table[l4_table_offset(_p)];
        l3 = l4_pgentry_to_l3(*l4) + l3_table_offset(_p);
        l2 = l3_pgentry_to_l2(*l3) + l2_table_offset(_p);
        l1 = l2_pgentry_to_l1(*l2) + l1_table_offset(_p);
        if ( guard )
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) & ~_PAGE_PRESENT);
        else
            *l1 = mk_l1_pgentry(l1_pgentry_val(*l1) | _PAGE_PRESENT);
        _p += PAGE_SIZE;
        _l -= PAGE_SIZE;
    }
}

void memguard_guard_stack(void *p)
{
    p = (void *)((unsigned long)p + PAGE_SIZE);
    memguard_guard_range(p, 2 * PAGE_SIZE);
}

void memguard_guard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 1);
    local_flush_tlb();
}

void memguard_unguard_range(void *p, unsigned long l)
{
    __memguard_change_range(p, l, 0);
}
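
/*
 * Illustrative usage sketch (not part of the original file): guarding a
 * range clears the present bit on its L1 entries, so any stray access to
 * it faults immediately; unguarding restores the present bit. The
 * two-page buffer is supplied by a hypothetical caller.
 */
#if 0
static void example_memguard_usage(void *two_page_buffer)
{
    void *second = (void *)((unsigned long)two_page_buffer + PAGE_SIZE);

    /* Trap any access to the second page while the first is in use. */
    memguard_guard_range(second, PAGE_SIZE);

    /* ... work on the first page ... */

    /* Make the second page accessible again. */
    memguard_unguard_range(second, PAGE_SIZE);
}
#endif
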
#endif /* MEMORY_GUARD */