debuggers.hg

view xen/arch/x86/x86_32/mm.c @ 22906:700ac6445812

Now add KDB to the non-kdb tree
author Mukesh Rathor
date Thu Feb 03 15:42:41 2011 -0800
parents 5a224e101cb3

/******************************************************************************
 * arch/x86/x86_32/mm.c
 *
 * Modifications to Linux original are copyright (c) 2004, K A Fraser
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */

#include <xen/config.h>
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
#include <asm/page.h>
#include <asm/flushtlb.h>
#include <asm/fixmap.h>
#include <asm/setup.h>
#include <public/memory.h>

l2_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
    idle_pg_table_l2[4 * L2_PAGETABLE_ENTRIES];

extern l1_pgentry_t l1_identmap[L1_PAGETABLE_ENTRIES];

unsigned int __read_mostly PAGE_HYPERVISOR = __PAGE_HYPERVISOR;
unsigned int __read_mostly PAGE_HYPERVISOR_NOCACHE = __PAGE_HYPERVISOR_NOCACHE;

static unsigned long __read_mostly mpt_size;
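
/*
 * Allocate one page for use in Xen's page tables.  Before the heap
 * allocators are up (early_boot), pages are carved straight out of the
 * initial xenheap region; afterwards they come from alloc_xenheap_page().
 */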
void *alloc_xen_pagetable(void)
{
    unsigned long mfn;

    if ( !early_boot )
    {
        void *v = alloc_xenheap_page();
        BUG_ON(v == NULL);
        return v;
    }

    mfn = xenheap_initial_phys_start >> PAGE_SHIFT;
    xenheap_initial_phys_start += PAGE_SIZE;
    return mfn_to_virt(mfn);
}
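
/*
 * On x86_32 the idle L2 tables form one flat array, so the L2 entry mapping
 * a virtual address is found by a simple index into idle_pg_table_l2.
 */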
l2_pgentry_t *virt_to_xen_l2e(unsigned long v)
{
    return &idle_pg_table_l2[l2_linear_offset(v)];
}
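
/* Dumping another vcpu's page-table walk is not implemented on x86_32. */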
void *do_page_walk(struct vcpu *v, unsigned long addr)
{
    return NULL;
}
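
/*
 * Finish setting up the idle page tables: mark eligible mappings GLOBAL when
 * the CPU supports PGE, allocate and map the machine-to-phys (M2P) table
 * (writable at RDWR_MPT_VIRT_START, with a read-only alias for guests at
 * RO_MPT_VIRT_START), and pre-allocate L2 entries for the
 * ioremap()/map_domain_page_global() region.
 */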
void __init paging_init(void)
{
    unsigned long v;
    struct page_info *pg;
    unsigned int i, n;

    if ( cpu_has_pge )
    {
        /* Suitable Xen mapping can be GLOBAL. */
        set_in_cr4(X86_CR4_PGE);
        PAGE_HYPERVISOR |= _PAGE_GLOBAL;
        PAGE_HYPERVISOR_NOCACHE |= _PAGE_GLOBAL;
        /* Transform early mappings (e.g., the frametable). */
        for ( v = HYPERVISOR_VIRT_START; v; v += (1 << L2_PAGETABLE_SHIFT) )
            if ( (l2e_get_flags(idle_pg_table_l2[l2_linear_offset(v)]) &
                  (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT) )
                l2e_add_flags(idle_pg_table_l2[l2_linear_offset(v)],
                              _PAGE_GLOBAL);
        for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
            l1e_add_flags(l1_identmap[i], _PAGE_GLOBAL);
    }

    /*
     * Allocate and map the machine-to-phys table and create read-only mapping
     * of MPT for guest-OS use.
     */
    mpt_size = (max_page * BYTES_PER_LONG) + (1UL << L2_PAGETABLE_SHIFT) - 1;
    mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
#define MFN(x) (((x) << L2_PAGETABLE_SHIFT) / sizeof(unsigned long))
#define CNT ((sizeof(*frame_table) & -sizeof(*frame_table)) / \
             sizeof(*machine_to_phys_mapping))
    BUILD_BUG_ON((sizeof(*frame_table) & -sizeof(*frame_table)) % \
                 sizeof(*machine_to_phys_mapping));
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        for ( n = 0; n < CNT; ++n)
            if ( mfn_valid(MFN(i) + n * PDX_GROUP_COUNT) )
                break;
        if ( n == CNT )
            continue;
        if ( (pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
            panic("Not enough memory to bootstrap Xen.\n");
        l2e_write(&idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i],
                  l2e_from_page(pg, PAGE_HYPERVISOR | _PAGE_PSE));
        /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
        l2e_write(&idle_pg_table_l2[l2_linear_offset(RO_MPT_VIRT_START) + i],
                  l2e_from_page(
                      pg, (__PAGE_HYPERVISOR | _PAGE_PSE) & ~_PAGE_RW));
        /* Fill with an obvious debug pattern. */
        memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)), 0x55,
               1UL << L2_PAGETABLE_SHIFT);
    }
#undef CNT
#undef MFN

    /* Create page tables for ioremap()/map_domain_page_global(). */
    for ( i = 0; i < (IOREMAP_MBYTES >> (L2_PAGETABLE_SHIFT - 20)); i++ )
    {
        void *p;
        l2_pgentry_t *pl2e;
        pl2e = &idle_pg_table_l2[l2_linear_offset(IOREMAP_VIRT_START) + i];
        if ( l2e_get_flags(*pl2e) & _PAGE_PRESENT )
            continue;
        p = alloc_xenheap_page();
        clear_page(p);
        l2e_write(pl2e, l2e_from_page(virt_to_page(p), __PAGE_HYPERVISOR));
    }
}
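
/*
 * Point the PERDOMAIN_VIRT_START slots of the idle L2 table at the idle
 * domain's per-domain mapping pages.
 */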
void __init setup_idle_pagetable(void)
{
    int i;

    for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
        l2e_write(&idle_pg_table_l2[l2_linear_offset(PERDOMAIN_VIRT_START)+i],
                  l2e_from_page(virt_to_page(idle_vcpu[0]->domain->
                                             arch.mm_perdomain_pt) + i,
                                __PAGE_HYPERVISOR));
}
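
/*
 * Drop the low-memory 1:1 mappings that were only needed during boot, from
 * both dom0's initial L2 and the idle page tables, keeping just a small
 * mapping around BOOT_TRAMPOLINE (used, e.g., when starting secondary CPUs).
 */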
void __init zap_low_mappings(l2_pgentry_t *dom0_l2)
{
    int i;

    /* Clear temporary idle mappings from the dom0 initial l2. */
    for ( i = 0; i < (HYPERVISOR_VIRT_START >> L2_PAGETABLE_SHIFT); i++ )
        if ( l2e_get_intpte(dom0_l2[i]) ==
             l2e_get_intpte(idle_pg_table_l2[i]) )
            l2e_write(&dom0_l2[i], l2e_empty());

    /* Now zap mappings in the idle pagetables. */
    BUG_ON(l2e_get_pfn(idle_pg_table_l2[0]) != virt_to_mfn(l1_identmap));
    l2e_write_atomic(&idle_pg_table_l2[0], l2e_empty());
    destroy_xen_mappings(0, HYPERVISOR_VIRT_START);

    flush_all(FLUSH_TLB_GLOBAL);

    /* Replace with mapping of the boot trampoline only. */
    map_pages_to_xen(BOOT_TRAMPOLINE, BOOT_TRAMPOLINE >> PAGE_SHIFT,
                     0x10, __PAGE_HYPERVISOR);
}
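
/*
 * x86_32-specific memory setup: share the M2P pages read-only with
 * privileged guests and, when guest kernels run in ring 0
 * (supervisor_mode_kernel), drop the DPL of the ring-1 flat selectors.
 */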
void __init subarch_init_memory(void)
{
    unsigned long m2p_start_mfn;
    unsigned int i, j;
    l2_pgentry_t l2e;

    BUILD_BUG_ON(sizeof(struct page_info) != 24);

    /* M2P table is mappable read-only by privileged domains. */
    for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
    {
        l2e = idle_pg_table_l2[l2_linear_offset(RDWR_MPT_VIRT_START) + i];
        if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
            continue;
        m2p_start_mfn = l2e_get_pfn(l2e);
        for ( j = 0; j < L2_PAGETABLE_ENTRIES; j++ )
        {
            struct page_info *page = mfn_to_page(m2p_start_mfn + j);
            share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
        }
    }

    if ( supervisor_mode_kernel )
    {
        /* Guest kernel runs in ring 0, not ring 1. */
        struct desc_struct *d;
        d = &boot_cpu_gdt_table[(FLAT_RING1_CS >> 3) - FIRST_RESERVED_GDT_ENTRY];
        d[0].b &= ~_SEGMENT_DPL;
        d[1].b &= ~_SEGMENT_DPL;
    }
}
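
/*
 * x86_32-specific memory hypercalls.  XENMEM_machphys_mfn_list reports, for
 * each 2MB superpage of the M2P table, the MFN backing it (holes repeat the
 * previously reported MFN).
 */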
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
{
    struct xen_machphys_mfn_list xmml;
    unsigned long mfn, last_mfn;
    unsigned int i, max;
    l2_pgentry_t l2e;
    long rc = 0;

    switch ( op )
    {
    case XENMEM_machphys_mfn_list:
        if ( copy_from_guest(&xmml, arg, 1) )
            return -EFAULT;

        max = min_t(unsigned int, xmml.max_extents, mpt_size >> 21);

        for ( i = 0, last_mfn = 0; i < max; i++ )
        {
            l2e = idle_pg_table_l2[l2_linear_offset(
                RDWR_MPT_VIRT_START + (i << 21))];
            if ( l2e_get_flags(l2e) & _PAGE_PRESENT )
                mfn = l2e_get_pfn(l2e);
            else
                mfn = last_mfn;
            ASSERT(mfn);
            if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
                return -EFAULT;
            last_mfn = mfn;
        }

        xmml.nr_extents = i;
        if ( copy_to_guest(arg, &xmml, 1) )
            return -EFAULT;

        break;

    default:
        rc = -ENOSYS;
        break;
    }

    return rc;
}
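
/*
 * Hypercall: record the guest kernel stack (ss:esp) to be loaded on
 * transitions from guest user to guest kernel mode, both in the vcpu's
 * saved state and in the ring-1 fields of this CPU's TSS.
 */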
long do_stack_switch(unsigned long ss, unsigned long esp)
{
    struct tss_struct *t = &this_cpu(init_tss);

    fixup_guest_stack_selector(current->domain, ss);

    current->arch.guest_context.kernel_ss = ss;
    current->arch.guest_context.kernel_sp = esp;
    t->ss1 = ss;
    t->esp1 = esp;

    return 0;
}
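
/*
 * Note that as well as validating, this routine fixes up unsafe descriptors
 * where it can: a DPL of 0 is raised to the guest kernel's RPL, and limits
 * that would reach into the Xen-private address range are truncated.
 */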
/* Returns TRUE if given descriptor is valid for GDT or LDT. */
int check_descriptor(const struct domain *dom, struct desc_struct *d)
{
    unsigned long base, limit;
    u32 a = d->a, b = d->b;
    u16 cs;

    /* Let a ring0 guest kernel set any descriptor it wants to. */
    if ( supervisor_mode_kernel )
        return 1;

    /* A not-present descriptor will always fault, so is safe. */
    if ( !(b & _SEGMENT_P) )
        goto good;

    /*
     * We don't allow a DPL of zero. There is no legitimate reason for
     * specifying DPL==0, and it gets rather dangerous if we also accept call
     * gates (consider a call gate pointing at another kernel descriptor with
     * DPL 0 -- this would get the OS ring-0 privileges).
     */
    if ( (b & _SEGMENT_DPL) < (GUEST_KERNEL_RPL(dom) << 13) )
        d->b = b = (b & ~_SEGMENT_DPL) | (GUEST_KERNEL_RPL(dom) << 13);

    if ( !(b & _SEGMENT_S) )
    {
        /*
         * System segment:
         *  1. Don't allow interrupt or trap gates as they belong in the IDT.
         *  2. Don't allow TSS descriptors or task gates as we don't
         *     virtualise x86 tasks.
         *  3. Don't allow LDT descriptors because they're unnecessary and
         *     I'm uneasy about allowing an LDT page to contain LDT
         *     descriptors. In any case, Xen automatically creates the
         *     required descriptor when reloading the LDT register.
         *  4. We allow call gates but they must not jump to a private segment.
         */

        /* Disallow everything but call gates. */
        if ( (b & _SEGMENT_TYPE) != 0xc00 )
            goto bad;

        /* Validate and fix up the target code selector. */
        cs = a >> 16;
        fixup_guest_code_selector(dom, cs);
        if ( !guest_gate_selector_okay(dom, cs) )
            goto bad;
        a = d->a = (d->a & 0xffffU) | (cs << 16);

        /* Reserved bits must be zero. */
        if ( (b & 0xe0) != 0 )
            goto bad;

        /* No base/limit check is needed for a call gate. */
        goto good;
    }

    /* Check that base is at least a page away from Xen-private area. */
    base = (b&(0xff<<24)) | ((b&0xff)<<16) | (a>>16);
    if ( base >= (GUEST_SEGMENT_MAX_ADDR - PAGE_SIZE) )
        goto bad;

    /* Check and truncate the limit if necessary. */
    limit = (b&0xf0000) | (a&0xffff);
    limit++; /* We add one because limit is inclusive. */
    if ( (b & _SEGMENT_G) )
        limit <<= 12;

    if ( (b & (_SEGMENT_CODE | _SEGMENT_EC)) == _SEGMENT_EC )
    {
        /*
         * DATA, GROWS-DOWN.
         * Grows-down limit check.
         * NB. limit == 0xFFFFF provides no access      (if G=1).
         *     limit == 0x00000 provides 4GB-4kB access (if G=1).
         */
        if ( (base + limit) > base )
        {
            limit = -(base & PAGE_MASK);
            goto truncate;
        }
    }
    else
    {
        /*
         * DATA, GROWS-UP.
         * CODE (CONFORMING AND NON-CONFORMING).
         * Grows-up limit check.
         * NB. limit == 0xFFFFF provides 4GB access (if G=1).
         *     limit == 0x00000 provides 4kB access (if G=1).
         */
        if ( ((base + limit) <= base) ||
             ((base + limit) > GUEST_SEGMENT_MAX_ADDR) )
        {
            limit = GUEST_SEGMENT_MAX_ADDR - base;
        truncate:
            if ( !(b & _SEGMENT_G) )
                goto bad; /* too dangerous; too hard to work out... */
            limit = (limit >> 12) - 1;
            d->a &= ~0x0ffff; d->a |= limit & 0x0ffff;
            d->b &= ~0xf0000; d->b |= limit & 0xf0000;
        }
    }

 good:
    return 1;
 bad:
    return 0;
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */