debuggers.hg

view xen/arch/x86/domain.c @ 3726:88957a238191

bitkeeper revision 1.1159.1.544 (4207248crq3YxiyLWjUehtHv_Yd3tg)

Merge tempest.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into tempest.cl.cam.ac.uk:/local/scratch/smh22/xen-unstable.bk
author smh22@tempest.cl.cam.ac.uk
date Mon Feb 07 08:19:24 2005 +0000 (2005-02-07)
parents 393483ae9f62 d93748c50893
children f5f2757b3aa2
line source
1 /* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
2 /******************************************************************************
3 * arch/x86/domain.c
4 *
5 * x86-specific domain handling (e.g., register setup and context switching).
6 */
8 /*
9 * Copyright (C) 1995 Linus Torvalds
10 *
11 * Pentium III FXSR, SSE support
12 * Gareth Hughes <gareth@valinux.com>, May 2000
13 */
15 #include <xen/config.h>
16 #include <xen/init.h>
17 #include <xen/lib.h>
18 #include <xen/errno.h>
19 #include <xen/sched.h>
20 #include <xen/smp.h>
21 #include <xen/delay.h>
22 #include <xen/softirq.h>
23 #include <asm/regs.h>
24 #include <asm/mc146818rtc.h>
25 #include <asm/system.h>
26 #include <asm/io.h>
27 #include <asm/processor.h>
28 #include <asm/desc.h>
29 #include <asm/i387.h>
30 #include <asm/mpspec.h>
31 #include <asm/ldt.h>
32 #include <xen/irq.h>
33 #include <xen/event.h>
34 #include <asm/shadow.h>
35 #include <xen/console.h>
36 #include <xen/elf.h>
37 #include <asm/vmx.h>
38 #include <asm/vmx_vmcs.h>
39 #include <xen/kernel.h>
40 #include <public/io/ioreq.h>
41 #include <xen/multicall.h>
43 /* opt_noreboot: If true, machine will need manual reset on error. */
/*
 * Consulted by machine_restart(): when non-zero it prints a notice and then
 * halts forever instead of rebooting. Registered under the name "noreboot"
 * through boolean_param().
 */
44 static int opt_noreboot = 0;
45 boolean_param("noreboot", opt_noreboot);
/*
 * Perform one idle step on the calling CPU.
 *
 * Interrupts are disabled first so that the softirq check and the decision
 * to halt are made without an interrupt slipping in between. If a softirq is
 * pending we simply re-enable interrupts; otherwise we call safe_halt().
 */
static void default_idle(void)
{
    __cli();

    if ( softirq_pending(smp_processor_id()) )
    {
        /* Work is waiting: do not halt, just turn interrupts back on. */
        __sti();
        return;
    }

    safe_halt();
}
56 static __attribute_used__ void idle_loop(void)
57 {
58 int cpu = smp_processor_id();
59 for ( ; ; )
60 {
61 irq_stat[cpu].idle_timestamp = jiffies;
62 while ( !softirq_pending(cpu) )
63 default_idle();
64 do_softirq();
65 }
66 }
68 void startup_cpu_idle_loop(void)
69 {
70 /* Just some sanity to ensure that the scheduler is set up okay. */
71 ASSERT(current->domain->id == IDLE_DOMAIN_ID);
72 domain_unpause_by_systemcontroller(current->domain);
73 __enter_scheduler();
75 /*
76 * Declares CPU setup done to the boot processor.
77 * Therefore memory barrier to ensure state is visible.
78 */
79 smp_mb();
80 init_idle();
82 idle_loop();
83 }
/*
 * Memory operand handed to 'lidt' by machine_restart() right before it
 * executes 'int3' to force a triple fault. Nothing in view writes to it, so
 * it presumably stays all-zero (static storage).
 */
85 static long no_idt[2];
/* Value machine_restart() stores at absolute address 0x472 before resetting. */
86 static int reboot_mode;
/*
 * When zero, machine_restart() resets the machine by writing 0xfe to I/O
 * port 0x64. When non-zero it panics, because the BIOS reboot code is not
 * included.
 */
87 int reboot_thru_bios = 0;
89 #ifdef CONFIG_SMP
/*
 * reboot_smp: when non-zero, machine_restart() picks a reboot CPU and re-runs
 *             itself on the other CPUs; it clears the flag so this happens
 *             only once.
 * reboot_cpu: APIC ID of the CPU that performs the reboot; -1 means a
 *             traditional reboot on whichever CPU called machine_restart().
 */
90 int reboot_smp = 0;
91 static int reboot_cpu = -1;
92 /* shamelessly grabbed from lib/vsprintf.c for readability */
93 #define is_digit(c) ((c) >= '0' && (c) <= '9')
94 #endif
/*
 * Poll I/O port 0x64 until bit 1 (0x02) reads as clear, giving up silently
 * after 0x10000 reads. (Presumably this is the keyboard controller's
 * "input buffer full" status bit -- confirm against the 8042 documentation.)
 */
static inline void kb_wait(void)
{
    int tries_left = 0x10000;

    while ( tries_left-- > 0 )
    {
        if ( !(inb_p(0x64) & 0x02) )
            return;
    }
}
107 void machine_restart(char * __unused)
108 {
109 #ifdef CONFIG_SMP
110 int cpuid;
111 #endif
113 if ( opt_noreboot )
114 {
115 printk("Reboot disabled on cmdline: require manual reset\n");
116 for ( ; ; ) __asm__ __volatile__ ("hlt");
117 }
119 #ifdef CONFIG_SMP
120 cpuid = GET_APIC_ID(apic_read(APIC_ID));
122 /* KAF: Need interrupts enabled for safe IPI. */
123 __sti();
125 if (reboot_smp) {
127 /* check to see if reboot_cpu is valid
128 if its not, default to the BSP */
129 if ((reboot_cpu == -1) ||
130 (reboot_cpu > (NR_CPUS -1)) ||
131 !(phys_cpu_present_map & (1<<cpuid)))
132 reboot_cpu = boot_cpu_physical_apicid;
134 reboot_smp = 0; /* use this as a flag to only go through this once*/
135 /* re-run this function on the other CPUs
136 it will fall though this section since we have
137 cleared reboot_smp, and do the reboot if it is the
138 correct CPU, otherwise it halts. */
139 if (reboot_cpu != cpuid)
140 smp_call_function((void *)machine_restart , NULL, 1, 0);
141 }
143 /* if reboot_cpu is still -1, then we want a tradional reboot,
144 and if we are not running on the reboot_cpu,, halt */
145 if ((reboot_cpu != -1) && (cpuid != reboot_cpu)) {
146 for (;;)
147 __asm__ __volatile__ ("hlt");
148 }
149 /*
150 * Stop all CPUs and turn off local APICs and the IO-APIC, so
151 * other OSs see a clean IRQ state.
152 */
153 smp_send_stop();
154 disable_IO_APIC();
155 #endif
156 #ifdef CONFIG_VMX
157 stop_vmx();
158 #endif
160 if(!reboot_thru_bios) {
161 /* rebooting needs to touch the page at absolute addr 0 */
162 *((unsigned short *)__va(0x472)) = reboot_mode;
163 for (;;) {
164 int i;
165 for (i=0; i<100; i++) {
166 kb_wait();
167 udelay(50);
168 outb(0xfe,0x64); /* pulse reset low */
169 udelay(50);
170 }
171 /* That didn't work - force a triple fault.. */
172 __asm__ __volatile__("lidt %0": "=m" (no_idt));
173 __asm__ __volatile__("int3");
174 }
175 }
177 panic("Need to reinclude BIOS reboot code\n");
178 }
/*
 * Halt the calling CPU forever: repeatedly disable interrupts and execute
 * 'hlt'. The argument is ignored; the signature matches what
 * smp_call_function() is given in machine_halt(). Never returns.
 */
void __attribute__((noreturn)) __machine_halt(void *unused)
{
    while ( 1 )
    {
        __asm__ __volatile__ ( "cli; hlt" );
    }
}
187 void machine_halt(void)
188 {
189 smp_call_function(__machine_halt, NULL, 1, 1);
190 __machine_halt(NULL);
191 }
193 void dump_pageframe_info(struct domain *d)
194 {
195 struct pfn_info *page;
197 if ( d->tot_pages < 10 )
198 {
199 list_for_each_entry ( page, &d->page_list, list )
200 {
201 printk("Page %08x: caf=%08x, taf=%08x\n",
202 page_to_phys(page), page->count_info,
203 page->u.inuse.type_info);
204 }
205 }
207 page = virt_to_page(d->shared_info);
208 printk("Shared_info@%08x: caf=%08x, taf=%08x\n",
209 page_to_phys(page), page->count_info,
210 page->u.inuse.type_info);
211 }
213 struct domain *arch_alloc_domain_struct(void)
214 {
215 return xmalloc(struct domain);
216 }
/*
 * Release a struct domain previously obtained from
 * arch_alloc_domain_struct(), by handing it to xfree().
 */
void arch_free_domain_struct(struct domain *d)
{
    xfree(d);
    return;
}
223 struct exec_domain *arch_alloc_exec_domain_struct(void)
224 {
225 return xmalloc(struct exec_domain);
226 }
/*
 * Release a struct exec_domain previously obtained from
 * arch_alloc_exec_domain_struct(), by handing it to xfree().
 */
void arch_free_exec_domain_struct(struct exec_domain *ed)
{
    xfree(ed);
    return;
}
233 void free_perdomain_pt(struct domain *d)
234 {
235 free_xenheap_page((unsigned long)d->arch.mm_perdomain_pt);
236 }
238 static void continue_idle_task(struct exec_domain *ed)
239 {
240 reset_stack_and_jump(idle_loop);
241 }
243 static void continue_nonidle_task(struct exec_domain *ed)
244 {
245 reset_stack_and_jump(ret_from_intr);
246 }
248 void arch_do_createdomain(struct exec_domain *ed)
249 {
250 struct domain *d = ed->domain;
252 SET_DEFAULT_FAST_TRAP(&ed->arch);
254 if ( d->id == IDLE_DOMAIN_ID )
255 {
256 ed->arch.schedule_tail = continue_idle_task;
257 }
258 else
259 {
260 ed->arch.schedule_tail = continue_nonidle_task;
262 d->shared_info = (void *)alloc_xenheap_page();
263 memset(d->shared_info, 0, PAGE_SIZE);
264 ed->vcpu_info = &d->shared_info->vcpu_data[ed->eid];
265 SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
266 machine_to_phys_mapping[virt_to_phys(d->shared_info) >>
267 PAGE_SHIFT] = INVALID_P2M_ENTRY;
269 d->arch.mm_perdomain_pt = (l1_pgentry_t *)alloc_xenheap_page();
270 memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE);
271 machine_to_phys_mapping[virt_to_phys(d->arch.mm_perdomain_pt) >>
272 PAGE_SHIFT] = INVALID_P2M_ENTRY;
273 ed->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
274 }
275 }
277 void arch_do_boot_vcpu(struct exec_domain *ed)
278 {
279 struct domain *d = ed->domain;
280 ed->arch.schedule_tail = d->exec_domain[0]->arch.schedule_tail;
281 ed->arch.perdomain_ptes =
282 d->arch.mm_perdomain_pt + (ed->eid << PDPT_VCPU_SHIFT);
283 }
285 #ifdef CONFIG_VMX
286 void arch_vmx_do_resume(struct exec_domain *ed)
287 {
288 u64 vmcs_phys_ptr = (u64) virt_to_phys(ed->arch.arch_vmx.vmcs);
290 load_vmcs(&ed->arch.arch_vmx, vmcs_phys_ptr);
291 vmx_do_resume(ed);
292 reset_stack_and_jump(vmx_asm_do_resume);
293 }
295 void arch_vmx_do_launch(struct exec_domain *ed)
296 {
297 u64 vmcs_phys_ptr = (u64) virt_to_phys(ed->arch.arch_vmx.vmcs);
299 load_vmcs(&ed->arch.arch_vmx, vmcs_phys_ptr);
300 vmx_do_launch(ed);
301 reset_stack_and_jump(vmx_asm_do_launch);
302 }
304 static void monitor_mk_pagetable(struct exec_domain *ed)
305 {
306 unsigned long mpfn;
307 l2_pgentry_t *mpl2e;
308 struct pfn_info *mpfn_info;
309 struct domain *d = ed->domain;
311 mpfn_info = alloc_domheap_page(NULL);
312 ASSERT( mpfn_info );
314 mpfn = (unsigned long) (mpfn_info - frame_table);
315 mpl2e = (l2_pgentry_t *) map_domain_mem(mpfn << L1_PAGETABLE_SHIFT);
316 memset(mpl2e, 0, PAGE_SIZE);
318 memcpy(&mpl2e[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
319 &idle_pg_table[DOMAIN_ENTRIES_PER_L2_PAGETABLE],
320 HYPERVISOR_ENTRIES_PER_L2_PAGETABLE * sizeof(l2_pgentry_t));
322 ed->arch.monitor_table = mk_pagetable(mpfn << L1_PAGETABLE_SHIFT);
323 d->arch.shadow_mode = SHM_full_32;
325 mpl2e[PERDOMAIN_VIRT_START >> L2_PAGETABLE_SHIFT] =
326 mk_l2_pgentry((__pa(d->arch.mm_perdomain_pt) & PAGE_MASK)
327 | __PAGE_HYPERVISOR);
329 unmap_domain_mem(mpl2e);
330 }
332 /*
333 * Free the pages for monitor_table and guest_pl2e_cache
334 */
335 static void monitor_rm_pagetable(struct exec_domain *ed)
336 {
337 l2_pgentry_t *mpl2e;
338 unsigned long mpfn;
340 ASSERT( pagetable_val(ed->arch.monitor_table) );
342 mpl2e = (l2_pgentry_t *) map_domain_mem(pagetable_val(ed->arch.monitor_table));
343 /*
344 * First get the pfn for guest_pl2e_cache by looking at monitor_table
345 */
346 mpfn = l2_pgentry_val(mpl2e[LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT])
347 >> PAGE_SHIFT;
349 free_domheap_page(&frame_table[mpfn]);
350 unmap_domain_mem(mpl2e);
352 /*
353 * Then free monitor_table.
354 */
355 mpfn = (pagetable_val(ed->arch.monitor_table)) >> PAGE_SHIFT;
356 free_domheap_page(&frame_table[mpfn]);
358 ed->arch.monitor_table = mk_pagetable(0);
359 }
361 static int vmx_final_setup_guestos(struct exec_domain *ed,
362 full_execution_context_t *full_context)
363 {
364 int error;
365 execution_context_t *context;
366 struct vmcs_struct *vmcs;
368 context = &full_context->cpu_ctxt;
370 /*
371 * Create a new VMCS
372 */
373 if (!(vmcs = alloc_vmcs())) {
374 printk("Failed to create a new VMCS\n");
375 return -ENOMEM;
376 }
378 memset(&ed->arch.arch_vmx, 0, sizeof (struct arch_vmx_struct));
380 ed->arch.arch_vmx.vmcs = vmcs;
381 error = construct_vmcs(&ed->arch.arch_vmx, context, full_context, VMCS_USE_HOST_ENV);
382 if (error < 0) {
383 printk("Failed to construct a new VMCS\n");
384 goto out;
385 }
387 monitor_mk_pagetable(ed);
388 ed->arch.schedule_tail = arch_vmx_do_launch;
389 clear_bit(VMX_CPU_STATE_PG_ENABLED, &ed->arch.arch_vmx.cpu_state);
391 #if defined (__i386)
392 ed->arch.arch_vmx.vmx_platform.real_mode_data =
393 (unsigned long *) context->esi;
394 #endif
396 if (ed == ed->domain->exec_domain[0]) {
397 /*
398 * Required to do this once per domain
399 */
400 memset(&ed->domain->shared_info->evtchn_mask[0], 0xff,
401 sizeof(ed->domain->shared_info->evtchn_mask));
402 clear_bit(IOPACKET_PORT, &ed->domain->shared_info->evtchn_mask[0]);
403 }
405 return 0;
407 out:
408 free_vmcs(vmcs);
409 ed->arch.arch_vmx.vmcs = 0;
410 return error;
411 }
412 #endif
414 int arch_final_setup_guestos(
415 struct exec_domain *d, full_execution_context_t *c)
416 {
417 unsigned long phys_basetab;
418 int i, rc;
420 clear_bit(EDF_DONEFPUINIT, &d->ed_flags);
421 if ( c->flags & ECF_I387_VALID )
422 set_bit(EDF_DONEFPUINIT, &d->ed_flags);
424 memcpy(&d->arch.user_ctxt,
425 &c->cpu_ctxt,
426 sizeof(d->arch.user_ctxt));
428 /* Clear IOPL for unprivileged domains. */
429 if (!IS_PRIV(d->domain))
430 d->arch.user_ctxt.eflags &= 0xffffcfff;
432 /*
433 * This is sufficient! If the descriptor DPL differs from CS RPL then we'll
434 * #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically.
435 * If SS RPL or DPL differs from CS RPL then we'll #GP.
436 */
437 if (!(c->flags & ECF_VMX_GUEST))
438 if ( ((d->arch.user_ctxt.cs & 3) == 0) ||
439 ((d->arch.user_ctxt.ss & 3) == 0) )
440 return -EINVAL;
442 memcpy(&d->arch.i387,
443 &c->fpu_ctxt,
444 sizeof(d->arch.i387));
446 memcpy(d->arch.traps,
447 &c->trap_ctxt,
448 sizeof(d->arch.traps));
450 if ( (rc = (int)set_fast_trap(d, c->fast_trap_idx)) != 0 )
451 return rc;
453 d->arch.ldt_base = c->ldt_base;
454 d->arch.ldt_ents = c->ldt_ents;
456 d->arch.guestos_ss = c->guestos_ss;
457 d->arch.guestos_sp = c->guestos_esp;
459 for ( i = 0; i < 8; i++ )
460 (void)set_debugreg(d, i, c->debugreg[i]);
462 d->arch.event_selector = c->event_callback_cs;
463 d->arch.event_address = c->event_callback_eip;
464 d->arch.failsafe_selector = c->failsafe_callback_cs;
465 d->arch.failsafe_address = c->failsafe_callback_eip;
467 phys_basetab = c->pt_base;
468 d->arch.pagetable = mk_pagetable(phys_basetab);
469 if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d->domain,
470 PGT_base_page_table) )
471 return -EINVAL;
473 /* Failure to set GDT is harmless. */
474 SET_GDT_ENTRIES(d, DEFAULT_GDT_ENTRIES);
475 SET_GDT_ADDRESS(d, DEFAULT_GDT_ADDRESS);
476 if ( c->gdt_ents != 0 )
477 {
478 if ( (rc = (int)set_gdt(d, c->gdt_frames, c->gdt_ents)) != 0 )
479 {
480 put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]);
481 return rc;
482 }
483 }
485 #ifdef CONFIG_VMX
486 if (c->flags & ECF_VMX_GUEST)
487 return vmx_final_setup_guestos(d, c);
488 #endif
490 return 0;
491 }
493 void new_thread(struct exec_domain *d,
494 unsigned long start_pc,
495 unsigned long start_stack,
496 unsigned long start_info)
497 {
498 execution_context_t *ec = &d->arch.user_ctxt;
500 /*
501 * Initial register values:
502 * DS,ES,FS,GS = FLAT_GUESTOS_DS
503 * CS:EIP = FLAT_GUESTOS_CS:start_pc
504 * SS:ESP = FLAT_GUESTOS_SS:start_stack
505 * ESI = start_info
506 * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
507 */
508 ec->ds = ec->es = ec->fs = ec->gs = FLAT_GUESTOS_DS;
509 ec->ss = FLAT_GUESTOS_SS;
510 ec->cs = FLAT_GUESTOS_CS;
511 ec->eip = start_pc;
512 ec->esp = start_stack;
513 ec->esi = start_info;
515 __save_flags(ec->eflags);
516 ec->eflags |= X86_EFLAGS_IF;
517 }
/*
 * Load hardware debug register number 'idx' from (ctx)->debugreg[idx].
 * 'idx' must be a literal, because it is pasted into the instruction
 * mnemonic ("%db0", "%db1", ...).
 */
#define loaddebug(ctx, idx)                         \
    __asm__("mov %0,%%db" #idx                      \
            : /* no output */                       \
            : "r" ((ctx)->debugreg[idx]))
528 void switch_to(struct exec_domain *prev_p, struct exec_domain *next_p)
529 {
530 struct tss_struct *tss = init_tss + smp_processor_id();
531 execution_context_t *stack_ec = get_execution_context();
532 int i;
533 #ifdef CONFIG_VMX
534 unsigned long vmx_domain = next_p->arch.arch_vmx.flags;
535 #endif
537 __cli();
539 /* Switch guest general-register state. */
540 if ( !is_idle_task(prev_p->domain) )
541 {
542 memcpy(&prev_p->arch.user_ctxt,
543 stack_ec,
544 sizeof(*stack_ec));
545 unlazy_fpu(prev_p);
546 CLEAR_FAST_TRAP(&prev_p->arch);
547 }
549 if ( !is_idle_task(next_p->domain) )
550 {
551 memcpy(stack_ec,
552 &next_p->arch.user_ctxt,
553 sizeof(*stack_ec));
555 /* Maybe switch the debug registers. */
556 if ( unlikely(next_p->arch.debugreg[7]) )
557 {
558 loaddebug(&next_p->arch, 0);
559 loaddebug(&next_p->arch, 1);
560 loaddebug(&next_p->arch, 2);
561 loaddebug(&next_p->arch, 3);
562 /* no 4 and 5 */
563 loaddebug(&next_p->arch, 6);
564 loaddebug(&next_p->arch, 7);
565 }
567 #ifdef CONFIG_VMX
568 if ( vmx_domain )
569 {
570 /* Switch page tables. */
571 write_ptbase(next_p);
573 set_current(next_p);
574 /* Switch GDT and LDT. */
575 __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->arch.gdt));
577 __sti();
578 return;
579 }
580 #endif
582 SET_FAST_TRAP(&next_p->arch);
584 #ifdef __i386__
585 /* Switch the guest OS ring-1 stack. */
586 tss->esp1 = next_p->arch.guestos_sp;
587 tss->ss1 = next_p->arch.guestos_ss;
588 #endif
590 /* Switch page tables. */
591 write_ptbase(next_p);
592 }
594 if ( unlikely(prev_p->arch.io_bitmap != NULL) )
595 {
596 for ( i = 0; i < sizeof(prev_p->arch.io_bitmap_sel) * 8; i++ )
597 if ( !test_bit(i, &prev_p->arch.io_bitmap_sel) )
598 memset(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
599 ~0U, IOBMP_BYTES_PER_SELBIT);
600 tss->bitmap = IOBMP_INVALID_OFFSET;
601 }
603 if ( unlikely(next_p->arch.io_bitmap != NULL) )
604 {
605 for ( i = 0; i < sizeof(next_p->arch.io_bitmap_sel) * 8; i++ )
606 if ( !test_bit(i, &next_p->arch.io_bitmap_sel) )
607 memcpy(&tss->io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
608 &next_p->arch.io_bitmap[i * IOBMP_BYTES_PER_SELBIT],
609 IOBMP_BYTES_PER_SELBIT);
610 tss->bitmap = IOBMP_OFFSET;
611 }
613 set_current(next_p);
615 /* Switch GDT and LDT. */
616 __asm__ __volatile__ ("lgdt %0" : "=m" (*next_p->arch.gdt));
617 load_LDT(next_p);
619 __sti();
620 }
623 /* XXX Currently the 'domain' field is ignored! XXX */
624 long do_iopl(domid_t domain, unsigned int new_io_pl)
625 {
626 execution_context_t *ec = get_execution_context();
627 ec->eflags = (ec->eflags & 0xffffcfff) | ((new_io_pl&3) << 12);
628 return 0;
629 }
631 unsigned long hypercall_create_continuation(
632 unsigned int op, unsigned int nr_args, ...)
633 {
634 struct mc_state *mcs = &mc_state[smp_processor_id()];
635 execution_context_t *ec;
636 unsigned long *preg;
637 unsigned int i;
638 va_list args;
640 va_start(args, nr_args);
642 if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
643 {
644 __set_bit(_MCSF_call_preempted, &mcs->flags);
646 for ( i = 0; i < nr_args; i++ )
647 mcs->call.args[i] = va_arg(args, unsigned long);
648 }
649 else
650 {
651 ec = get_execution_context();
652 #if defined(__i386__)
653 ec->eax = op;
654 ec->eip -= 2; /* re-execute 'int 0x82' */
656 for ( i = 0, preg = &ec->ebx; i < nr_args; i++, preg++ )
657 *preg = va_arg(args, unsigned long);
658 #else
659 preg = NULL; /* XXX x86/64 */
660 #endif
661 }
663 va_end(args);
665 return op;
666 }
668 static void relinquish_list(struct domain *d, struct list_head *list)
669 {
670 struct list_head *ent;
671 struct pfn_info *page;
672 unsigned long x, y;
674 /* Use a recursive lock, as we may enter 'free_domheap_page'. */
675 spin_lock_recursive(&d->page_alloc_lock);
677 ent = list->next;
678 while ( ent != list )
679 {
680 page = list_entry(ent, struct pfn_info, list);
682 /* Grab a reference to the page so it won't disappear from under us. */
683 if ( unlikely(!get_page(page, d)) )
684 {
685 /* Couldn't get a reference -- someone is freeing this page. */
686 ent = ent->next;
687 continue;
688 }
690 if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
691 put_page_and_type(page);
693 if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
694 put_page(page);
696 /*
697 * Forcibly invalidate base page tables at this point to break circular
698 * 'linear page table' references. This is okay because MMU structures
699 * are not shared across domains and this domain is now dead. Thus base
700 * tables are not in use so a non-zero count means circular reference.
701 */
702 y = page->u.inuse.type_info;
703 for ( ; ; )
704 {
705 x = y;
706 if ( likely((x & (PGT_type_mask|PGT_validated)) !=
707 (PGT_base_page_table|PGT_validated)) )
708 break;
710 y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
711 if ( likely(y == x) )
712 {
713 free_page_type(page, PGT_base_page_table);
714 break;
715 }
716 }
718 /* Follow the list chain and /then/ potentially free the page. */
719 ent = ent->next;
720 put_page(page);
721 }
723 spin_unlock_recursive(&d->page_alloc_lock);
724 }
726 #ifdef CONFIG_VMX
727 static void vmx_domain_relinquish_memory(struct exec_domain *ed)
728 {
729 struct domain *d = ed->domain;
731 /*
732 * Free VMCS
733 */
734 ASSERT(ed->arch.arch_vmx.vmcs);
735 free_vmcs(ed->arch.arch_vmx.vmcs);
736 ed->arch.arch_vmx.vmcs = 0;
738 monitor_rm_pagetable(ed);
740 if (ed == d->exec_domain[0]) {
741 int i;
742 unsigned long pfn;
744 for (i = 0; i < ENTRIES_PER_L1_PAGETABLE; i++) {
745 unsigned long l1e;
747 l1e = l1_pgentry_val(d->arch.mm_perdomain_pt[i]);
748 if (l1e & _PAGE_PRESENT) {
749 pfn = l1e >> PAGE_SHIFT;
750 free_domheap_page(&frame_table[pfn]);
751 }
752 }
753 }
755 }
756 #endif
758 void domain_relinquish_memory(struct domain *d)
759 {
760 struct exec_domain *ed;
762 /* Ensure that noone is running over the dead domain's page tables. */
763 synchronise_pagetables(~0UL);
765 /* Exit shadow mode before deconstructing final guest page table. */
766 shadow_mode_disable(d);
768 /* Drop the in-use reference to the page-table base. */
769 for_each_exec_domain ( d, ed )
770 {
771 if ( pagetable_val(ed->arch.pagetable) != 0 )
772 put_page_and_type(&frame_table[pagetable_val(ed->arch.pagetable) >>
773 PAGE_SHIFT]);
774 }
776 #ifdef CONFIG_VMX
777 if ( VMX_DOMAIN(d->exec_domain[0]) )
778 for_each_exec_domain ( d, ed )
779 vmx_domain_relinquish_memory(ed);
780 #endif
782 /*
783 * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
784 * it automatically gets squashed when the guest's mappings go away.
785 */
786 for_each_exec_domain(d, ed)
787 destroy_gdt(ed);
789 /* Relinquish every page of memory. */
790 relinquish_list(d, &d->xenpage_list);
791 relinquish_list(d, &d->page_list);
792 }