xen-vtx-unstable

annotate xen/arch/x86/domain.c @ 6759:b5d91089e42c

Newer binutils is a bit stricter and errors out when you try
to use movl on a 16-bit word on x86_64. Using just a "mov"
compiles fine and should result in the same code.

{standard input}: Assembler messages:
{standard input}:2138: Error: suffix or operands invalid for `mov'
{standard input}:2140: Error: suffix or operands invalid for `mov'
{standard input}:2142: Error: suffix or operands invalid for `mov'
{standard input}:2144: Error: suffix or operands invalid for `mov'
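
A minimal sketch of the failing vs. accepted forms (the affected
stores are the lines tagged 6759 in save_segments() below; regs->ds
is a 16-bit selector field, so the operand and constraint here are
taken from that code):

    /* Rejected by newer binutils on x86_64: the 'l' suffix demands a
       32-bit operand, but a segment-register store to memory is 16 bits. */
    __asm__ __volatile__ ( "movl %%ds,%0" : "=m" (regs->ds) );

    /* Accepted: with no size suffix the assembler infers the width from
       the operands and emits the same 16-bit segment store. */
    __asm__ __volatile__ ( "mov %%ds,%0" : "=m" (regs->ds) );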

Signed-off-by: Rik van Riel <riel@redhat.com>
author kaf24@firebug.cl.cam.ac.uk
date Tue Sep 13 10:21:22 2005 +0000 (2005-09-13)
parents 219d96d545fc
children 4d899a738d59 8ca0f98ba8e2
rev   line source
kaf24@1749 1 /******************************************************************************
kaf24@1749 2 * arch/x86/domain.c
kaf24@1749 3 *
kaf24@1749 4 * x86-specific domain handling (e.g., register setup and context switching).
kaf24@1749 5 */
kaf24@1749 6
kaf24@1672 7 /*
kaf24@1672 8 * Copyright (C) 1995 Linus Torvalds
kaf24@1672 9 *
kaf24@1672 10 * Pentium III FXSR, SSE support
kaf24@5576 11 * Gareth Hughes <gareth@valinux.com>, May 2000
kaf24@1672 12 */
kaf24@1672 13
kaf24@1672 14 #include <xen/config.h>
kaf24@3334 15 #include <xen/init.h>
kaf24@1672 16 #include <xen/lib.h>
kaf24@1672 17 #include <xen/errno.h>
kaf24@1672 18 #include <xen/sched.h>
kaf24@1672 19 #include <xen/smp.h>
kaf24@1672 20 #include <xen/delay.h>
kaf24@1672 21 #include <xen/softirq.h>
cwc22@4023 22 #include <xen/grant_table.h>
ach61@2805 23 #include <asm/regs.h>
kaf24@1672 24 #include <asm/mc146818rtc.h>
kaf24@1672 25 #include <asm/system.h>
kaf24@1672 26 #include <asm/io.h>
kaf24@1672 27 #include <asm/processor.h>
kaf24@1672 28 #include <asm/desc.h>
kaf24@1672 29 #include <asm/i387.h>
kaf24@1672 30 #include <asm/mpspec.h>
kaf24@1672 31 #include <asm/ldt.h>
kaf24@1672 32 #include <xen/irq.h>
kaf24@1672 33 #include <xen/event.h>
kaf24@1749 34 #include <asm/shadow.h>
djm@1714 35 #include <xen/console.h>
djm@1714 36 #include <xen/elf.h>
iap10@3290 37 #include <asm/vmx.h>
iap10@3290 38 #include <asm/vmx_vmcs.h>
kaf24@3754 39 #include <asm/msr.h>
kaf24@4779 40 #include <asm/physdev.h>
iap10@3290 41 #include <xen/kernel.h>
iap10@3290 42 #include <public/io/ioreq.h>
kaf24@3139 43 #include <xen/multicall.h>
djm@1714 44
kaf24@3334 45 /* opt_noreboot: If true, machine will need manual reset on error. */
kaf24@3334 46 static int opt_noreboot = 0;
kaf24@3334 47 boolean_param("noreboot", opt_noreboot);
kaf24@3334 48
kaf24@4373 49 struct percpu_ctxt {
kaf24@5289 50 struct vcpu *curr_vcpu;
kaf24@6199 51 unsigned int context_not_finalised;
kaf24@6199 52 unsigned int dirty_segment_mask;
kaf24@4373 53 } __cacheline_aligned;
kaf24@4373 54 static struct percpu_ctxt percpu_ctxt[NR_CPUS];
djm@1714 55
kaf24@5289 56 static void continue_idle_task(struct vcpu *v)
kaf24@4698 57 {
kaf24@4698 58 reset_stack_and_jump(idle_loop);
kaf24@4698 59 }
kaf24@4698 60
kaf24@5289 61 static void continue_nonidle_task(struct vcpu *v)
kaf24@4698 62 {
kaf24@4698 63 reset_stack_and_jump(ret_from_intr);
kaf24@4698 64 }
kaf24@4698 65
kaf24@3272 66 static void default_idle(void)
kaf24@1672 67 {
kaf24@4373 68 local_irq_disable();
kaf24@3272 69 if ( !softirq_pending(smp_processor_id()) )
kaf24@3272 70 safe_halt();
kaf24@3272 71 else
kaf24@4373 72 local_irq_enable();
kaf24@1672 73 }
kaf24@1672 74
kaf24@4707 75 void idle_loop(void)
kaf24@1672 76 {
kaf24@1672 77 int cpu = smp_processor_id();
kaf24@5146 78
kaf24@1672 79 for ( ; ; )
kaf24@1672 80 {
kaf24@1672 81 irq_stat[cpu].idle_timestamp = jiffies;
kaf24@5146 82
kaf24@1672 83 while ( !softirq_pending(cpu) )
kaf24@4267 84 {
kaf24@4267 85 page_scrub_schedule_work();
kaf24@1672 86 default_idle();
kaf24@4267 87 }
kaf24@5146 88
kaf24@1672 89 do_softirq();
kaf24@1672 90 }
kaf24@1672 91 }
kaf24@1672 92
kaf24@1672 93 void startup_cpu_idle_loop(void)
kaf24@1672 94 {
kaf24@5289 95 struct vcpu *v = current;
kaf24@4698 96
kaf24@5289 97 ASSERT(is_idle_task(v->domain));
kaf24@5289 98 percpu_ctxt[smp_processor_id()].curr_vcpu = v;
kaf24@5301 99 cpu_set(smp_processor_id(), v->domain->cpumask);
kaf24@5289 100 v->arch.schedule_tail = continue_idle_task;
kaf24@4698 101
kaf24@5146 102 idle_loop();
kaf24@1672 103 }
kaf24@1672 104
kaf24@1672 105 static long no_idt[2];
kaf24@1672 106 static int reboot_mode;
kaf24@1672 107
kaf24@1672 108 static inline void kb_wait(void)
kaf24@1672 109 {
kaf24@1672 110 int i;
kaf24@1672 111
kaf24@3797 112 for ( i = 0; i < 0x10000; i++ )
kaf24@3797 113 if ( (inb_p(0x64) & 0x02) == 0 )
kaf24@1672 114 break;
kaf24@1672 115 }
kaf24@1672 116
kaf24@1672 117 void machine_restart(char * __unused)
kaf24@1672 118 {
kaf24@3797 119 int i;
kaf24@5576 120
kaf24@1672 121 if ( opt_noreboot )
kaf24@1672 122 {
kaf24@1672 123 printk("Reboot disabled on cmdline: require manual reset\n");
kaf24@3797 124 for ( ; ; )
kaf24@3797 125 safe_halt();
kaf24@1672 126 }
kaf24@1672 127
kaf24@5321 128 watchdog_disable();
kaf24@5321 129 console_start_sync();
kaf24@5321 130
kaf24@4373 131 local_irq_enable();
kaf24@1672 132
kaf24@3797 133 /* Ensure we are the boot CPU. */
kaf24@3797 134 if ( GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid )
kaf24@3797 135 {
kaf24@3797 136 smp_call_function((void *)machine_restart, NULL, 1, 0);
kaf24@3797 137 for ( ; ; )
kaf24@3797 138 safe_halt();
kaf24@1672 139 }
kaf24@1672 140
kaf24@1672 141 /*
kaf24@1672 142 * Stop all CPUs and turn off local APICs and the IO-APIC, so
kaf24@1672 143 * other OSs see a clean IRQ state.
kaf24@1672 144 */
kaf24@1672 145 smp_send_stop();
kaf24@1672 146 disable_IO_APIC();
kaf24@3797 147
iap10@3290 148 #ifdef CONFIG_VMX
iap10@3290 149 stop_vmx();
kaf24@1672 150 #endif
kaf24@1672 151
kaf24@3797 152 /* Rebooting needs to touch the page at absolute address 0. */
kaf24@3797 153 *((unsigned short *)__va(0x472)) = reboot_mode;
kaf24@3797 154
kaf24@3797 155 for ( ; ; )
kaf24@3797 156 {
kaf24@3797 157 /* Pulse the keyboard reset line. */
kaf24@3797 158 for ( i = 0; i < 100; i++ )
kaf24@3797 159 {
kaf24@3797 160 kb_wait();
kaf24@3797 161 udelay(50);
kaf24@3797 162 outb(0xfe,0x64); /* pulse reset low */
kaf24@3797 163 udelay(50);
kaf24@1672 164 }
kaf24@3797 165
kaf24@3797 166 /* That didn't work - force a triple fault... */
kaf24@3797 167 __asm__ __volatile__("lidt %0" : : "m" (no_idt));
kaf24@3797 168 __asm__ __volatile__("int3");
kaf24@1672 169 }
kaf24@1672 170 }
kaf24@1672 171
kaf24@1811 172
kaf24@1811 173 void __attribute__((noreturn)) __machine_halt(void *unused)
kaf24@1811 174 {
kaf24@1811 175 for ( ; ; )
kaf24@3797 176 safe_halt();
kaf24@1811 177 }
kaf24@1811 178
kaf24@1672 179 void machine_halt(void)
kaf24@1672 180 {
kaf24@4926 181 watchdog_disable();
kaf24@5321 182 console_start_sync();
kaf24@3797 183 smp_call_function(__machine_halt, NULL, 1, 0);
kaf24@1811 184 __machine_halt(NULL);
kaf24@1672 185 }
kaf24@1672 186
kaf24@3477 187 void dump_pageframe_info(struct domain *d)
kaf24@3477 188 {
kaf24@3477 189 struct pfn_info *page;
kaf24@3477 190
kaf24@3477 191 if ( d->tot_pages < 10 )
kaf24@3477 192 {
kaf24@3530 193 list_for_each_entry ( page, &d->page_list, list )
kaf24@3477 194 {
kaf24@6039 195 printk("Page %p: caf=%08x, taf=%" PRtype_info "\n",
kaf24@4654 196 _p(page_to_phys(page)), page->count_info,
kaf24@3477 197 page->u.inuse.type_info);
kaf24@3477 198 }
kaf24@3477 199 }
iap10@4575 200
iap10@4575 201 list_for_each_entry ( page, &d->xenpage_list, list )
iap10@4575 202 {
kaf24@6039 203 printk("XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
kaf24@4654 204 _p(page_to_phys(page)), page->count_info,
iap10@4575 205 page->u.inuse.type_info);
iap10@4575 206 }
iap10@4575 207
kaf24@3477 208
kaf24@3477 209 page = virt_to_page(d->shared_info);
kaf24@6039 210 printk("Shared_info@%p: caf=%08x, taf=%" PRtype_info "\n",
kaf24@4654 211 _p(page_to_phys(page)), page->count_info,
kaf24@3477 212 page->u.inuse.type_info);
kaf24@3477 213 }
kaf24@3477 214
kaf24@5289 215 struct vcpu *arch_alloc_vcpu_struct(void)
kaf24@3479 216 {
kaf24@5289 217 return xmalloc(struct vcpu);
kaf24@3479 218 }
kaf24@3479 219
sos22@6268 220 /* We assume that vcpu 0 is always the last one to be freed in a
sos22@6268 221 domain, i.e. if v->vcpu_id == 0, the domain should be
sos22@6268 222 single-processor. */
kaf24@5289 223 void arch_free_vcpu_struct(struct vcpu *v)
kaf24@3479 224 {
sos22@6268 225 struct vcpu *p;
sos22@6268 226 for_each_vcpu(v->domain, p) {
sos22@6268 227 if (p->next_in_list == v)
sos22@6268 228 p->next_in_list = v->next_in_list;
sos22@6268 229 }
kaf24@5289 230 xfree(v);
kaf24@3477 231 }
kaf24@3477 232
kaf24@1936 233 void free_perdomain_pt(struct domain *d)
kaf24@1936 234 {
kaf24@5398 235 free_xenheap_page(d->arch.mm_perdomain_pt);
kaf24@3753 236 #ifdef __x86_64__
kaf24@5398 237 free_xenheap_page(d->arch.mm_perdomain_l2);
kaf24@5398 238 free_xenheap_page(d->arch.mm_perdomain_l3);
kaf24@3753 239 #endif
kaf24@1936 240 }
kaf24@1936 241
kaf24@5289 242 void arch_do_createdomain(struct vcpu *v)
djm@1698 243 {
kaf24@5289 244 struct domain *d = v->domain;
kaf24@3630 245
kaf24@5289 246 v->arch.flags = TF_kernel_mode;
kaf24@3272 247
kaf24@5146 248 if ( is_idle_task(d) )
kaf24@4972 249 return;
djm@1698 250
kaf24@5289 251 v->arch.schedule_tail = continue_nonidle_task;
kaf24@4972 252
kaf24@5398 253 d->shared_info = alloc_xenheap_page();
kaf24@4972 254 memset(d->shared_info, 0, PAGE_SIZE);
kaf24@5289 255 v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
kaf24@5289 256 v->cpumap = CPUMAP_RUNANYWHERE;
kaf24@4972 257 SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
kaf24@6481 258 set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT,
kaf24@6481 259 INVALID_M2P_ENTRY);
kaf24@4972 260
kaf24@5398 261 d->arch.mm_perdomain_pt = alloc_xenheap_page();
kaf24@4972 262 memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE);
kaf24@6481 263 set_pfn_from_mfn(virt_to_phys(d->arch.mm_perdomain_pt) >> PAGE_SHIFT,
kaf24@6481 264 INVALID_M2P_ENTRY);
kaf24@5289 265 v->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
kaf24@5289 266 v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
kaf24@5250 267 l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
kaf24@5004 268
kaf24@5289 269 v->arch.guest_vtable = __linear_l2_table;
kaf24@5289 270 v->arch.shadow_vtable = __shadow_linear_l2_table;
kaf24@3272 271
kaf24@3753 272 #ifdef __x86_64__
kaf24@5289 273 v->arch.guest_vl3table = __linear_l3_table;
kaf24@5289 274 v->arch.guest_vl4table = __linear_l4_table;
kaf24@4972 275
kaf24@5398 276 d->arch.mm_perdomain_l2 = alloc_xenheap_page();
kaf24@4972 277 memset(d->arch.mm_perdomain_l2, 0, PAGE_SIZE);
kaf24@4972 278 d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)] =
kaf24@5250 279 l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt),
kaf24@4972 280 __PAGE_HYPERVISOR);
kaf24@5398 281 d->arch.mm_perdomain_l3 = alloc_xenheap_page();
kaf24@4972 282 memset(d->arch.mm_perdomain_l3, 0, PAGE_SIZE);
kaf24@4972 283 d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
kaf24@5250 284 l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
mafetter@4591 285 __PAGE_HYPERVISOR);
kaf24@3753 286 #endif
kaf24@4972 287
kaf24@4972 288 (void)ptwr_init(d);
kaf24@4972 289
kaf24@4972 290 shadow_lock_init(d);
kaf24@4972 291 INIT_LIST_HEAD(&d->arch.free_shadow_frames);
djm@1698 292 }
djm@1698 293
kaf24@5289 294 void arch_do_boot_vcpu(struct vcpu *v)
kaf24@3662 295 {
kaf24@5289 296 struct domain *d = v->domain;
kaf24@4972 297
kaf24@5289 298 v->arch.flags = TF_kernel_mode;
kaf24@4972 299
kaf24@5289 300 v->arch.schedule_tail = d->vcpu[0]->arch.schedule_tail;
kaf24@4972 301
kaf24@5289 302 v->arch.perdomain_ptes =
kaf24@5289 303 d->arch.mm_perdomain_pt + (v->vcpu_id << PDPT_VCPU_SHIFT);
kaf24@5289 304 v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
kaf24@5250 305 l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
kaf24@3662 306 }
kaf24@3662 307
kaf24@6113 308 void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
kaf24@6113 309 {
kaf24@6113 310 if ( v->processor == newcpu )
kaf24@6113 311 return;
kaf24@6113 312
kaf24@6113 313 set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
kaf24@6113 314 v->processor = newcpu;
kaf24@6113 315
kaf24@6113 316 if ( VMX_DOMAIN(v) )
kaf24@6113 317 {
kaf24@6113 318 __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
kaf24@6113 319 v->arch.schedule_tail = arch_vmx_do_relaunch;
kaf24@6113 320 }
kaf24@6113 321 }
kaf24@6113 322
iap10@3290 323 #ifdef CONFIG_VMX
kaf24@5659 324 static int vmx_switch_on;
kaf24@5659 325
kaf24@4683 326 static int vmx_final_setup_guest(
kaf24@5289 327 struct vcpu *v, struct vcpu_guest_context *ctxt)
iap10@3290 328 {
iap10@3290 329 int error;
kaf24@4683 330 struct cpu_user_regs *regs;
iap10@3290 331 struct vmcs_struct *vmcs;
iap10@3290 332
kaf24@4683 333 regs = &ctxt->user_regs;
iap10@3290 334
iap10@3290 335 /*
iap10@3290 336 * Create a new VMCS
iap10@3290 337 */
iap10@3290 338 if (!(vmcs = alloc_vmcs())) {
iap10@3290 339 printk("Failed to create a new VMCS\n");
iap10@3290 340 return -ENOMEM;
iap10@3290 341 }
iap10@3290 342
kaf24@5289 343 memset(&v->arch.arch_vmx, 0, sizeof (struct arch_vmx_struct));
iap10@3290 344
kaf24@5289 345 v->arch.arch_vmx.vmcs = vmcs;
kaf24@3753 346 error = construct_vmcs(
kaf24@5289 347 &v->arch.arch_vmx, regs, ctxt, VMCS_USE_HOST_ENV);
kaf24@3753 348 if ( error < 0 )
kaf24@3753 349 {
iap10@3290 350 printk("Failed to construct a new VMCS\n");
iap10@3290 351 goto out;
iap10@3290 352 }
iap10@3290 353
kaf24@5289 354 v->arch.schedule_tail = arch_vmx_do_launch;
iap10@3290 355
kaf24@6113 356 #if defined (__i386__)
arun@5608 357 v->domain->arch.vmx_platform.real_mode_data =
kaf24@4683 358 (unsigned long *) regs->esi;
iap10@3567 359 #endif
iap10@3290 360
kaf24@5289 361 if (v == v->domain->vcpu[0]) {
iap10@3567 362 /*
iap10@3567 363 * Required to do this once per domain
iap10@3823 364 * XXX todo: add a separate function to do these.
iap10@3567 365 */
kaf24@5289 366 memset(&v->domain->shared_info->evtchn_mask[0], 0xff,
kaf24@5289 367 sizeof(v->domain->shared_info->evtchn_mask));
iap10@3823 368
iap10@3823 369 /* Put the domain in shadow mode even though we're going to be using
iap10@3823 370 * the shared 1:1 page table initially. It shouldn't hurt. */
kaf24@5289 371 shadow_mode_enable(v->domain,
maf46@5196 372 SHM_enable|SHM_refcounts|
maf46@4953 373 SHM_translate|SHM_external);
iap10@3567 374 }
iap10@3290 375
kaf24@5659 376 if (!vmx_switch_on)
kaf24@5659 377 vmx_switch_on = 1;
kaf24@5659 378
iap10@3290 379 return 0;
iap10@3290 380
iap10@3290 381 out:
iap10@3290 382 free_vmcs(vmcs);
kaf24@5836 383 if(v->arch.arch_vmx.io_bitmap_a != 0) {
kaf24@6684 384 free_xenheap_pages(
kaf24@6684 385 v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000));
kaf24@5836 386 v->arch.arch_vmx.io_bitmap_a = 0;
kaf24@5836 387 }
kaf24@5836 388 if(v->arch.arch_vmx.io_bitmap_b != 0) {
kaf24@6684 389 free_xenheap_pages(
kaf24@6684 390 v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000));
kaf24@5836 391 v->arch.arch_vmx.io_bitmap_b = 0;
kaf24@5836 392 }
kaf24@5289 393 v->arch.arch_vmx.vmcs = 0;
iap10@3290 394 return error;
iap10@3290 395 }
iap10@3290 396 #endif
iap10@3290 397
iap10@3823 398
iap10@3823 399 /* This is called by arch_final_setup_guest and do_boot_vcpu */
kmacy@4118 400 int arch_set_info_guest(
kaf24@5289 401 struct vcpu *v, struct vcpu_guest_context *c)
djm@1698 402 {
kaf24@5289 403 struct domain *d = v->domain;
djm@1698 404 unsigned long phys_basetab;
kaf24@2465 405 int i, rc;
djm@1698 406
kaf24@2684 407 /*
kaf24@2684 408 * This is sufficient! If the descriptor DPL differs from CS RPL then we'll
kaf24@2684 409 * #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically.
kaf24@2684 410 * If SS RPL or DPL differs from CS RPL then we'll #GP.
kaf24@2684 411 */
kaf24@4689 412 if ( !(c->flags & VGCF_VMX_GUEST) )
kaf24@4689 413 {
kaf24@4683 414 if ( ((c->user_regs.cs & 3) == 0) ||
kaf24@4683 415 ((c->user_regs.ss & 3) == 0) )
sos22@6268 416 return -EINVAL;
kaf24@4689 417 }
kmacy@4118 418
kaf24@5289 419 clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
kaf24@4689 420 if ( c->flags & VGCF_I387_VALID )
kaf24@5289 421 set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
kaf24@2465 422
kaf24@5289 423 v->arch.flags &= ~TF_kernel_mode;
kaf24@5722 424 if ( (c->flags & VGCF_IN_KERNEL) || (c->flags & VGCF_VMX_GUEST) )
kaf24@5289 425 v->arch.flags |= TF_kernel_mode;
kaf24@2465 426
kaf24@5289 427 memcpy(&v->arch.guest_context, c, sizeof(*c));
kmacy@4118 428
kaf24@4852 429 if ( !(c->flags & VGCF_VMX_GUEST) )
kaf24@4852 430 {
kaf24@4852 431 /* IOPL privileges are virtualised. */
kaf24@5289 432 v->arch.iopl = (v->arch.guest_context.user_regs.eflags >> 12) & 3;
kaf24@5289 433 v->arch.guest_context.user_regs.eflags &= ~EF_IOPL;
kaf24@2465 434
kaf24@4852 435 /* Ensure real hardware interrupts are enabled. */
kaf24@5289 436 v->arch.guest_context.user_regs.eflags |= EF_IE;
kaf24@5821 437 }
kaf24@5821 438 else if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
kaf24@5821 439 {
kaf24@5821 440 return modify_vmcs(
kaf24@5821 441 &v->arch.arch_vmx,
kaf24@5821 442 &v->arch.guest_context.user_regs);
kaf24@4852 443 }
kaf24@2465 444
kaf24@5289 445 if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
kmacy@4118 446 return 0;
kaf24@2465 447
kaf24@5289 448 memset(v->arch.guest_context.debugreg, 0,
kaf24@5289 449 sizeof(v->arch.guest_context.debugreg));
djm@1698 450 for ( i = 0; i < 8; i++ )
kaf24@5289 451 (void)set_debugreg(v, i, c->debugreg[i]);
kaf24@2465 452
kaf24@5289 453 if ( v->vcpu_id == 0 )
kaf24@4381 454 d->vm_assist = c->vm_assist;
kaf24@2465 455
kaf24@5576 456 phys_basetab = c->ctrlreg[3];
kaf24@5289 457 v->arch.guest_table = mk_pagetable(phys_basetab);
kaf24@2465 458
mafetter@4799 459 if ( shadow_mode_refcounts(d) )
mafetter@4509 460 {
mafetter@4509 461 if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
mafetter@4509 462 return -EINVAL;
mafetter@4509 463 }
kaf24@6739 464 else if ( !(c->flags & VGCF_VMX_GUEST) )
mafetter@4509 465 {
kaf24@6739 466 if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d,
kaf24@6739 467 PGT_base_page_table) )
kaf24@6739 468 return -EINVAL;
mafetter@4509 469 }
kaf24@2465 470
kaf24@5289 471 if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
kaf24@2465 472 {
kaf24@4972 473 put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]);
kaf24@4972 474 return rc;
kaf24@2465 475 }
kaf24@2465 476
kaf24@4689 477 if ( c->flags & VGCF_VMX_GUEST )
mafetter@4192 478 {
kaf24@5576 479 /* VMX uses the initially provided page tables as the P2M map. */
kaf24@5250 480 if ( !pagetable_get_paddr(d->arch.phys_table) )
kaf24@5289 481 d->arch.phys_table = v->arch.guest_table;
mafetter@4192 482
kaf24@5576 483 if ( (rc = vmx_final_setup_guest(v, c)) != 0 )
kaf24@5576 484 return rc;
mafetter@4192 485 }
iap10@3290 486
kaf24@5289 487 update_pagetables(v);
kaf24@6093 488
kaf24@6093 489 if ( v->vcpu_id == 0 )
kaf24@6093 490 init_domain_time(d);
kaf24@6093 491
kmacy@4118 492 /* Don't redo final setup */
kaf24@5289 493 set_bit(_VCPUF_initialised, &v->vcpu_flags);
iap10@3823 494
kaf24@2465 495 return 0;
djm@1698 496 }
djm@1698 497
kaf24@1672 498
kaf24@5289 499 void new_thread(struct vcpu *d,
kaf24@1672 500 unsigned long start_pc,
kaf24@1672 501 unsigned long start_stack,
kaf24@1672 502 unsigned long start_info)
kaf24@1672 503 {
kaf24@4689 504 struct cpu_user_regs *regs = &d->arch.guest_context.user_regs;
kaf24@1672 505
kaf24@1672 506 /*
kaf24@1672 507 * Initial register values:
kaf24@3755 508 * DS,ES,FS,GS = FLAT_KERNEL_DS
kaf24@3755 509 * CS:EIP = FLAT_KERNEL_CS:start_pc
kaf24@3755 510 * SS:ESP = FLAT_KERNEL_SS:start_stack
kaf24@1672 511 * ESI = start_info
kaf24@1672 512 * [EAX,EBX,ECX,EDX,EDI,EBP are zero]
kaf24@1672 513 */
kaf24@4683 514 regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS;
kaf24@4683 515 regs->ss = FLAT_KERNEL_SS;
kaf24@4683 516 regs->cs = FLAT_KERNEL_CS;
kaf24@4683 517 regs->eip = start_pc;
kaf24@4683 518 regs->esp = start_stack;
kaf24@4683 519 regs->esi = start_info;
kaf24@1672 520
kaf24@4683 521 __save_flags(regs->eflags);
kaf24@4683 522 regs->eflags |= X86_EFLAGS_IF;
kaf24@1672 523 }
kaf24@1672 524
kaf24@1672 525
kaf24@3761 526 #ifdef __x86_64__
kaf24@3761 527
kaf24@5289 528 void toggle_guest_mode(struct vcpu *v)
kaf24@3828 529 {
kaf24@5289 530 v->arch.flags ^= TF_kernel_mode;
kaf24@4498 531 __asm__ __volatile__ ( "swapgs" );
kaf24@5289 532 update_pagetables(v);
kaf24@5289 533 write_ptbase(v);
kaf24@3828 534 }
kaf24@1672 535
kaf24@3761 536 #define loadsegment(seg,value) ({ \
kaf24@3761 537 int __r = 1; \
kaf24@3761 538 __asm__ __volatile__ ( \
kaf24@3761 539 "1: movl %k1,%%" #seg "\n2:\n" \
kaf24@3761 540 ".section .fixup,\"ax\"\n" \
kaf24@3761 541 "3: xorl %k0,%k0\n" \
kaf24@3761 542 " movl %k0,%%" #seg "\n" \
kaf24@3761 543 " jmp 2b\n" \
kaf24@3761 544 ".previous\n" \
kaf24@3761 545 ".section __ex_table,\"a\"\n" \
kaf24@3761 546 " .align 8\n" \
kaf24@3761 547 " .quad 1b,3b\n" \
kaf24@3761 548 ".previous" \
kaf24@3761 549 : "=r" (__r) : "r" (value), "0" (__r) );\
kaf24@3761 550 __r; })
kaf24@1672 551
kaf24@5659 552 #ifdef CONFIG_VMX
kaf24@6199 553 #define load_msrs(n) if (vmx_switch_on) vmx_load_msrs(n)
kaf24@5659 554 #else
kaf24@6199 555 #define load_msrs(n) ((void)0)
kaf24@5659 556 #endif
kaf24@5659 557
kaf24@6199 558 /*
kaf24@6199 559 * save_segments() writes a mask of segments which are dirty (non-zero),
kaf24@6199 560 * allowing load_segments() to avoid some expensive segment loads and
kaf24@6199 561 * MSR writes.
kaf24@6199 562 */
kaf24@6199 563 #define DIRTY_DS 0x01
kaf24@6199 564 #define DIRTY_ES 0x02
kaf24@6199 565 #define DIRTY_FS 0x04
kaf24@6199 566 #define DIRTY_GS 0x08
kaf24@6199 567 #define DIRTY_FS_BASE 0x10
kaf24@6199 568 #define DIRTY_GS_BASE_USER 0x20
kaf24@6199 569
kaf24@6199 570 static void load_segments(struct vcpu *n)
kaf24@1672 571 {
kaf24@4689 572 struct vcpu_guest_context *nctxt = &n->arch.guest_context;
kaf24@3761 573 int all_segs_okay = 1;
kaf24@6199 574 unsigned int dirty_segment_mask, cpu = smp_processor_id();
kaf24@6199 575
kaf24@6199 576 /* Load and clear the dirty segment mask. */
kaf24@6199 577 dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
kaf24@6199 578 percpu_ctxt[cpu].dirty_segment_mask = 0;
kaf24@3761 579
kaf24@3761 580 /* Either selector != 0 ==> reload. */
kaf24@6199 581 if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
kaf24@4689 582 all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);
kaf24@3761 583
kaf24@3761 584 /* Either selector != 0 ==> reload. */
kaf24@6199 585 if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
kaf24@4689 586 all_segs_okay &= loadsegment(es, nctxt->user_regs.es);
kaf24@1672 587
kaf24@3761 588 /*
kaf24@3761 589 * Either selector != 0 ==> reload.
kaf24@3761 590 * Also reload to reset FS_BASE if it was non-zero.
kaf24@3761 591 */
kaf24@6199 592 if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
kaf24@4689 593 nctxt->user_regs.fs) )
kaf24@4689 594 all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);
kaf24@3761 595
kaf24@3761 596 /*
kaf24@3761 597 * Either selector != 0 ==> reload.
kaf24@3761 598 * Also reload to reset GS_BASE if it was non-zero.
kaf24@3761 599 */
kaf24@6199 600 if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
kaf24@4689 601 nctxt->user_regs.gs) )
kaf24@3761 602 {
kaf24@3761 603 /* Reset GS_BASE with user %gs? */
kaf24@6199 604 if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
kaf24@4689 605 all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
kaf24@1672 606 }
kaf24@1672 607
kaf24@3761 608 /* This can only be non-zero if selector is NULL. */
kaf24@4689 609 if ( nctxt->fs_base )
kaf24@3761 610 wrmsr(MSR_FS_BASE,
kaf24@4689 611 nctxt->fs_base,
kaf24@4689 612 nctxt->fs_base>>32);
kaf24@1672 613
kaf24@4499 614 /* Most kernels have non-zero GS base, so don't bother testing. */
kaf24@4499 615 /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
kaf24@4499 616 wrmsr(MSR_SHADOW_GS_BASE,
kaf24@4689 617 nctxt->gs_base_kernel,
kaf24@4689 618 nctxt->gs_base_kernel>>32);
kaf24@4499 619
kaf24@3761 620 /* This can only be non-zero if selector is NULL. */
kaf24@4689 621 if ( nctxt->gs_base_user )
kaf24@3761 622 wrmsr(MSR_GS_BASE,
kaf24@4689 623 nctxt->gs_base_user,
kaf24@4689 624 nctxt->gs_base_user>>32);
kaf24@3761 625
kaf24@3761 626 /* If in kernel mode then switch the GS bases around. */
kaf24@3761 627 if ( n->arch.flags & TF_kernel_mode )
kaf24@4499 628 __asm__ __volatile__ ( "swapgs" );
kaf24@1672 629
kaf24@3761 630 if ( unlikely(!all_segs_okay) )
kaf24@3761 631 {
kaf24@4923 632 struct cpu_user_regs *regs = guest_cpu_user_regs();
kaf24@4373 633 unsigned long *rsp =
kaf24@3761 634 (n->arch.flags & TF_kernel_mode) ?
kaf24@3761 635 (unsigned long *)regs->rsp :
kaf24@4689 636 (unsigned long *)nctxt->kernel_sp;
kaf24@1672 637
kaf24@4138 638 if ( !(n->arch.flags & TF_kernel_mode) )
kaf24@4138 639 toggle_guest_mode(n);
kaf24@4140 640 else
kaf24@4140 641 regs->cs &= ~3;
kaf24@4138 642
kaf24@4689 643 if ( put_user(regs->ss, rsp- 1) |
kaf24@4689 644 put_user(regs->rsp, rsp- 2) |
kaf24@4689 645 put_user(regs->rflags, rsp- 3) |
kaf24@4689 646 put_user(regs->cs, rsp- 4) |
kaf24@4689 647 put_user(regs->rip, rsp- 5) |
kaf24@4689 648 put_user(nctxt->user_regs.gs, rsp- 6) |
kaf24@4689 649 put_user(nctxt->user_regs.fs, rsp- 7) |
kaf24@4689 650 put_user(nctxt->user_regs.es, rsp- 8) |
kaf24@4689 651 put_user(nctxt->user_regs.ds, rsp- 9) |
kaf24@4689 652 put_user(regs->r11, rsp-10) |
kaf24@4689 653 put_user(regs->rcx, rsp-11) )
kaf24@1672 654 {
kaf24@3761 655 DPRINTK("Error while creating failsafe callback frame.\n");
kaf24@3761 656 domain_crash();
kaf24@1672 657 }
kaf24@1672 658
kaf24@3761 659 regs->entry_vector = TRAP_syscall;
kaf24@3761 660 regs->rflags &= 0xFFFCBEFFUL;
kaf24@3761 661 regs->ss = __GUEST_SS;
kaf24@3761 662 regs->rsp = (unsigned long)(rsp-11);
kaf24@3761 663 regs->cs = __GUEST_CS;
kaf24@4689 664 regs->rip = nctxt->failsafe_callback_eip;
kaf24@1672 665 }
kaf24@3761 666 }
kaf24@1672 667
kaf24@5289 668 static void save_segments(struct vcpu *v)
kaf24@4373 669 {
kaf24@6199 670 struct vcpu_guest_context *ctxt = &v->arch.guest_context;
kaf24@6199 671 struct cpu_user_regs *regs = &ctxt->user_regs;
kaf24@6199 672 unsigned int dirty_segment_mask = 0;
kaf24@5763 673
kaf24@5763 674 if ( VMX_DOMAIN(v) )
kaf24@5763 675 rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);
kaf24@5763 676
kaf24@6759 677 __asm__ __volatile__ ( "mov %%ds,%0" : "=m" (regs->ds) );
kaf24@6759 678 __asm__ __volatile__ ( "mov %%es,%0" : "=m" (regs->es) );
kaf24@6759 679 __asm__ __volatile__ ( "mov %%fs,%0" : "=m" (regs->fs) );
kaf24@6759 680 __asm__ __volatile__ ( "mov %%gs,%0" : "=m" (regs->gs) );
kaf24@6199 681
kaf24@6199 682 if ( regs->ds )
kaf24@6199 683 dirty_segment_mask |= DIRTY_DS;
kaf24@6199 684
kaf24@6199 685 if ( regs->es )
kaf24@6199 686 dirty_segment_mask |= DIRTY_ES;
kaf24@1672 687
kaf24@6199 688 if ( regs->fs )
kaf24@6199 689 {
kaf24@6199 690 dirty_segment_mask |= DIRTY_FS;
kaf24@6199 691 ctxt->fs_base = 0; /* != 0 selector kills fs_base */
kaf24@6199 692 }
kaf24@6199 693 else if ( ctxt->fs_base )
kaf24@6199 694 {
kaf24@6199 695 dirty_segment_mask |= DIRTY_FS_BASE;
kaf24@6199 696 }
kaf24@6199 697
kaf24@6199 698 if ( regs->gs )
kaf24@6199 699 {
kaf24@6199 700 dirty_segment_mask |= DIRTY_GS;
kaf24@6199 701 ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
kaf24@6199 702 }
kaf24@6199 703 else if ( ctxt->gs_base_user )
kaf24@6199 704 {
kaf24@6199 705 dirty_segment_mask |= DIRTY_GS_BASE_USER;
kaf24@6199 706 }
kaf24@6199 707
kaf24@6199 708 percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
kaf24@1672 709 }
kaf24@1672 710
kaf24@3761 711 long do_switch_to_user(void)
kaf24@1672 712 {
kaf24@4923 713 struct cpu_user_regs *regs = guest_cpu_user_regs();
kaf24@3761 714 struct switch_to_user stu;
kaf24@5289 715 struct vcpu *v = current;
kaf24@3761 716
kaf24@3764 717 if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
kaf24@5289 718 unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
kaf24@3761 719 return -EFAULT;
kaf24@3761 720
kaf24@5289 721 toggle_guest_mode(v);
kaf24@3761 722
kaf24@3761 723 regs->rip = stu.rip;
kaf24@4140 724 regs->cs = stu.cs | 3; /* force guest privilege */
kaf24@3761 725 regs->rflags = stu.rflags;
kaf24@3761 726 regs->rsp = stu.rsp;
kaf24@4140 727 regs->ss = stu.ss | 3; /* force guest privilege */
kaf24@3761 728
kaf24@4689 729 if ( !(stu.flags & VGCF_IN_SYSCALL) )
kaf24@3761 730 {
kaf24@3761 731 regs->entry_vector = 0;
kaf24@3761 732 regs->r11 = stu.r11;
kaf24@3761 733 regs->rcx = stu.rcx;
kaf24@3761 734 }
kaf24@3761 735
kaf24@4037 736 /* Saved %rax gets written back to regs->rax in entry.S. */
kaf24@4037 737 return stu.rax;
kaf24@3761 738 }
kaf24@3761 739
kaf24@4417 740 #define switch_kernel_stack(_n,_c) ((void)0)
kaf24@4417 741
kaf24@3761 742 #elif defined(__i386__)
kaf24@3761 743
kaf24@6199 744 #define load_segments(n) ((void)0)
kaf24@6199 745 #define load_msrs(n) ((void)0)
kaf24@6199 746 #define save_segments(p) ((void)0)
kaf24@3761 747
kaf24@5289 748 static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
kaf24@4417 749 {
kaf24@4417 750 struct tss_struct *tss = &init_tss[cpu];
kaf24@4689 751 tss->esp1 = n->arch.guest_context.kernel_sp;
kaf24@4689 752 tss->ss1 = n->arch.guest_context.kernel_ss;
kaf24@1672 753 }
kaf24@1672 754
kaf24@3276 755 #endif
kaf24@3276 756
kaf24@5289 757 #define loaddebug(_v,_reg) \
kaf24@5576 758 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))
kaf24@1672 759
kaf24@4373 760 static void __context_switch(void)
kaf24@1672 761 {
kaf24@4923 762 struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
kaf24@6199 763 unsigned int cpu = smp_processor_id();
kaf24@6199 764 struct vcpu *p = percpu_ctxt[cpu].curr_vcpu;
kaf24@6199 765 struct vcpu *n = current;
iap10@3290 766
kaf24@4373 767 if ( !is_idle_task(p->domain) )
kaf24@1672 768 {
kaf24@4689 769 memcpy(&p->arch.guest_context.user_regs,
kaf24@4689 770 stack_regs,
kaf24@4612 771 CTXT_SWITCH_STACK_BYTES);
kaf24@4373 772 unlazy_fpu(p);
kaf24@4373 773 save_segments(p);
kaf24@4373 774 }
kaf24@4373 775
kaf24@4417 776 if ( !is_idle_task(n->domain) )
kaf24@4373 777 {
kaf24@4689 778 memcpy(stack_regs,
kaf24@4689 779 &n->arch.guest_context.user_regs,
kaf24@4612 780 CTXT_SWITCH_STACK_BYTES);
kaf24@1672 781
kaf24@4417 782 /* Maybe switch the debug registers. */
kaf24@4689 783 if ( unlikely(n->arch.guest_context.debugreg[7]) )
kaf24@4373 784 {
kaf24@4689 785 loaddebug(&n->arch.guest_context, 0);
kaf24@4689 786 loaddebug(&n->arch.guest_context, 1);
kaf24@4689 787 loaddebug(&n->arch.guest_context, 2);
kaf24@4689 788 loaddebug(&n->arch.guest_context, 3);
kaf24@4417 789 /* no 4 and 5 */
kaf24@4689 790 loaddebug(&n->arch.guest_context, 6);
kaf24@4689 791 loaddebug(&n->arch.guest_context, 7);
kaf24@4373 792 }
kaf24@4417 793
kaf24@4417 794 if ( !VMX_DOMAIN(n) )
kaf24@4417 795 {
kaf24@4930 796 set_int80_direct_trap(n);
kaf24@4417 797 switch_kernel_stack(n, cpu);
kaf24@4417 798 }
kaf24@1672 799 }
kaf24@1672 800
kaf24@4453 801 if ( p->domain != n->domain )
kaf24@5301 802 cpu_set(cpu, n->domain->cpumask);
kaf24@4453 803
kaf24@4373 804 write_ptbase(n);
kaf24@4972 805
kaf24@4972 806 if ( p->vcpu_id != n->vcpu_id )
kaf24@4972 807 {
kaf24@4972 808 char gdt_load[10];
kaf24@4972 809 *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
kaf24@4972 810 *(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(n);
kaf24@4972 811 __asm__ __volatile__ ( "lgdt %0" : : "m" (gdt_load) );
kaf24@4972 812 }
kaf24@4453 813
kaf24@4453 814 if ( p->domain != n->domain )
kaf24@5301 815 cpu_clear(cpu, p->domain->cpumask);
kaf24@4373 816
kaf24@5289 817 percpu_ctxt[cpu].curr_vcpu = n;
kaf24@4373 818 }
kaf24@4373 819
kaf24@4373 820
kaf24@5289 821 void context_switch(struct vcpu *prev, struct vcpu *next)
kaf24@4373 822 {
kaf24@6199 823 unsigned int cpu = smp_processor_id();
kaf24@4373 824
kaf24@6200 825 ASSERT(!local_irq_is_enabled());
kaf24@1672 826
kaf24@4373 827 set_current(next);
kaf24@3754 828
kaf24@6199 829 if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
kaf24@4275 830 {
kaf24@4373 831 __context_switch();
kaf24@6199 832 percpu_ctxt[cpu].context_not_finalised = 1;
kaf24@6199 833 }
kaf24@6199 834 }
kaf24@4373 835
kaf24@6199 836 void context_switch_finalise(struct vcpu *next)
kaf24@6199 837 {
kaf24@6199 838 unsigned int cpu = smp_processor_id();
kaf24@6199 839
kaf24@6200 840 ASSERT(local_irq_is_enabled());
kaf24@6200 841
kaf24@6199 842 if ( percpu_ctxt[cpu].context_not_finalised )
kaf24@6199 843 {
kaf24@6199 844 percpu_ctxt[cpu].context_not_finalised = 0;
kaf24@6199 845
kaf24@6199 846 BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);
kaf24@6199 847
kaf24@5763 848 if ( VMX_DOMAIN(next) )
kaf24@5763 849 {
kaf24@5763 850 vmx_restore_msrs(next);
kaf24@5763 851 }
kaf24@5763 852 else
kaf24@4373 853 {
kaf24@4373 854 load_LDT(next);
kaf24@6199 855 load_segments(next);
kaf24@6199 856 load_msrs(next);
kaf24@4373 857 }
kaf24@4275 858 }
kaf24@4034 859
kaf24@4373 860 schedule_tail(next);
kaf24@4696 861 BUG();
kaf24@4696 862 }
kaf24@4034 863
kaf24@5289 864 void continue_running(struct vcpu *same)
kaf24@4696 865 {
kaf24@4696 866 schedule_tail(same);
kaf24@4034 867 BUG();
kaf24@1672 868 }
kaf24@1672 869
kaf24@4417 870 int __sync_lazy_execstate(void)
kaf24@4373 871 {
kaf24@6199 872 unsigned long flags;
kaf24@6199 873 int switch_required;
kaf24@6199 874
kaf24@6199 875 local_irq_save(flags);
kaf24@6199 876
kaf24@6199 877 switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);
kaf24@6199 878
kaf24@6199 879 if ( switch_required )
kaf24@6199 880 __context_switch();
kaf24@6199 881
kaf24@6199 882 local_irq_restore(flags);
kaf24@6199 883
kaf24@6199 884 return switch_required;
kaf24@4373 885 }
kaf24@4373 886
kaf24@6453 887 void sync_vcpu_execstate(struct vcpu *v)
kaf24@4418 888 {
kaf24@6453 889 unsigned int cpu = v->processor;
kaf24@6453 890
kaf24@6453 891 if ( !cpu_isset(cpu, v->domain->cpumask) )
kaf24@6453 892 return;
kaf24@6453 893
kaf24@5301 894 if ( cpu == smp_processor_id() )
kaf24@6445 895 {
kaf24@5261 896 (void)__sync_lazy_execstate();
kaf24@6445 897 }
kaf24@6445 898 else
kaf24@6445 899 {
kaf24@6445 900 /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
kaf24@6445 901 flush_tlb_mask(cpumask_of_cpu(cpu));
kaf24@6445 902 }
kaf24@4418 903 }
kaf24@4418 904
kaf24@3702 905 unsigned long __hypercall_create_continuation(
kaf24@3149 906 unsigned int op, unsigned int nr_args, ...)
kaf24@3091 907 {
kaf24@3139 908 struct mc_state *mcs = &mc_state[smp_processor_id()];
kaf24@4683 909 struct cpu_user_regs *regs;
kaf24@3091 910 unsigned int i;
kaf24@3091 911 va_list args;
kaf24@3091 912
kaf24@3139 913 va_start(args, nr_args);
kaf24@3139 914
kaf24@3139 915 if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
kaf24@3139 916 {
kaf24@3139 917 __set_bit(_MCSF_call_preempted, &mcs->flags);
kaf24@3091 918
kaf24@3139 919 for ( i = 0; i < nr_args; i++ )
kaf24@3139 920 mcs->call.args[i] = va_arg(args, unsigned long);
kaf24@3139 921 }
kaf24@3139 922 else
kaf24@3139 923 {
kaf24@4923 924 regs = guest_cpu_user_regs();
kaf24@3276 925 #if defined(__i386__)
kaf24@4683 926 regs->eax = op;
kaf24@4683 927 regs->eip -= 2; /* re-execute 'int 0x82' */
kaf24@3139 928
kaf24@3697 929 for ( i = 0; i < nr_args; i++ )
kaf24@3697 930 {
kaf24@3697 931 switch ( i )
kaf24@3697 932 {
kaf24@4683 933 case 0: regs->ebx = va_arg(args, unsigned long); break;
kaf24@4683 934 case 1: regs->ecx = va_arg(args, unsigned long); break;
kaf24@4683 935 case 2: regs->edx = va_arg(args, unsigned long); break;
kaf24@4683 936 case 3: regs->esi = va_arg(args, unsigned long); break;
kaf24@4683 937 case 4: regs->edi = va_arg(args, unsigned long); break;
kaf24@4683 938 case 5: regs->ebp = va_arg(args, unsigned long); break;
kaf24@3697 939 }
kaf24@3697 940 }
kaf24@3697 941 #elif defined(__x86_64__)
kaf24@4683 942 regs->rax = op;
kaf24@4683 943 regs->rip -= 2; /* re-execute 'syscall' */
kaf24@3697 944
kaf24@3697 945 for ( i = 0; i < nr_args; i++ )
kaf24@3697 946 {
kaf24@3697 947 switch ( i )
kaf24@3697 948 {
kaf24@4683 949 case 0: regs->rdi = va_arg(args, unsigned long); break;
kaf24@4683 950 case 1: regs->rsi = va_arg(args, unsigned long); break;
kaf24@4683 951 case 2: regs->rdx = va_arg(args, unsigned long); break;
kaf24@4683 952 case 3: regs->r10 = va_arg(args, unsigned long); break;
kaf24@4683 953 case 4: regs->r8 = va_arg(args, unsigned long); break;
kaf24@4683 954 case 5: regs->r9 = va_arg(args, unsigned long); break;
kaf24@3697 955 }
kaf24@3697 956 }
kaf24@3276 957 #endif
kaf24@3139 958 }
kaf24@3139 959
kaf24@3091 960 va_end(args);
kaf24@3149 961
kaf24@3149 962 return op;
kaf24@3091 963 }
kaf24@3091 964
kaf24@4455 965 #ifdef CONFIG_VMX
kaf24@5289 966 static void vmx_relinquish_resources(struct vcpu *v)
kaf24@4455 967 {
kaf24@5289 968 if ( !VMX_DOMAIN(v) )
kaf24@4455 969 return;
kaf24@4455 970
kaf24@5289 971 BUG_ON(v->arch.arch_vmx.vmcs == NULL);
kaf24@5289 972 free_vmcs(v->arch.arch_vmx.vmcs);
kaf24@5836 973 if(v->arch.arch_vmx.io_bitmap_a != 0) {
kaf24@6684 974 free_xenheap_pages(
kaf24@6684 975 v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000));
kaf24@5836 976 v->arch.arch_vmx.io_bitmap_a = 0;
kaf24@5836 977 }
kaf24@5836 978 if(v->arch.arch_vmx.io_bitmap_b != 0) {
kaf24@6684 979 free_xenheap_pages(
kaf24@6684 980 v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000));
kaf24@5836 981 v->arch.arch_vmx.io_bitmap_b = 0;
kaf24@5836 982 }
kaf24@5289 983 v->arch.arch_vmx.vmcs = 0;
kaf24@4455 984
kaf24@5289 985 free_monitor_pagetable(v);
arun@5608 986 rem_ac_timer(&v->domain->arch.vmx_platform.vmx_pit.pit_timer);
kaf24@4455 987 }
kaf24@4455 988 #else
kaf24@5289 989 #define vmx_relinquish_resources(_v) ((void)0)
kaf24@4455 990 #endif
kaf24@4455 991
kaf24@4455 992 static void relinquish_memory(struct domain *d, struct list_head *list)
djm@1714 993 {
kaf24@2428 994 struct list_head *ent;
djm@1714 995 struct pfn_info *page;
djm@1714 996 unsigned long x, y;
djm@1714 997
kaf24@2428 998 /* Use a recursive lock, as we may enter 'free_domheap_page'. */
kaf24@2428 999 spin_lock_recursive(&d->page_alloc_lock);
kaf24@2428 1000
kaf24@2428 1001 ent = list->next;
kaf24@2428 1002 while ( ent != list )
kaf24@2428 1003 {
kaf24@2428 1004 page = list_entry(ent, struct pfn_info, list);
kaf24@2428 1005
kaf24@2429 1006 /* Grab a reference to the page so it won't disappear from under us. */
kaf24@2429 1007 if ( unlikely(!get_page(page, d)) )
kaf24@2428 1008 {
kaf24@2429 1009 /* Couldn't get a reference -- someone is freeing this page. */
kaf24@2429 1010 ent = ent->next;
kaf24@2428 1011 continue;
kaf24@2428 1012 }
kaf24@2428 1013
kaf24@2429 1014 if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
kaf24@2429 1015 put_page_and_type(page);
kaf24@2429 1016
kaf24@2428 1017 if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
kaf24@2428 1018 put_page(page);
kaf24@2428 1019
kaf24@2428 1020 /*
kaf24@2428 1021 * Forcibly invalidate base page tables at this point to break circular
kaf24@2428 1022 * 'linear page table' references. This is okay because MMU structures
kaf24@2428 1023 * are not shared across domains and this domain is now dead. Thus base
kaf24@2428 1024 * tables are not in use so a non-zero count means circular reference.
kaf24@2428 1025 */
kaf24@2428 1026 y = page->u.inuse.type_info;
kaf24@2429 1027 for ( ; ; )
kaf24@2429 1028 {
kaf24@2428 1029 x = y;
kaf24@2428 1030 if ( likely((x & (PGT_type_mask|PGT_validated)) !=
kaf24@2428 1031 (PGT_base_page_table|PGT_validated)) )
kaf24@2428 1032 break;
kaf24@2429 1033
kaf24@2428 1034 y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
kaf24@2428 1035 if ( likely(y == x) )
kaf24@2428 1036 {
kaf24@2428 1037 free_page_type(page, PGT_base_page_table);
kaf24@2429 1038 break;
kaf24@2428 1039 }
kaf24@2428 1040 }
kaf24@2429 1041
kaf24@2429 1042 /* Follow the list chain and /then/ potentially free the page. */
kaf24@2429 1043 ent = ent->next;
kaf24@2429 1044 put_page(page);
kaf24@2428 1045 }
kaf24@2428 1046
kaf24@2428 1047 spin_unlock_recursive(&d->page_alloc_lock);
kaf24@2428 1048 }
kaf24@2428 1049
kaf24@4455 1050 void domain_relinquish_resources(struct domain *d)
kaf24@2428 1051 {
kaf24@5289 1052 struct vcpu *v;
kaf24@5576 1053 unsigned long pfn;
cl349@2923 1054
kaf24@5301 1055 BUG_ON(!cpus_empty(d->cpumask));
djm@1714 1056
kaf24@4779 1057 physdev_destroy_state(d);
kaf24@4779 1058
kaf24@4455 1059 ptwr_destroy(d);
kaf24@4455 1060
cwc22@4100 1061 /* Release device mappings of other domains */
kaf24@4455 1062 gnttab_release_dev_mappings(d->grant_table);
djm@1714 1063
kaf24@3761 1064 /* Drop the in-use references to page-table bases. */
kaf24@5289 1065 for_each_vcpu ( d, v )
iap10@3741 1066 {
kaf24@5576 1067 if ( (pfn = pagetable_get_pfn(v->arch.guest_table)) != 0 )
kaf24@3761 1068 {
kaf24@5576 1069 if ( !shadow_mode_refcounts(d) )
kaf24@5576 1070 put_page_type(pfn_to_page(pfn));
kaf24@5576 1071 put_page(pfn_to_page(pfn));
mafetter@4799 1072
kaf24@5289 1073 v->arch.guest_table = mk_pagetable(0);
kaf24@3761 1074 }
kaf24@3761 1075
kaf24@5576 1076 if ( (pfn = pagetable_get_pfn(v->arch.guest_table_user)) != 0 )
kaf24@3761 1077 {
kaf24@5576 1078 if ( !shadow_mode_refcounts(d) )
kaf24@5576 1079 put_page_type(pfn_to_page(pfn));
kaf24@5576 1080 put_page(pfn_to_page(pfn));
mafetter@4799 1081
kaf24@5289 1082 v->arch.guest_table_user = mk_pagetable(0);
kaf24@3761 1083 }
kaf24@4455 1084
kaf24@5289 1085 vmx_relinquish_resources(v);
iap10@3741 1086 }
djm@1714 1087
kaf24@4633 1088 shadow_mode_disable(d);
maf46@4621 1089
kaf24@1749 1090 /*
kaf24@1749 1091 * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
kaf24@1749 1092 * it automatically gets squashed when the guest's mappings go away.
kaf24@1749 1093 */
kaf24@5289 1094 for_each_vcpu(d, v)
kaf24@5289 1095 destroy_gdt(v);
kaf24@1749 1096
kaf24@2428 1097 /* Relinquish every page of memory. */
kaf24@4455 1098 relinquish_memory(d, &d->xenpage_list);
kaf24@4455 1099 relinquish_memory(d, &d->page_list);
djm@1714 1100 }
djm@1714 1101
djm@1714 1102
kaf24@3914 1103 /*
kaf24@3914 1104 * Local variables:
kaf24@3914 1105 * mode: C
kaf24@3914 1106 * c-set-style: "BSD"
kaf24@3914 1107 * c-basic-offset: 4
kaf24@3914 1108 * tab-width: 4
kaf24@3914 1109 * indent-tabs-mode: nil
kaf24@3988 1110 * End:
kaf24@3914 1111 */