xen-vtx-unstable
annotate xen/arch/x86/domain.c @ 6796:0d8c0db04258
Don't return failure when trying to delete a non-existent node.
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author   | cl349@firebug.cl.cam.ac.uk |
date     | Tue Sep 13 21:52:24 2005 +0000 (2005-09-13) |
parents  | 72e4e2aab342 |
children |
rev | line source |
kaf24@1749 | 1 /****************************************************************************** |
kaf24@1749 | 2 * arch/x86/domain.c |
kaf24@1749 | 3 * |
kaf24@1749 | 4 * x86-specific domain handling (e.g., register setup and context switching). |
kaf24@1749 | 5 */ |
kaf24@1749 | 6 |
kaf24@1672 | 7 /* |
kaf24@1672 | 8 * Copyright (C) 1995 Linus Torvalds |
kaf24@1672 | 9 * |
kaf24@1672 | 10 * Pentium III FXSR, SSE support |
kaf24@5576 | 11 * Gareth Hughes <gareth@valinux.com>, May 2000 |
kaf24@1672 | 12 */ |
kaf24@1672 | 13 |
kaf24@1672 | 14 #include <xen/config.h> |
kaf24@3334 | 15 #include <xen/init.h> |
kaf24@1672 | 16 #include <xen/lib.h> |
kaf24@1672 | 17 #include <xen/errno.h> |
kaf24@1672 | 18 #include <xen/sched.h> |
kaf24@1672 | 19 #include <xen/smp.h> |
kaf24@1672 | 20 #include <xen/delay.h> |
kaf24@1672 | 21 #include <xen/softirq.h> |
cwc22@4023 | 22 #include <xen/grant_table.h> |
ach61@2805 | 23 #include <asm/regs.h> |
kaf24@1672 | 24 #include <asm/mc146818rtc.h> |
kaf24@1672 | 25 #include <asm/system.h> |
kaf24@1672 | 26 #include <asm/io.h> |
kaf24@1672 | 27 #include <asm/processor.h> |
kaf24@1672 | 28 #include <asm/desc.h> |
kaf24@1672 | 29 #include <asm/i387.h> |
kaf24@1672 | 30 #include <asm/mpspec.h> |
kaf24@1672 | 31 #include <asm/ldt.h> |
kaf24@1672 | 32 #include <xen/irq.h> |
kaf24@1672 | 33 #include <xen/event.h> |
kaf24@1749 | 34 #include <asm/shadow.h> |
djm@1714 | 35 #include <xen/console.h> |
djm@1714 | 36 #include <xen/elf.h> |
iap10@3290 | 37 #include <asm/vmx.h> |
iap10@3290 | 38 #include <asm/vmx_vmcs.h> |
kaf24@3754 | 39 #include <asm/msr.h> |
kaf24@4779 | 40 #include <asm/physdev.h> |
iap10@3290 | 41 #include <xen/kernel.h> |
iap10@3290 | 42 #include <public/io/ioreq.h> |
kaf24@3139 | 43 #include <xen/multicall.h> |
djm@1714 | 44 |
kaf24@3334 | 45 /* opt_noreboot: If true, machine will need manual reset on error. */ |
kaf24@3334 | 46 static int opt_noreboot = 0; |
kaf24@3334 | 47 boolean_param("noreboot", opt_noreboot); |
kaf24@3334 | 48 |
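/*
 * Per-CPU context-switch state. curr_vcpu tracks whose register state is
 * actually resident on this CPU; it can lag behind 'current' because a
 * switch to the idle vcpu leaves the previous state loaded (lazy context
 * switching). __sync_lazy_execstate() forces the two back into step.
 */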
kaf24@4373 | 49 struct percpu_ctxt { |
kaf24@5289 | 50 struct vcpu *curr_vcpu; |
kaf24@6199 | 51 unsigned int context_not_finalised; |
kaf24@6199 | 52 unsigned int dirty_segment_mask; |
kaf24@4373 | 53 } __cacheline_aligned; |
kaf24@4373 | 54 static struct percpu_ctxt percpu_ctxt[NR_CPUS]; |
djm@1714 | 55 |
kaf24@5289 | 56 static void continue_idle_task(struct vcpu *v) |
kaf24@4698 | 57 { |
kaf24@4698 | 58 reset_stack_and_jump(idle_loop); |
kaf24@4698 | 59 } |
kaf24@4698 | 60 |
kaf24@5289 | 61 static void continue_nonidle_task(struct vcpu *v) |
kaf24@4698 | 62 { |
kaf24@4698 | 63 reset_stack_and_jump(ret_from_intr); |
kaf24@4698 | 64 } |
kaf24@4698 | 65 |
kaf24@3272 | 66 static void default_idle(void) |
kaf24@1672 | 67 { |
kaf24@4373 | 68 local_irq_disable(); |
kaf24@3272 | 69 if ( !softirq_pending(smp_processor_id()) ) |
kaf24@3272 | 70 safe_halt(); |
kaf24@3272 | 71 else |
kaf24@4373 | 72 local_irq_enable(); |
kaf24@1672 | 73 } |
kaf24@1672 | 74 |
kaf24@4707 | 75 void idle_loop(void) |
kaf24@1672 | 76 { |
kaf24@1672 | 77 int cpu = smp_processor_id(); |
kaf24@5146 | 78 |
kaf24@1672 | 79 for ( ; ; ) |
kaf24@1672 | 80 { |
kaf24@1672 | 81 irq_stat[cpu].idle_timestamp = jiffies; |
kaf24@5146 | 82 |
kaf24@1672 | 83 while ( !softirq_pending(cpu) ) |
kaf24@4267 | 84 { |
kaf24@4267 | 85 page_scrub_schedule_work(); |
kaf24@1672 | 86 default_idle(); |
kaf24@4267 | 87 } |
kaf24@5146 | 88 |
kaf24@1672 | 89 do_softirq(); |
kaf24@1672 | 90 } |
kaf24@1672 | 91 } |
kaf24@1672 | 92 |
kaf24@1672 | 93 void startup_cpu_idle_loop(void) |
kaf24@1672 | 94 { |
kaf24@5289 | 95 struct vcpu *v = current; |
kaf24@4698 | 96 |
kaf24@5289 | 97 ASSERT(is_idle_task(v->domain)); |
kaf24@5289 | 98 percpu_ctxt[smp_processor_id()].curr_vcpu = v; |
kaf24@5301 | 99 cpu_set(smp_processor_id(), v->domain->cpumask); |
kaf24@5289 | 100 v->arch.schedule_tail = continue_idle_task; |
kaf24@4698 | 101 |
kaf24@5146 | 102 idle_loop(); |
kaf24@1672 | 103 } |
kaf24@1672 | 104 |
kaf24@1672 | 105 static long no_idt[2]; |
kaf24@1672 | 106 static int reboot_mode; |
kaf24@1672 | 107 |
kaf24@1672 | 108 static inline void kb_wait(void) |
kaf24@1672 | 109 { |
kaf24@1672 | 110 int i; |
kaf24@1672 | 111 |
kaf24@3797 | 112 for ( i = 0; i < 0x10000; i++ ) |
kaf24@3797 | 113 if ( (inb_p(0x64) & 0x02) == 0 ) |
kaf24@1672 | 114 break; |
kaf24@1672 | 115 } |
kaf24@1672 | 116 |
kaf24@1672 | 117 void machine_restart(char * __unused) |
kaf24@1672 | 118 { |
kaf24@3797 | 119 int i; |
kaf24@5576 | 120 |
kaf24@1672 | 121 if ( opt_noreboot ) |
kaf24@1672 | 122 { |
kaf24@1672 | 123 printk("Reboot disabled on cmdline: require manual reset\n"); |
kaf24@3797 | 124 for ( ; ; ) |
kaf24@3797 | 125 safe_halt(); |
kaf24@1672 | 126 } |
kaf24@1672 | 127 |
kaf24@5321 | 128 watchdog_disable(); |
kaf24@5321 | 129 console_start_sync(); |
kaf24@5321 | 130 |
kaf24@4373 | 131 local_irq_enable(); |
kaf24@1672 | 132 |
kaf24@3797 | 133 /* Ensure we are the boot CPU. */ |
kaf24@3797 | 134 if ( GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid ) |
kaf24@3797 | 135 { |
kaf24@3797 | 136 smp_call_function((void *)machine_restart, NULL, 1, 0); |
kaf24@3797 | 137 for ( ; ; ) |
kaf24@3797 | 138 safe_halt(); |
kaf24@1672 | 139 } |
kaf24@1672 | 140 |
kaf24@1672 | 141 /* |
kaf24@1672 | 142 * Stop all CPUs and turn off local APICs and the IO-APIC, so |
kaf24@1672 | 143 * other OSs see a clean IRQ state. |
kaf24@1672 | 144 */ |
kaf24@1672 | 145 smp_send_stop(); |
kaf24@1672 | 146 disable_IO_APIC(); |
kaf24@3797 | 147 |
iap10@3290 | 148 #ifdef CONFIG_VMX |
iap10@3290 | 149 stop_vmx(); |
kaf24@1672 | 150 #endif |
kaf24@1672 | 151 |
kaf24@3797 | 152 /* Rebooting needs to touch the page at absolute address 0. */ |
kaf24@3797 | 153 *((unsigned short *)__va(0x472)) = reboot_mode; |
kaf24@3797 | 154 |
kaf24@3797 | 155 for ( ; ; ) |
kaf24@3797 | 156 { |
kaf24@3797 | 157 /* Pulse the keyboard reset line. */ |
kaf24@3797 | 158 for ( i = 0; i < 100; i++ ) |
kaf24@3797 | 159 { |
kaf24@3797 | 160 kb_wait(); |
kaf24@3797 | 161 udelay(50); |
kaf24@3797 | 162 outb(0xfe,0x64); /* pulse reset low */ |
kaf24@3797 | 163 udelay(50); |
kaf24@1672 | 164 } |
kaf24@3797 | 165 |
kaf24@3797 | 166 /* That didn't work - force a triple fault. */ |
kaf24@3797 | 167 __asm__ __volatile__("lidt %0": "=m" (no_idt)); |
kaf24@3797 | 168 __asm__ __volatile__("int3"); |
kaf24@1672 | 169 } |
kaf24@1672 | 170 } |
kaf24@1672 | 171 |
kaf24@1811 | 172 |
kaf24@1811 | 173 void __attribute__((noreturn)) __machine_halt(void *unused) |
kaf24@1811 | 174 { |
kaf24@1811 | 175 for ( ; ; ) |
kaf24@3797 | 176 safe_halt(); |
kaf24@1811 | 177 } |
kaf24@1811 | 178 |
kaf24@1672 | 179 void machine_halt(void) |
kaf24@1672 | 180 { |
kaf24@4926 | 181 watchdog_disable(); |
kaf24@5321 | 182 console_start_sync(); |
kaf24@3797 | 183 smp_call_function(__machine_halt, NULL, 1, 0); |
kaf24@1811 | 184 __machine_halt(NULL); |
kaf24@1672 | 185 } |
kaf24@1672 | 186 |
kaf24@3477 | 187 void dump_pageframe_info(struct domain *d) |
kaf24@3477 | 188 { |
kaf24@3477 | 189 struct pfn_info *page; |
kaf24@3477 | 190 |
kaf24@3477 | 191 if ( d->tot_pages < 10 ) |
kaf24@3477 | 192 { |
kaf24@3530 | 193 list_for_each_entry ( page, &d->page_list, list ) |
kaf24@3477 | 194 { |
kaf24@6039 | 195 printk("Page %p: caf=%08x, taf=%" PRtype_info "\n", |
kaf24@4654 | 196 _p(page_to_phys(page)), page->count_info, |
kaf24@3477 | 197 page->u.inuse.type_info); |
kaf24@3477 | 198 } |
kaf24@3477 | 199 } |
iap10@4575 | 200 |
iap10@4575 | 201 list_for_each_entry ( page, &d->xenpage_list, list ) |
iap10@4575 | 202 { |
kaf24@6039 | 203 printk("XenPage %p: caf=%08x, taf=%" PRtype_info "\n", |
kaf24@4654 | 204 _p(page_to_phys(page)), page->count_info, |
iap10@4575 | 205 page->u.inuse.type_info); |
iap10@4575 | 206 } |
iap10@4575 | 207 |
kaf24@3477 | 208 |
kaf24@3477 | 209 page = virt_to_page(d->shared_info); |
kaf24@6039 | 210 printk("Shared_info@%p: caf=%08x, taf=%" PRtype_info "\n", |
kaf24@4654 | 211 _p(page_to_phys(page)), page->count_info, |
kaf24@3477 | 212 page->u.inuse.type_info); |
kaf24@3477 | 213 } |
kaf24@3477 | 214 |
kaf24@5289 | 215 struct vcpu *arch_alloc_vcpu_struct(void) |
kaf24@3479 | 216 { |
kaf24@5289 | 217 return xmalloc(struct vcpu); |
kaf24@3479 | 218 } |
kaf24@3479 | 219 |
sos22@6268 | 220 /* We assume that vcpu 0 is always the last one to be freed in a |
sos22@6268 | 221 * domain, i.e. if v->vcpu_id == 0, the domain should be |
sos22@6268 | 222 * single-processor. */ |
kaf24@5289 | 223 void arch_free_vcpu_struct(struct vcpu *v) |
kaf24@3479 | 224 { |
sos22@6268 | 225 struct vcpu *p; |
sos22@6268 | 226 for_each_vcpu(v->domain, p) { |
sos22@6268 | 227 if (p->next_in_list == v) |
sos22@6268 | 228 p->next_in_list = v->next_in_list; |
sos22@6268 | 229 } |
kaf24@5289 | 230 xfree(v); |
kaf24@3477 | 231 } |
kaf24@3477 | 232 |
kaf24@1936 | 233 void free_perdomain_pt(struct domain *d) |
kaf24@1936 | 234 { |
kaf24@5398 | 235 free_xenheap_page(d->arch.mm_perdomain_pt); |
kaf24@3753 | 236 #ifdef __x86_64__ |
kaf24@5398 | 237 free_xenheap_page(d->arch.mm_perdomain_l2); |
kaf24@5398 | 238 free_xenheap_page(d->arch.mm_perdomain_l3); |
kaf24@3753 | 239 #endif |
kaf24@1936 | 240 } |
kaf24@1936 | 241 |
kaf24@5289 | 242 void arch_do_createdomain(struct vcpu *v) |
djm@1698 | 243 { |
kaf24@5289 | 244 struct domain *d = v->domain; |
kaf24@3630 | 245 |
kaf24@5289 | 246 v->arch.flags = TF_kernel_mode; |
kaf24@3272 | 247 |
kaf24@5146 | 248 if ( is_idle_task(d) ) |
kaf24@4972 | 249 return; |
djm@1698 | 250 |
kaf24@5289 | 251 v->arch.schedule_tail = continue_nonidle_task; |
kaf24@4972 | 252 |
kaf24@5398 | 253 d->shared_info = alloc_xenheap_page(); |
kaf24@4972 | 254 memset(d->shared_info, 0, PAGE_SIZE); |
kaf24@5289 | 255 v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id]; |
kaf24@5289 | 256 v->cpumap = CPUMAP_RUNANYWHERE; |
kaf24@4972 | 257 SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d); |
kaf24@6481 | 258 set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT, |
kaf24@6481 | 259 INVALID_M2P_ENTRY); |
kaf24@4972 | 260 |
kaf24@5398 | 261 d->arch.mm_perdomain_pt = alloc_xenheap_page(); |
kaf24@4972 | 262 memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE); |
kaf24@6481 | 263 set_pfn_from_mfn(virt_to_phys(d->arch.mm_perdomain_pt) >> PAGE_SHIFT, |
kaf24@6481 | 264 INVALID_M2P_ENTRY); |
kaf24@5289 | 265 v->arch.perdomain_ptes = d->arch.mm_perdomain_pt; |
kaf24@5289 | 266 v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] = |
kaf24@5250 | 267 l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR); |
kaf24@5004 | 268 |
kaf24@5289 | 269 v->arch.guest_vtable = __linear_l2_table; |
kaf24@5289 | 270 v->arch.shadow_vtable = __shadow_linear_l2_table; |
kaf24@3272 | 271 |
kaf24@3753 | 272 #ifdef __x86_64__ |
kaf24@5289 | 273 v->arch.guest_vl3table = __linear_l3_table; |
kaf24@5289 | 274 v->arch.guest_vl4table = __linear_l4_table; |
kaf24@4972 | 275 |
kaf24@5398 | 276 d->arch.mm_perdomain_l2 = alloc_xenheap_page(); |
kaf24@4972 | 277 memset(d->arch.mm_perdomain_l2, 0, PAGE_SIZE); |
kaf24@4972 | 278 d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)] = |
kaf24@5250 | 279 l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt), |
kaf24@4972 | 280 __PAGE_HYPERVISOR); |
kaf24@5398 | 281 d->arch.mm_perdomain_l3 = alloc_xenheap_page(); |
kaf24@4972 | 282 memset(d->arch.mm_perdomain_l3, 0, PAGE_SIZE); |
kaf24@4972 | 283 d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] = |
kaf24@5250 | 284 l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2), |
mafetter@4591 | 285 __PAGE_HYPERVISOR); |
kaf24@3753 | 286 #endif |
kaf24@4972 | 287 |
kaf24@4972 | 288 (void)ptwr_init(d); |
kaf24@4972 | 289 |
kaf24@4972 | 290 shadow_lock_init(d); |
kaf24@4972 | 291 INIT_LIST_HEAD(&d->arch.free_shadow_frames); |
djm@1698 | 292 } |
djm@1698 | 293 |
kaf24@5289 | 294 void arch_do_boot_vcpu(struct vcpu *v) |
kaf24@3662 | 295 { |
kaf24@5289 | 296 struct domain *d = v->domain; |
kaf24@4972 | 297 |
kaf24@5289 | 298 v->arch.flags = TF_kernel_mode; |
kaf24@4972 | 299 |
kaf24@5289 | 300 v->arch.schedule_tail = d->vcpu[0]->arch.schedule_tail; |
kaf24@4972 | 301 |
kaf24@5289 | 302 v->arch.perdomain_ptes = |
kaf24@5289 | 303 d->arch.mm_perdomain_pt + (v->vcpu_id << PDPT_VCPU_SHIFT); |
kaf24@5289 | 304 v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] = |
kaf24@5250 | 305 l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR); |
kaf24@3662 | 306 } |
kaf24@3662 | 307 |
kaf24@6113 | 308 void vcpu_migrate_cpu(struct vcpu *v, int newcpu) |
kaf24@6113 | 309 { |
kaf24@6113 | 310 if ( v->processor == newcpu ) |
kaf24@6113 | 311 return; |
kaf24@6113 | 312 |
kaf24@6113 | 313 set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags); |
kaf24@6113 | 314 v->processor = newcpu; |
kaf24@6113 | 315 |
kaf24@6113 | 316 if ( VMX_DOMAIN(v) ) |
kaf24@6113 | 317 { |
kaf24@6113 | 318 __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs)); |
kaf24@6113 | 319 v->arch.schedule_tail = arch_vmx_do_relaunch; |
kaf24@6113 | 320 } |
kaf24@6113 | 321 } |
kaf24@6113 | 322 |
iap10@3290 | 323 #ifdef CONFIG_VMX |
kaf24@5659 | 324 static int vmx_switch_on; |
kaf24@5659 | 325 |
kaf24@4683 | 326 static int vmx_final_setup_guest( |
kaf24@5289 | 327 struct vcpu *v, struct vcpu_guest_context *ctxt) |
iap10@3290 | 328 { |
iap10@3290 | 329 int error; |
kaf24@4683 | 330 struct cpu_user_regs *regs; |
iap10@3290 | 331 struct vmcs_struct *vmcs; |
iap10@3290 | 332 |
kaf24@4683 | 333 regs = &ctxt->user_regs; |
iap10@3290 | 334 |
iap10@3290 | 335 /* |
iap10@3290 | 336 * Create a new VMCS |
iap10@3290 | 337 */ |
iap10@3290 | 338 if (!(vmcs = alloc_vmcs())) { |
iap10@3290 | 339 printk("Failed to create a new VMCS\n"); |
iap10@3290 | 340 return -ENOMEM; |
iap10@3290 | 341 } |
iap10@3290 | 342 |
kaf24@5289 | 343 memset(&v->arch.arch_vmx, 0, sizeof (struct arch_vmx_struct)); |
iap10@3290 | 344 |
kaf24@5289 | 345 v->arch.arch_vmx.vmcs = vmcs; |
kaf24@3753 | 346 error = construct_vmcs( |
kaf24@5289 | 347 &v->arch.arch_vmx, regs, ctxt, VMCS_USE_HOST_ENV); |
kaf24@3753 | 348 if ( error < 0 ) |
kaf24@3753 | 349 { |
iap10@3290 | 350 printk("Failed to construct a new VMCS\n"); |
iap10@3290 | 351 goto out; |
iap10@3290 | 352 } |
iap10@3290 | 353 |
kaf24@5289 | 354 v->arch.schedule_tail = arch_vmx_do_launch; |
iap10@3290 | 355 |
kaf24@6113 | 356 #if defined (__i386__) |
arun@5608 | 357 v->domain->arch.vmx_platform.real_mode_data = |
kaf24@4683 | 358 (unsigned long *) regs->esi; |
iap10@3567 | 359 #endif |
iap10@3290 | 360 |
kaf24@5289 | 361 if (v == v->domain->vcpu[0]) { |
iap10@3567 | 362 /* |
iap10@3567 | 363 * Required to do this once per domain |
iap10@3823 | 364 * XXX todo: add a separate function to do these. |
iap10@3567 | 365 */ |
kaf24@5289 | 366 memset(&v->domain->shared_info->evtchn_mask[0], 0xff, |
kaf24@5289 | 367 sizeof(v->domain->shared_info->evtchn_mask)); |
iap10@3823 | 368 |
iap10@3823 | 369 /* Put the domain in shadow mode even though we're going to be using |
iap10@3823 | 370 * the shared 1:1 page table initially. It shouldn't hurt. */ |
kaf24@5289 | 371 shadow_mode_enable(v->domain, |
maf46@5196 | 372 SHM_enable|SHM_refcounts| |
maf46@4953 | 373 SHM_translate|SHM_external); |
iap10@3567 | 374 } |
iap10@3290 | 375 |
kaf24@5659 | 376 if (!vmx_switch_on) |
kaf24@5659 | 377 vmx_switch_on = 1; |
kaf24@5659 | 378 |
iap10@3290 | 379 return 0; |
iap10@3290 | 380 |
iap10@3290 | 381 out: |
iap10@3290 | 382 free_vmcs(vmcs); |
kaf24@5836 | 383 if ( v->arch.arch_vmx.io_bitmap_a != 0 ) { |
kaf24@6684 | 384 free_xenheap_pages( |
kaf24@6684 | 385 v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000)); |
kaf24@5836 | 386 v->arch.arch_vmx.io_bitmap_a = 0; |
kaf24@5836 | 387 } |
kaf24@5836 | 388 if ( v->arch.arch_vmx.io_bitmap_b != 0 ) { |
kaf24@6684 | 389 free_xenheap_pages( |
kaf24@6684 | 390 v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000)); |
kaf24@5836 | 391 v->arch.arch_vmx.io_bitmap_b = 0; |
kaf24@5836 | 392 } |
kaf24@5289 | 393 v->arch.arch_vmx.vmcs = 0; |
iap10@3290 | 394 return error; |
iap10@3290 | 395 } |
iap10@3290 | 396 #endif |
iap10@3290 | 397 |
iap10@3823 | 398 |
iap10@3823 | 399 /* This is called by arch_final_setup_guest and do_boot_vcpu */ |
kmacy@4118 | 400 int arch_set_info_guest( |
kaf24@5289 | 401 struct vcpu *v, struct vcpu_guest_context *c) |
djm@1698 | 402 { |
kaf24@5289 | 403 struct domain *d = v->domain; |
djm@1698 | 404 unsigned long phys_basetab; |
kaf24@2465 | 405 int i, rc; |
djm@1698 | 406 |
kaf24@2684 | 407 /* |
kaf24@2684 | 408 * This is sufficient! If the descriptor DPL differs from CS RPL then we'll |
kaf24@2684 | 409 * #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically. |
kaf24@2684 | 410 * If SS RPL or DPL differs from CS RPL then we'll #GP. |
kaf24@2684 | 411 */ |
kaf24@4689 | 412 if ( !(c->flags & VGCF_VMX_GUEST) ) |
kaf24@4689 | 413 { |
kaf24@4683 | 414 if ( ((c->user_regs.cs & 3) == 0) || |
kaf24@4683 | 415 ((c->user_regs.ss & 3) == 0) ) |
sos22@6268 | 416 return -EINVAL; |
kaf24@4689 | 417 } |
kmacy@4118 | 418 |
kaf24@5289 | 419 clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags); |
kaf24@4689 | 420 if ( c->flags & VGCF_I387_VALID ) |
kaf24@5289 | 421 set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags); |
kaf24@2465 | 422 |
kaf24@5289 | 423 v->arch.flags &= ~TF_kernel_mode; |
kaf24@5722 | 424 if ( (c->flags & VGCF_IN_KERNEL) || (c->flags & VGCF_VMX_GUEST) ) |
kaf24@5289 | 425 v->arch.flags |= TF_kernel_mode; |
kaf24@2465 | 426 |
kaf24@5289 | 427 memcpy(&v->arch.guest_context, c, sizeof(*c)); |
kmacy@4118 | 428 |
kaf24@4852 | 429 if ( !(c->flags & VGCF_VMX_GUEST) ) |
kaf24@4852 | 430 { |
kaf24@4852 | 431 /* IOPL privileges are virtualised. */ |
kaf24@5289 | 432 v->arch.iopl = (v->arch.guest_context.user_regs.eflags >> 12) & 3; |
kaf24@5289 | 433 v->arch.guest_context.user_regs.eflags &= ~EF_IOPL; |
kaf24@2465 | 434 |
kaf24@4852 | 435 /* Ensure real hardware interrupts are enabled. */ |
kaf24@5289 | 436 v->arch.guest_context.user_regs.eflags |= EF_IE; |
kaf24@5821 | 437 } |
kaf24@5821 | 438 else if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) |
kaf24@5821 | 439 { |
kaf24@5821 | 440 return modify_vmcs( |
kaf24@5821 | 441 &v->arch.arch_vmx, |
kaf24@5821 | 442 &v->arch.guest_context.user_regs); |
kaf24@4852 | 443 } |
kaf24@2465 | 444 |
kaf24@5289 | 445 if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) ) |
kmacy@4118 | 446 return 0; |
kaf24@2465 | 447 |
kaf24@5289 | 448 memset(v->arch.guest_context.debugreg, 0, |
kaf24@5289 | 449 sizeof(v->arch.guest_context.debugreg)); |
djm@1698 | 450 for ( i = 0; i < 8; i++ ) |
kaf24@5289 | 451 (void)set_debugreg(v, i, c->debugreg[i]); |
kaf24@2465 | 452 |
kaf24@5289 | 453 if ( v->vcpu_id == 0 ) |
kaf24@4381 | 454 d->vm_assist = c->vm_assist; |
kaf24@2465 | 455 |
kaf24@5576 | 456 phys_basetab = c->ctrlreg[3]; |
kaf24@5289 | 457 v->arch.guest_table = mk_pagetable(phys_basetab); |
kaf24@2465 | 458 |
mafetter@4799 | 459 if ( shadow_mode_refcounts(d) ) |
mafetter@4509 | 460 { |
mafetter@4509 | 461 if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) ) |
mafetter@4509 | 462 return -EINVAL; |
mafetter@4509 | 463 } |
kaf24@6739 | 464 else if ( !(c->flags & VGCF_VMX_GUEST) ) |
mafetter@4509 | 465 { |
kaf24@6739 | 466 if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d, |
kaf24@6739 | 467 PGT_base_page_table) ) |
kaf24@6739 | 468 return -EINVAL; |
mafetter@4509 | 469 } |
kaf24@2465 | 470 |
kaf24@5289 | 471 if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 ) |
kaf24@2465 | 472 { |
kaf24@4972 | 473 put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]); |
kaf24@4972 | 474 return rc; |
kaf24@2465 | 475 } |
kaf24@2465 | 476 |
kaf24@4689 | 477 if ( c->flags & VGCF_VMX_GUEST ) |
mafetter@4192 | 478 { |
kaf24@5576 | 479 /* VMX uses the initially provided page tables as the P2M map. */ |
kaf24@5250 | 480 if ( !pagetable_get_paddr(d->arch.phys_table) ) |
kaf24@5289 | 481 d->arch.phys_table = v->arch.guest_table; |
mafetter@4192 | 482 |
kaf24@5576 | 483 if ( (rc = vmx_final_setup_guest(v, c)) != 0 ) |
kaf24@5576 | 484 return rc; |
mafetter@4192 | 485 } |
iap10@3290 | 486 |
kaf24@5289 | 487 update_pagetables(v); |
kaf24@6093 | 488 |
kaf24@6093 | 489 if ( v->vcpu_id == 0 ) |
kaf24@6093 | 490 init_domain_time(d); |
kaf24@6093 | 491 |
kmacy@4118 | 492 /* Don't redo final setup */ |
kaf24@5289 | 493 set_bit(_VCPUF_initialised, &v->vcpu_flags); |
iap10@3823 | 494 |
kaf24@2465 | 495 return 0; |
djm@1698 | 496 } |
djm@1698 | 497 |
kaf24@1672 | 498 |
kaf24@5289 | 499 void new_thread(struct vcpu *d, |
kaf24@1672 | 500 unsigned long start_pc, |
kaf24@1672 | 501 unsigned long start_stack, |
kaf24@1672 | 502 unsigned long start_info) |
kaf24@1672 | 503 { |
kaf24@4689 | 504 struct cpu_user_regs *regs = &d->arch.guest_context.user_regs; |
kaf24@1672 | 505 |
kaf24@1672 | 506 /* |
kaf24@1672 | 507 * Initial register values: |
kaf24@3755 | 508 * DS,ES,FS,GS = FLAT_KERNEL_DS |
kaf24@3755 | 509 * CS:EIP = FLAT_KERNEL_CS:start_pc |
kaf24@3755 | 510 * SS:ESP = FLAT_KERNEL_SS:start_stack |
kaf24@1672 | 511 * ESI = start_info |
kaf24@1672 | 512 * [EAX,EBX,ECX,EDX,EDI,EBP are zero] |
kaf24@1672 | 513 */ |
kaf24@4683 | 514 regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS; |
kaf24@4683 | 515 regs->ss = FLAT_KERNEL_SS; |
kaf24@4683 | 516 regs->cs = FLAT_KERNEL_CS; |
kaf24@4683 | 517 regs->eip = start_pc; |
kaf24@4683 | 518 regs->esp = start_stack; |
kaf24@4683 | 519 regs->esi = start_info; |
kaf24@1672 | 520 |
kaf24@4683 | 521 __save_flags(regs->eflags); |
kaf24@4683 | 522 regs->eflags |= X86_EFLAGS_IF; |
kaf24@1672 | 523 } |
kaf24@1672 | 524 |
kaf24@1672 | 525 |
kaf24@3761 | 526 #ifdef __x86_64__ |
kaf24@3761 | 527 |
kaf24@5289 | 528 void toggle_guest_mode(struct vcpu *v) |
kaf24@3828 | 529 { |
kaf24@5289 | 530 v->arch.flags ^= TF_kernel_mode; |
kaf24@4498 | 531 __asm__ __volatile__ ( "swapgs" ); |
kaf24@5289 | 532 update_pagetables(v); |
kaf24@5289 | 533 write_ptbase(v); |
kaf24@3828 | 534 } |
kaf24@1672 | 535 |
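/*
 * Load a guest selector, tolerating faults: the .fixup/__ex_table pair
 * routes a faulting mov to label 3, which zeroes both the segment
 * register and the result, so the macro evaluates to 1 on success and
 * 0 if the selector was bogus.
 */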
kaf24@3761 | 536 #define loadsegment(seg,value) ({ \ |
kaf24@3761 | 537 int __r = 1; \ |
kaf24@3761 | 538 __asm__ __volatile__ ( \ |
kaf24@3761 | 539 "1: movl %k1,%%" #seg "\n2:\n" \ |
kaf24@3761 | 540 ".section .fixup,\"ax\"\n" \ |
kaf24@3761 | 541 "3: xorl %k0,%k0\n" \ |
kaf24@3761 | 542 " movl %k0,%%" #seg "\n" \ |
kaf24@3761 | 543 " jmp 2b\n" \ |
kaf24@3761 | 544 ".previous\n" \ |
kaf24@3761 | 545 ".section __ex_table,\"a\"\n" \ |
kaf24@3761 | 546 " .align 8\n" \ |
kaf24@3761 | 547 " .quad 1b,3b\n" \ |
kaf24@3761 | 548 ".previous" \ |
kaf24@3761 | 549 : "=r" (__r) : "r" (value), "0" (__r) );\ |
kaf24@3761 | 550 __r; }) |
kaf24@1672 | 551 |
kaf24@5659 | 552 #ifdef CONFIG_VMX |
kaf24@6199 | 553 #define load_msrs(n) if (vmx_switch_on) vmx_load_msrs(n) |
kaf24@5659 | 554 #else |
kaf24@6199 | 555 #define load_msrs(n) ((void)0) |
kaf24@5659 | 556 #endif |
kaf24@5659 | 557 |
kaf24@6199 | 558 /* |
kaf24@6199 | 559 * save_segments() writes a mask of segments which are dirty (non-zero), |
kaf24@6199 | 560 * allowing load_segments() to avoid some expensive segment loads and |
kaf24@6199 | 561 * MSR writes. |
kaf24@6199 | 562 */ |
kaf24@6199 | 563 #define DIRTY_DS 0x01 |
kaf24@6199 | 564 #define DIRTY_ES 0x02 |
kaf24@6199 | 565 #define DIRTY_FS 0x04 |
kaf24@6199 | 566 #define DIRTY_GS 0x08 |
kaf24@6199 | 567 #define DIRTY_FS_BASE 0x10 |
kaf24@6199 | 568 #define DIRTY_GS_BASE_USER 0x20 |
kaf24@6199 | 569 |
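/*
 * load_segments() reloads a segment only if the outgoing vcpu dirtied it
 * or the incoming vcpu wants a non-null selector; a null selector over a
 * clean base is left alone, which is typically the fast path for 64-bit
 * guests.
 */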
kaf24@6199 | 570 static void load_segments(struct vcpu *n) |
kaf24@1672 | 571 { |
kaf24@4689 | 572 struct vcpu_guest_context *nctxt = &n->arch.guest_context; |
kaf24@3761 | 573 int all_segs_okay = 1; |
kaf24@6199 | 574 unsigned int dirty_segment_mask, cpu = smp_processor_id(); |
kaf24@6199 | 575 |
kaf24@6199 | 576 /* Load and clear the dirty segment mask. */ |
kaf24@6199 | 577 dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask; |
kaf24@6199 | 578 percpu_ctxt[cpu].dirty_segment_mask = 0; |
kaf24@3761 | 579 |
kaf24@3761 | 580 /* Either selector != 0 ==> reload. */ |
kaf24@6199 | 581 if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) ) |
kaf24@4689 | 582 all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds); |
kaf24@3761 | 583 |
kaf24@3761 | 584 /* Either selector != 0 ==> reload. */ |
kaf24@6199 | 585 if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) ) |
kaf24@4689 | 586 all_segs_okay &= loadsegment(es, nctxt->user_regs.es); |
kaf24@1672 | 587 |
kaf24@3761 | 588 /* |
kaf24@3761 | 589 * Either selector != 0 ==> reload. |
kaf24@3761 | 590 * Also reload to reset FS_BASE if it was non-zero. |
kaf24@3761 | 591 */ |
kaf24@6199 | 592 if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) | |
kaf24@4689 | 593 nctxt->user_regs.fs) ) |
kaf24@4689 | 594 all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs); |
kaf24@3761 | 595 |
kaf24@3761 | 596 /* |
kaf24@3761 | 597 * Either selector != 0 ==> reload. |
kaf24@3761 | 598 * Also reload to reset GS_BASE if it was non-zero. |
kaf24@3761 | 599 */ |
kaf24@6199 | 600 if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) | |
kaf24@4689 | 601 nctxt->user_regs.gs) ) |
kaf24@3761 | 602 { |
kaf24@3761 | 603 /* Reset GS_BASE with user %gs? */ |
kaf24@6199 | 604 if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user ) |
kaf24@4689 | 605 all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs); |
kaf24@1672 | 606 } |
kaf24@1672 | 607 |
kaf24@3761 | 608 /* This can only be non-zero if selector is NULL. */ |
kaf24@4689 | 609 if ( nctxt->fs_base ) |
kaf24@3761 | 610 wrmsr(MSR_FS_BASE, |
kaf24@4689 | 611 nctxt->fs_base, |
kaf24@4689 | 612 nctxt->fs_base>>32); |
kaf24@1672 | 613 |
kaf24@4499 | 614 /* Most kernels have non-zero GS base, so don't bother testing. */ |
kaf24@4499 | 615 /* (This is also a serialising instruction, avoiding AMD erratum #88.) */ |
kaf24@4499 | 616 wrmsr(MSR_SHADOW_GS_BASE, |
kaf24@4689 | 617 nctxt->gs_base_kernel, |
kaf24@4689 | 618 nctxt->gs_base_kernel>>32); |
kaf24@4499 | 619 |
kaf24@3761 | 620 /* This can only be non-zero if selector is NULL. */ |
kaf24@4689 | 621 if ( nctxt->gs_base_user ) |
kaf24@3761 | 622 wrmsr(MSR_GS_BASE, |
kaf24@4689 | 623 nctxt->gs_base_user, |
kaf24@4689 | 624 nctxt->gs_base_user>>32); |
kaf24@3761 | 625 |
kaf24@3761 | 626 /* If in kernel mode then switch the GS bases around. */ |
kaf24@3761 | 627 if ( n->arch.flags & TF_kernel_mode ) |
kaf24@4499 | 628 __asm__ __volatile__ ( "swapgs" ); |
kaf24@1672 | 629 |
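/*
 * A bogus guest selector cannot be loaded, so deliver the failsafe
 * callback instead: push the old selectors and an IRET frame onto the
 * guest kernel stack and re-enter the guest at failsafe_callback_eip.
 */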
kaf24@3761 | 630 if ( unlikely(!all_segs_okay) ) |
kaf24@3761 | 631 { |
kaf24@4923 | 632 struct cpu_user_regs *regs = guest_cpu_user_regs(); |
kaf24@4373 | 633 unsigned long *rsp = |
kaf24@3761 | 634 (n->arch.flags & TF_kernel_mode) ? |
kaf24@3761 | 635 (unsigned long *)regs->rsp : |
kaf24@4689 | 636 (unsigned long *)nctxt->kernel_sp; |
kaf24@1672 | 637 |
kaf24@4138 | 638 if ( !(n->arch.flags & TF_kernel_mode) ) |
kaf24@4138 | 639 toggle_guest_mode(n); |
kaf24@4140 | 640 else |
kaf24@4140 | 641 regs->cs &= ~3; |
kaf24@4138 | 642 |
kaf24@4689 | 643 if ( put_user(regs->ss, rsp- 1) | |
kaf24@4689 | 644 put_user(regs->rsp, rsp- 2) | |
kaf24@4689 | 645 put_user(regs->rflags, rsp- 3) | |
kaf24@4689 | 646 put_user(regs->cs, rsp- 4) | |
kaf24@4689 | 647 put_user(regs->rip, rsp- 5) | |
kaf24@4689 | 648 put_user(nctxt->user_regs.gs, rsp- 6) | |
kaf24@4689 | 649 put_user(nctxt->user_regs.fs, rsp- 7) | |
kaf24@4689 | 650 put_user(nctxt->user_regs.es, rsp- 8) | |
kaf24@4689 | 651 put_user(nctxt->user_regs.ds, rsp- 9) | |
kaf24@4689 | 652 put_user(regs->r11, rsp-10) | |
kaf24@4689 | 653 put_user(regs->rcx, rsp-11) ) |
kaf24@1672 | 654 { |
kaf24@3761 | 655 DPRINTK("Error while creating failsafe callback frame.\n"); |
kaf24@3761 | 656 domain_crash(); |
kaf24@1672 | 657 } |
kaf24@1672 | 658 |
kaf24@3761 | 659 regs->entry_vector = TRAP_syscall; |
kaf24@3761 | 660 regs->rflags &= 0xFFFCBEFFUL; |
kaf24@3761 | 661 regs->ss = __GUEST_SS; |
kaf24@3761 | 662 regs->rsp = (unsigned long)(rsp-11); |
kaf24@3761 | 663 regs->cs = __GUEST_CS; |
kaf24@4689 | 664 regs->rip = nctxt->failsafe_callback_eip; |
kaf24@1672 | 665 } |
kaf24@3761 | 666 } |
kaf24@1672 | 667 |
kaf24@5289 | 668 static void save_segments(struct vcpu *v) |
kaf24@4373 | 669 { |
kaf24@6199 | 670 struct vcpu_guest_context *ctxt = &v->arch.guest_context; |
kaf24@6199 | 671 struct cpu_user_regs *regs = &ctxt->user_regs; |
kaf24@6199 | 672 unsigned int dirty_segment_mask = 0; |
kaf24@5763 | 673 |
kaf24@5763 | 674 if ( VMX_DOMAIN(v) ) |
kaf24@5763 | 675 rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs); |
kaf24@5763 | 676 |
kaf24@6759 | 677 __asm__ __volatile__ ( "mov %%ds,%0" : "=m" (regs->ds) ); |
kaf24@6759 | 678 __asm__ __volatile__ ( "mov %%es,%0" : "=m" (regs->es) ); |
kaf24@6759 | 679 __asm__ __volatile__ ( "mov %%fs,%0" : "=m" (regs->fs) ); |
kaf24@6759 | 680 __asm__ __volatile__ ( "mov %%gs,%0" : "=m" (regs->gs) ); |
kaf24@6199 | 681 |
kaf24@6199 | 682 if ( regs->ds ) |
kaf24@6199 | 683 dirty_segment_mask |= DIRTY_DS; |
kaf24@6199 | 684 |
kaf24@6199 | 685 if ( regs->es ) |
kaf24@6199 | 686 dirty_segment_mask |= DIRTY_ES; |
kaf24@1672 | 687 |
kaf24@6199 | 688 if ( regs->fs ) |
kaf24@6199 | 689 { |
kaf24@6199 | 690 dirty_segment_mask |= DIRTY_FS; |
kaf24@6199 | 691 ctxt->fs_base = 0; /* != 0 selector kills fs_base */ |
kaf24@6199 | 692 } |
kaf24@6199 | 693 else if ( ctxt->fs_base ) |
kaf24@6199 | 694 { |
kaf24@6199 | 695 dirty_segment_mask |= DIRTY_FS_BASE; |
kaf24@6199 | 696 } |
kaf24@6199 | 697 |
kaf24@6199 | 698 if ( regs->gs ) |
kaf24@6199 | 699 { |
kaf24@6199 | 700 dirty_segment_mask |= DIRTY_GS; |
kaf24@6199 | 701 ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */ |
kaf24@6199 | 702 } |
kaf24@6199 | 703 else if ( ctxt->gs_base_user ) |
kaf24@6199 | 704 { |
kaf24@6199 | 705 dirty_segment_mask |= DIRTY_GS_BASE_USER; |
kaf24@6199 | 706 } |
kaf24@6199 | 707 |
kaf24@6199 | 708 percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask; |
kaf24@1672 | 709 } |
kaf24@1672 | 710 |
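/*
 * do_switch_to_user() is the x86-64 guest's return-to-user transition:
 * pop a struct switch_to_user frame off the guest kernel stack, flip to
 * the user page tables via toggle_guest_mode(), and force RPL 3 onto the
 * saved CS/SS before returning through entry.S.
 */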
kaf24@3761 | 711 long do_switch_to_user(void) |
kaf24@1672 | 712 { |
kaf24@4923 | 713 struct cpu_user_regs *regs = guest_cpu_user_regs(); |
kaf24@3761 | 714 struct switch_to_user stu; |
kaf24@5289 | 715 struct vcpu *v = current; |
kaf24@3761 | 716 |
kaf24@3764 | 717 if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) || |
kaf24@5289 | 718 unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) ) |
kaf24@3761 | 719 return -EFAULT; |
kaf24@3761 | 720 |
kaf24@5289 | 721 toggle_guest_mode(v); |
kaf24@3761 | 722 |
kaf24@3761 | 723 regs->rip = stu.rip; |
kaf24@4140 | 724 regs->cs = stu.cs | 3; /* force guest privilege */ |
kaf24@3761 | 725 regs->rflags = stu.rflags; |
kaf24@3761 | 726 regs->rsp = stu.rsp; |
kaf24@4140 | 727 regs->ss = stu.ss | 3; /* force guest privilege */ |
kaf24@3761 | 728 |
kaf24@4689 | 729 if ( !(stu.flags & VGCF_IN_SYSCALL) ) |
kaf24@3761 | 730 { |
kaf24@3761 | 731 regs->entry_vector = 0; |
kaf24@3761 | 732 regs->r11 = stu.r11; |
kaf24@3761 | 733 regs->rcx = stu.rcx; |
kaf24@3761 | 734 } |
kaf24@3761 | 735 |
kaf24@4037 | 736 /* Saved %rax gets written back to regs->rax in entry.S. */ |
kaf24@4037 | 737 return stu.rax; |
kaf24@3761 | 738 } |
kaf24@3761 | 739 |
kaf24@4417 | 740 #define switch_kernel_stack(_n,_c) ((void)0) |
kaf24@4417 | 741 |
kaf24@3761 | 742 #elif defined(__i386__) |
kaf24@3761 | 743 |
kaf24@6199 | 744 #define load_segments(n) ((void)0) |
kaf24@6199 | 745 #define load_msrs(n) ((void)0) |
kaf24@6199 | 746 #define save_segments(p) ((void)0) |
kaf24@3761 | 747 |
kaf24@5289 | 748 static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu) |
kaf24@4417 | 749 { |
kaf24@4417 | 750 struct tss_struct *tss = &init_tss[cpu]; |
kaf24@4689 | 751 tss->esp1 = n->arch.guest_context.kernel_sp; |
kaf24@4689 | 752 tss->ss1 = n->arch.guest_context.kernel_ss; |
kaf24@1672 | 753 } |
kaf24@1672 | 754 |
kaf24@3276 | 755 #endif |
kaf24@3276 | 756 |
kaf24@5289 | 757 #define loaddebug(_v,_reg) \ |
kaf24@5576 | 758 __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg])) |
kaf24@1672 | 759 |
kaf24@4373 | 760 static void __context_switch(void) |
kaf24@1672 | 761 { |
kaf24@4923 | 762 struct cpu_user_regs *stack_regs = guest_cpu_user_regs(); |
kaf24@6199 | 763 unsigned int cpu = smp_processor_id(); |
kaf24@6199 | 764 struct vcpu *p = percpu_ctxt[cpu].curr_vcpu; |
kaf24@6199 | 765 struct vcpu *n = current; |
iap10@3290 | 766 |
kaf24@4373 | 767 if ( !is_idle_task(p->domain) ) |
kaf24@1672 | 768 { |
kaf24@4689 | 769 memcpy(&p->arch.guest_context.user_regs, |
kaf24@4689 | 770 stack_regs, |
kaf24@4612 | 771 CTXT_SWITCH_STACK_BYTES); |
kaf24@4373 | 772 unlazy_fpu(p); |
kaf24@4373 | 773 save_segments(p); |
kaf24@4373 | 774 } |
kaf24@4373 | 775 |
kaf24@4417 | 776 if ( !is_idle_task(n->domain) ) |
kaf24@4373 | 777 { |
kaf24@4689 | 778 memcpy(stack_regs, |
kaf24@4689 | 779 &n->arch.guest_context.user_regs, |
kaf24@4612 | 780 CTXT_SWITCH_STACK_BYTES); |
kaf24@1672 | 781 |
kaf24@4417 | 782 /* Maybe switch the debug registers. */ |
kaf24@4689 | 783 if ( unlikely(n->arch.guest_context.debugreg[7]) ) |
kaf24@4373 | 784 { |
kaf24@4689 | 785 loaddebug(&n->arch.guest_context, 0); |
kaf24@4689 | 786 loaddebug(&n->arch.guest_context, 1); |
kaf24@4689 | 787 loaddebug(&n->arch.guest_context, 2); |
kaf24@4689 | 788 loaddebug(&n->arch.guest_context, 3); |
kaf24@4417 | 789 /* no 4 and 5 */ |
kaf24@4689 | 790 loaddebug(&n->arch.guest_context, 6); |
kaf24@4689 | 791 loaddebug(&n->arch.guest_context, 7); |
kaf24@4373 | 792 } |
kaf24@4417 | 793 |
kaf24@4417 | 794 if ( !VMX_DOMAIN(n) ) |
kaf24@4417 | 795 { |
kaf24@4930 | 796 set_int80_direct_trap(n); |
kaf24@4417 | 797 switch_kernel_stack(n, cpu); |
kaf24@4417 | 798 } |
kaf24@1672 | 799 } |
kaf24@1672 | 800 |
kaf24@4453 | 801 if ( p->domain != n->domain ) |
kaf24@5301 | 802 cpu_set(cpu, n->domain->cpumask); |
kaf24@4453 | 803 |
kaf24@4373 | 804 write_ptbase(n); |
kaf24@4972 | 805 |
kaf24@4972 | 806 if ( p->vcpu_id != n->vcpu_id ) |
kaf24@4972 | 807 { |
kaf24@4972 | 808 char gdt_load[10]; |
kaf24@4972 | 809 *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE; |
kaf24@4972 | 810 *(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(n); |
kaf24@4972 | 811 __asm__ __volatile__ ( "lgdt %0" : "=m" (gdt_load) ); |
kaf24@4972 | 812 } |
kaf24@4453 | 813 |
kaf24@4453 | 814 if ( p->domain != n->domain ) |
kaf24@5301 | 815 cpu_clear(cpu, p->domain->cpumask); |
kaf24@4373 | 816 |
kaf24@5289 | 817 percpu_ctxt[cpu].curr_vcpu = n; |
kaf24@4373 | 818 } |
kaf24@4373 | 819 |
kaf24@4373 | 820 |
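/*
 * The context switch is split in two: context_switch() runs with
 * interrupts disabled and performs the heavyweight register and
 * page-table switch, while the segment and MSR reloads are deferred to
 * context_switch_finalise(), called once interrupts are re-enabled
 * (note the matching ASSERTs).
 */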
kaf24@5289 | 821 void context_switch(struct vcpu *prev, struct vcpu *next) |
kaf24@4373 | 822 { |
kaf24@6199 | 823 unsigned int cpu = smp_processor_id(); |
kaf24@4373 | 824 |
kaf24@6200 | 825 ASSERT(!local_irq_is_enabled()); |
kaf24@1672 | 826 |
kaf24@4373 | 827 set_current(next); |
kaf24@3754 | 828 |
kaf24@6199 | 829 if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) ) |
kaf24@4275 | 830 { |
kaf24@4373 | 831 __context_switch(); |
kaf24@6199 | 832 percpu_ctxt[cpu].context_not_finalised = 1; |
kaf24@6199 | 833 } |
kaf24@6199 | 834 } |
kaf24@4373 | 835 |
kaf24@6199 | 836 void context_switch_finalise(struct vcpu *next) |
kaf24@6199 | 837 { |
kaf24@6199 | 838 unsigned int cpu = smp_processor_id(); |
kaf24@6199 | 839 |
kaf24@6200 | 840 ASSERT(local_irq_is_enabled()); |
kaf24@6200 | 841 |
kaf24@6199 | 842 if ( percpu_ctxt[cpu].context_not_finalised ) |
kaf24@6199 | 843 { |
kaf24@6199 | 844 percpu_ctxt[cpu].context_not_finalised = 0; |
kaf24@6199 | 845 |
kaf24@6199 | 846 BUG_ON(percpu_ctxt[cpu].curr_vcpu != next); |
kaf24@6199 | 847 |
kaf24@5763 | 848 if ( VMX_DOMAIN(next) ) |
kaf24@5763 | 849 { |
kaf24@5763 | 850 vmx_restore_msrs(next); |
kaf24@5763 | 851 } |
kaf24@5763 | 852 else |
kaf24@4373 | 853 { |
kaf24@4373 | 854 load_LDT(next); |
kaf24@6199 | 855 load_segments(next); |
kaf24@6199 | 856 load_msrs(next); |
kaf24@4373 | 857 } |
kaf24@4275 | 858 } |
kaf24@4034 | 859 |
kaf24@4373 | 860 schedule_tail(next); |
kaf24@4696 | 861 BUG(); |
kaf24@4696 | 862 } |
kaf24@4034 | 863 |
kaf24@5289 | 864 void continue_running(struct vcpu *same) |
kaf24@4696 | 865 { |
kaf24@4696 | 866 schedule_tail(same); |
kaf24@4034 | 867 BUG(); |
kaf24@1672 | 868 } |
kaf24@1672 | 869 |
kaf24@4417 | 870 int __sync_lazy_execstate(void) |
kaf24@4373 | 871 { |
kaf24@6199 | 872 unsigned long flags; |
kaf24@6199 | 873 int switch_required; |
kaf24@6199 | 874 |
kaf24@6199 | 875 local_irq_save(flags); |
kaf24@6199 | 876 |
kaf24@6199 | 877 switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current); |
kaf24@6199 | 878 |
kaf24@6199 | 879 if ( switch_required ) |
kaf24@6199 | 880 __context_switch(); |
kaf24@6199 | 881 |
kaf24@6199 | 882 local_irq_restore(flags); |
kaf24@6199 | 883 |
kaf24@6199 | 884 return switch_required; |
kaf24@4373 | 885 } |
kaf24@4373 | 886 |
kaf24@6453 | 887 void sync_vcpu_execstate(struct vcpu *v) |
kaf24@4418 | 888 { |
kaf24@6453 | 889 unsigned int cpu = v->processor; |
kaf24@6453 | 890 |
kaf24@6453 | 891 if ( !cpu_isset(cpu, v->domain->cpumask) ) |
kaf24@6453 | 892 return; |
kaf24@6453 | 893 |
kaf24@5301 | 894 if ( cpu == smp_processor_id() ) |
kaf24@6445 | 895 { |
kaf24@5261 | 896 (void)__sync_lazy_execstate(); |
kaf24@6445 | 897 } |
kaf24@6445 | 898 else |
kaf24@6445 | 899 { |
kaf24@6445 | 900 /* Other cpus call __sync_lazy_execstate from flush ipi handler. */ |
kaf24@6445 | 901 flush_tlb_mask(cpumask_of_cpu(cpu)); |
kaf24@6445 | 902 } |
kaf24@4418 | 903 } |
kaf24@4418 | 904 |
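/*
 * Preemptible hypercalls bail out through this helper so that the guest
 * re-executes the hypercall with updated arguments; a caller might look
 * roughly like this (illustrative sketch, argument names hypothetical):
 *
 *     if ( hypercall_preempt_check() )
 *         return __hypercall_create_continuation(
 *             __HYPERVISOR_mmu_update, 4, ureqs, count, pdone, foreigndom);
 */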
kaf24@3702 | 905 unsigned long __hypercall_create_continuation( |
kaf24@3149 | 906 unsigned int op, unsigned int nr_args, ...) |
kaf24@3091 | 907 { |
kaf24@3139 | 908 struct mc_state *mcs = &mc_state[smp_processor_id()]; |
kaf24@4683 | 909 struct cpu_user_regs *regs; |
kaf24@3091 | 910 unsigned int i; |
kaf24@3091 | 911 va_list args; |
kaf24@3091 | 912 |
kaf24@3139 | 913 va_start(args, nr_args); |
kaf24@3139 | 914 |
kaf24@3139 | 915 if ( test_bit(_MCSF_in_multicall, &mcs->flags) ) |
kaf24@3139 | 916 { |
kaf24@3139 | 917 __set_bit(_MCSF_call_preempted, &mcs->flags); |
kaf24@3091 | 918 |
kaf24@3139 | 919 for ( i = 0; i < nr_args; i++ ) |
kaf24@3139 | 920 mcs->call.args[i] = va_arg(args, unsigned long); |
kaf24@3139 | 921 } |
kaf24@3139 | 922 else |
kaf24@3139 | 923 { |
kaf24@4923 | 924 regs = guest_cpu_user_regs(); |
kaf24@3276 | 925 #if defined(__i386__) |
kaf24@4683 | 926 regs->eax = op; |
kaf24@4683 | 927 regs->eip -= 2; /* re-execute 'int 0x82' */ |
kaf24@3139 | 928 |
kaf24@3697 | 929 for ( i = 0; i < nr_args; i++ ) |
kaf24@3697 | 930 { |
kaf24@3697 | 931 switch ( i ) |
kaf24@3697 | 932 { |
kaf24@4683 | 933 case 0: regs->ebx = va_arg(args, unsigned long); break; |
kaf24@4683 | 934 case 1: regs->ecx = va_arg(args, unsigned long); break; |
kaf24@4683 | 935 case 2: regs->edx = va_arg(args, unsigned long); break; |
kaf24@4683 | 936 case 3: regs->esi = va_arg(args, unsigned long); break; |
kaf24@4683 | 937 case 4: regs->edi = va_arg(args, unsigned long); break; |
kaf24@4683 | 938 case 5: regs->ebp = va_arg(args, unsigned long); break; |
kaf24@3697 | 939 } |
kaf24@3697 | 940 } |
kaf24@3697 | 941 #elif defined(__x86_64__) |
kaf24@4683 | 942 regs->rax = op; |
kaf24@4683 | 943 regs->rip -= 2; /* re-execute 'syscall' */ |
kaf24@3697 | 944 |
kaf24@3697 | 945 for ( i = 0; i < nr_args; i++ ) |
kaf24@3697 | 946 { |
kaf24@3697 | 947 switch ( i ) |
kaf24@3697 | 948 { |
kaf24@4683 | 949 case 0: regs->rdi = va_arg(args, unsigned long); break; |
kaf24@4683 | 950 case 1: regs->rsi = va_arg(args, unsigned long); break; |
kaf24@4683 | 951 case 2: regs->rdx = va_arg(args, unsigned long); break; |
kaf24@4683 | 952 case 3: regs->r10 = va_arg(args, unsigned long); break; |
kaf24@4683 | 953 case 4: regs->r8 = va_arg(args, unsigned long); break; |
kaf24@4683 | 954 case 5: regs->r9 = va_arg(args, unsigned long); break; |
kaf24@3697 | 955 } |
kaf24@3697 | 956 } |
kaf24@3276 | 957 #endif |
kaf24@3139 | 958 } |
kaf24@3139 | 959 |
kaf24@3091 | 960 va_end(args); |
kaf24@3149 | 961 |
kaf24@3149 | 962 return op; |
kaf24@3091 | 963 } |
kaf24@3091 | 964 |
kaf24@4455 | 965 #ifdef CONFIG_VMX |
kaf24@5289 | 966 static void vmx_relinquish_resources(struct vcpu *v) |
kaf24@4455 | 967 { |
kaf24@5289 | 968 if ( !VMX_DOMAIN(v) ) |
kaf24@4455 | 969 return; |
kaf24@4455 | 970 |
kaf24@5289 | 971 BUG_ON(v->arch.arch_vmx.vmcs == NULL); |
kaf24@5289 | 972 free_vmcs(v->arch.arch_vmx.vmcs); |
kaf24@5836 | 973 if ( v->arch.arch_vmx.io_bitmap_a != 0 ) { |
kaf24@6684 | 974 free_xenheap_pages( |
kaf24@6684 | 975 v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000)); |
kaf24@5836 | 976 v->arch.arch_vmx.io_bitmap_a = 0; |
kaf24@5836 | 977 } |
kaf24@5836 | 978 if ( v->arch.arch_vmx.io_bitmap_b != 0 ) { |
kaf24@6684 | 979 free_xenheap_pages( |
kaf24@6684 | 980 v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000)); |
kaf24@5836 | 981 v->arch.arch_vmx.io_bitmap_b = 0; |
kaf24@5836 | 982 } |
kaf24@5289 | 983 v->arch.arch_vmx.vmcs = 0; |
kaf24@4455 | 984 |
kaf24@5289 | 985 free_monitor_pagetable(v); |
arun@5608 | 986 rem_ac_timer(&v->domain->arch.vmx_platform.vmx_pit.pit_timer); |
kaf24@4455 | 987 } |
kaf24@4455 | 988 #else |
kaf24@5289 | 989 #define vmx_relinquish_resources(_v) ((void)0) |
kaf24@4455 | 990 #endif |
kaf24@4455 | 991 |
kaf24@4455 | 992 static void relinquish_memory(struct domain *d, struct list_head *list) |
djm@1714 | 993 { |
kaf24@2428 | 994 struct list_head *ent; |
djm@1714 | 995 struct pfn_info *page; |
djm@1714 | 996 unsigned long x, y; |
djm@1714 | 997 |
kaf24@2428 | 998 /* Use a recursive lock, as we may enter 'free_domheap_page'. */ |
kaf24@2428 | 999 spin_lock_recursive(&d->page_alloc_lock); |
kaf24@2428 | 1000 |
kaf24@2428 | 1001 ent = list->next; |
kaf24@2428 | 1002 while ( ent != list ) |
kaf24@2428 | 1003 { |
kaf24@2428 | 1004 page = list_entry(ent, struct pfn_info, list); |
kaf24@2428 | 1005 |
kaf24@2429 | 1006 /* Grab a reference to the page so it won't disappear from under us. */ |
kaf24@2429 | 1007 if ( unlikely(!get_page(page, d)) ) |
kaf24@2428 | 1008 { |
kaf24@2429 | 1009 /* Couldn't get a reference -- someone is freeing this page. */ |
kaf24@2429 | 1010 ent = ent->next; |
kaf24@2428 | 1011 continue; |
kaf24@2428 | 1012 } |
kaf24@2428 | 1013 |
kaf24@2429 | 1014 if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) ) |
kaf24@2429 | 1015 put_page_and_type(page); |
kaf24@2429 | 1016 |
kaf24@2428 | 1017 if ( test_and_clear_bit(_PGC_allocated, &page->count_info) ) |
kaf24@2428 | 1018 put_page(page); |
kaf24@2428 | 1019 |
kaf24@2428 | 1020 /* |
kaf24@2428 | 1021 * Forcibly invalidate base page tables at this point to break circular |
kaf24@2428 | 1022 * 'linear page table' references. This is okay because MMU structures |
kaf24@2428 | 1023 * are not shared across domains and this domain is now dead. Thus base |
kaf24@2428 | 1024 * tables are not in use so a non-zero count means circular reference. |
kaf24@2428 | 1025 */ |
kaf24@2428 | 1026 y = page->u.inuse.type_info; |
kaf24@2429 | 1027 for ( ; ; ) |
kaf24@2429 | 1028 { |
kaf24@2428 | 1029 x = y; |
kaf24@2428 | 1030 if ( likely((x & (PGT_type_mask|PGT_validated)) != |
kaf24@2428 | 1031 (PGT_base_page_table|PGT_validated)) ) |
kaf24@2428 | 1032 break; |
kaf24@2429 | 1033 |
kaf24@2428 | 1034 y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated); |
kaf24@2428 | 1035 if ( likely(y == x) ) |
kaf24@2428 | 1036 { |
kaf24@2428 | 1037 free_page_type(page, PGT_base_page_table); |
kaf24@2429 | 1038 break; |
kaf24@2428 | 1039 } |
kaf24@2428 | 1040 } |
kaf24@2429 | 1041 |
kaf24@2429 | 1042 /* Follow the list chain and /then/ potentially free the page. */ |
kaf24@2429 | 1043 ent = ent->next; |
kaf24@2429 | 1044 put_page(page); |
kaf24@2428 | 1045 } |
kaf24@2428 | 1046 |
kaf24@2428 | 1047 spin_unlock_recursive(&d->page_alloc_lock); |
kaf24@2428 | 1048 } |
kaf24@2428 | 1049 |
kaf24@4455 | 1050 void domain_relinquish_resources(struct domain *d) |
kaf24@2428 | 1051 { |
kaf24@5289 | 1052 struct vcpu *v; |
kaf24@5576 | 1053 unsigned long pfn; |
cl349@2923 | 1054 |
kaf24@5301 | 1055 BUG_ON(!cpus_empty(d->cpumask)); |
djm@1714 | 1056 |
kaf24@4779 | 1057 physdev_destroy_state(d); |
kaf24@4779 | 1058 |
kaf24@4455 | 1059 ptwr_destroy(d); |
kaf24@4455 | 1060 |
cwc22@4100 | 1061 /* Release device mappings of other domains */ |
kaf24@4455 | 1062 gnttab_release_dev_mappings(d->grant_table); |
djm@1714 | 1063 |
kaf24@3761 | 1064 /* Drop the in-use references to page-table bases. */ |
kaf24@5289 | 1065 for_each_vcpu ( d, v ) |
iap10@3741 | 1066 { |
kaf24@5576 | 1067 if ( (pfn = pagetable_get_pfn(v->arch.guest_table)) != 0 ) |
kaf24@3761 | 1068 { |
kaf24@5576 | 1069 if ( !shadow_mode_refcounts(d) ) |
kaf24@5576 | 1070 put_page_type(pfn_to_page(pfn)); |
kaf24@5576 | 1071 put_page(pfn_to_page(pfn)); |
mafetter@4799 | 1072 |
kaf24@5289 | 1073 v->arch.guest_table = mk_pagetable(0); |
kaf24@3761 | 1074 } |
kaf24@3761 | 1075 |
kaf24@5576 | 1076 if ( (pfn = pagetable_get_pfn(v->arch.guest_table_user)) != 0 ) |
kaf24@3761 | 1077 { |
kaf24@5576 | 1078 if ( !shadow_mode_refcounts(d) ) |
kaf24@5576 | 1079 put_page_type(pfn_to_page(pfn)); |
kaf24@5576 | 1080 put_page(pfn_to_page(pfn)); |
mafetter@4799 | 1081 |
kaf24@5289 | 1082 v->arch.guest_table_user = mk_pagetable(0); |
kaf24@3761 | 1083 } |
kaf24@4455 | 1084 |
kaf24@5289 | 1085 vmx_relinquish_resources(v); |
iap10@3741 | 1086 } |
djm@1714 | 1087 |
kaf24@4633 | 1088 shadow_mode_disable(d); |
maf46@4621 | 1089 |
kaf24@1749 | 1090 /* |
kaf24@1749 | 1091 * Relinquish GDT mappings. No need for explicit unmapping of the LDT as |
kaf24@1749 | 1092 * it automatically gets squashed when the guest's mappings go away. |
kaf24@1749 | 1093 */ |
kaf24@5289 | 1094 for_each_vcpu(d, v) |
kaf24@5289 | 1095 destroy_gdt(v); |
kaf24@1749 | 1096 |
kaf24@2428 | 1097 /* Relinquish every page of memory. */ |
kaf24@4455 | 1098 relinquish_memory(d, &d->xenpage_list); |
kaf24@4455 | 1099 relinquish_memory(d, &d->page_list); |
djm@1714 | 1100 } |
djm@1714 | 1101 |
djm@1714 | 1102 |
kaf24@3914 | 1103 /* |
kaf24@3914 | 1104 * Local variables: |
kaf24@3914 | 1105 * mode: C |
kaf24@3914 | 1106 * c-set-style: "BSD" |
kaf24@3914 | 1107 * c-basic-offset: 4 |
kaf24@3914 | 1108 * tab-width: 4 |
kaf24@3914 | 1109 * indent-tabs-mode: nil |
kaf24@3988 | 1110 * End: |
kaf24@3914 | 1111 */ |