/******************************************************************************
 * arch/x86/domain.c
 *
 * x86-specific domain handling (e.g., register setup and context switching).
 */

/*
 *  Copyright (C) 1995  Linus Torvalds
 *
 *  Pentium III FXSR, SSE support
 *  Gareth Hughes <gareth@valinux.com>, May 2000
 */

#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/errno.h>
#include <xen/sched.h>
#include <xen/smp.h>
#include <xen/delay.h>
#include <xen/softirq.h>
#include <xen/grant_table.h>
#include <asm/regs.h>
#include <asm/mc146818rtc.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/processor.h>
#include <asm/desc.h>
#include <asm/i387.h>
#include <asm/mpspec.h>
#include <asm/ldt.h>
#include <xen/irq.h>
#include <xen/event.h>
#include <asm/shadow.h>
#include <xen/console.h>
#include <xen/elf.h>
#include <asm/vmx.h>
#include <asm/vmx_vmcs.h>
#include <asm/msr.h>
#include <asm/physdev.h>
#include <xen/kernel.h>
#include <public/io/ioreq.h>
#include <xen/multicall.h>

/* opt_noreboot: If true, machine will need manual reset on error. */
static int opt_noreboot = 0;
boolean_param("noreboot", opt_noreboot);

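/*
 * Per-CPU context-switch state: the vcpu whose register state currently
 * occupies this CPU (which may differ from 'current' while switching
 * lazily), plus bookkeeping for context_switch_finalise() and the
 * save_segments()/load_segments() fast path.
 */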
struct percpu_ctxt {
    struct vcpu *curr_vcpu;
    unsigned int context_not_finalised;
    unsigned int dirty_segment_mask;
} __cacheline_aligned;
static struct percpu_ctxt percpu_ctxt[NR_CPUS];

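/*
 * Schedule-tail continuations: a vcpu's arch.schedule_tail hook runs on
 * the new stack once a context switch to that vcpu has completed.
 */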
static void continue_idle_task(struct vcpu *v)
{
    reset_stack_and_jump(idle_loop);
}

static void continue_nonidle_task(struct vcpu *v)
{
    reset_stack_and_jump(ret_from_intr);
}

static void default_idle(void)
{
    local_irq_disable();
    if ( !softirq_pending(smp_processor_id()) )
        safe_halt();
    else
        local_irq_enable();
}

void idle_loop(void)
{
    int cpu = smp_processor_id();

    for ( ; ; )
    {
        irq_stat[cpu].idle_timestamp = jiffies;

        while ( !softirq_pending(cpu) )
        {
            page_scrub_schedule_work();
            default_idle();
        }

        do_softirq();
    }
}

void startup_cpu_idle_loop(void)
{
    struct vcpu *v = current;

    ASSERT(is_idle_task(v->domain));
    percpu_ctxt[smp_processor_id()].curr_vcpu = v;
    cpu_set(smp_processor_id(), v->domain->cpumask);
    v->arch.schedule_tail = continue_idle_task;

    idle_loop();
}

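/*
 * Reboot support: no_idt is an empty descriptor table used to force a
 * triple fault; kb_wait() drains the 8042 keyboard controller's input
 * buffer before we pulse its reset line.
 */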
static long no_idt[2];
static int reboot_mode;

static inline void kb_wait(void)
{
    int i;

    for ( i = 0; i < 0x10000; i++ )
        if ( (inb_p(0x64) & 0x02) == 0 )
            break;
}

void machine_restart(char * __unused)
{
    int i;

    if ( opt_noreboot )
    {
        printk("Reboot disabled on cmdline: require manual reset\n");
        for ( ; ; )
            safe_halt();
    }

    watchdog_disable();
    console_start_sync();

    local_irq_enable();

    /* Ensure we are the boot CPU. */
    if ( GET_APIC_ID(apic_read(APIC_ID)) != boot_cpu_physical_apicid )
    {
        smp_call_function((void *)machine_restart, NULL, 1, 0);
        for ( ; ; )
            safe_halt();
    }

    /*
     * Stop all CPUs and turn off local APICs and the IO-APIC, so
     * other OSs see a clean IRQ state.
     */
    smp_send_stop();
    disable_IO_APIC();

#ifdef CONFIG_VMX
    stop_vmx();
#endif

    /* Rebooting needs to touch the page at absolute address 0. */
    *((unsigned short *)__va(0x472)) = reboot_mode;

    for ( ; ; )
    {
        /* Pulse the keyboard reset line. */
        for ( i = 0; i < 100; i++ )
        {
            kb_wait();
            udelay(50);
            outb(0xfe,0x64);         /* pulse reset low */
            udelay(50);
        }

        /* That didn't work - force a triple fault.. */
        __asm__ __volatile__("lidt %0": "=m" (no_idt));
        __asm__ __volatile__("int3");
    }
}

void __attribute__((noreturn)) __machine_halt(void *unused)
{
    for ( ; ; )
        safe_halt();
}

void machine_halt(void)
{
    watchdog_disable();
    console_start_sync();
    smp_call_function(__machine_halt, NULL, 1, 0);
    __machine_halt(NULL);
}

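/*
 * Debug helper: dump count_info (caf) and type_info (taf) for a domain's
 * remaining pages, Xen-heap pages, and shared-info page.
 */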
void dump_pageframe_info(struct domain *d)
{
    struct pfn_info *page;

    if ( d->tot_pages < 10 )
    {
        list_for_each_entry ( page, &d->page_list, list )
        {
            printk("Page %p: caf=%08x, taf=%" PRtype_info "\n",
                   _p(page_to_phys(page)), page->count_info,
                   page->u.inuse.type_info);
        }
    }

    list_for_each_entry ( page, &d->xenpage_list, list )
    {
        printk("XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
               _p(page_to_phys(page)), page->count_info,
               page->u.inuse.type_info);
    }

    page = virt_to_page(d->shared_info);
    printk("Shared_info@%p: caf=%08x, taf=%" PRtype_info "\n",
           _p(page_to_phys(page)), page->count_info,
           page->u.inuse.type_info);
}

struct vcpu *arch_alloc_vcpu_struct(void)
{
    return xmalloc(struct vcpu);
}

/* We assume that vcpu 0 is always the last one to be freed in a
 * domain i.e. if v->vcpu_id == 0, the domain should be
 * single-processor. */
void arch_free_vcpu_struct(struct vcpu *v)
{
    struct vcpu *p;

    for_each_vcpu ( v->domain, p )
    {
        if ( p->next_in_list == v )
            p->next_in_list = v->next_in_list;
    }

    xfree(v);
}

void free_perdomain_pt(struct domain *d)
{
    free_xenheap_page(d->arch.mm_perdomain_pt);
#ifdef __x86_64__
    free_xenheap_page(d->arch.mm_perdomain_l2);
    free_xenheap_page(d->arch.mm_perdomain_l3);
#endif
}

void arch_do_createdomain(struct vcpu *v)
{
    struct domain *d = v->domain;

    v->arch.flags = TF_kernel_mode;

    if ( is_idle_task(d) )
        return;

    v->arch.schedule_tail = continue_nonidle_task;

    d->shared_info = alloc_xenheap_page();
    memset(d->shared_info, 0, PAGE_SIZE);
    v->vcpu_info = &d->shared_info->vcpu_data[v->vcpu_id];
    v->cpumap = CPUMAP_RUNANYWHERE;
    SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d);
    set_pfn_from_mfn(virt_to_phys(d->shared_info) >> PAGE_SHIFT,
                     INVALID_M2P_ENTRY);

    d->arch.mm_perdomain_pt = alloc_xenheap_page();
    memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE);
    set_pfn_from_mfn(virt_to_phys(d->arch.mm_perdomain_pt) >> PAGE_SHIFT,
                     INVALID_M2P_ENTRY);
    v->arch.perdomain_ptes = d->arch.mm_perdomain_pt;
    v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
        l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);

    v->arch.guest_vtable  = __linear_l2_table;
    v->arch.shadow_vtable = __shadow_linear_l2_table;

#ifdef __x86_64__
    v->arch.guest_vl3table = __linear_l3_table;
    v->arch.guest_vl4table = __linear_l4_table;

    d->arch.mm_perdomain_l2 = alloc_xenheap_page();
    memset(d->arch.mm_perdomain_l2, 0, PAGE_SIZE);
    d->arch.mm_perdomain_l2[l2_table_offset(PERDOMAIN_VIRT_START)] =
        l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt),
                      __PAGE_HYPERVISOR);
    d->arch.mm_perdomain_l3 = alloc_xenheap_page();
    memset(d->arch.mm_perdomain_l3, 0, PAGE_SIZE);
    d->arch.mm_perdomain_l3[l3_table_offset(PERDOMAIN_VIRT_START)] =
        l3e_from_page(virt_to_page(d->arch.mm_perdomain_l2),
                      __PAGE_HYPERVISOR);
#endif

    (void)ptwr_init(d);

    shadow_lock_init(d);
    INIT_LIST_HEAD(&d->arch.free_shadow_frames);
}

void arch_do_boot_vcpu(struct vcpu *v)
{
    struct domain *d = v->domain;

    v->arch.flags = TF_kernel_mode;

    v->arch.schedule_tail = d->vcpu[0]->arch.schedule_tail;

    v->arch.perdomain_ptes =
        d->arch.mm_perdomain_pt + (v->vcpu_id << PDPT_VCPU_SHIFT);
    v->arch.perdomain_ptes[FIRST_RESERVED_GDT_PAGE] =
        l1e_from_page(virt_to_page(gdt_table), PAGE_HYPERVISOR);
}

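/*
 * Move a vcpu to a new physical CPU. For a VMX vcpu the VMCS must be
 * cleared on its current CPU and relaunched on the new one, which is
 * deferred to arch_vmx_do_relaunch via the schedule-tail hook.
 */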
void vcpu_migrate_cpu(struct vcpu *v, int newcpu)
{
    if ( v->processor == newcpu )
        return;

    set_bit(_VCPUF_cpu_migrated, &v->vcpu_flags);
    v->processor = newcpu;

    if ( VMX_DOMAIN(v) )
    {
        __vmpclear(virt_to_phys(v->arch.arch_vmx.vmcs));
        v->arch.schedule_tail = arch_vmx_do_relaunch;
    }
}

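/*
 * One-time VMX setup for a vcpu: allocate and construct its VMCS, arrange
 * for the first launch via the schedule-tail hook, and (for vcpu 0) put
 * the whole domain into external shadow-translate mode.
 */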
#ifdef CONFIG_VMX
static int vmx_switch_on;

static int vmx_final_setup_guest(
    struct vcpu *v, struct vcpu_guest_context *ctxt)
{
    int error;
    struct cpu_user_regs *regs;
    struct vmcs_struct *vmcs;

    regs = &ctxt->user_regs;

    /* Create a new VMCS. */
    if ( (vmcs = alloc_vmcs()) == NULL )
    {
        printk("Failed to create a new VMCS\n");
        return -ENOMEM;
    }

    memset(&v->arch.arch_vmx, 0, sizeof(struct arch_vmx_struct));

    v->arch.arch_vmx.vmcs = vmcs;
    error = construct_vmcs(
        &v->arch.arch_vmx, regs, ctxt, VMCS_USE_HOST_ENV);
    if ( error < 0 )
    {
        printk("Failed to construct a new VMCS\n");
        goto out;
    }

    v->arch.schedule_tail = arch_vmx_do_launch;

#if defined (__i386__)
    v->domain->arch.vmx_platform.real_mode_data =
        (unsigned long *)regs->esi;
#endif

    if ( v == v->domain->vcpu[0] )
    {
        /*
         * Required to do this once per domain.
         * XXX todo: add a separate function to do these.
         */
        memset(&v->domain->shared_info->evtchn_mask[0], 0xff,
               sizeof(v->domain->shared_info->evtchn_mask));

        /* Put the domain in shadow mode even though we're going to be using
         * the shared 1:1 page table initially. It shouldn't hurt. */
        shadow_mode_enable(v->domain,
                           SHM_enable|SHM_refcounts|
                           SHM_translate|SHM_external);
    }

    if ( !vmx_switch_on )
        vmx_switch_on = 1;

    return 0;

 out:
    free_vmcs(vmcs);
    if ( v->arch.arch_vmx.io_bitmap_a != 0 )
    {
        free_xenheap_pages(
            v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000));
        v->arch.arch_vmx.io_bitmap_a = 0;
    }
    if ( v->arch.arch_vmx.io_bitmap_b != 0 )
    {
        free_xenheap_pages(
            v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000));
        v->arch.arch_vmx.io_bitmap_b = 0;
    }
    v->arch.arch_vmx.vmcs = 0;
    return error;
}
#endif

/* This is called by arch_final_setup_guest and do_boot_vcpu. */
int arch_set_info_guest(
    struct vcpu *v, struct vcpu_guest_context *c)
{
    struct domain *d = v->domain;
    unsigned long phys_basetab;
    int i, rc;

    /*
     * This is sufficient! If the descriptor DPL differs from CS RPL then we'll
     * #GP. If DS, ES, FS, GS are DPL 0 then they'll be cleared automatically.
     * If SS RPL or DPL differs from CS RPL then we'll #GP.
     */
    if ( !(c->flags & VGCF_VMX_GUEST) )
    {
        if ( ((c->user_regs.cs & 3) == 0) ||
             ((c->user_regs.ss & 3) == 0) )
            return -EINVAL;
    }

    clear_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);
    if ( c->flags & VGCF_I387_VALID )
        set_bit(_VCPUF_fpu_initialised, &v->vcpu_flags);

    v->arch.flags &= ~TF_kernel_mode;
    if ( (c->flags & VGCF_IN_KERNEL) || (c->flags & VGCF_VMX_GUEST) )
        v->arch.flags |= TF_kernel_mode;

    memcpy(&v->arch.guest_context, c, sizeof(*c));

    if ( !(c->flags & VGCF_VMX_GUEST) )
    {
        /* IOPL privileges are virtualised. */
        v->arch.iopl = (v->arch.guest_context.user_regs.eflags >> 12) & 3;
        v->arch.guest_context.user_regs.eflags &= ~EF_IOPL;

        /* Ensure real hardware interrupts are enabled. */
        v->arch.guest_context.user_regs.eflags |= EF_IE;
    }
    else if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
    {
        return modify_vmcs(
            &v->arch.arch_vmx,
            &v->arch.guest_context.user_regs);
    }

    if ( test_bit(_VCPUF_initialised, &v->vcpu_flags) )
        return 0;

    memset(v->arch.guest_context.debugreg, 0,
           sizeof(v->arch.guest_context.debugreg));
    for ( i = 0; i < 8; i++ )
        (void)set_debugreg(v, i, c->debugreg[i]);

    if ( v->vcpu_id == 0 )
        d->vm_assist = c->vm_assist;

    phys_basetab = c->ctrlreg[3];
    v->arch.guest_table = mk_pagetable(phys_basetab);

    if ( shadow_mode_refcounts(d) )
    {
        if ( !get_page(&frame_table[phys_basetab>>PAGE_SHIFT], d) )
            return -EINVAL;
    }
    else if ( !(c->flags & VGCF_VMX_GUEST) )
    {
        if ( !get_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT], d,
                                PGT_base_page_table) )
            return -EINVAL;
    }

    if ( (rc = (int)set_gdt(v, c->gdt_frames, c->gdt_ents)) != 0 )
    {
        put_page_and_type(&frame_table[phys_basetab>>PAGE_SHIFT]);
        return rc;
    }

    if ( c->flags & VGCF_VMX_GUEST )
    {
        /* VMX uses the initially provided page tables as the P2M map. */
        if ( !pagetable_get_paddr(d->arch.phys_table) )
            d->arch.phys_table = v->arch.guest_table;

        if ( (rc = vmx_final_setup_guest(v, c)) != 0 )
            return rc;
    }

    update_pagetables(v);

    if ( v->vcpu_id == 0 )
        init_domain_time(d);

    /* Don't redo final setup. */
    set_bit(_VCPUF_initialised, &v->vcpu_flags);

    return 0;
}

void new_thread(struct vcpu *d,
                unsigned long start_pc,
                unsigned long start_stack,
                unsigned long start_info)
{
    struct cpu_user_regs *regs = &d->arch.guest_context.user_regs;

    /*
     * Initial register values:
     *  DS,ES,FS,GS = FLAT_KERNEL_DS
     *       CS:EIP = FLAT_KERNEL_CS:start_pc
     *       SS:ESP = FLAT_KERNEL_SS:start_stack
     *          ESI = start_info
     *  [EAX,EBX,ECX,EDX,EDI,EBP are zero]
     */
    regs->ds = regs->es = regs->fs = regs->gs = FLAT_KERNEL_DS;
    regs->ss = FLAT_KERNEL_SS;
    regs->cs = FLAT_KERNEL_CS;
    regs->eip = start_pc;
    regs->esp = start_stack;
    regs->esi = start_info;

    __save_flags(regs->eflags);
    regs->eflags |= X86_EFLAGS_IF;
}

#ifdef __x86_64__

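/*
 * Flip a 64-bit guest between kernel and user mode: toggle TF_kernel_mode,
 * swap the GS bases, and switch to the page tables for the new mode.
 */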
void toggle_guest_mode(struct vcpu *v)
{
    v->arch.flags ^= TF_kernel_mode;
    __asm__ __volatile__ ( "swapgs" );
    update_pagetables(v);
    write_ptbase(v);
}

#define loadsegment(seg,value) ({               \
    int __r = 1;                                \
    __asm__ __volatile__ (                      \
        "1: movl %k1,%%" #seg "\n2:\n"          \
        ".section .fixup,\"ax\"\n"              \
        "3: xorl %k0,%k0\n"                     \
        "   movl %k0,%%" #seg "\n"              \
        "   jmp 2b\n"                           \
        ".previous\n"                           \
        ".section __ex_table,\"a\"\n"           \
        "   .align 8\n"                         \
        "   .quad 1b,3b\n"                      \
        ".previous"                             \
        : "=r" (__r) : "r" (value), "0" (__r) );\
    __r; })

#ifdef CONFIG_VMX
#define load_msrs(n)     if (vmx_switch_on) vmx_load_msrs(n)
#else
#define load_msrs(n)     ((void)0)
#endif

/*
 * save_segments() writes a mask of segments which are dirty (non-zero),
 * allowing load_segments() to avoid some expensive segment loads and
 * MSR writes.
 */
#define DIRTY_DS           0x01
#define DIRTY_ES           0x02
#define DIRTY_FS           0x04
#define DIRTY_GS           0x08
#define DIRTY_FS_BASE      0x10
#define DIRTY_GS_BASE_USER 0x20

static void load_segments(struct vcpu *n)
{
    struct vcpu_guest_context *nctxt = &n->arch.guest_context;
    int all_segs_okay = 1;
    unsigned int dirty_segment_mask, cpu = smp_processor_id();

    /* Load and clear the dirty segment mask. */
    dirty_segment_mask = percpu_ctxt[cpu].dirty_segment_mask;
    percpu_ctxt[cpu].dirty_segment_mask = 0;

    /* Either selector != 0 ==> reload. */
    if ( unlikely((dirty_segment_mask & DIRTY_DS) | nctxt->user_regs.ds) )
        all_segs_okay &= loadsegment(ds, nctxt->user_regs.ds);

    /* Either selector != 0 ==> reload. */
    if ( unlikely((dirty_segment_mask & DIRTY_ES) | nctxt->user_regs.es) )
        all_segs_okay &= loadsegment(es, nctxt->user_regs.es);

    /*
     * Either selector != 0 ==> reload.
     * Also reload to reset FS_BASE if it was non-zero.
     */
    if ( unlikely((dirty_segment_mask & (DIRTY_FS | DIRTY_FS_BASE)) |
                  nctxt->user_regs.fs) )
        all_segs_okay &= loadsegment(fs, nctxt->user_regs.fs);

    /*
     * Either selector != 0 ==> reload.
     * Also reload to reset GS_BASE if it was non-zero.
     */
    if ( unlikely((dirty_segment_mask & (DIRTY_GS | DIRTY_GS_BASE_USER)) |
                  nctxt->user_regs.gs) )
    {
        /* Reset GS_BASE with user %gs? */
        if ( (dirty_segment_mask & DIRTY_GS) || !nctxt->gs_base_user )
            all_segs_okay &= loadsegment(gs, nctxt->user_regs.gs);
    }

    /* This can only be non-zero if selector is NULL. */
    if ( nctxt->fs_base )
        wrmsr(MSR_FS_BASE,
              nctxt->fs_base,
              nctxt->fs_base>>32);

    /* Most kernels have non-zero GS base, so don't bother testing. */
    /* (This is also a serialising instruction, avoiding AMD erratum #88.) */
    wrmsr(MSR_SHADOW_GS_BASE,
          nctxt->gs_base_kernel,
          nctxt->gs_base_kernel>>32);

    /* This can only be non-zero if selector is NULL. */
    if ( nctxt->gs_base_user )
        wrmsr(MSR_GS_BASE,
              nctxt->gs_base_user,
              nctxt->gs_base_user>>32);

    /* If in kernel mode then switch the GS bases around. */
    if ( n->arch.flags & TF_kernel_mode )
        __asm__ __volatile__ ( "swapgs" );

    if ( unlikely(!all_segs_okay) )
    {
        struct cpu_user_regs *regs = guest_cpu_user_regs();
        unsigned long *rsp =
            (n->arch.flags & TF_kernel_mode) ?
            (unsigned long *)regs->rsp :
            (unsigned long *)nctxt->kernel_sp;

        if ( !(n->arch.flags & TF_kernel_mode) )
            toggle_guest_mode(n);
        else
            regs->cs &= ~3;

        if ( put_user(regs->ss,            rsp- 1) |
             put_user(regs->rsp,           rsp- 2) |
             put_user(regs->rflags,        rsp- 3) |
             put_user(regs->cs,            rsp- 4) |
             put_user(regs->rip,           rsp- 5) |
             put_user(nctxt->user_regs.gs, rsp- 6) |
             put_user(nctxt->user_regs.fs, rsp- 7) |
             put_user(nctxt->user_regs.es, rsp- 8) |
             put_user(nctxt->user_regs.ds, rsp- 9) |
             put_user(regs->r11,           rsp-10) |
             put_user(regs->rcx,           rsp-11) )
        {
            DPRINTK("Error while creating failsafe callback frame.\n");
            domain_crash();
        }

        regs->entry_vector  = TRAP_syscall;
        regs->rflags       &= 0xFFFCBEFFUL;
        regs->ss            = __GUEST_SS;
        regs->rsp           = (unsigned long)(rsp-11);
        regs->cs            = __GUEST_CS;
        regs->rip           = nctxt->failsafe_callback_eip;
    }
}

static void save_segments(struct vcpu *v)
{
    struct vcpu_guest_context *ctxt = &v->arch.guest_context;
    struct cpu_user_regs      *regs = &ctxt->user_regs;
    unsigned int dirty_segment_mask = 0;

    if ( VMX_DOMAIN(v) )
        rdmsrl(MSR_SHADOW_GS_BASE, v->arch.arch_vmx.msr_content.shadow_gs);

    __asm__ __volatile__ ( "mov %%ds,%0" : "=m" (regs->ds) );
    __asm__ __volatile__ ( "mov %%es,%0" : "=m" (regs->es) );
    __asm__ __volatile__ ( "mov %%fs,%0" : "=m" (regs->fs) );
    __asm__ __volatile__ ( "mov %%gs,%0" : "=m" (regs->gs) );

    if ( regs->ds )
        dirty_segment_mask |= DIRTY_DS;

    if ( regs->es )
        dirty_segment_mask |= DIRTY_ES;

    if ( regs->fs )
    {
        dirty_segment_mask |= DIRTY_FS;
        ctxt->fs_base = 0; /* != 0 selector kills fs_base */
    }
    else if ( ctxt->fs_base )
    {
        dirty_segment_mask |= DIRTY_FS_BASE;
    }

    if ( regs->gs )
    {
        dirty_segment_mask |= DIRTY_GS;
        ctxt->gs_base_user = 0; /* != 0 selector kills gs_base_user */
    }
    else if ( ctxt->gs_base_user )
    {
        dirty_segment_mask |= DIRTY_GS_BASE_USER;
    }

    percpu_ctxt[smp_processor_id()].dirty_segment_mask = dirty_segment_mask;
}

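/*
 * Hypercall: pop a switch_to_user frame from the guest kernel stack and
 * resume the guest's user-mode context.
 */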
long do_switch_to_user(void)
{
    struct cpu_user_regs *regs = guest_cpu_user_regs();
    struct switch_to_user stu;
    struct vcpu *v = current;

    if ( unlikely(copy_from_user(&stu, (void *)regs->rsp, sizeof(stu))) ||
         unlikely(pagetable_get_paddr(v->arch.guest_table_user) == 0) )
        return -EFAULT;

    toggle_guest_mode(v);

    regs->rip    = stu.rip;
    regs->cs     = stu.cs | 3; /* force guest privilege */
    regs->rflags = stu.rflags;
    regs->rsp    = stu.rsp;
    regs->ss     = stu.ss | 3; /* force guest privilege */

    if ( !(stu.flags & VGCF_IN_SYSCALL) )
    {
        regs->entry_vector = 0;
        regs->r11 = stu.r11;
        regs->rcx = stu.rcx;
    }

    /* Saved %rax gets written back to regs->rax in entry.S. */
    return stu.rax;
}

#define switch_kernel_stack(_n,_c) ((void)0)

#elif defined(__i386__)

#define load_segments(n) ((void)0)
#define load_msrs(n)     ((void)0)
#define save_segments(p) ((void)0)

static inline void switch_kernel_stack(struct vcpu *n, unsigned int cpu)
{
    struct tss_struct *tss = &init_tss[cpu];
    tss->esp1 = n->arch.guest_context.kernel_sp;
    tss->ss1  = n->arch.guest_context.kernel_ss;
}

#endif

#define loaddebug(_v,_reg) \
    __asm__ __volatile__ ("mov %0,%%db" #_reg : : "r" ((_v)->debugreg[_reg]))

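/*
 * Perform the real state switch: save the outgoing vcpu's registers and
 * segments, then install the incoming vcpu's registers, debug registers,
 * page tables, and (if needed) GDT. Runs with interrupts disabled.
 */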
static void __context_switch(void)
{
    struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
    unsigned int          cpu = smp_processor_id();
    struct vcpu          *p = percpu_ctxt[cpu].curr_vcpu;
    struct vcpu          *n = current;

    if ( !is_idle_task(p->domain) )
    {
        memcpy(&p->arch.guest_context.user_regs,
               stack_regs,
               CTXT_SWITCH_STACK_BYTES);
        unlazy_fpu(p);
        save_segments(p);
    }

    if ( !is_idle_task(n->domain) )
    {
        memcpy(stack_regs,
               &n->arch.guest_context.user_regs,
               CTXT_SWITCH_STACK_BYTES);

        /* Maybe switch the debug registers. */
        if ( unlikely(n->arch.guest_context.debugreg[7]) )
        {
            loaddebug(&n->arch.guest_context, 0);
            loaddebug(&n->arch.guest_context, 1);
            loaddebug(&n->arch.guest_context, 2);
            loaddebug(&n->arch.guest_context, 3);
            /* no 4 and 5 */
            loaddebug(&n->arch.guest_context, 6);
            loaddebug(&n->arch.guest_context, 7);
        }

        if ( !VMX_DOMAIN(n) )
        {
            set_int80_direct_trap(n);
            switch_kernel_stack(n, cpu);
        }
    }

    if ( p->domain != n->domain )
        cpu_set(cpu, n->domain->cpumask);

    write_ptbase(n);

    if ( p->vcpu_id != n->vcpu_id )
    {
        char gdt_load[10];
        *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
        *(unsigned long  *)(&gdt_load[2]) = GDT_VIRT_START(n);
        __asm__ __volatile__ ( "lgdt %0" : "=m" (gdt_load) );
    }

    if ( p->domain != n->domain )
        cpu_clear(cpu, p->domain->cpumask);

    percpu_ctxt[cpu].curr_vcpu = n;
}

void context_switch(struct vcpu *prev, struct vcpu *next)
{
    unsigned int cpu = smp_processor_id();

    ASSERT(!local_irq_is_enabled());

    set_current(next);

    if ( (percpu_ctxt[cpu].curr_vcpu != next) && !is_idle_task(next->domain) )
    {
        __context_switch();
        percpu_ctxt[cpu].context_not_finalised = 1;
    }
}

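/*
 * Second half of the context switch, run once interrupts are re-enabled:
 * load the incoming vcpu's LDT, segments and MSRs (or restore VMX MSR
 * state), then jump to its schedule-tail continuation. Does not return.
 */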
void context_switch_finalise(struct vcpu *next)
{
    unsigned int cpu = smp_processor_id();

    ASSERT(local_irq_is_enabled());

    if ( percpu_ctxt[cpu].context_not_finalised )
    {
        percpu_ctxt[cpu].context_not_finalised = 0;

        BUG_ON(percpu_ctxt[cpu].curr_vcpu != next);

        if ( VMX_DOMAIN(next) )
        {
            vmx_restore_msrs(next);
        }
        else
        {
            load_LDT(next);
            load_segments(next);
            load_msrs(next);
        }
    }

    schedule_tail(next);
    BUG();
}

void continue_running(struct vcpu *same)
{
    schedule_tail(same);
    BUG();
}

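/*
 * If this CPU is still lazily holding the state of a descheduled vcpu,
 * flush it out now. Returns nonzero if a state switch was required.
 */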
int __sync_lazy_execstate(void)
{
    unsigned long flags;
    int switch_required;

    local_irq_save(flags);

    switch_required = (percpu_ctxt[smp_processor_id()].curr_vcpu != current);

    if ( switch_required )
        __context_switch();

    local_irq_restore(flags);

    return switch_required;
}

void sync_vcpu_execstate(struct vcpu *v)
{
    unsigned int cpu = v->processor;

    if ( !cpu_isset(cpu, v->domain->cpumask) )
        return;

    if ( cpu == smp_processor_id() )
    {
        (void)__sync_lazy_execstate();
    }
    else
    {
        /* Other cpus call __sync_lazy_execstate from flush ipi handler. */
        flush_tlb_mask(cpumask_of_cpu(cpu));
    }
}

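/*
 * Set up a preempted hypercall to be restarted: within a multicall the
 * arguments are stashed in the multicall state; otherwise the argument
 * registers are rewritten and EIP/RIP is wound back two bytes so the
 * 'int 0x82' or 'syscall' instruction is re-executed on return to guest.
 */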
unsigned long __hypercall_create_continuation(
    unsigned int op, unsigned int nr_args, ...)
{
    struct mc_state *mcs = &mc_state[smp_processor_id()];
    struct cpu_user_regs *regs;
    unsigned int i;
    va_list args;

    va_start(args, nr_args);

    if ( test_bit(_MCSF_in_multicall, &mcs->flags) )
    {
        __set_bit(_MCSF_call_preempted, &mcs->flags);

        for ( i = 0; i < nr_args; i++ )
            mcs->call.args[i] = va_arg(args, unsigned long);
    }
    else
    {
        regs = guest_cpu_user_regs();
#if defined(__i386__)
        regs->eax  = op;
        regs->eip -= 2;  /* re-execute 'int 0x82' */

        for ( i = 0; i < nr_args; i++ )
        {
            switch ( i )
            {
            case 0: regs->ebx = va_arg(args, unsigned long); break;
            case 1: regs->ecx = va_arg(args, unsigned long); break;
            case 2: regs->edx = va_arg(args, unsigned long); break;
            case 3: regs->esi = va_arg(args, unsigned long); break;
            case 4: regs->edi = va_arg(args, unsigned long); break;
            case 5: regs->ebp = va_arg(args, unsigned long); break;
            }
        }
#elif defined(__x86_64__)
        regs->rax  = op;
        regs->rip -= 2;  /* re-execute 'syscall' */

        for ( i = 0; i < nr_args; i++ )
        {
            switch ( i )
            {
            case 0: regs->rdi = va_arg(args, unsigned long); break;
            case 1: regs->rsi = va_arg(args, unsigned long); break;
            case 2: regs->rdx = va_arg(args, unsigned long); break;
            case 3: regs->r10 = va_arg(args, unsigned long); break;
            case 4: regs->r8  = va_arg(args, unsigned long); break;
            case 5: regs->r9  = va_arg(args, unsigned long); break;
            }
        }
#endif
    }

    va_end(args);

    return op;
}

#ifdef CONFIG_VMX
static void vmx_relinquish_resources(struct vcpu *v)
{
    if ( !VMX_DOMAIN(v) )
        return;

    BUG_ON(v->arch.arch_vmx.vmcs == NULL);
    free_vmcs(v->arch.arch_vmx.vmcs);
    if ( v->arch.arch_vmx.io_bitmap_a != 0 )
    {
        free_xenheap_pages(
            v->arch.arch_vmx.io_bitmap_a, get_order_from_bytes(0x1000));
        v->arch.arch_vmx.io_bitmap_a = 0;
    }
    if ( v->arch.arch_vmx.io_bitmap_b != 0 )
    {
        free_xenheap_pages(
            v->arch.arch_vmx.io_bitmap_b, get_order_from_bytes(0x1000));
        v->arch.arch_vmx.io_bitmap_b = 0;
    }
    v->arch.arch_vmx.vmcs = 0;

    free_monitor_pagetable(v);
    rem_ac_timer(&v->domain->arch.vmx_platform.vmx_pit.pit_timer);
}
#else
#define vmx_relinquish_resources(_v) ((void)0)
#endif

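/*
 * Drop this domain's references to every page on 'list', breaking pinned
 * and circular base-page-table references so the pages can be freed.
 */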
static void relinquish_memory(struct domain *d, struct list_head *list)
{
    struct list_head *ent;
    struct pfn_info  *page;
    unsigned long     x, y;

    /* Use a recursive lock, as we may enter 'free_domheap_page'. */
    spin_lock_recursive(&d->page_alloc_lock);

    ent = list->next;
    while ( ent != list )
    {
        page = list_entry(ent, struct pfn_info, list);

        /* Grab a reference to the page so it won't disappear from under us. */
        if ( unlikely(!get_page(page, d)) )
        {
            /* Couldn't get a reference -- someone is freeing this page. */
            ent = ent->next;
            continue;
        }

        if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
            put_page_and_type(page);

        if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
            put_page(page);

        /*
         * Forcibly invalidate base page tables at this point to break circular
         * 'linear page table' references. This is okay because MMU structures
         * are not shared across domains and this domain is now dead. Thus base
         * tables are not in use so a non-zero count means circular reference.
         */
        y = page->u.inuse.type_info;
        for ( ; ; )
        {
            x = y;
            if ( likely((x & (PGT_type_mask|PGT_validated)) !=
                        (PGT_base_page_table|PGT_validated)) )
                break;

            y = cmpxchg(&page->u.inuse.type_info, x, x & ~PGT_validated);
            if ( likely(y == x) )
            {
                free_page_type(page, PGT_base_page_table);
                break;
            }
        }

        /* Follow the list chain and /then/ potentially free the page. */
        ent = ent->next;
        put_page(page);
    }

    spin_unlock_recursive(&d->page_alloc_lock);
}

void domain_relinquish_resources(struct domain *d)
{
    struct vcpu *v;
    unsigned long pfn;

    BUG_ON(!cpus_empty(d->cpumask));

    physdev_destroy_state(d);

    ptwr_destroy(d);

    /* Release device mappings of other domains. */
    gnttab_release_dev_mappings(d->grant_table);

    /* Drop the in-use references to page-table bases. */
    for_each_vcpu ( d, v )
    {
        if ( (pfn = pagetable_get_pfn(v->arch.guest_table)) != 0 )
        {
            if ( !shadow_mode_refcounts(d) )
                put_page_type(pfn_to_page(pfn));
            put_page(pfn_to_page(pfn));

            v->arch.guest_table = mk_pagetable(0);
        }

        if ( (pfn = pagetable_get_pfn(v->arch.guest_table_user)) != 0 )
        {
            if ( !shadow_mode_refcounts(d) )
                put_page_type(pfn_to_page(pfn));
            put_page(pfn_to_page(pfn));

            v->arch.guest_table_user = mk_pagetable(0);
        }

        vmx_relinquish_resources(v);
    }

    shadow_mode_disable(d);

    /*
     * Relinquish GDT mappings. No need for explicit unmapping of the LDT as
     * it automatically gets squashed when the guest's mappings go away.
     */
    for_each_vcpu ( d, v )
        destroy_gdt(v);

    /* Relinquish every page of memory. */
    relinquish_memory(d, &d->xenpage_list);
    relinquish_memory(d, &d->page_list);
}

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: nil
 * End:
 */