debuggers.hg

view xen/arch/ia64/vmx/vmx_init.c @ 16392:91575bb23d07

[IA64] vti save-restore: hvm domain io page clean up.

- set_hvm_param hypercall clean up.
- The reference counts of the io pages must be incremented.
- Buffered pio wasn't SMP safe.
- Clean up get_vio() parameter.

Signed-off-by: Isaku Yamahata <yamahata@valinux.co.jp>
author Alex Williamson <alex.williamson@hp.com>
date Wed Nov 07 10:31:09 2007 -0700 (2007-11-07)
parents 828cb584c1cc
children f9ca1d8c9e65
line source
/* -*- Mode:C; c-basic-offset:4; tab-width:4; indent-tabs-mode:nil -*- */
/*
 * vmx_init.c: initialization work for vt specific domain
 * Copyright (c) 2005, Intel Corporation.
 *  Kun Tian (Kevin Tian) <kevin.tian@intel.com>
 *  Xuefei Xu (Anthony Xu) <anthony.xu@intel.com>
 *  Fred Yang <fred.yang@intel.com>
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 */

/*
 * 05/08/16 Kun Tian (Kevin Tian) <kevin.tian@intel.com>:
 * Disable double mapping
 *
 * 05/03/23 Kun Tian (Kevin Tian) <kevin.tian@intel.com>:
 * Simplified design in the first step:
 *  - One virtual environment
 *  - Domain is bound to one LP
 * Later, to support guest SMP:
 *  - Need interface to handle VP scheduled to different LP
 */

#include <xen/config.h>
#include <xen/types.h>
#include <xen/sched.h>
#include <asm/pal.h>
#include <asm/page.h>
#include <asm/processor.h>
#include <asm/vmx_vcpu.h>
#include <xen/lib.h>
#include <asm/vmmu.h>
#include <public/xen.h>
#include <public/hvm/ioreq.h>
#include <public/event_channel.h>
#include <asm/vmx_phy_mode.h>
#include <asm/vmx.h>
#include <xen/mm.h>
#include <asm/viosapic.h>
#include <xen/event.h>
#include <asm/vlsapic.h>
#include "entry.h"

/* Global flag to identify whether Intel vmx feature is on */
u32 vmx_enabled = 0;
static u32 vm_order;
static u64 buffer_size;
static u64 vp_env_info;
static u64 vm_buffer = 0;   /* Buffer required to bring up VMX feature */
u64 __vsa_base = 0;         /* Run-time service base of VMX */

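/*
 * Summary of how this file hangs together (derived from the code below):
 * identify_vmx_feature() probes PAL for the VT-i capability and records the
 * buffer requirements (buffer_size/vm_order); vmx_init_env() allocates that
 * buffer once and initializes the VMX environment on each LP;
 * vmx_final_setup_guest() and vmx_setup_platform() do per-vcpu and per-domain
 * setup for a VTi guest; vmx_save_state()/vmx_load_state() run on context
 * switch; vmx_do_resume() completes any pending ioreq before reentering the
 * guest.
 */
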
/* Check whether the vt feature is enabled or not. */
void
identify_vmx_feature(void)
{
    pal_status_t ret;
    u64 avail = 1, status = 1, control = 1;

    vmx_enabled = 0;
    /* Check VT-i feature */
    ret = ia64_pal_proc_get_features(&avail, &status, &control);
    if (ret != PAL_STATUS_SUCCESS) {
        printk("Get proc features failed.\n");
        goto no_vti;
    }

    /* FIXME: do we need to check the status field, to see whether
     * PSR.vm is actually enabled? If yes, another call to
     * ia64_pal_proc_set_features may be required then.
     */
    printk("avail:0x%lx, status:0x%lx, control:0x%lx, vm?0x%lx\n",
           avail, status, control, avail & PAL_PROC_VM_BIT);
    if (!(avail & PAL_PROC_VM_BIT)) {
        printk("No VT feature supported.\n");
        goto no_vti;
    }

    ret = ia64_pal_vp_env_info(&buffer_size, &vp_env_info);
    if (ret != PAL_STATUS_SUCCESS) {
        printk("Get vp environment info failed.\n");
        goto no_vti;
    }

    /* Does xen have the ability to decode itself? */
    if (!(vp_env_info & VP_OPCODE))
        printk("WARNING: no opcode provided from hardware(%lx)!!!\n", vp_env_info);
    vm_order = get_order(buffer_size);
    printk("vm buffer size: %ld, order: %d\n", buffer_size, vm_order);

    vmx_enabled = 1;
no_vti:
    return;
}

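/*
 * Expected boot-time usage (illustration only; the actual callers live
 * outside this file):
 *
 *     identify_vmx_feature();      // probe PAL for VT-i support
 *     if (vmx_enabled)
 *         vmx_init_env();          // then bring up VMX on each LP
 */
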
/*
 * Init the virtual environment on the current LP.
 * __vsa_base indicates whether this is the first LP to be initialized
 * for the current domain.
 */
void
vmx_init_env(void)
{
    u64 status, tmp_base;

    if (!vm_buffer) {
        vm_buffer = (unsigned long)alloc_xenheap_pages(vm_order);
        ASSERT(vm_buffer);
        vm_buffer = virt_to_xenva((vm_buffer));
        printk("vm_buffer: 0x%lx\n", vm_buffer);
    }

    status = ia64_pal_vp_init_env(__vsa_base ? VP_INIT_ENV
                                             : VP_INIT_ENV_INITALIZE,
                                  __pa(vm_buffer),
                                  vm_buffer,
                                  &tmp_base);

    if (status != PAL_STATUS_SUCCESS) {
        printk("ia64_pal_vp_init_env failed.\n");
        return;
    }

    if (!__vsa_base)
        __vsa_base = tmp_base;
    else
        ASSERT(tmp_base == __vsa_base);
}

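/*
 * Note on the above: vm_buffer is allocated only once and shared by all LPs.
 * The PAL call is given both the physical and virtual address of the buffer
 * and returns the VSA base in tmp_base; the first LP records it in
 * __vsa_base, and every later LP asserts that PAL handed back the same base.
 */
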
typedef union {
    u64 value;
    struct {
        u64 number   : 8;
        u64 revision : 8;
        u64 model    : 8;
        u64 family   : 8;
        u64 archrev  : 8;
        u64 rv       : 24;
    };
} cpuid3_t;

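/*
 * cpuid3_t gives field-wise access to CPUID register 3 (number, revision,
 * model, family, archrev). alloc_vpd() below uses it to clamp the "number"
 * field so that the guest sees only 5 CPUID registers.
 */
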
/* Allocate vpd from xenheap */
static vpd_t *alloc_vpd(void)
{
    int i;
    cpuid3_t cpuid3;
    vpd_t *vpd;
    mapped_regs_t *mregs;

    vpd = alloc_xenheap_pages(get_order(VPD_SIZE));
    if (!vpd) {
        printk("VPD allocation failed.\n");
        return NULL;
    }
    vpd = (vpd_t *)virt_to_xenva(vpd);

    printk(XENLOG_DEBUG "vpd base: 0x%p, vpd size:%ld\n",
           vpd, sizeof(vpd_t));
    memset(vpd, 0, VPD_SIZE);
    mregs = &vpd->vpd_low;

    /* CPUID init */
    for (i = 0; i < 5; i++)
        mregs->vcpuid[i] = ia64_get_cpuid(i);

    /* Limit the CPUID number to 5 */
    cpuid3.value = mregs->vcpuid[3];
    cpuid3.number = 4;  /* 5 - 1 */
    mregs->vcpuid[3] = cpuid3.value;

    mregs->vac.a_from_int_cr = 1;
    mregs->vac.a_to_int_cr = 1;
    mregs->vac.a_from_psr = 1;
    mregs->vac.a_from_cpuid = 1;
    mregs->vac.a_cover = 1;
    mregs->vac.a_bsw = 1;
    mregs->vac.a_int = 1;
    mregs->vdc.d_vmsw = 1;

    return vpd;
}

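/*
 * NOTE (unverified; consult the VT-i specification for the authoritative
 * definitions): vac and vdc appear to be the VPD's virtualization
 * acceleration and virtualization disable control words. The bits set above
 * seem to enable accelerated handling of interruption-CR accesses, PSR
 * reads, cpuid, cover, bsw and interrupt collection, with d_vmsw relating to
 * vmsw behaviour.
 */
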
/* Free vpd to xenheap */
static void
free_vpd(struct vcpu *v)
{
    if ( v->arch.privregs )
        free_xenheap_pages(v->arch.privregs, get_order(VPD_SIZE));
}

/*
 * Create a VP on an initialized VMX environment.
 */
static void
vmx_create_vp(struct vcpu *v)
{
    u64 ret;
    vpd_t *vpd = (vpd_t *)v->arch.privregs;
    u64 ivt_base;
    extern char vmx_ia64_ivt;
    /* ia64_ivt is a function pointer, so this translation is needed */
    ivt_base = (u64) &vmx_ia64_ivt;
    printk(XENLOG_DEBUG "ivt_base: 0x%lx\n", ivt_base);
    ret = ia64_pal_vp_create((u64 *)vpd, (u64 *)ivt_base, 0);
    if (ret != PAL_STATUS_SUCCESS) {
        panic_domain(vcpu_regs(v), "ia64_pal_vp_create failed.\n");
    }
}

/* Other non-context related tasks can be done in context switch */
void
vmx_save_state(struct vcpu *v)
{
    u64 status;

    /* FIXME: about setting of pal_proc_vector... time consuming */
    status = ia64_pal_vp_save((u64 *)v->arch.privregs, 0);
    if (status != PAL_STATUS_SUCCESS) {
        panic_domain(vcpu_regs(v), "Save vp status failed\n");
    }

    /* Need to save the KRs on domain switch, though the HV itself
     * doesn't use them.
     */
    v->arch.arch_vmx.vkr[0] = ia64_get_kr(0);
    v->arch.arch_vmx.vkr[1] = ia64_get_kr(1);
    v->arch.arch_vmx.vkr[2] = ia64_get_kr(2);
    v->arch.arch_vmx.vkr[3] = ia64_get_kr(3);
    v->arch.arch_vmx.vkr[4] = ia64_get_kr(4);
    v->arch.arch_vmx.vkr[5] = ia64_get_kr(5);
    v->arch.arch_vmx.vkr[6] = ia64_get_kr(6);
    v->arch.arch_vmx.vkr[7] = ia64_get_kr(7);
}

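/*
 * The KRs above are the ia64 kernel registers ar.k0-ar.k7: Xen does not use
 * them, but a guest may, so they are banked per vcpu here and written back
 * by vmx_load_state() when the vcpu is switched in again.
 */
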
/* Even when the guest is in physical mode, we still need such double mapping */
void
vmx_load_state(struct vcpu *v)
{
    u64 status;

    status = ia64_pal_vp_restore((u64 *)v->arch.privregs, 0);
    if (status != PAL_STATUS_SUCCESS) {
        panic_domain(vcpu_regs(v), "Restore vp status failed\n");
    }

    ia64_set_kr(0, v->arch.arch_vmx.vkr[0]);
    ia64_set_kr(1, v->arch.arch_vmx.vkr[1]);
    ia64_set_kr(2, v->arch.arch_vmx.vkr[2]);
    ia64_set_kr(3, v->arch.arch_vmx.vkr[3]);
    ia64_set_kr(4, v->arch.arch_vmx.vkr[4]);
    ia64_set_kr(5, v->arch.arch_vmx.vkr[5]);
    ia64_set_kr(6, v->arch.arch_vmx.vkr[6]);
    ia64_set_kr(7, v->arch.arch_vmx.vkr[7]);
    /* The guest vTLB is not required to be switched explicitly, since
     * it is anchored in the vcpu */
}

static int
vmx_vcpu_initialise(struct vcpu *v)
{
    struct vmx_ioreq_page *iorp = &v->domain->arch.hvm_domain.ioreq;

    int rc = alloc_unbound_xen_event_channel(v, 0);
    if (rc < 0)
        return rc;
    v->arch.arch_vmx.xen_port = rc;

    spin_lock(&iorp->lock);
    if (v->domain->arch.vmx_platform.ioreq.va != 0) {
        vcpu_iodata_t *p = get_vio(v);
        p->vp_eport = v->arch.arch_vmx.xen_port;
    }
    spin_unlock(&iorp->lock);

    gdprintk(XENLOG_INFO, "Allocated port %ld for hvm %d vcpu %d.\n",
             v->arch.arch_vmx.xen_port, v->domain->domain_id, v->vcpu_id);

    return 0;
}

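/*
 * Note on the above: every vcpu gets an unbound Xen event channel, and if
 * the shared ioreq page is already mapped, the port is published to the
 * device model through the vcpu's vp_eport field. Taking iorp->lock
 * serializes this against vmx_set_ioreq_page() registering that page.
 */
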
static int vmx_create_event_channels(struct vcpu *v)
{
    struct vcpu *o;

    if (v->vcpu_id == 0) {
        /* Ugly: create event channels for every vcpu when vcpu 0
           starts, so that they're available for ioemu to bind to. */
        for_each_vcpu(v->domain, o) {
            int rc = vmx_vcpu_initialise(o);
            if (rc < 0)     //XXX error recovery
                return rc;
        }
    }

    return 0;
}

/*
 * The event channel has already been destroyed in domain_kill(), so we
 * needn't do anything here.
 */
static void vmx_release_assist_channel(struct vcpu *v)
{
    return;
}

/* The following three functions are based on hvm_xxx_ioreq_page()
 * in xen/arch/x86/hvm/hvm.c */
static void vmx_init_ioreq_page(
    struct domain *d, struct vmx_ioreq_page *iorp)
{
    memset(iorp, 0, sizeof(*iorp));
    spin_lock_init(&iorp->lock);
    domain_pause(d);
}

static void vmx_destroy_ioreq_page(
    struct domain *d, struct vmx_ioreq_page *iorp)
{
    spin_lock(&iorp->lock);

    ASSERT(d->is_dying);

    if (iorp->va != NULL) {
        put_page(iorp->page);
        iorp->page = NULL;
        iorp->va = NULL;
    }

    spin_unlock(&iorp->lock);
}

int vmx_set_ioreq_page(
    struct domain *d, struct vmx_ioreq_page *iorp, unsigned long gpfn)
{
    struct page_info *page;
    unsigned long mfn;
    pte_t pte;

    pte = *lookup_noalloc_domain_pte(d, gpfn << PAGE_SHIFT);
    if (!pte_present(pte) || !pte_mem(pte))
        return -EINVAL;
    mfn = (pte_val(pte) & _PFN_MASK) >> PAGE_SHIFT;
    ASSERT(mfn_valid(mfn));

    page = mfn_to_page(mfn);
    if (get_page(page, d) == 0)
        return -EINVAL;

    spin_lock(&iorp->lock);

    if ((iorp->va != NULL) || d->is_dying) {
        spin_unlock(&iorp->lock);
        put_page(page);
        return -EINVAL;
    }

    iorp->va = mfn_to_virt(mfn);
    iorp->page = page;

    spin_unlock(&iorp->lock);

    domain_unpause(d);

    return 0;
}

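/*
 * This is where the io-page reference counting mentioned in the changeset
 * description happens: get_page() takes the reference that
 * vmx_destroy_ioreq_page() later drops with put_page(), and the
 * domain_pause() done in vmx_init_ioreq_page() is balanced by the
 * domain_unpause() above once the page is registered. A typical call from
 * the set_hvm_param path (hypothetical, for illustration) would look like
 *
 *     vmx_set_ioreq_page(d, &d->arch.vmx_platform.buf_pioreq, gpfn);
 */
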
/*
 * Initialize the VMX environment for the guest. Only the 1st vp/vcpu
 * is registered here.
 */
int
vmx_final_setup_guest(struct vcpu *v)
{
    vpd_t *vpd;
    int rc;
    struct switch_stack *sw;

    vpd = alloc_vpd();
    ASSERT(vpd);
    if (!vpd)
        return -ENOMEM;

    v->arch.privregs = (mapped_regs_t *)vpd;
    vpd->vpd_low.virt_env_vaddr = vm_buffer;

    v->domain->arch.vmx_platform.gos_type = OS_UNKNOWN;
    /* Per-domain vTLB and vhpt implementation. For now a vmx domain sticks
     * to this solution. Maybe it can be deferred until we know the created
     * one is a vmx domain. */
    rc = init_domain_tlb(v);
    if (rc)
        return rc;

    rc = vmx_create_event_channels(v);
    if (rc)
        return rc;

    /* v->arch.schedule_tail = arch_vmx_do_launch; */
    vmx_create_vp(v);

    /* Physical mode emulation initialization, including
     * emulation ID allocation and related memory request
     */
    physical_mode_init(v);

    vlsapic_reset(v);
    vtm_init(v);

    /* Set up the guest's indicator for a VTi domain */
    set_bit(ARCH_VMX_DOMAIN, &v->arch.arch_vmx.flags);

    /* Initialize pNonSys=1 for the first context switch */
    sw = (struct switch_stack *)vcpu_regs(v) - 1;
    sw->pr = (1UL << PRED_NON_SYSCALL);

    return 0;
}

void
vmx_relinquish_guest_resources(struct domain *d)
{
    struct vcpu *v;

    for_each_vcpu(d, v)
        vmx_release_assist_channel(v);

    vacpi_relinquish_resources(d);

    vmx_destroy_ioreq_page(d, &d->arch.vmx_platform.ioreq);
    vmx_destroy_ioreq_page(d, &d->arch.vmx_platform.buf_ioreq);
    vmx_destroy_ioreq_page(d, &d->arch.vmx_platform.buf_pioreq);
}

void
vmx_relinquish_vcpu_resources(struct vcpu *v)
{
    vtime_t *vtm = &(v->arch.arch_vmx.vtm);

    kill_timer(&vtm->vtm_timer);

    free_domain_tlb(v);
    free_vpd(v);
}

typedef struct io_range {
    unsigned long start;
    unsigned long size;
    unsigned long type;
} io_range_t;

static const io_range_t io_ranges[] = {
    {VGA_IO_START,    VGA_IO_SIZE,    GPFN_FRAME_BUFFER},
    {MMIO_START,      MMIO_SIZE,      GPFN_LOW_MMIO},
    {LEGACY_IO_START, LEGACY_IO_SIZE, GPFN_LEGACY_IO},
    {IO_SAPIC_START,  IO_SAPIC_SIZE,  GPFN_IOSAPIC},
    {PIB_START,       PIB_SIZE,       GPFN_PIB},
};

// The P2M table is built in libxc/ia64/xc_ia64_hvm_build.c @ setup_guest(),
// so only mark I/O memory space here.
static void vmx_build_io_physmap_table(struct domain *d)
{
    unsigned long i, j;

    /* Mark I/O ranges */
    for (i = 0; i < (sizeof(io_ranges) / sizeof(io_range_t)); i++) {
        for (j = io_ranges[i].start;
             j < io_ranges[i].start + io_ranges[i].size; j += PAGE_SIZE)
            (void)__assign_domain_page(d, j, io_ranges[i].type,
                                       ASSIGN_writable);
    }
}

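/*
 * Each page of the ranges listed in io_ranges[] is entered in the P2M with
 * the matching GPFN_* attribute, so later guest accesses to the frame
 * buffer, low MMIO, legacy I/O, IOSAPIC and PIB areas can be recognized as
 * I/O and handled accordingly (presumably by the in-hypervisor models or the
 * device model).
 */
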
int vmx_setup_platform(struct domain *d)
{
    ASSERT(d != dom0); /* only for non-privileged vti domain */

    vmx_build_io_physmap_table(d);

    vmx_init_ioreq_page(d, &d->arch.vmx_platform.ioreq);
    vmx_init_ioreq_page(d, &d->arch.vmx_platform.buf_ioreq);
    vmx_init_ioreq_page(d, &d->arch.vmx_platform.buf_pioreq);

    /* TEMP */
    d->arch.vmx_platform.pib_base = 0xfee00000UL;

    d->arch.sal_data = xmalloc(struct xen_sal_data);
    if (d->arch.sal_data == NULL)
        return -ENOMEM;

    /* Only open one port for I/O and interrupt emulation */
    memset(&d->shared_info->evtchn_mask[0], 0xff,
           sizeof(d->shared_info->evtchn_mask));

    /* Initialize iosapic model within hypervisor */
    viosapic_init(d);

    vacpi_init(d);

    return 0;
}

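/*
 * The three pages set up above correspond to the synchronous ioreq page and
 * the buffered MMIO and buffered PIO pages referred to in the changeset
 * description. vmx_init_ioreq_page() pauses the domain once per page and
 * each successful vmx_set_ioreq_page() unpauses it again, so the guest
 * cannot run before the toolstack has registered all three pages.
 */
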
void vmx_do_resume(struct vcpu *v)
{
    ioreq_t *p;

    vmx_load_all_rr(v);
    migrate_timer(&v->arch.arch_vmx.vtm.vtm_timer, v->processor);

    /* stolen from hvm_do_resume() in arch/x86/hvm/hvm.c */
    /* NB. Optimised for common case (p->state == STATE_IOREQ_NONE). */
    p = &get_vio(v)->vp_ioreq;
    while (p->state != STATE_IOREQ_NONE) {
        switch (p->state) {
        case STATE_IORESP_READY: /* IORESP_READY -> NONE */
            vmx_io_assist(v);
            break;
        case STATE_IOREQ_READY:
        case STATE_IOREQ_INPROCESS:
            /* IOREQ_{READY,INPROCESS} -> IORESP_READY */
            wait_on_xen_event_channel(v->arch.arch_vmx.xen_port,
                                      (p->state != STATE_IOREQ_READY) &&
                                      (p->state != STATE_IOREQ_INPROCESS));
            break;
        default:
            gdprintk(XENLOG_ERR,
                     "Weird HVM iorequest state %d.\n", p->state);
            domain_crash_synchronous();
        }
    }
}

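/*
 * On every reschedule of the vcpu, the region registers are reloaded and the
 * virtual timer is migrated to the new processor; then any in-flight I/O
 * request in the shared page is drained: a ready response is consumed by
 * vmx_io_assist(), while a request still being handled by the device model
 * makes the vcpu block on its xen_port event channel until the state changes.
 */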