xen-vtx-unstable
changeset 6603:f27205ea60ef
merge?
--- a/linux-2.6-xen-sparse/mm/memory.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/linux-2.6-xen-sparse/mm/memory.c	Sat Sep 03 16:58:50 2005 +0000
@@ -1367,20 +1367,15 @@ static int do_wp_page(struct mm_struct *
 	struct page *old_page, *new_page;
 	unsigned long pfn = pte_pfn(pte);
 	pte_t entry;
+	struct page invalid_page;
 
 	if (unlikely(!pfn_valid(pfn))) {
-		/*
-		 * This should really halt the system so it can be debugged or
-		 * at least the kernel stops what it's doing before it corrupts
-		 * data, but for the moment just pretend this is OOM.
-		 */
-		pte_unmap(page_table);
-		printk(KERN_ERR "do_wp_page: bogus page at address %08lx\n",
-		       address);
-		spin_unlock(&mm->page_table_lock);
-		return VM_FAULT_OOM;
+		/* This can happen with /dev/mem (PROT_WRITE, MAP_PRIVATE). */
+		invalid_page.flags = (1<<PG_reserved) | (1<<PG_locked);
+		old_page = &invalid_page;
+	} else {
+		old_page = pfn_to_page(pfn);
 	}
-	old_page = pfn_to_page(pfn);
 
 	if (!TestSetPageLocked(old_page)) {
 		int reuse = can_share_swap_page(old_page);
@@ -1416,7 +1411,13 @@ static int do_wp_page(struct mm_struct *
 		new_page = alloc_page_vma(GFP_HIGHUSER, vma, address);
 		if (!new_page)
 			goto no_new_page;
-		copy_user_highpage(new_page, old_page, address);
+		if (old_page == &invalid_page) {
+			char *vto = kmap_atomic(new_page, KM_USER1);
+			copy_page(vto, (void *)(address & PAGE_MASK));
+			kunmap_atomic(vto, KM_USER1);
+		} else {
+			copy_user_highpage(new_page, old_page, address);
+		}
 	}
 	/*
 	 * Re-check the pte - we dropped the lock
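The do_wp_page() change above stops treating a write fault on a pfn without a struct page as OOM and instead completes the copy-on-write through the faulting virtual address; the new comment names the one known way to reach this path. A minimal user-space trigger, assuming a kernel built from this tree and read/write access to /dev/mem (the test program and the chosen physical offset are illustrative, not part of the changeset):

    /* Map a page of physical memory privately; the first write forces a
     * COW fault into do_wp_page() with !pfn_valid(pfn) when the target
     * physical address has no struct page (e.g. an I/O hole). */
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
        int fd = open("/dev/mem", O_RDWR);
        if (fd < 0) { perror("open"); return 1; }

        unsigned char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
                                MAP_PRIVATE, fd, 0xA0000 /* example offset */);
        if (p == MAP_FAILED) { perror("mmap"); return 1; }

        p[0] = 0x42;    /* write fault: kernel now copies via the user VA */

        munmap(p, 4096);
        close(fd);
        return 0;
    }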
--- a/tools/firmware/rombios/rombios.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/firmware/rombios/rombios.c	Sat Sep 03 16:58:50 2005 +0000
@@ -31,7 +31,7 @@
 
 // Xen full virtualization does not handle unaligned IO with page crossing.
 // Disable 32-bit PIO as a workaround.
-#define NO_PIO32
+#undef NO_PIO32
 
 
 // ROM BIOS compatability entry points:
--- a/tools/firmware/vmxassist/Makefile	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/firmware/vmxassist/Makefile	Sat Sep 03 16:58:50 2005 +0000
@@ -24,7 +24,7 @@ include $(XEN_ROOT)/tools/Rules.mk
 # The emulator code lives in ROM space
 TEXTADDR=0x000D0000
 
-DEFINES=-DDEBUG -DENABLE_VME -DTEXTADDR=${TEXTADDR}
+DEFINES=-DDEBUG -DTEXTADDR=${TEXTADDR}
 XENINC=-I$(XEN_ROOT)/xen/include -I$(XEN_ROOT)/tools/libxc
 #DEFINES=-DDEBUG -DTEST -DTEXTADDR=${TEXTADDR}
 #XENINC=-I/home/leendert/xen/xeno-unstable.bk/xen/include
--- a/tools/firmware/vmxassist/TODO	Sat Sep 03 16:57:54 2005 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,8 +0,0 @@
-
-- Use the VME extensions (interrupt handling)
-
-- Use E820 map in vmxassist instead of cmos hack
-
-- Add ACPI support (Nitin's patch)
-
-
--- a/tools/firmware/vmxassist/setup.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/firmware/vmxassist/setup.c	Sat Sep 03 16:58:50 2005 +0000
@@ -353,7 +353,7 @@ main()
 #endif
 	setup_gdt();
 	setup_idt();
-#ifdef ENABLE_VME
+#ifndef TEST
 	set_cr4(get_cr4() | CR4_VME);
 #endif
 	setup_ctx();
--- a/tools/firmware/vmxassist/vm86.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/firmware/vmxassist/vm86.c	Sat Sep 03 16:58:50 2005 +0000
@@ -465,8 +465,7 @@ movcr(struct regs *regs, unsigned prefix
  * Emulate a segment load in protected mode
  */
 int
-load_seg(unsigned long sel, unsigned long *base, unsigned long *limit,
-	union vmcs_arbytes *arbytes)
+load_seg(unsigned long sel, u32 *base, u32 *limit, union vmcs_arbytes *arbytes)
 {
 	unsigned long long entry;
 
--- a/tools/firmware/vmxassist/vmxloader.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/firmware/vmxassist/vmxloader.c	Sat Sep 03 16:58:50 2005 +0000
@@ -110,8 +110,8 @@ main()
 	}
 #ifdef _ACPI_
 	puts("Loading ACPI ...\n");
-	if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000 ){
-		/* make sure acpi table does not overlap rombios
+	if (ACPI_PHYSICAL_ADDRESS+sizeof(acpi) <= 0xF0000 ){
+		/* make sure acpi table does not overlap rombios
 		 * currently acpi less than 8K will be OK.
 		 */
 		memcpy((void *)ACPI_PHYSICAL_ADDRESS, acpi, sizeof(acpi));
@@ -122,5 +122,6 @@ main()
 	memcpy((void *)TEXTADDR, vmxassist, sizeof(vmxassist));
 	puts("Go ...\n");
 	((void (*)())TEXTADDR)();
+	return 0;
 }
 
--- a/tools/ioemu/exec.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/ioemu/exec.c	Sat Sep 03 16:58:50 2005 +0000
@@ -142,6 +142,10 @@ void cpu_set_log(int log_flags)
 #else
         setvbuf(logfile, NULL, _IOLBF, 0);
 #endif
+/*
+        stdout = logfile;
+        stderr = logfile;
+*/
     }
 }
 
@@ -386,9 +390,6 @@ void cpu_physical_memory_rw(target_phys_
                 io_mem_write[io_index][1](io_mem_opaque[io_index], addr, val);
                 l = 2;
             } else {
-                if (l!=1){
-                    fprintf(logfile, "ERROR 8 bit mmio\n");
-                }
                 /* 8 bit access */
                 val = ldub_raw(buf);
                 io_mem_write[io_index][0](io_mem_opaque[io_index], addr, val);
--- a/tools/ioemu/hw/pcnet.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/ioemu/hw/pcnet.c	Sat Sep 03 16:58:50 2005 +0000
@@ -569,6 +569,10 @@ static void pcnet_transmit(PCNetState *s
         cpu_physical_memory_read(PHYSADDR(s, tmd.tmd0.tbadr),
                                  s->buffer + s->xmit_pos, 4096 - tmd.tmd1.bcnt);
         s->xmit_pos += 4096 - tmd.tmd1.bcnt;
+
+        tmd.tmd1.own = 0;
+        TMDSTORE(&tmd, PHYSADDR(s,CSR_CXDA(s)));
+
 #ifdef PCNET_DEBUG
         printf("pcnet_transmit size=%d\n", s->xmit_pos);
 #endif
@@ -580,10 +584,10 @@ static void pcnet_transmit(PCNetState *s
         s->csr[0] &= ~0x0008;   /* clear TDMD */
         s->csr[4] |= 0x0004;    /* set TXSTRT */
         s->xmit_pos = -1;
-    }
-
-    tmd.tmd1.own = 0;
-    TMDSTORE(&tmd, PHYSADDR(s,CSR_CXDA(s)));
+    } else {
+        tmd.tmd1.own = 0;
+        TMDSTORE(&tmd, PHYSADDR(s,CSR_CXDA(s)));
+    }
     if (!CSR_TOKINTD(s) || (CSR_LTINTEN(s) && tmd.tmd1.ltint))
         s->csr[0] |= 0x0200;    /* set TINT */
 
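The pcnet.c reordering above matters for chained transmits: the emulated device now clears the descriptor's OWN bit and stores it back to guest memory per consumed segment (including mid-chain), rather than once after the whole frame. A self-contained sketch of the ownership handshake this preserves; the types and names here are illustrative stand-ins, not the emulator's own:

    /* OWN-bit handshake: the driver sets own=1 to hand a transmit
     * descriptor to the device; the device must clear own as soon as the
     * segment is consumed, or the driver cannot reuse the ring slot. */
    #include <stdint.h>
    #include <string.h>

    struct tx_desc {
        uint8_t  buf[256];   /* segment payload (inlined for the sketch) */
        uint16_t len;        /* segment length                           */
        uint8_t  own;        /* 1 = owned by device, 0 = owned by driver */
        uint8_t  last;       /* 1 = final segment of the frame           */
    };

    /* Consume one descriptor; returns 1 once a whole frame is gathered. */
    static int device_consume(struct tx_desc *d, uint8_t *frame, int *pos)
    {
        memcpy(frame + *pos, d->buf, d->len);
        *pos += d->len;
        d->own = 0;          /* hand the slot back immediately, mid-chain too */
        return d->last;
    }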
--- a/tools/ioemu/target-i386-dm/helper2.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/ioemu/target-i386-dm/helper2.c	Sat Sep 03 16:58:50 2005 +0000
@@ -169,133 +169,217 @@ ioreq_t* cpu_get_ioreq(void)
 unsigned long
 do_inp(CPUState *env, unsigned long addr, unsigned long size)
 {
-    switch(size) {
-    case 1:
-        return cpu_inb(env, addr);
-    case 2:
-        return cpu_inw(env, addr);
-    case 4:
-        return cpu_inl(env, addr);
-    default:
-        fprintf(logfile, "inp: bad size: %lx %lx\n", addr, size);
-        exit(-1);
-    }
+    switch(size) {
+    case 1:
+        return cpu_inb(env, addr);
+    case 2:
+        return cpu_inw(env, addr);
+    case 4:
+        return cpu_inl(env, addr);
+    default:
+        fprintf(logfile, "inp: bad size: %lx %lx\n", addr, size);
+        exit(-1);
+    }
 }
 
 void
 do_outp(CPUState *env, unsigned long addr, unsigned long size,
         unsigned long val)
 {
-    switch(size) {
-    case 1:
-        return cpu_outb(env, addr, val);
-    case 2:
-        return cpu_outw(env, addr, val);
-    case 4:
-        return cpu_outl(env, addr, val);
-    default:
-        fprintf(logfile, "outp: bad size: %lx %lx\n", addr, size);
-        exit(-1);
-    }
+    switch(size) {
+    case 1:
+        return cpu_outb(env, addr, val);
+    case 2:
+        return cpu_outw(env, addr, val);
+    case 4:
+        return cpu_outl(env, addr, val);
+    default:
+        fprintf(logfile, "outp: bad size: %lx %lx\n", addr, size);
+        exit(-1);
+    }
 }
 
 extern void cpu_physical_memory_rw(target_phys_addr_t addr, uint8_t *buf,
                                    int len, int is_write);
 
 static inline void
-read_physical(target_phys_addr_t addr, unsigned long size, void *val)
+read_physical(u64 addr, unsigned long size, void *val)
 {
-    return cpu_physical_memory_rw(addr, val, size, 0);
+    return cpu_physical_memory_rw((target_phys_addr_t)addr, val, size, 0);
 }
 
 static inline void
-write_physical(target_phys_addr_t addr, unsigned long size, void *val)
+write_physical(u64 addr, unsigned long size, void *val)
 {
-    return cpu_physical_memory_rw(addr, val, size, 1);
+    return cpu_physical_memory_rw((target_phys_addr_t)addr, val, size, 1);
 }
 
-//send the ioreq to device model
-void cpu_dispatch_ioreq(CPUState *env, ioreq_t *req)
+void
+cpu_ioreq_pio(CPUState *env, ioreq_t *req)
 {
-    int i;
-    int sign;
+    int i, sign;
 
-    sign = (req->df) ? -1 : 1;
+    sign = req->df ? -1 : 1;
 
-    if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) {
-        if (req->size != 4) {
-            // Bochs expects higher bits to be 0
-            req->u.data &= (1UL << (8 * req->size))-1;
-        }
-    }
+    if (req->dir == IOREQ_READ) {
+        if (!req->pdata_valid) {
+            req->u.data = do_inp(env, req->addr, req->size);
+        } else {
+            unsigned long tmp;
 
-    if (req->port_mm == 0){//port io
-        if(req->dir == IOREQ_READ){//read
-            if (!req->pdata_valid) {
-                req->u.data = do_inp(env, req->addr, req->size);
-            } else {
-                unsigned long tmp;
-
-                for (i = 0; i < req->count; i++) {
-                    tmp = do_inp(env, req->addr, req->size);
-                    write_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size),
-                                   req->size, &tmp);
-                }
+            for (i = 0; i < req->count; i++) {
+                tmp = do_inp(env, req->addr, req->size);
+                write_physical((target_phys_addr_t) req->u.pdata
+                               + (sign * i * req->size),
+                               req->size, &tmp);
             }
-        } else if(req->dir == IOREQ_WRITE) {
-            if (!req->pdata_valid) {
-                do_outp(env, req->addr, req->size, req->u.data);
-            } else {
-                for (i = 0; i < req->count; i++) {
-                    unsigned long tmp;
-
-                    read_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), req->size,
-                                  &tmp);
-                    do_outp(env, req->addr, req->size, tmp);
-                }
-            }
-
         }
-    } else if (req->port_mm == 1){//memory map io
+    } else if (req->dir == IOREQ_WRITE) {
         if (!req->pdata_valid) {
-            //handle stos
-            if(req->dir == IOREQ_READ) { //read
-                for (i = 0; i < req->count; i++) {
-                    read_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &req->u.data);
-                }
-            } else if(req->dir == IOREQ_WRITE) { //write
-                for (i = 0; i < req->count; i++) {
-                    write_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &req->u.data);
-                }
-            }
+            do_outp(env, req->addr, req->size, req->u.data);
         } else {
-            //handle movs
-            unsigned long tmp;
-            if (req->dir == IOREQ_READ) {
-                for (i = 0; i < req->count; i++) {
-                    read_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &tmp);
-                    write_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), req->size, &tmp);
-                }
-            } else if (req->dir == IOREQ_WRITE) {
-                for (i = 0; i < req->count; i++) {
-                    read_physical((target_phys_addr_t)req->u.pdata + (sign * i * req->size), req->size, &tmp);
-                    write_physical((target_phys_addr_t)req->addr + (sign * i * req->size), req->size, &tmp);
-                }
+            for (i = 0; i < req->count; i++) {
+                unsigned long tmp;
+
+                read_physical((target_phys_addr_t) req->u.pdata
+                              + (sign * i * req->size),
+                              req->size, &tmp);
+                do_outp(env, req->addr, req->size, tmp);
             }
         }
     }
-    /* No state change if state = STATE_IORESP_HOOK */
-    if (req->state == STATE_IOREQ_INPROCESS)
-        req->state = STATE_IORESP_READY;
-    env->send_event = 1;
+}
+
+void
+cpu_ioreq_move(CPUState *env, ioreq_t *req)
+{
+    int i, sign;
+
+    sign = req->df ? -1 : 1;
+
+    if (!req->pdata_valid) {
+        if (req->dir == IOREQ_READ) {
+            for (i = 0; i < req->count; i++) {
+                read_physical(req->addr
+                              + (sign * i * req->size),
+                              req->size, &req->u.data);
+            }
+        } else if (req->dir == IOREQ_WRITE) {
+            for (i = 0; i < req->count; i++) {
+                write_physical(req->addr
+                               + (sign * i * req->size),
+                               req->size, &req->u.data);
+            }
+        }
+    } else {
+        unsigned long tmp;
+
+        if (req->dir == IOREQ_READ) {
+            for (i = 0; i < req->count; i++) {
+                read_physical(req->addr
+                              + (sign * i * req->size),
+                              req->size, &tmp);
+                write_physical((target_phys_addr_t )req->u.pdata
+                               + (sign * i * req->size),
+                               req->size, &tmp);
+            }
+        } else if (req->dir == IOREQ_WRITE) {
+            for (i = 0; i < req->count; i++) {
+                read_physical((target_phys_addr_t) req->u.pdata
+                              + (sign * i * req->size),
+                              req->size, &tmp);
+                write_physical(req->addr
+                               + (sign * i * req->size),
+                               req->size, &tmp);
+            }
+        }
+    }
+}
+
+void
+cpu_ioreq_and(CPUState *env, ioreq_t *req)
+{
+    unsigned long tmp1, tmp2;
+
+    if (req->pdata_valid != 0)
+        hw_error("expected scalar value");
+
+    read_physical(req->addr, req->size, &tmp1);
+    if (req->dir == IOREQ_WRITE) {
+        tmp2 = tmp1 & (unsigned long) req->u.data;
+        write_physical(req->addr, req->size, &tmp2);
+    }
+    req->u.data = tmp1;
+}
+
+void
+cpu_ioreq_or(CPUState *env, ioreq_t *req)
+{
+    unsigned long tmp1, tmp2;
+
+    if (req->pdata_valid != 0)
+        hw_error("expected scalar value");
+
+    read_physical(req->addr, req->size, &tmp1);
+    if (req->dir == IOREQ_WRITE) {
+        tmp2 = tmp1 | (unsigned long) req->u.data;
+        write_physical(req->addr, req->size, &tmp2);
+    }
+    req->u.data = tmp1;
+}
+
+void
+cpu_ioreq_xor(CPUState *env, ioreq_t *req)
+{
+    unsigned long tmp1, tmp2;
+
+    if (req->pdata_valid != 0)
+        hw_error("expected scalar value");
+
+    read_physical(req->addr, req->size, &tmp1);
+    if (req->dir == IOREQ_WRITE) {
+        tmp2 = tmp1 ^ (unsigned long) req->u.data;
+        write_physical(req->addr, req->size, &tmp2);
+    }
+    req->u.data = tmp1;
 }
 
 void
 cpu_handle_ioreq(CPUState *env)
 {
     ioreq_t *req = cpu_get_ioreq();
-    if (req)
-        cpu_dispatch_ioreq(env, req);
+
+    if (req) {
+        if ((!req->pdata_valid) && (req->dir == IOREQ_WRITE)) {
+            if (req->size != 4)
+                req->u.data &= (1UL << (8 * req->size))-1;
+        }
+
+        switch (req->type) {
+        case IOREQ_TYPE_PIO:
+            cpu_ioreq_pio(env, req);
+            break;
+        case IOREQ_TYPE_COPY:
+            cpu_ioreq_move(env, req);
+            break;
+        case IOREQ_TYPE_AND:
+            cpu_ioreq_and(env, req);
+            break;
+        case IOREQ_TYPE_OR:
+            cpu_ioreq_or(env, req);
+            break;
+        case IOREQ_TYPE_XOR:
+            cpu_ioreq_xor(env, req);
            break;
+        default:
+            hw_error("Invalid ioreq type 0x%x", req->type);
+        }
+
+        /* No state change if state = STATE_IORESP_HOOK */
+        if (req->state == STATE_IOREQ_INPROCESS)
+            req->state = STATE_IORESP_READY;
+        env->send_event = 1;
+    }
 }
 
 void
@@ -321,7 +405,7 @@ do_interrupt(CPUState *env, int vector)
 
     // Send a message on the event channel. Add the vector to the shared mem
     // page.
-    intr = &(shared_page->sp_global.pic_intr[0]);
+    intr = (unsigned long *) &(shared_page->sp_global.pic_intr[0]);
     atomic_set_bit(vector, intr);
     if (loglevel & CPU_LOG_INT)
        fprintf(logfile, "injecting vector: %x\n", vector);
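The helper2.c rewrite splits the old cpu_dispatch_ioreq() into per-type handlers selected by req->type, and the new AND/OR/XOR handlers implement a read-modify-write on guest physical memory that always hands the old memory value back in req->u.data. A self-contained model of that contract, with simplified stand-ins for ioreq_t and read_physical()/write_physical() (names here are mine, not the device model's):

    #include <stdio.h>

    enum { IOREQ_READ, IOREQ_WRITE };

    struct mini_req { int dir; unsigned long mem; unsigned long data; };

    static void mini_ioreq_and(struct mini_req *req)
    {
        unsigned long old = req->mem;        /* stands in for read_physical()  */
        if (req->dir == IOREQ_WRITE)
            req->mem = old & req->data;      /* stands in for write_physical() */
        req->data = old;                     /* old value returned to caller   */
    }

    int main(void)
    {
        struct mini_req r = { IOREQ_WRITE, 0xff0f, 0x00ff };
        mini_ioreq_and(&r);
        printf("mem=%#lx old=%#lx\n", r.mem, r.data);  /* mem=0xf old=0xff0f */
        return 0;
    }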
--- a/tools/ioemu/vl.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/ioemu/vl.c	Sat Sep 03 16:58:50 2005 +0000
@@ -413,6 +413,11 @@ void hw_error(const char *fmt, ...)
     fprintf(stderr, "qemu: hardware error: ");
     vfprintf(stderr, fmt, ap);
     fprintf(stderr, "\n");
+    if (logfile) {
+        fprintf(logfile, "qemu: hardware error: ");
+        vfprintf(logfile, fmt, ap);
+        fprintf(logfile, "\n");
+    }
     va_end(ap);
     abort();
 }
--- a/tools/libxc/xc_linux_save.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/libxc/xc_linux_save.c	Sat Sep 03 16:58:50 2005 +0000
@@ -21,6 +21,24 @@
 
 #define MAX_MBIT_RATE 500
 
+
+/*
+** Default values for important tuning parameters. Can override by passing
+** non-zero replacement values to xc_linux_save().
+**
+** XXX SMH: should consider if want to be able to override MAX_MBIT_RATE too.
+**
+*/
+#define DEF_MAX_ITERS   29   /* limit us to 30 times round loop */
+#define DEF_MAX_FACTOR   3   /* never send more than 3x nr_pfns */
+
+
+
+/* Flags to control behaviour of xc_linux_save */
+#define XCFLAGS_LIVE  1
+#define XCFLAGS_DEBUG 2
+
+
 #define DEBUG 0
 
 #if 1
@@ -320,18 +338,18 @@ static int suspend_and_state(int xc_hand
                              xc_dominfo_t *info,
                              vcpu_guest_context_t *ctxt)
 {
-    int i=0;
+    int i = 0;
     char ans[30];
 
     printf("suspend\n");
     fflush(stdout);
     if (fgets(ans, sizeof(ans), stdin) == NULL) {
-       ERR("failed reading suspend reply");
-       return -1;
+        ERR("failed reading suspend reply");
+        return -1;
     }
     if (strncmp(ans, "done\n", 5)) {
-       ERR("suspend reply incorrect: %s", ans);
-       return -1;
+        ERR("suspend reply incorrect: %s", ans);
+        return -1;
     }
 
 retry:
@@ -377,20 +395,17 @@ retry:
     return -1;
 }
 
-int xc_linux_save(int xc_handle, int io_fd, u32 dom)
+int xc_linux_save(int xc_handle, int io_fd, u32 dom, u32 max_iters,
+                  u32 max_factor, u32 flags)
 {
     xc_dominfo_t info;
 
     int rc = 1, i, j, k, last_iter, iter = 0;
     unsigned long mfn;
-    int live = 0; // (ioctxt->flags & XCFLAGS_LIVE);
-    int debug = 0; // (ioctxt->flags & XCFLAGS_DEBUG);
+    int live  = (flags & XCFLAGS_LIVE);
+    int debug = (flags & XCFLAGS_DEBUG);
     int sent_last_iter, skip_this_iter;
 
-    /* Important tuning parameters */
-    int max_iters = 29; /* limit us to 30 times round loop */
-    int max_factor = 3; /* never send more than 3x nr_pfns */
-
     /* The new domain's shared-info frame number. */
     unsigned long shared_info_frame;
 
@@ -442,8 +457,16 @@ int xc_linux_save(int xc_handle, int io_
 
     MBIT_RATE = START_MBIT_RATE;
 
-    DPRINTF("xc_linux_save start %d\n", dom);
-
+
+    /* If no explicit control parameters given, use defaults */
+    if(!max_iters)
+        max_iters = DEF_MAX_ITERS;
+    if(!max_factor)
+        max_factor = DEF_MAX_FACTOR;
+
+
+    DPRINTF("xc_linux_save start DOM%u live=%s\n", dom, live?"true":"false");
+
     if (mlock(&ctxt, sizeof(ctxt))) {
         ERR("Unable to mlock ctxt");
         return 1;
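With the widened xc_linux_save() signature above, callers pass zero for max_iters or max_factor to select the compiled-in defaults (DEF_MAX_ITERS and DEF_MAX_FACTOR), and request live migration through the flags word. A sketch of a caller, assuming only the declarations shown in the xenguest.h diff below (the wrapper function itself is hypothetical):

    #include <stdint.h>
    #include "xenguest.h"

    /* Live save with default tuning: zeros fall through to the
     * DEF_MAX_ITERS (29) and DEF_MAX_FACTOR (3) defaults inside
     * xc_linux_save(), per the comment in the diff above. */
    int save_domain_live(int xc_handle, int io_fd, uint32_t dom)
    {
        return xc_linux_save(xc_handle, io_fd, dom,
                             0 /* max_iters: use default */,
                             0 /* max_factor: use default */,
                             XCFLAGS_LIVE);
    }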
--- a/tools/libxc/xenguest.h	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/libxc/xenguest.h	Sat Sep 03 16:58:50 2005 +0000
@@ -6,13 +6,12 @@
  * Copyright (c) 2003-2004, K A Fraser.
  */
 
-#ifndef XENBUILD_H
-#define XENBUILD_H
+#ifndef XENGUEST_H
+#define XENGUEST_H
 
-#define XCFLAGS_VERBOSE   1
-#define XCFLAGS_LIVE      2
-#define XCFLAGS_DEBUG     4
-#define XCFLAGS_CONFIGURE 8
+#define XCFLAGS_LIVE  1
+#define XCFLAGS_DEBUG 2
+
 
 /**
  * This function will save a domain running Linux.
@@ -22,7 +21,8 @@
  * @parm dom the id of the domain
  * @return 0 on success, -1 on failure
  */
-int xc_linux_save(int xc_handle, int fd, uint32_t dom);
+int xc_linux_save(int xc_handle, int fd, uint32_t dom, uint32_t max_iters,
+                  uint32_t max_factor, uint32_t flags /* XCFLAGS_xxx */);
 
 /**
  * This function will restore a saved domain running Linux.
@@ -35,8 +35,9 @@ int xc_linux_save(int xc_handle, int fd,
  * @parm store_mfn returned with the mfn of the store page
  * @return 0 on success, -1 on failure
  */
-int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom, unsigned long nr_pfns,
-                     unsigned int store_evtchn, unsigned long *store_mfn);
+int xc_linux_restore(int xc_handle, int io_fd, uint32_t dom,
+                     unsigned long nr_pfns, unsigned int store_evtchn,
+                     unsigned long *store_mfn);
 
 int xc_linux_build(int xc_handle,
                    uint32_t domid,
@@ -65,4 +66,4 @@ int xc_vmx_build(int xc_handle,
                  unsigned int store_evtchn,
                  unsigned long *store_mfn);
 
-#endif
+#endif // XENGUEST_H
--- a/tools/python/xen/xend/XendCheckpoint.py	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/python/xen/xend/XendCheckpoint.py	Sat Sep 03 16:58:50 2005 +0000
@@ -34,7 +34,7 @@ def read_exact(fd, size, errmsg):
         raise XendError(errmsg)
     return buf
 
-def save(xd, fd, dominfo):
+def save(xd, fd, dominfo, live):
     write_exact(fd, SIGNATURE, "could not write guest state file: signature")
 
     config = sxp.to_string(dominfo.sxpr())
@@ -42,8 +42,13 @@ def save(xd, fd, dominfo):
                 "could not write guest state file: config len")
     write_exact(fd, config, "could not write guest state file: config")
 
+    # xc_save takes three customization parameters: maxit, max_f, and flags
+    # the last controls whether or not save is 'live', while the first two
+    # further customize behaviour when 'live' save is enabled. Passing "0"
+    # simply uses the defaults compiled into libxenguest; see the comments
+    # and/or code in xc_linux_save() for more information.
     cmd = [PATH_XC_SAVE, str(xc.handle()), str(fd),
-           str(dominfo.id)]
+           str(dominfo.id), "0", "0", str(live) ]
     log.info("[xc_save] " + join(cmd))
     child = xPopen3(cmd, True, -1, [fd, xc.handle()])
 
--- a/tools/python/xen/xend/XendDomain.py	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/python/xen/xend/XendDomain.py	Sat Sep 03 16:58:50 2005 +0000
@@ -542,7 +542,7 @@ class XendDomain:
             dominfo.name = "tmp-" + dominfo.name
 
         try:
-            XendCheckpoint.save(self, sock.fileno(), dominfo)
+            XendCheckpoint.save(self, sock.fileno(), dominfo, live)
         except:
             if dst == "localhost":
                 dominfo.name = string.replace(dominfo.name, "tmp-", "", 1)
@@ -563,7 +563,8 @@ class XendDomain:
 
             fd = os.open(dst, os.O_WRONLY | os.O_CREAT | os.O_TRUNC)
 
-            return XendCheckpoint.save(self, fd, dominfo)
+            # For now we don't support 'live checkpoint'
+            return XendCheckpoint.save(self, fd, dominfo, False)
 
         except OSError, ex:
             raise XendError("can't write guest state file %s: %s" %
--- a/tools/xcutils/xc_save.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/tools/xcutils/xc_save.c	Sat Sep 03 16:58:50 2005 +0000
@@ -17,14 +17,17 @@
 int
 main(int argc, char **argv)
 {
-    unsigned int xc_fd, io_fd, domid;
+    unsigned int xc_fd, io_fd, domid, maxit, max_f, flags;
 
-    if (argc != 4)
-        errx(1, "usage: %s xcfd iofd domid", argv[0]);
+    if (argc != 7)
+        errx(1, "usage: %s xcfd iofd domid maxit maxf flags", argv[0]);
 
     xc_fd = atoi(argv[1]);
     io_fd = atoi(argv[2]);
     domid = atoi(argv[3]);
+    maxit = atoi(argv[4]);
+    max_f = atoi(argv[5]);
+    flags = atoi(argv[6]);
 
-    return xc_linux_save(xc_fd, io_fd, domid);
+    return xc_linux_save(xc_fd, io_fd, domid, maxit, max_f, flags);
 }
--- a/xen/arch/x86/shadow.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/xen/arch/x86/shadow.c	Sat Sep 03 16:58:50 2005 +0000
@@ -531,7 +531,7 @@ static void shadow_map_l1_into_current_l
     int i, init_table = 0;
 
     __guest_get_l2e(v, va, &gl2e);
-    ASSERT(l2e_get_flags(gl2e) & _PAGE_PRESENT);
+    ASSERT(guest_l2e_get_flags(gl2e) & _PAGE_PRESENT);
     gl1pfn = l2e_get_pfn(gl2e);
 
     if ( !(sl1mfn = __shadow_status(d, gl1pfn, PGT_l1_shadow)) )
@@ -1664,7 +1664,7 @@ static inline int l1pte_write_fault(
         return 0;
     }
 
-    ASSERT(l1e_get_flags(gpte) & _PAGE_RW);
+    ASSERT(guest_l1e_get_flags(gpte) & _PAGE_RW);
     guest_l1e_add_flags(gpte, _PAGE_DIRTY | _PAGE_ACCESSED);
     spte = l1e_from_pfn(gmfn, guest_l1e_get_flags(gpte) & ~_PAGE_GLOBAL);
 
--- a/xen/arch/x86/vmx.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/xen/arch/x86/vmx.c	Sat Sep 03 16:58:50 2005 +0000
@@ -602,16 +602,67 @@ static int check_for_null_selector(unsig
     return 0;
 }
 
+void send_pio_req(struct cpu_user_regs *regs, unsigned long port,
+                  unsigned long count, int size, long value, int dir, int pvalid)
+{
+    struct vcpu *v = current;
+    vcpu_iodata_t *vio;
+    ioreq_t *p;
+
+    vio = get_vio(v->domain, v->vcpu_id);
+    if (vio == NULL) {
+        printk("bad shared page: %lx\n", (unsigned long) vio);
+        domain_crash_synchronous();
+    }
+
+    if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
+        printf("VMX I/O has not yet completed\n");
+        domain_crash_synchronous();
+    }
+    set_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+
+    p = &vio->vp_ioreq;
+    p->dir = dir;
+    p->pdata_valid = pvalid;
+
+    p->type = IOREQ_TYPE_PIO;
+    p->size = size;
+    p->addr = port;
+    p->count = count;
+    p->df = regs->eflags & EF_DF ? 1 : 0;
+
+    if (pvalid) {
+        if (vmx_paging_enabled(current))
+            p->u.pdata = (void *) gva_to_gpa(value);
+        else
+            p->u.pdata = (void *) value; /* guest VA == guest PA */
+    } else
+        p->u.data = value;
+
+    p->state = STATE_IOREQ_READY;
+
+    if (vmx_portio_intercept(p)) {
+        /* no blocking & no evtchn notification */
+        clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
+        return;
+    }
+
+    evtchn_send(iopacket_port(v->domain));
+    vmx_wait_io();
+}
+
 static void vmx_io_instruction(struct cpu_user_regs *regs,
                                unsigned long exit_qualification, unsigned long inst_len)
 {
-    struct vcpu *d = current;
-    vcpu_iodata_t *vio;
-    ioreq_t *p;
-    unsigned long addr;
+    struct mi_per_cpu_info *mpcip;
     unsigned long eip, cs, eflags;
+    unsigned long port, size, dir;
     int vm86;
 
+    mpcip = &current->domain->arch.vmx_platform.mpci;
+    mpcip->instr = INSTR_PIO;
+    mpcip->flags = 0;
+
     __vmread(GUEST_RIP, &eip);
     __vmread(GUEST_CS_SELECTOR, &cs);
     __vmread(GUEST_RFLAGS, &eflags);
@@ -623,105 +674,87 @@ static void vmx_io_instruction(struct cp
             vm86, cs, eip, exit_qualification);
 
     if (test_bit(6, &exit_qualification))
-        addr = (exit_qualification >> 16) & (0xffff);
+        port = (exit_qualification >> 16) & 0xFFFF;
     else
-        addr = regs->edx & 0xffff;
-    TRACE_VMEXIT (2,addr);
-
-    vio = get_vio(d->domain, d->vcpu_id);
-    if (vio == 0) {
-        printk("bad shared page: %lx", (unsigned long) vio);
-        domain_crash_synchronous();
-    }
-    p = &vio->vp_ioreq;
-    p->dir = test_bit(3, &exit_qualification); /* direction */
-
-    p->pdata_valid = 0;
-    p->count = 1;
-    p->size = (exit_qualification & 7) + 1;
+        port = regs->edx & 0xffff;
+    TRACE_VMEXIT(2, port);
+    size = (exit_qualification & 7) + 1;
+    dir = test_bit(3, &exit_qualification); /* direction */
 
     if (test_bit(4, &exit_qualification)) { /* string instruction */
-        unsigned long laddr;
+        unsigned long addr, count = 1;
+        int sign = regs->eflags & EF_DF ? -1 : 1;
 
-        __vmread(GUEST_LINEAR_ADDRESS, &laddr);
+        __vmread(GUEST_LINEAR_ADDRESS, &addr);
+
         /*
          * In protected mode, guest linear address is invalid if the
          * selector is null.
          */
-        if (!vm86 && check_for_null_selector(eip)) {
-            laddr = (p->dir == IOREQ_WRITE) ? regs->esi : regs->edi;
-        }
-        p->pdata_valid = 1;
+        if (!vm86 && check_for_null_selector(eip))
+            addr = dir == IOREQ_WRITE ? regs->esi : regs->edi;
 
-        p->u.data = laddr;
-        if (vmx_paging_enabled(d))
-            p->u.pdata = (void *) gva_to_gpa(p->u.data);
-        p->df = (eflags & X86_EFLAGS_DF) ? 1 : 0;
+        if (test_bit(5, &exit_qualification)) { /* "rep" prefix */
+            mpcip->flags |= REPZ;
+            count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
+        }
 
-        if (test_bit(5, &exit_qualification)) /* "rep" prefix */
-            p->count = vm86 ? regs->ecx & 0xFFFF : regs->ecx;
+        /*
+         * Handle string pio instructions that cross pages or that
+         * are unaligned. See the comments in vmx_platform.c/handle_mmio()
+         */
+        if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) {
+            unsigned long value = 0;
 
-        /*
-         * Split up string I/O operations that cross page boundaries. Don't
-         * advance %eip so that "rep insb" will restart at the next page.
-         */
-        if ((p->u.data & PAGE_MASK) !=
-            ((p->u.data + p->count * p->size - 1) & PAGE_MASK)) {
-            VMX_DBG_LOG(DBG_LEVEL_2,
-                "String I/O crosses page boundary (cs:eip=0x%lx:0x%lx)\n",
-                cs, eip);
-            if (p->u.data & (p->size - 1)) {
-                printf("Unaligned string I/O operation (cs:eip=0x%lx:0x%lx)\n",
-                    cs, eip);
-                domain_crash_synchronous();
-            }
-            p->count = (PAGE_SIZE - (p->u.data & ~PAGE_MASK)) / p->size;
-        } else {
-            __update_guest_eip(inst_len);
-        }
-    } else if (p->dir == IOREQ_WRITE) {
-        p->u.data = regs->eax;
+            mpcip->flags |= OVERLAP;
+            if (dir == IOREQ_WRITE)
+                vmx_copy(&value, addr, size, VMX_COPY_IN);
+            send_pio_req(regs, port, 1, size, value, dir, 0);
+        } else {
+            if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) {
+                if (sign > 0)
+                    count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
+                else
+                    count = (addr & ~PAGE_MASK) / size;
+            } else
+                __update_guest_eip(inst_len);
+
+            send_pio_req(regs, port, count, size, addr, dir, 1);
+        }
+    } else {
         __update_guest_eip(inst_len);
-    } else
-        __update_guest_eip(inst_len);
-
-    p->addr = addr;
-    p->port_mm = 0;
-
-    /* Check if the packet needs to be intercepted */
-    if (vmx_portio_intercept(p))
-        /* no blocking & no evtchn notification */
-        return;
-
-    set_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags);
-    p->state = STATE_IOREQ_READY;
-    evtchn_send(iopacket_port(d->domain));
-    vmx_wait_io();
+        send_pio_req(regs, port, 1, size, regs->eax, dir, 0);
+    }
 }
 
-enum { COPY_IN = 0, COPY_OUT };
-
-static inline int
+int
 vmx_copy(void *buf, unsigned long laddr, int size, int dir)
 {
+    unsigned long mfn;
     char *addr;
-    unsigned long mfn;
+    int count;
+
+    while (size > 0) {
+        count = PAGE_SIZE - (laddr & ~PAGE_MASK);
+        if (count > size)
+            count = size;
 
-    if ( (size + (laddr & (PAGE_SIZE - 1))) >= PAGE_SIZE )
-    {
-        printf("vmx_copy exceeds page boundary\n");
-        return 0;
+        mfn = get_mfn_from_pfn(laddr >> PAGE_SHIFT);
+        /* XXX check whether laddr is valid */
+        addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK);
+
+        if (dir == VMX_COPY_IN)
+            memcpy(buf, addr, count);
+        else
+            memcpy(addr, buf, count);
+
+        unmap_domain_page(addr);
+
+        laddr += count;
+        buf += count;
+        size -= count;
     }
 
-    mfn = get_mfn_from_pfn(laddr >> PAGE_SHIFT);
-    addr = (char *)map_domain_page(mfn) + (laddr & ~PAGE_MASK);
-
-    if (dir == COPY_IN)
-        memcpy(buf, addr, size);
-    else
-        memcpy(addr, buf, size);
-
-    unmap_domain_page(addr);
     return 1;
 }
 
@@ -908,7 +941,7 @@ vmx_assist(struct vcpu *d, int mode)
     u32 cp;
 
     /* make sure vmxassist exists (this is not an error) */
-    if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), COPY_IN))
+    if (!vmx_copy(&magic, VMXASSIST_MAGIC_OFFSET, sizeof(magic), VMX_COPY_IN))
         return 0;
     if (magic != VMXASSIST_MAGIC)
         return 0;
@@ -922,20 +955,20 @@ vmx_assist(struct vcpu *d, int mode)
      */
     case VMX_ASSIST_INVOKE:
         /* save the old context */
-        if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), COPY_IN))
+        if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
             goto error;
         if (cp != 0) {
             if (!vmx_world_save(d, &c))
                 goto error;
-            if (!vmx_copy(&c, cp, sizeof(c), COPY_OUT))
+            if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_OUT))
                 goto error;
         }
 
         /* restore the new context, this should activate vmxassist */
-        if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), COPY_IN))
+        if (!vmx_copy(&cp, VMXASSIST_NEW_CONTEXT, sizeof(cp), VMX_COPY_IN))
             goto error;
         if (cp != 0) {
-            if (!vmx_copy(&c, cp, sizeof(c), COPY_IN))
+            if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
                 goto error;
             if (!vmx_world_restore(d, &c))
                 goto error;
@@ -949,10 +982,10 @@ vmx_assist(struct vcpu *d, int mode)
      */
     case VMX_ASSIST_RESTORE:
         /* save the old context */
-        if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), COPY_IN))
+        if (!vmx_copy(&cp, VMXASSIST_OLD_CONTEXT, sizeof(cp), VMX_COPY_IN))
             goto error;
         if (cp != 0) {
-            if (!vmx_copy(&c, cp, sizeof(c), COPY_IN))
+            if (!vmx_copy(&c, cp, sizeof(c), VMX_COPY_IN))
                 goto error;
             if (!vmx_world_restore(d, &c))
                 goto error;
@@ -1554,15 +1587,18 @@ asmlinkage void vmx_vmexit_handler(struc
 
     __vmread(IDT_VECTORING_INFO_FIELD, &idtv_info_field);
     if (idtv_info_field & INTR_INFO_VALID_MASK) {
-        if ((idtv_info_field & 0x0700) != 0x400) { /* exclude soft ints */
-            __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+        __vmwrite(VM_ENTRY_INTR_INFO_FIELD, idtv_info_field);
+
+        __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len);
+        if (inst_len >= 1 && inst_len <= 15)
+            __vmwrite(VM_ENTRY_INSTRUCTION_LEN, inst_len);
 
-            if (idtv_info_field & 0x800) { /* valid error code */
-                unsigned long error_code;
-                __vmread(VM_EXIT_INTR_ERROR_CODE, &error_code);
-                __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
-            }
-        }
+        if (idtv_info_field & 0x800) { /* valid error code */
+            unsigned long error_code;
+            __vmread(IDT_VECTORING_ERROR_CODE, &error_code);
+            __vmwrite(VM_ENTRY_EXCEPTION_ERROR_CODE, error_code);
+        }
+
         VMX_DBG_LOG(DBG_LEVEL_1, "idtv_info_field=%x", idtv_info_field);
     }
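The vmx.c change above routes all port I/O through send_pio_req() and splits string PIO at page boundaries: if the first element itself straddles a page the request is marked OVERLAP and handled one element at a time, otherwise the count is clipped to the page edge and %eip is left unadvanced so the guest re-executes the instruction for the remainder. A self-contained check of that clipping arithmetic (the helper name split_count is mine, extracted from the logic in the diff):

    #include <assert.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    static unsigned long split_count(unsigned long addr, unsigned long count,
                                     unsigned long size, int sign)
    {
        /* Does the whole run [addr, addr + count*size) cross a page? */
        if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK))
            return sign > 0 ? (PAGE_SIZE - (addr & ~PAGE_MASK)) / size
                            : (addr & ~PAGE_MASK) / size;
        return count;    /* fits in one page: no split needed */
    }

    int main(void)
    {
        /* "rep insw" at 0x1ffe, 8 elements: only 1 fits before the page edge */
        assert(split_count(0x1ffe, 8, 2, 1) == 1);
        /* page-aligned run of 8 words fits entirely */
        assert(split_count(0x2000, 8, 2, 1) == 8);
        return 0;
    }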
--- a/xen/arch/x86/vmx_intercept.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/xen/arch/x86/vmx_intercept.c	Sat Sep 03 16:58:50 2005 +0000
@@ -172,7 +172,7 @@ int intercept_pit_io(ioreq_t *p)
 
     if (p->size != 1 ||
         p->pdata_valid ||
-        p->port_mm)
+        p->type != IOREQ_TYPE_PIO)
         return 0;
 
     if (p->addr == PIT_MODE &&
@@ -284,7 +284,5 @@ void vmx_hooks_assist(struct vcpu *d)
         if (!reinit)
             register_portio_handler(0x40, 4, intercept_pit_io);
     }
-
 }
-
 #endif /* CONFIG_VMX */
--- a/xen/arch/x86/vmx_io.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/xen/arch/x86/vmx_io.c	Sat Sep 03 16:58:50 2005 +0000
@@ -33,6 +33,7 @@
 #include <asm/vmx_platform.h>
 #include <asm/vmx_virpit.h>
 #include <asm/apic.h>
+#include <asm/shadow.h>
 
 #include <public/io/ioreq.h>
 #include <public/io/vmx_vlapic.h>
@@ -123,7 +124,6 @@ static void set_reg_value (int size, int
             regs->esp &= 0xFFFF0000;
             regs->esp |= (value & 0xFFFF);
             break;
-
         case 5:
             regs->ebp &= 0xFFFF0000;
             regs->ebp |= (value & 0xFFFF);
@@ -207,7 +207,6 @@ static inline void __set_reg_value(unsig
         *reg &= ~0xFFFF;
         *reg |= (value & 0xFFFF);
         break;
-
     case LONG:
         *reg &= ~0xFFFFFFFF;
         *reg |= (value & 0xFFFFFFFF);
@@ -322,13 +321,319 @@ static void set_reg_value (int size, int
 }
 #endif
 
+extern long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs);
+
+static inline void set_eflags_CF(int size, unsigned long v1,
+                                 unsigned long v2, struct cpu_user_regs *regs)
+{
+    unsigned long mask = (1 << (8 * size)) - 1;
+
+    if ((v1 & mask) > (v2 & mask))
+        regs->eflags |= X86_EFLAGS_CF;
+    else
+        regs->eflags &= ~X86_EFLAGS_CF;
+}
+
+static inline void set_eflags_OF(int size, unsigned long v1,
+                                 unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
+{
+    if ((v3 ^ v2) & (v3 ^ v1) & (1 << ((8 * size) - 1)))
+        regs->eflags |= X86_EFLAGS_OF;
+}
+
+static inline void set_eflags_AF(int size, unsigned long v1,
+                                 unsigned long v2, unsigned long v3, struct cpu_user_regs *regs)
+{
+    if ((v1 ^ v2 ^ v3) & 0x10)
+        regs->eflags |= X86_EFLAGS_AF;
+}
+
+static inline void set_eflags_ZF(int size, unsigned long v1,
+                                 struct cpu_user_regs *regs)
+{
+    unsigned long mask = (1 << (8 * size)) - 1;
+
+    if ((v1 & mask) == 0)
+        regs->eflags |= X86_EFLAGS_ZF;
+}
+
+static inline void set_eflags_SF(int size, unsigned long v1,
+                                 struct cpu_user_regs *regs)
+{
+    if (v1 & (1 << ((8 * size) - 1)))
+        regs->eflags |= X86_EFLAGS_SF;
+}
+
+static char parity_table[256] = {
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0,
+    1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 0, 1
+};
+
+static inline void set_eflags_PF(int size, unsigned long v1,
+                                 struct cpu_user_regs *regs)
+{
+    if (parity_table[v1 & 0xFF])
+        regs->eflags |= X86_EFLAGS_PF;
+}
+
+static void vmx_pio_assist(struct cpu_user_regs *regs, ioreq_t *p,
+                           struct mi_per_cpu_info *mpcip)
+{
+    unsigned long old_eax;
+    int sign = p->df ? -1 : 1;
+
+    if (p->dir == IOREQ_WRITE) {
+        if (p->pdata_valid) {
+            regs->esi += sign * p->count * p->size;
+            if (mpcip->flags & REPZ)
+                regs->ecx -= p->count;
+        }
+    } else {
+        if (mpcip->flags & OVERLAP) {
+            unsigned long addr;
+
+            regs->edi += sign * p->count * p->size;
+            if (mpcip->flags & REPZ)
+                regs->ecx -= p->count;
+
+            addr = regs->edi;
+            if (sign > 0)
+                addr -= p->size;
+            vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
+        } else if (p->pdata_valid) {
+            regs->edi += sign * p->count * p->size;
+            if (mpcip->flags & REPZ)
+                regs->ecx -= p->count;
+        } else {
+            old_eax = regs->eax;
+            switch (p->size) {
+            case 1:
+                regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
+                break;
+            case 2:
+                regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
+                break;
+            case 4:
+                regs->eax = (p->u.data & 0xffffffff);
+                break;
+            default:
+                printk("Error: %s unknown port size\n", __FUNCTION__);
+                domain_crash_synchronous();
+            }
+        }
+    }
+}
+
+static void vmx_mmio_assist(struct cpu_user_regs *regs, ioreq_t *p,
+                            struct mi_per_cpu_info *mpcip)
+{
+    int sign = p->df ? -1 : 1;
+    int size = -1, index = -1;
+    unsigned long value = 0, diff = 0;
+    unsigned long src, dst;
+
+    src = mpcip->operand[0];
+    dst = mpcip->operand[1];
+    size = operand_size(src);
+
+    switch (mpcip->instr) {
+    case INSTR_MOV:
+        if (dst & REGISTER) {
+            index = operand_index(dst);
+            set_reg_value(size, index, 0, regs, p->u.data);
+        }
+        break;
+
+    case INSTR_MOVZ:
+        if (dst & REGISTER) {
+            index = operand_index(dst);
+            switch (size) {
+            case BYTE: p->u.data = p->u.data & 0xFFULL; break;
+            case WORD: p->u.data = p->u.data & 0xFFFFULL; break;
+            case LONG: p->u.data = p->u.data & 0xFFFFFFFFULL; break;
+            }
+            set_reg_value(operand_size(dst), index, 0, regs, p->u.data);
+        }
+        break;
+
+    case INSTR_MOVS:
+        sign = p->df ? -1 : 1;
+        regs->esi += sign * p->count * p->size;
+        regs->edi += sign * p->count * p->size;
+
+        if ((mpcip->flags & OVERLAP) && p->dir == IOREQ_READ) {
+            unsigned long addr = regs->edi;
+
+            if (sign > 0)
+                addr -= p->size;
+            vmx_copy(&p->u.data, addr, p->size, VMX_COPY_OUT);
+        }
+
+        if (mpcip->flags & REPZ)
+            regs->ecx -= p->count;
+        break;
+
+    case INSTR_STOS:
+        sign = p->df ? -1 : 1;
+        regs->edi += sign * p->count * p->size;
+        if (mpcip->flags & REPZ)
+            regs->ecx -= p->count;
+        break;
+
+    case INSTR_AND:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data & value;
+        } else if (src & IMMEDIATE) {
+            value = mpcip->immediate;
+            diff = (unsigned long) p->u.data & value;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data & value;
+            set_reg_value(size, index, 0, regs, diff);
+        }
+
+        /*
+         * The OF and CF flags are cleared; the SF, ZF, and PF
+         * flags are set according to the result. The state of
+         * the AF flag is undefined.
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+
+    case INSTR_OR:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data | value;
+        } else if (src & IMMEDIATE) {
+            value = mpcip->immediate;
+            diff = (unsigned long) p->u.data | value;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data | value;
+            set_reg_value(size, index, 0, regs, diff);
+        }
+
+        /*
+         * The OF and CF flags are cleared; the SF, ZF, and PF
+         * flags are set according to the result. The state of
+         * the AF flag is undefined.
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+
+    case INSTR_XOR:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data ^ value;
+        } else if (src & IMMEDIATE) {
+            value = mpcip->immediate;
+            diff = (unsigned long) p->u.data ^ value;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data ^ value;
+            set_reg_value(size, index, 0, regs, diff);
+        }
+
+        /*
+         * The OF and CF flags are cleared; the SF, ZF, and PF
+         * flags are set according to the result. The state of
+         * the AF flag is undefined.
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+
+    case INSTR_CMP:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+            diff = (unsigned long) p->u.data - value;
+        } else if (src & IMMEDIATE) {
+            value = mpcip->immediate;
+            diff = (unsigned long) p->u.data - value;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+            diff = value - (unsigned long) p->u.data;
+        }
+
+        /*
+         * The CF, OF, SF, ZF, AF, and PF flags are set according
+         * to the result
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|X86_EFLAGS_AF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_CF(size, value, (unsigned long) p->u.data, regs);
+        set_eflags_OF(size, diff, value, (unsigned long) p->u.data, regs);
+        set_eflags_AF(size, diff, value, (unsigned long) p->u.data, regs);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+
+    case INSTR_TEST:
+        if (src & REGISTER) {
+            index = operand_index(src);
+            value = get_reg_value(size, index, 0, regs);
+        } else if (src & IMMEDIATE) {
+            value = mpcip->immediate;
+        } else if (src & MEMORY) {
+            index = operand_index(dst);
+            value = get_reg_value(size, index, 0, regs);
+        }
+        diff = (unsigned long) p->u.data & value;
+
+        /*
+         * Sets the SF, ZF, and PF status flags. CF and OF are set to 0
+         */
+        regs->eflags &= ~(X86_EFLAGS_CF|X86_EFLAGS_PF|
+                          X86_EFLAGS_ZF|X86_EFLAGS_SF|X86_EFLAGS_OF);
+        set_eflags_ZF(size, diff, regs);
+        set_eflags_SF(size, diff, regs);
+        set_eflags_PF(size, diff, regs);
+        break;
+    }
+
+    load_cpu_user_regs(regs);
+}
+
 void vmx_io_assist(struct vcpu *v)
 {
     vcpu_iodata_t *vio;
     ioreq_t *p;
     struct cpu_user_regs *regs = guest_cpu_user_regs();
-    unsigned long old_eax;
-    int sign;
     struct mi_per_cpu_info *mpci_p;
     struct cpu_user_regs *inst_decoder_regs;
 
@@ -340,80 +645,26 @@ void vmx_io_assist(struct vcpu *v)
     if (vio == 0) {
         VMX_DBG_LOG(DBG_LEVEL_1,
                     "bad shared page: %lx", (unsigned long) vio);
+        printf("bad shared page: %lx\n", (unsigned long) vio);
         domain_crash_synchronous();
     }
+
     p = &vio->vp_ioreq;
-
-    if (p->state == STATE_IORESP_HOOK){
+    if (p->state == STATE_IORESP_HOOK)
         vmx_hooks_assist(v);
-    }
 
     /* clear IO wait VMX flag */
     if (test_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags)) {
-        if (p->state != STATE_IORESP_READY) {
-            /* An interrupt send event raced us */
-            return;
-        } else {
-            p->state = STATE_INVALID;
-        }
-        clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
-    } else {
-        return;
-    }
-
-    sign = (p->df) ? -1 : 1;
-    if (p->port_mm) {
-        if (p->pdata_valid) {
-            regs->esi += sign * p->count * p->size;
-            regs->edi += sign * p->count * p->size;
-        } else {
-            if (p->dir == IOREQ_WRITE) {
-                return;
-            }
-            int size = -1, index = -1;
-
-            size = operand_size(v->domain->arch.vmx_platform.mpci.mmio_target);
-            index = operand_index(v->domain->arch.vmx_platform.mpci.mmio_target);
-
-            if (v->domain->arch.vmx_platform.mpci.mmio_target & WZEROEXTEND) {
-                p->u.data = p->u.data & 0xffff;
-            }
-            set_reg_value(size, index, 0, regs, p->u.data);
+        if (p->state == STATE_IORESP_READY) {
+            p->state = STATE_INVALID;
+            clear_bit(ARCH_VMX_IO_WAIT, &v->arch.arch_vmx.flags);
 
-        }
-        load_cpu_user_regs(regs);
-        return;
-    }
-
-    if (p->dir == IOREQ_WRITE) {
-        if (p->pdata_valid) {
-            regs->esi += sign * p->count * p->size;
-            regs->ecx -= p->count;
-        }
-        return;
-    } else {
-        if (p->pdata_valid) {
-            regs->edi += sign * p->count * p->size;
-            regs->ecx -= p->count;
-            return;
-        }
-    }
-
-    old_eax = regs->eax;
-
-    switch(p->size) {
-    case 1:
-        regs->eax = (old_eax & 0xffffff00) | (p->u.data & 0xff);
-        break;
-    case 2:
-        regs->eax = (old_eax & 0xffff0000) | (p->u.data & 0xffff);
-        break;
-    case 4:
-        regs->eax = (p->u.data & 0xffffffff);
-        break;
-    default:
-        printk("Error: %s unknwon port size\n", __FUNCTION__);
-        domain_crash_synchronous();
+            if (p->type == IOREQ_TYPE_PIO)
+                vmx_pio_assist(regs, p, mpci_p);
+            else
+                vmx_mmio_assist(regs, p, mpci_p);
+        }
+        /* else an interrupt send event raced us */
     }
 }
 
@@ -456,8 +707,9 @@ void vmx_wait_io()
     int port = iopacket_port(current->domain);
 
     do {
-        if(!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
+        if (!test_bit(port, &current->domain->shared_info->evtchn_pending[0]))
             do_block();
+
         vmx_check_events(current);
         if (!test_bit(ARCH_VMX_IO_WAIT, &current->arch.arch_vmx.flags))
             break;
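The new eflags helpers in vmx_io.c above let the MMIO emulator set architecturally correct flags after AND/OR/XOR/CMP/TEST on emulated memory. A self-contained check of the carry-flag helper for an 8-bit compare; the logic is copied from the diff, but the standalone harness (global eflags, dropped regs parameter) is mine:

    #include <assert.h>

    #define X86_EFLAGS_CF 0x0001

    static unsigned long eflags;

    /* As in the diff: CF is set iff v1 > v2 within the operand width,
     * i.e. the subtraction v2 - v1 would borrow. */
    static void set_eflags_CF(int size, unsigned long v1, unsigned long v2)
    {
        unsigned long mask = (1UL << (8 * size)) - 1;
        if ((v1 & mask) > (v2 & mask))
            eflags |= X86_EFLAGS_CF;
        else
            eflags &= ~X86_EFLAGS_CF;
    }

    int main(void)
    {
        /* "cmp mem, reg" computes mem - reg = 0x10 - 0x20: borrow, CF set */
        unsigned long mem = 0x10, reg = 0x20;
        set_eflags_CF(1, reg, mem);
        assert(eflags & X86_EFLAGS_CF);
        return 0;
    }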
--- a/xen/arch/x86/vmx_platform.c	Sat Sep 03 16:57:54 2005 +0000
+++ b/xen/arch/x86/vmx_platform.c	Sat Sep 03 16:58:50 2005 +0000
@@ -64,37 +64,37 @@ static inline long __get_reg_value(unsig
     case QUAD:
         return (long)(reg);
     default:
-        printk("Error: <__get_reg_value>Invalid reg size\n");
+        printf("Error: (__get_reg_value) Invalid reg size\n");
        domain_crash_synchronous();
     }
 }
 
-static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
 {
     if (size == BYTE) {
         switch (index) {
-        case 0: //%al
+        case 0: /* %al */
             return (char)(regs->rax & 0xFF);
-        case 1: //%cl
+        case 1: /* %cl */
             return (char)(regs->rcx & 0xFF);
-        case 2: //%dl
+        case 2: /* %dl */
             return (char)(regs->rdx & 0xFF);
-        case 3: //%bl
+        case 3: /* %bl */
             return (char)(regs->rbx & 0xFF);
-        case 4: //%ah
+        case 4: /* %ah */
            return (char)((regs->rax & 0xFF00) >> 8);
-        case 5: //%ch
+        case 5: /* %ch */
            return (char)((regs->rcx & 0xFF00) >> 8);
-        case 6: //%dh
+        case 6: /* %dh */
            return (char)((regs->rdx & 0xFF00) >> 8);
-        case 7: //%bh
+        case 7: /* %bh */
            return (char)((regs->rbx & 0xFF00) >> 8);
        default:
-            printk("Error: (get_reg_value)Invalid index value\n");
+            printf("Error: (get_reg_value) Invalid index value\n");
            domain_crash_synchronous();
        }
+    }
 
-    }
     switch (index) {
     case 0: return __get_reg_value(regs->rax, size);
     case 1: return __get_reg_value(regs->rcx, size);
@@ -113,7 +113,7 @@ static long get_reg_value(int size, int
     case 14: return __get_reg_value(regs->r14, size);
     case 15: return __get_reg_value(regs->r15, size);
     default:
-        printk("Error: (get_reg_value)Invalid index value\n");
+        printf("Error: (get_reg_value) Invalid index value\n");
        domain_crash_synchronous();
     }
 }
@@ -129,117 +129,91 @@ void store_cpu_user_regs(struct cpu_user
     __vmread(GUEST_RIP, &regs->eip);
 }
 
-static long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+static inline long __get_reg_value(unsigned long reg, int size)
 {
-    /*
-     * Reference the db_reg[] table
-     */
-    switch (size) {
-    case BYTE:
+    switch(size) {
+    case WORD:
+        return (short)(reg & 0xFFFF);
+    case LONG:
+        return (int)(reg & 0xFFFFFFFF);
+    default:
+        printf("Error: (__get_reg_value) Invalid reg size\n");
+        domain_crash_synchronous();
+    }
+}
+
+long get_reg_value(int size, int index, int seg, struct cpu_user_regs *regs)
+{
+    if (size == BYTE) {
         switch (index) {
-        case 0: //%al
+        case 0: /* %al */
             return (char)(regs->eax & 0xFF);
-        case 1: //%cl
+        case 1: /* %cl */
             return (char)(regs->ecx & 0xFF);
-        case 2: //%dl
+        case 2: /* %dl */
             return (char)(regs->edx & 0xFF);
-        case 3: //%bl
+        case 3: /* %bl */
             return (char)(regs->ebx & 0xFF);
-        case 4: //%ah
+        case 4: /* %ah */
            return (char)((regs->eax & 0xFF00) >> 8);
-        case 5: //%ch
+        case 5: /* %ch */
            return (char)((regs->ecx & 0xFF00) >> 8);
-        case 6: //%dh
+        case 6: /* %dh */
            return (char)((regs->edx & 0xFF00) >> 8);
-        case 7: //%bh
+        case 7: /* %bh */
            return (char)((regs->ebx & 0xFF00) >> 8);
        default:
-            printk("Error: (get_reg_value)size case 0 error\n");
+            printf("Error: (get_reg_value) Invalid index value\n");
            domain_crash_synchronous();
        }
-    case WORD:
+    }
+
        switch (index) {
-        case 0: //%ax
-            return (short)(regs->eax & 0xFFFF);
-        case 1: //%cx
-            return (short)(regs->ecx & 0xFFFF);
-        case 2: //%dx
-            return (short)(regs->edx & 0xFFFF);
-        case 3: //%bx
-            return (short)(regs->ebx & 0xFFFF);
-        case 4: //%sp
-            return (short)(regs->esp & 0xFFFF);
-            break;
-        case 5: //%bp
-            return (short)(regs->ebp & 0xFFFF);
-        case 6: //%si
-            return (short)(regs->esi & 0xFFFF);
-        case 7: //%di
-            return (short)(regs->edi & 0xFFFF);
-        default:
-            printk("Error: (get_reg_value)size case 1 error\n");
-            domain_crash_synchronous();
-        }
-    case LONG:
-        switch (index) {
-        case 0: //%eax
-            return regs->eax;
-        case 1: //%ecx
-            return regs->ecx;
-        case 2: //%edx
-            return regs->edx;
-
-        case 3: //%ebx
-            return regs->ebx;
-        case 4: //%esp
-            return regs->esp;
-        case 5: //%ebp
-            return regs->ebp;
-        case 6: //%esi
-            return regs->esi;
-        case 7: //%edi
-            return regs->edi;
-        default:
-            printk("Error: (get_reg_value)size case 2 error\n");
-            domain_crash_synchronous();
-        }
+    case 0: return __get_reg_value(regs->eax, size);
+    case 1: return __get_reg_value(regs->ecx, size);
+    case 2: return __get_reg_value(regs->edx, size);
+    case 3: return __get_reg_value(regs->ebx, size);
+    case 4: return __get_reg_value(regs->esp, size);
+    case 5: return __get_reg_value(regs->ebp, size);
+    case 6: return __get_reg_value(regs->esi, size);
+    case 7: return __get_reg_value(regs->edi, size);
     default:
-        printk("Error: (get_reg_value)size case error\n");
+        printf("Error: (get_reg_value) Invalid index value\n");
        domain_crash_synchronous();
     }
 }
 #endif
 
-static inline const unsigned char *check_prefix(const unsigned char *inst, struct instruction *thread_inst, unsigned char *rex_p)
+static inline unsigned char *check_prefix(unsigned char *inst,
+                                          struct instruction *thread_inst, unsigned char *rex_p)
 {
     while (1) {
         switch (*inst) {
-        /* rex prefix for em64t instructions*/
+        /* rex prefix for em64t instructions */
         case 0x40 ... 0x4e:
             *rex_p = *inst;
             break;
-
-        case 0xf3: //REPZ
+        case 0xf3: /* REPZ */
             thread_inst->flags = REPZ;
-            break;
-        case 0xf2: //REPNZ
+            break;
+        case 0xf2: /* REPNZ */
             thread_inst->flags = REPNZ;
-            break;
-        case 0xf0: //LOCK
+            break;
+        case 0xf0: /* LOCK */
             break;
-        case 0x2e: //CS
-        case 0x36: //SS
-        case 0x3e: //DS
-        case 0x26: //ES
-        case 0x64: //FS
-        case 0x65: //GS
-            thread_inst->seg_sel = *inst;
+        case 0x2e: /* CS */
+        case 0x36: /* SS */
+        case 0x3e: /* DS */
+        case 0x26: /* ES */
+        case 0x64: /* FS */
+        case 0x65: /* GS */
+            thread_inst->seg_sel = *inst;
             break;
-        case 0x66: //32bit->16bit
+        case 0x66: /* 32bit->16bit */
             thread_inst->op_size = WORD;
             break;
         case 0x67:
-            printf("Error: Not handling 0x67 (yet)\n");
+            printf("Error: Not handling 0x67 (yet)\n");
             domain_crash_synchronous();
             break;
         default:
@@ -249,7 +223,7 @@ static inline const unsigned char *check
     }
 }
 
-static inline unsigned long get_immediate(int op16, const unsigned char *inst, int op_size)
+static inline unsigned long get_immediate(int op16,const unsigned char *inst, int op_size)
 {
     int mod, reg, rm;
     unsigned long val = 0;
@@ -317,197 +291,299 @@ static inline int get_index(const unsign
 
 static void init_instruction(struct instruction *mmio_inst)
 {
-    memset(mmio_inst->i_name, '0', I_NAME_LEN);
-    mmio_inst->op_size = 0;
-    mmio_inst->offset = 0;
+    mmio_inst->instr = 0;
+    mmio_inst->op_size = 0;
     mmio_inst->immediate = 0;
     mmio_inst->seg_sel = 0;
-    mmio_inst->op_num = 0;
 
     mmio_inst->operand[0] = 0;
     mmio_inst->operand[1] = 0;
-    mmio_inst->operand[2] = 0;
 
     mmio_inst->flags = 0;
 }
 
 #define GET_OP_SIZE_FOR_BYTE(op_size)   \
-    do {if (rex) op_size = BYTE_64;else op_size = BYTE;} while(0)
+    do {                                \
+        if (rex)                        \
+            op_size = BYTE_64;          \
+        else                            \
+            op_size = BYTE;             \
+    } while(0)
 
 #define GET_OP_SIZE_FOR_NONEBYTE(op_size)   \
-    do {if (rex & 0x8) op_size = QUAD; else if (op_size != WORD) op_size = LONG;} while(0)
+    do {                                    \
+        if (rex & 0x8)                      \
+            op_size = QUAD;                 \
+        else if (op_size != WORD)           \
+            op_size = LONG;                 \
+    } while(0)
+
+
+/*
+ * Decode mem,accumulator operands (as in <opcode> m8/m16/m32, al,ax,eax)
+ */
+static int mem_acc(unsigned char size, struct instruction *instr)
+{
+    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
+    instr->operand[1] = mk_operand(size, 0, 0, REGISTER);
+    return DECODE_success;
+}
+
+/*
+ * Decode accumulator,mem operands (as in <opcode> al,ax,eax, m8/m16/m32)
+ */
+static int acc_mem(unsigned char size, struct instruction *instr)
+{
+    instr->operand[0] = mk_operand(size, 0, 0, REGISTER);
+    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
+    return DECODE_success;
+}
 
-static int vmx_decode(const unsigned char *inst, struct instruction *thread_inst)
+/*
+ * Decode mem,reg operands (as in <opcode> r32/16, m32/16)
+ */
+static int mem_reg(unsigned char size, unsigned char *opcode,
+                   struct instruction *instr, unsigned char rex)
+{
+    int index = get_index(opcode + 1, rex);
+
+    instr->operand[0] = mk_operand(size, 0, 0, MEMORY);
+    instr->operand[1] = mk_operand(size, index, 0, REGISTER);
+    return DECODE_success;
+}
+
+/*
+ * Decode reg,mem operands (as in <opcode> m32/16, r32/16)
+ */
+static int reg_mem(unsigned char size, unsigned char *opcode,
+                   struct instruction *instr, unsigned char rex)
+{
+    int index = get_index(opcode + 1, rex);
+
+    instr->operand[0] = mk_operand(size, index, 0, REGISTER);
+    instr->operand[1] = mk_operand(size, 0, 0, MEMORY);
+    return DECODE_success;
+}
+
+static int vmx_decode(unsigned char *opcode, struct instruction *instr)
 {
     unsigned long eflags;
     int index, vm86 = 0;
     unsigned char rex = 0;
     unsigned char tmp_size = 0;
 
+    init_instruction(instr);
 
-    init_instruction(thread_inst);
-
-    inst = check_prefix(inst, thread_inst, &rex);
+    opcode = check_prefix(opcode, instr, &rex);
 
     __vmread(GUEST_RFLAGS, &eflags);
     if (eflags & X86_EFLAGS_VM)
         vm86 = 1;
 
     if (vm86) { /* meaning is reversed */
-        if (thread_inst->op_size == WORD)
-            thread_inst->op_size = LONG;
-        else if (thread_inst->op_size == LONG)
-            thread_inst->op_size = WORD;
-        else if (thread_inst->op_size == 0)
-            thread_inst->op_size = WORD;
+        if (instr->op_size == WORD)
+            instr->op_size = LONG;
+        else if (instr->op_size == LONG)
+            instr->op_size = WORD;
+        else if (instr->op_size == 0)
+            instr->op_size = WORD;
     }
 
-    switch(*inst) {
-    case 0x81:
-        /* This is only a workaround for cmpl instruction*/
-        strcpy((char *)thread_inst->i_name, "cmp");
-        return DECODE_success;
+    switch (*opcode) {
+    case 0x0B: /* or m32/16, r32/16 */
+        instr->instr = INSTR_OR;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0x20: /* and r8, m8 */
+        instr->instr = INSTR_AND;
+        GET_OP_SIZE_FOR_BYTE(instr->op_size);
+        return reg_mem(instr->op_size, opcode, instr, rex);
+
+    case 0x21: /* and r32/16, m32/16 */
+        instr->instr = INSTR_AND;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return reg_mem(instr->op_size, opcode, instr, rex);
 
-    case 0x88:
-        /* mov r8 to m8 */
-        thread_inst->op_size = BYTE;
-        index = get_index((inst + 1), rex);
-        GET_OP_SIZE_FOR_BYTE(tmp_size);
-        thread_inst->operand[0] = mk_operand(tmp_size, index, 0, REGISTER);
+    case 0x23: /* and m32/16, r32/16 */
+        instr->instr = INSTR_AND;
+        GET_OP_SIZE_FOR_NONEBYTE(instr->op_size);
+        return mem_reg(instr->op_size, opcode, instr, rex);
+
+    case 0x30: /* xor r8, m8 */
+        instr->instr = INSTR_XOR;
+        GET_OP_SIZE_FOR_BYTE(instr->op_size);
+        return reg_mem(instr->op_size, opcode, instr, rex);
 
-        break;
-    case 0x89:
-        /* mov r32/16 to m32/16 */
-        index = get_index((inst + 1), rex);
-        GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size);
-        thread_inst->operand[0] = mk_operand(thread_inst->op_size, index,
0, REGISTER); 32.403 + case 0x31: /* xor r32/16, m32/16 */ 32.404 + instr->instr = INSTR_XOR; 32.405 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.406 + return reg_mem(instr->op_size, opcode, instr, rex); 32.407 + 32.408 + case 0x39: /* cmp r32/16, m32/16 */ 32.409 + instr->instr = INSTR_CMP; 32.410 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.411 + return reg_mem(instr->op_size, opcode, instr, rex); 32.412 32.413 - break; 32.414 - case 0x8a: 32.415 - /* mov m8 to r8 */ 32.416 - thread_inst->op_size = BYTE; 32.417 - index = get_index((inst + 1), rex); 32.418 - GET_OP_SIZE_FOR_BYTE(tmp_size); 32.419 - thread_inst->operand[1] = mk_operand(tmp_size, index, 0, REGISTER); 32.420 - break; 32.421 - case 0x8b: 32.422 - /* mov r32/16 to m32/16 */ 32.423 - index = get_index((inst + 1), rex); 32.424 - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); 32.425 - thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER); 32.426 - break; 32.427 - case 0x8c: 32.428 - case 0x8e: 32.429 - printk("%x, This opcode hasn't been handled yet!", *inst); 32.430 - return DECODE_failure; 32.431 - /* Not handle it yet. */ 32.432 - case 0xa0: 32.433 - /* mov byte to al */ 32.434 - thread_inst->op_size = BYTE; 32.435 - GET_OP_SIZE_FOR_BYTE(tmp_size); 32.436 - thread_inst->operand[1] = mk_operand(tmp_size, 0, 0, REGISTER); 32.437 - break; 32.438 - case 0xa1: 32.439 - /* mov word/doubleword to ax/eax */ 32.440 - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); 32.441 - thread_inst->operand[1] = mk_operand(thread_inst->op_size, 0, 0, REGISTER); 32.442 + case 0x81: 32.443 + if (((opcode[1] >> 3) & 7) == 7) { /* cmp $imm, m32/16 */ 32.444 + instr->instr = INSTR_CMP; 32.445 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.446 + 32.447 + instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); 32.448 + instr->immediate = get_immediate(vm86, opcode+1, BYTE); 32.449 + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); 32.450 + 32.451 + return DECODE_success; 32.452 + } else 32.453 + return DECODE_failure; 32.454 + 32.455 + case 0x84: /* test m8, r8 */ 32.456 + instr->instr = INSTR_TEST; 32.457 + instr->op_size = BYTE; 32.458 + GET_OP_SIZE_FOR_BYTE(tmp_size); 32.459 + return mem_reg(tmp_size, opcode, instr, rex); 32.460 + 32.461 + case 0x88: /* mov r8, m8 */ 32.462 + instr->instr = INSTR_MOV; 32.463 + instr->op_size = BYTE; 32.464 + GET_OP_SIZE_FOR_BYTE(tmp_size); 32.465 + return reg_mem(tmp_size, opcode, instr, rex); 32.466 + 32.467 + case 0x89: /* mov r32/16, m32/16 */ 32.468 + instr->instr = INSTR_MOV; 32.469 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.470 + return reg_mem(instr->op_size, opcode, instr, rex); 32.471 + 32.472 + case 0x8A: /* mov m8, r8 */ 32.473 + instr->instr = INSTR_MOV; 32.474 + instr->op_size = BYTE; 32.475 + GET_OP_SIZE_FOR_BYTE(tmp_size); 32.476 + return mem_reg(tmp_size, opcode, instr, rex); 32.477 + 32.478 + case 0x8B: /* mov m32/16, r32/16 */ 32.479 + instr->instr = INSTR_MOV; 32.480 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.481 + return mem_reg(instr->op_size, opcode, instr, rex); 32.482 32.483 - break; 32.484 - case 0xa2: 32.485 - /* mov al to (seg:offset) */ 32.486 - thread_inst->op_size = BYTE; 32.487 - GET_OP_SIZE_FOR_BYTE(tmp_size); 32.488 - thread_inst->operand[0] = mk_operand(tmp_size, 0, 0, REGISTER); 32.489 - break; 32.490 - case 0xa3: 32.491 - /* mov ax/eax to (seg:offset) */ 32.492 - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); 32.493 - thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, REGISTER); 32.494 - break; 32.495 - case 
0xa4: 32.496 - /* movsb */ 32.497 - thread_inst->op_size = BYTE; 32.498 - strcpy((char *)thread_inst->i_name, "movs"); 32.499 - return DECODE_success; 32.500 - case 0xa5: 32.501 - /* movsw/movsl */ 32.502 - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); 32.503 - strcpy((char *)thread_inst->i_name, "movs"); 32.504 - return DECODE_success; 32.505 - case 0xaa: 32.506 - /* stosb */ 32.507 - thread_inst->op_size = BYTE; 32.508 - strcpy((char *)thread_inst->i_name, "stosb"); 32.509 + case 0xA0: /* mov <addr>, al */ 32.510 + instr->instr = INSTR_MOV; 32.511 + instr->op_size = BYTE; 32.512 + GET_OP_SIZE_FOR_BYTE(tmp_size); 32.513 + return mem_acc(tmp_size, instr); 32.514 + 32.515 + case 0xA1: /* mov <addr>, ax/eax */ 32.516 + instr->instr = INSTR_MOV; 32.517 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.518 + return mem_acc(instr->op_size, instr); 32.519 + 32.520 + case 0xA2: /* mov al, <addr> */ 32.521 + instr->instr = INSTR_MOV; 32.522 + instr->op_size = BYTE; 32.523 + GET_OP_SIZE_FOR_BYTE(tmp_size); 32.524 + return acc_mem(tmp_size, instr); 32.525 + 32.526 + case 0xA3: /* mov ax/eax, <addr> */ 32.527 + instr->instr = INSTR_MOV; 32.528 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.529 + return acc_mem(instr->op_size, instr); 32.530 + 32.531 + case 0xA4: /* movsb */ 32.532 + instr->instr = INSTR_MOVS; 32.533 + instr->op_size = BYTE; 32.534 + return DECODE_success; 32.535 + 32.536 + case 0xA5: /* movsw/movsl */ 32.537 + instr->instr = INSTR_MOVS; 32.538 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.539 + return DECODE_success; 32.540 + 32.541 + case 0xAA: /* stosb */ 32.542 + instr->instr = INSTR_STOS; 32.543 + instr->op_size = BYTE; 32.544 + return DECODE_success; 32.545 + 32.546 + case 0xAB: /* stosw/stosl */ 32.547 + instr->instr = INSTR_STOS; 32.548 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.549 + return DECODE_success; 32.550 + 32.551 + case 0xC6: 32.552 + if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm8, m8 */ 32.553 + instr->instr = INSTR_MOV; 32.554 + instr->op_size = BYTE; 32.555 + 32.556 + instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); 32.557 + instr->immediate = get_immediate(vm86, opcode+1, instr->op_size); 32.558 + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); 32.559 + 32.560 return DECODE_success; 32.561 - case 0xab: 32.562 - /* stosw/stosl */ 32.563 - if (thread_inst->op_size == WORD) { 32.564 - strcpy((char *)thread_inst->i_name, "stosw"); 32.565 - } else { 32.566 - thread_inst->op_size = LONG; 32.567 - strcpy((char *)thread_inst->i_name, "stosl"); 32.568 - } 32.569 + } else 32.570 + return DECODE_failure; 32.571 + 32.572 + case 0xC7: 32.573 + if (((opcode[1] >> 3) & 7) == 0) { /* mov $imm16/32, m16/32 */ 32.574 + instr->instr = INSTR_MOV; 32.575 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.576 + 32.577 + instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); 32.578 + instr->immediate = get_immediate(vm86, opcode+1, instr->op_size); 32.579 + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); 32.580 + 32.581 return DECODE_success; 32.582 - case 0xc6: 32.583 - /* mov imm8 to m8 */ 32.584 - thread_inst->op_size = BYTE; 32.585 - thread_inst->operand[0] = mk_operand(BYTE, 0, 0, IMMEDIATE); 32.586 - thread_inst->immediate = get_immediate(vm86, 32.587 - (inst+1), thread_inst->op_size); 32.588 - break; 32.589 - case 0xc7: 32.590 - /* mov imm16/32 to m16/32 */ 32.591 - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); 32.592 - thread_inst->operand[0] = mk_operand(thread_inst->op_size, 0, 0, IMMEDIATE); 32.593 - 
thread_inst->immediate = get_immediate(vm86, (inst+1), thread_inst->op_size); 32.594 - 32.595 - break; 32.596 - case 0x0f: 32.597 - break; 32.598 - default: 32.599 - printk("%x, This opcode hasn't been handled yet!", *inst); 32.600 - return DECODE_failure; 32.601 - } 32.602 - 32.603 - strcpy((char *)thread_inst->i_name, "mov"); 32.604 - if (*inst != 0x0f) { 32.605 - return DECODE_success; 32.606 + } else 32.607 + return DECODE_failure; 32.608 + 32.609 + case 0xF6: 32.610 + if (((opcode[1] >> 3) & 7) == 0) { /* testb $imm8, m8 */ 32.611 + instr->instr = INSTR_TEST; 32.612 + instr->op_size = BYTE; 32.613 + 32.614 + instr->operand[0] = mk_operand(instr->op_size, 0, 0, IMMEDIATE); 32.615 + instr->immediate = get_immediate(vm86, opcode+1, instr->op_size); 32.616 + instr->operand[1] = mk_operand(instr->op_size, 0, 0, MEMORY); 32.617 + 32.618 + return DECODE_success; 32.619 + } else 32.620 + return DECODE_failure; 32.621 + 32.622 + case 0x0F: 32.623 + break; 32.624 + 32.625 + default: 32.626 + printf("%x, This opcode isn't handled yet!\n", *opcode); 32.627 + return DECODE_failure; 32.628 } 32.629 32.630 - inst++; 32.631 - switch (*inst) { 32.632 - 32.633 - /* movz */ 32.634 - case 0xb6: 32.635 - index = get_index((inst + 1), rex); 32.636 - GET_OP_SIZE_FOR_NONEBYTE(thread_inst->op_size); 32.637 - thread_inst->operand[1] = mk_operand(thread_inst->op_size, index, 0, REGISTER); 32.638 - thread_inst->op_size = BYTE; 32.639 - strcpy((char *)thread_inst->i_name, "movzb"); 32.640 - 32.641 - return DECODE_success; 32.642 - case 0xb7: 32.643 - index = get_index((inst + 1), rex); 32.644 - if (rex & 0x8) { 32.645 - thread_inst->op_size = LONG; 32.646 - thread_inst->operand[1] = mk_operand(QUAD, index, 0, REGISTER); 32.647 - } else { 32.648 - thread_inst->op_size = WORD; 32.649 - thread_inst->operand[1] = mk_operand(LONG, index, 0, REGISTER); 32.650 - } 32.651 - 32.652 - strcpy((char *)thread_inst->i_name, "movzw"); 32.653 - 32.654 - return DECODE_success; 32.655 - default: 32.656 - printk("0f %x, This opcode hasn't been handled yet!", *inst); 32.657 - return DECODE_failure; 32.658 + switch (*++opcode) { 32.659 + case 0xB6: /* movz m8, r16/r32 */ 32.660 + instr->instr = INSTR_MOVZ; 32.661 + GET_OP_SIZE_FOR_NONEBYTE(instr->op_size); 32.662 + index = get_index(opcode + 1, rex); 32.663 + instr->operand[0] = mk_operand(BYTE, 0, 0, MEMORY); 32.664 + instr->operand[1] = mk_operand(instr->op_size, index, 0, REGISTER); 32.665 + return DECODE_success; 32.666 + 32.667 + case 0xB7: /* movz m16, r32 */ 32.668 + instr->instr = INSTR_MOVZ; 32.669 + index = get_index(opcode + 1, rex); 32.670 + if (rex & 0x8) { 32.671 + instr->op_size = LONG; 32.672 + instr->operand[1] = mk_operand(QUAD, index, 0, REGISTER); 32.673 + } else { 32.674 + instr->op_size = WORD; 32.675 + instr->operand[1] = mk_operand(LONG, index, 0, REGISTER); 32.676 + } 32.677 + instr->operand[0] = mk_operand(instr->op_size, 0, 0, MEMORY); 32.678 + return DECODE_success; 32.679 + 32.680 + default: 32.681 + printf("0f %x, This opcode isn't handled yet\n", *opcode); 32.682 + return DECODE_failure; 32.683 } 32.684 - 32.685 - /* will never reach here */ 32.686 - return DECODE_failure; 32.687 } 32.688 32.689 +/* XXX use vmx_copy instead */ 32.690 int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len) 32.691 { 32.692 unsigned long gpa; 32.693 @@ -552,40 +628,27 @@ int inst_copy_from_guest(unsigned char * 32.694 return inst_len+remaining; 32.695 } 32.696 32.697 -static int read_from_mmio(struct instruction *inst_p) 32.698 -{ 32.699 - // 
Only for mov instruction now!!! 32.700 - if (inst_p->operand[1] & REGISTER) 32.701 - return 1; 32.702 - 32.703 - return 0; 32.704 -} 32.705 - 32.706 -// dir: 1 read from mmio 32.707 -// 0 write to mmio 32.708 -static void send_mmio_req(unsigned long gpa, 32.709 - struct instruction *inst_p, long value, int dir, int pvalid) 32.710 +void send_mmio_req(unsigned char type, unsigned long gpa, 32.711 + unsigned long count, int size, long value, int dir, int pvalid) 32.712 { 32.713 struct vcpu *d = current; 32.714 vcpu_iodata_t *vio; 32.715 ioreq_t *p; 32.716 int vm86; 32.717 - struct mi_per_cpu_info *mpci_p; 32.718 - struct cpu_user_regs *inst_decoder_regs; 32.719 + struct cpu_user_regs *regs; 32.720 extern long evtchn_send(int lport); 32.721 32.722 - mpci_p = ¤t->domain->arch.vmx_platform.mpci; 32.723 - inst_decoder_regs = mpci_p->inst_decoder_regs; 32.724 + regs = current->domain->arch.vmx_platform.mpci.inst_decoder_regs; 32.725 32.726 vio = get_vio(d->domain, d->vcpu_id); 32.727 - 32.728 if (vio == NULL) { 32.729 - printk("bad shared page\n"); 32.730 + printf("bad shared page\n"); 32.731 domain_crash_synchronous(); 32.732 } 32.733 + 32.734 p = &vio->vp_ioreq; 32.735 32.736 - vm86 = inst_decoder_regs->eflags & X86_EFLAGS_VM; 32.737 + vm86 = regs->eflags & X86_EFLAGS_VM; 32.738 32.739 if (test_bit(ARCH_VMX_IO_WAIT, &d->arch.arch_vmx.flags)) { 32.740 printf("VMX I/O has not yet completed\n"); 32.741 @@ -596,25 +659,22 @@ static void send_mmio_req(unsigned long 32.742 p->dir = dir; 32.743 p->pdata_valid = pvalid; 32.744 32.745 - p->port_mm = 1; 32.746 - p->size = inst_p->op_size; 32.747 + p->type = type; 32.748 + p->size = size; 32.749 p->addr = gpa; 32.750 - p->u.data = value; 32.751 + p->count = count; 32.752 + p->df = regs->eflags & EF_DF ? 1 : 0; 32.753 + 32.754 + if (pvalid) { 32.755 + if (vmx_paging_enabled(current)) 32.756 + p->u.pdata = (void *) gva_to_gpa(value); 32.757 + else 32.758 + p->u.pdata = (void *) value; /* guest VA == guest PA */ 32.759 + } else 32.760 + p->u.data = value; 32.761 32.762 p->state = STATE_IOREQ_READY; 32.763 32.764 - if (inst_p->flags & REPZ) { 32.765 - if (vm86) 32.766 - p->count = inst_decoder_regs->ecx & 0xFFFF; 32.767 - else 32.768 - p->count = inst_decoder_regs->ecx; 32.769 - p->df = (inst_decoder_regs->eflags & EF_DF) ? 
1 : 0; 32.770 - } else 32.771 - p->count = 1; 32.772 - 32.773 - if ((pvalid) && vmx_paging_enabled(current)) 32.774 - p->u.pdata = (void *) gva_to_gpa(p->u.data); 32.775 - 32.776 if (vmx_mmio_intercept(p)){ 32.777 p->state = STATE_IORESP_READY; 32.778 vmx_io_assist(d); 32.779 @@ -625,18 +685,50 @@ static void send_mmio_req(unsigned long 32.780 vmx_wait_io(); 32.781 } 32.782 32.783 +static void mmio_operands(int type, unsigned long gpa, struct instruction *inst, 32.784 + struct mi_per_cpu_info *mpcip, struct cpu_user_regs *regs) 32.785 +{ 32.786 + unsigned long value = 0; 32.787 + int index, size; 32.788 + 32.789 + size = operand_size(inst->operand[0]); 32.790 + 32.791 + mpcip->flags = inst->flags; 32.792 + mpcip->instr = inst->instr; 32.793 + mpcip->operand[0] = inst->operand[0]; /* source */ 32.794 + mpcip->operand[1] = inst->operand[1]; /* destination */ 32.795 + 32.796 + if (inst->operand[0] & REGISTER) { /* dest is memory */ 32.797 + index = operand_index(inst->operand[0]); 32.798 + value = get_reg_value(size, index, 0, regs); 32.799 + send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0); 32.800 + } else if (inst->operand[0] & IMMEDIATE) { /* dest is memory */ 32.801 + value = inst->immediate; 32.802 + send_mmio_req(type, gpa, 1, size, value, IOREQ_WRITE, 0); 32.803 + } else if (inst->operand[0] & MEMORY) { /* dest is register */ 32.804 + /* send the request and wait for the value */ 32.805 + send_mmio_req(type, gpa, 1, size, 0, IOREQ_READ, 0); 32.806 + } else { 32.807 + printf("mmio_operands: invalid operand\n"); 32.808 + domain_crash_synchronous(); 32.809 + } 32.810 +} 32.811 + 32.812 +#define GET_REPEAT_COUNT() \ 32.813 + (mmio_inst.flags & REPZ ? (vm86 ? regs->ecx & 0xFFFF : regs->ecx) : 1) 32.814 + 32.815 void handle_mmio(unsigned long va, unsigned long gpa) 32.816 { 32.817 unsigned long eip, eflags, cs; 32.818 unsigned long inst_len, inst_addr; 32.819 - struct mi_per_cpu_info *mpci_p; 32.820 - struct cpu_user_regs *inst_decoder_regs; 32.821 + struct mi_per_cpu_info *mpcip; 32.822 + struct cpu_user_regs *regs; 32.823 struct instruction mmio_inst; 32.824 unsigned char inst[MAX_INST_LEN]; 32.825 - int vm86, ret; 32.826 + int i, vm86, ret; 32.827 32.828 - mpci_p = ¤t->domain->arch.vmx_platform.mpci; 32.829 - inst_decoder_regs = mpci_p->inst_decoder_regs; 32.830 + mpcip = ¤t->domain->arch.vmx_platform.mpci; 32.831 + regs = mpcip->inst_decoder_regs; 32.832 32.833 __vmread(GUEST_RIP, &eip); 32.834 __vmread(VM_EXIT_INSTRUCTION_LEN, &inst_len); 32.835 @@ -647,108 +739,142 @@ void handle_mmio(unsigned long va, unsig 32.836 __vmread(GUEST_CS_SELECTOR, &cs); 32.837 inst_addr = (cs << 4) + eip; 32.838 } else 32.839 - inst_addr = eip; /* XXX should really look at GDT[cs].base too */ 32.840 + inst_addr = eip; 32.841 32.842 - memset(inst, '0', MAX_INST_LEN); 32.843 + memset(inst, 0, MAX_INST_LEN); 32.844 ret = inst_copy_from_guest(inst, inst_addr, inst_len); 32.845 if (ret != inst_len) { 32.846 - printk("handle_mmio - EXIT: get guest instruction fault\n"); 32.847 + printf("handle_mmio - EXIT: get guest instruction fault\n"); 32.848 domain_crash_synchronous(); 32.849 } 32.850 32.851 - 32.852 init_instruction(&mmio_inst); 32.853 32.854 if (vmx_decode(inst, &mmio_inst) == DECODE_failure) { 32.855 - printk("vmx decode failure: eip=%lx, va=%lx\n %x %x %x %x\n", eip, va, 32.856 - inst[0], inst[1], inst[2], inst[3]); 32.857 + printf("mmio opcode: va 0x%lx, gpa 0x%lx, len %ld:", 32.858 + va, gpa, inst_len); 32.859 + for (i = 0; i < inst_len; i++) 32.860 + printf(" %02x", inst[i] & 0xFF); 32.861 + 
printf("\n"); 32.862 domain_crash_synchronous(); 32.863 } 32.864 32.865 - __vmwrite(GUEST_RIP, eip + inst_len); 32.866 - store_cpu_user_regs(inst_decoder_regs); 32.867 + store_cpu_user_regs(regs); 32.868 + regs->eip += inst_len; /* advance %eip */ 32.869 32.870 - // Only handle "mov" and "movs" instructions! 32.871 - if (!strncmp((char *)mmio_inst.i_name, "movz", 4)) { 32.872 - if (read_from_mmio(&mmio_inst)) { 32.873 - // Send the request and waiting for return value. 32.874 - mpci_p->mmio_target = mmio_inst.operand[1] | WZEROEXTEND; 32.875 - send_mmio_req(gpa, &mmio_inst, 0, IOREQ_READ, 0); 32.876 - return ; 32.877 - } else { 32.878 - printk("handle_mmio - EXIT: movz error!\n"); 32.879 - domain_crash_synchronous(); 32.880 - } 32.881 - } 32.882 + switch (mmio_inst.instr) { 32.883 + case INSTR_MOV: 32.884 + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); 32.885 + break; 32.886 32.887 - if (!strncmp((char *)mmio_inst.i_name, "movs", 4)) { 32.888 + case INSTR_MOVS: 32.889 + { 32.890 + unsigned long count = GET_REPEAT_COUNT(); 32.891 + unsigned long size = mmio_inst.op_size; 32.892 + int sign = regs->eflags & EF_DF ? -1 : 1; 32.893 unsigned long addr = 0; 32.894 int dir; 32.895 32.896 + /* determine non-MMIO address */ 32.897 if (vm86) { 32.898 unsigned long seg; 32.899 32.900 __vmread(GUEST_ES_SELECTOR, &seg); 32.901 - if (((seg << 4) + (inst_decoder_regs->edi & 0xFFFF)) == va) { 32.902 + if (((seg << 4) + (regs->edi & 0xFFFF)) == va) { 32.903 dir = IOREQ_WRITE; 32.904 __vmread(GUEST_DS_SELECTOR, &seg); 32.905 - addr = (seg << 4) + (inst_decoder_regs->esi & 0xFFFF); 32.906 + addr = (seg << 4) + (regs->esi & 0xFFFF); 32.907 } else { 32.908 dir = IOREQ_READ; 32.909 - addr = (seg << 4) + (inst_decoder_regs->edi & 0xFFFF); 32.910 + addr = (seg << 4) + (regs->edi & 0xFFFF); 32.911 } 32.912 - } else { /* XXX should really look at GDT[ds/es].base too */ 32.913 - if (va == inst_decoder_regs->edi) { 32.914 + } else { 32.915 + if (va == regs->edi) { 32.916 dir = IOREQ_WRITE; 32.917 - addr = inst_decoder_regs->esi; 32.918 + addr = regs->esi; 32.919 } else { 32.920 dir = IOREQ_READ; 32.921 - addr = inst_decoder_regs->edi; 32.922 + addr = regs->edi; 32.923 } 32.924 } 32.925 32.926 - send_mmio_req(gpa, &mmio_inst, addr, dir, 1); 32.927 - return; 32.928 + mpcip->flags = mmio_inst.flags; 32.929 + mpcip->instr = mmio_inst.instr; 32.930 + 32.931 + /* 32.932 + * In case of a movs spanning multiple pages, we break the accesses 32.933 + * up into multiple pages (the device model works with non-continguous 32.934 + * physical guest pages). To copy just one page, we adjust %ecx and 32.935 + * do not advance %eip so that the next "rep movs" copies the next page. 32.936 + * Unaligned accesses, for example movsl starting at PGSZ-2, are 32.937 + * turned into a single copy where we handle the overlapping memory 32.938 + * copy ourself. After this copy succeeds, "rep movs" is executed 32.939 + * again. 
32.940 + */ 32.941 + if ((addr & PAGE_MASK) != ((addr + size - 1) & PAGE_MASK)) { 32.942 + unsigned long value = 0; 32.943 + 32.944 + mpcip->flags |= OVERLAP; 32.945 + 32.946 + regs->eip -= inst_len; /* do not advance %eip */ 32.947 + 32.948 + if (dir == IOREQ_WRITE) 32.949 + vmx_copy(&value, addr, size, VMX_COPY_IN); 32.950 + send_mmio_req(IOREQ_TYPE_COPY, gpa, 1, size, value, dir, 0); 32.951 + } else { 32.952 + if ((addr & PAGE_MASK) != ((addr + count * size - 1) & PAGE_MASK)) { 32.953 + regs->eip -= inst_len; /* do not advance %eip */ 32.954 + 32.955 + if (sign > 0) 32.956 + count = (PAGE_SIZE - (addr & ~PAGE_MASK)) / size; 32.957 + else 32.958 + count = (addr & ~PAGE_MASK) / size; 32.959 + } 32.960 + 32.961 + send_mmio_req(IOREQ_TYPE_COPY, gpa, count, size, addr, dir, 1); 32.962 + } 32.963 + break; 32.964 } 32.965 32.966 - if (!strncmp((char *)mmio_inst.i_name, "mov", 3)) { 32.967 - long value = 0; 32.968 - int size, index; 32.969 + case INSTR_MOVZ: 32.970 + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); 32.971 + break; 32.972 + 32.973 + case INSTR_STOS: 32.974 + /* 32.975 + * Since the destination is always in (contiguous) mmio space we don't 32.976 + * need to break it up into pages. 32.977 + */ 32.978 + mpcip->flags = mmio_inst.flags; 32.979 + mpcip->instr = mmio_inst.instr; 32.980 + send_mmio_req(IOREQ_TYPE_COPY, gpa, 32.981 + GET_REPEAT_COUNT(), mmio_inst.op_size, regs->eax, IOREQ_WRITE, 0); 32.982 + break; 32.983 + 32.984 + case INSTR_OR: 32.985 + mmio_operands(IOREQ_TYPE_OR, gpa, &mmio_inst, mpcip, regs); 32.986 + break; 32.987 32.988 - if (read_from_mmio(&mmio_inst)) { 32.989 - // Send the request and waiting for return value. 32.990 - mpci_p->mmio_target = mmio_inst.operand[1]; 32.991 - send_mmio_req(gpa, &mmio_inst, value, IOREQ_READ, 0); 32.992 - return; 32.993 - } else { 32.994 - // Write to MMIO 32.995 - if (mmio_inst.operand[0] & IMMEDIATE) { 32.996 - value = mmio_inst.immediate; 32.997 - } else if (mmio_inst.operand[0] & REGISTER) { 32.998 - size = operand_size(mmio_inst.operand[0]); 32.999 - index = operand_index(mmio_inst.operand[0]); 32.1000 - value = get_reg_value(size, index, 0, inst_decoder_regs); 32.1001 - } else { 32.1002 - domain_crash_synchronous(); 32.1003 - } 32.1004 - send_mmio_req(gpa, &mmio_inst, value, IOREQ_WRITE, 0); 32.1005 - return; 32.1006 - } 32.1007 + case INSTR_AND: 32.1008 + mmio_operands(IOREQ_TYPE_AND, gpa, &mmio_inst, mpcip, regs); 32.1009 + break; 32.1010 + 32.1011 + case INSTR_XOR: 32.1012 + mmio_operands(IOREQ_TYPE_XOR, gpa, &mmio_inst, mpcip, regs); 32.1013 + break; 32.1014 + 32.1015 + case INSTR_CMP: 32.1016 + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); 32.1017 + break; 32.1018 + 32.1019 + case INSTR_TEST: 32.1020 + mmio_operands(IOREQ_TYPE_COPY, gpa, &mmio_inst, mpcip, regs); 32.1021 + break; 32.1022 + 32.1023 + default: 32.1024 + printf("Unhandled MMIO instruction\n"); 32.1025 + domain_crash_synchronous(); 32.1026 } 32.1027 - 32.1028 - if (!strncmp((char *)mmio_inst.i_name, "stos", 4)) { 32.1029 - send_mmio_req(gpa, &mmio_inst, 32.1030 - inst_decoder_regs->eax, IOREQ_WRITE, 0); 32.1031 - return; 32.1032 - } 32.1033 - /* Workaround for cmp instruction */ 32.1034 - if (!strncmp((char *)mmio_inst.i_name, "cmp", 3)) { 32.1035 - inst_decoder_regs->eflags &= ~X86_EFLAGS_ZF; 32.1036 - __vmwrite(GUEST_RFLAGS, inst_decoder_regs->eflags); 32.1037 - return; 32.1038 - } 32.1039 - 32.1040 - domain_crash_synchronous(); 32.1041 } 32.1042 32.1043 #endif /* CONFIG_VMX */
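The INSTR_MOVS path above splits a "rep movs" so that no single request crosses a page boundary: it clamps the repeat count to the current page and leaves %eip unadvanced so the remaining iterations fault again and arrive as a fresh request. A minimal user-space sketch of just that clamping arithmetic (clamp_count is a hypothetical name; PAGE_SIZE/PAGE_MASK as on x86; the separate overlapping-access case from the hunk is not modelled):

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define PAGE_MASK (~(PAGE_SIZE - 1))

static unsigned long clamp_count(unsigned long addr, unsigned long size,
                                 unsigned long count, int df)
{
    /* Whole run stays within one page: no clamping needed. */
    if ((addr & PAGE_MASK) == ((addr + count * size - 1) & PAGE_MASK))
        return count;
    /* Otherwise copy only up to (df=0) or down to (df=1) the page
     * boundary; the caller leaves %eip unadvanced so the rest refaults. */
    return df ? (addr & ~PAGE_MASK) / size
              : (PAGE_SIZE - (addr & ~PAGE_MASK)) / size;
}

int main(void)
{
    /* movsl (size 4), 100 reps, starting 12 bytes below a page end,
     * direction flag clear: only 3 iterations fit in the first page. */
    printf("%lu\n", clamp_count(0x2000 - 12, 4, 100, 0));
    return 0;
}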
33.1 --- a/xen/common/memory.c Sat Sep 03 16:57:54 2005 +0000 33.2 +++ b/xen/common/memory.c Sat Sep 03 16:58:50 2005 +0000 33.3 @@ -25,7 +25,8 @@ increase_reservation( 33.4 unsigned long *extent_list, 33.5 unsigned int nr_extents, 33.6 unsigned int extent_order, 33.7 - unsigned int flags) 33.8 + unsigned int flags, 33.9 + int *preempted) 33.10 { 33.11 struct pfn_info *page; 33.12 unsigned long i; 33.13 @@ -43,7 +44,10 @@ increase_reservation( 33.14 for ( i = 0; i < nr_extents; i++ ) 33.15 { 33.16 if ( hypercall_preempt_check() ) 33.17 + { 33.18 + *preempted = 1; 33.19 return i; 33.20 + } 33.21 33.22 if ( unlikely((page = alloc_domheap_pages( 33.23 d, extent_order, flags)) == NULL) ) 33.24 @@ -67,7 +71,8 @@ decrease_reservation( 33.25 unsigned long *extent_list, 33.26 unsigned int nr_extents, 33.27 unsigned int extent_order, 33.28 - unsigned int flags) 33.29 + unsigned int flags, 33.30 + int *preempted) 33.31 { 33.32 struct pfn_info *page; 33.33 unsigned long i, j, mpfn; 33.34 @@ -78,7 +83,10 @@ decrease_reservation( 33.35 for ( i = 0; i < nr_extents; i++ ) 33.36 { 33.37 if ( hypercall_preempt_check() ) 33.38 + { 33.39 + *preempted = 1; 33.40 return i; 33.41 + } 33.42 33.43 if ( unlikely(__get_user(mpfn, &extent_list[i]) != 0) ) 33.44 return i; 33.45 @@ -124,7 +132,7 @@ decrease_reservation( 33.46 long do_memory_op(int cmd, void *arg) 33.47 { 33.48 struct domain *d; 33.49 - int rc, start_extent, op, flags = 0; 33.50 + int rc, start_extent, op, flags = 0, preempted = 0; 33.51 struct xen_memory_reservation reservation; 33.52 33.53 op = cmd & ((1 << START_EXTENT_SHIFT) - 1); 33.54 @@ -165,19 +173,18 @@ long do_memory_op(int cmd, void *arg) 33.55 reservation.extent_start, 33.56 reservation.nr_extents, 33.57 reservation.extent_order, 33.58 - flags); 33.59 + flags, 33.60 + &preempted); 33.61 33.62 if ( unlikely(reservation.domid != DOMID_SELF) ) 33.63 put_domain(d); 33.64 33.65 rc += start_extent; 33.66 33.67 - if ( (rc != reservation.nr_extents) && hypercall_preempt_check() ) 33.68 + if ( preempted ) 33.69 return hypercall2_create_continuation( 33.70 - __HYPERVISOR_memory_op, 33.71 - op | (rc << START_EXTENT_SHIFT), 33.72 - arg); 33.73 - 33.74 + __HYPERVISOR_memory_op, op | (rc << START_EXTENT_SHIFT), arg); 33.75 + 33.76 break; 33.77 33.78 case XENMEM_maximum_ram_page:
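The memory.c change above replaces a racy second hypercall_preempt_check() in do_memory_op() with an explicit out-parameter: the inner loop records that it stopped because of preemption, so the caller creates a continuation only in that case and never confuses an allocation failure with a preempted loop. A stand-alone sketch of the pattern, with hypothetical names standing in for the Xen primitives:

#include <stdio.h>

static int preempt_calls;

/* Stand-in for hypercall_preempt_check(): pretend work becomes pending
 * after three extents have been processed. */
static int preempt_pending(void) { return ++preempt_calls > 3; }

static int process_extent(unsigned long extent) { (void)extent; return 0; }

static long process_some(const unsigned long *list, unsigned int nr,
                         int *preempted)
{
    unsigned long i;

    for (i = 0; i < nr; i++) {
        if (preempt_pending()) {
            *preempted = 1;   /* caller should build a continuation */
            return i;         /* extents completed so far */
        }
        if (process_extent(list[i]) != 0)
            return i;         /* hard failure: stop, but do not restart */
    }
    return nr;
}

int main(void)
{
    unsigned long list[8] = { 0 };
    int preempted = 0;
    long done = process_some(list, 8, &preempted);

    printf("done=%ld preempted=%d\n", done, preempted); /* done=3 preempted=1 */
    return 0;
}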
34.1 --- a/xen/include/asm-x86/vmx.h Sat Sep 03 16:57:54 2005 +0000 34.2 +++ b/xen/include/asm-x86/vmx.h Sat Sep 03 16:58:50 2005 +0000 34.3 @@ -471,4 +471,7 @@ static inline int iopacket_port(struct d 34.4 void load_cpu_user_regs(struct cpu_user_regs *regs); 34.5 void store_cpu_user_regs(struct cpu_user_regs *regs); 34.6 34.7 +enum { VMX_COPY_IN = 0, VMX_COPY_OUT }; 34.8 +int vmx_copy(void *buf, unsigned long laddr, int size, int dir); 34.9 + 34.10 #endif /* __ASM_X86_VMX_H__ */
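vmx.h now exports vmx_copy() with an explicit direction enum rather than separate in/out helpers. A hedged usage sketch; the flat guest_mem buffer and the return-1-on-success convention are assumptions for illustration, since the function body is not part of this changeset excerpt:

#include <stdio.h>
#include <string.h>

typedef unsigned int u32;
enum { VMX_COPY_IN = 0, VMX_COPY_OUT };

/* Stub standing in for the real vmx_copy(), which would translate the
 * guest linear address through the guest page tables; here guest memory
 * is just a flat buffer. */
static unsigned char guest_mem[8192];

static int vmx_copy(void *buf, unsigned long laddr, int size, int dir)
{
    if (dir == VMX_COPY_IN)
        memcpy(buf, &guest_mem[laddr], size);   /* guest -> hypervisor */
    else
        memcpy(&guest_mem[laddr], buf, size);   /* hypervisor -> guest */
    return 1;                                   /* assumed: success */
}

int main(void)
{
    u32 value = 0;

    vmx_copy(&value, 0x100, sizeof(value), VMX_COPY_IN);
    value |= 1;                                 /* read-modify-write */
    vmx_copy(&value, 0x100, sizeof(value), VMX_COPY_OUT);
    printf("%d\n", guest_mem[0x100]);           /* 1 */
    return 0;
}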
35.1 --- a/xen/include/asm-x86/vmx_platform.h Sat Sep 03 16:57:54 2005 +0000 35.2 +++ b/xen/include/asm-x86/vmx_platform.h Sat Sep 03 16:58:50 2005 +0000 35.3 @@ -24,8 +24,7 @@ 35.4 #include <asm/vmx_virpit.h> 35.5 #include <asm/vmx_intercept.h> 35.6 35.7 -#define MAX_OPERAND_NUM 3 35.8 -#define I_NAME_LEN 16 35.9 +#define MAX_OPERAND_NUM 2 35.10 35.11 #define mk_operand(size, index, seg, flag) \ 35.12 (((size) << 24) | ((index) << 16) | ((seg) << 8) | (flag)) 35.13 @@ -35,54 +34,60 @@ 35.14 35.15 #define operand_index(operand) \ 35.16 ((operand >> 16) & 0xFF) 35.17 - //For instruction.operand[].size 35.18 + 35.19 +/* for instruction.operand[].size */ 35.20 #define BYTE 1 35.21 #define WORD 2 35.22 #define LONG 4 35.23 #define QUAD 8 35.24 #define BYTE_64 16 35.25 35.26 - //For instruction.operand[].flag 35.27 +/* for instruction.operand[].flag */ 35.28 #define REGISTER 0x1 35.29 #define MEMORY 0x2 35.30 #define IMMEDIATE 0x4 35.31 -#define WZEROEXTEND 0x8 35.32 35.33 - //For instruction.flags 35.34 +/* for instruction.flags */ 35.35 #define REPZ 0x1 35.36 #define REPNZ 0x2 35.37 +#define OVERLAP 0x4 35.38 + 35.39 +#define INSTR_PIO 1 35.40 +#define INSTR_OR 2 35.41 +#define INSTR_AND 3 35.42 +#define INSTR_XOR 4 35.43 +#define INSTR_CMP 5 35.44 +#define INSTR_MOV 6 35.45 +#define INSTR_MOVS 7 35.46 +#define INSTR_MOVZ 8 35.47 +#define INSTR_STOS 9 35.48 +#define INSTR_TEST 10 35.49 35.50 struct instruction { 35.51 - __s8 i_name[I_NAME_LEN]; //Instruction's name 35.52 - __s16 op_size; //The operand's bit size, e.g. 16-bit or 32-bit. 35.53 - 35.54 - __u64 offset; //The effective address 35.55 - //offset = Base + (Index * Scale) + Displacement 35.56 - 35.57 + __s8 instr; /* instruction type */ 35.58 + __s16 op_size; /* the operand's bit size, e.g. 16-bit or 32-bit */ 35.59 __u64 immediate; 35.60 - 35.61 - __u16 seg_sel; //Segmentation selector 35.62 - 35.63 - __u32 operand[MAX_OPERAND_NUM]; //The order of operand is from AT&T Assembly 35.64 - __s16 op_num; //The operand numbers 35.65 - 35.66 - __u32 flags; // 35.67 + __u16 seg_sel; /* segmentation selector */ 35.68 + __u32 operand[MAX_OPERAND_NUM]; /* order is AT&T assembly */ 35.69 + __u32 flags; 35.70 }; 35.71 35.72 #define MAX_INST_LEN 32 35.73 35.74 -struct mi_per_cpu_info 35.75 -{ 35.76 - unsigned long mmio_target; 35.77 - struct cpu_user_regs *inst_decoder_regs; 35.78 +struct mi_per_cpu_info { 35.79 + int flags; 35.80 + int instr; /* instruction */ 35.81 + unsigned long operand[2]; /* operands */ 35.82 + unsigned long immediate; /* immediate portion */ 35.83 + struct cpu_user_regs *inst_decoder_regs; /* current context */ 35.84 }; 35.85 35.86 struct virtual_platform_def { 35.87 - unsigned long *real_mode_data; /* E820, etc. */ 35.88 + unsigned long *real_mode_data; /* E820, etc. */ 35.89 unsigned long shared_page_va; 35.90 struct vmx_virpit_t vmx_pit; 35.91 struct vmx_handler_t vmx_handler; 35.92 - struct mi_per_cpu_info mpci; /* MMIO */ 35.93 + struct mi_per_cpu_info mpci; /* MMIO */ 35.94 }; 35.95 35.96 extern void handle_mmio(unsigned long, unsigned long);
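The operand descriptors that replace the old i_name strings pack size, register index, segment and operand kind into a single 32-bit word via mk_operand(), which is what vmx_decode() stores in instruction.operand[] and mmio_operands() later unpacks. A small demo of the packing (macros copied from the header above; operand_size() at bits 24-31 is an assumption, as its definition falls outside this hunk):

#include <stdio.h>

#define REGISTER  0x1
#define MEMORY    0x2
#define IMMEDIATE 0x4

#define mk_operand(size, index, seg, flag) \
    (((size) << 24) | ((index) << 16) | ((seg) << 8) | (flag))
#define operand_size(operand)  (((operand) >> 24) & 0xFF)  /* assumed */
#define operand_index(operand) (((operand) >> 16) & 0xFF)

int main(void)
{
    /* LONG (4-byte) access through register 3 (%ebx) */
    unsigned int op = mk_operand(4, 3, 0, REGISTER);

    printf("size=%u index=%u is_reg=%u\n",
           operand_size(op), operand_index(op), op & REGISTER);
    return 0;
}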
36.1 --- a/xen/include/public/io/ioreq.h Sat Sep 03 16:57:54 2005 +0000 36.2 +++ b/xen/include/public/io/ioreq.h Sat Sep 03 16:58:50 2005 +0000 36.3 @@ -29,9 +29,17 @@ 36.4 #define STATE_IORESP_READY 3 36.5 #define STATE_IORESP_HOOK 4 36.6 36.7 -/* VMExit dispatcher should cooperate with instruction decoder to 36.8 - prepare this structure and notify service OS and DM by sending 36.9 - virq */ 36.10 +#define IOREQ_TYPE_PIO 0 /* pio */ 36.11 +#define IOREQ_TYPE_COPY 1 /* mmio ops */ 36.12 +#define IOREQ_TYPE_AND 2 36.13 +#define IOREQ_TYPE_OR 3 36.14 +#define IOREQ_TYPE_XOR 4 36.15 + 36.16 +/* 36.17 + * VMExit dispatcher should cooperate with instruction decoder to 36.18 + * prepare this structure and notify service OS and DM by sending 36.19 + * virq 36.20 + */ 36.21 typedef struct { 36.22 u64 addr; /* physical address */ 36.23 u64 size; /* size in bytes */ 36.24 @@ -43,8 +51,8 @@ typedef struct { 36.25 u8 state:4; 36.26 u8 pdata_valid:1; /* if 1, use pdata above */ 36.27 u8 dir:1; /* 1=read, 0=write */ 36.28 - u8 port_mm:1; /* 0=portio, 1=mmio */ 36.29 u8 df:1; 36.30 + u8 type; /* I/O type */ 36.31 } ioreq_t; 36.32 36.33 #define MAX_VECTOR 256
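Widening the old port_mm bit into a type byte is what lets the decoder above forward OR/AND/XOR (and plain copies) to the device model as explicit read-modify-write requests instead of faking them in the hypervisor. An illustrative dispatch on the new constants, not code from the actual device model:

#include <stdio.h>

enum { IOREQ_TYPE_PIO, IOREQ_TYPE_COPY, IOREQ_TYPE_AND,
       IOREQ_TYPE_OR, IOREQ_TYPE_XOR };

/* Hypothetical device-model helper: combine the value read from the
 * emulated device with the operand carried in the ioreq. */
static unsigned long apply_ioreq(int type, unsigned long dev_val,
                                 unsigned long req_val)
{
    switch (type) {
    case IOREQ_TYPE_AND: return dev_val & req_val;
    case IOREQ_TYPE_OR:  return dev_val | req_val;
    case IOREQ_TYPE_XOR: return dev_val ^ req_val;
    default:             return req_val;  /* IOREQ_TYPE_COPY: plain move */
    }
}

int main(void)
{
    printf("%#lx\n", apply_ioreq(IOREQ_TYPE_AND, 0xff00ff00UL, 0x0ff00ff0UL));
    return 0;
}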