debuggers.hg
changeset 20373:809b20f066fb
Refresh to c/s 21046. Also, update kdb as follows:
- update kdb_cmds for xen 4.0
- check if xen addr in xen text during bp so bp is not set in 64bit dom0
- don't check for guest_mode() in f_cur
- support more than 64 cpus, up to whatever NR_CPUS is
line diff
1.1 --- a/.hgignore Mon Nov 02 19:35:54 2009 -0800 1.2 +++ b/.hgignore Fri Mar 19 18:36:57 2010 -0700 1.3 @@ -137,7 +137,8 @@ 1.4 ^tools/firmware/etherboot/gpxe/.*$ 1.5 ^tools/firmware/extboot/extboot.img$ 1.6 ^tools/firmware/extboot/signrom$ 1.7 -^tools/firmware/hvmloader/acpi/acpigen$ 1.8 +^tools/firmware/hvmloader/acpi/dsdt.*\.c$ 1.9 +^tools/firmware/hvmloader/acpi/ssdt_.*\.h$ 1.10 ^tools/firmware/hvmloader/hvmloader$ 1.11 ^tools/firmware/hvmloader/roms\.h$ 1.12 ^tools/firmware/rombios/BIOS-bochs-[^/]*$ 1.13 @@ -147,6 +148,9 @@ 1.14 ^tools/firmware/vgabios/vbetables-gen$ 1.15 ^tools/firmware/vgabios/vbetables\.h$ 1.16 ^tools/flask/loadpolicy/flask-loadpolicy$ 1.17 +^tools/flask/utils/flask-getenforce$ 1.18 +^tools/flask/utils/flask-loadpolicy$ 1.19 +^tools/flask/utils/flask-setenforce$ 1.20 ^tools/fs-back/fs-backend$ 1.21 ^tools/hotplug/common/hotplugpath\.sh$ 1.22 ^tools/include/xen/.*$ 1.23 @@ -176,6 +180,8 @@ 1.24 ^tools/libxen/libxenapi- 1.25 ^tools/libxen/test/test_bindings$ 1.26 ^tools/libxen/test/test_event_handling$ 1.27 +^tools/libxl/libxlu_cfg_y\.output$ 1.28 +^tools/libxl/xl$ 1.29 ^tools/libaio/src/.*\.ol$ 1.30 ^tools/libaio/src/.*\.os$ 1.31 ^tools/misc/cpuperf/cpuperf-perfcntr$ 1.32 @@ -196,9 +202,12 @@ 1.33 ^tools/misc/xen-hvmctx$ 1.34 ^tools/misc/gtraceview$ 1.35 ^tools/misc/gtracestat$ 1.36 +^tools/misc/xenlockprof$ 1.37 ^tools/pygrub/build/.*$ 1.38 ^tools/python/build/.*$ 1.39 ^tools/python/xen/util/path\.py$ 1.40 +^tools/remus/imqebt/imqebt$ 1.41 +^tools/remus/kmod/.*(\.cmd|\.mod|\.ko|\.mod\.c|\.symvers|\.xen)$ 1.42 ^tools/security/secpol_tool$ 1.43 ^tools/security/xen/.*$ 1.44 ^tools/security/xensec_tool$ 1.45 @@ -230,6 +239,7 @@ 1.46 ^tools/xenfb/vncfb$ 1.47 ^tools/xenmon/xentrace_setmask$ 1.48 ^tools/xenmon/xenbaked$ 1.49 +^tools/xenpaging/xenpaging$ 1.50 ^tools/xenpmd/xenpmd$ 1.51 ^tools/xenstat/xentop/xentop$ 1.52 ^tools/xenstore/testsuite/tmp/.*$
2.1 --- a/.hgtags Mon Nov 02 19:35:54 2009 -0800 2.2 +++ b/.hgtags Fri Mar 19 18:36:57 2010 -0700 2.3 @@ -36,3 +36,9 @@ 1e99ba54035623731bc7318a8357aa6a118c5da1 2.4 d611d9ac6d0271b53eb1d4e5d0c4ef20b269eea8 3.4.0-rc1 2.5 087854cf3ed9e30ce6bcf7499c9675b759e1e9e7 3.4.0-rc2 2.6 22e01301ff64c6f9f835f604523ac019f0f5e993 3.4.0-rc3 2.7 +67b5ad8ae87e64159807374ad66d5b5b2fb2ca1f 4.0.0-rc1 2.8 +e5e4573bcaba68a8b93a35768c825c1e8daa23be 4.0.0-rc2 2.9 +7d565d58f49859a2161f0e74844773d3b3312634 4.0.0-rc3 2.10 +912295f1b1f30307975c7514569f6f9c8faae4a7 4.0.0-rc4 2.11 +92f2ee87e5018073edc08734b425bc60bcd80bcd 4.0.0-rc5 2.12 +b4a1832a916f1e8f2aa2ad5b1efa155f9dd0cb4f 4.0.0-rc6
3.1 --- a/Config.mk Mon Nov 02 19:35:54 2009 -0800 3.2 +++ b/Config.mk Fri Mar 19 18:36:57 2010 -0700 3.3 @@ -1,7 +1,7 @@ 3.4 # -*- mode: Makefile; -*- 3.5 3.6 # A debug build of Xen and tools? 3.7 -debug ?= y 3.8 +debug ?= n 3.9 3.10 XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \ 3.11 -e s/i86pc/x86_32/ -e s/amd64/x86_64/) 3.12 @@ -35,6 +35,9 @@ EXTRA_INCLUDES += $(EXTRA_PREFIX)/includ 3.13 EXTRA_LIB += $(EXTRA_PREFIX)/$(LIBLEAFDIR) 3.14 endif 3.15 3.16 +BISON ?= bison 3.17 +FLEX ?= flex 3.18 + 3.19 PYTHON ?= python 3.20 PYTHON_PREFIX_ARG ?= --prefix="$(PREFIX)" 3.21 # The above requires that PREFIX contains *no spaces*. This variable is here 3.22 @@ -153,9 +156,10 @@ QEMU_REMOTE=http://xenbits.xensource.com 3.23 # CONFIG_QEMU ?= ../qemu-xen.git 3.24 CONFIG_QEMU ?= $(QEMU_REMOTE) 3.25 3.26 -QEMU_TAG ?= 71324566f3b95bb88105659439adaef1d5bd155c 3.27 -# Tue Oct 13 18:16:42 2009 +0100 3.28 -# passthrough: support passthrough in stubdoms 3.29 +QEMU_TAG := xen-4.0.0-rc6 3.30 +#QEMU_TAG ?= e5d14857cd67490bf956d97c8888c0be95ed3f78 3.31 +# Thu Feb 18 15:36:29 2010 +0000 3.32 +# When xen_platform_pci=0 also disable fixed Xen platform ioports 3.33 3.34 OCAML_XENSTORED_REPO=http://xenbits.xensource.com/ext/xen-ocaml-tools.hg 3.35
4.1 --- a/README Mon Nov 02 19:35:54 2009 -0800 4.2 +++ b/README Fri Mar 19 18:36:57 2010 -0700 4.3 @@ -1,10 +1,10 @@ 4.4 ################################# 4.5 - __ __ _____ _ _ 4.6 - \ \/ /___ _ __ |___ /| || | 4.7 - \ // _ \ '_ \ |_ \| || |_ 4.8 - / \ __/ | | | ___) |__ _| 4.9 - /_/\_\___|_| |_| |____(_) |_| 4.10 - 4.11 + __ __ _ _ ___ 4.12 + \ \/ /___ _ __ | || | / _ \ 4.13 + \ // _ \ '_ \ | || |_| | | | 4.14 + / \ __/ | | | |__ _| |_| | 4.15 + /_/\_\___|_| |_| |_|(_)___/ 4.16 + 4.17 ################################# 4.18 4.19 http://www.xen.org/ 4.20 @@ -17,14 +17,12 @@ Systems Research Group of the University 4.21 Laboratory, as part of the UK-EPSRC funded XenoServers project. Xen 4.22 is freely-distributable Open Source software, released under the GNU 4.23 GPL. Since its initial public release, Xen has grown a large 4.24 -development community, spearheaded by XenSource Inc, a company created 4.25 -by the original Xen development team to build enterprise products 4.26 -around Xen. 4.27 +development community, spearheaded by xen.org (http://www.xen.org). 4.28 4.29 -The 3.4 release offers excellent performance, hardware support and 4.30 +The 4.0 release offers excellent performance, hardware support and 4.31 enterprise-grade features such as x86_32-PAE, x86_64, SMP guests and 4.32 -live relocation of VMs. Ports to Linux 2.6, Linux 2.4, NetBSD, FreeBSD 4.33 -and Solaris are available from the community. 4.34 +live relocation of VMs. Ports to Linux, NetBSD, FreeBSD and Solaris 4.35 +are available from the community. 4.36 4.37 This file contains some quick-start instructions to install Xen on 4.38 your system. For full documentation, see the Xen User Manual. If this 4.39 @@ -33,53 +31,8 @@ is a pre-built release then you can find 4.40 If you have a source release, then 'make -C docs' will build the 4.41 manual at docs/pdf/user.pdf. 4.42 4.43 -Quick-Start Guide - Pre-Built Binary Release 4.44 -============================================ 4.45 - 4.46 -[NB. 
Unless noted otherwise, all the following steps should be 4.47 -performed with root privileges.] 4.48 - 4.49 -1. Install the binary distribution onto your filesystem: 4.50 - 4.51 - # sh ./install.sh 4.52 - 4.53 - Among other things, this will install Xen and Xen-ready Linux 4.54 - kernel files in /boot, kernel modules and Python packages in /lib, 4.55 - and various control tools in standard 'bin' directories. 4.56 - 4.57 -2. Configure your bootloader to boot Xen and an initial Linux virtual 4.58 - machine. Note that Xen currently only works with GRUB and pxelinux 4.59 - derived boot loaders: less common alternatives such as LILO are 4.60 - *not* supported. You can most likely find your GRUB menu file at 4.61 - /boot/grub/menu.lst: edit this file to include an entry like the 4.62 - following: 4.63 - 4.64 - title Xen 3.4 / XenLinux 2.6 4.65 - kernel /boot/xen-3.4.gz console=vga 4.66 - module /boot/vmlinuz-2.6-xen root=<root-dev> ro console=tty0 4.67 - module /boot/initrd-2.6-xen.img 4.68 - 4.69 - NB: Not all kernel configs need an initial ram disk (initrd), but 4.70 - if you do specify one you'll need to use the 'module' grub directive 4.71 - rather than 'initrd'. 4.72 - 4.73 - The linux command line takes all the usual options, such as 4.74 - root=<root-dev> to specify your usual root partition (e.g., 4.75 - /dev/hda1). 4.76 - 4.77 - The Xen command line takes a number of optional arguments described 4.78 - in the manual. The most common is 'dom0_mem=xxxM' which sets the 4.79 - amount of memory to allocate for use by your initial virtual 4.80 - machine (known as domain 0). Note that Xen itself reserves about 4.81 - 32MB memory for internal use, which is not available for allocation 4.82 - to virtual machines. 4.83 - 4.84 -3. Reboot your system and select the "Xen 3.4 / XenLinux 2.6" menu 4.85 - option. After booting Xen, Linux will start and your initialisation 4.86 - scripts should execute in the usual way. 
4.87 - 4.88 -Quick-Start Guide - Source Release 4.89 -================================== 4.90 +Quick-Start Guide 4.91 +================= 4.92 4.93 First, there are a number of prerequisites for building a Xen source 4.94 release. Make sure you have all the following installed, either by 4.95 @@ -96,6 +49,7 @@ provided by your Linux distributor: 4.96 * bridge-utils package (/sbin/brctl) 4.97 * iproute package (/sbin/ip) 4.98 * hotplug or udev 4.99 + * GNU bison and GNU flex 4.100 4.101 [NB. Unless noted otherwise, all the following steps should be 4.102 performed with root privileges.]
5.1 --- a/buildconfigs/mk.linux-2.6-pvops Mon Nov 02 19:35:54 2009 -0800 5.2 +++ b/buildconfigs/mk.linux-2.6-pvops Fri Mar 19 18:36:57 2010 -0700 5.3 @@ -5,7 +5,11 @@ IMAGE_TARGET ?= bzImage 5.4 5.5 XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config 5.6 5.7 +ifeq ($(GIT_HTTP),y) 5.8 XEN_LINUX_GIT_URL ?= http://www.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git 5.9 +else 5.10 +XEN_LINUX_GIT_URL ?= git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git 5.11 +endif 5.12 XEN_LINUX_GIT_REMOTEBRANCH ?= xen/master 5.13 5.14 EXTRAVERSION ?=
6.1 --- a/buildconfigs/mk.linux-2.6-tip-latest Mon Nov 02 19:35:54 2009 -0800 6.2 +++ b/buildconfigs/mk.linux-2.6-tip-latest Fri Mar 19 18:36:57 2010 -0700 6.3 @@ -5,7 +5,11 @@ IMAGE_TARGET ?= bzImage vmlinux 6.4 6.5 XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config 6.6 6.7 +ifeq ($(GIT_HTTP),y) 6.8 XEN_LINUX_GIT_URL ?= http://www.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip.git 6.9 +else 6.10 +XEN_LINUX_GIT_URL ?= git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip.git 6.11 +endif 6.12 XEN_LINUX_GIT_REMOTEBRANCH ?= auto-latest 6.13 6.14 EXTRAVERSION ?=
7.1 --- a/config/NetBSD.mk Mon Nov 02 19:35:54 2009 -0800 7.2 +++ b/config/NetBSD.mk Fri Mar 19 18:36:57 2010 -0700 7.3 @@ -6,3 +6,5 @@ CURSES_LIBS = -lcurses 7.4 LIBLEAFDIR_x86_64 = lib 7.5 LIBEXEC = $(PREFIX)/libexec 7.6 PRIVATE_BINDIR = $(BINDIR) 7.7 + 7.8 +WGET = ftp
8.1 --- a/config/StdGNU.mk Mon Nov 02 19:35:54 2009 -0800 8.2 +++ b/config/StdGNU.mk Fri Mar 19 18:36:57 2010 -0700 8.3 @@ -16,6 +16,8 @@ MSGMERGE = msgmerge 8.4 # Allow git to be wrappered in the environment 8.5 GIT ?= git 8.6 8.7 +WGET ?= wget -c 8.8 + 8.9 INSTALL = install 8.10 INSTALL_DIR = $(INSTALL) -d -m0755 -p 8.11 INSTALL_DATA = $(INSTALL) -m0644 -p
9.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 9.2 +++ b/docs/misc/cpuid-config-for-guest.txt Fri Mar 19 18:36:57 2010 -0700 9.3 @@ -0,0 +1,23 @@ 9.4 +CPUID emulation for guest 9.5 +------------------------- 9.6 + 9.7 +When HVM guest tries to execute CPUID, or PV guest tries to execute XEN_CPUID, 9.8 +the xen hypervisor traps and emulates them. 9.9 + 9.10 +For HVM guest and PV DomU guest, xen's CPUID emulation can be adjusted using 9.11 +the guest configuration file if necessary (e.g., to supply better support for 9.12 +guest live migration). The CPUID syntax in guest configuration file is 9.13 +described in detail in the examples like /etc/xen/xmexample.hvm, 9.14 +/etc/xen/xmexample.hvm-stubdom. 9.15 + 9.16 +However, a user (or an administrator) must be aware that the CPUID in guest 9.17 +configuration file can NOT be configured casually. The default CPUID 9.18 +configuration should be safe, but illegal configuration can cause unexpected 9.19 +behaviors of guest -- even can crash guest. 9.20 + 9.21 +For example, we should not expose the MONITOR CPUID feature flag (ECX bit 3; 9.22 +CPUID executed EAX = 1) to HVM guest, otherwise, on guest's attempt of 9.23 +executing MWAIT, the VMExit handler in Xen would inject #UD (Invalid Opcode 9.24 +Exception) into the HVM guest, and guest kernel would panic. 9.25 + 9.26 +/* We can add more unsafe CPUID configuration here in future. */
10.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 10.2 +++ b/docs/misc/pvrdtscp.c Fri Mar 19 18:36:57 2010 -0700 10.3 @@ -0,0 +1,307 @@ 10.4 +/* pvrdtscp algorithm 10.5 + * 10.6 + * This sample code demonstrates the use of the paravirtualized rdtscp 10.7 + * algorithm. Using this algorithm, an application may communicate with 10.8 + * the Xen hypervisor (version 4.0+) to obtain timestamp information which 10.9 + * is both monotonically increasing and has a fixed 1 GHz rate, even across 10.10 + * migrations between machines with different TSC rates and offsets. 10.11 + * Further, the algorithm provides performance near the performance of a 10.12 + * native rdtsc/rdtscp instruction -- much faster than emulation PROVIDED 10.13 + * the application is running on a machine on which the rdtscp instruction 10.14 + * is supported and TSC is "safe". The application must also be running in a 10.15 + * PV domain. (HVM domains may be supported at a later time.) On machines 10.16 + * where TSC is unsafe or the rdtscp instruction is not supported, Xen 10.17 + * (v4.0+) provides emulation which is slower but consistent with the pvrdtscp 10.18 + * algorithm, thus providing support for the algorithm for live migration 10.19 + * across all machines. 10.20 + * 10.21 + * More information can be found within the Xen (4.0+) source tree at 10.22 + * docs/misc/tscmode.txt 10.23 + * 10.24 + * Copyright (c) 2009 Oracle Corporation and/or its affiliates. 10.25 + * All rights reserved 10.26 + * Written by: Dan Magenheimer <dan.magenheimer@oracle.com> 10.27 + * 10.28 + * This code is derived from code licensed under the GNU 10.29 + * General Public License ("GPL") version 2 and is therefore itself 10.30 + * also licensed under the GPL version 2.
10.31 + * 10.32 + * This code is known to compile and run on Oracle Enterprise Linux 5 Update 2 10.33 + * using gcc version 4.1.2, but its purpose is to describe the pvrdtscp 10.34 + * algorithm and its ABI to Xen version 4.0+ 10.35 + */ 10.36 + 10.37 +#include <stdio.h> 10.38 +#include <stdlib.h> 10.39 +#include <string.h> 10.40 +#include <sys/wait.h> 10.41 + 10.42 +#ifdef __LP64__ 10.43 +#define __X86_64__ 10.44 +typedef unsigned short u16; 10.45 +typedef unsigned int u32; 10.46 +typedef unsigned long u64; 10.47 +typedef int i32; 10.48 +typedef long i64; 10.49 +#define NSEC_PER_SEC 1000000000 10.50 +#else 10.51 +#define __X86_32__ 10.52 +typedef unsigned int u16; 10.53 +typedef unsigned long u32; 10.54 +typedef unsigned long long u64; 10.55 +typedef long i32; 10.56 +typedef long long i64; 10.57 +#define NSEC_PER_SEC 1000000000L 10.58 +#endif 10.59 + 10.60 +static inline void hvm_cpuid(u32 idx, u32 sub, 10.61 + u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) 10.62 +{ 10.63 + *eax = idx, *ecx = sub; 10.64 + asm("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) 10.65 + : "0" (*eax), "2" (*ecx)); 10.66 +} 10.67 + 10.68 +static inline void pv_cpuid(u32 idx, u32 sub, 10.69 + u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) 10.70 +{ 10.71 + *eax = idx, *ecx = sub; 10.72 + asm volatile ( "ud2a ; .ascii \"xen\"; cpuid" : "=a" (*eax), 10.73 + "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (*eax), "2" (*ecx)); 10.74 +} 10.75 + 10.76 +static inline u64 do_rdtscp(u32 *aux) 10.77 +{ 10.78 +static u64 last = 0; 10.79 + u32 lo32, hi32; 10.80 + u64 val; 10.81 + 10.82 + asm volatile(".byte 0x0f,0x01,0xf9":"=a"(lo32),"=d"(hi32),"=c" (*aux)); 10.83 + val = lo32 | ((u64)hi32 << 32); 10.84 + return val; 10.85 +} 10.86 + 10.87 +static inline int get_xen_tsc_mode(void) 10.88 +{ 10.89 + u32 val, dummy1, dummy2, dummy3; 10.90 + pv_cpuid(0x40000003,0,&dummy1,&val,&dummy2,&dummy3); 10.91 + return val; 10.92 +} 10.93 + 10.94 +static inline int get_xen_vtsc(void) 10.95 +{ 10.96 + u32 val, dummy1, 
dummy2, dummy3; 10.97 + pv_cpuid(0x40000003,0,&val,&dummy1,&dummy2,&dummy3); 10.98 + return val & 1; 10.99 +} 10.100 + 10.101 +static inline int get_xen_vtsc_khz(void) 10.102 +{ 10.103 + u32 val, dummy1, dummy2, dummy3; 10.104 + pv_cpuid(0x40000003,0,&dummy1,&dummy2,&val,&dummy3); 10.105 + return val; 10.106 +} 10.107 + 10.108 +static inline u32 get_xen_cpu_khz(void) 10.109 +{ 10.110 + u32 cpu_khz, dummy1, dummy2, dummy3; 10.111 + pv_cpuid(0x40000003,2,&cpu_khz,&dummy1,&dummy2,&dummy3); 10.112 + return cpu_khz; 10.113 +} 10.114 + 10.115 +static inline u32 get_xen_incarnation(void) 10.116 +{ 10.117 + u32 incarn, dummy1, dummy2, dummy3; 10.118 + pv_cpuid(0x40000003,0,&dummy1,&dummy2,&dummy3,&incarn); 10.119 + return incarn; 10.120 +} 10.121 + 10.122 +static inline void get_xen_time_values(u64 *offset, u32 *mul_frac, u32 *shift) 10.123 +{ 10.124 + u32 off_lo, off_hi, sys_lo, sys_hi, dummy; 10.125 + 10.126 + pv_cpuid(0x40000003,1,&off_lo,&off_hi,mul_frac,shift); 10.127 + *offset = off_lo | ((u64)off_hi << 32); 10.128 +} 10.129 + 10.130 +static inline u64 scale_delta(u64 delta, u32 tsc_mul_frac, i32 tsc_shift) 10.131 +{ 10.132 + u64 product; 10.133 +#ifdef __X86_32__ 10.134 + u32 tmp1, tmp2; 10.135 +#endif 10.136 + 10.137 + if ( tsc_shift < 0 ) 10.138 + delta >>= -tsc_shift; 10.139 + else 10.140 + delta <<= tsc_shift; 10.141 + 10.142 +#ifdef __X86_32__ 10.143 + asm ( 10.144 + "mul %5 ; " 10.145 + "mov %4,%%eax ; " 10.146 + "mov %%edx,%4 ; " 10.147 + "mul %5 ; " 10.148 + "xor %5,%5 ; " 10.149 + "add %4,%%eax ; " 10.150 + "adc %5,%%edx ; " 10.151 + : "=A" (product), "=r" (tmp1), "=r" (tmp2) 10.152 + : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (tsc_mul_frac) ); 10.153 +#else 10.154 + asm ( 10.155 + "mul %%rdx ; shrd $32,%%rdx,%%rax" 10.156 + : "=a" (product) : "0" (delta), "d" ((u64)tsc_mul_frac) ); 10.157 +#endif 10.158 + 10.159 + return product; 10.160 +} 10.161 + 10.162 +static inline u64 get_pvrdtscp_timestamp(int *discontinuity) 10.163 +{ 10.164 + static int 
firsttime = 1; 10.165 + static u64 last_pvrdtscp_timestamp = 0; 10.166 + static u32 last_tsc_aux; 10.167 + static u64 xen_ns_offset; 10.168 + static u32 xen_tsc_to_ns_mul_frac, xen_tsc_to_ns_shift; 10.169 + u32 this_tsc_aux; 10.170 + u64 timestamp, cur_tsc, cur_ns; 10.171 + 10.172 + if (firsttime) { 10.173 + cur_tsc = do_rdtscp(&last_tsc_aux); 10.174 + get_xen_time_values(&xen_ns_offset, &xen_tsc_to_ns_mul_frac, 10.175 + &xen_tsc_to_ns_shift); 10.176 + cur_ns = scale_delta(cur_tsc, xen_tsc_to_ns_mul_frac, 10.177 + xen_tsc_to_ns_shift); 10.178 + timestamp = cur_ns - xen_ns_offset; 10.179 + last_pvrdtscp_timestamp = timestamp; 10.180 + firsttime = 0; 10.181 + } 10.182 + cur_tsc = do_rdtscp(&this_tsc_aux); 10.183 + *discontinuity = 0; 10.184 + while (this_tsc_aux != last_tsc_aux) { 10.185 + /* if tsc_aux changed, try again */ 10.186 + last_tsc_aux = this_tsc_aux; 10.187 + get_xen_time_values(&xen_ns_offset, &xen_tsc_to_ns_mul_frac, 10.188 + &xen_tsc_to_ns_shift); 10.189 + cur_tsc = do_rdtscp(&this_tsc_aux); 10.190 + *discontinuity = 1; 10.191 + } 10.192 + 10.193 + /* compute nsec from TSC and Xen time values */ 10.194 + cur_ns = scale_delta(cur_tsc, xen_tsc_to_ns_mul_frac, 10.195 + xen_tsc_to_ns_shift); 10.196 + timestamp = cur_ns - xen_ns_offset; 10.197 + 10.198 + /* enforce monotonicity just in case */ 10.199 + if ((i64)(timestamp - last_pvrdtscp_timestamp) > 0) 10.200 + last_pvrdtscp_timestamp = timestamp; 10.201 + else { 10.202 + /* this should never happen but we'll check it anyway in 10.203 + * case of some strange combination of scaling errors 10.204 + * occurs across a very fast migration */ 10.205 + printf("Time went backwards by %lluns\n", 10.206 + (unsigned long long)(last_pvrdtscp_timestamp-timestamp)); 10.207 + timestamp = ++last_pvrdtscp_timestamp; 10.208 + } 10.209 + return timestamp; 10.210 +} 10.211 + 10.212 +#define HVM 1 10.213 +#define PVM 0 10.214 + 10.215 +static int running_on_xen(int hvm, u16 *version_major, u16 *version_minor) 10.216 +{ 10.217 
+ u32 eax, ebx, ecx, edx, base; 10.218 + union { char csig[16]; u32 u[4]; } sig; 10.219 + 10.220 + for (base=0x40000000; base < 0x40010000; base += 0x100) { 10.221 + if (hvm==HVM) 10.222 + hvm_cpuid(base,0,&eax,&ebx,&ecx,&edx); 10.223 + else 10.224 + pv_cpuid(base,0,&eax,&ebx,&ecx,&edx); 10.225 + sig.u[0] = ebx; sig.u[1] = ecx; sig.u[2] = edx; 10.226 + sig.csig[12] = '\0'; 10.227 + if (!strcmp("XenVMMXenVMM",&sig.csig[0]) && (eax >= (base+2))) { 10.228 + if (hvm==HVM) 10.229 + hvm_cpuid(base+1,0,&eax,&ebx,&ecx,&edx); 10.230 + else 10.231 + pv_cpuid(base+1,0,&eax,&ebx,&ecx,&edx); 10.232 + *version_major = (eax >> 16) & 0xffff; 10.233 + *version_minor = eax & 0xffff; 10.234 + return 1; 10.235 + } 10.236 + } 10.237 + return 0; 10.238 +} 10.239 + 10.240 +main(int ac, char **av) 10.241 +{ 10.242 + u32 dummy; 10.243 + u16 version_hi, version_lo; 10.244 + u64 ts, last_ts; 10.245 + int status, discontinuity = 0; 10.246 + pid_t pid; 10.247 + 10.248 + if (running_on_xen(HVM,&version_hi,&version_lo)) { 10.249 + printf("running on Xen v%d.%d as an HVM domain, " 10.250 + "pvrdtsc not supported, exiting\n", 10.251 + (int)version_hi, (int)version_lo); 10.252 + exit(0); 10.253 + } 10.254 + pid = fork(); 10.255 + if (pid == -1) { 10.256 + fprintf(stderr,"Huh? 
Fork failed\n"); 10.257 + return 0; 10.258 + } 10.259 + else if (pid == 0) { /* child */ 10.260 + pv_cpuid(0x40000000,0,&dummy,&dummy,&dummy,&dummy); 10.261 + exit(0); 10.262 + } 10.263 + waitpid(pid,&status,0); 10.264 + if (!WIFEXITED(status)) 10.265 + exit(0); 10.266 + if (!running_on_xen(PVM,&version_hi,&version_lo)) { 10.267 + printf("not running on Xen, exiting\n"); 10.268 + exit(0); 10.269 + } 10.270 + printf("running on Xen v%d.%d as a PV domain\n", 10.271 + (int)version_hi, (int)version_lo); 10.272 + if ( version_hi <= 3 ) { 10.273 + printf("pvrdtscp requires Xen version 4.0 or greater\n"); 10.274 + /* exit(0); FIXME after xen-unstable is officially v4.0 */ 10.275 + } 10.276 + if ( get_xen_tsc_mode() != 3 ) 10.277 + printf("tsc_mode not pvrdtscp, set tsc_mode=3, exiting\n"); 10.278 + 10.279 + /* OK, we are on Xen, now loop forever checking timestamps */ 10.280 + ts = get_pvrdtscp_timestamp(&discontinuity); 10.281 + printf("Starting with ts=%lluns 0x%llx (%llusec)\n",ts,ts,ts/NSEC_PER_SEC); 10.282 + printf("incarn=%d: vtsc=%d, vtsc_khz=%lu, phys cpu_khz=%lu\n", 10.283 + (unsigned long)get_xen_incarnation(), 10.284 + (unsigned long)get_xen_vtsc(), 10.285 + (unsigned long)get_xen_vtsc_khz(), 10.286 + (unsigned long)get_xen_cpu_khz()); 10.287 + ts = get_pvrdtscp_timestamp(&discontinuity); 10.288 + last_ts = ts; 10.289 + while (1) { 10.290 + ts = get_pvrdtscp_timestamp(&discontinuity); 10.291 + if (discontinuity) 10.292 + printf("migrated/restored, incarn=%d: " 10.293 + "vtsc now %d, vtsc_khz=%lu, phys cpu_khz=%lu\n", 10.294 + (unsigned long)get_xen_incarnation(), 10.295 + (unsigned long)get_xen_vtsc(), 10.296 + (unsigned long)get_xen_vtsc_khz(), 10.297 + (unsigned long)get_xen_cpu_khz()); 10.298 + if (ts < last_ts) 10.299 + /* this should NEVER happen, especially since there 10.300 + * is a check for it in get_pvrdtscp_timestamp() */ 10.301 + printf("Time went backwards: %lluns (%llusec)\n", 10.302 + last_ts-ts,(last_ts-ts)/NSEC_PER_SEC); 10.303 + if (ts > 
last_ts + 200000000LL) 10.304 + /* this is OK, usually about 2sec for save/restore 10.305 + * and a fraction of a second for live migrate */ 10.306 + printf("Time jumped forward %lluns (%llusec)\n", 10.307 + ts-last_ts,(ts-last_ts)/NSEC_PER_SEC); 10.308 + last_ts = ts; 10.309 + } 10.310 +}
11.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 11.2 +++ b/docs/misc/tscmode.txt Fri Mar 19 18:36:57 2010 -0700 11.3 @@ -0,0 +1,299 @@ 11.4 +TSC_MODE HOW-TO 11.5 +by: Dan Magenheimer <dan.magenheimer@oracle.com> 11.6 + 11.7 +OVERVIEW 11.8 + 11.9 +As of Xen 4.0, a new config option called tsc_mode may be specified 11.10 +for each domain. The default for tsc_mode handles the vast majority 11.11 +of hardware and software environments. This document is targeted 11.12 +for Xen users and administrators that may need to select a non-default 11.13 +tsc_mode. 11.14 + 11.15 +Proper selection of tsc_mode depends on an understanding not only of 11.16 +the guest operating system (OS), but also of the application set that will 11.17 +ever run on this guest OS. This is because tsc_mode applies 11.18 +equally to both the OS and ALL apps that are running on this 11.19 +domain, now or in the future. 11.20 + 11.21 +Key questions to be answered for the OS and/or each application are: 11.22 +- Does the OS/app use the rdtsc instruction at all? (We will explain below 11.23 + how to determine this.) 11.24 +- At what frequency is the rdtsc instruction executed by either the OS 11.25 + or any running apps? If the sum exceeds about 10,000 rdtsc instructions 11.26 + per second per processor, we call this a "high-TSC-frequency" 11.27 + OS/app/environment. (This is relatively rare, and developers of OS's 11.28 + and apps that are high-TSC-frequency are usually aware of it.) 11.29 +- If the OS/app does use rdtsc, will it behave incorrectly if "time goes 11.30 + backwards" or if the frequency of the TSC suddenly changes? If so, 11.31 + we call this a "TSC-sensitive" app or OS; otherwise it is "TSC-resilient". 11.32 + 11.33 +This last is the US$64,000 question as it may be very difficult 11.34 +(or, for legacy apps, even impossible) to predict all possible 11.35 +failure cases. 
As a result, unless proven otherwise, any app 11.36 +that uses rdtsc must be assumed to be TSC-sensitive and, as we 11.37 +will see, this is the default starting in Xen 4.0. 11.38 + 11.39 +Xen's new tsc_mode parameter determines the circumstances under which 11.40 +the family of rdtsc instructions is executed "natively" vs emulated. 11.41 +Roughly speaking, native means rdtsc is fast but TSC-sensitive apps 11.42 +may, under unpredictable circumstances, run incorrectly; emulated means 11.43 +there is some performance degradation (unobservable in most cases), 11.44 +but TSC-sensitive apps will always run correctly. Prior to Xen 4.0, 11.45 +all rdtsc instructions were native: "fast but potentially incorrect." 11.46 +Starting at Xen 4.0, the default is that all rdtsc instructions are 11.47 +"correct but potentially slow". The tsc_mode parameter in 4.0 provides 11.48 +an intelligent default but allows system administrators to adjust 11.49 +how rdtsc instructions are executed differently for different domains. 11.50 + 11.51 +The non-default choices for tsc_mode are: 11.52 +- tsc_mode=1 (always emulate). All rdtsc instructions are emulated; 11.53 + this is the best choice when TSC-sensitive apps are running and 11.54 + it is necessary to understand worst-case performance degradation 11.55 + for a specific hardware environment. 11.56 +- tsc_mode=2 (never emulate). This is the same as prior to Xen 4.0 11.57 + and is the best choice if it is certain that all apps running in 11.58 + this VM are TSC-resilient and highest performance is required. 11.59 +- tsc_mode=3 (PVRDTSCP). High-TSC-frequency apps may be paravirtualized 11.60 + (modified) to obtain both correctness and highest performance; any 11.61 + unmodified apps must be TSC-resilient.
11.62 + 11.63 +If tsc_mode is left unspecified (or set to tsc_mode=0), a hybrid 11.64 +algorithm is utilized to ensure correctness while providing the 11.65 +best performance possible given: 11.66 +- the requirement of correctness, 11.67 +- the underlying hardware, and 11.68 +- whether or not the VM has been saved/restored/migrated 11.69 +To understand this in more detail, the rest of this document must 11.70 +be read. 11.71 + 11.72 +DETERMINING RDTSC FREQUENCY 11.73 + 11.74 +To determine the frequency of rdtsc instructions that are emulated, 11.75 +an "xm" command can be used by a privileged user of domain0. The 11.76 +command: 11.77 + 11.78 +# xm debug-key s; xm dmesg | tail 11.79 + 11.80 +provides information about TSC usage in each domain where TSC 11.81 +emulation is currently enabled. 11.82 + 11.83 +TSC HISTORY 11.84 + 11.85 +To understand tsc_mode completely, some background on TSC is required: 11.86 + 11.87 +The x86 "timestamp counter", or TSC, is a 64-bit register on each 11.88 +processor that increases monotonically. Historically, TSC incremented 11.89 +every processor cycle, but on recent processors, it increases 11.90 +at a constant rate even if the processor changes frequency (for example, 11.91 +to reduce processor power usage). TSC is known by x86 programmers 11.92 +as the fastest, highest-precision measurement of the passage of time 11.93 +so it is often used as a foundation for performance monitoring. 11.94 +And since it is guaranteed to be monotonically increasing and, at 11.95 +64 bits, is guaranteed to not wraparound within 10 years, it is 11.96 +sometimes used as a random number or a unique sequence identifier, 11.97 +such as to stamp transactions so they can be replayed in a specific 11.98 +order. 11.99 + 11.100 +On most older SMP and early multi-core machines, TSC was not synchronized 11.101 +between processors. 
Thus if an application were to read the TSC on 11.102 +one processor, then was moved by the OS to another processor, then read 11.103 +TSC again, it might appear that "time went backwards". This loss of 11.104 +monotonicity resulted in many obscure application bugs when TSC-sensitive 11.105 +apps were ported from a uniprocessor to an SMP environment; as a result, 11.106 +many applications -- especially in the Windows world -- removed their 11.107 +dependency on TSC and replaced their timestamp needs with OS-specific 11.108 +functions, losing both performance and precision. On some more recent 11.109 +generations of multi-core machines, especially multi-socket multi-core 11.110 +machines, the TSC was synchronized but if one processor were to enter 11.111 +certain low-power states, its TSC would stop, destroying the synchrony 11.112 +and again causing obscure bugs. This reinforced decisions to avoid use 11.113 +of TSC altogether. On the most recent generations of multi-core 11.114 +machines, however, synchronization is provided across all processors 11.115 +in all power states, even on multi-socket machines, and these machines provide a 11.116 +flag that indicates that TSC is synchronized and "invariant". Thus 11.117 +TSC is once again useful for applications, and even newer operating 11.118 +systems are using and depending upon TSC for critical timekeeping 11.119 +tasks when running on these recent machines. 11.120 + 11.121 +We will refer to hardware that ensures TSC is both synchronized and 11.122 +invariant as "TSC-safe" and any hardware on which TSC is not (or 11.123 +may not remain) synchronized as "TSC-unsafe". 11.124 + 11.125 +As a result of TSC's sordid history, two classes of applications use 11.126 +TSC: old applications designed for single processors, and the most recent 11.127 +enterprise applications which require high-frequency high-precision 11.128 +timestamping.
11.129 + 11.130 +We will refer to apps that might break if running on a TSC-unsafe 11.131 +machine as "TSC-sensitive"; apps that don't use TSC, or do use 11.132 +TSC but use it in a way that monotonicity and frequency invariance 11.133 +are unimportant as "TSC-resilient". 11.134 + 11.135 +The emergence of virtualization once again complicates the usage of 11.136 +TSC. When features such as save/restore or live migration are employed, 11.137 +a guest OS and all its currently running applications may be invisibly 11.138 +transported to an entirely different physical machine. While TSC 11.139 +may be "safe" on one machine, it is essentially impossible to precisely 11.140 +synchronize TSC across a data center or even a pool of machines. As 11.141 +a result, when run in a virtualized environment, rare and obscure 11.142 +"time going backwards" problems might once again occur for those 11.143 +TSC-sensitive applications. Worse, if a guest OS moves from, for 11.144 +example, a 3GHz 11.145 +machine to a 1.5GHz machine, attempts by an OS/app to measure time 11.146 +intervals with TSC may without notice be incorrect by a factor of two. 11.147 + 11.148 +The rdtsc (read timestamp counter) instruction is used to read the 11.149 +TSC register. The rdtscp instruction is a variant of rdtsc on recent 11.150 +processors. We refer to these together as the rdtsc family of instructions, 11.151 +or just "rdtsc". Instructions in the rdtsc family are non-privileged, but 11.152 +privileged software may set a control register bit (CR4.TSD) to cause all rdtsc family 11.153 +instructions to trap. This trap can be detected by Xen, which can 11.154 +then transparently "emulate" the results of the rdtsc instruction and 11.155 +return control to the code following the rdtsc instruction. 11.156 + 11.157 +To provide a "safe" TSC, i.e. to ensure both TSC monotonicity and a 11.158 +fixed rate, Xen provides rdtsc emulation whenever necessary or when 11.159 +explicitly specified by a per-VM configuration option.
TSC emulation is 11.160 +relatively slow -- roughly 15-20 times slower than the rdtsc instruction 11.161 +when executed natively. However, except when an OS or application uses 11.162 +the rdtsc instruction at a high frequency (e.g. more than about 10,000 times 11.163 +per second per processor), this performance degradation is not noticeable 11.164 +(i.e. <0.3%). And, TSC emulation is nearly always faster than 11.165 +OS-provided alternatives (e.g. Linux's gettimeofday). For environments 11.166 +where it is certain that all apps are TSC-resilient (i.e. 11.167 +"TSC-safeness" is not necessary) and highest performance is a 11.168 +requirement, TSC emulation may be entirely disabled (tsc_mode==2). 11.169 + 11.170 +The default mode (tsc_mode==0) checks TSC-safeness of the underlying 11.171 +hardware on which the virtual machine is launched. If it is 11.172 +TSC-safe, rdtsc will execute at hardware speed; if it is not, rdtsc 11.173 +will be emulated. Once a virtual machine is save/restored or migrated, 11.174 +however, there are two possibilities: For a paravirtualized (PV) domain, 11.175 +TSC will always be emulated. For a fully-virtualized (HVM) domain, 11.176 +TSC remains native IF the source physical machine and target physical machine 11.177 +have the same TSC frequency; else TSC is emulated. Note that, though 11.178 +emulated, the "apparent" TSC frequency will be the TSC frequency 11.179 +of the initial physical machine, even after migration. 11.180 + 11.181 +For environments where both TSC-safeness AND highest performance 11.182 +even across migration is a requirement, application code can be specially 11.183 +modified to use an algorithm explicitly designed into Xen for this purpose. 
11.184 +This mode (tsc_mode==3) is called PVRDTSCP, because it requires 11.185 +app paravirtualization (awareness by the app that it may be running 11.186 +on top of Xen), and utilizes a variation of the rdtsc instruction 11.187 +called rdtscp that is available on most recent generation processors. 11.188 +(The rdtscp instruction differs from the rdtsc instruction in that it 11.189 +reads not only the TSC but an additional register set by system software.) 11.190 +When a pvrdtscp-modified app is running on a processor that is both TSC-safe 11.191 +and supports the rdtscp instruction, information can be obtained 11.192 +about migration and TSC frequency/offset adjustment to allow the 11.193 +vast majority of timestamps to be obtained at top performance; when 11.194 +running on a TSC-unsafe processor or a processor that doesn't support 11.195 +the rdtscp instruction, rdtscp is emulated. 11.196 + 11.197 +PVRDTSCP (tsc_mode==3) has two limitations. First, it applies to 11.198 +all apps running in this virtual machine. This means that all 11.199 +apps must either be TSC-resilient or pvrdtscp-modified. Second, 11.200 +highest performance is only obtained on TSC-safe machines that 11.201 +support the rdtscp instruction; when running on older machines, 11.202 +rdtscp is emulated and thus slower. For more information on PVRDTSCP, 11.203 +see below. 11.204 + 11.205 +Finally, tsc_mode==1 always enables TSC emulation, regardless of 11.206 +the underlying physical hardware. The "apparent" TSC frequency will 11.207 +be the TSC frequency of the initial physical machine, even after migration. 11.208 +This mode is useful to measure any performance degradation that 11.209 +might be encountered by a tsc_mode==0 domain after migration occurs, 11.210 +or a tsc_mode==3 domain when it is running on TSC-unsafe hardware. 
11.211 + 11.212 +Note that while Xen ensures that an emulated TSC is "safe" across migration, 11.213 +it does not ensure that it continues to tick at the same rate during 11.214 +the actual migration. As an oversimplified example, if TSC is ticking 11.215 +once per second in a guest, and the guest is saved when the TSC is 1000, 11.216 +then restored 30 seconds later, TSC is only guaranteed to be greater 11.217 +than or equal to 1001, not precisely 1030. This has some OS implications 11.218 +as will be seen in the next section. 11.219 + 11.220 +TSC INVARIANT BIT and NO_MIGRATE 11.221 + 11.222 +Related to TSC emulation, the "TSC Invariant" bit is architecturally defined 11.223 +in a cpuid bit on the most recent x86 processors. If set, TSC invariance 11.224 +ensures that the TSC is "safe", that is it will increment at a constant rate 11.225 +regardless of power events, will be synchronized across all processors, and 11.226 +was properly initialized to zero on all processors at boot-time 11.227 +by system hardware/BIOS. As long as system software never writes to TSC, 11.228 +TSC will be safe and continuously incremented at a fixed rate and thus 11.229 +can be used as a system "clocksource". 11.230 + 11.231 +This bit is used by some OS's, and specifically by Linux starting with 11.232 +version 2.6.30(?), to select TSC as a system clocksource. Once selected, 11.233 +TSC remains the Linux system clocksource unless manually overridden. In 11.234 +a virtualized environment, since it is not possible to synchronize TSC 11.235 +across all the machines in a pool or data center, a migration may "break" 11.236 +TSC as a usable clocksource; while time will not go backwards, it may 11.237 +not track wallclock time well enough to avoid certain time-sensitive 11.238 +consequences. As a result, Xen can only expose the TSC Invariant bit 11.239 +to a guest OS if it is certain that the domain will never migrate. 
11.240 +As of Xen 4.0, the "no_migrate=1" VM configuration option may be specified 11.241 +to disable migration. If no_migrate is selected and the VM is running 11.242 +on a physical machine with "TSC Invariant", Linux 2.6.30+ will safely 11.243 +use TSC as the system clocksource. But, attempts to migrate or, once 11.244 +saved, restore this domain will fail. 11.245 + 11.246 +There is another cpuid-related complication: The x86 cpuid instruction is 11.247 +non-privileged. HVM domains are configured to always trap this instruction 11.248 +to Xen, where Xen can "filter" the result. In a PV OS, all cpuid instructions 11.249 +have been replaced by a paravirtualized equivalent of the cpuid instruction 11.250 +("pvcpuid") and also trap to Xen. But apps in a PV guest that use a 11.251 +cpuid instruction execute it directly, without a trap to Xen. As a result, 11.252 +an app may directly examine the physical TSC Invariant cpuid bit and make 11.253 +decisions based on that bit. This is still an unsolved problem, though 11.254 +a workaround exists as part of the PVRDTSCP tsc_mode for apps that 11.255 +can be modified. 11.256 + 11.257 +MORE ON PVRDTSCP 11.258 + 11.259 +Paravirtualized OS's use the "pvclock" algorithm to manage the passing 11.260 +of time. This sophisticated algorithm obtains information from a memory 11.261 +page shared between Xen and the OS and selects information from this 11.262 +page based on the current virtual CPU (vcpu) in order to properly adapt to 11.263 +TSC-unsafe systems and changes that occur across migration. Neither 11.264 +this shared page nor the vcpu information is available to a userland 11.265 +app so the pvclock algorithm cannot be directly used by an app, at least 11.266 +without performance degradation roughly equal to the cost of just 11.267 +emulating an rdtsc. 
11.268 + 11.269 +As a result, as of 4.0, Xen provides capabilities for a userland app 11.270 +to obtain key time values similar to the information accessible 11.271 +to the PV OS pvclock algorithm. The app uses the rdtscp instruction 11.272 +which is defined in recent processors to obtain both the TSC and an 11.273 +auxiliary value called TSC_AUX. Xen is responsible for setting TSC_AUX 11.274 +to the same value on all vcpus running any domain with tsc_mode==3; 11.275 +further, Xen tools are responsible for monotonically incrementing TSC_AUX 11.276 +anytime the domain is restored/migrated (thus changing key time values); 11.277 +and, when the domain is running on a physical machine that either 11.278 +is not TSC-safe or does not support the rdtscp instruction, Xen 11.279 +is responsible for emulating the rdtscp instruction and for setting 11.280 +TSC_AUX to zero on all processors. 11.281 + 11.282 +Xen also provides pvclock information via a "pvcpuid" instruction. 11.283 +While this results in a slow trap, the information changes 11.284 +(and thus must be reobtained via pvcpuid) ONLY when TSC_AUX 11.285 +has changed, which should be very rare relative to a high 11.286 +frequency of rdtscp instructions. 11.287 + 11.288 +Finally, Xen provides additional time-related information via 11.289 +other pvcpuid instructions. First, an app is capable of 11.290 +determining if it is currently running on Xen, next 11.291 +the tsc_mode setting of the domain in which it is running, 11.292 +and finally whether the underlying hardware is TSC-safe and 11.293 +supports the rdtscp instruction. 11.294 + 11.295 +As a result, a pvrdtscp-modified app has sufficient information 11.296 +to compute the pvclock "elapsed nanoseconds" which can 11.297 +be used as a timestamp. And this can be done nearly as 11.298 +fast as a native rdtsc instruction, much faster than emulation, 11.299 +and also much faster than nearly all OS-provided time mechanisms. 
11.300 +While pvrdtscp is too complex for most apps, certain enterprise 11.301 +TSC-sensitive high-TSC-frequency apps may find it useful to 11.302 +obtain a significant performance gain.
12.1 --- a/docs/misc/xsm-flask.txt Mon Nov 02 19:35:54 2009 -0800 12.2 +++ b/docs/misc/xsm-flask.txt Fri Mar 19 18:36:57 2010 -0700 12.3 @@ -168,6 +168,70 @@ adding them to xen.te, although manual r 12.4 often lead to adding parameterized rules to the interfaces in xen.if 12.5 to address the general case. 12.6 12.7 +Device Policy 12.8 +------------- 12.9 + 12.10 +Flask is capable of labeling devices and enforcing policies associated with 12.11 +them. To enable this functionality the latest version of checkpolicy 12.12 +(>= 2.0.20) and libsepol (>=2.0.39) will be needed in order to compile it. To 12.13 +enable the building of the new policies the following changes will need to be 12.14 +done to tools/flask/policy/Makefile. 12.15 + 12.16 +######################################## 12.17 +# 12.18 +# Build a binary policy locally 12.19 +# 12.20 +$(POLVER): policy.conf 12.21 + @echo "Compiling $(NAME) $(POLVER)" 12.22 + $(QUIET) $(CHECKPOLICY) $^ -o $@ (Comment out this line) 12.23 +# Uncomment line below to enable policies for devices 12.24 +# $(QUIET) $(CHECKPOLICY) -t Xen $^ -o $@ (Uncomment this line) 12.25 + 12.26 +######################################## 12.27 +# 12.28 +# Install a binary policy 12.29 +# 12.30 +$(LOADPATH): policy.conf 12.31 + @echo "Compiling and installing $(NAME) $(LOADPATH)" 12.32 + $(QUIET) $(CHECKPOLICY) $^ -o $@ (Comment out this line) 12.33 +# Uncomment line below to enable policies for devices 12.34 +# $(QUIET) $(CHECKPOLICY) -t Xen $^ -o $@ (Uncomment this line) 12.35 + 12.36 + 12.37 +Pirqs, PCI devices, I/O memory and ports can all be labeled. There are 12.38 +commented out lines in xen.te policy for examples on how to label devices. 12.39 + 12.40 +Device Labeling 12.41 +--------------- 12.42 + 12.43 +The "lspci -vvn" command can be used to output all the devices and identifiers 12.44 +associated with them. For example, to label an Intel e1000e ethernet card the 12.45 +lspci output is.. 
12.46 + 12.47 +00:19.0 0200: 8086:10de (rev 02) 12.48 + Subsystem: 1028:0276 12.49 + Interrupt: pin A routed to IRQ 33 12.50 + Region 0: Memory at febe0000 (32-bit, non-prefetchable) [size=128K] 12.51 + Region 1: Memory at febd9000 (32-bit, non-prefetchable) [size=4K] 12.52 + Region 2: I/O ports at ecc0 [size=32] 12.53 + Kernel modules: e1000e 12.54 + 12.55 +The labeling can be done with these commands 12.56 + 12.57 +pirqcon 33 system_u:object_r:nicP_t 12.58 +iomemcon 0xfebe0-0xfebff system_u:object_r:nicP_t 12.59 +iomemcon 0xfebd9 system_u:object_r:nicP_t 12.60 +ioportcon 0xecc0-0xecdf system_u:object_r:nicP_t 12.61 +pcidevicecon 0xc800 system_u:object_r:nicP_t 12.62 + 12.63 +Labeling of the PCI device is tricky since there is no output in lspci that 12.64 +makes the information easily available. The easiest way to obtain the 12.65 +information is to look at the avc denial line for the correct hex value. 12.66 + 12.67 +(XEN) avc: denied { add_device } for domid=0 device=0xc800 <--- 12.68 +scontext=system_u:system_r:dom0_t tcontext=system_u:object_r:device_t 12.69 +tclass=resource 12.70 + 12.71 Additional notes on XSM:FLASK 12.72 ----------------------------- 12.73
13.1 --- a/docs/xen-api/revision-history.tex Mon Nov 02 19:35:54 2009 -0800 13.2 +++ b/docs/xen-api/revision-history.tex Fri Mar 19 18:36:57 2010 -0700 13.3 @@ -44,6 +44,12 @@ 13.4 Added description for \texttt{PV/kernel} and \texttt{PV/ramdisk} 13.5 parameters using URIs.\tabularnewline 13.6 \hline 13.7 + 1.0.9 & 20th Nov. 09 & M. Kanno & 13.8 + Added definitions of new classes DSCSI\_HBA and PSCSI\_HBA. 13.9 + Updated the table and the diagram representing relationships 13.10 + between classes. Added host.PSCSI\_HBAs and VM.DSCSI\_HBAs 13.11 + fields.\tabularnewline 13.12 + \hline 13.13 \end{tabular} 13.14 \end{center} 13.15 \end{flushleft}
14.1 --- a/docs/xen-api/xenapi-coversheet.tex Mon Nov 02 19:35:54 2009 -0800 14.2 +++ b/docs/xen-api/xenapi-coversheet.tex Fri Mar 19 18:36:57 2010 -0700 14.3 @@ -17,12 +17,12 @@ 14.4 \newcommand{\coversheetlogo}{xen.eps} 14.5 14.6 %% Document date 14.7 -\newcommand{\datestring}{17th June 2009} 14.8 +\newcommand{\datestring}{20th November 2009} 14.9 14.10 \newcommand{\releasestatement}{Stable Release} 14.11 14.12 %% Document revision 14.13 -\newcommand{\revstring}{API Revision 1.0.8} 14.14 +\newcommand{\revstring}{API Revision 1.0.9} 14.15 14.16 %% Document authors 14.17 \newcommand{\docauthors}{
15.1 --- a/docs/xen-api/xenapi-datamodel-graph.dot Mon Nov 02 19:35:54 2009 -0800 15.2 +++ b/docs/xen-api/xenapi-datamodel-graph.dot Fri Mar 19 18:36:57 2010 -0700 15.3 @@ -14,7 +14,7 @@ fontname="Verdana"; 15.4 15.5 node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user; 15.6 node [ shape=box ]; XSPolicy ACMPolicy DPCI PPCI host_cpu console VTPM; 15.7 -node [ shape=box ]; DSCSI PSCSI; 15.8 +node [ shape=box ]; DSCSI PSCSI DSCSI_HBA PSCSI_HBA; 15.9 node [ shape=ellipse ]; VM_metrics VM_guest_metrics host_metrics; 15.10 node [ shape=ellipse ]; PIF_metrics VIF_metrics VBD_metrics PBD_metrics; 15.11 session -> host [ arrowhead="none" ] 15.12 @@ -44,6 +44,11 @@ DPCI -> VM [ arrowhead="none", arrowtail 15.13 DPCI -> PPCI [ arrowhead="none" ] 15.14 PPCI -> host [ arrowhead="none", arrowtail="crow" ] 15.15 DSCSI -> VM [ arrowhead="none", arrowtail="crow" ] 15.16 +DSCSI_HBA -> VM [ arrowhead="none", arrowtail="crow" ] 15.17 +DSCSI -> DSCSI_HBA [ arrowhead="none", arrowtail="crow" ] 15.18 DSCSI -> PSCSI [ arrowhead="none" ] 15.19 +DSCSI_HBA -> PSCSI_HBA [ arrowhead="crow", arrowtail="none" ] 15.20 PSCSI -> host [ arrowhead="none", arrowtail="crow" ] 15.21 +PSCSI_HBA -> host [ arrowhead="none", arrowtail="crow" ] 15.22 +PSCSI -> PSCSI_HBA [ arrowhead="none", arrowtail="crow" ] 15.23 }
16.1 --- a/docs/xen-api/xenapi-datamodel.tex Mon Nov 02 19:35:54 2009 -0800 16.2 +++ b/docs/xen-api/xenapi-datamodel.tex Fri Mar 19 18:36:57 2010 -0700 16.3 @@ -49,7 +49,9 @@ Name & Description \\ 16.4 {\tt DPCI} & A pass-through PCI device \\ 16.5 {\tt PPCI} & A physical PCI device \\ 16.6 {\tt DSCSI} & A half-virtualized SCSI device \\ 16.7 +{\tt DSCSI\_HBA} & A half-virtualized SCSI host bus adapter \\ 16.8 {\tt PSCSI} & A physical SCSI device \\ 16.9 +{\tt PSCSI\_HBA} & A physical SCSI host bus adapter \\ 16.10 {\tt user} & A user of the system \\ 16.11 {\tt debug} & A basic class for testing \\ 16.12 {\tt XSPolicy} & A class for handling Xen Security Policies \\ 16.13 @@ -79,7 +81,11 @@ console.VM & VM.consoles & one-to-many\\ 16.14 DPCI.VM & VM.DPCIs & one-to-many\\ 16.15 PPCI.host & host.PPCIs & one-to-many\\ 16.16 DSCSI.VM & VM.DSCSIs & one-to-many\\ 16.17 +DSCSI.HBA & DSCSI\_HBA.DSCSIs & one-to-many\\ 16.18 +DSCSI\_HBA.VM & VM.DSCSI\_HBAs & one-to-many\\ 16.19 PSCSI.host & host.PSCSIs & one-to-many\\ 16.20 +PSCSI.HBA & PSCSI\_HBA.PSCSIs & one-to-many\\ 16.21 +PSCSI\_HBA.host & host.PSCSI\_HBAs & one-to-many\\ 16.22 host.resident\_VMs & VM.resident\_on & many-to-one\\ 16.23 host.host\_CPUs & host\_cpu.host & many-to-one\\ 16.24 \hline 16.25 @@ -1414,6 +1420,7 @@ Quals & Field & Type & Description \\ 16.26 $\mathit{RO}_\mathit{run}$ & {\tt VTPMs} & (VTPM ref) Set & virtual TPMs \\ 16.27 $\mathit{RO}_\mathit{run}$ & {\tt DPCIs} & (DPCI ref) Set & pass-through PCI devices \\ 16.28 $\mathit{RO}_\mathit{run}$ & {\tt DSCSIs} & (DSCSI ref) Set & half-virtualized SCSI devices \\ 16.29 +$\mathit{RO}_\mathit{run}$ & {\tt DSCSI\_HBAs} & (DSCSI\_HBA ref) Set & half-virtualized SCSI host bus adapters \\ 16.30 $\mathit{RW}$ & {\tt PV/bootloader} & string & name of or path to bootloader \\ 16.31 $\mathit{RW}$ & {\tt PV/kernel} & string & URI of kernel \\ 16.32 $\mathit{RW}$ & {\tt PV/ramdisk} & string & URI of initrd \\ 16.33 @@ -3570,6 +3577,38 @@ value of the field 
16.34 \vspace{0.3cm} 16.35 \vspace{0.3cm} 16.36 \vspace{0.3cm} 16.37 +\subsubsection{RPC name:~get\_DSCSI\_HBAs} 16.38 + 16.39 +{\bf Overview:} 16.40 +Get the DSCSI\_HBAs field of the given VM. 16.41 + 16.42 + \noindent {\bf Signature:} 16.43 +\begin{verbatim} ((DSCSI_HBA ref) Set) get_DSCSI_HBAs (session_id s, VM ref self)\end{verbatim} 16.44 + 16.45 + 16.46 +\noindent{\bf Arguments:} 16.47 + 16.48 + 16.49 +\vspace{0.3cm} 16.50 +\begin{tabular}{|c|c|p{7cm}|} 16.51 + \hline 16.52 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.53 +{\tt VM ref } & self & reference to the object \\ \hline 16.54 + 16.55 +\end{tabular} 16.56 + 16.57 +\vspace{0.3cm} 16.58 + 16.59 + \noindent {\bf Return Type:} 16.60 +{\tt 16.61 +(DSCSI\_HBA ref) Set 16.62 +} 16.63 + 16.64 + 16.65 +value of the field 16.66 +\vspace{0.3cm} 16.67 +\vspace{0.3cm} 16.68 +\vspace{0.3cm} 16.69 \subsubsection{RPC name:~get\_PV\_bootloader} 16.70 16.71 {\bf Overview:} 16.72 @@ -5639,6 +5678,7 @@ Quals & Field & Type & Description \\ 16.73 $\mathit{RO}_\mathit{run}$ & {\tt PBDs} & (PBD ref) Set & physical blockdevices \\ 16.74 $\mathit{RO}_\mathit{run}$ & {\tt PPCIs} & (PPCI ref) Set & physical PCI devices \\ 16.75 $\mathit{RO}_\mathit{run}$ & {\tt PSCSIs} & (PSCSI ref) Set & physical SCSI devices \\ 16.76 +$\mathit{RO}_\mathit{run}$ & {\tt PSCSI\_HBAs} & (PSCSI\_HBA ref) Set & physical SCSI host bus adapters \\ 16.77 $\mathit{RO}_\mathit{run}$ & {\tt host\_CPUs} & (host\_cpu ref) Set & The physical CPUs on this host \\ 16.78 $\mathit{RO}_\mathit{run}$ & {\tt metrics} & host\_metrics ref & metrics associated with this host \\ 16.79 \hline 16.80 @@ -6997,6 +7037,38 @@ value of the field 16.81 \vspace{0.3cm} 16.82 \vspace{0.3cm} 16.83 \vspace{0.3cm} 16.84 +\subsubsection{RPC name:~get\_PSCSI\_HBAs} 16.85 + 16.86 +{\bf Overview:} 16.87 +Get the PSCSI\_HBAs field of the given host. 
16.88 + 16.89 + \noindent {\bf Signature:} 16.90 +\begin{verbatim} ((PSCSI_HBA ref) Set) get_PSCSI_HBAs (session_id s, host ref self)\end{verbatim} 16.91 + 16.92 + 16.93 +\noindent{\bf Arguments:} 16.94 + 16.95 + 16.96 +\vspace{0.3cm} 16.97 +\begin{tabular}{|c|c|p{7cm}|} 16.98 + \hline 16.99 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.100 +{\tt host ref } & self & reference to the object \\ \hline 16.101 + 16.102 +\end{tabular} 16.103 + 16.104 +\vspace{0.3cm} 16.105 + 16.106 + \noindent {\bf Return Type:} 16.107 +{\tt 16.108 +(PSCSI\_HBA ref) Set 16.109 +} 16.110 + 16.111 + 16.112 +value of the field 16.113 +\vspace{0.3cm} 16.114 +\vspace{0.3cm} 16.115 +\vspace{0.3cm} 16.116 \subsubsection{RPC name:~get\_host\_CPUs} 16.117 16.118 {\bf Overview:} 16.119 @@ -15889,6 +15961,7 @@ Quals & Field & Type & Description \\ 16.120 $\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ 16.121 $\mathit{RO}_\mathit{inst}$ & {\tt VM} & VM ref & the virtual machine \\ 16.122 $\mathit{RO}_\mathit{inst}$ & {\tt PSCSI} & PSCSI ref & the physical SCSI device \\ 16.123 +$\mathit{RO}_\mathit{run}$ & {\tt HBA} & DSCSI\_HBA ref & the half-virtualized SCSI host bus adapter \\ 16.124 $\mathit{RO}_\mathit{run}$ & {\tt virtual\_host} & int & the virtual host number \\ 16.125 $\mathit{RO}_\mathit{run}$ & {\tt virtual\_channel} & int & the virtual channel number \\ 16.126 $\mathit{RO}_\mathit{run}$ & {\tt virtual\_target} & int & the virtual target number \\ 16.127 @@ -16015,6 +16088,38 @@ value of the field 16.128 \vspace{0.3cm} 16.129 \vspace{0.3cm} 16.130 \vspace{0.3cm} 16.131 +\subsubsection{RPC name:~get\_HBA} 16.132 + 16.133 +{\bf Overview:} 16.134 +Get the HBA field of the given DSCSI. 
16.135 + 16.136 + \noindent {\bf Signature:} 16.137 +\begin{verbatim} (DSCSI_HBA ref) get_HBA (session_id s, DSCSI ref self)\end{verbatim} 16.138 + 16.139 + 16.140 +\noindent{\bf Arguments:} 16.141 + 16.142 + 16.143 +\vspace{0.3cm} 16.144 +\begin{tabular}{|c|c|p{7cm}|} 16.145 + \hline 16.146 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.147 +{\tt DSCSI ref } & self & reference to the object \\ \hline 16.148 + 16.149 +\end{tabular} 16.150 + 16.151 +\vspace{0.3cm} 16.152 + 16.153 + \noindent {\bf Return Type:} 16.154 +{\tt 16.155 +DSCSI\_HBA ref 16.156 +} 16.157 + 16.158 + 16.159 +value of the field 16.160 +\vspace{0.3cm} 16.161 +\vspace{0.3cm} 16.162 +\vspace{0.3cm} 16.163 \subsubsection{RPC name:~get\_virtual\_host} 16.164 16.165 {\bf Overview:} 16.166 @@ -16210,7 +16315,9 @@ value of the field 16.167 \subsubsection{RPC name:~create} 16.168 16.169 {\bf Overview:} 16.170 -Create a new DSCSI instance, and return its handle. 16.171 +Create a new DSCSI instance, and create a new DSCSI\_HBA instance as needed 16.172 +that the new DSCSI instance connects to, and return the handle of the new 16.173 +DSCSI instance. 16.174 16.175 \noindent {\bf Signature:} 16.176 \begin{verbatim} (DSCSI ref) create (session_id s, DSCSI record args)\end{verbatim} 16.177 @@ -16242,7 +16349,8 @@ reference to the newly created object 16.178 \subsubsection{RPC name:~destroy} 16.179 16.180 {\bf Overview:} 16.181 -Destroy the specified DSCSI instance. 16.182 +Destroy the specified DSCSI instance, and destroy a DSCSI\_HBA instance as 16.183 +needed that the specified DSCSI instance connects to. 
16.184 16.185 \noindent {\bf Signature:} 16.186 \begin{verbatim} void destroy (session_id s, DSCSI ref self)\end{verbatim} 16.187 @@ -16337,6 +16445,372 @@ all fields from the object 16.188 16.189 \vspace{1cm} 16.190 \newpage 16.191 +\section{Class: DSCSI\_HBA} 16.192 +\subsection{Fields for class: DSCSI\_HBA} 16.193 +\begin{longtable}{|lllp{0.38\textwidth}|} 16.194 +\hline 16.195 +\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf DSCSI\_HBA} \\ 16.196 +\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A 16.197 +half-virtualized SCSI host bus adapter.}} \\ 16.198 +\hline 16.199 +Quals & Field & Type & Description \\ 16.200 +\hline 16.201 +$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ 16.202 +$\mathit{RO}_\mathit{inst}$ & {\tt VM} & VM ref & the virtual machine \\ 16.203 +$\mathit{RO}_\mathit{run}$ & {\tt PSCSI\_HBAs} & (PSCSI\_HBA ref) Set & the physical SCSI HBAs \\ 16.204 +$\mathit{RO}_\mathit{run}$ & {\tt DSCSIs} & (DSCSI ref) Set & the half-virtualized SCSI devices which are connected to this DSCSI HBA \\ 16.205 +$\mathit{RO}_\mathit{inst}$ & {\tt virtual\_host} & int & the virtual host number \\ 16.206 +$\mathit{RO}_\mathit{inst}$ & {\tt assignment\_mode} & string & the assignment mode of the half-virtualized SCSI devices which are connected to this DSCSI HBA \\ 16.207 +\hline 16.208 +\end{longtable} 16.209 +\subsection{RPCs associated with class: DSCSI\_HBA} 16.210 +\subsubsection{RPC name:~get\_all} 16.211 + 16.212 +{\bf Overview:} 16.213 +Return a list of all the DSCSI HBAs known to the system. 
16.214 + 16.215 + \noindent {\bf Signature:} 16.216 +\begin{verbatim} ((DSCSI_HBA ref) Set) get_all (session_id s)\end{verbatim} 16.217 + 16.218 + 16.219 +\vspace{0.3cm} 16.220 + 16.221 + \noindent {\bf Return Type:} 16.222 +{\tt 16.223 +(DSCSI\_HBA ref) Set 16.224 +} 16.225 + 16.226 + 16.227 +references to all objects 16.228 +\vspace{0.3cm} 16.229 +\vspace{0.3cm} 16.230 +\vspace{0.3cm} 16.231 +\subsubsection{RPC name:~get\_uuid} 16.232 + 16.233 +{\bf Overview:} 16.234 +Get the uuid field of the given DSCSI HBA. 16.235 + 16.236 + \noindent {\bf Signature:} 16.237 +\begin{verbatim} string get_uuid (session_id s, DSCSI_HBA ref self)\end{verbatim} 16.238 + 16.239 + 16.240 +\noindent{\bf Arguments:} 16.241 + 16.242 + 16.243 +\vspace{0.3cm} 16.244 +\begin{tabular}{|c|c|p{7cm}|} 16.245 + \hline 16.246 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.247 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 16.248 + 16.249 +\end{tabular} 16.250 + 16.251 +\vspace{0.3cm} 16.252 + 16.253 + \noindent {\bf Return Type:} 16.254 +{\tt 16.255 +string 16.256 +} 16.257 + 16.258 + 16.259 +value of the field 16.260 +\vspace{0.3cm} 16.261 +\vspace{0.3cm} 16.262 +\vspace{0.3cm} 16.263 +\subsubsection{RPC name:~get\_VM} 16.264 + 16.265 +{\bf Overview:} 16.266 +Get the VM field of the given DSCSI HBA. 
16.267 + 16.268 + \noindent {\bf Signature:} 16.269 +\begin{verbatim} (VM ref) get_VM (session_id s, DSCSI_HBA ref self)\end{verbatim} 16.270 + 16.271 + 16.272 +\noindent{\bf Arguments:} 16.273 + 16.274 + 16.275 +\vspace{0.3cm} 16.276 +\begin{tabular}{|c|c|p{7cm}|} 16.277 + \hline 16.278 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.279 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 16.280 + 16.281 +\end{tabular} 16.282 + 16.283 +\vspace{0.3cm} 16.284 + 16.285 + \noindent {\bf Return Type:} 16.286 +{\tt 16.287 +VM ref 16.288 +} 16.289 + 16.290 + 16.291 +value of the field 16.292 +\vspace{0.3cm} 16.293 +\vspace{0.3cm} 16.294 +\vspace{0.3cm} 16.295 +\subsubsection{RPC name:~get\_PSCSI\_HBAs} 16.296 + 16.297 +{\bf Overview:} 16.298 +Get the PSCSI\_HBAs field of the given DSCSI HBA. 16.299 + 16.300 + \noindent {\bf Signature:} 16.301 +\begin{verbatim} ((PSCSI_HBA ref) Set) get_PSCSI_HBAs (session_id s, DSCSI_HBA ref self)\end{verbatim} 16.302 + 16.303 + 16.304 +\noindent{\bf Arguments:} 16.305 + 16.306 + 16.307 +\vspace{0.3cm} 16.308 +\begin{tabular}{|c|c|p{7cm}|} 16.309 + \hline 16.310 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.311 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 16.312 + 16.313 +\end{tabular} 16.314 + 16.315 +\vspace{0.3cm} 16.316 + 16.317 + \noindent {\bf Return Type:} 16.318 +{\tt 16.319 +(PSCSI\_HBA ref) Set 16.320 +} 16.321 + 16.322 + 16.323 +value of the field 16.324 +\vspace{0.3cm} 16.325 +\vspace{0.3cm} 16.326 +\vspace{0.3cm} 16.327 +\subsubsection{RPC name:~get\_DSCSIs} 16.328 + 16.329 +{\bf Overview:} 16.330 +Get the DSCSIs field of the given DSCSI HBA. 
16.331 + 16.332 + \noindent {\bf Signature:} 16.333 +\begin{verbatim} ((DSCSI ref) Set) get_DSCSIs (session_id s, DSCSI_HBA ref self)\end{verbatim} 16.334 + 16.335 + 16.336 +\noindent{\bf Arguments:} 16.337 + 16.338 + 16.339 +\vspace{0.3cm} 16.340 +\begin{tabular}{|c|c|p{7cm}|} 16.341 + \hline 16.342 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.343 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 16.344 + 16.345 +\end{tabular} 16.346 + 16.347 +\vspace{0.3cm} 16.348 + 16.349 + \noindent {\bf Return Type:} 16.350 +{\tt 16.351 +(DSCSI ref) Set 16.352 +} 16.353 + 16.354 + 16.355 +value of the field 16.356 +\vspace{0.3cm} 16.357 +\vspace{0.3cm} 16.358 +\vspace{0.3cm} 16.359 +\subsubsection{RPC name:~get\_virtual\_host} 16.360 + 16.361 +{\bf Overview:} 16.362 +Get the virtual\_host field of the given DSCSI HBA. 16.363 + 16.364 + \noindent {\bf Signature:} 16.365 +\begin{verbatim} int get_virtual_host (session_id s, DSCSI_HBA ref self)\end{verbatim} 16.366 + 16.367 + 16.368 +\noindent{\bf Arguments:} 16.369 + 16.370 + 16.371 +\vspace{0.3cm} 16.372 +\begin{tabular}{|c|c|p{7cm}|} 16.373 + \hline 16.374 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.375 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 16.376 + 16.377 +\end{tabular} 16.378 + 16.379 +\vspace{0.3cm} 16.380 + 16.381 + \noindent {\bf Return Type:} 16.382 +{\tt 16.383 +int 16.384 +} 16.385 + 16.386 + 16.387 +value of the field 16.388 +\vspace{0.3cm} 16.389 +\vspace{0.3cm} 16.390 +\vspace{0.3cm} 16.391 +\subsubsection{RPC name:~get\_assignment\_mode} 16.392 + 16.393 +{\bf Overview:} 16.394 +Get the assignment\_mode field of the given DSCSI HBA. 
16.395 + 16.396 + \noindent {\bf Signature:} 16.397 +\begin{verbatim} string get_assignment_mode (session_id s, DSCSI_HBA ref self)\end{verbatim} 16.398 + 16.399 + 16.400 +\noindent{\bf Arguments:} 16.401 + 16.402 + 16.403 +\vspace{0.3cm} 16.404 +\begin{tabular}{|c|c|p{7cm}|} 16.405 + \hline 16.406 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.407 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 16.408 + 16.409 +\end{tabular} 16.410 + 16.411 +\vspace{0.3cm} 16.412 + 16.413 + \noindent {\bf Return Type:} 16.414 +{\tt 16.415 +string 16.416 +} 16.417 + 16.418 + 16.419 +value of the field 16.420 +\vspace{0.3cm} 16.421 +\vspace{0.3cm} 16.422 +\vspace{0.3cm} 16.423 +\subsubsection{RPC name:~create} 16.424 + 16.425 +{\bf Overview:} 16.426 +Create a new DSCSI\_HBA instance, and create new DSCSI instances of 16.427 +half-virtualized SCSI devices which are connected to the half-virtualized 16.428 +SCSI host bus adapter, and return the handle of the new DSCSI\_HBA instance. 16.429 + 16.430 + \noindent {\bf Signature:} 16.431 +\begin{verbatim} (DSCSI_HBA ref) create (session_id s, DSCSI_HBA record args)\end{verbatim} 16.432 + 16.433 + 16.434 +\noindent{\bf Arguments:} 16.435 + 16.436 + 16.437 +\vspace{0.3cm} 16.438 +\begin{tabular}{|c|c|p{7cm}|} 16.439 + \hline 16.440 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.441 +{\tt DSCSI\_HBA record } & args & All constructor arguments \\ \hline 16.442 + 16.443 +\end{tabular} 16.444 + 16.445 +\vspace{0.3cm} 16.446 + 16.447 + \noindent {\bf Return Type:} 16.448 +{\tt 16.449 +DSCSI\_HBA ref 16.450 +} 16.451 + 16.452 + 16.453 +reference to the newly created object 16.454 +\vspace{0.3cm} 16.455 +\vspace{0.3cm} 16.456 +\vspace{0.3cm} 16.457 +\subsubsection{RPC name:~destroy} 16.458 + 16.459 +{\bf Overview:} 16.460 +Destroy the specified DSCSI\_HBA instance, and destroy DSCSI instances of 16.461 +half-virtualized SCSI devices which are connected to the half-virtualized SCSI 16.462 +host bus adapter. 
16.463 + 16.464 + \noindent {\bf Signature:} 16.465 +\begin{verbatim} void destroy (session_id s, DSCSI_HBA ref self)\end{verbatim} 16.466 + 16.467 + 16.468 +\noindent{\bf Arguments:} 16.469 + 16.470 + 16.471 +\vspace{0.3cm} 16.472 +\begin{tabular}{|c|c|p{7cm}|} 16.473 + \hline 16.474 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.475 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 16.476 + 16.477 +\end{tabular} 16.478 + 16.479 +\vspace{0.3cm} 16.480 + 16.481 + \noindent {\bf Return Type:} 16.482 +{\tt 16.483 +void 16.484 +} 16.485 + 16.486 + 16.487 +\vspace{0.3cm} 16.488 +\vspace{0.3cm} 16.489 +\vspace{0.3cm} 16.490 +\subsubsection{RPC name:~get\_by\_uuid} 16.491 + 16.492 +{\bf Overview:} 16.493 +Get a reference to the DSCSI\_HBA instance with the specified UUID. 16.494 + 16.495 + \noindent {\bf Signature:} 16.496 +\begin{verbatim} (DSCSI_HBA ref) get_by_uuid (session_id s, string uuid)\end{verbatim} 16.497 + 16.498 + 16.499 +\noindent{\bf Arguments:} 16.500 + 16.501 + 16.502 +\vspace{0.3cm} 16.503 +\begin{tabular}{|c|c|p{7cm}|} 16.504 + \hline 16.505 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.506 +{\tt string } & uuid & UUID of object to return \\ \hline 16.507 + 16.508 +\end{tabular} 16.509 + 16.510 +\vspace{0.3cm} 16.511 + 16.512 + \noindent {\bf Return Type:} 16.513 +{\tt 16.514 +DSCSI\_HBA ref 16.515 +} 16.516 + 16.517 + 16.518 +reference to the object 16.519 +\vspace{0.3cm} 16.520 +\vspace{0.3cm} 16.521 +\vspace{0.3cm} 16.522 +\subsubsection{RPC name:~get\_record} 16.523 + 16.524 +{\bf Overview:} 16.525 +Get a record containing the current state of the given DSCSI HBA. 
16.526 + 16.527 + \noindent {\bf Signature:} 16.528 +\begin{verbatim} (DSCSI_HBA record) get_record (session_id s, DSCSI_HBA ref self)\end{verbatim} 16.529 + 16.530 + 16.531 +\noindent{\bf Arguments:} 16.532 + 16.533 + 16.534 +\vspace{0.3cm} 16.535 +\begin{tabular}{|c|c|p{7cm}|} 16.536 + \hline 16.537 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.538 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 16.539 + 16.540 +\end{tabular} 16.541 + 16.542 +\vspace{0.3cm} 16.543 + 16.544 + \noindent {\bf Return Type:} 16.545 +{\tt 16.546 +DSCSI\_HBA record 16.547 +} 16.548 + 16.549 + 16.550 +all fields from the object 16.551 +\vspace{0.3cm} 16.552 +\vspace{0.3cm} 16.553 +\vspace{0.3cm} 16.554 + 16.555 +\vspace{1cm} 16.556 +\newpage 16.557 \section{Class: PSCSI} 16.558 \subsection{Fields for class: PSCSI} 16.559 \begin{longtable}{|lllp{0.38\textwidth}|} 16.560 @@ -16349,6 +16823,7 @@ Quals & Field & Type & Description \\ 16.561 \hline 16.562 $\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ 16.563 $\mathit{RO}_\mathit{run}$ & {\tt host} & host ref & the physical machine to which this PSCSI is connected \\ 16.564 +$\mathit{RO}_\mathit{run}$ & {\tt HBA} & PSCSI\_HBA ref & the physical SCSI host bus adapter \\ 16.565 $\mathit{RO}_\mathit{run}$ & {\tt physical\_host} & int & the physical host number \\ 16.566 $\mathit{RO}_\mathit{run}$ & {\tt physical\_channel} & int & the physical channel number \\ 16.567 $\mathit{RO}_\mathit{run}$ & {\tt physical\_target} & int & the physical target number \\ 16.568 @@ -16451,6 +16926,38 @@ value of the field 16.569 \vspace{0.3cm} 16.570 \vspace{0.3cm} 16.571 \vspace{0.3cm} 16.572 +\subsubsection{RPC name:~get\_HBA} 16.573 + 16.574 +{\bf Overview:} 16.575 +Get the HBA field of the given PSCSI. 
16.576 + 16.577 + \noindent {\bf Signature:} 16.578 +\begin{verbatim} (PSCSI_HBA ref) get_HBA (session_id s, PSCSI ref self)\end{verbatim} 16.579 + 16.580 + 16.581 +\noindent{\bf Arguments:} 16.582 + 16.583 + 16.584 +\vspace{0.3cm} 16.585 +\begin{tabular}{|c|c|p{7cm}|} 16.586 + \hline 16.587 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.588 +{\tt PSCSI ref } & self & reference to the object \\ \hline 16.589 + 16.590 +\end{tabular} 16.591 + 16.592 +\vspace{0.3cm} 16.593 + 16.594 + \noindent {\bf Return Type:} 16.595 +{\tt 16.596 +PSCSI\_HBA ref 16.597 +} 16.598 + 16.599 + 16.600 +value of the field 16.601 +\vspace{0.3cm} 16.602 +\vspace{0.3cm} 16.603 +\vspace{0.3cm} 16.604 \subsubsection{RPC name:~get\_physical\_host} 16.605 16.606 {\bf Overview:} 16.607 @@ -16966,6 +17473,239 @@ all fields from the object 16.608 16.609 \vspace{1cm} 16.610 \newpage 16.611 +\section{Class: PSCSI\_HBA} 16.612 +\subsection{Fields for class: PSCSI\_HBA} 16.613 +\begin{longtable}{|lllp{0.38\textwidth}|} 16.614 +\hline 16.615 +\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf PSCSI\_HBA} \\ 16.616 +\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A 16.617 +physical SCSI host bus adapter.}} \\ 16.618 +\hline 16.619 +Quals & Field & Type & Description \\ 16.620 +\hline 16.621 +$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\ 16.622 +$\mathit{RO}_\mathit{run}$ & {\tt host} & host ref & the physical machine to which this PSCSI HBA is connected \\ 16.623 +$\mathit{RO}_\mathit{run}$ & {\tt physical\_host} & int & the physical host number \\ 16.624 +$\mathit{RO}_\mathit{run}$ & {\tt PSCSIs} & (PSCSI ref) Set & the physical SCSI devices which are connected to this PSCSI HBA \\ 16.625 +\hline 16.626 +\end{longtable} 16.627 +\subsection{RPCs associated with class: PSCSI\_HBA} 16.628 +\subsubsection{RPC name:~get\_all} 16.629 + 16.630 +{\bf Overview:} 16.631 +Return a list of all the PSCSI HBAs known to the system. 
16.632 + 16.633 + \noindent {\bf Signature:} 16.634 +\begin{verbatim} ((PSCSI_HBA ref) Set) get_all (session_id s)\end{verbatim} 16.635 + 16.636 + 16.637 +\vspace{0.3cm} 16.638 + 16.639 + \noindent {\bf Return Type:} 16.640 +{\tt 16.641 +(PSCSI\_HBA ref) Set 16.642 +} 16.643 + 16.644 + 16.645 +references to all objects 16.646 +\vspace{0.3cm} 16.647 +\vspace{0.3cm} 16.648 +\vspace{0.3cm} 16.649 +\subsubsection{RPC name:~get\_uuid} 16.650 + 16.651 +{\bf Overview:} 16.652 +Get the uuid field of the given PSCSI HBA. 16.653 + 16.654 + \noindent {\bf Signature:} 16.655 +\begin{verbatim} string get_uuid (session_id s, PSCSI_HBA ref self)\end{verbatim} 16.656 + 16.657 + 16.658 +\noindent{\bf Arguments:} 16.659 + 16.660 + 16.661 +\vspace{0.3cm} 16.662 +\begin{tabular}{|c|c|p{7cm}|} 16.663 + \hline 16.664 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.665 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 16.666 + 16.667 +\end{tabular} 16.668 + 16.669 +\vspace{0.3cm} 16.670 + 16.671 + \noindent {\bf Return Type:} 16.672 +{\tt 16.673 +string 16.674 +} 16.675 + 16.676 + 16.677 +value of the field 16.678 +\vspace{0.3cm} 16.679 +\vspace{0.3cm} 16.680 +\vspace{0.3cm} 16.681 +\subsubsection{RPC name:~get\_host} 16.682 + 16.683 +{\bf Overview:} 16.684 +Get the host field of the given PSCSI HBA. 
16.685 + 16.686 + \noindent {\bf Signature:} 16.687 +\begin{verbatim} (host ref) get_host (session_id s, PSCSI_HBA ref self)\end{verbatim} 16.688 + 16.689 + 16.690 +\noindent{\bf Arguments:} 16.691 + 16.692 + 16.693 +\vspace{0.3cm} 16.694 +\begin{tabular}{|c|c|p{7cm}|} 16.695 + \hline 16.696 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.697 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 16.698 + 16.699 +\end{tabular} 16.700 + 16.701 +\vspace{0.3cm} 16.702 + 16.703 + \noindent {\bf Return Type:} 16.704 +{\tt 16.705 +host ref 16.706 +} 16.707 + 16.708 + 16.709 +value of the field 16.710 +\vspace{0.3cm} 16.711 +\vspace{0.3cm} 16.712 +\vspace{0.3cm} 16.713 +\subsubsection{RPC name:~get\_physical\_host} 16.714 + 16.715 +{\bf Overview:} 16.716 +Get the physical\_host field of the given PSCSI HBA. 16.717 + 16.718 + \noindent {\bf Signature:} 16.719 +\begin{verbatim} int get_physical_host (session_id s, PSCSI_HBA ref self)\end{verbatim} 16.720 + 16.721 + 16.722 +\noindent{\bf Arguments:} 16.723 + 16.724 + 16.725 +\vspace{0.3cm} 16.726 +\begin{tabular}{|c|c|p{7cm}|} 16.727 + \hline 16.728 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.729 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 16.730 + 16.731 +\end{tabular} 16.732 + 16.733 +\vspace{0.3cm} 16.734 + 16.735 + \noindent {\bf Return Type:} 16.736 +{\tt 16.737 +int 16.738 +} 16.739 + 16.740 + 16.741 +value of the field 16.742 +\vspace{0.3cm} 16.743 +\vspace{0.3cm} 16.744 +\vspace{0.3cm} 16.745 +\subsubsection{RPC name:~get\_PSCSIs} 16.746 + 16.747 +{\bf Overview:} 16.748 +Get the PSCSIs field of the given PSCSI HBA. 
16.749 + 16.750 + \noindent {\bf Signature:} 16.751 +\begin{verbatim} ((PSCSI ref) Set) get_PSCSIs (session_id s, PSCSI_HBA ref self)\end{verbatim} 16.752 + 16.753 + 16.754 +\noindent{\bf Arguments:} 16.755 + 16.756 + 16.757 +\vspace{0.3cm} 16.758 +\begin{tabular}{|c|c|p{7cm}|} 16.759 + \hline 16.760 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.761 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 16.762 + 16.763 +\end{tabular} 16.764 + 16.765 +\vspace{0.3cm} 16.766 + 16.767 + \noindent {\bf Return Type:} 16.768 +{\tt 16.769 +(PSCSI ref) Set 16.770 +} 16.771 + 16.772 + 16.773 +value of the field 16.774 +\vspace{0.3cm} 16.775 +\vspace{0.3cm} 16.776 +\vspace{0.3cm} 16.777 +\subsubsection{RPC name:~get\_by\_uuid} 16.778 + 16.779 +{\bf Overview:} 16.780 +Get a reference to the PSCSI HBA instance with the specified UUID. 16.781 + 16.782 + \noindent {\bf Signature:} 16.783 +\begin{verbatim} (PSCSI_HBA ref) get_by_uuid (session_id s, string uuid)\end{verbatim} 16.784 + 16.785 + 16.786 +\noindent{\bf Arguments:} 16.787 + 16.788 + 16.789 +\vspace{0.3cm} 16.790 +\begin{tabular}{|c|c|p{7cm}|} 16.791 + \hline 16.792 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.793 +{\tt string } & uuid & UUID of object to return \\ \hline 16.794 + 16.795 +\end{tabular} 16.796 + 16.797 +\vspace{0.3cm} 16.798 + 16.799 + \noindent {\bf Return Type:} 16.800 +{\tt 16.801 +PSCSI\_HBA ref 16.802 +} 16.803 + 16.804 + 16.805 +reference to the object 16.806 +\vspace{0.3cm} 16.807 +\vspace{0.3cm} 16.808 +\vspace{0.3cm} 16.809 +\subsubsection{RPC name:~get\_record} 16.810 + 16.811 +{\bf Overview:} 16.812 +Get a record containing the current state of the given PSCSI HBA. 
16.813 + 16.814 + \noindent {\bf Signature:} 16.815 +\begin{verbatim} (PSCSI_HBA record) get_record (session_id s, PSCSI_HBA ref self)\end{verbatim} 16.816 + 16.817 + 16.818 +\noindent{\bf Arguments:} 16.819 + 16.820 + 16.821 +\vspace{0.3cm} 16.822 +\begin{tabular}{|c|c|p{7cm}|} 16.823 + \hline 16.824 +{\bf type} & {\bf name} & {\bf description} \\ \hline 16.825 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 16.826 + 16.827 +\end{tabular} 16.828 + 16.829 +\vspace{0.3cm} 16.830 + 16.831 + \noindent {\bf Return Type:} 16.832 +{\tt 16.833 +PSCSI\_HBA record 16.834 +} 16.835 + 16.836 + 16.837 +all fields from the object 16.838 +\vspace{0.3cm} 16.839 +\vspace{0.3cm} 16.840 +\vspace{0.3cm} 16.841 + 16.842 +\vspace{1cm} 16.843 +\newpage 16.844 \section{Class: user} 16.845 \subsection{Fields for class: user} 16.846 \begin{longtable}{|lllp{0.38\textwidth}|}
17.1 --- a/extras/mini-os/arch/ia64/mm.c Mon Nov 02 19:35:54 2009 -0800 17.2 +++ b/extras/mini-os/arch/ia64/mm.c Fri Mar 19 18:36:57 2010 -0700 17.3 @@ -137,17 +137,17 @@ unsigned long allocate_ondemand(unsigned 17.4 17.5 /* Helper function used in gnttab.c. */ 17.6 void do_map_frames(unsigned long addr, 17.7 - unsigned long *f, unsigned long n, unsigned long stride, 17.8 - unsigned long increment, domid_t id, int may_fail, unsigned long prot) 17.9 + const unsigned long *f, unsigned long n, unsigned long stride, 17.10 + unsigned long increment, domid_t id, int *err, unsigned long prot) 17.11 { 17.12 /* TODO */ 17.13 ASSERT(0); 17.14 } 17.15 17.16 void* 17.17 -map_frames_ex(unsigned long* frames, unsigned long n, unsigned long stride, 17.18 +map_frames_ex(const unsigned long* frames, unsigned long n, unsigned long stride, 17.19 unsigned long increment, unsigned long alignment, domid_t id, 17.20 - int may_fail, unsigned long prot) 17.21 + int *err, unsigned long prot) 17.22 { 17.23 /* TODO: incomplete! */ 17.24 ASSERT(n == 1 || (stride == 0 && increment == 1));
18.1 --- a/extras/mini-os/arch/x86/ioremap.c Mon Nov 02 19:35:54 2009 -0800 18.2 +++ b/extras/mini-os/arch/x86/ioremap.c Fri Mar 19 18:36:57 2010 -0700 18.3 @@ -53,7 +53,7 @@ static void *__do_ioremap(unsigned long 18.4 } 18.5 } 18.6 va = (unsigned long)map_frames_ex(&mfns, num_pages, 0, 1, 1, 18.7 - DOMID_IO, 0, prot); 18.8 + DOMID_IO, NULL, prot); 18.9 return (void *)(va + offset); 18.10 18.11 mfn_invalid:
19.1 --- a/extras/mini-os/arch/x86/mm.c Mon Nov 02 19:35:54 2009 -0800 19.2 +++ b/extras/mini-os/arch/x86/mm.c Fri Mar 19 18:36:57 2010 -0700 19.3 @@ -568,10 +568,9 @@ unsigned long allocate_ondemand(unsigned 19.4 */ 19.5 #define MAP_BATCH ((STACK_SIZE / 2) / sizeof(mmu_update_t)) 19.6 void do_map_frames(unsigned long va, 19.7 - unsigned long *mfns, unsigned long n, 19.8 + const unsigned long *mfns, unsigned long n, 19.9 unsigned long stride, unsigned long incr, 19.10 - domid_t id, int may_fail, 19.11 - unsigned long prot) 19.12 + domid_t id, int *err, unsigned long prot) 19.13 { 19.14 pgentry_t *pgt = NULL; 19.15 unsigned long done = 0; 19.16 @@ -585,12 +584,14 @@ void do_map_frames(unsigned long va, 19.17 } 19.18 DEBUG("va=%p n=0x%lx, mfns[0]=0x%lx stride=0x%lx incr=0x%lx prot=0x%lx\n", 19.19 va, n, mfns[0], stride, incr, prot); 19.20 - 19.21 + 19.22 + if ( err ) 19.23 + memset(err, 0x00, n * sizeof(int)); 19.24 while ( done < n ) 19.25 { 19.26 unsigned long todo; 19.27 19.28 - if ( may_fail ) 19.29 + if ( err ) 19.30 todo = 1; 19.31 else 19.32 todo = n - done; 19.33 @@ -615,8 +616,8 @@ void do_map_frames(unsigned long va, 19.34 rc = HYPERVISOR_mmu_update(mmu_updates, todo, NULL, id); 19.35 if ( rc < 0 ) 19.36 { 19.37 - if (may_fail) 19.38 - mfns[done * stride] |= 0xF0000000; 19.39 + if (err) 19.40 + err[done * stride] = rc; 19.41 else { 19.42 printk("Map %ld (%lx, ...) at %p failed: %d.\n", 19.43 todo, mfns[done * stride] + done * incr, va, rc); 19.44 @@ -632,17 +633,17 @@ void do_map_frames(unsigned long va, 19.45 * Map an array of MFNs contiguous into virtual address space. Virtual 19.46 * addresses are allocated from the on demand area. 
19.47 */ 19.48 -void *map_frames_ex(unsigned long *mfns, unsigned long n, 19.49 +void *map_frames_ex(const unsigned long *mfns, unsigned long n, 19.50 unsigned long stride, unsigned long incr, 19.51 unsigned long alignment, 19.52 - domid_t id, int may_fail, unsigned long prot) 19.53 + domid_t id, int *err, unsigned long prot) 19.54 { 19.55 unsigned long va = allocate_ondemand(n, alignment); 19.56 19.57 if ( !va ) 19.58 return NULL; 19.59 19.60 - do_map_frames(va, mfns, n, stride, incr, id, may_fail, prot); 19.61 + do_map_frames(va, mfns, n, stride, incr, id, err, prot); 19.62 19.63 return (void *)va; 19.64 }
20.1 --- a/extras/mini-os/blkfront.c Mon Nov 02 19:35:54 2009 -0800 20.2 +++ b/extras/mini-os/blkfront.c Fri Mar 19 18:36:57 2010 -0700 20.3 @@ -93,7 +93,7 @@ struct blkfront_dev *init_blkfront(char 20.4 char* message=NULL; 20.5 struct blkif_sring *s; 20.6 int retry=0; 20.7 - char* msg; 20.8 + char* msg = NULL; 20.9 char* c; 20.10 char* nodename = _nodename ? _nodename : "device/vbd/768"; 20.11 20.12 @@ -129,6 +129,7 @@ again: 20.13 err = xenbus_transaction_start(&xbt); 20.14 if (err) { 20.15 printk("starting transaction\n"); 20.16 + free(err); 20.17 } 20.18 20.19 err = xenbus_printf(xbt, nodename, "ring-ref","%u", 20.20 @@ -159,6 +160,7 @@ again: 20.21 20.22 20.23 err = xenbus_transaction_end(xbt, 0, &retry); 20.24 + if (err) free(err); 20.25 if (retry) { 20.26 goto again; 20.27 printk("completing transaction\n"); 20.28 @@ -167,7 +169,8 @@ again: 20.29 goto done; 20.30 20.31 abort_transaction: 20.32 - xenbus_transaction_end(xbt, 1, &retry); 20.33 + free(err); 20.34 + err = xenbus_transaction_end(xbt, 1, &retry); 20.35 goto error; 20.36 20.37 done: 20.38 @@ -208,7 +211,7 @@ done: 20.39 msg = xenbus_wait_for_state_change(path, &state, &dev->events); 20.40 if (msg != NULL || state != XenbusStateConnected) { 20.41 printk("backend not available, state=%d\n", state); 20.42 - xenbus_unwatch_path(XBT_NIL, path); 20.43 + xenbus_unwatch_path_token(XBT_NIL, path, path); 20.44 goto error; 20.45 } 20.46 20.47 @@ -238,6 +241,8 @@ done: 20.48 return dev; 20.49 20.50 error: 20.51 + free(msg); 20.52 + free(err); 20.53 free_blkfront(dev); 20.54 return NULL; 20.55 } 20.56 @@ -265,6 +270,7 @@ void shutdown_blkfront(struct blkfront_d 20.57 state = xenbus_read_integer(path); 20.58 while (err == NULL && state < XenbusStateClosing) 20.59 err = xenbus_wait_for_state_change(path, &state, &dev->events); 20.60 + if (err) free(err); 20.61 20.62 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { 20.63 printk("shutdown_blkfront: error changing state to %d: %s\n", 
20.64 @@ -272,8 +278,10 @@ void shutdown_blkfront(struct blkfront_d 20.65 goto close; 20.66 } 20.67 state = xenbus_read_integer(path); 20.68 - if (state < XenbusStateClosed) 20.69 - xenbus_wait_for_state_change(path, &state, &dev->events); 20.70 + if (state < XenbusStateClosed) { 20.71 + err = xenbus_wait_for_state_change(path, &state, &dev->events); 20.72 + if (err) free(err); 20.73 + } 20.74 20.75 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { 20.76 printk("shutdown_blkfront: error changing state to %d: %s\n", 20.77 @@ -286,7 +294,8 @@ void shutdown_blkfront(struct blkfront_d 20.78 err = xenbus_wait_for_state_change(path, &state, &dev->events); 20.79 20.80 close: 20.81 - xenbus_unwatch_path(XBT_NIL, path); 20.82 + if (err) free(err); 20.83 + xenbus_unwatch_path_token(XBT_NIL, path, path); 20.84 20.85 snprintf(path, sizeof(path), "%s/ring-ref", nodename); 20.86 xenbus_rm(XBT_NIL, path);
21.1 --- a/extras/mini-os/console/xencons_ring.c Mon Nov 02 19:35:54 2009 -0800 21.2 +++ b/extras/mini-os/console/xencons_ring.c Fri Mar 19 18:36:57 2010 -0700 21.3 @@ -67,8 +67,8 @@ int xencons_ring_send(struct consfront_d 21.4 21.5 static void handle_input(evtchn_port_t port, struct pt_regs *regs, void *data) 21.6 { 21.7 + struct consfront_dev *dev = (struct consfront_dev *) data; 21.8 #ifdef HAVE_LIBC 21.9 - struct consfront_dev *dev = (struct consfront_dev *) data; 21.10 int fd = dev ? dev->fd : -1; 21.11 21.12 if (fd != -1) 21.13 @@ -203,7 +203,7 @@ struct consfront_dev *init_consfront(cha 21.14 char* err; 21.15 char* message=NULL; 21.16 int retry=0; 21.17 - char* msg; 21.18 + char* msg = NULL; 21.19 char nodename[256]; 21.20 char path[256]; 21.21 static int consfrontends = 1; 21.22 @@ -242,6 +242,7 @@ again: 21.23 err = xenbus_transaction_start(&xbt); 21.24 if (err) { 21.25 printk("starting transaction\n"); 21.26 + free(err); 21.27 } 21.28 21.29 err = xenbus_printf(xbt, nodename, "ring-ref","%u", 21.30 @@ -278,6 +279,7 @@ again: 21.31 21.32 21.33 err = xenbus_transaction_end(xbt, 0, &retry); 21.34 + if (err) free(err); 21.35 if (retry) { 21.36 goto again; 21.37 printk("completing transaction\n"); 21.38 @@ -286,7 +288,8 @@ again: 21.39 goto done; 21.40 21.41 abort_transaction: 21.42 - xenbus_transaction_end(xbt, 1, &retry); 21.43 + free(err); 21.44 + err = xenbus_transaction_end(xbt, 1, &retry); 21.45 goto error; 21.46 21.47 done: 21.48 @@ -312,7 +315,7 @@ done: 21.49 msg = xenbus_wait_for_state_change(path, &state, &dev->events); 21.50 if (msg != NULL || state != XenbusStateConnected) { 21.51 printk("backend not available, state=%d\n", state); 21.52 - xenbus_unwatch_path(XBT_NIL, path); 21.53 + xenbus_unwatch_path_token(XBT_NIL, path, path); 21.54 goto error; 21.55 } 21.56 } 21.57 @@ -323,6 +326,8 @@ done: 21.58 return dev; 21.59 21.60 error: 21.61 + free(msg); 21.62 + free(err); 21.63 free_consfront(dev); 21.64 return NULL; 21.65 }
22.1 --- a/extras/mini-os/fbfront.c Mon Nov 02 19:35:54 2009 -0800 22.2 +++ b/extras/mini-os/fbfront.c Fri Mar 19 18:36:57 2010 -0700 22.3 @@ -71,7 +71,7 @@ struct kbdfront_dev *init_kbdfront(char 22.4 char* message=NULL; 22.5 struct xenkbd_page *s; 22.6 int retry=0; 22.7 - char* msg; 22.8 + char* msg = NULL; 22.9 char* nodename = _nodename ? _nodename : "device/vkbd/0"; 22.10 struct kbdfront_dev *dev; 22.11 22.12 @@ -80,6 +80,7 @@ struct kbdfront_dev *init_kbdfront(char 22.13 printk("******************* KBDFRONT for %s **********\n\n\n", nodename); 22.14 22.15 dev = malloc(sizeof(*dev)); 22.16 + memset(dev, 0, sizeof(*dev)); 22.17 dev->nodename = strdup(nodename); 22.18 #ifdef HAVE_LIBC 22.19 dev->fd = -1; 22.20 @@ -101,6 +102,7 @@ again: 22.21 err = xenbus_transaction_start(&xbt); 22.22 if (err) { 22.23 printk("starting transaction\n"); 22.24 + free(err); 22.25 } 22.26 22.27 err = xenbus_printf(xbt, nodename, "page-ref","%u", virt_to_mfn(s)); 22.28 @@ -123,11 +125,13 @@ again: 22.29 22.30 snprintf(path, sizeof(path), "%s/state", nodename); 22.31 err = xenbus_switch_state(xbt, path, XenbusStateInitialised); 22.32 - if (err) 22.33 + if (err) { 22.34 printk("error writing initialized: %s\n", err); 22.35 - 22.36 + free(err); 22.37 + } 22.38 22.39 err = xenbus_transaction_end(xbt, 0, &retry); 22.40 + if (err) free(err); 22.41 if (retry) { 22.42 goto again; 22.43 printk("completing transaction\n"); 22.44 @@ -136,7 +140,8 @@ again: 22.45 goto done; 22.46 22.47 abort_transaction: 22.48 - xenbus_transaction_end(xbt, 1, &retry); 22.49 + free(err); 22.50 + err = xenbus_transaction_end(xbt, 1, &retry); 22.51 goto error; 22.52 22.53 done: 22.54 @@ -165,7 +170,7 @@ done: 22.55 err = xenbus_wait_for_state_change(path, &state, &dev->events); 22.56 if (state != XenbusStateConnected) { 22.57 printk("backend not available, state=%d\n", state); 22.58 - xenbus_unwatch_path(XBT_NIL, path); 22.59 + xenbus_unwatch_path_token(XBT_NIL, path, path); 22.60 goto error; 22.61 } 22.62 22.63 @@ 
-175,7 +180,7 @@ done: 22.64 if((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected)) 22.65 != NULL) { 22.66 printk("error switching state: %s\n", err); 22.67 - xenbus_unwatch_path(XBT_NIL, path); 22.68 + xenbus_unwatch_path_token(XBT_NIL, path, path); 22.69 goto error; 22.70 } 22.71 } 22.72 @@ -185,6 +190,8 @@ done: 22.73 22.74 return dev; 22.75 error: 22.76 + free(msg); 22.77 + free(err); 22.78 free_kbdfront(dev); 22.79 return NULL; 22.80 } 22.81 @@ -246,6 +253,7 @@ void shutdown_kbdfront(struct kbdfront_d 22.82 state = xenbus_read_integer(path); 22.83 while (err == NULL && state < XenbusStateClosing) 22.84 err = xenbus_wait_for_state_change(path, &state, &dev->events); 22.85 + if (err) free(err); 22.86 22.87 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { 22.88 printk("shutdown_kbdfront: error changing state to %d: %s\n", 22.89 @@ -253,8 +261,10 @@ void shutdown_kbdfront(struct kbdfront_d 22.90 goto close_kbdfront; 22.91 } 22.92 state = xenbus_read_integer(path); 22.93 - if (state < XenbusStateClosed) 22.94 - xenbus_wait_for_state_change(path, &state, &dev->events); 22.95 + if (state < XenbusStateClosed) { 22.96 + err = xenbus_wait_for_state_change(path, &state, &dev->events); 22.97 + if (err) free(err); 22.98 + } 22.99 22.100 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { 22.101 printk("shutdown_kbdfront: error changing state to %d: %s\n", 22.102 @@ -265,7 +275,8 @@ void shutdown_kbdfront(struct kbdfront_d 22.103 //xenbus_wait_for_value(path, "2", &dev->events); 22.104 22.105 close_kbdfront: 22.106 - xenbus_unwatch_path(XBT_NIL, path); 22.107 + if (err) free(err); 22.108 + xenbus_unwatch_path_token(XBT_NIL, path, path); 22.109 22.110 snprintf(path, sizeof(path), "%s/page-ref", nodename); 22.111 xenbus_rm(XBT_NIL, path); 22.112 @@ -403,6 +414,7 @@ struct fbfront_dev *init_fbfront(char *_ 22.113 printk("******************* FBFRONT for %s **********\n\n\n", nodename); 22.114 
22.115 dev = malloc(sizeof(*dev)); 22.116 + memset(dev, 0, sizeof(*dev)); 22.117 dev->nodename = strdup(nodename); 22.118 #ifdef HAVE_LIBC 22.119 dev->fd = -1; 22.120 @@ -444,6 +456,7 @@ again: 22.121 err = xenbus_transaction_start(&xbt); 22.122 if (err) { 22.123 printk("starting transaction\n"); 22.124 + free(err); 22.125 } 22.126 22.127 err = xenbus_printf(xbt, nodename, "page-ref","%u", virt_to_mfn(s)); 22.128 @@ -476,6 +489,7 @@ again: 22.129 } 22.130 22.131 err = xenbus_transaction_end(xbt, 0, &retry); 22.132 + if (err) free(err); 22.133 if (retry) { 22.134 goto again; 22.135 printk("completing transaction\n"); 22.136 @@ -484,7 +498,8 @@ again: 22.137 goto done; 22.138 22.139 abort_transaction: 22.140 - xenbus_transaction_end(xbt, 1, &retry); 22.141 + free(err); 22.142 + err = xenbus_transaction_end(xbt, 1, &retry); 22.143 goto error; 22.144 22.145 done: 22.146 @@ -513,7 +528,7 @@ done: 22.147 err = xenbus_wait_for_state_change(path, &state, &dev->events); 22.148 if (state != XenbusStateConnected) { 22.149 printk("backend not available, state=%d\n", state); 22.150 - xenbus_unwatch_path(XBT_NIL, path); 22.151 + xenbus_unwatch_path_token(XBT_NIL, path, path); 22.152 goto error; 22.153 } 22.154 22.155 @@ -526,7 +541,7 @@ done: 22.156 if ((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected)) 22.157 != NULL) { 22.158 printk("error switching state: %s\n", err); 22.159 - xenbus_unwatch_path(XBT_NIL, path); 22.160 + xenbus_unwatch_path_token(XBT_NIL, path, path); 22.161 goto error; 22.162 } 22.163 } 22.164 @@ -537,6 +552,7 @@ done: 22.165 return dev; 22.166 22.167 error: 22.168 + free(err); 22.169 free_fbfront(dev); 22.170 return NULL; 22.171 } 22.172 @@ -625,6 +641,7 @@ void shutdown_fbfront(struct fbfront_dev 22.173 state = xenbus_read_integer(path); 22.174 while (err == NULL && state < XenbusStateClosing) 22.175 err = xenbus_wait_for_state_change(path, &state, &dev->events); 22.176 + if (err) free(err); 22.177 22.178 if ((err = 
xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { 22.179 printk("shutdown_fbfront: error changing state to %d: %s\n", 22.180 @@ -632,8 +649,10 @@ void shutdown_fbfront(struct fbfront_dev 22.181 goto close_fbfront; 22.182 } 22.183 state = xenbus_read_integer(path); 22.184 - if (state < XenbusStateClosed) 22.185 + if (state < XenbusStateClosed) { 22.186 xenbus_wait_for_state_change(path, &state, &dev->events); 22.187 + if (err) free(err); 22.188 + } 22.189 22.190 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { 22.191 printk("shutdown_fbfront: error changing state to %d: %s\n", 22.192 @@ -644,7 +663,8 @@ void shutdown_fbfront(struct fbfront_dev 22.193 //xenbus_wait_for_value(path, "2", &dev->events); 22.194 22.195 close_fbfront: 22.196 - xenbus_unwatch_path(XBT_NIL, path); 22.197 + if (err) free(err); 22.198 + xenbus_unwatch_path_token(XBT_NIL, path, path); 22.199 22.200 snprintf(path, sizeof(path), "%s/page-ref", nodename); 22.201 xenbus_rm(XBT_NIL, path);
23.1 --- a/extras/mini-os/fs-front.c Mon Nov 02 19:35:54 2009 -0800 23.2 +++ b/extras/mini-os/fs-front.c Fri Mar 19 18:36:57 2010 -0700 23.3 @@ -1103,6 +1103,7 @@ again: 23.4 err = xenbus_transaction_start(&xbt); 23.5 if (err) { 23.6 printk("starting transaction\n"); 23.7 + free(err); 23.8 } 23.9 23.10 err = xenbus_printf(xbt, 23.11 @@ -1140,9 +1141,10 @@ again: 23.12 } 23.13 23.14 err = xenbus_printf(xbt, nodename, "state", STATE_READY, 0xdeadbeef); 23.15 + if (err) free(err); 23.16 23.17 - 23.18 err = xenbus_transaction_end(xbt, 0, &retry); 23.19 + if (err) free(err); 23.20 if (retry) { 23.21 goto again; 23.22 printk("completing transaction\n"); 23.23 @@ -1159,7 +1161,9 @@ again: 23.24 goto done; 23.25 23.26 abort_transaction: 23.27 - xenbus_transaction_end(xbt, 1, &retry); 23.28 + free(err); 23.29 + err = xenbus_transaction_end(xbt, 1, &retry); 23.30 + if (err) free(err); 23.31 23.32 done: 23.33 23.34 @@ -1189,8 +1193,9 @@ done: 23.35 sprintf(token, "fs-front-%d", import->import_id); 23.36 /* The token will not be unique if multiple imports are inited */ 23.37 xenbus_watch_path_token(XBT_NIL, r_nodename, r_nodename, &events); 23.38 - xenbus_wait_for_value(r_nodename, STATE_READY, &events); 23.39 - xenbus_unwatch_path(XBT_NIL, r_nodename); 23.40 + err = xenbus_wait_for_value(r_nodename, STATE_READY, &events); 23.41 + if (err) free(err); 23.42 + xenbus_unwatch_path_token(XBT_NIL, r_nodename, r_nodename); 23.43 printk("Backend ready.\n"); 23.44 23.45 //create_thread("fs-tester", test_fs_import, import);
24.1 --- a/extras/mini-os/include/ia64/arch_mm.h Mon Nov 02 19:35:54 2009 -0800 24.2 +++ b/extras/mini-os/include/ia64/arch_mm.h Fri Mar 19 18:36:57 2010 -0700 24.3 @@ -35,9 +35,9 @@ 24.4 #define virt_to_mfn(x) virt_to_pfn(x) 24.5 #define virtual_to_mfn(x) (ia64_tpa((uint64_t)(x)) >> PAGE_SHIFT) 24.6 24.7 -#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0) 24.8 +#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, 0) 24.9 /* TODO */ 24.10 -#define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0) 24.11 +#define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, NULL, 0) 24.12 #define do_map_zero(start, n) ASSERT(n == 0) 24.13 24.14 #endif /* __ARCH_MM_H__ */
25.1 --- a/extras/mini-os/include/lib.h Mon Nov 02 19:35:54 2009 -0800 25.2 +++ b/extras/mini-os/include/lib.h Fri Mar 19 18:36:57 2010 -0700 25.3 @@ -145,6 +145,7 @@ enum fd_type { 25.4 FTYPE_BLK, 25.5 FTYPE_KBD, 25.6 FTYPE_FB, 25.7 + FTYPE_MEM, 25.8 }; 25.9 25.10 #define MAX_EVTCHN_PORTS 16
26.1 --- a/extras/mini-os/include/mm.h Mon Nov 02 19:35:54 2009 -0800 26.2 +++ b/extras/mini-os/include/mm.h Fri Mar 19 18:36:57 2010 -0700 26.3 @@ -65,12 +65,12 @@ void arch_init_p2m(unsigned long max_pfn 26.4 26.5 unsigned long allocate_ondemand(unsigned long n, unsigned long alignment); 26.6 /* map f[i*stride]+i*increment for i in 0..n-1, aligned on alignment pages */ 26.7 -void *map_frames_ex(unsigned long *f, unsigned long n, unsigned long stride, 26.8 +void *map_frames_ex(const unsigned long *f, unsigned long n, unsigned long stride, 26.9 unsigned long increment, unsigned long alignment, domid_t id, 26.10 - int may_fail, unsigned long prot); 26.11 + int *err, unsigned long prot); 26.12 void do_map_frames(unsigned long addr, 26.13 - unsigned long *f, unsigned long n, unsigned long stride, 26.14 - unsigned long increment, domid_t id, int may_fail, unsigned long prot); 26.15 + const unsigned long *f, unsigned long n, unsigned long stride, 26.16 + unsigned long increment, domid_t id, int *err, unsigned long prot); 26.17 int unmap_frames(unsigned long va, unsigned long num_frames); 26.18 unsigned long alloc_contig_pages(int order, unsigned int addr_bits); 26.19 #ifdef HAVE_LIBC
27.1 --- a/extras/mini-os/include/pcifront.h Mon Nov 02 19:35:54 2009 -0800 27.2 +++ b/extras/mini-os/include/pcifront.h Fri Mar 19 18:36:57 2010 -0700 27.3 @@ -1,6 +1,7 @@ 27.4 #include <mini-os/types.h> 27.5 #include <xen/io/pciif.h> 27.6 struct pcifront_dev; 27.7 +void pcifront_watches(void *opaque); 27.8 struct pcifront_dev *init_pcifront(char *nodename); 27.9 void pcifront_op(struct pcifront_dev *dev, struct xen_pci_op *op); 27.10 void pcifront_scan(struct pcifront_dev *dev, void (*fun)(unsigned int domain, unsigned int bus, unsigned slot, unsigned int fun));
28.1 --- a/extras/mini-os/include/x86/arch_mm.h Mon Nov 02 19:35:54 2009 -0800 28.2 +++ b/extras/mini-os/include/x86/arch_mm.h Fri Mar 19 18:36:57 2010 -0700 28.3 @@ -224,9 +224,9 @@ static __inline__ paddr_t machine_to_phy 28.4 }) 28.5 #define virtual_to_mfn(_virt) pte_to_mfn(virtual_to_pte(_virt)) 28.6 28.7 -#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT) 28.8 -#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 0, L1_PROT_RO) 28.9 -#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, 0, L1_PROT_RO) 28.10 +#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT) 28.11 +#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO) 28.12 +#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO) 28.13 28.14 pgentry_t *need_pgt(unsigned long addr); 28.15 int mfn_is_ram(unsigned long mfn);
29.1 --- a/extras/mini-os/lib/printf.c Mon Nov 02 19:35:54 2009 -0800 29.2 +++ b/extras/mini-os/lib/printf.c Fri Mar 19 18:36:57 2010 -0700 29.3 @@ -62,7 +62,7 @@ 29.4 #include <mini-os/lib.h> 29.5 #include <mini-os/mm.h> 29.6 #include <mini-os/ctype.h> 29.7 -#include <mini-os/limits.h> 29.8 +#include <mini-os/posix/limits.h> 29.9 29.10 /** 29.11 * simple_strtoul - convert a string to an unsigned long
30.1 --- a/extras/mini-os/lib/sys.c Mon Nov 02 19:35:54 2009 -0800 30.2 +++ b/extras/mini-os/lib/sys.c Fri Mar 19 18:36:57 2010 -0700 30.3 @@ -190,6 +190,11 @@ int open(const char *pathname, int flags 30.4 printk("open(%s) -> %d\n", pathname, fd); 30.5 return fd; 30.6 } 30.7 + if (!strncmp(pathname, "/dev/mem", strlen("/dev/mem"))) { 30.8 + fd = alloc_fd(FTYPE_MEM); 30.9 + printk("open(/dev/mem) -> %d\n", fd); 30.10 + return fd; 30.11 + } 30.12 if (!strncmp(pathname, "/dev/ptmx", strlen("/dev/ptmx"))) 30.13 return posix_openpt(flags); 30.14 printk("open(%s, %x)", pathname, flags); 30.15 @@ -1244,13 +1249,15 @@ void *mmap(void *start, size_t length, i 30.16 ASSERT(prot == (PROT_READ|PROT_WRITE)); 30.17 ASSERT((fd == -1 && (flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON))) 30.18 || (fd != -1 && flags == MAP_SHARED)); 30.19 - ASSERT(offset == 0); 30.20 30.21 if (fd == -1) 30.22 return map_zero(n, 1); 30.23 else if (files[fd].type == FTYPE_XC) { 30.24 unsigned long zero = 0; 30.25 - return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, 0, 0); 30.26 + return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, NULL, 0); 30.27 + } else if (files[fd].type == FTYPE_MEM) { 30.28 + unsigned long first_mfn = offset >> PAGE_SHIFT; 30.29 + return map_frames_ex(&first_mfn, n, 0, 1, 1, DOMID_IO, NULL, _PAGE_PRESENT|_PAGE_RW); 30.30 } else ASSERT(0); 30.31 } 30.32
31.1 --- a/extras/mini-os/lib/xmalloc.c Mon Nov 02 19:35:54 2009 -0800 31.2 +++ b/extras/mini-os/lib/xmalloc.c Fri Mar 19 18:36:57 2010 -0700 31.3 @@ -187,6 +187,8 @@ void *_xmalloc(size_t size, size_t align 31.4 31.5 /* Alloc a new page and return from that. */ 31.6 hdr = xmalloc_new_page(align_up(hdr_size, align) + size); 31.7 + if ( hdr == NULL ) 31.8 + return NULL; 31.9 data_begin = (uintptr_t)hdr + align_up(hdr_size, align); 31.10 } 31.11 31.12 @@ -279,14 +281,18 @@ void *_realloc(void *ptr, size_t size) 31.13 void *new; 31.14 struct xmalloc_hdr *hdr; 31.15 struct xmalloc_pad *pad; 31.16 + size_t old_data_size; 31.17 31.18 if (ptr == NULL) 31.19 return _xmalloc(size, DEFAULT_ALIGN); 31.20 31.21 pad = (struct xmalloc_pad *)ptr - 1; 31.22 hdr = (struct xmalloc_hdr *)((char*)ptr - pad->hdr_size); 31.23 - if (hdr->size >= size) { 31.24 - maybe_split(hdr, size, hdr->size); 31.25 + 31.26 + old_data_size = hdr->size - pad->hdr_size; 31.27 + if ( old_data_size >= size ) 31.28 + { 31.29 + maybe_split(hdr, pad->hdr_size + size, hdr->size); 31.30 return ptr; 31.31 } 31.32 31.33 @@ -294,7 +300,7 @@ void *_realloc(void *ptr, size_t size) 31.34 if (new == NULL) 31.35 return NULL; 31.36 31.37 - memcpy(new, ptr, hdr->size); 31.38 + memcpy(new, ptr, old_data_size); 31.39 xfree(ptr); 31.40 31.41 return new;
32.1 --- a/extras/mini-os/lib/xs.c Mon Nov 02 19:35:54 2009 -0800 32.2 +++ b/extras/mini-os/lib/xs.c Fri Mar 19 18:36:57 2010 -0700 32.3 @@ -49,6 +49,7 @@ void *xs_read(struct xs_handle *h, xs_tr 32.4 msg = xenbus_read(t, path, &value); 32.5 if (msg) { 32.6 printk("xs_read(%s): %s\n", path, msg); 32.7 + free(msg); 32.8 return NULL; 32.9 } 32.10 32.11 @@ -69,6 +70,7 @@ bool xs_write(struct xs_handle *h, xs_tr 32.12 msg = xenbus_write(t, path, value); 32.13 if (msg) { 32.14 printk("xs_write(%s): %s\n", path, msg); 32.15 + free(msg); 32.16 return false; 32.17 } 32.18 return true;
33.1 --- a/extras/mini-os/main.c Mon Nov 02 19:35:54 2009 -0800 33.2 +++ b/extras/mini-os/main.c Fri Mar 19 18:36:57 2010 -0700 33.3 @@ -9,6 +9,7 @@ 33.4 #include <sched.h> 33.5 #include <console.h> 33.6 #include <netfront.h> 33.7 +#include <pcifront.h> 33.8 #include <time.h> 33.9 #include <stdlib.h> 33.10 #include <unistd.h> 33.11 @@ -67,6 +68,7 @@ static void call_main(void *p) 33.12 #endif 33.13 init_fs_frontend(); 33.14 #endif 33.15 + create_thread("pcifront", pcifront_watches, NULL); 33.16 33.17 #ifdef CONFIG_QEMU 33.18 /* Fetch argc, argv from XenStore */
34.1 --- a/extras/mini-os/netfront.c Mon Nov 02 19:35:54 2009 -0800 34.2 +++ b/extras/mini-os/netfront.c Fri Mar 19 18:36:57 2010 -0700 34.3 @@ -305,7 +305,7 @@ struct netfront_dev *init_netfront(char 34.4 struct netif_rx_sring *rxs; 34.5 int retry=0; 34.6 int i; 34.7 - char* msg; 34.8 + char* msg = NULL; 34.9 char nodename[256]; 34.10 char path[256]; 34.11 struct netfront_dev *dev; 34.12 @@ -377,6 +377,7 @@ again: 34.13 err = xenbus_transaction_start(&xbt); 34.14 if (err) { 34.15 printk("starting transaction\n"); 34.16 + free(err); 34.17 } 34.18 34.19 err = xenbus_printf(xbt, nodename, "tx-ring-ref","%u", 34.20 @@ -413,6 +414,7 @@ again: 34.21 } 34.22 34.23 err = xenbus_transaction_end(xbt, 0, &retry); 34.24 + if (err) free(err); 34.25 if (retry) { 34.26 goto again; 34.27 printk("completing transaction\n"); 34.28 @@ -421,7 +423,8 @@ again: 34.29 goto done; 34.30 34.31 abort_transaction: 34.32 - xenbus_transaction_end(xbt, 1, &retry); 34.33 + free(err); 34.34 + err = xenbus_transaction_end(xbt, 1, &retry); 34.35 goto error; 34.36 34.37 done: 34.38 @@ -452,7 +455,7 @@ done: 34.39 err = xenbus_wait_for_state_change(path, &state, &dev->events); 34.40 if (state != XenbusStateConnected) { 34.41 printk("backend not avalable, state=%d\n", state); 34.42 - xenbus_unwatch_path(XBT_NIL, path); 34.43 + xenbus_unwatch_path_token(XBT_NIL, path, path); 34.44 goto error; 34.45 } 34.46 34.47 @@ -479,6 +482,8 @@ done: 34.48 34.49 return dev; 34.50 error: 34.51 + free(msg); 34.52 + free(err); 34.53 free_netfront(dev); 34.54 return NULL; 34.55 } 34.56 @@ -521,6 +526,7 @@ void shutdown_netfront(struct netfront_d 34.57 state = xenbus_read_integer(path); 34.58 while (err == NULL && state < XenbusStateClosing) 34.59 err = xenbus_wait_for_state_change(path, &state, &dev->events); 34.60 + if (err) free(err); 34.61 34.62 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { 34.63 printk("shutdown_netfront: error changing state to %d: %s\n", 34.64 @@ -528,8 +534,10 
@@ void shutdown_netfront(struct netfront_d 34.65 goto close; 34.66 } 34.67 state = xenbus_read_integer(path); 34.68 - if (state < XenbusStateClosed) 34.69 - xenbus_wait_for_state_change(path, &state, &dev->events); 34.70 + if (state < XenbusStateClosed) { 34.71 + err = xenbus_wait_for_state_change(path, &state, &dev->events); 34.72 + if (err) free(err); 34.73 + } 34.74 34.75 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { 34.76 printk("shutdown_netfront: error changing state to %d: %s\n", 34.77 @@ -542,7 +550,8 @@ void shutdown_netfront(struct netfront_d 34.78 err = xenbus_wait_for_state_change(path, &state, &dev->events); 34.79 34.80 close: 34.81 - xenbus_unwatch_path(XBT_NIL, path); 34.82 + if (err) free(err); 34.83 + xenbus_unwatch_path_token(XBT_NIL, path, path); 34.84 34.85 snprintf(path, sizeof(path), "%s/tx-ring-ref", nodename); 34.86 xenbus_rm(XBT_NIL, path);
35.1 --- a/extras/mini-os/pcifront.c Mon Nov 02 19:35:54 2009 -0800 35.2 +++ b/extras/mini-os/pcifront.c Fri Mar 19 18:36:57 2010 -0700 35.3 @@ -13,10 +13,12 @@ 35.4 #include <mini-os/xmalloc.h> 35.5 #include <mini-os/wait.h> 35.6 #include <mini-os/pcifront.h> 35.7 +#include <mini-os/sched.h> 35.8 35.9 #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07)) 35.10 35.11 DECLARE_WAIT_QUEUE_HEAD(pcifront_queue); 35.12 +static struct pcifront_dev *pcidev; 35.13 35.14 struct pcifront_dev { 35.15 domid_t dom; 35.16 @@ -38,19 +40,103 @@ void pcifront_handler(evtchn_port_t port 35.17 35.18 static void free_pcifront(struct pcifront_dev *dev) 35.19 { 35.20 - mask_evtchn(dev->evtchn); 35.21 + if (!dev) 35.22 + dev = pcidev; 35.23 35.24 - free(dev->backend); 35.25 + mask_evtchn(dev->evtchn); 35.26 35.27 gnttab_end_access(dev->info_ref); 35.28 free_page(dev->info); 35.29 35.30 unbind_evtchn(dev->evtchn); 35.31 35.32 + free(dev->backend); 35.33 free(dev->nodename); 35.34 free(dev); 35.35 } 35.36 35.37 +void pcifront_watches(void *opaque) 35.38 +{ 35.39 + XenbusState state; 35.40 + char *err = NULL, *msg = NULL; 35.41 + char *be_path, *be_state; 35.42 + char* nodename = opaque ? 
opaque : "device/pci/0"; 35.43 + char path[strlen(nodename) + 9]; 35.44 + char fe_state[strlen(nodename) + 7]; 35.45 + xenbus_event_queue events = NULL; 35.46 + 35.47 + snprintf(path, sizeof(path), "%s/backend", nodename); 35.48 + snprintf(fe_state, sizeof(fe_state), "%s/state", nodename); 35.49 + 35.50 + while (1) { 35.51 + printk("pcifront_watches: waiting for backend path to happear %s\n", path); 35.52 + xenbus_watch_path_token(XBT_NIL, path, path, &events); 35.53 + while ((err = xenbus_read(XBT_NIL, path, &be_path)) != NULL) { 35.54 + free(err); 35.55 + xenbus_wait_for_watch(&events); 35.56 + } 35.57 + xenbus_unwatch_path_token(XBT_NIL, path, path); 35.58 + printk("pcifront_watches: waiting for backend to get into the right state %s\n", be_path); 35.59 + be_state = (char *) malloc(strlen(be_path) + 7); 35.60 + snprintf(be_state, strlen(be_path) + 7, "%s/state", be_path); 35.61 + xenbus_watch_path_token(XBT_NIL, be_state, be_state, &events); 35.62 + while ((err = xenbus_read(XBT_NIL, be_state, &msg)) != NULL || msg[0] > '4') { 35.63 + free(msg); 35.64 + free(err); 35.65 + xenbus_wait_for_watch(&events); 35.66 + } 35.67 + xenbus_unwatch_path_token(XBT_NIL, be_state, be_state); 35.68 + if (init_pcifront(NULL) == NULL) { 35.69 + free(be_state); 35.70 + free(be_path); 35.71 + continue; 35.72 + } 35.73 + xenbus_watch_path_token(XBT_NIL, be_state, be_state, &events); 35.74 + state = XenbusStateConnected; 35.75 + printk("pcifront_watches: waiting for backend events %s\n", be_state); 35.76 + while ((err = xenbus_wait_for_state_change(be_state, &state, &events)) == NULL && 35.77 + (err = xenbus_read(XBT_NIL, pcidev->backend, &msg)) == NULL) { 35.78 + free(msg); 35.79 + printk("pcifront_watches: backend state changed: %s %d\n", be_state, state); 35.80 + if (state == XenbusStateReconfiguring) { 35.81 + printk("pcifront_watches: writing %s %d\n", fe_state, XenbusStateReconfiguring); 35.82 + if ((err = xenbus_switch_state(XBT_NIL, fe_state, XenbusStateReconfiguring)) != 
NULL) { 35.83 + printk("pcifront_watches: error changing state to %d: %s\n", 35.84 + XenbusStateReconfiguring, err); 35.85 + if (!strcmp(err, "ENOENT")) { 35.86 + xenbus_write(XBT_NIL, fe_state, "7"); 35.87 + free(err); 35.88 + } 35.89 + } 35.90 + } else if (state == XenbusStateReconfigured) { 35.91 + printk("pcifront_watches: writing %s %d\n", fe_state, XenbusStateConnected); 35.92 + printk("pcifront_watches: changing state to %d\n", XenbusStateConnected); 35.93 + if ((err = xenbus_switch_state(XBT_NIL, fe_state, XenbusStateConnected)) != NULL) { 35.94 + printk("pcifront_watches: error changing state to %d: %s\n", 35.95 + XenbusStateConnected, err); 35.96 + if (!strcmp(err, "ENOENT")) { 35.97 + xenbus_write(XBT_NIL, fe_state, "4"); 35.98 + free(err); 35.99 + } 35.100 + } 35.101 + } else if (state == XenbusStateClosing) 35.102 + break; 35.103 + } 35.104 + if (err) 35.105 + printk("pcifront_watches: done waiting err=%s\n", err); 35.106 + else 35.107 + printk("pcifront_watches: done waiting\n"); 35.108 + xenbus_unwatch_path_token(XBT_NIL, be_state, be_state); 35.109 + shutdown_pcifront(pcidev); 35.110 + free(be_state); 35.111 + free(be_path); 35.112 + free(err); 35.113 + pcidev = NULL; 35.114 + } 35.115 + 35.116 + xenbus_unwatch_path_token(XBT_NIL, path, path); 35.117 +} 35.118 + 35.119 struct pcifront_dev *init_pcifront(char *_nodename) 35.120 { 35.121 xenbus_transaction_t xbt; 35.122 @@ -65,6 +151,9 @@ struct pcifront_dev *init_pcifront(char 35.123 35.124 char path[strlen(nodename) + 1 + 10 + 1]; 35.125 35.126 + if (!_nodename && pcidev) 35.127 + return pcidev; 35.128 + 35.129 printk("******************* PCIFRONT for %s **********\n\n\n", nodename); 35.130 35.131 snprintf(path, sizeof(path), "%s/backend-id", nodename); 35.132 @@ -92,6 +181,7 @@ again: 35.133 err = xenbus_transaction_start(&xbt); 35.134 if (err) { 35.135 printk("starting transaction\n"); 35.136 + free(err); 35.137 } 35.138 35.139 err = xenbus_printf(xbt, nodename, "pci-op-ref","%u", 35.140 @@ -121,6 
+211,7 @@ again: 35.141 } 35.142 35.143 err = xenbus_transaction_end(xbt, 0, &retry); 35.144 + if (err) free(err); 35.145 if (retry) { 35.146 goto again; 35.147 printk("completing transaction\n"); 35.148 @@ -129,7 +220,8 @@ again: 35.149 goto done; 35.150 35.151 abort_transaction: 35.152 - xenbus_transaction_end(xbt, 1, &retry); 35.153 + free(err); 35.154 + err = xenbus_transaction_end(xbt, 1, &retry); 35.155 goto error; 35.156 35.157 done: 35.158 @@ -157,7 +249,7 @@ done: 35.159 err = xenbus_wait_for_state_change(path, &state, &dev->events); 35.160 if (state != XenbusStateConnected) { 35.161 printk("backend not avalable, state=%d\n", state); 35.162 - xenbus_unwatch_path(XBT_NIL, path); 35.163 + xenbus_unwatch_path_token(XBT_NIL, path, path); 35.164 goto error; 35.165 } 35.166 35.167 @@ -165,7 +257,7 @@ done: 35.168 if ((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected)) 35.169 != NULL) { 35.170 printk("error switching state %s\n", err); 35.171 - xenbus_unwatch_path(XBT_NIL, path); 35.172 + xenbus_unwatch_path_token(XBT_NIL, path, path); 35.173 goto error; 35.174 } 35.175 } 35.176 @@ -173,25 +265,47 @@ done: 35.177 35.178 printk("**************************\n"); 35.179 35.180 + if (!_nodename) 35.181 + pcidev = dev; 35.182 + 35.183 return dev; 35.184 35.185 error: 35.186 + free(err); 35.187 free_pcifront(dev); 35.188 return NULL; 35.189 } 35.190 35.191 void pcifront_scan(struct pcifront_dev *dev, void (*func)(unsigned int domain, unsigned int bus, unsigned slot, unsigned int fun)) 35.192 { 35.193 - char path[strlen(dev->backend) + 1 + 5 + 10 + 1]; 35.194 - int i, n; 35.195 - char *s, *msg; 35.196 + char *path; 35.197 + int i, n, len; 35.198 + char *s, *msg = NULL, *err = NULL; 35.199 unsigned int domain, bus, slot, fun; 35.200 35.201 - snprintf(path, sizeof(path), "%s/num_devs", dev->backend); 35.202 + if (!dev) 35.203 + dev = pcidev; 35.204 + if (!dev) { 35.205 + xenbus_event_queue events = NULL; 35.206 + char *fe_state = "device/pci/0/state"; 
35.207 + xenbus_watch_path_token(XBT_NIL, fe_state, fe_state, &events); 35.208 + while ((err = xenbus_read(XBT_NIL, fe_state, &msg)) != NULL || msg[0] != '4') { 35.209 + free(msg); 35.210 + free(err); 35.211 + printk("pcifront_scan: waiting for pcifront to become ready\n"); 35.212 + xenbus_wait_for_watch(&events); 35.213 + } 35.214 + xenbus_unwatch_path_token(XBT_NIL, fe_state, fe_state); 35.215 + dev = pcidev; 35.216 + } 35.217 + 35.218 + len = strlen(dev->backend) + 1 + 5 + 10 + 1; 35.219 + path = (char *) malloc(len); 35.220 + snprintf(path, len, "%s/num_devs", dev->backend); 35.221 n = xenbus_read_integer(path); 35.222 35.223 for (i = 0; i < n; i++) { 35.224 - snprintf(path, sizeof(path), "%s/dev-%d", dev->backend, i); 35.225 + snprintf(path, len, "%s/dev-%d", dev->backend, i); 35.226 msg = xenbus_read(XBT_NIL, path, &s); 35.227 if (msg) { 35.228 printk("Error %s when reading the PCI root name at %s\n", msg, path); 35.229 @@ -205,8 +319,10 @@ void pcifront_scan(struct pcifront_dev * 35.230 } 35.231 free(s); 35.232 35.233 - func(domain, bus, slot, fun); 35.234 + if (func) 35.235 + func(domain, bus, slot, fun); 35.236 } 35.237 + free(path); 35.238 } 35.239 35.240 void shutdown_pcifront(struct pcifront_dev *dev) 35.241 @@ -229,6 +345,7 @@ void shutdown_pcifront(struct pcifront_d 35.242 state = xenbus_read_integer(path); 35.243 while (err == NULL && state < XenbusStateClosing) 35.244 err = xenbus_wait_for_state_change(path, &state, &dev->events); 35.245 + if (err) free(err); 35.246 35.247 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) { 35.248 printk("shutdown_pcifront: error changing state to %d: %s\n", 35.249 @@ -236,8 +353,10 @@ void shutdown_pcifront(struct pcifront_d 35.250 goto close_pcifront; 35.251 } 35.252 state = xenbus_read_integer(path); 35.253 - if (state < XenbusStateClosed) 35.254 - xenbus_wait_for_state_change(path, &state, &dev->events); 35.255 + if (state < XenbusStateClosed) { 35.256 + err = 
xenbus_wait_for_state_change(path, &state, &dev->events); 35.257 + free(err); 35.258 + } 35.259 35.260 if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) { 35.261 printk("shutdown_pcifront: error changing state to %d: %s\n", 35.262 @@ -250,7 +369,8 @@ void shutdown_pcifront(struct pcifront_d 35.263 err = xenbus_wait_for_state_change(path, &state, &dev->events); 35.264 35.265 close_pcifront: 35.266 - xenbus_unwatch_path(XBT_NIL, path); 35.267 + if (err) free(err); 35.268 + xenbus_unwatch_path_token(XBT_NIL, path, path); 35.269 35.270 snprintf(path, sizeof(path), "%s/info-ref", nodename); 35.271 xenbus_rm(XBT_NIL, path); 35.272 @@ -271,6 +391,9 @@ int pcifront_physical_to_virtual (struct 35.273 char *s, *msg = NULL; 35.274 unsigned int dom1, bus1, slot1, fun1; 35.275 35.276 + if (!dev) 35.277 + dev = pcidev; 35.278 + 35.279 snprintf(path, sizeof(path), "%s/num_devs", dev->backend); 35.280 n = xenbus_read_integer(path); 35.281 35.282 @@ -312,6 +435,8 @@ int pcifront_physical_to_virtual (struct 35.283 35.284 void pcifront_op(struct pcifront_dev *dev, struct xen_pci_op *op) 35.285 { 35.286 + if (!dev) 35.287 + dev = pcidev; 35.288 dev->info->op = *op; 35.289 /* Make sure info is written before the flag */ 35.290 wmb(); 35.291 @@ -332,6 +457,8 @@ int pcifront_conf_read(struct pcifront_d 35.292 { 35.293 struct xen_pci_op op; 35.294 35.295 + if (!dev) 35.296 + dev = pcidev; 35.297 if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) 35.298 return XEN_PCI_ERR_dev_not_found; 35.299 memset(&op, 0, sizeof(op)); 35.300 @@ -360,6 +487,8 @@ int pcifront_conf_write(struct pcifront_ 35.301 { 35.302 struct xen_pci_op op; 35.303 35.304 + if (!dev) 35.305 + dev = pcidev; 35.306 if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) 35.307 return XEN_PCI_ERR_dev_not_found; 35.308 memset(&op, 0, sizeof(op)); 35.309 @@ -384,6 +513,8 @@ int pcifront_enable_msi(struct pcifront_ 35.310 { 35.311 struct xen_pci_op op; 35.312 35.313 
+ if (!dev) 35.314 + dev = pcidev; 35.315 if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) 35.316 return XEN_PCI_ERR_dev_not_found; 35.317 memset(&op, 0, sizeof(op)); 35.318 @@ -407,6 +538,8 @@ int pcifront_disable_msi(struct pcifront 35.319 { 35.320 struct xen_pci_op op; 35.321 35.322 + if (!dev) 35.323 + dev = pcidev; 35.324 if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) 35.325 return XEN_PCI_ERR_dev_not_found; 35.326 memset(&op, 0, sizeof(op)); 35.327 @@ -428,6 +561,8 @@ int pcifront_enable_msix(struct pcifront 35.328 { 35.329 struct xen_pci_op op; 35.330 35.331 + if (!dev) 35.332 + dev = pcidev; 35.333 if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) 35.334 return XEN_PCI_ERR_dev_not_found; 35.335 if (n > SH_INFO_MAX_VEC) 35.336 @@ -460,6 +595,8 @@ int pcifront_disable_msix(struct pcifron 35.337 { 35.338 struct xen_pci_op op; 35.339 35.340 + if (!dev) 35.341 + dev = pcidev; 35.342 if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0) 35.343 return XEN_PCI_ERR_dev_not_found; 35.344 memset(&op, 0, sizeof(op));
36.1 --- a/extras/mini-os/xenbus/xenbus.c Mon Nov 02 19:35:54 2009 -0800 36.2 +++ b/extras/mini-os/xenbus/xenbus.c Fri Mar 19 18:36:57 2010 -0700 36.3 @@ -96,7 +96,10 @@ void xenbus_wait_for_watch(xenbus_event_ 36.4 if (!queue) 36.5 queue = &xenbus_events; 36.6 ret = xenbus_wait_for_watch_return(queue); 36.7 - free(ret); 36.8 + if (ret) 36.9 + free(ret); 36.10 + else 36.11 + printk("unexpected path returned by watch\n"); 36.12 } 36.13 36.14 char* xenbus_wait_for_value(const char* path, const char* value, xenbus_event_queue *queue) 36.15 @@ -132,7 +135,8 @@ char *xenbus_switch_state(xenbus_transac 36.16 36.17 do { 36.18 if (xbt == XBT_NIL) { 36.19 - xenbus_transaction_start(&xbt); 36.20 + msg = xenbus_transaction_start(&xbt); 36.21 + if (msg) goto exit; 36.22 xbt_flag = 1; 36.23 } 36.24
37.1 --- a/stubdom/Makefile Mon Nov 02 19:35:54 2009 -0800 37.2 +++ b/stubdom/Makefile Fri Mar 19 18:36:57 2010 -0700 37.3 @@ -265,9 +265,11 @@ TARGETS_MINIOS=$(addprefix mini-os-$(XEN 37.4 37.5 .PHONY: libxc 37.6 libxc: libxc-$(XEN_TARGET_ARCH)/libxenctrl.a libxc-$(XEN_TARGET_ARCH)/libxenguest.a 37.7 -libxc-$(XEN_TARGET_ARCH)/libxenctrl.a libxc-$(XEN_TARGET_ARCH)/libxenguest.a:: cross-zlib 37.8 +libxc-$(XEN_TARGET_ARCH)/libxenctrl.a: cross-zlib 37.9 CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) -C libxc-$(XEN_TARGET_ARCH) 37.10 37.11 + libxc-$(XEN_TARGET_ARCH)/libxenguest.a: libxc-$(XEN_TARGET_ARCH)/libxenctrl.a 37.12 + 37.13 ####### 37.14 # ioemu 37.15 #######
38.1 --- a/stubdom/README Mon Nov 02 19:35:54 2009 -0800 38.2 +++ b/stubdom/README Fri Mar 19 18:36:57 2010 -0700 38.3 @@ -52,11 +52,17 @@ kernel = "pv-grub.gz" 38.4 38.5 extra = "(hd0,0)/boot/grub/menu.lst" 38.6 38.7 -you can also use a tftp path (dhcp will be automatically performed): 38.8 +or you can provide the content of a menu.lst stored in dom0 by passing it as a 38.9 +ramdisk: 38.10 + 38.11 +ramdisk = "/boot/domU-1-menu.lst" 38.12 + 38.13 +or you can also use a tftp path (dhcp will be automatically performed): 38.14 38.15 extra = "(nd)/somepath/menu.lst" 38.16 38.17 -or you can set it in option 150 of your dhcp server and leave extra empty 38.18 +or you can set it in option 150 of your dhcp server and leave extra and ramdisk 38.19 +empty (dhcp will be automatically performed) 38.20 38.21 Limitations 38.22 =========== 38.23 @@ -69,6 +75,13 @@ export XEN_TARGET_ARCH=x86_32 38.24 - bootsplash is supported, but the ioemu backend does not yet support restart 38.25 for use by the booted kernel. 38.26 38.27 +- PV-GRUB doesn't support virtualized partitions. For instance: 38.28 + 38.29 +disk = [ 'phy:hda7,hda7,w' ] 38.30 + 38.31 +will be seen by PV-GRUB as (hd0), not (hd0,6), since GRUB will not see any 38.32 +partition table. 38.33 + 38.34 38.35 Your own stubdom 38.36 ================
39.1 --- a/stubdom/grub.patches/99minios Mon Nov 02 19:35:54 2009 -0800 39.2 +++ b/stubdom/grub.patches/99minios Fri Mar 19 18:36:57 2010 -0700 39.3 @@ -151,6 +151,14 @@ Index: grub/stage2/builtins.c 39.4 39.5 /* print */ 39.6 static int 39.7 +@@ -2910,6 +2910,7 @@ 39.8 + switch (kernel_type) 39.9 + { 39.10 + case KERNEL_TYPE_MULTIBOOT: 39.11 ++ case KERNEL_TYPE_PV: 39.12 + if (mb_cmdline + len + 1 > (char *) MB_CMDLINE_BUF + MB_CMDLINE_BUFLEN) 39.13 + { 39.14 + errnum = ERR_WONT_FIT; 39.15 @@ -3776,6 +3802,7 @@ 39.16 }; 39.17 39.18 @@ -1493,3 +1501,70 @@ diff -u -p -r1.5 fsys_xfs.c 39.19 #else 39.20 /* This is slower but this works on all x86 architectures. */ 39.21 __asm__("xchgb %b0, %h0" \ 39.22 +--- grub.orig/stage2/gunzip.c 2010-03-07 23:03:34.000000000 +0100 39.23 ++++ grub/stage2/gunzip.c 2010-03-07 23:05:36.000000000 +0100 39.24 +@@ -141,7 +141,7 @@ 39.25 + static int gzip_filemax; 39.26 + static int gzip_fsmax; 39.27 + static int saved_filepos; 39.28 +-static unsigned long gzip_crc; 39.29 ++static unsigned int gzip_crc; 39.30 + 39.31 + /* internal extra variables for use of inflate code */ 39.32 + static int block_type; 39.33 +@@ -157,7 +157,7 @@ 39.34 + * Linear allocator. 
39.35 + */ 39.36 + 39.37 +-static unsigned long linalloc_topaddr; 39.38 ++static unsigned int linalloc_topaddr; 39.39 + 39.40 + static void * 39.41 + linalloc (int size) 39.42 +@@ -253,7 +253,7 @@ 39.43 + 39.44 + typedef unsigned char uch; 39.45 + typedef unsigned short ush; 39.46 +-typedef unsigned long ulg; 39.47 ++typedef unsigned int ulg; 39.48 + 39.49 + /* 39.50 + * Window Size 39.51 +@@ -316,8 +316,8 @@ 39.52 + return 0; 39.53 + } 39.54 + 39.55 +- gzip_crc = *((unsigned long *) buf); 39.56 +- gzip_fsmax = gzip_filemax = *((unsigned long *) (buf + 4)); 39.57 ++ gzip_crc = *((unsigned int *) buf); 39.58 ++ gzip_fsmax = gzip_filemax = *((unsigned int *) (buf + 4)); 39.59 + 39.60 + initialize_tables (); 39.61 + 39.62 +diff -ur grub.orig/stage2/fsys_iso9660.c grub-upstream/stage2/fsys_iso9660.c 39.63 +--- grub.orig/stage2/fsys_iso9660.c 2010-03-07 23:39:00.000000000 +0100 39.64 ++++ grub/stage2/fsys_iso9660.c 2010-03-07 23:39:56.000000000 +0100 39.65 +@@ -43,7 +43,7 @@ 39.66 + 39.67 + /* iso fs inode data in memory */ 39.68 + struct iso_inode_info { 39.69 +- unsigned long file_start; 39.70 ++ unsigned int file_start; 39.71 + }; 39.72 + 39.73 + #define ISO_SUPER \ 39.74 +@@ -88,12 +88,12 @@ 39.75 + if (byte_len <= 0) 39.76 + return 1; 39.77 + 39.78 +- sector += (byte_offset >> sector_size_lg2); 39.79 +- byte_offset &= (buf_geom.sector_size - 1); 39.80 + asm volatile ("shl%L0 %1,%0" 39.81 + : "=r"(sector) 39.82 + : "Ic"((int8_t)(ISO_SECTOR_BITS - sector_size_lg2)), 39.83 + "0"(sector)); 39.84 ++ sector += (byte_offset >> sector_size_lg2); 39.85 ++ byte_offset &= (buf_geom.sector_size - 1); 39.86 + 39.87 + #if !defined(STAGE1_5) 39.88 + if (disk_read_hook && debug)
40.1 --- a/stubdom/grub/config.h Mon Nov 02 19:35:54 2009 -0800 40.2 +++ b/stubdom/grub/config.h Fri Mar 19 18:36:57 2010 -0700 40.3 @@ -5,7 +5,7 @@ 40.4 #define debug _debug 40.5 #define grub_halt(a) do_exit() 40.6 #define printf grub_printf 40.7 -void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline); 40.8 +void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline, unsigned long flags); 40.9 struct fbfront_dev *fb_open(void *fb, int width, int height, int depth); 40.10 void fb_close(void); 40.11 void pv_boot (void);
41.1 --- a/stubdom/grub/kexec.c Mon Nov 02 19:35:54 2009 -0800 41.2 +++ b/stubdom/grub/kexec.c Fri Mar 19 18:36:57 2010 -0700 41.3 @@ -103,7 +103,7 @@ int kexec_allocate(struct xc_dom_image * 41.4 return 0; 41.5 } 41.6 41.7 -void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline) 41.8 +void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline, unsigned long flags) 41.9 { 41.10 struct xc_dom_image *dom; 41.11 int rc; 41.12 @@ -129,7 +129,7 @@ void kexec(void *kernel, long kernel_siz 41.13 dom->ramdisk_blob = module; 41.14 dom->ramdisk_size = module_size; 41.15 41.16 - dom->flags = 0; 41.17 + dom->flags = flags; 41.18 dom->console_evtchn = start_info.console.domU.evtchn; 41.19 dom->xenstore_evtchn = start_info.store_evtchn; 41.20
42.1 --- a/stubdom/grub/mini-os.c Mon Nov 02 19:35:54 2009 -0800 42.2 +++ b/stubdom/grub/mini-os.c Fri Mar 19 18:36:57 2010 -0700 42.3 @@ -173,6 +173,8 @@ load_file(char *name, void **ptr, long * 42.4 void *kernel_image, *module_image; 42.5 long kernel_size, module_size; 42.6 char *kernel_arg, *module_arg; 42.7 +void *multiboot_next_module; 42.8 +struct xen_multiboot_mod_list *multiboot_next_module_header; 42.9 42.10 kernel_t 42.11 load_image (char *kernel, char *arg, kernel_t suggested_type, 42.12 @@ -196,6 +198,8 @@ load_initrd (char *initrd) 42.13 if (module_image) 42.14 free(module_image); 42.15 module_image = NULL; 42.16 + multiboot_next_module = NULL; 42.17 + multiboot_next_module_header = NULL; 42.18 load_file (initrd, &module_image, &module_size); 42.19 return ! errnum; 42.20 } 42.21 @@ -203,20 +207,76 @@ load_initrd (char *initrd) 42.22 int 42.23 load_module (char *module, char *arg) 42.24 { 42.25 - if (module_image) 42.26 + void *new_module, *new_module_image; 42.27 + long new_module_size, rounded_new_module_size; 42.28 + 42.29 + if (load_file (module, &new_module, &new_module_size)) 42.30 + return 0; 42.31 + if (strlen(arg) >= PAGE_SIZE) { 42.32 + /* Too big module command line */ 42.33 + errnum = ERR_WONT_FIT; 42.34 + return 0; 42.35 + } 42.36 + rounded_new_module_size = (new_module_size + PAGE_SIZE - 1) & PAGE_MASK; 42.37 + 42.38 + if (module_image && !multiboot_next_module_header) { 42.39 + /* Initrd already loaded, drop it */ 42.40 free(module_image); 42.41 - module_image = NULL; 42.42 - load_file (module, &module_image, &module_size); 42.43 - if (module_arg) 42.44 - free(module_arg); 42.45 - module_arg = strdup(arg); 42.46 - return ! 
errnum; 42.47 + if (module_arg) 42.48 + free(module_arg); 42.49 + module_image = NULL; 42.50 + } 42.51 + if (!module_image) 42.52 + /* Reserve one page for the header */ 42.53 + multiboot_next_module = (void*) PAGE_SIZE; 42.54 + 42.55 + /* Allocate more room for the new module plus its arg */ 42.56 + new_module_image = realloc(module_image, 42.57 + (multiboot_next_module - module_image) + rounded_new_module_size + PAGE_SIZE); 42.58 + 42.59 + /* Update pointers */ 42.60 + multiboot_next_module += new_module_image - module_image; 42.61 + multiboot_next_module_header = (void*) multiboot_next_module_header + (new_module_image - module_image); 42.62 + module_image = new_module_image; 42.63 + 42.64 + if ((void*) (multiboot_next_module_header+1) - module_image > PAGE_SIZE) { 42.65 + /* Too many modules */ 42.66 + errnum = ERR_WONT_FIT; 42.67 + return 0; 42.68 + } 42.69 + 42.70 + /* Copy module */ 42.71 + memcpy(multiboot_next_module, new_module, new_module_size); 42.72 + multiboot_next_module_header->mod_start = multiboot_next_module - module_image; 42.73 + multiboot_next_module_header->mod_end = multiboot_next_module_header->mod_start + new_module_size - 1; 42.74 + multiboot_next_module += rounded_new_module_size; 42.75 + 42.76 + /* Copy cmdline */ 42.77 + strcpy(multiboot_next_module, arg); 42.78 + multiboot_next_module_header->cmdline = multiboot_next_module - module_image; 42.79 + multiboot_next_module += PAGE_SIZE; 42.80 + 42.81 + /* Pad */ 42.82 + multiboot_next_module_header->pad = 0; 42.83 + 42.84 + multiboot_next_module_header++; 42.85 + 42.86 + return 1; 42.87 } 42.88 42.89 void 42.90 pv_boot (void) 42.91 { 42.92 - kexec(kernel_image, kernel_size, module_image, module_size, kernel_arg); 42.93 + unsigned long flags = 0; 42.94 + if (multiboot_next_module_header) { 42.95 + /* Termination entry */ 42.96 + multiboot_next_module_header->mod_start = 0; 42.97 + /* Total size */ 42.98 + module_size = multiboot_next_module - module_image; 42.99 + /* It's a multiboot module */ 
42.100 + flags |= SIF_MULTIBOOT_MOD; 42.101 + } 42.102 + kexec(kernel_image, kernel_size, module_image, module_size, kernel_arg, flags); 42.103 } 42.104 42.105 /*
43.1 --- a/stubdom/pciutils.patch Mon Nov 02 19:35:54 2009 -0800 43.2 +++ b/stubdom/pciutils.patch Fri Mar 19 18:36:57 2010 -0700 43.3 @@ -23,14 +23,6 @@ diff -urN pciutils-2.2.9.orig/lib/access 43.4 PCI_ACCESS_MAX 43.5 }; 43.6 43.7 -@@ -63,6 +64,7 @@ 43.8 - int fd_rw; /* proc: fd opened read-write */ 43.9 - struct pci_dev *cached_dev; /* proc: device the fd is for */ 43.10 - int fd_pos; /* proc: current position */ 43.11 -+ void *minios; 43.12 - }; 43.13 - 43.14 - /* Initialize PCI access */ 43.15 --- pciutils-2.2.9.orig/lib/internal.h 2006-09-09 11:52:47.000000000 +0100 43.16 +++ pciutils-2.2.9/lib/internal.h 2008-07-01 10:46:24.968202000 +0100 43.17 @@ -37,4 +37,4 @@ 43.18 @@ -72,7 +64,7 @@ diff -urN pciutils-2.2.9.orig/lib/access 43.19 43.20 --- pciutils-2.2.9.orig/lib/minios.c 1970-01-01 01:00:00.000000000 +0100 43.21 +++ pciutils-2.2.9/lib/minios.c 2008-07-01 12:31:40.554260000 +0100 43.22 -@@ -0,0 +1,113 @@ 43.23 +@@ -0,0 +1,106 @@ 43.24 +/* 43.25 + * The PCI Library -- MiniOS PCI frontend access 43.26 + * 43.27 @@ -95,24 +87,17 @@ diff -urN pciutils-2.2.9.orig/lib/access 43.28 +static void 43.29 +minios_init(struct pci_access *a) 43.30 +{ 43.31 -+ a->minios = init_pcifront(NULL); 43.32 -+ if (!a->minios) 43.33 -+ a->warning("minios_init open failed"); 43.34 +} 43.35 + 43.36 +static void 43.37 +minios_cleanup(struct pci_access *a) 43.38 +{ 43.39 -+ if (a->minios) 43.40 -+ shutdown_pcifront(a->minios); 43.41 ++ shutdown_pcifront(NULL); 43.42 +} 43.43 + 43.44 +static void 43.45 +minios_scan(struct pci_access *a) 43.46 +{ 43.47 -+ if (!a->minios) 43.48 -+ return; 43.49 -+ 43.50 + void func(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun) 43.51 + { 43.52 + struct pci_dev *d = pci_alloc_dev(a); 43.53 @@ -125,7 +110,7 @@ diff -urN pciutils-2.2.9.orig/lib/access 43.54 + pci_link_dev(a, d); 43.55 + } 43.56 + 43.57 -+ pcifront_scan(a->minios, func); 43.58 ++ pcifront_scan(NULL, func); 43.59 +} 43.60 + 43.61 +static int 43.62 @@ -134,17 
+119,17 @@ diff -urN pciutils-2.2.9.orig/lib/access 43.63 + unsigned int val; 43.64 + switch (len) { 43.65 + case 1: 43.66 -+ if (pcifront_conf_read(d->access->minios, d->domain, d->bus, d->dev, d->func, pos, len, &val)) 43.67 ++ if (pcifront_conf_read(NULL, d->domain, d->bus, d->dev, d->func, pos, len, &val)) 43.68 + return 0; 43.69 + * buf = val; 43.70 + return 1; 43.71 + case 2: 43.72 -+ if (pcifront_conf_read(d->access->minios, d->domain, d->bus, d->dev, d->func, pos, len, &val)) 43.73 ++ if (pcifront_conf_read(NULL, d->domain, d->bus, d->dev, d->func, pos, len, &val)) 43.74 + return 0; 43.75 + *(u16 *) buf = cpu_to_le16((u16) val); 43.76 + return 1; 43.77 + case 4: 43.78 -+ if (pcifront_conf_read(d->access->minios, d->domain, d->bus, d->dev, d->func, pos, len, &val)) 43.79 ++ if (pcifront_conf_read(NULL, d->domain, d->bus, d->dev, d->func, pos, len, &val)) 43.80 + return 0; 43.81 + *(u32 *) buf = cpu_to_le32((u32) val); 43.82 + return 1; 43.83 @@ -170,7 +155,7 @@ diff -urN pciutils-2.2.9.orig/lib/access 43.84 + default: 43.85 + return pci_generic_block_write(d, pos, buf, len); 43.86 + } 43.87 -+ return !pcifront_conf_write(d->access->minios, d->domain, d->bus, d->dev, d->func, pos, len, val); 43.88 ++ return !pcifront_conf_write(NULL, d->domain, d->bus, d->dev, d->func, pos, len, val); 43.89 +} 43.90 + 43.91 +struct pci_methods pm_minios = {
44.1 --- a/stubdom/stubdom-dm Mon Nov 02 19:35:54 2009 -0800 44.2 +++ b/stubdom/stubdom-dm Fri Mar 19 18:36:57 2010 -0700 44.3 @@ -80,8 +80,8 @@ done 44.4 # Termination handler 44.5 44.6 term() { 44.7 - kill %1 44.8 [ -n "$vncpid" ] && kill -9 $vncpid 44.9 + rm -f /tmp/$domname-dm 44.10 rm ${stubdom_configdir}/$domname-dm 44.11 exit 0 44.12 } 44.13 @@ -154,11 +154,10 @@ do 44.14 j=$(( $j + 1 )) 44.15 done 44.16 echo " ] " >> ${stubdom_configdir}/$domname-dm 44.17 -creation="xm create -c ${stubdom_configdir}/$domname-dm target=$domid memory=32 extra=\"$extra\"" 44.18 44.19 -(while true ; do sleep 60 ; done) | /bin/sh -c "$creation" & 44.20 -#xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" & 44.21 -consolepid=$! 44.22 +mkfifo /tmp/$domname-dm 44.23 +xm create -c ${stubdom_configdir}/$domname-dm target=$domid memory=32 extra="$extra" < /tmp/$domname-dm & 44.24 +exec 4>/tmp/$domname-dm 44.25 44.26 44.27 ########### 44.28 @@ -178,6 +177,6 @@ then 44.29 fi 44.30 44.31 # wait for SIGHUP or stubdom termination 44.32 -wait $consolepid 44.33 +wait 44.34 44.35 term
45.1 --- a/tools/Makefile Mon Nov 02 19:35:54 2009 -0800 45.2 +++ b/tools/Makefile Fri Mar 19 18:36:57 2010 -0700 45.3 @@ -1,4 +1,4 @@ 45.4 -XEN_ROOT = ../ 45.5 +XEN_ROOT = .. 45.6 include $(XEN_ROOT)/tools/Rules.mk 45.7 45.8 SUBDIRS-y := 45.9 @@ -21,6 +21,7 @@ SUBDIRS-$(VTPM_TOOLS) += vtpm_manager 45.10 SUBDIRS-$(VTPM_TOOLS) += vtpm 45.11 SUBDIRS-y += xenstat 45.12 SUBDIRS-$(CONFIG_Linux) += libaio 45.13 +SUBDIRS-$(CONFIG_Linux) += memshr 45.14 SUBDIRS-$(CONFIG_Linux) += blktap 45.15 SUBDIRS-$(CONFIG_Linux) += blktap2 45.16 SUBDIRS-$(CONFIG_NetBSD) += libaio 45.17 @@ -32,6 +33,9 @@ SUBDIRS-$(CONFIG_Linux) += fs-back 45.18 SUBDIRS-$(CONFIG_NetBSD) += fs-back 45.19 SUBDIRS-$(CONFIG_IOEMU) += ioemu-dir 45.20 SUBDIRS-y += xenpmd 45.21 +SUBDIRS-y += libxl 45.22 +SUBDIRS-y += remus 45.23 +SUBDIRS-$(CONFIG_X86) += xenpaging 45.24 45.25 # These don't cross-compile 45.26 ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
46.1 --- a/tools/Rules.mk Mon Nov 02 19:35:54 2009 -0800 46.2 +++ b/tools/Rules.mk Fri Mar 19 18:36:57 2010 -0700 46.3 @@ -49,8 +49,8 @@ check-$(CONFIG_X86) = $(call cc-ver-chec 46.4 "Xen requires at least gcc-3.4") 46.5 $(eval $(check-y)) 46.6 46.7 -DEFAULT_PYTHON_PATH := $(shell $(XEN_ROOT)/tools/python/get-path) 46.8 -PYTHON_PATH ?= $(DEFAULT_PYTHON_PATH) 46.9 +_PYTHON_PATH := $(shell which $(PYTHON)) 46.10 +PYTHON_PATH ?= $(_PYTHON_PATH) 46.11 INSTALL_PYTHON_PROG = \ 46.12 $(XEN_ROOT)/tools/python/install-wrap "$(PYTHON_PATH)" $(INSTALL_PROG) 46.13
47.1 --- a/tools/blktap/drivers/Makefile Mon Nov 02 19:35:54 2009 -0800 47.2 +++ b/tools/blktap/drivers/Makefile Fri Mar 19 18:36:57 2010 -0700 47.3 @@ -4,6 +4,7 @@ include $(XEN_ROOT)/tools/Rules.mk 47.4 IBIN = blktapctrl tapdisk 47.5 QCOW_UTIL = img2qcow qcow2raw qcow-create 47.6 LIBAIO_DIR = ../../libaio/src 47.7 +MEMSHR_DIR = ../../memshr 47.8 47.9 CFLAGS += -Werror 47.10 CFLAGS += -Wno-unused 47.11 @@ -11,6 +12,7 @@ CFLAGS += -I../lib 47.12 CFLAGS += $(CFLAGS_libxenctrl) 47.13 CFLAGS += $(CFLAGS_libxenstore) 47.14 CFLAGS += -I $(LIBAIO_DIR) 47.15 +CFLAGS += -I $(MEMSHR_DIR) 47.16 CFLAGS += -D_GNU_SOURCE 47.17 47.18 ifeq ($(shell . ./check_gcrypt $(CC)),yes) 47.19 @@ -21,7 +23,13 @@ CRYPT_LIB := -lcrypto 47.20 $(warning === libgcrypt not installed: falling back to libcrypto ===) 47.21 endif 47.22 47.23 -LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib -lblktap 47.24 +MEMSHRLIBS := 47.25 +ifeq ($(CONFIG_Linux), y) 47.26 +CFLAGS += -DMEMSHR 47.27 +MEMSHRLIBS += $(MEMSHR_DIR)/libmemshr.a 47.28 +endif 47.29 + 47.30 +LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) $(MEMSHRLIBS) -L../lib -lblktap -lrt -lm -lpthread 47.31 LDFLAGS_img := $(LIBAIO_DIR)/libaio.a $(CRYPT_LIB) -lpthread -lz 47.32 47.33 BLK-OBJS-y := block-aio.o
48.1 --- a/tools/blktap/drivers/blktapctrl.c Mon Nov 02 19:35:54 2009 -0800 48.2 +++ b/tools/blktap/drivers/blktapctrl.c Fri Mar 19 18:36:57 2010 -0700 48.3 @@ -50,6 +50,8 @@ 48.4 #include <xs.h> 48.5 #include <sys/time.h> 48.6 #include <syslog.h> 48.7 +#include <memshr.h> 48.8 +#include <sys/stat.h> 48.9 48.10 #include "blktaplib.h" 48.11 #include "blktapctrl.h" 48.12 @@ -858,6 +860,10 @@ int main(int argc, char *argv[]) 48.13 goto open_failed; 48.14 } 48.15 48.16 +#ifdef MEMSHR 48.17 + memshr_daemon_initialize(); 48.18 +#endif 48.19 + 48.20 retry: 48.21 /* Set up store connection and watch. */ 48.22 h = xs_daemon_open();
49.1 --- a/tools/blktap/drivers/block-qcow2.c Mon Nov 02 19:35:54 2009 -0800 49.2 +++ b/tools/blktap/drivers/block-qcow2.c Fri Mar 19 18:36:57 2010 -0700 49.3 @@ -30,6 +30,7 @@ 49.4 #include <stdio.h> 49.5 #include <stdlib.h> 49.6 #include <string.h> 49.7 +#include <sys/stat.h> 49.8 49.9 #include "tapdisk.h" 49.10 #include "tapaio.h"
50.1 --- a/tools/blktap/lib/blktaplib.h Mon Nov 02 19:35:54 2009 -0800 50.2 +++ b/tools/blktap/lib/blktaplib.h Fri Mar 19 18:36:57 2010 -0700 50.3 @@ -42,7 +42,7 @@ 50.4 #include <sys/types.h> 50.5 #include <unistd.h> 50.6 50.7 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, XC_PAGE_SIZE) 50.8 +#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, XC_PAGE_SIZE) 50.9 50.10 /* size of the extra VMA area to map in attached pages. */ 50.11 #define BLKTAP_VMA_PAGES BLK_RING_SIZE
51.1 --- a/tools/blktap/lib/xenbus.c Mon Nov 02 19:35:54 2009 -0800 51.2 +++ b/tools/blktap/lib/xenbus.c Fri Mar 19 18:36:57 2010 -0700 51.3 @@ -107,6 +107,24 @@ static int get_be_id(const char *str) 51.4 return atoi(num); 51.5 } 51.6 51.7 +static int get_be_domid(const char *str) 51.8 +{ 51.9 + int len1, len2; 51.10 + const char *ptr; 51.11 + char *tptr, num[10]; 51.12 + 51.13 + len2 = strsep_len(str, '/', 3); 51.14 + if ( len2 < 0 ) return -1; 51.15 + len1 = strsep_len(str, '/', 2); 51.16 + 51.17 + ptr = str + len1 + 1; 51.18 + strncpy(num, ptr, len2 - len1 - 1); 51.19 + tptr = num + (len2 - len1 - 1); 51.20 + *tptr = '\0'; 51.21 + 51.22 + return atoi(num); 51.23 +} 51.24 + 51.25 static struct backend_info *be_lookup_be(const char *bepath) 51.26 { 51.27 struct backend_info *be; 51.28 @@ -150,6 +168,24 @@ static int backend_remove(struct xs_hand 51.29 return 0; 51.30 } 51.31 51.32 +static const char *get_image_path(const char *path) 51.33 +{ 51.34 + const char *tmp; 51.35 + 51.36 + /* Strip off the image type */ 51.37 + if (!strncmp(path, "tapdisk:", strlen("tapdisk:"))) { 51.38 + path += strlen("tapdisk:"); 51.39 + } else if (!strncmp(path, "ioemu:", strlen("ioemu:"))) { 51.40 + path += strlen("ioemu:"); 51.41 + } 51.42 + 51.43 + tmp = strchr(path, ':'); 51.44 + if (tmp != NULL) 51.45 + path = tmp + 1; 51.46 + 51.47 + return path; 51.48 +} 51.49 + 51.50 static int check_sharing(struct xs_handle *h, struct backend_info *be) 51.51 { 51.52 char *dom_uuid; 51.53 @@ -161,8 +197,12 @@ static int check_sharing(struct xs_handl 51.54 char **devices; 51.55 int i, j; 51.56 unsigned int num_dom, num_dev; 51.57 - blkif_info_t *info; 51.58 + blkif_info_t *info = be->blkif->info; 51.59 int ret = 0; 51.60 + const char *image_path[2]; 51.61 + int be_domid = get_be_domid(be->backpath); 51.62 + 51.63 + image_path[0] = get_image_path(info->params); 51.64 51.65 /* If the mode contains '!' 
or doesn't contain 'w' don't check anything */ 51.66 xs_gather(h, be->backpath, "mode", NULL, &mode, NULL); 51.67 @@ -178,7 +218,10 @@ static int check_sharing(struct xs_handl 51.68 free(path); 51.69 51.70 /* Iterate through the devices of all VMs */ 51.71 - domains = xs_directory(h, XBT_NULL, "backend/tap", &num_dom); 51.72 + if (asprintf(&path, "/local/domain/%d/backend/tap", be_domid) == -1) 51.73 + goto fail; 51.74 + domains = xs_directory(h, XBT_NULL, path, &num_dom); 51.75 + free(path); 51.76 if (domains == NULL) 51.77 num_dom = 0; 51.78 51.79 @@ -189,8 +232,11 @@ static int check_sharing(struct xs_handl 51.80 ret = -1; 51.81 break; 51.82 } 51.83 + cur_dom_uuid = NULL; 51.84 xs_gather(h, path, "vm", NULL, &cur_dom_uuid, NULL); 51.85 free(path); 51.86 + if (!cur_dom_uuid) 51.87 + continue; 51.88 51.89 if (!strcmp(cur_dom_uuid, dom_uuid)) { 51.90 free(cur_dom_uuid); 51.91 @@ -198,7 +244,7 @@ static int check_sharing(struct xs_handl 51.92 } 51.93 51.94 /* Check the devices */ 51.95 - if (asprintf(&path, "backend/tap/%s", domains[i]) == -1) { 51.96 + if (asprintf(&path, "/local/domain/%d/backend/tap/%s", be_domid, domains[i]) == -1) { 51.97 ret = -1; 51.98 free(cur_dom_uuid); 51.99 break; 51.100 @@ -209,15 +255,18 @@ static int check_sharing(struct xs_handl 51.101 free(path); 51.102 51.103 for (j = 0; !ret && (j < num_dev); j++) { 51.104 - if (asprintf(&path, "backend/tap/%s/%s", domains[i], devices[j]) == -1) { 51.105 + if (asprintf(&path, "/local/domain/%d/backend/tap/%s/%s", be_domid, domains[i], devices[j]) == -1) { 51.106 ret = -1; 51.107 break; 51.108 } 51.109 + params = NULL; 51.110 xs_gather(h, path, "params", NULL, &params, NULL); 51.111 free(path); 51.112 + if (!params) 51.113 + continue; 51.114 51.115 - info = be->blkif->info; 51.116 - if (strcmp(params, info->params)) { 51.117 + image_path[1] = get_image_path(params); 51.118 + if (!strcmp(image_path[0], image_path[1])) { 51.119 ret = -1; 51.120 } 51.121 51.122 @@ -241,24 +290,12 @@ out: 51.123 static int 
check_image(struct xs_handle *h, struct backend_info *be, 51.124 const char** errmsg) 51.125 { 51.126 - const char *tmp; 51.127 const char *path; 51.128 int mode; 51.129 blkif_t *blkif = be->blkif; 51.130 blkif_info_t *info = blkif->info; 51.131 51.132 - /* Strip off the image type */ 51.133 - path = info->params; 51.134 - 51.135 - if (!strncmp(path, "tapdisk:", strlen("tapdisk:"))) { 51.136 - path += strlen("tapdisk:"); 51.137 - } else if (!strncmp(path, "ioemu:", strlen("ioemu:"))) { 51.138 - path += strlen("ioemu:"); 51.139 - } 51.140 - 51.141 - tmp = strchr(path, ':'); 51.142 - if (tmp != NULL) 51.143 - path = tmp + 1; 51.144 + path = get_image_path(info->params); 51.145 51.146 /* Check if the image exists and access is permitted */ 51.147 mode = R_OK;
52.1 --- a/tools/blktap2/drivers/Makefile Mon Nov 02 19:35:54 2009 -0800 52.2 +++ b/tools/blktap2/drivers/Makefile Fri Mar 19 18:36:57 2010 -0700 52.3 @@ -1,4 +1,4 @@ 52.4 -XEN_ROOT=../../../ 52.5 +XEN_ROOT=../../.. 52.6 BLKTAP_ROOT= .. 52.7 include $(XEN_ROOT)/tools/Rules.mk 52.8 52.9 @@ -14,7 +14,9 @@ CFLAGS += -Wno-unused 52.10 CFLAGS += -fno-strict-aliasing 52.11 CFLAGS += -I../lib -I../../libxc 52.12 CFLAGS += -I../include -I../../include 52.13 +CFLAGS += $(CFLAGS_libxenctrl) 52.14 CFLAGS += -I $(LIBAIO_DIR) 52.15 +CFLAGS += -I $(MEMSHR_DIR) 52.16 CFLAGS += -D_GNU_SOURCE 52.17 CFLAGS += -DUSE_NFS_LOCKS 52.18 52.19 @@ -36,7 +38,7 @@ else 52.20 CRYPT_LIB += -lcrypto 52.21 endif 52.22 52.23 -LDFLAGS_img := $(CRYPT_LIB) -lpthread -lz 52.24 +LDFLAGS_img := $(LDFLAGS_libxenctrl) $(CRYPT_LIB) -lpthread -lz -lm 52.25 52.26 LIBS += -L$(LIBVHDDIR) -lvhd 52.27 52.28 @@ -44,8 +46,23 @@ ifeq ($(CONFIG_Linux),y) 52.29 LIBS += -luuid 52.30 endif 52.31 52.32 +REMUS-OBJS := block-remus.o 52.33 +REMUS-OBJS += hashtable.o 52.34 +REMUS-OBJS += hashtable_itr.o 52.35 +REMUS-OBJS += hashtable_utility.o 52.36 + 52.37 +$(REMUS-OBJS): CFLAGS += -I$(XEN_XENSTORE) 52.38 + 52.39 LIBAIO_DIR = $(XEN_ROOT)/tools/libaio/src 52.40 -tapdisk2 tapdisk-stream tapdisk-diff $(QCOW_UTIL): AIOLIBS := $(LIBAIO_DIR)/libaio.a 52.41 +MEMSHR_DIR = $(XEN_ROOT)/tools/memshr 52.42 + 52.43 +MEMSHRLIBS := 52.44 +ifeq ($(CONFIG_Linux), __fixme__) 52.45 +CFLAGS += -DMEMSHR 52.46 +MEMSHRLIBS += $(MEMSHR_DIR)/libmemshr.a 52.47 +endif 52.48 + 52.49 +tapdisk2 tapdisk-stream tapdisk-diff $(QCOW_UTIL): AIOLIBS := $(LIBAIO_DIR)/libaio.a 52.50 tapdisk-client tapdisk-stream tapdisk-diff $(QCOW_UTIL): CFLAGS += -I$(LIBAIO_DIR) -I$(XEN_LIBXC) 52.51 52.52 ifeq ($(VHD_STATIC),y) 52.53 @@ -81,18 +98,19 @@ BLK-OBJS-y += block-log.o 52.54 BLK-OBJS-y += block-qcow.o 52.55 BLK-OBJS-y += aes.o 52.56 BLK-OBJS-y += $(PORTABLE-OBJS-y) 52.57 +BLK-OBJS-y += $(REMUS-OBJS) 52.58 52.59 all: $(IBIN) lock-util qcow-util 52.60 52.61 52.62 
tapdisk2: $(TAP-OBJS-y) $(BLK-OBJS-y) $(MISC-OBJS-y) tapdisk2.c 52.63 - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(LDFLAGS_img) 52.64 + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(MEMSHRLIBS) $(LDFLAGS_img) 52.65 52.66 tapdisk-client: tapdisk-client.o 52.67 $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(LDFLAGS_img) 52.68 52.69 tapdisk-stream tapdisk-diff: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y) 52.70 - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(LDFLAGS_img) 52.71 + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(MEMSHRLIBS) $(LDFLAGS_img) 52.72 52.73 td-util: td.o tapdisk-utils.o tapdisk-log.o $(PORTABLE-OBJS-y) 52.74 $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(LDFLAGS_img) 52.75 @@ -104,7 +122,7 @@ lock-util: lock.c 52.76 qcow-util: img2qcow qcow2raw qcow-create 52.77 52.78 img2qcow qcow2raw qcow-create: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y) 52.79 - $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(LDFLAGS_img) 52.80 + $(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(MEMSHRLIBS) $(LDFLAGS_img) 52.81 52.82 install: all 52.83 $(INSTALL_DIR) -p $(DESTDIR)$(INST_DIR)
53.1 --- a/tools/blktap2/drivers/block-aio.c Mon Nov 02 19:35:54 2009 -0800 53.2 +++ b/tools/blktap2/drivers/block-aio.c Fri Mar 19 18:36:57 2010 -0700 53.3 @@ -28,7 +28,6 @@ 53.4 53.5 53.6 #include <errno.h> 53.7 -#include <libaio.h> 53.8 #include <fcntl.h> 53.9 #include <stdio.h> 53.10 #include <stdlib.h>
54.1 --- a/tools/blktap2/drivers/block-qcow.c Mon Nov 02 19:35:54 2009 -0800 54.2 +++ b/tools/blktap2/drivers/block-qcow.c Fri Mar 19 18:36:57 2010 -0700 54.3 @@ -1035,6 +1035,17 @@ void tdqcow_queue_read(td_driver_t *driv 54.4 } 54.5 54.6 if(!cluster_offset) { 54.7 + int i; 54.8 + /* Forward entire request if possible. */ 54.9 + for(i=0; i<nb_sectors; i++) 54.10 + if(get_cluster_offset(s, (sector+i) << 9, 0, 0, 0, 0)) 54.11 + goto coalesce_failed; 54.12 + treq.buf = buf; 54.13 + treq.sec = sector; 54.14 + treq.secs = nb_sectors; 54.15 + td_forward_request(treq); 54.16 + return; 54.17 +coalesce_failed: 54.18 treq.buf = buf; 54.19 treq.sec = sector; 54.20 treq.secs = n;
55.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 55.2 +++ b/tools/blktap2/drivers/block-remus.c Fri Mar 19 18:36:57 2010 -0700 55.3 @@ -0,0 +1,1674 @@ 55.4 +/* block-remus.c 55.5 + * 55.6 + * This disk sends all writes to a backup via a network interface before 55.7 + * passing them to an underlying device. 55.8 + * The backup is a bit more complicated: 55.9 + * 1. It applies all incoming writes to a ramdisk. 55.10 + * 2. When a checkpoint request arrives, it moves the ramdisk to 55.11 + * a committing state and uses a new ramdisk for subsequent writes. 55.12 + * It also acknowledges the request, to let the sender know it can 55.13 + * release output. 55.14 + * 3. The ramdisk flushes its contents to the underlying driver. 55.15 + * 4. At failover, the backup waits for the in-flight ramdisk (if any) to 55.16 + * drain before letting the domain be activated. 55.17 + * 55.18 + * The driver determines whether it is the client or server by attempting 55.19 + * to bind to the replication address. If the address is not local, 55.20 + * the driver acts as client. 55.21 + * 55.22 + * The following messages are defined for the replication stream: 55.23 + * 1. write request 55.24 + * "wreq" 4 55.25 + * num_sectors 4 55.26 + * sector 8 55.27 + * buffer (num_sectors * sector_size) 55.28 + * 2. submit request (may be used as a barrier 55.29 + * "sreq" 4 55.30 + * 3. commit request 55.31 + * "creq" 4 55.32 + * After a commit request, the client must wait for a competion message: 55.33 + * 4. 
completion 55.34 + * "done" 4 55.35 + */ 55.36 + 55.37 +/* due to architectural choices in tapdisk, block-buffer is forced to 55.38 + * reimplement some code which is meant to be private */ 55.39 +#define TAPDISK 55.40 +#include "tapdisk.h" 55.41 +#include "tapdisk-server.h" 55.42 +#include "tapdisk-driver.h" 55.43 +#include "tapdisk-interface.h" 55.44 +#include "hashtable.h" 55.45 +#include "hashtable_itr.h" 55.46 +#include "hashtable_utility.h" 55.47 + 55.48 +#include <errno.h> 55.49 +#include <inttypes.h> 55.50 +#include <fcntl.h> 55.51 +#include <stdio.h> 55.52 +#include <stdlib.h> 55.53 +#include <string.h> 55.54 +#include <sys/time.h> 55.55 +#include <sys/types.h> 55.56 +#include <sys/socket.h> 55.57 +#include <netdb.h> 55.58 +#include <netinet/in.h> 55.59 +#include <arpa/inet.h> 55.60 +#include <sys/param.h> 55.61 +#include <sys/sysctl.h> 55.62 +#include <unistd.h> 55.63 +#include <sys/stat.h> 55.64 + 55.65 +/* timeout for reads and writes in ms */ 55.66 +#define HEARTBEAT_MS 1000 55.67 +#define RAMDISK_HASHSIZE 128 55.68 + 55.69 +/* connect retry timeout (seconds) */ 55.70 +#define REMUS_CONNRETRY_TIMEOUT 10 55.71 + 55.72 +#define RPRINTF(_f, _a...) syslog (LOG_DEBUG, "remus: " _f, ## _a) 55.73 + 55.74 +enum tdremus_mode { 55.75 + mode_invalid = 0, 55.76 + mode_unprotected, 55.77 + mode_primary, 55.78 + mode_backup 55.79 +}; 55.80 + 55.81 +struct tdremus_req { 55.82 + uint64_t sector; 55.83 + int nb_sectors; 55.84 + char buf[4096]; 55.85 +}; 55.86 + 55.87 +struct req_ring { 55.88 + /* waste one slot to distinguish between empty and full */ 55.89 + struct tdremus_req requests[MAX_REQUESTS * 2 + 1]; 55.90 + unsigned int head; 55.91 + unsigned int tail; 55.92 +}; 55.93 + 55.94 +/* TODO: This isn't very pretty, but to properly generate our own treqs (needed 55.95 + * by the backup) we need to know our td_vbt_t and td_image_t (blktap2 55.96 + * internals). 
As a proper fix, we should consider extending the tapdisk 55.97 + * interface with a td_create_request() function, or something similar. 55.98 + * 55.99 + * For now, we just grab the vbd in the td_open() command, and the td_image_t 55.100 + * from the first read request. 55.101 + */ 55.102 +td_vbd_t *device_vbd = NULL; 55.103 +td_image_t *remus_image = NULL; 55.104 + 55.105 +struct ramdisk { 55.106 + size_t sector_size; 55.107 + struct hashtable* h; 55.108 + /* when a ramdisk is flushed, h is given a new empty hash for writes 55.109 + * while the old ramdisk (prev) is drained asynchronously. To avoid 55.110 + * a race where a read request points to a sector in prev which has 55.111 + * not yet been flushed, check prev on a miss in h */ 55.112 + struct hashtable* prev; 55.113 + /* count of outstanding requests to the base driver */ 55.114 + size_t inflight; 55.115 +}; 55.116 + 55.117 +/* the ramdisk intercepts the original callback for reads and writes. 55.118 + * This holds the original data. */ 55.119 +/* Might be worth making this a static array in struct ramdisk to avoid 55.120 + * a malloc per request */ 55.121 + 55.122 +struct tdremus_state; 55.123 + 55.124 +struct ramdisk_cbdata { 55.125 + td_callback_t cb; 55.126 + void* private; 55.127 + char* buf; 55.128 + struct tdremus_state* state; 55.129 +}; 55.130 + 55.131 +struct ramdisk_write_cbdata { 55.132 + struct tdremus_state* state; 55.133 + char* buf; 55.134 +}; 55.135 + 55.136 +typedef void (*queue_rw_t) (td_driver_t *driver, td_request_t treq); 55.137 + 55.138 +/* poll_fd type for blktap2 fd system. taken from block_log.c */ 55.139 +typedef struct poll_fd { 55.140 + int fd; 55.141 + event_id_t id; 55.142 +} poll_fd_t; 55.143 + 55.144 +struct tdremus_state { 55.145 +// struct tap_disk* driver; 55.146 + void* driver_data; 55.147 + 55.148 + /* XXX: this is needed so that the server can perform operations on 55.149 + * the driver from the stream_fd event handler. fix this. 
*/ 55.150 + td_driver_t *tdremus_driver; 55.151 + 55.152 + /* TODO: we may wish to replace these two FIFOs with a unix socket */ 55.153 + char* ctl_path; /* receive flush instruction here */ 55.154 + poll_fd_t ctl_fd; /* io_fd slot for control FIFO */ 55.155 + char* msg_path; /* output completion message here */ 55.156 + poll_fd_t msg_fd; 55.157 + 55.158 + /* replication host */ 55.159 + struct sockaddr_in sa; 55.160 + poll_fd_t server_fd; /* server listen port */ 55.161 + poll_fd_t stream_fd; /* replication channel */ 55.162 + 55.163 + /* queue write requests, batch-replicate at submit */ 55.164 + struct req_ring write_ring; 55.165 + 55.166 + /* ramdisk data*/ 55.167 + struct ramdisk ramdisk; 55.168 + 55.169 + /* mode methods */ 55.170 + enum tdremus_mode mode; 55.171 + int (*queue_flush)(td_driver_t *driver); 55.172 +}; 55.173 + 55.174 +typedef struct tdremus_wire { 55.175 + uint32_t op; 55.176 + uint64_t id; 55.177 + uint64_t sec; 55.178 + uint32_t secs; 55.179 +} tdremus_wire_t; 55.180 + 55.181 +#define TDREMUS_READ "rreq" 55.182 +#define TDREMUS_WRITE "wreq" 55.183 +#define TDREMUS_SUBMIT "sreq" 55.184 +#define TDREMUS_COMMIT "creq" 55.185 +#define TDREMUS_DONE "done" 55.186 +#define TDREMUS_FAIL "fail" 55.187 + 55.188 +/* primary read/write functions */ 55.189 +static void primary_queue_read(td_driver_t *driver, td_request_t treq); 55.190 +static void primary_queue_write(td_driver_t *driver, td_request_t treq); 55.191 + 55.192 +/* backup read/write functions */ 55.193 +static void backup_queue_read(td_driver_t *driver, td_request_t treq); 55.194 +static void backup_queue_write(td_driver_t *driver, td_request_t treq); 55.195 + 55.196 +/* unpritected read/write functions */ 55.197 +static void unprotected_queue_read(td_driver_t *driver, td_request_t treq); 55.198 +static void unprotected_queue_write(td_driver_t *driver, td_request_t treq); 55.199 + 55.200 +static int tdremus_close(td_driver_t *driver); 55.201 + 55.202 +static int switch_mode(td_driver_t 
*driver, enum tdremus_mode mode); 55.203 +static int ctl_respond(struct tdremus_state *s, const char *response); 55.204 + 55.205 +/* ring functions */ 55.206 +static inline unsigned int ring_next(struct req_ring* ring, unsigned int pos) 55.207 +{ 55.208 + if (++pos >= MAX_REQUESTS * 2 + 1) 55.209 + return 0; 55.210 + 55.211 + return pos; 55.212 +} 55.213 + 55.214 +static inline int ring_isempty(struct req_ring* ring) 55.215 +{ 55.216 + return ring->head == ring->tail; 55.217 +} 55.218 + 55.219 +static inline int ring_isfull(struct req_ring* ring) 55.220 +{ 55.221 + return ring_next(ring, ring->tail) == ring->head; 55.222 +} 55.223 + 55.224 +/* functions to create and sumbit treq's */ 55.225 + 55.226 +static void 55.227 +replicated_write_callback(td_request_t treq, int err) 55.228 +{ 55.229 + struct tdremus_state *s = (struct tdremus_state *) treq.cb_data; 55.230 + td_vbd_request_t *vreq; 55.231 + 55.232 + vreq = (td_vbd_request_t *) treq.private; 55.233 + 55.234 + /* the write failed for now, lets panic. this is very bad */ 55.235 + if (err) { 55.236 + RPRINTF("ramdisk write failed, disk image is not consistent\n"); 55.237 + exit(-1); 55.238 + } 55.239 + 55.240 + /* The write succeeded. 
let's pull the vreq off whatever request list 55.241 + * it is on and free() it */ 55.242 + list_del(&vreq->next); 55.243 + free(vreq); 55.244 + 55.245 + s->ramdisk.inflight--; 55.246 + if (!s->ramdisk.inflight && !s->ramdisk.prev) { 55.247 + /* TODO: the ramdisk has been flushed */ 55.248 + } 55.249 +} 55.250 + 55.251 +static inline int 55.252 +create_write_request(struct tdremus_state *state, td_sector_t sec, int secs, char *buf) 55.253 +{ 55.254 + td_request_t treq; 55.255 + td_vbd_request_t *vreq; 55.256 + 55.257 + treq.op = TD_OP_WRITE; 55.258 + treq.buf = buf; 55.259 + treq.sec = sec; 55.260 + treq.secs = secs; 55.261 + treq.image = remus_image; 55.262 + treq.cb = replicated_write_callback; 55.263 + treq.cb_data = state; 55.264 + treq.id = 0; 55.265 + treq.sidx = 0; 55.266 + 55.267 + vreq = calloc(1, sizeof(td_vbd_request_t)); 55.268 + treq.private = vreq; 55.269 + 55.270 + if(!vreq) 55.271 + return -1; 55.272 + 55.273 + vreq->submitting = 1; 55.274 + INIT_LIST_HEAD(&vreq->next); 55.275 + tapdisk_vbd_move_request(treq.private, &device_vbd->pending_requests); 55.276 + 55.277 + /* TODO: 55.278 + * we should probably leave it up to the caller to forward the request */ 55.279 + td_forward_request(treq); 55.280 + 55.281 + vreq->submitting--; 55.282 + 55.283 + return 0; 55.284 +} 55.285 + 55.286 + 55.287 +/* ramdisk methods */ 55.288 +static int ramdisk_flush(td_driver_t *driver, struct tdremus_state *s); 55.289 + 55.290 +/* http://www.concentric.net/~Ttwang/tech/inthash.htm */ 55.291 +static unsigned int uint64_hash(void* k) 55.292 +{ 55.293 + uint64_t key = *(uint64_t*)k; 55.294 + 55.295 + key = (~key) + (key << 18); 55.296 + key = key ^ (key >> 31); 55.297 + key = key * 21; 55.298 + key = key ^ (key >> 11); 55.299 + key = key + (key << 6); 55.300 + key = key ^ (key >> 22); 55.301 + 55.302 + return (unsigned int)key; 55.303 +} 55.304 + 55.305 +static int rd_hash_equal(void* k1, void* k2) 55.306 +{ 55.307 + uint64_t key1, key2; 55.308 + 55.309 + key1 = 
*(uint64_t*)k1; 55.310 + key2 = *(uint64_t*)k2; 55.311 + 55.312 + return key1 == key2; 55.313 +} 55.314 + 55.315 +static int ramdisk_read(struct ramdisk* ramdisk, uint64_t sector, 55.316 + int nb_sectors, char* buf) 55.317 +{ 55.318 + int i; 55.319 + char* v; 55.320 + uint64_t key; 55.321 + 55.322 + for (i = 0; i < nb_sectors; i++) { 55.323 + key = sector + i; 55.324 + if (!(v = hashtable_search(ramdisk->h, &key))) { 55.325 + /* check whether it is queued in a previous flush request */ 55.326 + if (!(ramdisk->prev && (v = hashtable_search(ramdisk->prev, &key)))) 55.327 + return -1; 55.328 + } 55.329 + memcpy(buf + i * ramdisk->sector_size, v, ramdisk->sector_size); 55.330 + } 55.331 + 55.332 + return 0; 55.333 +} 55.334 + 55.335 +static int ramdisk_write_hash(struct hashtable* h, uint64_t sector, char* buf, 55.336 + size_t len) 55.337 +{ 55.338 + char* v; 55.339 + uint64_t* key; 55.340 + 55.341 + if ((v = hashtable_search(h, &sector))) { 55.342 + memcpy(v, buf, len); 55.343 + return 0; 55.344 + } 55.345 + 55.346 + if (!(v = malloc(len))) { 55.347 + DPRINTF("ramdisk_write_hash: malloc failed\n"); 55.348 + return -1; 55.349 + } 55.350 + memcpy(v, buf, len); 55.351 + if (!(key = malloc(sizeof(*key)))) { 55.352 + DPRINTF("ramdisk_write_hash: error allocating key\n"); 55.353 + free(v); 55.354 + return -1; 55.355 + } 55.356 + *key = sector; 55.357 + if (!hashtable_insert(h, key, v)) { 55.358 + DPRINTF("ramdisk_write_hash failed on sector %" PRIu64 "\n", sector); 55.359 + free(key); 55.360 + free(v); 55.361 + return -1; 55.362 + } 55.363 + 55.364 + return 0; 55.365 +} 55.366 + 55.367 +static inline int ramdisk_write(struct ramdisk* ramdisk, uint64_t sector, 55.368 + int nb_sectors, char* buf) 55.369 +{ 55.370 + int i, rc; 55.371 + 55.372 + for (i = 0; i < nb_sectors; i++) { 55.373 + rc = ramdisk_write_hash(ramdisk->h, sector + i, 55.374 + buf + i * ramdisk->sector_size, 55.375 + ramdisk->sector_size); 55.376 + if (rc) 55.377 + return rc; 55.378 + } 55.379 + 55.380 + return 0; 
55.381 +} 55.382 + 55.383 +static int ramdisk_write_cb(td_driver_t *driver, int res, uint64_t sector, 55.384 + int nb_sectors, int id, void* private) 55.385 +{ 55.386 + struct ramdisk_write_cbdata *cbdata = (struct ramdisk_write_cbdata*)private; 55.387 + struct tdremus_state *s = cbdata->state; 55.388 + int rc; 55.389 + 55.390 + /* 55.391 + RPRINTF("ramdisk write callback: rc %d, %d sectors @ %" PRIu64 "\n", res, nb_sectors, 55.392 + sector); 55.393 + */ 55.394 + 55.395 + free(cbdata->buf); 55.396 + free(cbdata); 55.397 + 55.398 + s->ramdisk.inflight--; 55.399 + if (!s->ramdisk.inflight && !s->ramdisk.prev) { 55.400 + /* when this reaches 0 and prev is empty, the disk is flushed. */ 55.401 + /* 55.402 + RPRINTF("ramdisk flush complete\n"); 55.403 + */ 55.404 + } 55.405 + 55.406 + if (s->ramdisk.prev) { 55.407 + /* resubmit as much as possible in the remaining disk */ 55.408 + /* 55.409 + RPRINTF("calling ramdisk_flush from write callback\n"); 55.410 + */ 55.411 + return ramdisk_flush(driver, s); 55.412 + } 55.413 + 55.414 + return 0; 55.415 +} 55.416 + 55.417 +static int uint64_compare(const void* k1, const void* k2) 55.418 +{ 55.419 + uint64_t u1 = *(uint64_t*)k1; 55.420 + uint64_t u2 = *(uint64_t*)k2; 55.421 + 55.422 + /* u1 - u2 is unsigned */ 55.423 + return u1 < u2 ? -1 : u1 > u2 ? 
1 : 0; 55.424 +} 55.425 + 55.426 +/* set psectors to an array of the sector numbers in the hash, returning 55.427 + * the number of entries (or -1 on error) */ 55.428 +static int ramdisk_get_sectors(struct hashtable* h, uint64_t** psectors) 55.429 +{ 55.430 + struct hashtable_itr* itr; 55.431 + uint64_t* sectors; 55.432 + int count; 55.433 + 55.434 + if (!(count = hashtable_count(h))) 55.435 + return 0; 55.436 + 55.437 + if (!(*psectors = malloc(count * sizeof(uint64_t)))) { 55.438 + DPRINTF("ramdisk_get_sectors: error allocating sector map\n"); 55.439 + return -1; 55.440 + } 55.441 + sectors = *psectors; 55.442 + 55.443 + itr = hashtable_iterator(h); 55.444 + count = 0; 55.445 + do { 55.446 + sectors[count++] = *(uint64_t*)hashtable_iterator_key(itr); 55.447 + } while (hashtable_iterator_advance(itr)); 55.448 + free(itr); 55.449 + 55.450 + return count; 55.451 +} 55.452 + 55.453 +static char* merge_requests(struct ramdisk* ramdisk, uint64_t start, 55.454 + size_t count) 55.455 +{ 55.456 + char* buf; 55.457 + char* sector; 55.458 + int i; 55.459 + 55.460 + if (!(buf = valloc(count * ramdisk->sector_size))) { 55.461 + DPRINTF("merge_request: allocation failed\n"); 55.462 + return NULL; 55.463 + } 55.464 + 55.465 + for (i = 0; i < count; i++) { 55.466 + if (!(sector = hashtable_search(ramdisk->prev, &start))) { 55.467 + DPRINTF("merge_request: lookup failed on %"PRIu64"\n", start); 55.468 + return NULL; 55.469 + } 55.470 + 55.471 + memcpy(buf + i * ramdisk->sector_size, sector, ramdisk->sector_size); 55.472 + free(sector); 55.473 + 55.474 + start++; 55.475 + } 55.476 + 55.477 + return buf; 55.478 +} 55.479 + 55.480 +/* The underlying driver may not handle having the whole ramdisk queued at 55.481 + * once. We queue what we can and let the callbacks attempt to queue more. 
*/ 55.482 +/* NOTE: may be called from callback, while dd->private still belongs to 55.483 + * the underlying driver */ 55.484 +static int ramdisk_flush(td_driver_t *driver, struct tdremus_state* s) 55.485 +{ 55.486 + uint64_t* sectors; 55.487 + char* buf; 55.488 + uint64_t base, batchlen; 55.489 + int i, j, count = 0; 55.490 + 55.491 + // RPRINTF("ramdisk flush\n"); 55.492 + 55.493 + if ((count = ramdisk_get_sectors(s->ramdisk.prev, &sectors)) <= 0) 55.494 + return count; 55.495 + 55.496 + /* 55.497 + RPRINTF("ramdisk: flushing %d sectors\n", count); 55.498 + */ 55.499 + 55.500 + /* sort and merge sectors to improve disk performance */ 55.501 + qsort(sectors, count, sizeof(*sectors), uint64_compare); 55.502 + 55.503 + for (i = 0; i < count;) { 55.504 + base = sectors[i++]; 55.505 + while (i < count && sectors[i] == sectors[i-1] + 1) 55.506 + i++; 55.507 + batchlen = sectors[i-1] - base + 1; 55.508 + 55.509 + if (!(buf = merge_requests(&s->ramdisk, base, batchlen))) { 55.510 + RPRINTF("ramdisk_flush: merge_requests failed\n"); 55.511 + free(sectors); 55.512 + return -1; 55.513 + } 55.514 + 55.515 + /* NOTE: create_write_request() creates a treq AND forwards it down 55.516 + * the driver chain */ 55.517 + // RPRINTF("forwarding write request at %" PRIu64 ", length: %" PRIu64 "\n", base, batchlen); 55.518 + create_write_request(s, base, batchlen, buf); 55.519 + //RPRINTF("write request at %" PRIu64 ", length: %" PRIu64 " forwarded\n", base, batchlen); 55.520 + 55.521 + s->ramdisk.inflight++; 55.522 + 55.523 + for (j = 0; j < batchlen; j++) { 55.524 + hashtable_remove(s->ramdisk.prev, &base); 55.525 + base++; 55.526 + } 55.527 + } 55.528 + 55.529 + if (!hashtable_count(s->ramdisk.prev)) { 55.530 + /* everything is in flight */ 55.531 + hashtable_destroy(s->ramdisk.prev, 0); 55.532 + s->ramdisk.prev = NULL; 55.533 + } 55.534 + 55.535 + free(sectors); 55.536 + 55.537 + // RPRINTF("ramdisk flush done\n"); 55.538 + return 0; 55.539 +} 55.540 + 55.541 +/* flush ramdisk 
contents to disk */ 55.542 +static int ramdisk_start_flush(td_driver_t *driver) 55.543 +{ 55.544 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.545 + uint64_t* key; 55.546 + char* buf; 55.547 + int rc = 0; 55.548 + int i, j, count, batchlen; 55.549 + uint64_t* sectors; 55.550 + 55.551 + if (!hashtable_count(s->ramdisk.h)) { 55.552 + /* 55.553 + RPRINTF("Nothing to flush\n"); 55.554 + */ 55.555 + return 0; 55.556 + } 55.557 + 55.558 + if (s->ramdisk.prev) { 55.559 + /* a flush request issued while a previous flush is still in progress 55.560 + * will merge with the previous request. If you want the previous 55.561 + * request to be consistent, wait for it to complete. */ 55.562 + if ((count = ramdisk_get_sectors(s->ramdisk.h, &sectors)) < 0) 55.563 + return count; 55.564 + 55.565 + for (i = 0; i < count; i++) { 55.566 + buf = hashtable_search(s->ramdisk.h, sectors + i); 55.567 + ramdisk_write_hash(s->ramdisk.prev, sectors[i], buf, 55.568 + s->ramdisk.sector_size); 55.569 + } 55.570 + free(sectors); 55.571 + 55.572 + hashtable_destroy (s->ramdisk.h, 0); 55.573 + } else 55.574 + s->ramdisk.prev = s->ramdisk.h; 55.575 + 55.576 + /* We create a new hashtable so that new writes can be performed before 55.577 + * the old hashtable is completely drained. 
*/ 55.578 + s->ramdisk.h = create_hashtable(RAMDISK_HASHSIZE, uint64_hash, 55.579 + rd_hash_equal); 55.580 + 55.581 + return ramdisk_flush(driver, s); 55.582 +} 55.583 + 55.584 + 55.585 +static int ramdisk_start(td_driver_t *driver) 55.586 +{ 55.587 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.588 + 55.589 + if (s->ramdisk.h) { 55.590 + RPRINTF("ramdisk already allocated\n"); 55.591 + return 0; 55.592 + } 55.593 + 55.594 + s->ramdisk.sector_size = driver->info.sector_size; 55.595 + s->ramdisk.h = create_hashtable(RAMDISK_HASHSIZE, uint64_hash, 55.596 + rd_hash_equal); 55.597 + 55.598 + DPRINTF("Ramdisk started, %zu bytes/sector\n", s->ramdisk.sector_size); 55.599 + 55.600 + return 0; 55.601 +} 55.602 + 55.603 +/* common client/server functions */ 55.604 +/* mayberead: Time out after a certain interval. */ 55.605 +static int mread(int fd, void* buf, size_t len) 55.606 +{ 55.607 + fd_set rfds; 55.608 + int rc; 55.609 + size_t cur = 0; 55.610 + struct timeval tv = { 55.611 + .tv_sec = HEARTBEAT_MS / 1000, 55.612 + .tv_usec = (HEARTBEAT_MS % 1000) * 1000 55.613 + }; 55.614 + 55.615 + if (!len) 55.616 + return 0; 55.617 + 55.618 + /* read first. Only select if read is incomplete. 
*/ 55.619 + rc = read(fd, buf, len); 55.620 + while (rc < 0 || cur + rc < len) { 55.621 + if (!rc) { 55.622 + RPRINTF("end-of-file"); 55.623 + return -1; 55.624 + } 55.625 + if (rc < 0 && errno != EAGAIN) { 55.626 + RPRINTF("error during read: %s\n", strerror(errno)); 55.627 + return -1; 55.628 + } 55.629 + if (rc > 0) 55.630 + cur += rc; 55.631 + 55.632 + FD_ZERO(&rfds); 55.633 + FD_SET(fd, &rfds); 55.634 + if (!(rc = select(fd + 1, &rfds, NULL, NULL, &tv))) { 55.635 + RPRINTF("time out during read\n"); 55.636 + return -1; 55.637 + } else if (rc < 0) { 55.638 + RPRINTF("error during select: %d\n", errno); 55.639 + return -1; 55.640 + } 55.641 + rc = read(fd, buf + cur, len - cur); 55.642 + } 55.643 + /* 55.644 + RPRINTF("read %d bytes\n", cur + rc); 55.645 + */ 55.646 + 55.647 + return 0; 55.648 +} 55.649 + 55.650 +static int mwrite(int fd, void* buf, size_t len) 55.651 +{ 55.652 + fd_set wfds; 55.653 + size_t cur = 0; 55.654 + int rc; 55.655 + struct timeval tv = { 55.656 + .tv_sec = HEARTBEAT_MS / 1000, 55.657 + .tv_usec = (HEARTBEAT_MS % 1000) * 1000 55.658 + }; 55.659 + 55.660 + if (!len) 55.661 + return 0; 55.662 + 55.663 + /* read first. Only select if read is incomplete. 
*/ 55.664 + rc = write(fd, buf, len); 55.665 + while (rc < 0 || cur + rc < len) { 55.666 + if (!rc) { 55.667 + RPRINTF("end-of-file"); 55.668 + return -1; 55.669 + } 55.670 + if (rc < 0 && errno != EAGAIN) { 55.671 + RPRINTF("error during write: %s\n", strerror(errno)); 55.672 + return -1; 55.673 + } 55.674 + if (rc > 0) 55.675 + cur += rc; 55.676 + 55.677 + FD_ZERO(&wfds); 55.678 + FD_SET(fd, &wfds); 55.679 + if (!(rc = select(fd + 1, NULL, &wfds, NULL, &tv))) { 55.680 + RPRINTF("time out during write\n"); 55.681 + return -1; 55.682 + } else if (rc < 0) { 55.683 + RPRINTF("error during select: %d\n", errno); 55.684 + return -1; 55.685 + } 55.686 + rc = write(fd, buf + cur, len - cur); 55.687 + } 55.688 + /* 55.689 + RPRINTF("wrote %d bytes\n", cur + rc); 55.690 + */ 55.691 + 55.692 + return 0; 55.693 + FD_ZERO(&wfds); 55.694 + FD_SET(fd, &wfds); 55.695 + select(fd + 1, NULL, &wfds, NULL, &tv); 55.696 +} 55.697 + 55.698 + 55.699 +static void inline close_stream_fd(struct tdremus_state *s) 55.700 +{ 55.701 + /* XXX: -2 is magic. replace with macro perhaps? 
*/ 55.702 + tapdisk_server_unregister_event(s->stream_fd.id); 55.703 + close(s->stream_fd.fd); 55.704 + s->stream_fd.fd = -2; 55.705 +} 55.706 + 55.707 +/* primary functions */ 55.708 +static void remus_client_event(event_id_t, char mode, void *private); 55.709 +static void remus_connect_event(event_id_t id, char mode, void *private); 55.710 +static void remus_retry_connect_event(event_id_t id, char mode, void *private); 55.711 + 55.712 +static int primary_do_connect(struct tdremus_state *state) 55.713 +{ 55.714 + event_id_t id; 55.715 + int fd; 55.716 + int rc; 55.717 + int flags; 55.718 + 55.719 + RPRINTF("client connecting to %s:%d...\n", inet_ntoa(state->sa.sin_addr), ntohs(state->sa.sin_port)); 55.720 + 55.721 + if ((fd = socket(PF_INET, SOCK_STREAM, 0)) < 0) { 55.722 + RPRINTF("could not create client socket: %d\n", errno); 55.723 + return -1; 55.724 + } 55.725 + 55.726 + /* make socket nonblocking */ 55.727 + if ((flags = fcntl(fd, F_GETFL, 0)) == -1) 55.728 + flags = 0; 55.729 + if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) 55.730 + return -1; 55.731 + 55.732 + /* once we have created the socket and populated the address, we can now start 55.733 + * our non-blocking connect. 
rather than duplicating code we trigger a timeout 55.734 + * on the socket fd, which calls out nonblocking connect code 55.735 + */ 55.736 + if((id = tapdisk_server_register_event(SCHEDULER_POLL_TIMEOUT, fd, 0, remus_retry_connect_event, state)) < 0) { 55.737 + RPRINTF("error registering timeout client connection event handler: %s\n", strerror(id)); 55.738 + /* TODO: we leak a fd here */ 55.739 + return -1; 55.740 + } 55.741 + state->stream_fd.fd = fd; 55.742 + state->stream_fd.id = id; 55.743 + return 0; 55.744 +} 55.745 + 55.746 +static int primary_blocking_connect(struct tdremus_state *state) 55.747 +{ 55.748 + int fd; 55.749 + int id; 55.750 + int rc; 55.751 + int flags; 55.752 + 55.753 + RPRINTF("client connecting to %s:%d...\n", inet_ntoa(state->sa.sin_addr), ntohs(state->sa.sin_port)); 55.754 + 55.755 + if ((fd = socket(PF_INET, SOCK_STREAM, 0)) < 0) { 55.756 + RPRINTF("could not create client socket: %d\n", errno); 55.757 + return -1; 55.758 + } 55.759 + 55.760 + do { 55.761 + if ((rc = connect(fd, (struct sockaddr *)&state->sa, 55.762 + sizeof(state->sa))) < 0) 55.763 + { 55.764 + if (errno == ECONNREFUSED) { 55.765 + RPRINTF("connection refused -- retrying in 1 second\n"); 55.766 + sleep(1); 55.767 + } else { 55.768 + RPRINTF("connection failed: %d\n", errno); 55.769 + close(fd); 55.770 + return -1; 55.771 + } 55.772 + } 55.773 + } while (rc < 0); 55.774 + 55.775 + RPRINTF("client connected\n"); 55.776 + 55.777 + /* make socket nonblocking */ 55.778 + if ((flags = fcntl(fd, F_GETFL, 0)) == -1) 55.779 + flags = 0; 55.780 + if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) 55.781 + { 55.782 + RPRINTF("error making socket nonblocking\n"); 55.783 + close(fd); 55.784 + return -1; 55.785 + } 55.786 + 55.787 + if((id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, fd, 0, remus_client_event, state)) < 0) { 55.788 + RPRINTF("error registering client event handler: %s\n", strerror(id)); 55.789 + close(fd); 55.790 + return -1; 55.791 + } 55.792 + 55.793 + 
state->stream_fd.fd = fd; 55.794 + state->stream_fd.id = id; 55.795 + return 0; 55.796 +} 55.797 + 55.798 +/* on read, just pass request through */ 55.799 +static void primary_queue_read(td_driver_t *driver, td_request_t treq) 55.800 +{ 55.801 + /* just pass read through */ 55.802 + td_forward_request(treq); 55.803 +} 55.804 + 55.805 +/* TODO: 55.806 + * The primary uses mwrite() to write the contents of a write request to the 55.807 + * backup. This effectively blocks until all data has been copied into a system 55.808 + * buffer or a timeout has occured. We may wish to instead use tapdisk's 55.809 + * nonblocking i/o interface, tapdisk_server_register_event(), to set timeouts 55.810 + * and write data in an asynchronous fashion. 55.811 + */ 55.812 +static void primary_queue_write(td_driver_t *driver, td_request_t treq) 55.813 +{ 55.814 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.815 + 55.816 + char header[sizeof(uint32_t) + sizeof(uint64_t)]; 55.817 + uint32_t *sectors = (uint32_t *)header; 55.818 + uint64_t *sector = (uint64_t *)(header + sizeof(uint32_t)); 55.819 + 55.820 + // RPRINTF("write: stream_fd.fd: %d\n", s->stream_fd.fd); 55.821 + 55.822 + /* -1 means we haven't connected yet, -2 means the connection was lost */ 55.823 + if(s->stream_fd.fd == -1) { 55.824 + RPRINTF("connecting to backup...\n"); 55.825 + primary_blocking_connect(s); 55.826 + } 55.827 + 55.828 + *sectors = treq.secs; 55.829 + *sector = treq.sec; 55.830 + 55.831 + if (mwrite(s->stream_fd.fd, TDREMUS_WRITE, strlen(TDREMUS_WRITE)) < 0) 55.832 + goto fail; 55.833 + if (mwrite(s->stream_fd.fd, header, sizeof(header)) < 0) 55.834 + goto fail; 55.835 + 55.836 + if (mwrite(s->stream_fd.fd, treq.buf, treq.secs * driver->info.sector_size) < 0) 55.837 + goto fail; 55.838 + 55.839 + td_forward_request(treq); 55.840 + 55.841 + return; 55.842 + 55.843 + fail: 55.844 + /* switch to unprotected mode and tell tapdisk to retry */ 55.845 + RPRINTF("write request replication failed, 
switching to unprotected mode"); 55.846 + switch_mode(s->tdremus_driver, mode_unprotected); 55.847 + td_complete_request(treq, -EBUSY); 55.848 +} 55.849 + 55.850 + 55.851 +static int client_flush(td_driver_t *driver) 55.852 +{ 55.853 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.854 + 55.855 + // RPRINTF("committing output\n"); 55.856 + 55.857 + if (s->stream_fd.fd == -1) 55.858 + /* connection not yet established, nothing to flush */ 55.859 + return 0; 55.860 + 55.861 + if (mwrite(s->stream_fd.fd, TDREMUS_COMMIT, strlen(TDREMUS_COMMIT)) < 0) { 55.862 + RPRINTF("error flushing output"); 55.863 + close_stream_fd(s); 55.864 + return -1; 55.865 + } 55.866 + 55.867 + return 0; 55.868 +} 55.869 + 55.870 +static int primary_start(td_driver_t *driver) 55.871 +{ 55.872 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.873 + 55.874 + RPRINTF("activating client mode\n"); 55.875 + 55.876 + tapdisk_remus.td_queue_read = primary_queue_read; 55.877 + tapdisk_remus.td_queue_write = primary_queue_write; 55.878 + s->queue_flush = client_flush; 55.879 + 55.880 + s->stream_fd.fd = -1; 55.881 + s->stream_fd.id = -1; 55.882 + 55.883 + return 0; 55.884 +} 55.885 + 55.886 +/* timeout callback */ 55.887 +static void remus_retry_connect_event(event_id_t id, char mode, void *private) 55.888 +{ 55.889 + struct tdremus_state *s = (struct tdremus_state *)private; 55.890 + 55.891 + /* do a non-blocking connect */ 55.892 + if (connect(s->stream_fd.fd, (struct sockaddr *)&s->sa, sizeof(s->sa)) 55.893 + && errno != EINPROGRESS) 55.894 + { 55.895 + if(errno == ECONNREFUSED || errno == ENETUNREACH || errno == EAGAIN || errno == ECONNABORTED) 55.896 + { 55.897 + /* try again in a second */ 55.898 + tapdisk_server_unregister_event(s->stream_fd.id); 55.899 + if((id = tapdisk_server_register_event(SCHEDULER_POLL_TIMEOUT, s->stream_fd.fd, REMUS_CONNRETRY_TIMEOUT, remus_retry_connect_event, s)) < 0) { 55.900 + RPRINTF("error registering timeout client connection 
event handler: %s\n", strerror(id)); 55.901 + return; 55.902 + } 55.903 + s->stream_fd.id = id; 55.904 + } 55.905 + else 55.906 + { 55.907 + /* not recoverable */ 55.908 + RPRINTF("error connection to server %s\n", strerror(errno)); 55.909 + return; 55.910 + } 55.911 + } 55.912 + else 55.913 + { 55.914 + /* the connect returned EINPROGRESS (nonblocking connect) we must wait for the fd to be writeable to determine if the connect worked */ 55.915 + 55.916 + tapdisk_server_unregister_event(s->stream_fd.id); 55.917 + if((id = tapdisk_server_register_event(SCHEDULER_POLL_WRITE_FD, s->stream_fd.fd, 0, remus_connect_event, s)) < 0) { 55.918 + RPRINTF("error registering client connection event handler: %s\n", strerror(id)); 55.919 + return; 55.920 + } 55.921 + s->stream_fd.id = id; 55.922 + } 55.923 +} 55.924 + 55.925 +/* callback when nonblocking connect() is finished */ 55.926 +/* called only by primary in unprotected state */ 55.927 +static void remus_connect_event(event_id_t id, char mode, void *private) 55.928 +{ 55.929 + int socket_errno; 55.930 + socklen_t socket_errno_size; 55.931 + struct tdremus_state *s = (struct tdremus_state *)private; 55.932 + 55.933 + /* check to se if the connect succeeded */ 55.934 + socket_errno_size = sizeof(socket_errno); 55.935 + if (getsockopt(s->stream_fd.fd, SOL_SOCKET, SO_ERROR, &socket_errno, &socket_errno_size)) { 55.936 + RPRINTF("error getting socket errno\n"); 55.937 + return; 55.938 + } 55.939 + 55.940 + RPRINTF("socket connect returned %d\n", socket_errno); 55.941 + 55.942 + if(socket_errno) 55.943 + { 55.944 + /* the connect did not succeed */ 55.945 + 55.946 + if(socket_errno == ECONNREFUSED || socket_errno == ENETUNREACH || socket_errno == ETIMEDOUT 55.947 + || socket_errno == ECONNABORTED || socket_errno == EAGAIN) 55.948 + { 55.949 + /* we can probably assume that the backup is down. 
just try again later */ 55.950 + tapdisk_server_unregister_event(s->stream_fd.id); 55.951 + if((id = tapdisk_server_register_event(SCHEDULER_POLL_TIMEOUT, s->stream_fd.fd, REMUS_CONNRETRY_TIMEOUT, remus_retry_connect_event, s)) < 0) { 55.952 + RPRINTF("error registering timeout client connection event handler: %s\n", strerror(id)); 55.953 + return; 55.954 + } 55.955 + s->stream_fd.id = id; 55.956 + } 55.957 + else 55.958 + { 55.959 + RPRINTF("socket connect returned %d, giving up\n", socket_errno); 55.960 + } 55.961 + } 55.962 + else 55.963 + { 55.964 + /* the connect succeeded */ 55.965 + 55.966 + /* unregister this function and register a new event handler */ 55.967 + tapdisk_server_unregister_event(s->stream_fd.id); 55.968 + if((id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, s->stream_fd.fd, 0, remus_client_event, s)) < 0) { 55.969 + RPRINTF("error registering client event handler: %s\n", strerror(id)); 55.970 + return; 55.971 + } 55.972 + s->stream_fd.id = id; 55.973 + 55.974 + /* switch from unprotected to protected client */ 55.975 + switch_mode(s->tdremus_driver, mode_primary); 55.976 + } 55.977 +} 55.978 + 55.979 + 55.980 +/* we install this event handler on the primary once we have connected to the backup */ 55.981 +/* wait for "done" message to commit checkpoint */ 55.982 +static void remus_client_event(event_id_t id, char mode, void *private) 55.983 +{ 55.984 + struct tdremus_state *s = (struct tdremus_state *)private; 55.985 + char req[5]; 55.986 + int rc; 55.987 + 55.988 + if (mread(s->stream_fd.fd, req, sizeof(req) - 1) < 0) { 55.989 + /* replication stream closed or otherwise broken (timeout, reset, &c) */ 55.990 + RPRINTF("error reading from backup\n"); 55.991 + close_stream_fd(s); 55.992 + return; 55.993 + } 55.994 + 55.995 + req[4] = '\0'; 55.996 + 55.997 + if (!strcmp(req, TDREMUS_DONE)) 55.998 + /* checkpoint committed, inform msg_fd */ 55.999 + ctl_respond(s, TDREMUS_DONE); 55.1000 + else { 55.1001 + RPRINTF("received unknown 
message: %s\n", req); 55.1002 + close_stream_fd(s); 55.1003 + } 55.1004 + 55.1005 + return; 55.1006 +} 55.1007 + 55.1008 +/* backup functions */ 55.1009 +static void remus_server_event(event_id_t id, char mode, void *private); 55.1010 + 55.1011 +/* returns the socket that receives write requests */ 55.1012 +static void remus_server_accept(event_id_t id, char mode, void* private) 55.1013 +{ 55.1014 + struct tdremus_state* s = (struct tdremus_state *) private; 55.1015 + 55.1016 + int stream_fd; 55.1017 + event_id_t cid; 55.1018 + 55.1019 + /* XXX: add address-based black/white list */ 55.1020 + if ((stream_fd = accept(s->server_fd.fd, NULL, NULL)) < 0) { 55.1021 + RPRINTF("error accepting connection: %d\n", errno); 55.1022 + return; 55.1023 + } 55.1024 + 55.1025 + /* TODO: check to see if we are already replicating. if so just close the 55.1026 + * connection (or do something smarter) */ 55.1027 + RPRINTF("server accepted connection\n"); 55.1028 + 55.1029 + /* add tapdisk event for replication stream */ 55.1030 + cid = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, stream_fd, 0, 55.1031 + remus_server_event, s); 55.1032 + 55.1033 + if(cid < 0) { 55.1034 + RPRINTF("error registering connection event handler: %s\n", strerror(errno)); 55.1035 + close(stream_fd); 55.1036 + return; 55.1037 + } 55.1038 + 55.1039 + /* store replication file descriptor */ 55.1040 + s->stream_fd.fd = stream_fd; 55.1041 + s->stream_fd.id = cid; 55.1042 +} 55.1043 + 55.1044 +/* returns -2 if EADDRNOTAVAIL */ 55.1045 +static int remus_bind(struct tdremus_state* s) 55.1046 +{ 55.1047 +// struct sockaddr_in sa; 55.1048 + int opt; 55.1049 + int rc = -1; 55.1050 + 55.1051 + if ((s->server_fd.fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) { 55.1052 + RPRINTF("could not create server socket: %d\n", errno); 55.1053 + return rc; 55.1054 + } 55.1055 + opt = 1; 55.1056 + if (setsockopt(s->server_fd.fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0) 55.1057 + RPRINTF("Error setting REUSEADDR on %d: 
%d\n", s->server_fd.fd, errno); 55.1058 + 55.1059 + if (bind(s->server_fd.fd, (struct sockaddr *)&s->sa, sizeof(s->sa)) < 0) { 55.1060 + RPRINTF("could not bind server socket %d to %s:%d: %d %s\n", s->server_fd.fd, 55.1061 + inet_ntoa(s->sa.sin_addr), ntohs(s->sa.sin_port), errno, strerror(errno)); 55.1062 + if (errno != EADDRINUSE) 55.1063 + rc = -2; 55.1064 + goto err_sfd; 55.1065 + } 55.1066 + if (listen(s->server_fd.fd, 10)) { 55.1067 + RPRINTF("could not listen on socket: %d\n", errno); 55.1068 + goto err_sfd; 55.1069 + } 55.1070 + 55.1071 + /* The socket s now bound to the address and listening so we may now register 55.1072 + * the fd with tapdisk */ 55.1073 + 55.1074 + if((s->server_fd.id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, 55.1075 + s->server_fd.fd, 0, 55.1076 + remus_server_accept, s)) < 0) { 55.1077 + RPRINTF("error registering server connection event handler: %s", 55.1078 + strerror(s->server_fd.id)); 55.1079 + goto err_sfd; 55.1080 + } 55.1081 + 55.1082 + return 0; 55.1083 + 55.1084 + err_sfd: 55.1085 + close(s->server_fd.fd); 55.1086 + s->server_fd.fd = -1; 55.1087 + 55.1088 + return rc; 55.1089 +} 55.1090 + 55.1091 +/* wait for latest checkpoint to be applied */ 55.1092 +static inline int server_writes_inflight(td_driver_t *driver) 55.1093 +{ 55.1094 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1095 + 55.1096 + if (!s->ramdisk.inflight && !s->ramdisk.prev) 55.1097 + return 0; 55.1098 + 55.1099 + return 1; 55.1100 +} 55.1101 + 55.1102 +/* Due to block device prefetching this code may be called on the server side 55.1103 + * during normal replication. In this case we must return EBUSY, otherwise the 55.1104 + * domain may be started with stale data. 
55.1105 + */ 55.1106 +void backup_queue_read(td_driver_t *driver, td_request_t treq) 55.1107 +{ 55.1108 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1109 + 55.1110 + if(!remus_image) 55.1111 + remus_image = treq.image; 55.1112 + 55.1113 +#if 0 55.1114 + /* due to prefetching, we must return EBUSY on server reads. This 55.1115 + * maintains a consistent disk image */ 55.1116 + td_complete_request(treq, -EBUSY); 55.1117 +#else 55.1118 + /* what exactly is the race that requires the response above? */ 55.1119 + td_forward_request(treq); 55.1120 +#endif 55.1121 +} 55.1122 + 55.1123 +/* see above */ 55.1124 +void backup_queue_write(td_driver_t *driver, td_request_t treq) 55.1125 +{ 55.1126 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1127 + 55.1128 + /* on a server write, we know the domain has failed over. we must change our 55.1129 + * state to unprotected and then have the unprotected queue_write function 55.1130 + * handle the write 55.1131 + */ 55.1132 + 55.1133 + switch_mode(driver, mode_unprotected); 55.1134 + /* TODO: call the appropriate write function rather than return EBUSY */ 55.1135 + td_complete_request(treq, -EBUSY); 55.1136 +} 55.1137 + 55.1138 +static int backup_start(td_driver_t *driver) 55.1139 +{ 55.1140 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1141 + int fd; 55.1142 + 55.1143 + if (ramdisk_start(driver) < 0) 55.1144 + return -1; 55.1145 + 55.1146 + tapdisk_remus.td_queue_read = backup_queue_read; 55.1147 + tapdisk_remus.td_queue_write = backup_queue_write; 55.1148 + /* TODO set flush function */ 55.1149 + return 0; 55.1150 +} 55.1151 + 55.1152 +static int server_do_wreq(td_driver_t *driver) 55.1153 +{ 55.1154 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1155 + static tdremus_wire_t twreq; 55.1156 + char buf[4096]; 55.1157 + int len, rc; 55.1158 + 55.1159 + char header[sizeof(uint32_t) + sizeof(uint64_t)]; 55.1160 + uint32_t *sectors = (uint32_t *) 
header; 55.1161 + uint64_t *sector = (uint64_t *) &header[sizeof(uint32_t)]; 55.1162 + 55.1163 + // RPRINTF("received write request\n"); 55.1164 + 55.1165 + if (mread(s->stream_fd.fd, header, sizeof(header)) < 0) 55.1166 + goto err; 55.1167 + 55.1168 + len = *sectors * driver->info.sector_size; 55.1169 + 55.1170 + //RPRINTF("writing %d sectors (%d bytes) starting at %" PRIu64 "\n", *sectors, len, 55.1171 + // *sector); 55.1172 + 55.1173 + if (len > sizeof(buf)) { 55.1174 + /* freak out! */ 55.1175 + RPRINTF("write request too large: %d/%u\n", len, (unsigned)sizeof(buf)); 55.1176 + return -1; 55.1177 + } 55.1178 + 55.1179 + if (mread(s->stream_fd.fd, buf, len) < 0) 55.1180 + goto err; 55.1181 + 55.1182 + if (ramdisk_write(&s->ramdisk, *sector, *sectors, buf) < 0) 55.1183 + goto err; 55.1184 + 55.1185 + return 0; 55.1186 + 55.1187 + err: 55.1188 + /* should start failover */ 55.1189 + RPRINTF("backup write request error\n"); 55.1190 + close_stream_fd(s); 55.1191 + 55.1192 + return -1; 55.1193 +} 55.1194 + 55.1195 +static int server_do_sreq(td_driver_t *driver) 55.1196 +{ 55.1197 + /* 55.1198 + RPRINTF("submit request received\n"); 55.1199 + */ 55.1200 + 55.1201 + return 0; 55.1202 +} 55.1203 + 55.1204 +/* at this point, the server can start applying the most recent 55.1205 + * ramdisk. */ 55.1206 +static int server_do_creq(td_driver_t *driver) 55.1207 +{ 55.1208 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1209 + 55.1210 + // RPRINTF("committing buffer\n"); 55.1211 + 55.1212 + ramdisk_start_flush(driver); 55.1213 + 55.1214 + /* XXX this message should not be sent until flush completes! 
*/ 55.1215 + if (write(s->stream_fd.fd, TDREMUS_DONE, strlen(TDREMUS_DONE)) != 4) 55.1216 + return -1; 55.1217 + 55.1218 + return 0; 55.1219 +} 55.1220 + 55.1221 + 55.1222 +/* called when data is pending in s->rfd */ 55.1223 +static void remus_server_event(event_id_t id, char mode, void *private) 55.1224 +{ 55.1225 + struct tdremus_state *s = (struct tdremus_state *)private; 55.1226 + td_driver_t *driver = s->tdremus_driver; 55.1227 + char req[5]; 55.1228 + 55.1229 + // RPRINTF("replication data waiting\n"); 55.1230 + 55.1231 + /* TODO: add a get_connection_by_event_id() function. 55.1232 + * for now we can assume that the fd is s->stream_fd */ 55.1233 + 55.1234 + if (mread(s->stream_fd.fd, req, sizeof(req) - 1) < 0) { 55.1235 + RPRINTF("error reading server event, activating backup\n"); 55.1236 + switch_mode(driver, mode_unprotected); 55.1237 + return; 55.1238 + } 55.1239 + 55.1240 + req[4] = '\0'; 55.1241 + 55.1242 + if (!strcmp(req, TDREMUS_WRITE)) 55.1243 + server_do_wreq(driver); 55.1244 + else if (!strcmp(req, TDREMUS_SUBMIT)) 55.1245 + server_do_sreq(driver); 55.1246 + else if (!strcmp(req, TDREMUS_COMMIT)) 55.1247 + server_do_creq(driver); 55.1248 + else 55.1249 + RPRINTF("unknown request received: %s\n", req); 55.1250 + 55.1251 + return; 55.1252 + 55.1253 +} 55.1254 + 55.1255 +/* unprotected */ 55.1256 + 55.1257 +void unprotected_queue_read(td_driver_t *driver, td_request_t treq) 55.1258 +{ 55.1259 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1260 + 55.1261 + /* wait for previous ramdisk to flush before servicing reads */ 55.1262 + if (server_writes_inflight(driver)) { 55.1263 + /* for now lets just return EBUSY. 
if this becomes an issue we can 55.1264 + * do something smarter */ 55.1265 + td_complete_request(treq, -EBUSY); 55.1266 + } 55.1267 + else { 55.1268 + /* here we just pass reads through */ 55.1269 + td_forward_request(treq); 55.1270 + } 55.1271 +} 55.1272 + 55.1273 +/* For a recoverable remus solution we need to log unprotected writes here */ 55.1274 +void unprotected_queue_write(td_driver_t *driver, td_request_t treq) 55.1275 +{ 55.1276 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1277 + 55.1278 + /* wait for previous ramdisk to flush */ 55.1279 + if (server_writes_inflight(driver)) { 55.1280 + RPRINTF("queue_write: waiting for queue to drain"); 55.1281 + td_complete_request(treq, -EBUSY); 55.1282 + } 55.1283 + else { 55.1284 + // RPRINTF("servicing write request on backup\n"); 55.1285 + td_forward_request(treq); 55.1286 + } 55.1287 +} 55.1288 + 55.1289 +static int unprotected_start(td_driver_t *driver) 55.1290 +{ 55.1291 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1292 + 55.1293 + RPRINTF("failure detected, activating passthrough\n"); 55.1294 + 55.1295 + /* close the server socket */ 55.1296 + close_stream_fd(s); 55.1297 + 55.1298 + /* unregister the replication stream */ 55.1299 + tapdisk_server_unregister_event(s->server_fd.id); 55.1300 + 55.1301 + /* close the replication stream */ 55.1302 + close(s->server_fd.fd); 55.1303 + s->server_fd.fd = -1; 55.1304 + 55.1305 + /* install the unprotected read/write handlers */ 55.1306 + tapdisk_remus.td_queue_read = unprotected_queue_read; 55.1307 + tapdisk_remus.td_queue_write = unprotected_queue_write; 55.1308 + 55.1309 + return 0; 55.1310 +} 55.1311 + 55.1312 + 55.1313 +/* control */ 55.1314 + 55.1315 +static inline int resolve_address(const char* addr, struct in_addr* ia) 55.1316 +{ 55.1317 + struct hostent* he; 55.1318 + uint32_t ip; 55.1319 + 55.1320 + if (!(he = gethostbyname(addr))) { 55.1321 + RPRINTF("error resolving %s: %d\n", addr, h_errno); 55.1322 + return 
-1; 55.1323 + } 55.1324 + 55.1325 + if (!he->h_addr_list[0]) { 55.1326 + RPRINTF("no address found for %s\n", addr); 55.1327 + return -1; 55.1328 + } 55.1329 + 55.1330 + /* network byte order */ 55.1331 + ip = *((uint32_t**)he->h_addr_list)[0]; 55.1332 + ia->s_addr = ip; 55.1333 + 55.1334 + return 0; 55.1335 +} 55.1336 + 55.1337 +static int get_args(td_driver_t *driver, const char* name) 55.1338 +{ 55.1339 + struct tdremus_state *state = (struct tdremus_state *)driver->data; 55.1340 + char* host; 55.1341 + char* port; 55.1342 +// char* driver_str; 55.1343 +// char* parent; 55.1344 +// int type; 55.1345 +// char* path; 55.1346 +// unsigned long ulport; 55.1347 +// int i; 55.1348 +// struct sockaddr_in server_addr_in; 55.1349 + 55.1350 + int gai_status; 55.1351 + int valid_addr; 55.1352 + struct addrinfo gai_hints; 55.1353 + struct addrinfo *servinfo, *servinfo_itr; 55.1354 + 55.1355 + memset(&gai_hints, 0, sizeof gai_hints); 55.1356 + gai_hints.ai_family = AF_UNSPEC; 55.1357 + gai_hints.ai_socktype = SOCK_STREAM; 55.1358 + 55.1359 + port = strchr(name, ':'); 55.1360 + if (!port) { 55.1361 + RPRINTF("missing host in %s\n", name); 55.1362 + return -ENOENT; 55.1363 + } 55.1364 + if (!(host = strndup(name, port - name))) { 55.1365 + RPRINTF("unable to allocate host\n"); 55.1366 + return -ENOMEM; 55.1367 + } 55.1368 + port++; 55.1369 + 55.1370 + if ((gai_status = getaddrinfo(host, port, &gai_hints, &servinfo)) != 0) { 55.1371 + RPRINTF("getaddrinfo error: %s\n", gai_strerror(gai_status)); 55.1372 + return -ENOENT; 55.1373 + } 55.1374 + 55.1375 + /* TODO: do something smarter here */ 55.1376 + valid_addr = 0; 55.1377 + for(servinfo_itr = servinfo; servinfo_itr != NULL; servinfo_itr = servinfo_itr->ai_next) { 55.1378 + void *addr; 55.1379 + char *ipver; 55.1380 + 55.1381 + if (servinfo_itr->ai_family == AF_INET) { 55.1382 + valid_addr = 1; 55.1383 + memset(&state->sa, 0, sizeof(state->sa)); 55.1384 + state->sa = *(struct sockaddr_in *)servinfo_itr->ai_addr; 55.1385 + 
break; 55.1386 + } 55.1387 + } 55.1388 + freeaddrinfo(servinfo); 55.1389 + 55.1390 + if (!valid_addr) 55.1391 + return -ENOENT; 55.1392 + 55.1393 + RPRINTF("host: %s, port: %d\n", inet_ntoa(state->sa.sin_addr), ntohs(state->sa.sin_port)); 55.1394 + 55.1395 + return 0; 55.1396 +} 55.1397 + 55.1398 +static int switch_mode(td_driver_t *driver, enum tdremus_mode mode) 55.1399 +{ 55.1400 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1401 + int rc; 55.1402 + 55.1403 + if (mode == s->mode) 55.1404 + return 0; 55.1405 + 55.1406 + if (s->queue_flush) 55.1407 + if ((rc = s->queue_flush(driver)) < 0) { 55.1408 + // fall back to unprotected mode on error 55.1409 + RPRINTF("switch_mode: error flushing queue (old: %d, new: %d)", s->mode, mode); 55.1410 + mode = mode_unprotected; 55.1411 + } 55.1412 + 55.1413 + if (mode == mode_unprotected) 55.1414 + rc = unprotected_start(driver); 55.1415 + else if (mode == mode_primary) 55.1416 + rc = primary_start(driver); 55.1417 + else if (mode == mode_backup) 55.1418 + rc = backup_start(driver); 55.1419 + else { 55.1420 + RPRINTF("unknown mode requested: %d\n", mode); 55.1421 + rc = -1; 55.1422 + } 55.1423 + 55.1424 + if (!rc) 55.1425 + s->mode = mode; 55.1426 + 55.1427 + return rc; 55.1428 +} 55.1429 + 55.1430 +static void ctl_request(event_id_t id, char mode, void *private) 55.1431 +{ 55.1432 + struct tdremus_state *s = (struct tdremus_state *)private; 55.1433 + td_driver_t *driver = s->tdremus_driver; 55.1434 + char msg[80]; 55.1435 + int rc; 55.1436 + 55.1437 + // RPRINTF("data waiting on control fifo\n"); 55.1438 + 55.1439 + if (!(rc = read(s->ctl_fd.fd, msg, sizeof(msg) - 1 /* append nul */))) { 55.1440 + RPRINTF("0-byte read received, reopening FIFO\n"); 55.1441 + /*TODO: we may have to unregister/re-register with tapdisk_server */ 55.1442 + close(s->ctl_fd.fd); 55.1443 + RPRINTF("FIFO closed\n"); 55.1444 + if ((s->ctl_fd.fd = open(s->ctl_path, O_RDWR)) < 0) { 55.1445 + RPRINTF("error reopening FIFO: %d\n", 
errno); 55.1446 + } 55.1447 + return; 55.1448 + } 55.1449 + 55.1450 + if (rc < 0) { 55.1451 + RPRINTF("error reading from FIFO: %d\n", errno); 55.1452 + return; 55.1453 + } 55.1454 + 55.1455 + /* TODO: need to get driver somehow */ 55.1456 + msg[rc] = '\0'; 55.1457 + if (!strncmp(msg, "flush", 5)) { 55.1458 + if (s->queue_flush) 55.1459 + if ((rc = s->queue_flush(driver))) { 55.1460 + RPRINTF("error passing flush request to backup"); 55.1461 + ctl_respond(s, TDREMUS_FAIL); 55.1462 + } 55.1463 + } else { 55.1464 + RPRINTF("unknown command: %s\n", msg); 55.1465 + } 55.1466 +} 55.1467 + 55.1468 +static int ctl_respond(struct tdremus_state *s, const char *response) 55.1469 +{ 55.1470 + int rc; 55.1471 + 55.1472 + if ((rc = write(s->msg_fd.fd, response, strlen(response))) < 0) { 55.1473 + RPRINTF("error writing notification: %d\n", errno); 55.1474 + close(s->msg_fd.fd); 55.1475 + if ((s->msg_fd.fd = open(s->msg_path, O_RDWR)) < 0) 55.1476 + RPRINTF("error reopening FIFO: %d\n", errno); 55.1477 + } 55.1478 + 55.1479 + return rc; 55.1480 +} 55.1481 + 55.1482 +/* must be called after the underlying driver has been initialized */ 55.1483 +static int ctl_open(td_driver_t *driver, const char* name) 55.1484 +{ 55.1485 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1486 + int i, l; 55.1487 + 55.1488 + /* first we must ensure that BLKTAP_CTRL_DIR exists */ 55.1489 + if (mkdir(BLKTAP_CTRL_DIR, 0755) && errno != EEXIST) 55.1490 + { 55.1491 + DPRINTF("error creating directory %s: %d\n", BLKTAP_CTRL_DIR, errno); 55.1492 + return -1; 55.1493 + } 55.1494 + 55.1495 + /* use the device name to create the control fifo path */ 55.1496 + if (asprintf(&s->ctl_path, BLKTAP_CTRL_DIR "/remus_%s", name) < 0) 55.1497 + return -1; 55.1498 + /* scrub fifo pathname */ 55.1499 + for (i = strlen(BLKTAP_CTRL_DIR) + 1, l = strlen(s->ctl_path); i < l; i++) { 55.1500 + if (strchr(":/", s->ctl_path[i])) 55.1501 + s->ctl_path[i] = '_'; 55.1502 + } 55.1503 + if (asprintf(&s->msg_path, 
"%s.msg", s->ctl_path) < 0) 55.1504 + goto err_ctlfifo; 55.1505 + 55.1506 + if (mkfifo(s->ctl_path, S_IRWXU|S_IRWXG|S_IRWXO) && errno != EEXIST) { 55.1507 + RPRINTF("error creating control FIFO %s: %d\n", s->ctl_path, errno); 55.1508 + goto err_msgfifo; 55.1509 + } 55.1510 + 55.1511 + if (mkfifo(s->msg_path, S_IRWXU|S_IRWXG|S_IRWXO) && errno != EEXIST) { 55.1512 + RPRINTF("error creating message FIFO %s: %d\n", s->msg_path, errno); 55.1513 + goto err_msgfifo; 55.1514 + } 55.1515 + 55.1516 + /* RDWR so that fd doesn't block select when no writer is present */ 55.1517 + if ((s->ctl_fd.fd = open(s->ctl_path, O_RDWR)) < 0) { 55.1518 + RPRINTF("error opening control FIFO %s: %d\n", s->ctl_path, errno); 55.1519 + goto err_msgfifo; 55.1520 + } 55.1521 + 55.1522 + if ((s->msg_fd.fd = open(s->msg_path, O_RDWR)) < 0) { 55.1523 + RPRINTF("error opening message FIFO %s: %d\n", s->msg_path, errno); 55.1524 + goto err_openctlfifo; 55.1525 + } 55.1526 + 55.1527 + RPRINTF("control FIFO %s\n", s->ctl_path); 55.1528 + RPRINTF("message FIFO %s\n", s->msg_path); 55.1529 + 55.1530 + return 0; 55.1531 + 55.1532 + err_openctlfifo: 55.1533 + close(s->ctl_fd.fd); 55.1534 + err_msgfifo: 55.1535 + free(s->msg_path); 55.1536 + s->msg_path = NULL; 55.1537 + err_ctlfifo: 55.1538 + free(s->ctl_path); 55.1539 + s->ctl_path = NULL; 55.1540 + return -1; 55.1541 +} 55.1542 + 55.1543 +static void ctl_close(td_driver_t *driver) 55.1544 +{ 55.1545 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1546 + 55.1547 + /* TODO: close *all* connections */ 55.1548 + 55.1549 + if(s->ctl_fd.fd) 55.1550 + close(s->ctl_fd.fd); 55.1551 + 55.1552 + if (s->ctl_path) { 55.1553 + unlink(s->ctl_path); 55.1554 + free(s->ctl_path); 55.1555 + s->ctl_path = NULL; 55.1556 + } 55.1557 + if (s->msg_path) { 55.1558 + unlink(s->msg_path); 55.1559 + free(s->msg_path); 55.1560 + s->msg_path = NULL; 55.1561 + } 55.1562 +} 55.1563 + 55.1564 +static int ctl_register(struct tdremus_state *s) 55.1565 +{ 55.1566 + 
RPRINTF("registering ctl fifo\n"); 55.1567 + 55.1568 + /* register ctl fd */ 55.1569 + s->ctl_fd.id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, s->ctl_fd.fd, 0, ctl_request, s); 55.1570 + 55.1571 + if (s->ctl_fd.id < 0) { 55.1572 + RPRINTF("error registering ctrl FIFO %s: %d\n", s->ctl_path, s->ctl_fd.id); 55.1573 + return -1; 55.1574 + } 55.1575 + 55.1576 + return 0; 55.1577 +} 55.1578 + 55.1579 +/* interface */ 55.1580 + 55.1581 +static int tdremus_open(td_driver_t *driver, const char *name, 55.1582 + td_flag_t flags) 55.1583 +{ 55.1584 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1585 + int rc; 55.1586 + 55.1587 + RPRINTF("opening %s\n", name); 55.1588 + 55.1589 + /* first we need to get the underlying vbd for this driver stack. To do so we 55.1590 + * need to know the vbd's id. Fortunately, for tapdisk2 this is hard-coded as 55.1591 + * 0 (see tapdisk2.c) 55.1592 + */ 55.1593 + device_vbd = tapdisk_server_get_vbd(0); 55.1594 + 55.1595 + memset(s, 0, sizeof(*s)); 55.1596 + s->server_fd.fd = -1; 55.1597 + s->stream_fd.fd = -1; 55.1598 + s->ctl_fd.fd = -1; 55.1599 + s->msg_fd.fd = -1; 55.1600 + 55.1601 + /* TODO: this is only needed so that the server can send writes down 55.1602 + * the driver stack from the stream_fd event handler */ 55.1603 + s->tdremus_driver = driver; 55.1604 + 55.1605 + /* parse name to get info etc */ 55.1606 + if ((rc = get_args(driver, name))) 55.1607 + return rc; 55.1608 + 55.1609 + if ((rc = ctl_open(driver, name))) { 55.1610 + RPRINTF("error setting up control channel\n"); 55.1611 + free(s->driver_data); 55.1612 + return rc; 55.1613 + } 55.1614 + 55.1615 + if ((rc = ctl_register(s))) { 55.1616 + RPRINTF("error registering control channel\n"); 55.1617 + free(s->driver_data); 55.1618 + return rc; 55.1619 + } 55.1620 + 55.1621 + if (!(rc = remus_bind(s))) 55.1622 + rc = switch_mode(driver, mode_backup); 55.1623 + else if (rc == -2) 55.1624 + rc = switch_mode(driver, mode_primary); 55.1625 + 55.1626 + if 
(!rc) 55.1627 + return 0; 55.1628 + 55.1629 + tdremus_close(driver); 55.1630 + return -EIO; 55.1631 +} 55.1632 + 55.1633 +static int tdremus_close(td_driver_t *driver) 55.1634 +{ 55.1635 + struct tdremus_state *s = (struct tdremus_state *)driver->data; 55.1636 + 55.1637 + RPRINTF("closing\n"); 55.1638 + 55.1639 + if (s->driver_data) { 55.1640 + free(s->driver_data); 55.1641 + s->driver_data = NULL; 55.1642 + } 55.1643 + if (s->server_fd.fd >= 0) { 55.1644 + close(s->server_fd.fd); 55.1645 + s->server_fd.fd = -1; 55.1646 + } 55.1647 + if (s->stream_fd.fd >= 0) 55.1648 + close_stream_fd(s); 55.1649 + 55.1650 + ctl_close(driver); 55.1651 + 55.1652 + return 0; 55.1653 +} 55.1654 + 55.1655 +static int tdremus_get_parent_id(td_driver_t *driver, td_disk_id_t *id) 55.1656 +{ 55.1657 + /* we shouldn't have a parent... for now */ 55.1658 + return -EINVAL; 55.1659 +} 55.1660 + 55.1661 +static int tdremus_validate_parent(td_driver_t *driver, 55.1662 + td_driver_t *pdriver, td_flag_t flags) 55.1663 +{ 55.1664 + return 0; 55.1665 +} 55.1666 + 55.1667 +struct tap_disk tapdisk_remus = { 55.1668 + .disk_type = "tapdisk_remus", 55.1669 + .private_data_size = sizeof(struct tdremus_state), 55.1670 + .td_open = tdremus_open, 55.1671 + .td_queue_read = unprotected_queue_read, 55.1672 + .td_queue_write = unprotected_queue_write, 55.1673 + .td_close = tdremus_close, 55.1674 + .td_get_parent_id = tdremus_get_parent_id, 55.1675 + .td_validate_parent = tdremus_validate_parent, 55.1676 + .td_debug = NULL, 55.1677 +};
56.1 --- a/tools/blktap2/drivers/check_gcrypt Mon Nov 02 19:35:54 2009 -0800 56.2 +++ b/tools/blktap2/drivers/check_gcrypt Fri Mar 19 18:36:57 2010 -0700 56.3 @@ -4,8 +4,7 @@ cat > .gcrypt.c << EOF 56.4 #include <gcrypt.h> 56.5 int main(void) 56.6 { 56.7 - char dummy[14]; 56.8 - MD5("DUMMY", 5, dummy); 56.9 + gcry_md_hash_buffer(GCRY_MD_MD5, NULL, NULL, 0); 56.10 return 0; 56.11 } 56.12 EOF
57.1 --- a/tools/blktap2/drivers/disktypes.h Mon Nov 02 19:35:54 2009 -0800 57.2 +++ b/tools/blktap2/drivers/disktypes.h Fri Mar 19 18:36:57 2010 -0700 57.3 @@ -49,6 +49,7 @@ extern struct tap_disk tapdisk_ram; 57.4 extern struct tap_disk tapdisk_qcow; 57.5 extern struct tap_disk tapdisk_block_cache; 57.6 extern struct tap_disk tapdisk_log; 57.7 +extern struct tap_disk tapdisk_remus; 57.8 57.9 #define MAX_DISK_TYPES 20 57.10 57.11 @@ -61,6 +62,7 @@ extern struct tap_disk tapdisk_log; 57.12 #define DISK_TYPE_QCOW 6 57.13 #define DISK_TYPE_BLOCK_CACHE 7 57.14 #define DISK_TYPE_LOG 9 57.15 +#define DISK_TYPE_REMUS 10 57.16 57.17 /*Define Individual Disk Parameters here */ 57.18 static disk_info_t null_disk = { 57.19 @@ -167,6 +169,16 @@ static disk_info_t log_disk = { 57.20 #endif 57.21 }; 57.22 57.23 +static disk_info_t remus_disk = { 57.24 + DISK_TYPE_REMUS, 57.25 + "remus disk replicator (remus)", 57.26 + "remus", 57.27 + 0, 57.28 +#ifdef TAPDISK 57.29 + &tapdisk_remus, 57.30 +#endif 57.31 +}; 57.32 + 57.33 /*Main disk info array */ 57.34 static disk_info_t *dtypes[] = { 57.35 &aio_disk, 57.36 @@ -179,6 +191,7 @@ static disk_info_t *dtypes[] = { 57.37 &block_cache_disk, 57.38 &null_disk, 57.39 &log_disk, 57.40 + &remus_disk, 57.41 }; 57.42 57.43 #endif
58.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 58.2 +++ b/tools/blktap2/drivers/hashtable.c Fri Mar 19 18:36:57 2010 -0700 58.3 @@ -0,0 +1,274 @@ 58.4 +/* Copyright (C) 2004 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */ 58.5 + 58.6 +#include "hashtable.h" 58.7 +#include "hashtable_private.h" 58.8 +#include <stdlib.h> 58.9 +#include <stdio.h> 58.10 +#include <string.h> 58.11 +#include <math.h> 58.12 + 58.13 +/* 58.14 +Credit for primes table: Aaron Krowne 58.15 + http://br.endernet.org/~akrowne/ 58.16 + http://planetmath.org/encyclopedia/GoodHashTablePrimes.html 58.17 +*/ 58.18 +static const unsigned int primes[] = { 58.19 + 53, 97, 193, 389, 58.20 + 769, 1543, 3079, 6151, 58.21 + 12289, 24593, 49157, 98317, 58.22 + 196613, 393241, 786433, 1572869, 58.23 + 3145739, 6291469, 12582917, 25165843, 58.24 + 50331653, 100663319, 201326611, 402653189, 58.25 + 805306457, 1610612741 58.26 +}; 58.27 +const unsigned int prime_table_length = sizeof(primes)/sizeof(primes[0]); 58.28 +const float max_load_factor = 0.65; 58.29 + 58.30 +/*****************************************************************************/ 58.31 +struct hashtable * 58.32 +create_hashtable(unsigned int minsize, 58.33 + unsigned int (*hashf) (void*), 58.34 + int (*eqf) (void*,void*)) 58.35 +{ 58.36 + struct hashtable *h; 58.37 + unsigned int pindex, size = primes[0]; 58.38 + /* Check requested hashtable isn't too large */ 58.39 + if (minsize > (1u << 30)) return NULL; 58.40 + /* Enforce size as prime */ 58.41 + for (pindex=0; pindex < prime_table_length; pindex++) { 58.42 + if (primes[pindex] > minsize) { size = primes[pindex]; break; } 58.43 + } 58.44 + h = (struct hashtable *)malloc(sizeof(struct hashtable)); 58.45 + if (NULL == h) return NULL; /*oom*/ 58.46 + h->table = (struct entry **)malloc(sizeof(struct entry*) * size); 58.47 + if (NULL == h->table) { free(h); return NULL; } /*oom*/ 58.48 + memset(h->table, 0, size * sizeof(struct entry *)); 58.49 + h->tablelength = size; 58.50 + 
h->primeindex = pindex; 58.51 + h->entrycount = 0; 58.52 + h->hashfn = hashf; 58.53 + h->eqfn = eqf; 58.54 + h->loadlimit = (unsigned int) ceil(size * max_load_factor); 58.55 + return h; 58.56 +} 58.57 + 58.58 +/*****************************************************************************/ 58.59 +unsigned int 58.60 +hash(struct hashtable *h, void *k) 58.61 +{ 58.62 + /* Aim to protect against poor hash functions by adding logic here 58.63 + * - logic taken from java 1.4 hashtable source */ 58.64 + unsigned int i = h->hashfn(k); 58.65 + i += ~(i << 9); 58.66 + i ^= ((i >> 14) | (i << 18)); /* >>> */ 58.67 + i += (i << 4); 58.68 + i ^= ((i >> 10) | (i << 22)); /* >>> */ 58.69 + return i; 58.70 +} 58.71 + 58.72 +/*****************************************************************************/ 58.73 +static int 58.74 +hashtable_expand(struct hashtable *h) 58.75 +{ 58.76 + /* Double the size of the table to accomodate more entries */ 58.77 + struct entry **newtable; 58.78 + struct entry *e; 58.79 + struct entry **pE; 58.80 + unsigned int newsize, i, index; 58.81 + /* Check we're not hitting max capacity */ 58.82 + if (h->primeindex == (prime_table_length - 1)) return 0; 58.83 + newsize = primes[++(h->primeindex)]; 58.84 + 58.85 + newtable = (struct entry **)malloc(sizeof(struct entry*) * newsize); 58.86 + if (NULL != newtable) 58.87 + { 58.88 + memset(newtable, 0, newsize * sizeof(struct entry *)); 58.89 + /* This algorithm is not 'stable'. ie. 
it reverses the list 58.90 + * when it transfers entries between the tables */ 58.91 + for (i = 0; i < h->tablelength; i++) { 58.92 + while (NULL != (e = h->table[i])) { 58.93 + h->table[i] = e->next; 58.94 + index = indexFor(newsize,e->h); 58.95 + e->next = newtable[index]; 58.96 + newtable[index] = e; 58.97 + } 58.98 + } 58.99 + free(h->table); 58.100 + h->table = newtable; 58.101 + } 58.102 + /* Plan B: realloc instead */ 58.103 + else 58.104 + { 58.105 + newtable = (struct entry **) 58.106 + realloc(h->table, newsize * sizeof(struct entry *)); 58.107 + if (NULL == newtable) { (h->primeindex)--; return 0; } 58.108 + h->table = newtable; 58.109 + memset(newtable[h->tablelength], 0, newsize - h->tablelength); 58.110 + for (i = 0; i < h->tablelength; i++) { 58.111 + for (pE = &(newtable[i]), e = *pE; e != NULL; e = *pE) { 58.112 + index = indexFor(newsize,e->h); 58.113 + if (index == i) 58.114 + { 58.115 + pE = &(e->next); 58.116 + } 58.117 + else 58.118 + { 58.119 + *pE = e->next; 58.120 + e->next = newtable[index]; 58.121 + newtable[index] = e; 58.122 + } 58.123 + } 58.124 + } 58.125 + } 58.126 + h->tablelength = newsize; 58.127 + h->loadlimit = (unsigned int) ceil(newsize * max_load_factor); 58.128 + return -1; 58.129 +} 58.130 + 58.131 +/*****************************************************************************/ 58.132 +unsigned int 58.133 +hashtable_count(struct hashtable *h) 58.134 +{ 58.135 + return h->entrycount; 58.136 +} 58.137 + 58.138 +/*****************************************************************************/ 58.139 +int 58.140 +hashtable_insert(struct hashtable *h, void *k, void *v) 58.141 +{ 58.142 + /* This method allows duplicate keys - but they shouldn't be used */ 58.143 + unsigned int index; 58.144 + struct entry *e; 58.145 + if (++(h->entrycount) > h->loadlimit) 58.146 + { 58.147 + /* Ignore the return value. 
If expand fails, we should 58.148 + * still try cramming just this value into the existing table 58.149 + * -- we may not have memory for a larger table, but one more 58.150 + * element may be ok. Next time we insert, we'll try expanding again.*/ 58.151 + hashtable_expand(h); 58.152 + } 58.153 + e = (struct entry *)malloc(sizeof(struct entry)); 58.154 + if (NULL == e) { --(h->entrycount); return 0; } /*oom*/ 58.155 + e->h = hash(h,k); 58.156 + index = indexFor(h->tablelength,e->h); 58.157 + e->k = k; 58.158 + e->v = v; 58.159 + e->next = h->table[index]; 58.160 + h->table[index] = e; 58.161 + return -1; 58.162 +} 58.163 + 58.164 +/*****************************************************************************/ 58.165 +void * /* returns value associated with key */ 58.166 +hashtable_search(struct hashtable *h, void *k) 58.167 +{ 58.168 + struct entry *e; 58.169 + unsigned int hashvalue, index; 58.170 + hashvalue = hash(h,k); 58.171 + index = indexFor(h->tablelength,hashvalue); 58.172 + e = h->table[index]; 58.173 + while (NULL != e) 58.174 + { 58.175 + /* Check hash value to short circuit heavier comparison */ 58.176 + if ((hashvalue == e->h) && (h->eqfn(k, e->k))) return e->v; 58.177 + e = e->next; 58.178 + } 58.179 + return NULL; 58.180 +} 58.181 + 58.182 +/*****************************************************************************/ 58.183 +void * /* returns value associated with key */ 58.184 +hashtable_remove(struct hashtable *h, void *k) 58.185 +{ 58.186 + /* TODO: consider compacting the table when the load factor drops enough, 58.187 + * or provide a 'compact' method. 
*/ 58.188 + 58.189 + struct entry *e; 58.190 + struct entry **pE; 58.191 + void *v; 58.192 + unsigned int hashvalue, index; 58.193 + 58.194 + hashvalue = hash(h,k); 58.195 + index = indexFor(h->tablelength,hash(h,k)); 58.196 + pE = &(h->table[index]); 58.197 + e = *pE; 58.198 + while (NULL != e) 58.199 + { 58.200 + /* Check hash value to short circuit heavier comparison */ 58.201 + if ((hashvalue == e->h) && (h->eqfn(k, e->k))) 58.202 + { 58.203 + *pE = e->next; 58.204 + h->entrycount--; 58.205 + v = e->v; 58.206 + freekey(e->k); 58.207 + free(e); 58.208 + return v; 58.209 + } 58.210 + pE = &(e->next); 58.211 + e = e->next; 58.212 + } 58.213 + return NULL; 58.214 +} 58.215 + 58.216 +/*****************************************************************************/ 58.217 +/* destroy */ 58.218 +void 58.219 +hashtable_destroy(struct hashtable *h, int free_values) 58.220 +{ 58.221 + unsigned int i; 58.222 + struct entry *e, *f; 58.223 + struct entry **table = h->table; 58.224 + if (free_values) 58.225 + { 58.226 + for (i = 0; i < h->tablelength; i++) 58.227 + { 58.228 + e = table[i]; 58.229 + while (NULL != e) 58.230 + { f = e; e = e->next; freekey(f->k); free(f->v); free(f); } 58.231 + } 58.232 + } 58.233 + else 58.234 + { 58.235 + for (i = 0; i < h->tablelength; i++) 58.236 + { 58.237 + e = table[i]; 58.238 + while (NULL != e) 58.239 + { f = e; e = e->next; freekey(f->k); free(f); } 58.240 + } 58.241 + } 58.242 + free(h->table); 58.243 + free(h); 58.244 +} 58.245 + 58.246 +/* 58.247 + * Copyright (c) 2002, Christopher Clark 58.248 + * All rights reserved. 58.249 + * 58.250 + * Redistribution and use in source and binary forms, with or without 58.251 + * modification, are permitted provided that the following conditions 58.252 + * are met: 58.253 + * 58.254 + * * Redistributions of source code must retain the above copyright 58.255 + * notice, this list of conditions and the following disclaimer. 
58.256 + * 58.257 + * * Redistributions in binary form must reproduce the above copyright 58.258 + * notice, this list of conditions and the following disclaimer in the 58.259 + * documentation and/or other materials provided with the distribution. 58.260 + * 58.261 + * * Neither the name of the original author; nor the names of any contributors 58.262 + * may be used to endorse or promote products derived from this software 58.263 + * without specific prior written permission. 58.264 + * 58.265 + * 58.266 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 58.267 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 58.268 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 58.269 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 58.270 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 58.271 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 58.272 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 58.273 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 58.274 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 58.275 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 58.276 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 58.277 + */
/* tools/blktap2/drivers/hashtable_itr.c */
/* Copyright (C) 2002, 2004 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */

#include "hashtable.h"
#include "hashtable_private.h"
#include "hashtable_itr.h"
#include <stdlib.h> /* defines NULL */

/* Cursor over a hashtable's entries. */
struct hashtable_itr {
    struct hashtable *h;    /* table being iterated */
    struct entry *e;        /* current entry, NULL once iteration has ended */
    struct entry *parent;   /* predecessor of e in its chain, NULL if e is the chain head */
    unsigned int index;     /* bucket holding e; set to tablelength at end of table */
};

/*****************************************************************************/
/* hashtable_iterator    - iterator constructor
 *
 * Allocates an iterator positioned on the first entry of the first
 * non-empty bucket.  For an empty table the iterator is returned with
 * e == NULL and index == tablelength.  Returns NULL if the iterator
 * itself cannot be allocated. */

struct hashtable_itr *
hashtable_iterator(struct hashtable *h)
{
    unsigned int i, tablelength;
    struct hashtable_itr *itr = (struct hashtable_itr *)
        malloc(sizeof(struct hashtable_itr));
    if (NULL == itr) return NULL;
    itr->h = h;
    itr->e = NULL;
    itr->parent = NULL;
    tablelength = h->tablelength;
    itr->index = tablelength;
    if (0 == h->entrycount) return itr;

    for (i = 0; i < tablelength; i++)
    {
        if (NULL != h->table[i])
        {
            itr->e = h->table[i];
            itr->index = i;
            break;
        }
    }
    return itr;
}

/*****************************************************************************/
/* key      - return the key of the (key,value) pair at the current position */
/* value    - return the value of the (key,value) pair at the current position */
/* Neither accessor checks for i->e == NULL; the iterator must be positioned
 * on an entry. */

void *
hashtable_iterator_key(struct hashtable_itr *i)
{ return i->e->k; }

void *
hashtable_iterator_value(struct hashtable_itr *i)
{ return i->e->v; }

/*****************************************************************************/
/* advance - advance the iterator to the next element
 *           returns zero if advanced to end of table,
 *           non-zero (-1) otherwise */

int
hashtable_iterator_advance(struct hashtable_itr *itr)
{
    unsigned int j,tablelength;
    struct entry **table;
    struct entry *next;
    if (NULL == itr->e) return 0; /* stupidity check */

    /* First try the next entry in the current chain. */
    next = itr->e->next;
    if (NULL != next)
    {
        itr->parent = itr->e;
        itr->e = next;
        return -1;
    }
    /* Chain exhausted: move on to the next bucket; its head has no parent. */
    tablelength = itr->h->tablelength;
    itr->parent = NULL;
    if (tablelength <= (j = ++(itr->index)))
    {
        itr->e = NULL;
        return 0;
    }
    table = itr->h->table;
    /* Skip empty buckets until an entry is found or the table ends. */
    while (NULL == (next = table[j]))
    {
        if (++j >= tablelength)
        {
            itr->index = tablelength;
            itr->e = NULL;
            return 0;
        }
    }
    itr->index = j;
    itr->e = next;
    return -1;
}

/*****************************************************************************/
/* remove - remove the entry at the current iterator position
 *          and advance the iterator, if there is a successive
 *          element.
 *          If you want the value, read it before you remove:
 *          beware memory leaks if you don't.
 *          Returns zero if end of iteration. */

int
hashtable_iterator_remove(struct hashtable_itr *itr)
{
    struct entry *remember_e, *remember_parent;
    int ret;

    /* Do the removal */
    if (NULL == (itr->parent))
    {
        /* element is head of a chain */
        itr->h->table[itr->index] = itr->e->next;
    } else {
        /* element is mid-chain */
        itr->parent->next = itr->e->next;
    }
    /* itr->e is now outside the hashtable */
    remember_e = itr->e;
    itr->h->entrycount--;
    freekey(remember_e->k);

    /* Advance the iterator, correcting the parent */
    /* The unlinked entry is still readable here, so advance() can follow its
     * next pointer; it must therefore be freed only after the advance.  If
     * advance() stayed in the same chain it recorded the removed entry as the
     * parent, which has to be replaced by the entry's own former parent. */
    remember_parent = itr->parent;
    ret = hashtable_iterator_advance(itr);
    if (itr->parent == remember_e) { itr->parent = remember_parent; }
    free(remember_e);
    return ret;
}

/*****************************************************************************/
/* search - reposition the supplied iterator on the entry of h whose key
 *          matches k; the iterator is left untouched when nothing matches. */
int /* returns zero if not found */
hashtable_iterator_search(struct hashtable_itr *itr,
                          struct hashtable *h, void *k)
{
    struct entry *e, *parent;
    unsigned int hashvalue, index;

    hashvalue = hash(h,k);
    index = indexFor(h->tablelength,hashvalue);

    e = h->table[index];
    parent = NULL;
    while (NULL != e)
    {
        /* Check hash value to short circuit heavier comparison */
        if ((hashvalue == e->h) && (h->eqfn(k, e->k)))
        {
            itr->index = index;
            itr->e = e;
            itr->parent = parent;
            itr->h = h;
            return -1;
        }
        parent = e;
        e = e->next;
    }
    return 0;
}


/*
 * Copyright (c) 2002, 2004, Christopher Clark
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * * Neither the name of the original author; nor the names of any contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/* tools/blktap2/drivers/hashtable_itr.h */
/* Copyright (C) 2002, 2004 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */

#ifndef __HASHTABLE_ITR_CWC22__
#define __HASHTABLE_ITR_CWC22__
#include "hashtable.h"
#include "hashtable_private.h" /* needed to enable inlining */

/* Opaque iterator type. */
struct hashtable_itr;

/*****************************************************************************/
/* hashtable_iterator
 * - create an iterator over the entries of h */

struct hashtable_itr *
hashtable_iterator(struct hashtable *h);

/*****************************************************************************/
/* hashtable_iterator_key
 * - return the key of the (key,value) pair at the current position */

void *
hashtable_iterator_key(struct hashtable_itr *i);

/*****************************************************************************/
/* value - return the value of the (key,value) pair at the current position */

void *
hashtable_iterator_value(struct hashtable_itr *i);

/*****************************************************************************/
/* advance - advance the iterator to the next element
 *           returns zero if advanced to end of table */

int
hashtable_iterator_advance(struct hashtable_itr *itr);

/*****************************************************************************/
/* remove - remove current element and advance the iterator to the next element
 *          NB: if you need the value to free it, read it before
 *          removing. ie: beware memory leaks!
 *          returns zero if advanced to end of table */

int
hashtable_iterator_remove(struct hashtable_itr *itr);

/*****************************************************************************/
/* search - overwrite the supplied iterator, to point to the entry
 *          matching the supplied key.
 *          h points to the hashtable to be searched.
 *          returns zero if not found. */
int
hashtable_iterator_search(struct hashtable_itr *itr,
                          struct hashtable *h, void *k);

/* Defines a wrapper named fnname around hashtable_iterator_search that
 * takes the key as a keytype pointer instead of void pointer. */
#define DEFINE_HASHTABLE_ITERATOR_SEARCH(fnname, keytype) \
int fnname (struct hashtable_itr *i, struct hashtable *h, keytype *k) \
{ \
    return (hashtable_iterator_search(i,h,k)); \
}



#endif /* __HASHTABLE_ITR_CWC22__*/

/*
 * Copyright (c) 2002, 2004, Christopher Clark
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * * Neither the name of the original author; nor the names of any contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
61.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 61.2 +++ b/tools/blktap2/drivers/hashtable_utility.c Fri Mar 19 18:36:57 2010 -0700 61.3 @@ -0,0 +1,71 @@ 61.4 +/* Copyright (C) 2002 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */ 61.5 + 61.6 +#include "hashtable.h" 61.7 +#include "hashtable_private.h" 61.8 +#include "hashtable_utility.h" 61.9 +#include <stdlib.h> 61.10 +#include <stdio.h> 61.11 +#include <string.h> 61.12 + 61.13 +/*****************************************************************************/ 61.14 +/* hashtable_change 61.15 + * 61.16 + * function to change the value associated with a key, where there already 61.17 + * exists a value bound to the key in the hashtable. 61.18 + * Source due to Holger Schemel. 61.19 + * 61.20 + * */ 61.21 +int 61.22 +hashtable_change(struct hashtable *h, void *k, void *v) 61.23 +{ 61.24 + struct entry *e; 61.25 + unsigned int hashvalue, index; 61.26 + hashvalue = hash(h,k); 61.27 + index = indexFor(h->tablelength,hashvalue); 61.28 + e = h->table[index]; 61.29 + while (NULL != e) 61.30 + { 61.31 + /* Check hash value to short circuit heavier comparison */ 61.32 + if ((hashvalue == e->h) && (h->eqfn(k, e->k))) 61.33 + { 61.34 + free(e->v); 61.35 + e->v = v; 61.36 + return -1; 61.37 + } 61.38 + e = e->next; 61.39 + } 61.40 + return 0; 61.41 +} 61.42 + 61.43 +/* 61.44 + * Copyright (c) 2002, Christopher Clark 61.45 + * All rights reserved. 61.46 + * 61.47 + * Redistribution and use in source and binary forms, with or without 61.48 + * modification, are permitted provided that the following conditions 61.49 + * are met: 61.50 + * 61.51 + * * Redistributions of source code must retain the above copyright 61.52 + * notice, this list of conditions and the following disclaimer. 61.53 + * 61.54 + * * Redistributions in binary form must reproduce the above copyright 61.55 + * notice, this list of conditions and the following disclaimer in the 61.56 + * documentation and/or other materials provided with the distribution. 
61.57 + * 61.58 + * * Neither the name of the original author; nor the names of any contributors 61.59 + * may be used to endorse or promote products derived from this software 61.60 + * without specific prior written permission. 61.61 + * 61.62 + * 61.63 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 61.64 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 61.65 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 61.66 + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER 61.67 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 61.68 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 61.69 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 61.70 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF 61.71 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING 61.72 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 61.73 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 61.74 + */
/* tools/blktap2/drivers/hashtable_utility.h */
/* Copyright (C) 2002 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */

#ifndef __HASHTABLE_CWC22_UTILITY_H__
#define __HASHTABLE_CWC22_UTILITY_H__

/* Forward declaration so this header can be included on its own; without
 * it the struct tag in the prototype below would be scoped to the
 * prototype only. */
struct hashtable;

/*****************************************************************************
 * hashtable_change
 *
 * function to change the value associated with a key, where there already
 * exists a value bound to the key in the hashtable.
 * Source due to Holger Schemel.
 *
 * @name        hashtable_change
 * @param   h   the hashtable
 * @param   k   the key whose binding is to be changed
 * @param   v   the new value to bind to the key
 * @return      non-zero if the key was found and rebound, zero otherwise
 *
 */
int
hashtable_change(struct hashtable *h, void *k, void *v);

#endif /* __HASHTABLE_CWC22_UTILITY_H__ */

/*
 * Copyright (c) 2002, Christopher Clark
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * * Neither the name of the original author; nor the names of any contributors
 * may be used to endorse or promote products derived from this software
 * without specific prior written permission.
 *
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
63.1 --- a/tools/blktap2/drivers/io-optimize.c Mon Nov 02 19:35:54 2009 -0800 63.2 +++ b/tools/blktap2/drivers/io-optimize.c Fri Mar 19 18:36:57 2010 -0700 63.3 @@ -51,9 +51,16 @@ void 63.4 opio_free(struct opioctx *ctx) 63.5 { 63.6 free(ctx->opios); 63.7 + ctx->opios = NULL; 63.8 + 63.9 free(ctx->free_opios); 63.10 + ctx->free_opios = NULL; 63.11 + 63.12 free(ctx->iocb_queue); 63.13 + ctx->iocb_queue = NULL; 63.14 + 63.15 free(ctx->event_queue); 63.16 + ctx->event_queue = NULL; 63.17 } 63.18 63.19 int
64.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000 64.2 +++ b/tools/blktap2/drivers/libaio-compat.h Fri Mar 19 18:36:57 2010 -0700 64.3 @@ -0,0 +1,92 @@ 64.4 +/* 64.5 + * Copyright (c) 2010, XenSource Inc. 64.6 + * All rights reserved. 64.7 + * 64.8 + * This library is free software; you can redistribute it and/or 64.9 + * modify it under the terms of the GNU Lesser General Public License 64.10 + * as published by the Free Software Foundation; either version 2 of 64.11 + * the License, or (at your option) any later version. 64.12 + * 64.13 + * This library is distributed in the hope that it will be useful, but 64.14 + * WITHOUT ANY WARRANTY; without even the implied warranty of 64.15 + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 64.16 + * Lesser General Public License for more details. 64.17 + * 64.18 + * You should have received a copy of the GNU Lesser General Public 64.19 + * License along with this library; if not, write to the Free Software 64.20 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 64.21 + * USA 64.22 + */ 64.23 + 64.24 +/* 64.25 + * kernel 2.6.21 added eventfd(2) support, kernel 2.6.22 eventfds for 64.26 + * aio. libaio 0.3.107 updated the header file, but few systems have 64.27 + * it. define a custom iocb_common struct instead, and work around a 64.28 + * potentially missing sys/eventfd.h. this header should vanish over 64.29 + * time. 
64.30 + */ 64.31 + 64.32 +#ifndef __LIBAIO_COMPAT 64.33 +#define __LIBAIO_COMPAT 64.34 + 64.35 +#include <libaio.h> 64.36 +#include <unistd.h> 64.37 +#include <sys/syscall.h> 64.38 + 64.39 +struct __compat_io_iocb_common { 64.40 + char __pad_buf[8]; 64.41 + char __pad_nbytes[8]; 64.42 + long long offset; 64.43 + long long __pad3; 64.44 + unsigned flags; 64.45 + unsigned resfd; 64.46 +}; 64.47 + 64.48 +static inline void __io_set_eventfd(struct iocb *iocb, int eventfd) 64.49 +{ 64.50 + struct __compat_io_iocb_common *c; 64.51 + c = (struct __compat_io_iocb_common*)&iocb->u.c; 64.52 + c->flags |= (1 << 0); 64.53 + c->resfd = eventfd; 64.54 +} 64.55 + 64.56 +#ifndef SYS_eventfd 64.57 +#ifndef __NR_eventfd 64.58 +# if defined(__alpha__) 64.59 +# define __NR_eventfd 478 64.60 +# elif defined(__arm__) 64.61 +# define __NR_eventfd (__NR_SYSCALL_BASE+351) 64.62 +# elif defined(__ia64__) 64.63 +# define __NR_eventfd 1309 64.64 +# elif defined(__i386__) 64.65 +# define __NR_eventfd 323 64.66 +# elif defined(__m68k__) 64.67 +# define __NR_eventfd 319 64.68 +# elif 0 && defined(__mips__) 64.69 +# error __NR_eventfd? 64.70 +# define __NR_eventfd (__NR_Linux + 319) 64.71 +# define __NR_eventfd (__NR_Linux + 278) 64.72 +# define __NR_eventfd (__NR_Linux + 282) 64.73 +# elif defined(__hppa__) 64.74 +# define __NR_eventfd (__NR_Linux + 304) 64.75 +# elif defined(__PPC__) || defined(__powerpc64__) 64.76 +# define __NR_eventfd 307 64.77 +# elif defined(__s390__) || defined(__s390x__) 64.78 +# define __NR_eventfd 318 64.79 +# elif defined(__sparc__) 64.80 +# define __NR_eventfd 313 64.81 +# elif defined(__x86_64__) 64.82 +# define __NR_eventfd 284 64.83 +# endif 64.84 +#else 64.85 +# error __NR_eventfd? 64.86 +#endif 64.87 +#define SYS_eventfd __NR_eventfd 64.88 +#endif 64.89 + 64.90 +static inline int tapdisk_sys_eventfd(int initval) 64.91 +{ 64.92 + return syscall(SYS_eventfd, initval, 0); 64.93 +} 64.94 + 64.95 +#endif /* __LIBAIO_COMPAT */
65.1 --- a/tools/blktap2/drivers/qcow2raw.c Mon Nov 02 19:35:54 2009 -0800 65.2 +++ b/tools/blktap2/drivers/qcow2raw.c Fri Mar 19 18:36:57 2010 -0700 65.3 @@ -101,12 +101,6 @@ static void print_bytes(void *ptr, int l 65.4 return; 65.5 } 65.6 65.7 -void 65.8 -queue_event(event_id_t id, char mode, void *private) 65.9 -{ 65.10 - tapdisk_complete_tiocbs(&server.aio_queue); 65.11 -} 65.12 - 65.13 static void debug_output(uint64_t progress, uint64_t size) 65.14 { 65.15 //Output progress every PROGRESS_QUANT
66.1 --- a/tools/blktap2/drivers/tapdisk-image.c Mon Nov 02 19:35:54 2009 -0800 66.2 +++ b/tools/blktap2/drivers/tapdisk-image.c Fri Mar 19 18:36:57 2010 -0700 66.3 @@ -28,6 +28,9 @@ 66.4 #include <errno.h> 66.5 #include <unistd.h> 66.6 #include <stdlib.h> 66.7 +#ifdef MEMSHR 66.8 +#include <memshr.h> 66.9 +#endif 66.10 66.11 #include "tapdisk-image.h" 66.12 #include "tapdisk-driver.h" 66.13 @@ -52,10 +55,13 @@ tapdisk_image_allocate(char *file, int t 66.14 return NULL; 66.15 } 66.16 66.17 - image->type = type; 66.18 - image->flags = flags; 66.19 - image->storage = storage; 66.20 - image->private = private; 66.21 + image->type = type; 66.22 + image->flags = flags; 66.23 + image->storage = storage; 66.24 + image->private = private; 66.25 +#ifdef MEMSHR 66.26 + image->memshr_id = memshr_vbd_image_get(file); 66.27 +#endif 66.28 INIT_LIST_HEAD(&image->next); 66.29 66.30 return image; 66.31 @@ -69,6 +75,9 @@ tapdisk_image_free(td_image_t *image) 66.32 66.33 list_del(&image->next); 66.34 66.35 +#ifdef MEMSHR 66.36 + memshr_vbd_image_put(image->memshr_id); 66.37 +#endif 66.38 free(image->name); 66.39 tapdisk_driver_free(image->driver); 66.40 free(image);
67.1 --- a/tools/blktap2/drivers/tapdisk-image.h Mon Nov 02 19:35:54 2009 -0800 67.2 +++ b/tools/blktap2/drivers/tapdisk-image.h Fri Mar 19 18:36:57 2010 -0700 67.3 @@ -34,6 +34,7 @@ 67.4 struct td_image_handle { 67.5 int type; 67.6 char *name; 67.7 + uint16_t memshr_id; 67.8 67.9 td_flag_t flags; 67.10 int storage;
68.1 --- a/tools/blktap2/drivers/tapdisk-ipc.c Mon Nov 02 19:35:54 2009 -0800 68.2 +++ b/tools/blktap2/drivers/tapdisk-ipc.c Fri Mar 19 18:36:57 2010 -0700 68.3 @@ -30,12 +30,86 @@ 68.4 #include <stdlib.h> 68.5 #include <unistd.h> 68.6 #include <string.h> 68.7 +#include <fcntl.h> 68.8 68.9 #include "tapdisk.h" 68.10 #include "tapdisk-ipc.h" 68.11 #include "tapdisk-vbd.h" 68.12 #include "tapdisk-server.h" 68.13 68.14 +static void 68.15 +tapdisk_ipc_read_event(event_id_t id, char mode, void *private) 68.16 +{ 68.17 + td_ipc_t *ipc = private; 68.18 + tapdisk_ipc_read(ipc); 68.19 +} 68.20 + 68.21 +static void 68.22 +__tapdisk_ipc_init(td_ipc_t *ipc) 68.23 +{ 68.24 + ipc->rfd = -1; 68.25 + ipc->wfd = -1; 68.26 + ipc->rfd_event = -1; 68.27 +} 68.28 + 68.29 +int 68.30 +tapdisk_ipc_open(td_ipc_t *ipc, const char *read, const char *write) 68.31 +{ 68.32 + int err; 68.33 + 68.34 + memset(ipc, 0, sizeof(td_ipc_t)); 68.35 + __tapdisk_ipc_init(ipc); 68.36 + 68.37 + if (read) { 68.38 + ipc->rfd = open(read, O_RDWR | O_NONBLOCK); 68.39 + if (ipc->rfd < 0) { 68.40 + err = -errno; 68.41 + EPRINTF("FD open failed %s: %d\n", read, err); 68.42 + goto fail; 68.43 + } 68.44 + 68.45 + ipc->rfd_event = 68.46 + tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, 68.47 + ipc->rfd, 0, 68.48 + tapdisk_ipc_read_event, 68.49 + ipc); 68.50 + if (ipc->rfd_event < 0) { 68.51 + err = ipc->rfd_event; 68.52 + goto fail; 68.53 + } 68.54 + } 68.55 + 68.56 + if (write) { 68.57 + ipc->wfd = open(write, O_RDWR | O_NONBLOCK); 68.58 + if (ipc->wfd < 0) { 68.59 + err = -errno; 68.60 + EPRINTF("FD open failed %s, %d\n", write, err); 68.61 + goto fail; 68.62 + } 68.63 + } 68.64 + 68.65 + return 0; 68.66 + 68.67 +fail: 68.68 + tapdisk_ipc_close(ipc); 68.69 + return err; 68.70 +} 68.71 + 68.72 +void 68.73 +tapdisk_ipc_close(td_ipc_t *ipc) 68.74 +{ 68.75 + if (ipc->rfd > 0) 68.76 + close(ipc->rfd); 68.77 + 68.78 + if (ipc->wfd > 0) 68.79 + close(ipc->wfd); 68.80 + 68.81 + if (ipc->rfd_event >= 0) 68.82 + 
tapdisk_server_unregister_event(ipc->rfd_event); 68.83 + 68.84 + __tapdisk_ipc_init(ipc); 68.85 +} 68.86 + 68.87 static int 68.88 tapdisk_ipc_write_message(int fd, tapdisk_message_t *message, int timeout) 68.89 {
69.1 --- a/tools/blktap2/drivers/tapdisk-ipc.h Mon Nov 02 19:35:54 2009 -0800 69.2 +++ b/tools/blktap2/drivers/tapdisk-ipc.h Fri Mar 19 18:36:57 2010 -0700 69.3 @@ -29,13 +29,17 @@ 69.4 #define _TAPDISK_IPC_H_ 69.5 69.6 #include "tapdisk-message.h" 69.7 +#include "scheduler.h" 69.8 69.9 typedef struct td_ipc_handle { 69.10 int rfd; 69.11 int wfd; 69.12 + event_id_t rfd_event; 69.13 td_uuid_t uuid; 69.14 } td_ipc_t; 69.15 69.16 +int tapdisk_ipc_open(td_ipc_t *ipc, const char *read, const char *write); 69.17 +void tapdisk_ipc_close(td_ipc_t *ipc); 69.18 int tapdisk_ipc_read(td_ipc_t *ipc); 69.19 int tapdisk_ipc_write(td_ipc_t *ipc, int type); 69.20 int tapdisk_ipc_write_error(td_ipc_t *ipc, const char *message);
70.1 --- a/tools/blktap2/drivers/tapdisk-queue.c Mon Nov 02 19:35:54 2009 -0800 70.2 +++ b/tools/blktap2/drivers/tapdisk-queue.c Fri Mar 19 18:36:57 2010 -0700 70.3 @@ -30,11 +30,18 @@ 70.4 #include <stdlib.h> 70.5 #include <unistd.h> 70.6 #include <libaio.h> 70.7 +#ifdef __linux__ 70.8 +#include <linux/version.h> 70.9 +#endif 70.10 70.11 #include "tapdisk.h" 70.12 #include "tapdisk-log.h" 70.13 #include "tapdisk-queue.h" 70.14 #include "tapdisk-filter.h" 70.15 +#include "tapdisk-server.h" 70.16 +#include "tapdisk-utils.h" 70.17 + 70.18 +#include "libaio-compat.h" 70.19 #include "atomicio.h" 70.20 70.21 #define WARN(_f, _a...) tlog_write(TLOG_WARN, _f, ##_a) 70.22 @@ -46,7 +53,7 @@ 70.23 * so that we can concurrently poll on synchronous and async descriptors. 70.24 * This is signalled by passing 1 as the io context to io_setup. 70.25 */ 70.26 -#define REQUEST_ASYNC_FD 1 70.27 +#define REQUEST_ASYNC_FD ((io_context_t)1) 70.28 70.29 static inline void 70.30 queue_tiocb(struct tqueue *queue, struct tiocb *tiocb) 70.31 @@ -140,7 +147,7 @@ cancel_tiocbs(struct tqueue *queue, int 70.32 * use a private linked list to keep track 70.33 * of the tiocbs we're cancelling. 
70.34 */ 70.35 - tiocb = (struct tiocb *)queue->iocbs[0]->data; 70.36 + tiocb = queue->iocbs[0]->data; 70.37 queued = queue->queued; 70.38 queue->queued = 0; 70.39 70.40 @@ -164,8 +171,40 @@ fail_tiocbs(struct tqueue *queue, int su 70.41 return cancel_tiocbs(queue, err); 70.42 } 70.43 70.44 +/* 70.45 + * rwio 70.46 + */ 70.47 + 70.48 +struct rwio { 70.49 + struct io_event *aio_events; 70.50 +}; 70.51 + 70.52 +static void 70.53 +tapdisk_rwio_destroy(struct tqueue *queue) 70.54 +{ 70.55 + struct rwio *rwio = queue->tio_data; 70.56 + 70.57 + if (rwio->aio_events) { 70.58 + free(rwio->aio_events); 70.59 + rwio->aio_events = NULL; 70.60 + } 70.61 +} 70.62 + 70.63 +static int 70.64 +tapdisk_rwio_setup(struct tqueue *queue, int size) 70.65 +{ 70.66 + struct rwio *rwio = queue->tio_data; 70.67 + int err; 70.68 + 70.69 + rwio->aio_events = calloc(size, sizeof(struct io_event)); 70.70 + if (!rwio->aio_events) 70.71 + return -errno; 70.72 + 70.73 + return 0; 70.74 +} 70.75 + 70.76 static inline ssize_t 70.77 -iocb_rw(struct iocb *iocb) 70.78 +tapdisk_rwio_rw(const struct iocb *iocb) 70.79 { 70.80 int fd = iocb->aio_fildes; 70.81 char *buf = iocb->u.c.buf; 70.82 @@ -176,7 +215,7 @@ iocb_rw(struct iocb *iocb) 70.83 70.84 if (lseek(fd, off, SEEK_SET) == (off_t)-1) 70.85 return -errno; 70.86 - 70.87 + 70.88 if (atomicio(func, fd, buf, size) != size) 70.89 return -errno; 70.90 70.91 @@ -184,8 +223,9 @@ iocb_rw(struct iocb *iocb) 70.92 } 70.93 70.94 static int 70.95 -io_synchronous_rw(struct tqueue *queue) 70.96 +tapdisk_rwio_submit(struct tqueue *queue) 70.97 { 70.98 + struct rwio *rwio = queue->tio_data; 70.99 int i, merged, split; 70.100 struct iocb *iocb; 70.101 struct tiocb *tiocb; 70.102 @@ -200,18 +240,18 @@ io_synchronous_rw(struct tqueue *queue) 70.103 queue->queued = 0; 70.104 70.105 for (i = 0; i < merged; i++) { 70.106 - ep = queue->aio_events + i; 70.107 + ep = rwio->aio_events + i; 70.108 iocb = queue->iocbs[i]; 70.109 ep->obj = iocb; 70.110 - ep->res = iocb_rw(iocb); 
70.111 + ep->res = tapdisk_rwio_rw(iocb); 70.112 } 70.113 70.114 - split = io_split(&queue->opioctx, queue->aio_events, merged); 70.115 - tapdisk_filter_events(queue->filter, queue->aio_events, split); 70.116 + split = io_split(&queue->opioctx, rwio->aio_events, merged); 70.117 + tapdisk_filter_events(queue->filter, rwio->aio_events, split); 70.118 70.119 - for (i = split, ep = queue->aio_events; i-- > 0; ep++) { 70.120 + for (i = split, ep = rwio->aio_events; i-- > 0; ep++) { 70.121 iocb = ep->obj; 70.122 - tiocb = (struct tiocb *)iocb->data; 70.123 + tiocb = iocb->data; 70.124 complete_tiocb(queue, tiocb, ep->res); 70.125 } 70.126 70.127 @@ -220,52 +260,374 @@ io_synchronous_rw(struct tqueue *queue) 70.128 return split; 70.129 } 70.130 70.131 +static const struct tio td_tio_rwio = { 70.132 + .name = "rwio", 70.133 + .data_size = 0, 70.134 + .tio_setup = NULL, 70.135 + .tio_destroy = NULL, 70.136 + .tio_submit = tapdisk_rwio_submit 70.137 +}; 70.138 + 70.139 +/* 70.140 + * libaio 70.141 + */ 70.142 + 70.143 +struct lio { 70.144 + io_context_t aio_ctx; 70.145 + struct io_event *aio_events; 70.146 + 70.147 + int event_fd; 70.148 + int event_id; 70.149 + 70.150 + int flags; 70.151 +}; 70.152 + 70.153 +#define LIO_FLAG_EVENTFD (1<<0) 70.154 + 70.155 +static int 70.156 +tapdisk_lio_check_resfd(void) 70.157 +{ 70.158 +#if defined(__linux__) 70.159 + return tapdisk_linux_version() >= KERNEL_VERSION(2, 6, 22); 70.160 +#else 70.161 + return 1; 70.162 +#endif 70.163 +} 70.164 + 70.165 +static void 70.166 +tapdisk_lio_destroy_aio(struct tqueue *queue) 70.167 +{ 70.168 + struct lio *lio = queue->tio_data; 70.169 + 70.170 + if (lio->event_fd >= 0) { 70.171 + close(lio->event_fd); 70.172 + lio->event_fd = -1; 70.173 + } 70.174 + 70.175 + if (lio->aio_ctx) { 70.176 + io_destroy(lio->aio_ctx); 70.177 + lio->aio_ctx = 0; 70.178 + } 70.179 +} 70.180 + 70.181 +static int 70.182 +__lio_setup_aio_poll(struct tqueue *queue, int qlen) 70.183 +{ 70.184 + struct lio *lio = 
queue->tio_data; 70.185 + int err, fd; 70.186 + 70.187 + lio->aio_ctx = REQUEST_ASYNC_FD; 70.188 + 70.189 + fd = io_setup(qlen, &lio->aio_ctx); 70.190 + if (fd < 0) { 70.191 + lio->aio_ctx = 0; 70.192 + err = -errno; 70.193 + 70.194 + if (err == -EINVAL) 70.195 + goto fail_fd; 70.196 + 70.197 + goto fail; 70.198 + } 70.199 + 70.200 + lio->event_fd = fd; 70.201 + 70.202 + return 0; 70.203 + 70.204 +fail_fd: 70.205 + DPRINTF("Couldn't get fd for AIO poll support. This is probably " 70.206 + "because your kernel does not have the aio-poll patch " 70.207 + "applied.\n"); 70.208 +fail: 70.209 + return err; 70.210 +} 70.211 + 70.212 +static int 70.213 +__lio_setup_aio_eventfd(struct tqueue *queue, int qlen) 70.214 +{ 70.215 + struct lio *lio = queue->tio_data; 70.216 + int err; 70.217 + 70.218 + err = io_setup(qlen, &lio->aio_ctx); 70.219 + if (err < 0) { 70.220 + lio->aio_ctx = 0; 70.221 + return err; 70.222 + } 70.223 + 70.224 + lio->event_fd = tapdisk_sys_eventfd(0); 70.225 + if (lio->event_fd < 0) 70.226 + return -errno; 70.227 + 70.228 + lio->flags |= LIO_FLAG_EVENTFD; 70.229 + 70.230 + return 0; 70.231 +} 70.232 + 70.233 +static int 70.234 +tapdisk_lio_setup_aio(struct tqueue *queue, int qlen) 70.235 +{ 70.236 + struct lio *lio = queue->tio_data; 70.237 + int err; 70.238 + 70.239 + lio->aio_ctx = 0; 70.240 + lio->event_fd = -1; 70.241 + 70.242 + /* 70.243 + * prefer the mainline eventfd(2) api, if available. 70.244 + * if not, fall back to the poll fd patch. 70.245 + */ 70.246 + 70.247 + err = !tapdisk_lio_check_resfd(); 70.248 + if (!err) 70.249 + err = __lio_setup_aio_eventfd(queue, qlen); 70.250 + if (err) 70.251 + err = __lio_setup_aio_poll(queue, qlen); 70.252 + 70.253 + if (err == -EAGAIN) 70.254 + goto fail_rsv; 70.255 +fail: 70.256 + return err; 70.257 + 70.258 +fail_rsv: 70.259 + DPRINTF("Couldn't setup AIO context. 
If you are trying to " 70.260 + "concurrently use a large number of blktap-based disks, you may " 70.261 + "need to increase the system-wide aio request limit. " 70.262 + "(e.g. 'echo 1048576 > /proc/sys/fs/aio-max-nr')\n"); 70.263 + goto fail; 70.264 +} 70.265 + 70.266 + 70.267 +static void 70.268 +tapdisk_lio_destroy(struct tqueue *queue) 70.269 +{ 70.270 + struct lio *lio = queue->tio_data; 70.271 + 70.272 + if (!lio) 70.273 + return; 70.274 + 70.275 + if (lio->event_id >= 0) { 70.276 + tapdisk_server_unregister_event(lio->event_id); 70.277 + lio->event_id = -1; 70.278 + } 70.279 + 70.280 + tapdisk_lio_destroy_aio(queue); 70.281 + 70.282 + if (lio->aio_events) { 70.283 + free(lio->aio_events); 70.284 + lio->aio_events = NULL; 70.285 + } 70.286 +} 70.287 + 70.288 +static void 70.289 +tapdisk_lio_set_eventfd(struct tqueue *queue, int n, struct iocb **iocbs) 70.290 +{ 70.291 + struct lio *lio = queue->tio_data; 70.292 + int i; 70.293 + 70.294 + if (lio->flags & LIO_FLAG_EVENTFD) 70.295 + for (i = 0; i < n; ++i) 70.296 + __io_set_eventfd(iocbs[i], lio->event_fd); 70.297 +} 70.298 + 70.299 +static void 70.300 +tapdisk_lio_ack_event(struct tqueue *queue) 70.301 +{ 70.302 + struct lio *lio = queue->tio_data; 70.303 + uint64_t val; 70.304 + 70.305 + if (lio->flags & LIO_FLAG_EVENTFD) 70.306 + read(lio->event_fd, &val, sizeof(val)); 70.307 +} 70.308 + 70.309 +static void 70.310 +tapdisk_lio_event(event_id_t id, char mode, void *private) 70.311 +{ 70.312 + struct tqueue *queue = private; 70.313 + struct lio *lio; 70.314 + int i, ret, split; 70.315 + struct iocb *iocb; 70.316 + struct tiocb *tiocb; 70.317 + struct io_event *ep; 70.318 + 70.319 + tapdisk_lio_ack_event(queue); 70.320 + 70.321 + lio = queue->tio_data; 70.322 + ret = io_getevents(lio->aio_ctx, 0, 70.323 + queue->size, lio->aio_events, NULL); 70.324 + split = io_split(&queue->opioctx, lio->aio_events, ret); 70.325 + tapdisk_filter_events(queue->filter, lio->aio_events, split); 70.326 + 70.327 + DBG("events: %d, 
tiocbs: %d\n", ret, split); 70.328 + 70.329 + queue->iocbs_pending -= ret; 70.330 + queue->tiocbs_pending -= split; 70.331 + 70.332 + for (i = split, ep = lio->aio_events; i-- > 0; ep++) { 70.333 + iocb = ep->obj; 70.334 + tiocb = iocb->data; 70.335 + complete_tiocb(queue, tiocb, ep->res); 70.336 + } 70.337 + 70.338 + queue_deferred_tiocbs(queue); 70.339 +} 70.340 + 70.341 +static int 70.342 +tapdisk_lio_setup(struct tqueue *queue, int qlen) 70.343 +{ 70.344 + struct lio *lio = queue->tio_data; 70.345 + size_t sz; 70.346 + int err; 70.347 + 70.348 + lio->event_id = -1; 70.349 + 70.350 + err = tapdisk_lio_setup_aio(queue, qlen); 70.351 + if (err) 70.352 + goto fail; 70.353 + 70.354 + lio->event_id = 70.355 + tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, 70.356 + lio->event_fd, 0, 70.357 + tapdisk_lio_event, 70.358 + queue); 70.359 + err = lio->event_id; 70.360 + if (err < 0) 70.361 + goto fail; 70.362 + 70.363 + lio->aio_events = calloc(qlen, sizeof(struct io_event)); 70.364 + if (!lio->aio_events) { 70.365 + err = -errno; 70.366 + goto fail; 70.367 + } 70.368 + 70.369 + return 0; 70.370 + 70.371 +fail: 70.372 + tapdisk_lio_destroy(queue); 70.373 + return err; 70.374 +} 70.375 + 70.376 +static int 70.377 +tapdisk_lio_submit(struct tqueue *queue) 70.378 +{ 70.379 + struct lio *lio = queue->tio_data; 70.380 + int merged, submitted, err = 0; 70.381 + 70.382 + if (!queue->queued) 70.383 + return 0; 70.384 + 70.385 + tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued); 70.386 + merged = io_merge(&queue->opioctx, queue->iocbs, queue->queued); 70.387 + tapdisk_lio_set_eventfd(queue, merged, queue->iocbs); 70.388 + submitted = io_submit(lio->aio_ctx, merged, queue->iocbs); 70.389 + 70.390 + DBG("queued: %d, merged: %d, submitted: %d\n", 70.391 + queue->queued, merged, submitted); 70.392 + 70.393 + if (submitted < 0) { 70.394 + err = submitted; 70.395 + submitted = 0; 70.396 + } else if (submitted < merged) 70.397 + err = -EIO; 70.398 + 70.399 + 
queue->iocbs_pending += submitted; 70.400 + queue->tiocbs_pending += queue->queued; 70.401 + queue->queued = 0; 70.402 + 70.403 + if (err) 70.404 + queue->tiocbs_pending -= 70.405 + fail_tiocbs(queue, submitted, merged, err); 70.406 + 70.407 + return submitted; 70.408 +} 70.409 + 70.410 +static const struct tio td_tio_lio = { 70.411 + .name = "lio", 70.412 + .data_size = sizeof(struct lio), 70.413 + .tio_setup = tapdisk_lio_setup, 70.414 + .tio_destroy = tapdisk_lio_destroy, 70.415 + .tio_submit = tapdisk_lio_submit, 70.416 +}; 70.417 + 70.418 +static void 70.419 +tapdisk_queue_free_io(struct tqueue *queue) 70.420 +{ 70.421 + if (queue->tio) { 70.422 + if (queue->tio->tio_destroy) 70.423 + queue->tio->tio_destroy(queue); 70.424 + queue->tio = NULL; 70.425 + } 70.426 + 70.427 + if (queue->tio_data) { 70.428 + free(queue->tio_data); 70.429 + queue->tio_data = NULL; 70.430 + } 70.431 +} 70.432 + 70.433 +static int 70.434 +tapdisk_queue_init_io(struct tqueue *queue, int drv) 70.435 +{ 70.436 + const struct tio *tio; 70.437 + int err; 70.438 + 70.439 + switch (drv) { 70.440 + case TIO_DRV_LIO: 70.441 + tio = &td_tio_lio; 70.442 + break; 70.443 + case TIO_DRV_RWIO: 70.444 + tio = &td_tio_rwio; 70.445 + break; 70.446 + default: 70.447 + err = -EINVAL; 70.448 + goto fail; 70.449 + } 70.450 + 70.451 + queue->tio_data = calloc(1, tio->data_size); 70.452 + if (!queue->tio_data) { 70.453 + PERROR("malloc(%zu)", tio->data_size); 70.454 + err = -errno; 70.455 + goto fail; 70.456 + } 70.457 + 70.458 + queue->tio = tio; 70.459 + 70.460 + if (tio->tio_setup) { 70.461 + err = tio->tio_setup(queue, queue->size); 70.462 + if (err) 70.463 + goto fail; 70.464 + } 70.465 + 70.466 + DPRINTF("I/O queue driver: %s\n", tio->name); 70.467 + 70.468 + return 0; 70.469 + 70.470 +fail: 70.471 + tapdisk_queue_free_io(queue); 70.472 + return err; 70.473 +} 70.474 + 70.475 int 70.476 tapdisk_init_queue(struct tqueue *queue, int size, 70.477 - int sync, struct tfilter *filter) 70.478 + int drv, 
struct tfilter *filter) 70.479 { 70.480 int i, err; 70.481 70.482 memset(queue, 0, sizeof(struct tqueue)); 70.483 70.484 queue->size = size; 70.485 - queue->sync = sync; 70.486 queue->filter = filter; 70.487 70.488 - if (sync) { 70.489 - /* set up a pipe so we can return 70.490 - * a poll fd that won't fire. */ 70.491 - if (pipe(queue->dummy_pipe)) 70.492 - return -errno; 70.493 - queue->poll_fd = queue->dummy_pipe[0]; 70.494 - } else { 70.495 - queue->aio_ctx = (io_context_t)REQUEST_ASYNC_FD; 70.496 - queue->poll_fd = io_setup(size, &queue->aio_ctx); 70.497 + if (!size) 70.498 + return 0; 70.499 70.500 - if (queue->poll_fd < 0) { 70.501 - if (queue->poll_fd == -EAGAIN) 70.502 - DPRINTF("Couldn't setup AIO context. If you " 70.503 - "are trying to concurrently use a " 70.504 - "large number of blktap-based disks, " 70.505 - "you may need to increase the " 70.506 - "system-wide aio request limit. " 70.507 - "(e.g. 'echo 1048576 > /proc/sys/fs/" 70.508 - "aio-max-nr')\n"); 70.509 - else 70.510 - DPRINTF("Couldn't get fd for AIO poll " 70.511 - "support. 
This is probably because " 70.512 - "your kernel does not have the " 70.513 - "aio-poll patch applied.\n"); 70.514 - return queue->poll_fd; 70.515 - } 70.516 + err = tapdisk_queue_init_io(queue, drv); 70.517 + if (err) 70.518 + goto fail; 70.519 + 70.520 + queue->iocbs = calloc(size, sizeof(struct iocb *)); 70.521 + if (!queue->iocbs) { 70.522 + err = -errno; 70.523 + goto fail; 70.524 } 70.525 70.526 - err = -ENOMEM; 70.527 - queue->iocbs = calloc(size, sizeof(struct iocb *)); 70.528 - queue->aio_events = calloc(size, sizeof(struct io_event)); 70.529 - if (!queue->iocbs || !queue->aio_events) 70.530 - goto fail; 70.531 - 70.532 err = opio_init(&queue->opioctx, size); 70.533 if (err) 70.534 goto fail; 70.535 @@ -280,14 +642,11 @@ tapdisk_init_queue(struct tqueue *queue, 70.536 void 70.537 tapdisk_free_queue(struct tqueue *queue) 70.538 { 70.539 - if (queue->sync) { 70.540 - close(queue->dummy_pipe[0]); 70.541 - close(queue->dummy_pipe[1]); 70.542 - } else 70.543 - io_destroy(queue->aio_ctx); 70.544 + tapdisk_queue_free_io(queue); 70.545 70.546 free(queue->iocbs); 70.547 - free(queue->aio_events); 70.548 + queue->iocbs = NULL; 70.549 + 70.550 opio_free(&queue->opioctx); 70.551 } 70.552 70.553 @@ -297,9 +656,9 @@ tapdisk_debug_queue(struct tqueue *queue 70.554 struct tiocb *tiocb = queue->deferred.head; 70.555 70.556 WARN("TAPDISK QUEUE:\n"); 70.557 - WARN("size: %d, sync: %d, queued: %d, iocbs_pending: %d, " 70.558 + WARN("size: %d, tio: %s, queued: %d, iocbs_pending: %d, " 70.559 "tiocbs_pending: %d, tiocbs_deferred: %d, deferrals: %"PRIx64"\n", 70.560 - queue->size, queue->sync, queue->queued, queue->iocbs_pending, 70.561 + queue->size, queue->tio->name, queue->queued, queue->iocbs_pending, 70.562 queue->tiocbs_pending, queue->tiocbs_deferred, queue->deferrals); 70.563 70.564 if (tiocb) { 70.565 @@ -340,42 +699,14 @@ tapdisk_queue_tiocb(struct tqueue *queue 70.566 defer_tiocb(queue, tiocb); 70.567 } 70.568 70.569 + 70.570 /* 70.571 * fail_tiocbs may queue more 
tiocbs 70.572 */ 70.573 int 70.574 tapdisk_submit_tiocbs(struct tqueue *queue) 70.575 { 70.576 - int merged, submitted, err = 0; 70.577 - 70.578 - if (!queue->queued) 70.579 - return 0; 70.580 - 70.581 - if (queue->sync) 70.582 - return io_synchronous_rw(queue); 70.583 - 70.584 - tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued); 70.585 - merged = io_merge(&queue->opioctx, queue->iocbs, queue->queued); 70.586 - submitted = io_submit(queue->aio_ctx, merged, queue->iocbs); 70.587 - 70.588 - DBG("queued: %d, merged: %d, submitted: %d\n", 70.589 - queue->queued, merged, submitted); 70.590 - 70.591 - if (submitted < 0) { 70.592 - err = submitted; 70.593 - submitted = 0; 70.594 - } else if (submitted < merged) 70.595 - err = -EIO; 70.596 - 70.597 - queue->iocbs_pending += submitted; 70.598 - queue->tiocbs_pending += queue->queued; 70.599 - queue->queued = 0; 70.600 - 70.601 - if (err) 70.602 - queue->tiocbs_pending -= 70.603 - fail_tiocbs(queue, submitted, merged, err); 70.604 - 70.605 - return submitted; 70.606 + return queue->tio->tio_submit(queue); 70.607 } 70.608 70.609 int 70.610 @@ -390,35 +721,6 @@ tapdisk_submit_all_tiocbs(struct tqueue 70.611 return submitted; 70.612 } 70.613 70.614 -int 70.615 -tapdisk_complete_tiocbs(struct tqueue *queue) 70.616 -{ 70.617 - int i, ret, split; 70.618 - struct iocb *iocb; 70.619 - struct tiocb *tiocb; 70.620 - struct io_event *ep; 70.621 - 70.622 - ret = io_getevents(queue->aio_ctx, 0, 70.623 - queue->size, queue->aio_events, NULL); 70.624 - split = io_split(&queue->opioctx, queue->aio_events, ret); 70.625 - tapdisk_filter_events(queue->filter, queue->aio_events, split); 70.626 - 70.627 - DBG("events: %d, tiocbs: %d\n", ret, split); 70.628 - 70.629 - queue->iocbs_pending -= ret; 70.630 - queue->tiocbs_pending -= split; 70.631 - 70.632 - for (i = split, ep = queue->aio_events; i-- > 0; ep++) { 70.633 - iocb = ep->obj; 70.634 - tiocb = (struct tiocb *)iocb->data; 70.635 - complete_tiocb(queue, tiocb, ep->res); 
70.636 - } 70.637 - 70.638 - queue_deferred_tiocbs(queue); 70.639 - 70.640 - return split; 70.641 -} 70.642 - 70.643 /* 70.644 * cancel_tiocbs may queue more tiocbs 70.645 */
71.1 --- a/tools/blktap2/drivers/tapdisk-queue.h Mon Nov 02 19:35:54 2009 -0800 71.2 +++ b/tools/blktap2/drivers/tapdisk-queue.h Fri Mar 19 18:36:57 2010 -0700 71.3 @@ -32,6 +32,7 @@ 71.4 #include <libaio.h> 71.5 71.6 #include "io-optimize.h" 71.7 +#include "scheduler.h" 71.8 71.9 struct tiocb; 71.10 struct tfilter; 71.11 @@ -54,16 +55,14 @@ struct tlist { 71.12 71.13 struct tqueue { 71.14 int size; 71.15 - int sync; 71.16 71.17 - int poll_fd; 71.18 - io_context_t aio_ctx; 71.19 + const struct tio *tio; 71.20 + void *tio_data; 71.21 + 71.22 struct opioctx opioctx; 71.23 - int dummy_pipe[2]; 71.24 71.25 int queued; 71.26 struct iocb **iocbs; 71.27 - struct io_event *aio_events; 71.28 71.29 /* number of iocbs pending in the aio layer */ 71.30 int iocbs_pending; 71.31 @@ -85,6 +84,20 @@ struct tqueue { 71.32 uint64_t deferrals; 71.33 }; 71.34 71.35 +struct tio { 71.36 + const char *name; 71.37 + size_t data_size; 71.38 + 71.39 + int (*tio_setup) (struct tqueue *queue, int qlen); 71.40 + void (*tio_destroy) (struct tqueue *queue); 71.41 + int (*tio_submit) (struct tqueue *queue); 71.42 +}; 71.43 + 71.44 +enum { 71.45 + TIO_DRV_LIO = 1, 71.46 + TIO_DRV_RWIO = 2, 71.47 +}; 71.48 + 71.49 /* 71.50 * Interface for request producer (i.e., tapdisk) 71.51 * NB: the following functions may cause additional tiocbs to be queued: 71.52 @@ -98,13 +111,12 @@ struct tqueue { 71.53 #define tapdisk_queue_empty(q) ((q)->queued == 0) 71.54 #define tapdisk_queue_full(q) \ 71.55 (((q)->tiocbs_pending + (q)->queued) >= (q)->size) 71.56 -int tapdisk_init_queue(struct tqueue *, int size, int sync, struct tfilter *); 71.57 +int tapdisk_init_queue(struct tqueue *, int size, int drv, struct tfilter *); 71.58 void tapdisk_free_queue(struct tqueue *); 71.59 void tapdisk_debug_queue(struct tqueue *); 71.60 void tapdisk_queue_tiocb(struct tqueue *, struct tiocb *); 71.61 int tapdisk_submit_tiocbs(struct tqueue *); 71.62 int tapdisk_submit_all_tiocbs(struct tqueue *); 71.63 -int 
tapdisk_complete_tiocbs(struct tqueue *); 71.64 int tapdisk_cancel_tiocbs(struct tqueue *); 71.65 int tapdisk_cancel_all_tiocbs(struct tqueue *); 71.66 void tapdisk_prep_tiocb(struct tiocb *, int, int, char *, size_t,
72.1 --- a/tools/blktap2/drivers/tapdisk-server.c Mon Nov 02 19:35:54 2009 -0800 72.2 +++ b/tools/blktap2/drivers/tapdisk-server.c Fri Mar 19 18:36:57 2010 -0700 72.3 @@ -26,7 +26,6 @@ 72.4 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 72.5 */ 72.6 #include <stdio.h> 72.7 -#include <fcntl.h> 72.8 #include <errno.h> 72.9 #include <unistd.h> 72.10 #include <stdlib.h> 72.11 @@ -222,63 +221,36 @@ tapdisk_server_send_error(const char *me 72.12 tapdisk_ipc_write_error(&vbd->ipc, message); 72.13 } 72.14 72.15 -static void 72.16 -tapdisk_server_read_ipc_message(event_id_t id, char mode, void *private) 72.17 +static int 72.18 +tapdisk_server_init_ipc(const char *read, const char *write) 72.19 { 72.20 - tapdisk_ipc_read(&server.ipc); 72.21 + return tapdisk_ipc_open(&server.ipc, read, write); 72.22 } 72.23 72.24 static void 72.25 -tapdisk_server_aio_queue_event(event_id_t id, char mode, void *private) 72.26 +tapdisk_server_close_ipc(void) 72.27 { 72.28 - tapdisk_complete_tiocbs(&server.aio_queue); 72.29 -} 72.30 - 72.31 -static void 72.32 -tapdisk_server_free_aio_queue(void) 72.33 -{ 72.34 - tapdisk_server_unregister_event(server.aio_queue_event_id); 72.35 - tapdisk_free_queue(&server.aio_queue); 72.36 + tapdisk_ipc_close(&server.ipc); 72.37 } 72.38 72.39 static int 72.40 -tapdisk_server_initialize_aio_queue(void) 72.41 +tapdisk_server_init_aio(void) 72.42 { 72.43 - int err; 72.44 - event_id_t id; 72.45 - 72.46 - err = tapdisk_init_queue(&server.aio_queue, 72.47 - TAPDISK_TIOCBS, 0, NULL); 72.48 - if (err) 72.49 - return err; 72.50 + return tapdisk_init_queue(&server.aio_queue, TAPDISK_TIOCBS, 72.51 + TIO_DRV_LIO, NULL); 72.52 +} 72.53 72.54 - id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, 72.55 - server.aio_queue.poll_fd, 0, 72.56 - tapdisk_server_aio_queue_event, 72.57 - NULL); 72.58 - if (id < 0) { 72.59 - tapdisk_free_queue(&server.aio_queue); 72.60 - return id; 72.61 - } 72.62 - 72.63 - server.aio_queue_event_id = id; 72.64 - 72.65 - return 
0; 72.66 +static void 72.67 +tapdisk_server_close_aio(void) 72.68 +{ 72.69 + tapdisk_free_queue(&server.aio_queue); 72.70 } 72.71 72.72 static void 72.73 tapdisk_server_close(void) 72.74 { 72.75 - tapdisk_server_free_aio_queue(); 72.76 - 72.77 - if (server.control_event) 72.78 - scheduler_unregister_event(&server.scheduler, server.control_event); 72.79 - 72.80 - if (server.ipc.rfd != -1) 72.81 - close(server.ipc.rfd); 72.82 - 72.83 - if (server.ipc.wfd != -1) 72.84 - close(server.ipc.wfd); 72.85 + tapdisk_server_close_aio(); 72.86 + tapdisk_server_close_ipc(); 72.87 } 72.88 72.89 static void 72.90 @@ -334,63 +306,26 @@ int 72.91 tapdisk_server_initialize(const char *read, const char *write) 72.92 { 72.93 int err; 72.94 - event_id_t event_id; 72.95 72.96 - event_id = 0; 72.97 memset(&server, 0, sizeof(tapdisk_server_t)); 72.98 - server.ipc.rfd = server.ipc.wfd = -1; 72.99 - 72.100 INIT_LIST_HEAD(&server.vbds); 72.101 72.102 - if (read) { 72.103 - server.ipc.rfd = open(read, O_RDWR | O_NONBLOCK); 72.104 - if (server.ipc.rfd < 0) { 72.105 - err = -errno; 72.106 - EPRINTF("FD open failed %s: %d\n", read, err); 72.107 - goto fail; 72.108 - } 72.109 - } 72.110 - 72.111 - if (write) { 72.112 - server.ipc.wfd = open(write, O_RDWR | O_NONBLOCK); 72.113 - if (server.ipc.wfd < 0) { 72.114 - err = -errno; 72.115 - EPRINTF("FD open failed %s, %d\n", write, err); 72.116 - goto fail; 72.117 - } 72.118 - } 72.119 - 72.120 scheduler_initialize(&server.scheduler); 72.121 72.122 - if (read) { 72.123 - event_id = scheduler_register_event(&server.scheduler, 72.124 - SCHEDULER_POLL_READ_FD, 72.125 - server.ipc.rfd, 0, 72.126 - tapdisk_server_read_ipc_message, 72.127 - NULL); 72.128 - if (event_id < 0) { 72.129 - err = event_id; 72.130 - goto fail; 72.131 - } 72.132 - } 72.133 - 72.134 - err = tapdisk_server_initialize_aio_queue(); 72.135 + err = tapdisk_server_init_ipc(read, write); 72.136 if (err) 72.137 goto fail; 72.138 72.139 - server.control_event = event_id; 72.140 + err = 
tapdisk_server_init_aio(); 72.141 + if (err) 72.142 + goto fail; 72.143 + 72.144 server.run = 1; 72.145 72.146 return 0; 72.147 72.148 fail: 72.149 - if (server.ipc.rfd > 0) 72.150 - close(server.ipc.rfd); 72.151 - if (server.ipc.wfd > 0) 72.152 - close(server.ipc.wfd); 72.153 - if (event_id > 0) 72.154 - scheduler_unregister_event(&server.scheduler, 72.155 - server.control_event); 72.156 + tapdisk_server_close_ipc(); 72.157 return err; 72.158 } 72.159
73.1 --- a/tools/blktap2/drivers/tapdisk-server.h Mon Nov 02 19:35:54 2009 -0800 73.2 +++ b/tools/blktap2/drivers/tapdisk-server.h Fri Mar 19 18:36:57 2010 -0700 73.3 @@ -57,9 +57,7 @@ typedef struct tapdisk_server { 73.4 td_ipc_t ipc; 73.5 struct list_head vbds; 73.6 scheduler_t scheduler; 73.7 - event_id_t control_event; 73.8 struct tqueue aio_queue; 73.9 - event_id_t aio_queue_event_id; 73.10 } tapdisk_server_t; 73.11 73.12 #endif
74.1 --- a/tools/blktap2/drivers/tapdisk-utils.c Mon Nov 02 19:35:54 2009 -0800 74.2 +++ b/tools/blktap2/drivers/tapdisk-utils.c Fri Mar 19 18:36:57 2010 -0700 74.3 @@ -33,6 +33,10 @@ 74.4 #include <sys/mman.h> 74.5 #include <sys/ioctl.h> 74.6 #include <sys/resource.h> 74.7 +#include <sys/utsname.h> 74.8 +#ifdef __linux__ 74.9 +#include <linux/version.h> 74.10 +#endif 74.11 74.12 #include "blk.h" 74.13 #include "tapdisk.h" 74.14 @@ -183,3 +187,31 @@ tapdisk_get_image_size(int fd, uint64_t 74.15 74.16 return 0; 74.17 } 74.18 + 74.19 +#ifdef __linux__ 74.20 + 74.21 +int tapdisk_linux_version(void) 74.22 +{ 74.23 + struct utsname uts; 74.24 + unsigned int version, patchlevel, sublevel; 74.25 + int n, err; 74.26 + 74.27 + err = uname(&uts); 74.28 + if (err) 74.29 + return -errno; 74.30 + 74.31 + n = sscanf(uts.release, "%u.%u.%u", &version, &patchlevel, &sublevel); 74.32 + if (n != 3) 74.33 + return -ENOSYS; 74.34 + 74.35 + return KERNEL_VERSION(version, patchlevel, sublevel); 74.36 +} 74.37 + 74.38 +#else 74.39 + 74.40 +int tapdisk_linux_version(void) 74.41 +{ 74.42 + return -ENOSYS; 74.43 +} 74.44 + 74.45 +#endif
75.1 --- a/tools/blktap2/drivers/tapdisk-utils.h Mon Nov 02 19:35:54 2009 -0800 75.2 +++ b/tools/blktap2/drivers/tapdisk-utils.h Fri Mar 19 18:36:57 2010 -0700 75.3 @@ -38,5 +38,6 @@ int tapdisk_set_resource_limits(void); 75.4 int tapdisk_namedup(char **, const char *); 75.5 int tapdisk_parse_disk_type(const char *, char **, int *); 75.6 int tapdisk_get_image_size(int, uint64_t *, uint32_t *); 75.7 +int tapdisk_linux_version(void); 75.8 75.9 #endif
76.1 --- a/tools/blktap2/drivers/tapdisk-vbd.c Mon Nov 02 19:35:54 2009 -0800 76.2 +++ b/tools/blktap2/drivers/tapdisk-vbd.c Fri Mar 19 18:36:57 2010 -0700 76.3 @@ -34,6 +34,9 @@ 76.4 #include <libgen.h> 76.5 #include <sys/mman.h> 76.6 #include <sys/ioctl.h> 76.7 +#ifdef MEMSHR 76.8 +#include <memshr.h> 76.9 +#endif 76.10 76.11 #include "libvhd.h" 76.12 #include "tapdisk-image.h" 76.13 @@ -105,7 +108,12 @@ tapdisk_vbd_initialize(int rfd, int wfd, 76.14 /* default blktap ring completion */ 76.15 vbd->callback = tapdisk_vbd_callback; 76.16 vbd->argument = vbd; 76.17 + 76.18 +#ifdef MEMSHR 76.19 + memshr_vbd_initialize(); 76.20 +#endif 76.21 76.22 + INIT_LIST_HEAD(&vbd->driver_stack); 76.23 INIT_LIST_HEAD(&vbd->images); 76.24 INIT_LIST_HEAD(&vbd->new_requests); 76.25 INIT_LIST_HEAD(&vbd->pending_requests); 76.26 @@ -541,6 +549,105 @@ regerr: 76.27 goto out; 76.28 } 76.29 76.30 +/* TODO: ugh, lets not call it parent info... */ 76.31 +static struct list_head * 76.32 +tapdisk_vbd_open_level(td_vbd_t *vbd, char* params, int driver_type, td_disk_info_t *parent_info, td_flag_t flags) 76.33 +{ 76.34 + char *name; 76.35 + int type, err; 76.36 + td_image_t *image; 76.37 + td_disk_id_t id; 76.38 + struct list_head *images; 76.39 + td_driver_t *driver; 76.40 + 76.41 + images = calloc(1, sizeof(struct list_head)); 76.42 + INIT_LIST_HEAD(images); 76.43 + 76.44 + name = params; 76.45 + type = driver_type; 76.46 + 76.47 + for (;;) { 76.48 + err = -ENOMEM; 76.49 + image = tapdisk_image_allocate(name, type, 76.50 + vbd->storage, flags, vbd); 76.51 + 76.52 + /* free 'name' if it was created by td_get_parent_id() */ 76.53 + if (name != params) { 76.54 + free(name); 76.55 + name = NULL; 76.56 + } 76.57 + 76.58 + if (!image) 76.59 + return NULL; 76.60 + 76.61 + 76.62 + /* We have to do this to set the driver info for child drivers. 
this conflicts with td_open */ 76.63 + driver = image->driver; 76.64 + if (!driver) { 76.65 + driver = tapdisk_driver_allocate(image->type, 76.66 + image->name, 76.67 + image->flags, 76.68 + image->storage); 76.69 + if (!driver) 76.70 + return NULL; 76.71 + } 76.72 + /* the image has a driver, set the info and driver */ 76.73 + image->driver = driver; 76.74 + image->info = driver->info; 76.75 + 76.76 + /* XXX: we don't touch driver->refcount, broken? */ 76.77 + /* XXX: we've replicated about 90% of td_open() gross! */ 76.78 + /* XXX: this breaks if a driver modifies its info within a layer */ 76.79 + 76.80 + /* if the parent info is set, pass it to the child */ 76.81 + if(parent_info) 76.82 + { 76.83 + image->driver->info = *parent_info; 76.84 + } 76.85 + 76.86 + err = td_load(image); 76.87 + if (err) { 76.88 + if (err != -ENODEV) 76.89 + return NULL; 76.90 + 76.91 + err = td_open(image); 76.92 + if (err) 76.93 + return NULL; 76.94 + } 76.95 + 76.96 + /* TODO: non-sink drivers that don't care about their child 76.97 + * currently return EINVAL. 
Could return TD_PARENT_OK or 76.98 + * TD_ANY_PARENT */ 76.99 + 76.100 + err = td_get_parent_id(image, &id); 76.101 + if (err && (err != TD_NO_PARENT && err != -EINVAL)) { 76.102 + td_close(image); 76.103 + return NULL; 76.104 + } 76.105 + 76.106 + if (!image->storage) 76.107 + image->storage = vbd->storage; 76.108 + 76.109 + /* add this image to the end of the list */ 76.110 + list_add_tail(&image->next, images); 76.111 + 76.112 + image = NULL; 76.113 + 76.114 + /* if the image does not have a parent we return the 76.115 + * list of images generated by this level of the stack */ 76.116 + if (err == TD_NO_PARENT || err == -EINVAL) 76.117 + break; 76.118 + 76.119 + name = id.name; 76.120 + type = id.drivertype; 76.121 +#if 0 76.122 + /* catch this by validate, not here */ 76.123 + flags |= (TD_OPEN_RDONLY | TD_OPEN_SHAREABLE); 76.124 +#endif 76.125 + } 76.126 + return images; 76.127 +} 76.128 + 76.129 static int 76.130 __tapdisk_vbd_open_vdi(td_vbd_t *vbd, td_flag_t extra_flags) 76.131 { 76.132 @@ -548,58 +655,35 @@ static int 76.133 int err, type; 76.134 td_flag_t flags; 76.135 td_disk_id_t id; 76.136 - td_image_t *image, *tmp; 76.137 + td_image_t *tmp; 76.138 struct tfilter *filter = NULL; 76.139 + td_vbd_driver_info_t *driver_info; 76.140 + struct list_head *images; 76.141 + td_disk_info_t *parent_info = NULL; 76.142 76.143 err = tapdisk_vbd_reactivate_volumes(vbd, 0); 76.144 if (err) 76.145 return err; 76.146 76.147 flags = (vbd->flags & ~TD_OPEN_SHAREABLE) | extra_flags; 76.148 - file = vbd->name; 76.149 - type = vbd->type; 76.150 - 76.151 - for (;;) { 76.152 - err = -ENOMEM; 76.153 - image = tapdisk_image_allocate(file, type, 76.154 - vbd->storage, flags, vbd); 76.155 - 76.156 - if (file != vbd->name) { 76.157 - free(file); 76.158 - file = NULL; 76.159 - } 76.160 - 76.161 - if (!image) 76.162 - goto fail; 76.163 - 76.164 - err = td_load(image); 76.165 - if (err) { 76.166 - if (err != -ENODEV) 76.167 - goto fail; 76.168 76.169 - err = td_open(image); 76.170 - 
if (err) 76.171 - goto fail; 76.172 - } 76.173 - 76.174 - err = td_get_parent_id(image, &id); 76.175 - if (err && err != TD_NO_PARENT) { 76.176 - td_close(image); 76.177 - goto fail; 76.178 - } 76.179 + /* loop on each user specified driver. 76.180 + * NOTE: driver_info is in reverse order. That is, the first 76.181 + * item is the 'parent' or 'sink' driver */ 76.182 + list_for_each_entry(driver_info, &vbd->driver_stack, next) { 76.183 + file = driver_info->params; 76.184 + type = driver_info->type; 76.185 + images = tapdisk_vbd_open_level(vbd, file, type, parent_info, flags); 76.186 + if (!images) 76.187 + return -EINVAL; 76.188 76.189 - if (!image->storage) 76.190 - image->storage = vbd->storage; 76.191 - 76.192 - tapdisk_vbd_add_image(vbd, image); 76.193 - image = NULL; 76.194 + /* after each loop, append the created stack to the result stack */ 76.195 + list_splice(images, &vbd->images); 76.196 + free(images); 76.197 76.198 - if (err == TD_NO_PARENT) 76.199 - break; 76.200 - 76.201 - file = id.name; 76.202 - type = id.drivertype; 76.203 - flags |= (TD_OPEN_RDONLY | TD_OPEN_SHAREABLE); 76.204 + /* set the parent_info to the first diskinfo on the stack */ 76.205 + tmp = tapdisk_vbd_first_image(vbd); 76.206 + parent_info = &tmp->info; 76.207 } 76.208 76.209 if (td_flag_test(vbd->flags, TD_OPEN_LOG_DIRTY)) { 76.210 @@ -623,14 +707,91 @@ static int 76.211 return 0; 76.212 76.213 fail: 76.214 + 76.215 +/* TODO: loop over vbd to free images? maybe do that in vbd_close_vdi */ 76.216 +#if 0 76.217 if (image) 76.218 tapdisk_image_free(image); 76.219 +#endif 76.220 76.221 + /* TODO: handle partial stack creation? 
*/ 76.222 tapdisk_vbd_close_vdi(vbd); 76.223 76.224 return err; 76.225 } 76.226 76.227 +/* this populates a vbd type based on path */ 76.228 +int 76.229 +tapdisk_vbd_parse_stack(td_vbd_t *vbd, const char *path) 76.230 +{ 76.231 + int err; 76.232 + char *params, *driver_str; 76.233 + td_vbd_driver_info_t *driver; 76.234 + 76.235 + /* make a copy of path */ 76.236 + /* TODO: check against MAX_NAME_LEM ? */ 76.237 + err = tapdisk_namedup(&params, path); 76.238 + if(err) 76.239 + goto error; 76.240 + 76.241 + 76.242 + /* tokenize params based on pipe '|' */ 76.243 + driver_str = strtok(params, "|"); 76.244 + while(driver_str != NULL) 76.245 + { 76.246 + /* parse driver info and add to vbd */ 76.247 + driver = calloc(1, sizeof(td_vbd_driver_info_t)); 76.248 + INIT_LIST_HEAD(&driver->next); 76.249 + err = tapdisk_parse_disk_type(driver_str, &driver->params, &driver->type); 76.250 + if(err) 76.251 + goto error; 76.252 + 76.253 + /* build the list backwards as the last driver will be the first 76.254 + * driver to open in the stack */ 76.255 + list_add(&driver->next, &vbd->driver_stack); 76.256 + 76.257 + /* get next driver string */ 76.258 + driver_str = strtok(NULL, "|"); 76.259 + } 76.260 + 76.261 + return 0; 76.262 + 76.263 + /* error: free any driver_info's and params */ 76.264 + error: 76.265 + while(!list_empty(&vbd->driver_stack)) { 76.266 + driver = list_entry(vbd->driver_stack.next, td_vbd_driver_info_t, next); 76.267 + list_del(&driver->next); 76.268 + free(driver); 76.269 + } 76.270 + 76.271 + return err; 76.272 +} 76.273 + 76.274 +/* NOTE: driver type, etc. 
must be set */ 76.275 +static int 76.276 +tapdisk_vbd_open_stack(td_vbd_t *vbd, uint16_t storage, td_flag_t flags) 76.277 +{ 76.278 + int i, err; 76.279 + 76.280 + vbd->flags = flags; 76.281 + vbd->storage = storage; 76.282 + 76.283 + for (i = 0; i < TD_VBD_EIO_RETRIES; i++) { 76.284 + err = __tapdisk_vbd_open_vdi(vbd, 0); 76.285 + if (err != -EIO) 76.286 + break; 76.287 + 76.288 + sleep(TD_VBD_EIO_SLEEP); 76.289 + } 76.290 + if (err) 76.291 + goto fail; 76.292 + 76.293 + return 0; 76.294 + 76.295 + fail: 76.296 + return err; 76.297 +} 76.298 + 76.299 int 76.300 tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *path, 76.301 uint16_t drivertype, uint16_t storage, td_flag_t flags) 76.302 @@ -759,7 +920,7 @@ tapdisk_vbd_open(td_vbd_t *vbd, const ch 76.303 { 76.304 int err; 76.305 76.306 - err = tapdisk_vbd_open_vdi(vbd, name, type, storage, flags); 76.307 + err = tapdisk_vbd_open_stack(vbd, storage, flags); 76.308 if (err) 76.309 goto out; 76.310 76.311 @@ -1099,6 +1260,8 @@ tapdisk_vbd_kick(td_vbd_t *vbd) 76.312 int n; 76.313 td_ring_t *ring; 76.314 76.315 + tapdisk_vbd_check_state(vbd); 76.316 + 76.317 ring = &vbd->ring; 76.318 if (!ring->sring) 76.319 return 0; 76.320 @@ -1261,11 +1424,26 @@ tapdisk_vbd_complete_vbd_request(td_vbd_ 76.321 } 76.322 } 76.323 76.324 +static uint64_t 76.325 +tapdisk_vbd_breq_get_sector(blkif_request_t *breq, td_request_t treq) 76.326 +{ 76.327 + int seg, nsects; 76.328 + uint64_t sector_nr = breq->sector_number; 76.329 + 76.330 + for(seg=0; seg < treq.sidx; seg++) { 76.331 + nsects = breq->seg[seg].last_sect - breq->seg[seg].first_sect + 1; 76.332 + sector_nr += nsects; 76.333 + } 76.334 + 76.335 + return sector_nr; 76.336 +} 76.337 + 76.338 static void 76.339 __tapdisk_vbd_complete_td_request(td_vbd_t *vbd, td_vbd_request_t *vreq, 76.340 td_request_t treq, int res) 76.341 { 76.342 int err; 76.343 + td_image_t *image = treq.image; 76.344 76.345 err = (res <= 0 ? 
res : -res); 76.346 vbd->secs_pending -= treq.secs; 76.347 @@ -1283,6 +1461,22 @@ static void 76.348 (treq.op == TD_OP_WRITE ? "write" : "read"), 76.349 treq.secs, treq.sec); 76.350 } 76.351 + } else { 76.352 +#ifdef MEMSHR 76.353 + if (treq.op == TD_OP_READ 76.354 + && td_flag_test(image->flags, TD_OPEN_RDONLY)) { 76.355 + uint64_t hnd = treq.memshr_hnd; 76.356 + uint16_t uid = image->memshr_id; 76.357 + blkif_request_t *breq = &vreq->req; 76.358 + uint64_t sec = tapdisk_vbd_breq_get_sector(breq, treq); 76.359 + int secs = breq->seg[treq.sidx].last_sect - 76.360 + breq->seg[treq.sidx].first_sect + 1; 76.361 + 76.362 + if (hnd != 0) 76.363 + memshr_vbd_complete_ro_request(hnd, uid, 76.364 + sec, secs); 76.365 + } 76.366 +#endif 76.367 } 76.368 76.369 tapdisk_vbd_complete_vbd_request(vbd, vreq); 76.370 @@ -1335,7 +1529,28 @@ static void 76.371 break; 76.372 76.373 case TD_OP_READ: 76.374 - td_queue_read(parent, treq); 76.375 +#ifdef MEMSHR 76.376 + if(td_flag_test(parent->flags, TD_OPEN_RDONLY)) { 76.377 + int ret, seg = treq.sidx; 76.378 + blkif_request_t *breq = &vreq->req; 76.379 + 76.380 + ret = memshr_vbd_issue_ro_request(treq.buf, 76.381 + breq->seg[seg].gref, 76.382 + parent->memshr_id, 76.383 + treq.sec, 76.384 + treq.secs, 76.385 + &treq.memshr_hnd); 76.386 + if(ret == 0) { 76.387 + /* Reset memshr handle. 
This'll prevent 76.388 + * memshr_vbd_complete_ro_request being called 76.389 + */ 76.390 + treq.memshr_hnd = 0; 76.391 + td_complete_request(treq, 0); 76.392 + } else 76.393 + td_queue_read(parent, treq); 76.394 + } else 76.395 +#endif 76.396 + td_queue_read(parent, treq); 76.397 break; 76.398 } 76.399 76.400 @@ -1406,9 +1621,11 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd, 76.401 gettimeofday(&vreq->last_try, NULL); 76.402 tapdisk_vbd_move_request(vreq, &vbd->pending_requests); 76.403 76.404 +#if 0 76.405 err = tapdisk_vbd_check_queue(vbd); 76.406 if (err) 76.407 goto fail; 76.408 +#endif 76.409 76.410 err = tapdisk_image_check_ring_request(image, req); 76.411 if (err)
77.1 --- a/tools/blktap2/drivers/tapdisk-vbd.h Mon Nov 02 19:35:54 2009 -0800 77.2 +++ b/tools/blktap2/drivers/tapdisk-vbd.h Fri Mar 19 18:36:57 2010 -0700 77.3 @@ -53,6 +53,7 @@ 77.4 77.5 typedef struct td_ring td_ring_t; 77.6 typedef struct td_vbd_request td_vbd_request_t; 77.7 +typedef struct td_vbd_driver_info td_vbd_driver_info_t; 77.8 typedef struct td_vbd_handle td_vbd_t; 77.9 typedef void (*td_vbd_cb_t) (void *, blkif_response_t *); 77.10 77.11 @@ -79,12 +80,20 @@ struct td_vbd_request { 77.12 struct list_head next; 77.13 }; 77.14 77.15 +struct td_vbd_driver_info { 77.16 + char *params; 77.17 + int type; 77.18 + struct list_head next; 77.19 +}; 77.20 + 77.21 struct td_vbd_handle { 77.22 char *name; 77.23 77.24 td_uuid_t uuid; 77.25 int type; 77.26 77.27 + struct list_head driver_stack; 77.28 + 77.29 int storage; 77.30 77.31 uint8_t reopened; 77.32 @@ -164,6 +173,7 @@ tapdisk_vbd_next_image(td_image_t *image 77.33 77.34 int tapdisk_vbd_initialize(int, int, td_uuid_t); 77.35 void tapdisk_vbd_set_callback(td_vbd_t *, td_vbd_cb_t, void *); 77.36 +int tapdisk_vbd_parse_stack(td_vbd_t *vbd, const char *path); 77.37 int tapdisk_vbd_open(td_vbd_t *, const char *, uint16_t, 77.38 uint16_t, const char *, td_flag_t); 77.39 int tapdisk_vbd_close(td_vbd_t *);
78.1 --- a/tools/blktap2/drivers/tapdisk.h Mon Nov 02 19:35:54 2009 -0800 78.2 +++ b/tools/blktap2/drivers/tapdisk.h Fri Mar 19 18:36:57 2010 -0700 78.3 @@ -131,6 +131,10 @@ struct td_request { 78.4 uint64_t id; 78.5 int sidx; 78.6 void *private; 78.7 + 78.8 +#ifdef MEMSHR 78.9 + uint64_t memshr_hnd; 78.10 +#endif 78.11 }; 78.12 78.13 /*
79.1 --- a/tools/blktap2/drivers/tapdisk2.c Mon Nov 02 19:35:54 2009 -0800 79.2 +++ b/tools/blktap2/drivers/tapdisk2.c Fri Mar 19 18:36:57 2010 -0700 79.3 @@ -34,6 +34,9 @@ 79.4 #include <sys/stat.h> 79.5 #include <sys/types.h> 79.6 #include <sys/ioctl.h> 79.7 +#ifdef MEMSHR 79.8 +#include <memshr.h> 79.9 +#endif 79.10 79.11 #include "tapdisk.h" 79.12 #include "blktap2.h" 79.13 @@ -264,6 +267,13 @@ tapdisk2_open_device(int type, const cha 79.14 return err; 79.15 } 79.16 79.17 + err = tapdisk_vbd_parse_stack(vbd, name); 79.18 + if (err) { 79.19 + CHILD_ERR(err, "vbd_parse_stack failed: %d\n", err); 79.20 + return err; 79.21 + } 79.22 + 79.23 + /* TODO: clean this up */ 79.24 err = tapdisk_vbd_open(vbd, path, type, 79.25 TAPDISK_STORAGE_TYPE_DEFAULT, 79.26 devname, 0); 79.27 @@ -404,13 +414,22 @@ main(int argc, char *argv[]) 79.28 79.29 params = NULL; 79.30 79.31 - while ((c = getopt(argc, argv, "n:h")) != -1) { 79.32 + while ((c = getopt(argc, argv, "n:s:h")) != -1) { 79.33 switch (c) { 79.34 case 'n': 79.35 params = optarg; 79.36 break; 79.37 case 'h': 79.38 usage(argv[0], 0); 79.39 + break; 79.40 + case 's': 79.41 +#ifdef MEMSHR 79.42 + memshr_set_domid(atoi(optarg)); 79.43 +#else 79.44 + fprintf(stderr, "MEMSHR support not compiled in.\n"); 79.45 + exit(EXIT_FAILURE); 79.46 +#endif 79.47 + break; 79.48 default: 79.49 usage(argv[0], EINVAL); 79.50 }
80.1 --- a/tools/blktap2/include/Makefile Mon Nov 02 19:35:54 2009 -0800 80.2 +++ b/tools/blktap2/include/Makefile Fri Mar 19 18:36:57 2010 -0700 80.3 @@ -1,4 +1,4 @@ 80.4 -XEN_ROOT := ../../../ 80.5 +XEN_ROOT := ../../.. 80.6 include $(XEN_ROOT)/tools/Rules.mk 80.7 80.8 .PHONY: all
81.1 --- a/tools/blktap2/include/blktaplib.h Mon Nov 02 19:35:54 2009 -0800 81.2 +++ b/tools/blktap2/include/blktaplib.h Fri Mar 19 18:36:57 2010 -0700 81.3 @@ -43,8 +43,9 @@ 81.4 #endif 81.5 81.6 #define EPRINTF(_f, _a...) syslog(LOG_ERR, "tap-err:%s: " _f, __func__, ##_a) 81.7 +#define PERROR(_f, _a...) EPRINTF(_f ": %s", ##_a, strerror(errno)) 81.8 81.9 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, XC_PAGE_SIZE) 81.10 +#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, XC_PAGE_SIZE) 81.11 81.12 /* size of the extra VMA area to map in attached pages. */ 81.13 #define BLKTAP_VMA_PAGES BLK_RING_SIZE
82.1 --- a/tools/blktap2/include/list.h Mon Nov 02 19:35:54 2009 -0800 82.2 +++ b/tools/blktap2/include/list.h Fri Mar 19 18:36:57 2010 -0700 82.3 @@ -87,6 +87,26 @@ static inline int list_is_last(const str 82.4 return list->next == head; 82.5 } 82.6 82.7 +static inline void __list_splice(struct list_head *list, 82.8 + struct list_head *head) 82.9 +{ 82.10 + struct list_head *first = list->next; 82.11 + struct list_head *last = list->prev; 82.12 + struct list_head *at = head->next; 82.13 + 82.14 + first->prev = head; 82.15 + head->next = first; 82.16 + 82.17 + last->next = at; 82.18 + at->prev = last; 82.19 +} 82.20 + 82.21 +static inline void list_splice(struct list_head *list, struct list_head *head) 82.22 +{ 82.23 + if (!list_empty(list)) 82.24 + __list_splice(list, head); 82.25 +} 82.26 + 82.27 #define list_entry(ptr, type, member) \ 82.28 ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member))) 82.29
83.1 --- a/tools/blktap2/lvm/Makefile Mon Nov 02 19:35:54 2009 -0800 83.2 +++ b/tools/blktap2/lvm/Makefile Fri Mar 19 18:36:57 2010 -0700 83.3 @@ -1,5 +1,5 @@ 83.4 -XEN_ROOT = ../../../ 83.5 -BLKTAP_ROOT := ../ 83.6 +XEN_ROOT = ../../.. 83.7 +BLKTAP_ROOT := .. 83.8 include $(XEN_ROOT)/tools/Rules.mk 83.9 83.10 ifeq ($(LVM_UTIL_TEST),y)
84.1 --- a/tools/blktap2/vhd/Makefile Mon Nov 02 19:35:54 2009 -0800 84.2 +++ b/tools/blktap2/vhd/Makefile Fri Mar 19 18:36:57 2010 -0700 84.3 @@ -1,5 +1,5 @@ 84.4 -XEN_ROOT=../../../ 84.5 -BLKTAP_ROOT := ../ 84.6 +XEN_ROOT=../../.. 84.7 +BLKTAP_ROOT := .. 84.8 include $(XEN_ROOT)/tools/Rules.mk 84.9 84.10 SUBDIRS-y :=
85.1 --- a/tools/blktap2/vhd/lib/Makefile Mon Nov 02 19:35:54 2009 -0800 85.2 +++ b/tools/blktap2/vhd/lib/Makefile Fri Mar 19 18:36:57 2010 -0700 85.3 @@ -1,12 +1,12 @@ 85.4 -XEN_ROOT=../../../../ 85.5 -BLKTAP_ROOT := ../../ 85.6 +XEN_ROOT=../../../.. 85.7 +BLKTAP_ROOT := ../.. 85.8 include $(XEN_ROOT)/tools/Rules.mk 85.9 85.10 LIBVHD-MAJOR = 1.0 85.11 LIBVHD-MINOR = 0 85.12 LIBVHD-SONAME = libvhd.so.$(LIBVHD-MAJOR) 85.13 85.14 -LVM-UTIL-OBJ := $(BLKTAP_ROOT)lvm/lvm-util.o 85.15 +LVM-UTIL-OBJ := $(BLKTAP_ROOT)/lvm/lvm-util.o 85.16 85.17 LIBVHD-BUILD := libvhd.a 85.18
86.1 --- a/tools/blktap2/vhd/lib/libvhd.c Mon Nov 02 19:35:54 2009 -0800 86.2 +++ b/tools/blktap2/vhd/lib/libvhd.c Fri Mar 19 18:36:57 2010 -0700 86.3 @@ -36,6 +36,7 @@ 86.4 #include <libgen.h> 86.5 #include <iconv.h> 86.6 #include <sys/mman.h> 86.7 +#include <sys/stat.h> 86.8 86.9 #include "libvhd.h" 86.10 #include "relative-path.h"
87.1 --- a/tools/blktap2/vhd/lib/vhd-util-scan.c Mon Nov 02 19:35:54 2009 -0800 87.2 +++ b/tools/blktap2/vhd/lib/vhd-util-scan.c Fri Mar 19 18:36:57 2010 -0700 87.3 @@ -34,6 +34,7 @@ 87.4 #include <unistd.h> 87.5 #include <fnmatch.h> 87.6 #include <libgen.h> /* for basename() */ 87.7 +#include <sys/stat.h> 87.8 87.9 #include "list.h" 87.10 #include "libvhd.h"
88.1 --- a/tools/console/client/main.c Mon Nov 02 19:35:54 2009 -0800 88.2 +++ b/tools/console/client/main.c Fri Mar 19 18:36:57 2010 -0700 88.3 @@ -287,7 +287,13 @@ int main(int argc, char **argv) 88.4 exit(EINVAL); 88.5 } 88.6 } 88.7 - 88.8 + 88.9 + if (optind >= argc) { 88.10 + fprintf(stderr, "DOMID should be specified\n"); 88.11 + fprintf(stderr, "Try `%s --help' for more information.\n", 88.12 + argv[0]); 88.13 + exit(EINVAL); 88.14 + } 88.15 domid = strtol(argv[optind], &end, 10); 88.16 if (end && *end) { 88.17 fprintf(stderr, "Invalid DOMID `%s'\n", argv[optind]);
89.1 --- a/tools/examples/Makefile Mon Nov 02 19:35:54 2009 -0800 89.2 +++ b/tools/examples/Makefile Fri Mar 19 18:36:57 2010 -0700 89.3 @@ -1,4 +1,4 @@ 89.4 -XEN_ROOT = ../../ 89.5 +XEN_ROOT = ../.. 89.6 include $(XEN_ROOT)/tools/Rules.mk 89.7 89.8 # Init scripts.
90.1 --- a/tools/examples/xend-config.sxp Mon Nov 02 19:35:54 2009 -0800 90.2 +++ b/tools/examples/xend-config.sxp Fri Mar 19 18:36:57 2010 -0700 90.3 @@ -192,6 +192,16 @@ 90.4 # If enable-dom0-ballooning = no, dom0 will never balloon out. 90.5 (enable-dom0-ballooning yes) 90.6 90.7 +# 32-bit paravirtual domains can only consume physical 90.8 +# memory below 168GB. On systems with memory beyond that address, 90.9 +# they'll be confined to memory below 128GB. 90.10 +# Using total_available_memory (in GB) to specify the amount of memory reserved 90.11 +# in the memory pool exclusively for 32-bit paravirtual domains. 90.12 +# Additionally you should use dom0_mem = <-Value> as a parameter in 90.13 +# xen kernel to reserve the memory for 32-bit paravirtual domains, default 90.14 +# is "0" (0GB). 90.15 +(total_available_memory 0) 90.16 + 90.17 # In SMP system, dom0 will use dom0-cpus # of CPUS 90.18 # If dom0-cpus = 0, dom0 will take all cpus available 90.19 (dom0-cpus 0)
91.1 --- a/tools/examples/xmexample.hvm Mon Nov 02 19:35:54 2009 -0800 91.2 +++ b/tools/examples/xmexample.hvm Fri Mar 19 18:36:57 2010 -0700 91.3 @@ -25,6 +25,10 @@ memory = 128 91.4 # Should be at least 2KB per MB of domain memory, plus a few MB per vcpu. 91.5 # shadow_memory = 8 91.6 91.7 +# Whether to transparently share this domain's memory with other domains. 91.8 +# default = 0 91.9 +# memory_sharing = 0 91.10 + 91.11 # A name for your domain. All domains must have different names. 91.12 name = "ExampleHVMDomain" 91.13 91.14 @@ -178,11 +182,16 @@ stdvga=0 91.15 serial='pty' 91.16 91.17 #---------------------------------------------------------------------------- 91.18 -# tsc_native : TSC mode (0=emulate TSC, 1=native TSC) 91.19 +# tsc_mode : TSC mode (0=default, 1=native TSC, 2=never emulate, 3=pvrdtscp) 91.20 # emulate TSC provides synced TSC for all vcpus, but lose perfomrance. 91.21 # native TSC leverages hardware's TSC(no perf loss), but vcpu's TSC may lose 91.22 -# sync due to hardware's unreliable/unsynced TSC between CPUs. 91.23 -tsc_native=1 91.24 +# sync due to hardware's unreliable/unsynced TSC between CPUs. 
91.25 +# default intelligently uses native TSC on machines where it is safe, but 91.26 +# switches to emulated if necessary after save/restore/migration 91.27 +# pvrdtscp is for intelligent apps that use special Xen-only paravirtualized 91.28 +# cpuid instructions to obtain offset/scaling/migration info and maximize 91.29 +# performance within pools of machines that support the rdtscp instruction 91.30 +tsc_mode=0 91.31 91.32 #----------------------------------------------------------------------------- 91.33 # Qemu Monitor, default is disable 91.34 @@ -335,6 +344,12 @@ tsc_native=1 91.35 # 91.36 #pci_power_mgmt=0 91.37 91.38 +# Enable graphics passthrough: 91.39 +# 91.40 +# If it's set, and specify grapchis device BDF in pci passthrough option, 91.41 +# like pci=['xx:xx.x'], it enables graphics passthrough, default=0 (disabled) 91.42 +#gfx_passthru=0 91.43 + 91.44 #----------------------------------------------------------------------------- 91.45 # Configure PVSCSI devices: 91.46 #
92.1 --- a/tools/firmware/hvmloader/acpi/Makefile Mon Nov 02 19:35:54 2009 -0800 92.2 +++ b/tools/firmware/hvmloader/acpi/Makefile Fri Mar 19 18:36:57 2010 -0700 92.3 @@ -18,8 +18,7 @@ 92.4 XEN_ROOT = ../../../.. 92.5 include $(XEN_ROOT)/tools/firmware/Rules.mk 92.6 92.7 -C_SRC = build.c dsdt.c static_tables.c 92.8 -H_SRC = $(wildcard *.h) 92.9 +C_SRC = build.c dsdt_anycpu.c dsdt_15cpu.c static_tables.c 92.10 OBJS = $(patsubst %.c,%.o,$(C_SRC)) 92.11 92.12 CFLAGS += -I. -I.. $(CFLAGS_include) 92.13 @@ -27,18 +26,20 @@ CFLAGS += -I. -I.. $(CFLAGS_include) 92.14 vpath iasl $(PATH) 92.15 all: acpi.a 92.16 92.17 -ssdt_pm.h ssdt_tpm.h: %.h: %.asl 92.18 - $(MAKE) iasl 92.19 - iasl -tc $< 92.20 - mv $*.hex $@ 92.21 - rm -f *.aml 92.22 +ssdt_pm.h ssdt_tpm.h: %.h: %.asl iasl 92.23 + iasl -p $* -tc $< 92.24 + sed -e 's/AmlCode/$*/g' $*.hex >$@ 92.25 + rm -f $*.hex $*.aml 92.26 92.27 -dsdt.c: dsdt.asl 92.28 - $(MAKE) iasl 92.29 - iasl -tc dsdt.asl 92.30 - mv dsdt.hex dsdt.c 92.31 - echo "int DsdtLen=sizeof(AmlCode);" >> dsdt.c 92.32 - rm -f *.aml 92.33 +# NB. awk invocation is a portable alternative to 'head -n -1' 92.34 +dsdt_15cpu.c dsdt_anycpu.c: %.c: dsdt.asl mk_dsdt.c iasl 92.35 + $(HOSTCC) $(HOSTCFLAGS) $(CFLAGS_include) -o mk_$* mk_dsdt.c 92.36 + awk 'NR > 1 {print s} {s=$$0}' $< >$*.asl 92.37 + ./mk_$* >>$*.asl 92.38 + iasl -p $* -tc $*.asl 92.39 + sed -e 's/AmlCode/$*/g' $*.hex >$@ 92.40 + echo "int $*_len=sizeof($*);" >>$@ 92.41 + rm -f $*.hex $*.aml $*.asl mk_$* 92.42 92.43 iasl: 92.44 @echo 92.45 @@ -48,14 +49,14 @@ iasl: 92.46 @echo 92.47 @exit 1 92.48 92.49 +build.o: ssdt_pm.h ssdt_tpm.h 92.50 + 92.51 acpi.a: $(OBJS) 92.52 $(AR) rc $@ $(OBJS) 92.53 92.54 -%.o: %.c $(H_SRC) 92.55 - $(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $< 92.56 - 92.57 clean: 92.58 rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz $(DEPS) 92.59 + rm -rf ssdt_*.h dsdt*.c *~ *.aml *.hex mk_dsdt mk_dsdt15 dsdt_*cpu.asl 92.60 92.61 install: all 92.62
93.1 --- a/tools/firmware/hvmloader/acpi/acpi2_0.h Mon Nov 02 19:35:54 2009 -0800 93.2 +++ b/tools/firmware/hvmloader/acpi/acpi2_0.h Fri Mar 19 18:36:57 2010 -0700 93.3 @@ -382,6 +382,7 @@ struct acpi_20_madt_intsrcovr { 93.4 #pragma pack () 93.5 93.6 void acpi_build_tables(void); 93.7 +extern uint32_t madt_csum_addr, madt_lapic0_addr; 93.8 93.9 #endif /* _ACPI_2_0_H_ */ 93.10
94.1 --- a/tools/firmware/hvmloader/acpi/build.c Mon Nov 02 19:35:54 2009 -0800 94.2 +++ b/tools/firmware/hvmloader/acpi/build.c Fri Mar 19 18:36:57 2010 -0700 94.3 @@ -25,13 +25,28 @@ 94.4 #define align16(sz) (((sz) + 15) & ~15) 94.5 #define fixed_strcpy(d, s) strncpy((d), (s), sizeof(d)) 94.6 94.7 +/* MADT parameters for filling in bios_info structure for DSDT. */ 94.8 +uint32_t madt_csum_addr, madt_lapic0_addr; 94.9 + 94.10 extern struct acpi_20_rsdp Rsdp; 94.11 extern struct acpi_20_rsdt Rsdt; 94.12 extern struct acpi_20_xsdt Xsdt; 94.13 extern struct acpi_20_fadt Fadt; 94.14 extern struct acpi_20_facs Facs; 94.15 -extern unsigned char AmlCode[]; 94.16 -extern int DsdtLen; 94.17 + 94.18 +/* 94.19 + * Alternative DSDTs we get linked against. A cover-all DSDT for up to the 94.20 + * implementation-defined maximum number of VCPUs, and an alternative for use 94.21 + * when a guest can only have up to 15 VCPUs. 94.22 + * 94.23 + * The latter is required for Windows 2000, which experiences a BSOD of 94.24 + * KMODE_EXCEPTION_NOT_HANDLED if it sees more than 15 processor objects. 94.25 + */ 94.26 +extern unsigned char dsdt_anycpu[], dsdt_15cpu; 94.27 +extern int dsdt_anycpu_len, dsdt_15cpu_len; 94.28 + 94.29 +/* Number of processor objects in the chosen DSDT. */ 94.30 +static unsigned int nr_processor_objects; 94.31 94.32 static void set_checksum( 94.33 void *table, uint32_t checksum_offset, uint32_t length) 94.34 @@ -111,7 +126,8 @@ static int construct_madt(struct acpi_20 94.35 offset += sizeof(*io_apic); 94.36 94.37 lapic = (struct acpi_20_madt_lapic *)(io_apic + 1); 94.38 - for ( i = 0; i < hvm_info->nr_vcpus; i++ ) 94.39 + madt_lapic0_addr = (uint32_t)lapic; 94.40 + for ( i = 0; i < nr_processor_objects; i++ ) 94.41 { 94.42 memset(lapic, 0, sizeof(*lapic)); 94.43 lapic->type = ACPI_PROCESSOR_LOCAL_APIC; 94.44 @@ -119,13 +135,16 @@ static int construct_madt(struct acpi_20 94.45 /* Processor ID must match processor-object IDs in the DSDT. 
*/ 94.46 lapic->acpi_processor_id = i; 94.47 lapic->apic_id = LAPIC_ID(i); 94.48 - lapic->flags = ACPI_LOCAL_APIC_ENABLED; 94.49 + lapic->flags = ((i < hvm_info->nr_vcpus) && 94.50 + test_bit(i, hvm_info->vcpu_online) 94.51 + ? ACPI_LOCAL_APIC_ENABLED : 0); 94.52 offset += sizeof(*lapic); 94.53 lapic++; 94.54 } 94.55 94.56 madt->header.length = offset; 94.57 set_checksum(madt, offsetof(struct acpi_header, checksum), offset); 94.58 + madt_csum_addr = (uint32_t)&madt->header.checksum; 94.59 94.60 return align16(offset); 94.61 } 94.62 @@ -181,8 +200,8 @@ static int construct_secondary_tables(ui 94.63 if ( battery_port_exists() ) 94.64 { 94.65 table_ptrs[nr_tables++] = (unsigned long)&buf[offset]; 94.66 - memcpy(&buf[offset], AmlCode_PM, sizeof(AmlCode_PM)); 94.67 - offset += align16(sizeof(AmlCode_PM)); 94.68 + memcpy(&buf[offset], ssdt_pm, sizeof(ssdt_pm)); 94.69 + offset += align16(sizeof(ssdt_pm)); 94.70 } 94.71 94.72 /* TPM TCPA and SSDT. */ 94.73 @@ -191,9 +210,9 @@ static int construct_secondary_tables(ui 94.74 (tis_hdr[1] == tis_signature[1]) && 94.75 (tis_hdr[2] == tis_signature[2]) ) 94.76 { 94.77 - memcpy(&buf[offset], AmlCode_TPM, sizeof(AmlCode_TPM)); 94.78 + memcpy(&buf[offset], ssdt_tpm, sizeof(ssdt_tpm)); 94.79 table_ptrs[nr_tables++] = (unsigned long)&buf[offset]; 94.80 - offset += align16(sizeof(AmlCode_TPM)); 94.81 + offset += align16(sizeof(ssdt_tpm)); 94.82 94.83 tcpa = (struct acpi_20_tcpa *)&buf[offset]; 94.84 memset(tcpa, 0, sizeof(*tcpa)); 94.85 @@ -244,8 +263,18 @@ static void __acpi_build_tables(uint8_t 94.86 offset += align16(sizeof(struct acpi_20_facs)); 94.87 94.88 dsdt = (unsigned char *)&buf[offset]; 94.89 - memcpy(dsdt, &AmlCode, DsdtLen); 94.90 - offset += align16(DsdtLen); 94.91 + if ( hvm_info->nr_vcpus <= 15 ) 94.92 + { 94.93 + memcpy(dsdt, &dsdt_15cpu, dsdt_15cpu_len); 94.94 + offset += align16(dsdt_15cpu_len); 94.95 + nr_processor_objects = 15; 94.96 + } 94.97 + else 94.98 + { 94.99 + memcpy(dsdt, &dsdt_anycpu, dsdt_anycpu_len); 
94.100 + offset += align16(dsdt_anycpu_len); 94.101 + nr_processor_objects = HVM_MAX_VCPUS; 94.102 + } 94.103 94.104 /* 94.105 * N.B. ACPI 1.0 operating systems may not handle FADT with revision 2
95.1 --- a/tools/firmware/hvmloader/acpi/dsdt.asl Mon Nov 02 19:35:54 2009 -0800 95.2 +++ b/tools/firmware/hvmloader/acpi/dsdt.asl Fri Mar 19 18:36:57 2010 -0700 95.3 @@ -27,31 +27,6 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 95.4 Name (\APCL, 0x00010000) 95.5 Name (\PUID, 0x00) 95.6 95.7 - Scope (\_PR) 95.8 - { 95.9 - Processor (PR00, 0x00, 0x0000, 0x00) {} 95.10 - Processor (PR01, 0x01, 0x0000, 0x00) {} 95.11 - Processor (PR02, 0x02, 0x0000, 0x00) {} 95.12 - Processor (PR03, 0x03, 0x0000, 0x00) {} 95.13 - Processor (PR04, 0x04, 0x0000, 0x00) {} 95.14 - Processor (PR05, 0x05, 0x0000, 0x00) {} 95.15 - Processor (PR06, 0x06, 0x0000, 0x00) {} 95.16 - Processor (PR07, 0x07, 0x0000, 0x00) {} 95.17 - Processor (PR08, 0x08, 0x0000, 0x00) {} 95.18 - Processor (PR09, 0x09, 0x0000, 0x00) {} 95.19 - Processor (PR0A, 0x0a, 0x0000, 0x00) {} 95.20 - Processor (PR0B, 0x0b, 0x0000, 0x00) {} 95.21 - Processor (PR0C, 0x0c, 0x0000, 0x00) {} 95.22 - Processor (PR0D, 0x0d, 0x0000, 0x00) {} 95.23 - Processor (PR0E, 0x0e, 0x0000, 0x00) {} 95.24 - /* No more than 15 Processor objects, as otherwise Windows 2000 95.25 - * experiences a BSOD of KMODE_EXCEPTION_NOT_HANDLED. If we require 95.26 - * more in some configurations then we should move \_PR scope into a 95.27 - * SSDT, statically compiled with a range of different numbers of 95.28 - * processors. We can then link the appropriate one into the RSDT/XSDT 95.29 - * at HVM guest boot time. */ 95.30 - } 95.31 - 95.32 /* 95.33 * S3 (suspend-to-ram), S4 (suspend-to-disc) and S5 (power-off) type codes: 95.34 * must match piix4 emulation. 
95.35 @@ -87,14 +62,17 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 95.36 Scope (\_SB) 95.37 { 95.38 /* BIOS_INFO_PHYSICAL_ADDRESS == 0xEA000 */ 95.39 - OperationRegion(BIOS, SystemMemory, 0xEA000, 16) 95.40 + OperationRegion(BIOS, SystemMemory, 0xEA000, 24) 95.41 Field(BIOS, ByteAcc, NoLock, Preserve) { 95.42 UAR1, 1, 95.43 UAR2, 1, 95.44 + LTP1, 1, 95.45 HPET, 1, 95.46 Offset(4), 95.47 PMIN, 32, 95.48 - PLEN, 32 95.49 + PLEN, 32, 95.50 + MSUA, 32, /* MADT checksum address */ 95.51 + MAPA, 32 /* MADT LAPIC0 address */ 95.52 } 95.53 95.54 /* Fix HCT test for 0x400 pci memory: 95.55 @@ -136,6 +114,25 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 95.56 }) 95.57 } 95.58 95.59 + /* Make cirrues VGA S3 suspend/resume work in Windows XP/2003 */ 95.60 + Device (VGA) 95.61 + { 95.62 + Name (_ADR, 0x00020000) 95.63 + 95.64 + Method (_S1D, 0, NotSerialized) 95.65 + { 95.66 + Return (0x00) 95.67 + } 95.68 + Method (_S2D, 0, NotSerialized) 95.69 + { 95.70 + Return (0x00) 95.71 + } 95.72 + Method (_S3D, 0, NotSerialized) 95.73 + { 95.74 + Return (0x00) 95.75 + } 95.76 + } 95.77 + 95.78 Method (_CRS, 0, NotSerialized) 95.79 { 95.80 Name (PRT0, ResourceTemplate () 95.81 @@ -199,157 +196,6 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 95.82 Return (PRT0) 95.83 } 95.84 95.85 - Name(BUFA, ResourceTemplate() { 95.86 - IRQ(Level, ActiveLow, Shared) { 5, 10, 11 } 95.87 - }) 95.88 - 95.89 - Name(BUFB, Buffer() { 95.90 - 0x23, 0x00, 0x00, 0x18, /* IRQ descriptor */ 95.91 - 0x79, 0 /* End tag, null checksum */ 95.92 - }) 95.93 - 95.94 - CreateWordField(BUFB, 0x01, IRQV) 95.95 - 95.96 - Device(LNKA) { 95.97 - Name(_HID, EISAID("PNP0C0F")) /* PCI interrupt link */ 95.98 - Name(_UID, 1) 95.99 - 95.100 - Method(_STA, 0) { 95.101 - And(PIRA, 0x80, Local0) 95.102 - If(LEqual(Local0, 0x80)) { 95.103 - Return(0x09) 95.104 - } Else { 95.105 - Return(0x0B) 95.106 - } 95.107 - } 95.108 - 95.109 - Method(_PRS) { 95.110 - Return(BUFA) 95.111 - } 95.112 - 95.113 - Method(_DIS) { 95.114 - Or(PIRA, 0x80, PIRA) 
95.115 - } 95.116 - 95.117 - Method(_CRS) { 95.118 - And(PIRA, 0x0f, Local0) 95.119 - ShiftLeft(0x1, Local0, IRQV) 95.120 - Return(BUFB) 95.121 - } 95.122 - 95.123 - Method(_SRS, 1) { 95.124 - CreateWordField(ARG0, 0x01, IRQ1) 95.125 - FindSetRightBit(IRQ1, Local0) 95.126 - Decrement(Local0) 95.127 - Store(Local0, PIRA) 95.128 - } 95.129 - } 95.130 - 95.131 - Device(LNKB) { 95.132 - Name(_HID, EISAID("PNP0C0F")) /* PCI interrupt link */ 95.133 - Name(_UID, 2) 95.134 - 95.135 - Method(_STA, 0) { 95.136 - And(PIRB, 0x80, Local0) 95.137 - If(LEqual(Local0, 0x80)) { 95.138 - Return(0x09) 95.139 - } Else { 95.140 - Return(0x0B) 95.141 - } 95.142 - } 95.143 - 95.144 - Method(_PRS) { 95.145 - Return(BUFA) 95.146 - } 95.147 - 95.148 - Method(_DIS) { 95.149 - Or(PIRB, 0x80, PIRB) 95.150 - } 95.151 - 95.152 - Method(_CRS) { 95.153 - And(PIRB, 0x0f, Local0) 95.154 - ShiftLeft(0x1, Local0, IRQV) 95.155 - Return(BUFB) 95.156 - } 95.157 - 95.158 - Method(_SRS, 1) { 95.159 - CreateWordField(ARG0, 0x01, IRQ1) 95.160 - FindSetRightBit(IRQ1, Local0) 95.161 - Decrement(Local0) 95.162 - Store(Local0, PIRB) 95.163 - } 95.164 - } 95.165 - 95.166 - Device(LNKC) { 95.167 - Name(_HID, EISAID("PNP0C0F")) /* PCI interrupt link */ 95.168 - Name(_UID, 3) 95.169 - 95.170 - Method(_STA, 0) { 95.171 - And(PIRC, 0x80, Local0) 95.172 - If(LEqual(Local0, 0x80)) { 95.173 - Return(0x09) 95.174 - } Else { 95.175 - Return(0x0B) 95.176 - } 95.177 - } 95.178 - 95.179 - Method(_PRS) { 95.180 - Return(BUFA) 95.181 - } 95.182 - 95.183 - Method(_DIS) { 95.184 - Or(PIRC, 0x80, PIRC) 95.185 - } 95.186 - 95.187 - Method(_CRS) { 95.188 - And(PIRC, 0x0f, Local0) 95.189 - ShiftLeft(0x1, Local0, IRQV) 95.190 - Return(BUFB) 95.191 - } 95.192 - 95.193 - Method(_SRS, 1) { 95.194 - CreateWordField(ARG0, 0x01, IRQ1) 95.195 - FindSetRightBit(IRQ1, Local0) 95.196 - Decrement(Local0) 95.197 - Store(Local0, PIRC) 95.198 - } 95.199 - } 95.200 - 95.201 - Device(LNKD) { 95.202 - Name(_HID, EISAID("PNP0C0F")) /* PCI interrupt 
link */ 95.203 - Name(_UID, 4) 95.204 - 95.205 - Method(_STA, 0) { 95.206 - And(PIRD, 0x80, Local0) 95.207 - If(LEqual(Local0, 0x80)) { 95.208 - Return(0x09) 95.209 - } Else { 95.210 - Return(0x0B) 95.211 - } 95.212 - } 95.213 - 95.214 - Method(_PRS) { 95.215 - Return(BUFA) 95.216 - } 95.217 - 95.218 - Method(_DIS) { 95.219 - Or(PIRD, 0x80, PIRD) 95.220 - } 95.221 - 95.222 - Method(_CRS) { 95.223 - And(PIRD, 0x0f, Local0) 95.224 - ShiftLeft(0x1, Local0, IRQV) 95.225 - Return(BUFB) 95.226 - } 95.227 - 95.228 - Method(_SRS, 1) { 95.229 - CreateWordField(ARG0, 0x01, IRQ1) 95.230 - FindSetRightBit(IRQ1, Local0) 95.231 - Decrement(Local0) 95.232 - Store(Local0, PIRD) 95.233 - } 95.234 - } 95.235 - 95.236 Device(HPET) { 95.237 Name(_HID, EISAID("PNP0103")) 95.238 Name(_UID, 0) 95.239 @@ -373,389 +219,6 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 95.240 }) 95.241 } 95.242 95.243 - Method(_PRT,0) { 95.244 - If(PICD) { 95.245 - Return(PRTA) 95.246 - } 95.247 - Return (PRTP) 95.248 - } 95.249 - 95.250 - Name(PRTP, Package() { 95.251 - /* Device 1, INTA - INTD */ 95.252 - Package(){0x0001ffff, 0, \_SB.PCI0.LNKB, 0}, 95.253 - Package(){0x0001ffff, 1, \_SB.PCI0.LNKC, 0}, 95.254 - Package(){0x0001ffff, 2, \_SB.PCI0.LNKD, 0}, 95.255 - Package(){0x0001ffff, 3, \_SB.PCI0.LNKA, 0}, 95.256 - 95.257 - /* Device 2, INTA - INTD */ 95.258 - Package(){0x0002ffff, 0, \_SB.PCI0.LNKC, 0}, 95.259 - Package(){0x0002ffff, 1, \_SB.PCI0.LNKD, 0}, 95.260 - Package(){0x0002ffff, 2, \_SB.PCI0.LNKA, 0}, 95.261 - Package(){0x0002ffff, 3, \_SB.PCI0.LNKB, 0}, 95.262 - 95.263 - /* Device 3, INTA - INTD */ 95.264 - Package(){0x0003ffff, 0, \_SB.PCI0.LNKD, 0}, 95.265 - Package(){0x0003ffff, 1, \_SB.PCI0.LNKA, 0}, 95.266 - Package(){0x0003ffff, 2, \_SB.PCI0.LNKB, 0}, 95.267 - Package(){0x0003ffff, 3, \_SB.PCI0.LNKC, 0}, 95.268 - 95.269 - /* Device 4, INTA - INTD */ 95.270 - Package(){0x0004ffff, 0, \_SB.PCI0.LNKA, 0}, 95.271 - Package(){0x0004ffff, 1, \_SB.PCI0.LNKB, 0}, 95.272 - Package(){0x0004ffff, 2, 
\_SB.PCI0.LNKC, 0}, 95.273 - Package(){0x0004ffff, 3, \_SB.PCI0.LNKD, 0}, 95.274 - 95.275 - /* Device 5, INTA - INTD */ 95.276 - Package(){0x0005ffff, 0, \_SB.PCI0.LNKB, 0}, 95.277 - Package(){0x0005ffff, 1, \_SB.PCI0.LNKC, 0}, 95.278 - Package(){0x0005ffff, 2, \_SB.PCI0.LNKD, 0}, 95.279 - Package(){0x0005ffff, 3, \_SB.PCI0.LNKA, 0}, 95.280 - 95.281 - /* Device 6, INTA - INTD */ 95.282 - Package(){0x0006ffff, 0, \_SB.PCI0.LNKC, 0}, 95.283 - Package(){0x0006ffff, 1, \_SB.PCI0.LNKD, 0}, 95.284 - Package(){0x0006ffff, 2, \_SB.PCI0.LNKA, 0}, 95.285 - Package(){0x0006ffff, 3, \_SB.PCI0.LNKB, 0}, 95.286 - 95.287 - /* Device 7, INTA - INTD */ 95.288 - Package(){0x0007ffff, 0, \_SB.PCI0.LNKD, 0}, 95.289 - Package(){0x0007ffff, 1, \_SB.PCI0.LNKA, 0}, 95.290 - Package(){0x0007ffff, 2, \_SB.PCI0.LNKB, 0}, 95.291 - Package(){0x0007ffff, 3, \_SB.PCI0.LNKC, 0}, 95.292 - 95.293 - /* Device 8, INTA - INTD */ 95.294 - Package(){0x0008ffff, 0, \_SB.PCI0.LNKA, 0}, 95.295 - Package(){0x0008ffff, 1, \_SB.PCI0.LNKB, 0}, 95.296 - Package(){0x0008ffff, 2, \_SB.PCI0.LNKC, 0}, 95.297 - Package(){0x0008ffff, 3, \_SB.PCI0.LNKD, 0}, 95.298 - 95.299 - /* Device 9, INTA - INTD */ 95.300 - Package(){0x0009ffff, 0, \_SB.PCI0.LNKB, 0}, 95.301 - Package(){0x0009ffff, 1, \_SB.PCI0.LNKC, 0}, 95.302 - Package(){0x0009ffff, 2, \_SB.PCI0.LNKD, 0}, 95.303 - Package(){0x0009ffff, 3, \_SB.PCI0.LNKA, 0}, 95.304 - 95.305 - /* Device 10, INTA - INTD */ 95.306 - Package(){0x000affff, 0, \_SB.PCI0.LNKC, 0}, 95.307 - Package(){0x000affff, 1, \_SB.PCI0.LNKD, 0}, 95.308 - Package(){0x000affff, 2, \_SB.PCI0.LNKA, 0}, 95.309 - Package(){0x000affff, 3, \_SB.PCI0.LNKB, 0}, 95.310 - 95.311 - /* Device 11, INTA - INTD */ 95.312 - Package(){0x000bffff, 0, \_SB.PCI0.LNKD, 0}, 95.313 - Package(){0x000bffff, 1, \_SB.PCI0.LNKA, 0}, 95.314 - Package(){0x000bffff, 2, \_SB.PCI0.LNKB, 0}, 95.315 - Package(){0x000bffff, 3, \_SB.PCI0.LNKC, 0}, 95.316 - 95.317 - /* Device 12, INTA - INTD */ 95.318 - Package(){0x000cffff, 0, 
\_SB.PCI0.LNKA, 0}, 95.319 - Package(){0x000cffff, 1, \_SB.PCI0.LNKB, 0}, 95.320 - Package(){0x000cffff, 2, \_SB.PCI0.LNKC, 0}, 95.321 - Package(){0x000cffff, 3, \_SB.PCI0.LNKD, 0}, 95.322 - 95.323 - /* Device 13, INTA - INTD */ 95.324 - Package(){0x000dffff, 0, \_SB.PCI0.LNKB, 0}, 95.325 - Package(){0x000dffff, 1, \_SB.PCI0.LNKC, 0}, 95.326 - Package(){0x000dffff, 2, \_SB.PCI0.LNKD, 0}, 95.327 - Package(){0x000dffff, 3, \_SB.PCI0.LNKA, 0}, 95.328 - 95.329 - /* Device 14, INTA - INTD */ 95.330 - Package(){0x000effff, 0, \_SB.PCI0.LNKC, 0}, 95.331 - Package(){0x000effff, 1, \_SB.PCI0.LNKD, 0}, 95.332 - Package(){0x000effff, 2, \_SB.PCI0.LNKA, 0}, 95.333 - Package(){0x000effff, 3, \_SB.PCI0.LNKB, 0}, 95.334 - 95.335 - /* Device 15, INTA - INTD */ 95.336 - Package(){0x000fffff, 0, \_SB.PCI0.LNKD, 0}, 95.337 - Package(){0x000fffff, 1, \_SB.PCI0.LNKA, 0}, 95.338 - Package(){0x000fffff, 2, \_SB.PCI0.LNKB, 0}, 95.339 - Package(){0x000fffff, 3, \_SB.PCI0.LNKC, 0}, 95.340 - 95.341 - /* Device 16, INTA - INTD */ 95.342 - Package(){0x0010ffff, 0, \_SB.PCI0.LNKA, 0}, 95.343 - Package(){0x0010ffff, 1, \_SB.PCI0.LNKB, 0}, 95.344 - Package(){0x0010ffff, 2, \_SB.PCI0.LNKC, 0}, 95.345 - Package(){0x0010ffff, 3, \_SB.PCI0.LNKD, 0}, 95.346 - 95.347 - /* Device 17, INTA - INTD */ 95.348 - Package(){0x0011ffff, 0, \_SB.PCI0.LNKB, 0}, 95.349 - Package(){0x0011ffff, 1, \_SB.PCI0.LNKC, 0}, 95.350 - Package(){0x0011ffff, 2, \_SB.PCI0.LNKD, 0}, 95.351 - Package(){0x0011ffff, 3, \_SB.PCI0.LNKA, 0}, 95.352 - 95.353 - /* Device 18, INTA - INTD */ 95.354 - Package(){0x0012ffff, 0, \_SB.PCI0.LNKC, 0}, 95.355 - Package(){0x0012ffff, 1, \_SB.PCI0.LNKD, 0}, 95.356 - Package(){0x0012ffff, 2, \_SB.PCI0.LNKA, 0}, 95.357 - Package(){0x0012ffff, 3, \_SB.PCI0.LNKB, 0}, 95.358 - 95.359 - /* Device 19, INTA - INTD */ 95.360 - Package(){0x0013ffff, 0, \_SB.PCI0.LNKD, 0}, 95.361 - Package(){0x0013ffff, 1, \_SB.PCI0.LNKA, 0}, 95.362 - Package(){0x0013ffff, 2, \_SB.PCI0.LNKB, 0}, 95.363 - Package(){0x0013ffff, 
3, \_SB.PCI0.LNKC, 0}, 95.364 - 95.365 - /* Device 20, INTA - INTD */ 95.366 - Package(){0x0014ffff, 0, \_SB.PCI0.LNKA, 0}, 95.367 - Package(){0x0014ffff, 1, \_SB.PCI0.LNKB, 0}, 95.368 - Package(){0x0014ffff, 2, \_SB.PCI0.LNKC, 0}, 95.369 - Package(){0x0014ffff, 3, \_SB.PCI0.LNKD, 0}, 95.370 - 95.371 - /* Device 21, INTA - INTD */ 95.372 - Package(){0x0015ffff, 0, \_SB.PCI0.LNKB, 0}, 95.373 - Package(){0x0015ffff, 1, \_SB.PCI0.LNKC, 0}, 95.374 - Package(){0x0015ffff, 2, \_SB.PCI0.LNKD, 0}, 95.375 - Package(){0x0015ffff, 3, \_SB.PCI0.LNKA, 0}, 95.376 - 95.377 - /* Device 22, INTA - INTD */ 95.378 - Package(){0x0016ffff, 0, \_SB.PCI0.LNKC, 0}, 95.379 - Package(){0x0016ffff, 1, \_SB.PCI0.LNKD, 0}, 95.380 - Package(){0x0016ffff, 2, \_SB.PCI0.LNKA, 0}, 95.381 - Package(){0x0016ffff, 3, \_SB.PCI0.LNKB, 0}, 95.382 - 95.383 - /* Device 23, INTA - INTD */ 95.384 - Package(){0x0017ffff, 0, \_SB.PCI0.LNKD, 0}, 95.385 - Package(){0x0017ffff, 1, \_SB.PCI0.LNKA, 0}, 95.386 - Package(){0x0017ffff, 2, \_SB.PCI0.LNKB, 0}, 95.387 - Package(){0x0017ffff, 3, \_SB.PCI0.LNKC, 0}, 95.388 - 95.389 - /* Device 24, INTA - INTD */ 95.390 - Package(){0x0018ffff, 0, \_SB.PCI0.LNKA, 0}, 95.391 - Package(){0x0018ffff, 1, \_SB.PCI0.LNKB, 0}, 95.392 - Package(){0x0018ffff, 2, \_SB.PCI0.LNKC, 0}, 95.393 - Package(){0x0018ffff, 3, \_SB.PCI0.LNKD, 0}, 95.394 - 95.395 - /* Device 25, INTA - INTD */ 95.396 - Package(){0x0019ffff, 0, \_SB.PCI0.LNKB, 0}, 95.397 - Package(){0x0019ffff, 1, \_SB.PCI0.LNKC, 0}, 95.398 - Package(){0x0019ffff, 2, \_SB.PCI0.LNKD, 0}, 95.399 - Package(){0x0019ffff, 3, \_SB.PCI0.LNKA, 0}, 95.400 - 95.401 - /* Device 26, INTA - INTD */ 95.402 - Package(){0x001affff, 0, \_SB.PCI0.LNKC, 0}, 95.403 - Package(){0x001affff, 1, \_SB.PCI0.LNKD, 0}, 95.404 - Package(){0x001affff, 2, \_SB.PCI0.LNKA, 0}, 95.405 - Package(){0x001affff, 3, \_SB.PCI0.LNKB, 0}, 95.406 - 95.407 - /* Device 27, INTA - INTD */ 95.408 - Package(){0x001bffff, 0, \_SB.PCI0.LNKD, 0}, 95.409 - Package(){0x001bffff, 1, 
\_SB.PCI0.LNKA, 0}, 95.410 - Package(){0x001bffff, 2, \_SB.PCI0.LNKB, 0}, 95.411 - Package(){0x001bffff, 3, \_SB.PCI0.LNKC, 0}, 95.412 - 95.413 - /* Device 28, INTA - INTD */ 95.414 - Package(){0x001cffff, 0, \_SB.PCI0.LNKA, 0}, 95.415 - Package(){0x001cffff, 1, \_SB.PCI0.LNKB, 0}, 95.416 - Package(){0x001cffff, 2, \_SB.PCI0.LNKC, 0}, 95.417 - Package(){0x001cffff, 3, \_SB.PCI0.LNKD, 0}, 95.418 - 95.419 - /* Device 29, INTA - INTD */ 95.420 - Package(){0x001dffff, 0, \_SB.PCI0.LNKB, 0}, 95.421 - Package(){0x001dffff, 1, \_SB.PCI0.LNKC, 0}, 95.422 - Package(){0x001dffff, 2, \_SB.PCI0.LNKD, 0}, 95.423 - Package(){0x001dffff, 3, \_SB.PCI0.LNKA, 0}, 95.424 - 95.425 - /* Device 30, INTA - INTD */ 95.426 - Package(){0x001effff, 0, \_SB.PCI0.LNKC, 0}, 95.427 - Package(){0x001effff, 1, \_SB.PCI0.LNKD, 0}, 95.428 - Package(){0x001effff, 2, \_SB.PCI0.LNKA, 0}, 95.429 - Package(){0x001effff, 3, \_SB.PCI0.LNKB, 0}, 95.430 - 95.431 - /* Device 31, INTA - INTD */ 95.432 - Package(){0x001fffff, 0, \_SB.PCI0.LNKD, 0}, 95.433 - Package(){0x001fffff, 1, \_SB.PCI0.LNKA, 0}, 95.434 - Package(){0x001fffff, 2, \_SB.PCI0.LNKB, 0}, 95.435 - Package(){0x001fffff, 3, \_SB.PCI0.LNKC, 0}, 95.436 - }) 95.437 - 95.438 - Name(PRTA, Package() { 95.439 - /* Device 1, INTA - INTD */ 95.440 - Package(){0x0001ffff, 0, 0, 20}, 95.441 - Package(){0x0001ffff, 1, 0, 21}, 95.442 - Package(){0x0001ffff, 2, 0, 22}, 95.443 - Package(){0x0001ffff, 3, 0, 23}, 95.444 - 95.445 - /* Device 2, INTA - INTD */ 95.446 - Package(){0x0002ffff, 0, 0, 24}, 95.447 - Package(){0x0002ffff, 1, 0, 25}, 95.448 - Package(){0x0002ffff, 2, 0, 26}, 95.449 - Package(){0x0002ffff, 3, 0, 27}, 95.450 - 95.451 - /* Device 3, INTA - INTD */ 95.452 - Package(){0x0003ffff, 0, 0, 28}, 95.453 - Package(){0x0003ffff, 1, 0, 29}, 95.454 - Package(){0x0003ffff, 2, 0, 30}, 95.455 - Package(){0x0003ffff, 3, 0, 31}, 95.456 - 95.457 - /* Device 4, INTA - INTD */ 95.458 - Package(){0x0004ffff, 0, 0, 32}, 95.459 - Package(){0x0004ffff, 1, 0, 33}, 
95.460 - Package(){0x0004ffff, 2, 0, 34}, 95.461 - Package(){0x0004ffff, 3, 0, 35}, 95.462 - 95.463 - /* Device 5, INTA - INTD */ 95.464 - Package(){0x0005ffff, 0, 0, 36}, 95.465 - Package(){0x0005ffff, 1, 0, 37}, 95.466 - Package(){0x0005ffff, 2, 0, 38}, 95.467 - Package(){0x0005ffff, 3, 0, 39}, 95.468 - 95.469 - /* Device 6, INTA - INTD */ 95.470 - Package(){0x0006ffff, 0, 0, 40}, 95.471 - Package(){0x0006ffff, 1, 0, 41}, 95.472 - Package(){0x0006ffff, 2, 0, 42}, 95.473 - Package(){0x0006ffff, 3, 0, 43}, 95.474 - 95.475 - /* Device 7, INTA - INTD */ 95.476 - Package(){0x0007ffff, 0, 0, 44}, 95.477 - Package(){0x0007ffff, 1, 0, 45}, 95.478 - Package(){0x0007ffff, 2, 0, 46}, 95.479 - Package(){0x0007ffff, 3, 0, 47}, 95.480 - 95.481 - /* Device 8, INTA - INTD */ 95.482 - Package(){0x0008ffff, 0, 0, 17}, 95.483 - Package(){0x0008ffff, 1, 0, 18}, 95.484 - Package(){0x0008ffff, 2, 0, 19}, 95.485 - Package(){0x0008ffff, 3, 0, 20}, 95.486 - 95.487 - /* Device 9, INTA - INTD */ 95.488 - Package(){0x0009ffff, 0, 0, 21}, 95.489 - Package(){0x0009ffff, 1, 0, 22}, 95.490 - Package(){0x0009ffff, 2, 0, 23}, 95.491 - Package(){0x0009ffff, 3, 0, 24}, 95.492 - 95.493 - /* Device 10, INTA - INTD */ 95.494 - Package(){0x000affff, 0, 0, 25}, 95.495 - Package(){0x000affff, 1, 0, 26}, 95.496 - Package(){0x000affff, 2, 0, 27}, 95.497 - Package(){0x000affff, 3, 0, 28}, 95.498 - 95.499 - /* Device 11, INTA - INTD */ 95.500 - Package(){0x000bffff, 0, 0, 29}, 95.501 - Package(){0x000bffff, 1, 0, 30}, 95.502 - Package(){0x000bffff, 2, 0, 31}, 95.503 - Package(){0x000bffff, 3, 0, 32}, 95.504 - 95.505 - /* Device 12, INTA - INTD */ 95.506 - Package(){0x000cffff, 0, 0, 33}, 95.507 - Package(){0x000cffff, 1, 0, 34}, 95.508 - Package(){0x000cffff, 2, 0, 35}, 95.509 - Package(){0x000cffff, 3, 0, 36}, 95.510 - 95.511 - /* Device 13, INTA - INTD */ 95.512 - Package(){0x000dffff, 0, 0, 37}, 95.513 - Package(){0x000dffff, 1, 0, 38}, 95.514 - Package(){0x000dffff, 2, 0, 39}, 95.515 - 
Package(){0x000dffff, 3, 0, 40}, 95.516 - 95.517 - /* Device 14, INTA - INTD */ 95.518 - Package(){0x000effff, 0, 0, 41}, 95.519 - Package(){0x000effff, 1, 0, 42}, 95.520 - Package(){0x000effff, 2, 0, 43}, 95.521 - Package(){0x000effff, 3, 0, 44}, 95.522 - 95.523 - /* Device 15, INTA - INTD */ 95.524 - Package(){0x000fffff, 0, 0, 45}, 95.525 - Package(){0x000fffff, 1, 0, 46}, 95.526 - Package(){0x000fffff, 2, 0, 47}, 95.527 - Package(){0x000fffff, 3, 0, 16}, 95.528 - 95.529 - /* Device 16, INTA - INTD */ 95.530 - Package(){0x0010ffff, 0, 0, 18}, 95.531 - Package(){0x0010ffff, 1, 0, 19}, 95.532 - Package(){0x0010ffff, 2, 0, 20}, 95.533 - Package(){0x0010ffff, 3, 0, 21}, 95.534 - 95.535 - /* Device 17, INTA - INTD */ 95.536 - Package(){0x0011ffff, 0, 0, 22}, 95.537 - Package(){0x0011ffff, 1, 0, 23}, 95.538 - Package(){0x0011ffff, 2, 0, 24}, 95.539 - Package(){0x0011ffff, 3, 0, 25}, 95.540 - 95.541 - /* Device 18, INTA - INTD */ 95.542 - Package(){0x0012ffff, 0, 0, 26}, 95.543 - Package(){0x0012ffff, 1, 0, 27}, 95.544 - Package(){0x0012ffff, 2, 0, 28}, 95.545 - Package(){0x0012ffff, 3, 0, 29}, 95.546 - 95.547 - /* Device 19, INTA - INTD */ 95.548 - Package(){0x0013ffff, 0, 0, 30}, 95.549 - Package(){0x0013ffff, 1, 0, 31}, 95.550 - Package(){0x0013ffff, 2, 0, 32}, 95.551 - Package(){0x0013ffff, 3, 0, 33}, 95.552 - 95.553 - /* Device 20, INTA - INTD */ 95.554 - Package(){0x0014ffff, 0, 0, 34}, 95.555 - Package(){0x0014ffff, 1, 0, 35}, 95.556 - Package(){0x0014ffff, 2, 0, 36}, 95.557 - Package(){0x0014ffff, 3, 0, 37}, 95.558 - 95.559 - /* Device 21, INTA - INTD */ 95.560 - Package(){0x0015ffff, 0, 0, 38}, 95.561 - Package(){0x0015ffff, 1, 0, 39}, 95.562 - Package(){0x0015ffff, 2, 0, 40}, 95.563 - Package(){0x0015ffff, 3, 0, 41}, 95.564 - 95.565 - /* Device 22, INTA - INTD */ 95.566 - Package(){0x0016ffff, 0, 0, 42}, 95.567 - Package(){0x0016ffff, 1, 0, 43}, 95.568 - Package(){0x0016ffff, 2, 0, 44}, 95.569 - Package(){0x0016ffff, 3, 0, 45}, 95.570 - 95.571 - /* Device 23, 
INTA - INTD */ 95.572 - Package(){0x0017ffff, 0, 0, 46}, 95.573 - Package(){0x0017ffff, 1, 0, 47}, 95.574 - Package(){0x0017ffff, 2, 0, 16}, 95.575 - Package(){0x0017ffff, 3, 0, 17}, 95.576 - 95.577 - /* Device 24, INTA - INTD */ 95.578 - Package(){0x0018ffff, 0, 0, 19}, 95.579 - Package(){0x0018ffff, 1, 0, 20}, 95.580 - Package(){0x0018ffff, 2, 0, 21}, 95.581 - Package(){0x0018ffff, 3, 0, 22}, 95.582 - 95.583 - /* Device 25, INTA - INTD */ 95.584 - Package(){0x0019ffff, 0, 0, 23}, 95.585 - Package(){0x0019ffff, 1, 0, 24}, 95.586 - Package(){0x0019ffff, 2, 0, 25}, 95.587 - Package(){0x0019ffff, 3, 0, 26}, 95.588 - 95.589 - /* Device 26, INTA - INTD */ 95.590 - Package(){0x001affff, 0, 0, 27}, 95.591 - Package(){0x001affff, 1, 0, 28}, 95.592 - Package(){0x001affff, 2, 0, 29}, 95.593 - Package(){0x001affff, 3, 0, 30}, 95.594 - 95.595 - /* Device 27, INTA - INTD */ 95.596 - Package(){0x001bffff, 0, 0, 31}, 95.597 - Package(){0x001bffff, 1, 0, 32}, 95.598 - Package(){0x001bffff, 2, 0, 33}, 95.599 - Package(){0x001bffff, 3, 0, 34}, 95.600 - 95.601 - /* Device 28, INTA - INTD */ 95.602 - Package(){0x001cffff, 0, 0, 35}, 95.603 - Package(){0x001cffff, 1, 0, 36}, 95.604 - Package(){0x001cffff, 2, 0, 37}, 95.605 - Package(){0x001cffff, 3, 0, 38}, 95.606 - 95.607 - /* Device 29, INTA - INTD */ 95.608 - Package(){0x001dffff, 0, 0, 39}, 95.609 - Package(){0x001dffff, 1, 0, 40}, 95.610 - Package(){0x001dffff, 2, 0, 41}, 95.611 - Package(){0x001dffff, 3, 0, 42}, 95.612 - 95.613 - /* Device 30, INTA - INTD */ 95.614 - Package(){0x001effff, 0, 0, 43}, 95.615 - Package(){0x001effff, 1, 0, 44}, 95.616 - Package(){0x001effff, 2, 0, 45}, 95.617 - Package(){0x001effff, 3, 0, 46}, 95.618 - 95.619 - /* Device 31, INTA - INTD */ 95.620 - Package(){0x001fffff, 0, 0, 47}, 95.621 - Package(){0x001fffff, 1, 0, 16}, 95.622 - Package(){0x001fffff, 2, 0, 17}, 95.623 - Package(){0x001fffff, 3, 0, 18}, 95.624 - }) 95.625 - 95.626 Device (ISA) 95.627 { 95.628 Name (_ADR, 0x00010000) /* device 1, fn 
0 */ 95.629 @@ -951,7 +414,11 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 95.630 Name (_UID, 0x02) 95.631 Method (_STA, 0, NotSerialized) 95.632 { 95.633 - Return (0x0F) 95.634 + If(LEqual(\_SB.LTP1, 0)) { 95.635 + Return(0x00) 95.636 + } Else { 95.637 + Return(0x0F) 95.638 + } 95.639 } 95.640 95.641 Name (_CRS, ResourceTemplate() 95.642 @@ -961,9657 +428,6 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 95.643 }) 95.644 } 95.645 } 95.646 - 95.647 - /****************************************************************** 95.648 - * Each PCI hotplug slot needs at least two methods to handle 95.649 - * the ACPI event: 95.650 - * _EJ0: eject a device 95.651 - * _STA: return a device's status, e.g. enabled or removed 95.652 - * Other methods are optional: 95.653 - * _PS0/3: put them here for debug purpose 95.654 - * 95.655 - * Eject button would generate a general-purpose event, then the 95.656 - * control method for this event uses Notify() to inform OSPM which 95.657 - * action happened and on which device. 95.658 - * 95.659 - * Pls. refer "6.3 Device Insertion, Removal, and Status Objects" 95.660 - * in ACPI spec 3.0b for details. 95.661 - * 95.662 - * QEMU provides a simple hotplug controller with some I/O to 95.663 - * handle the hotplug action and status, which is beyond the ACPI 95.664 - * scope. 
95.665 - */ 95.666 - Device(S00) 95.667 - { 95.668 - Name (_ADR, 0x00000000) /* Dev 0x00, Func 0x0 */ 95.669 - Name (_SUN, 0x00000000) 95.670 - 95.671 - Method (_PS0, 0) 95.672 - { 95.673 - Store (0x00, \_GPE.DPT1) 95.674 - Store (0x80, \_GPE.DPT2) 95.675 - } 95.676 - 95.677 - Method (_PS3, 0) 95.678 - { 95.679 - Store (0x00, \_GPE.DPT1) 95.680 - Store (0x83, \_GPE.DPT2) 95.681 - } 95.682 - 95.683 - Method (_EJ0, 1) 95.684 - { 95.685 - Store (0x00, \_GPE.DPT1) 95.686 - Store (0x88, \_GPE.DPT2) 95.687 - Store (0x01, \_GPE.PH00) /* eject */ 95.688 - } 95.689 - 95.690 - Method (_STA, 0) 95.691 - { 95.692 - Store (0x00, \_GPE.DPT1) 95.693 - Store (0x89, \_GPE.DPT2) 95.694 - And (\_GPE.PH00, 0x0f, Local1) 95.695 - Return (Local1) /* IN status as the _STA */ 95.696 - } 95.697 - } 95.698 - 95.699 - Device(S01) 95.700 - { 95.701 - Name (_ADR, 0x00000001) /* Dev 0x00, Func 0x1 */ 95.702 - Name (_SUN, 0x00000000) 95.703 - 95.704 - Method (_PS0, 0) 95.705 - { 95.706 - Store (0x01, \_GPE.DPT1) 95.707 - Store (0x80, \_GPE.DPT2) 95.708 - } 95.709 - 95.710 - Method (_PS3, 0) 95.711 - { 95.712 - Store (0x01, \_GPE.DPT1) 95.713 - Store (0x83, \_GPE.DPT2) 95.714 - } 95.715 - 95.716 - Method (_EJ0, 1) 95.717 - { 95.718 - Store (0x01, \_GPE.DPT1) 95.719 - Store (0x88, \_GPE.DPT2) 95.720 - Store (0x10, \_GPE.PH00) /* eject */ 95.721 - } 95.722 - 95.723 - Method (_STA, 0) 95.724 - { 95.725 - Store (0x01, \_GPE.DPT1) 95.726 - Store (0x89, \_GPE.DPT2) 95.727 - ShiftRight (0x4, \_GPE.PH00, Local1) 95.728 - Return (Local1) /* IN status as the _STA */ 95.729 - } 95.730 - } 95.731 - 95.732 - Device(S02) 95.733 - { 95.734 - Name (_ADR, 0x00000002) /* Dev 0x00, Func 0x2 */ 95.735 - Name (_SUN, 0x00000000) 95.736 - 95.737 - Method (_PS0, 0) 95.738 - { 95.739 - Store (0x02, \_GPE.DPT1) 95.740 - Store (0x80, \_GPE.DPT2) 95.741 - } 95.742 - 95.743 - Method (_PS3, 0) 95.744 - { 95.745 - Store (0x02, \_GPE.DPT1) 95.746 - Store (0x83, \_GPE.DPT2) 95.747 - } 95.748 - 95.749 - Method (_EJ0, 1) 95.750 - 
{ 95.751 - Store (0x02, \_GPE.DPT1) 95.752 - Store (0x88, \_GPE.DPT2) 95.753 - Store (0x01, \_GPE.PH02) /* eject */ 95.754 - } 95.755 - 95.756 - Method (_STA, 0) 95.757 - { 95.758 - Store (0x02, \_GPE.DPT1) 95.759 - Store (0x89, \_GPE.DPT2) 95.760 - And (\_GPE.PH02, 0x0f, Local1) 95.761 - Return (Local1) /* IN status as the _STA */ 95.762 - } 95.763 - } 95.764 - 95.765 - Device(S03) 95.766 - { 95.767 - Name (_ADR, 0x00000003) /* Dev 0x00, Func 0x3 */ 95.768 - Name (_SUN, 0x00000000) 95.769 - 95.770 - Method (_PS0, 0) 95.771 - { 95.772 - Store (0x03, \_GPE.DPT1) 95.773 - Store (0x80, \_GPE.DPT2) 95.774 - } 95.775 - 95.776 - Method (_PS3, 0) 95.777 - { 95.778 - Store (0x03, \_GPE.DPT1) 95.779 - Store (0x83, \_GPE.DPT2) 95.780 - } 95.781 - 95.782 - Method (_EJ0, 1) 95.783 - { 95.784 - Store (0x03, \_GPE.DPT1) 95.785 - Store (0x88, \_GPE.DPT2) 95.786 - Store (0x10, \_GPE.PH02) /* eject */ 95.787 - } 95.788 - 95.789 - Method (_STA, 0) 95.790 - { 95.791 - Store (0x03, \_GPE.DPT1) 95.792 - Store (0x89, \_GPE.DPT2) 95.793 - ShiftRight (0x4, \_GPE.PH02, Local1) 95.794 - Return (Local1) /* IN status as the _STA */ 95.795 - } 95.796 - } 95.797 - 95.798 - Device(S04) 95.799 - { 95.800 - Name (_ADR, 0x00000004) /* Dev 0x00, Func 0x4 */ 95.801 - Name (_SUN, 0x00000000) 95.802 - 95.803 - Method (_PS0, 0) 95.804 - { 95.805 - Store (0x04, \_GPE.DPT1) 95.806 - Store (0x80, \_GPE.DPT2) 95.807 - } 95.808 - 95.809 - Method (_PS3, 0) 95.810 - { 95.811 - Store (0x04, \_GPE.DPT1) 95.812 - Store (0x83, \_GPE.DPT2) 95.813 - } 95.814 - 95.815 - Method (_EJ0, 1) 95.816 - { 95.817 - Store (0x04, \_GPE.DPT1) 95.818 - Store (0x88, \_GPE.DPT2) 95.819 - Store (0x01, \_GPE.PH04) /* eject */ 95.820 - } 95.821 - 95.822 - Method (_STA, 0) 95.823 - { 95.824 - Store (0x04, \_GPE.DPT1) 95.825 - Store (0x89, \_GPE.DPT2) 95.826 - And (\_GPE.PH04, 0x0f, Local1) 95.827 - Return (Local1) /* IN status as the _STA */ 95.828 - } 95.829 - } 95.830 - 95.831 - Device(S05) 95.832 - { 95.833 - Name (_ADR, 0x00000005) 
/* Dev 0x00, Func 0x5 */ 95.834 - Name (_SUN, 0x00000000) 95.835 - 95.836 - Method (_PS0, 0) 95.837 - { 95.838 - Store (0x05, \_GPE.DPT1) 95.839 - Store (0x80, \_GPE.DPT2) 95.840 - } 95.841 - 95.842 - Method (_PS3, 0) 95.843 - { 95.844 - Store (0x05, \_GPE.DPT1) 95.845 - Store (0x83, \_GPE.DPT2) 95.846 - } 95.847 - 95.848 - Method (_EJ0, 1) 95.849 - { 95.850 - Store (0x05, \_GPE.DPT1) 95.851 - Store (0x88, \_GPE.DPT2) 95.852 - Store (0x10, \_GPE.PH04) /* eject */ 95.853 - } 95.854 - 95.855 - Method (_STA, 0) 95.856 - { 95.857 - Store (0x05, \_GPE.DPT1) 95.858 - Store (0x89, \_GPE.DPT2) 95.859 - ShiftRight (0x4, \_GPE.PH04, Local1) 95.860 - Return (Local1) /* IN status as the _STA */ 95.861 - } 95.862 - } 95.863 - 95.864 - Device(S06) 95.865 - { 95.866 - Name (_ADR, 0x00000006) /* Dev 0x00, Func 0x6 */ 95.867 - Name (_SUN, 0x00000000) 95.868 - 95.869 - Method (_PS0, 0) 95.870 - { 95.871 - Store (0x06, \_GPE.DPT1) 95.872 - Store (0x80, \_GPE.DPT2) 95.873 - } 95.874 - 95.875 - Method (_PS3, 0) 95.876 - { 95.877 - Store (0x06, \_GPE.DPT1) 95.878 - Store (0x83, \_GPE.DPT2) 95.879 - } 95.880 - 95.881 - Method (_EJ0, 1) 95.882 - { 95.883 - Store (0x06, \_GPE.DPT1) 95.884 - Store (0x88, \_GPE.DPT2) 95.885 - Store (0x01, \_GPE.PH06) /* eject */ 95.886 - } 95.887 - 95.888 - Method (_STA, 0) 95.889 - { 95.890 - Store (0x06, \_GPE.DPT1) 95.891 - Store (0x89, \_GPE.DPT2) 95.892 - And (\_GPE.PH06, 0x0f, Local1) 95.893 - Return (Local1) /* IN status as the _STA */ 95.894 - } 95.895 - } 95.896 - 95.897 - Device(S07) 95.898 - { 95.899 - Name (_ADR, 0x00000007) /* Dev 0x00, Func 0x7 */ 95.900 - Name (_SUN, 0x00000000) 95.901 - 95.902 - Method (_PS0, 0) 95.903 - { 95.904 - Store (0x07, \_GPE.DPT1) 95.905 - Store (0x80, \_GPE.DPT2) 95.906 - } 95.907 - 95.908 - Method (_PS3, 0) 95.909 - { 95.910 - Store (0x07, \_GPE.DPT1) 95.911 - Store (0x83, \_GPE.DPT2) 95.912 - } 95.913 - 95.914 - Method (_EJ0, 1) 95.915 - { 95.916 - Store (0x07, \_GPE.DPT1) 95.917 - Store (0x88, \_GPE.DPT2) 95.918 
- Store (0x10, \_GPE.PH06) /* eject */ 95.919 - } 95.920 - 95.921 - Method (_STA, 0) 95.922 - { 95.923 - Store (0x07, \_GPE.DPT1) 95.924 - Store (0x89, \_GPE.DPT2) 95.925 - ShiftRight (0x4, \_GPE.PH06, Local1) 95.926 - Return (Local1) /* IN status as the _STA */ 95.927 - } 95.928 - } 95.929 - 95.930 - Device(S08) 95.931 - { 95.932 - Name (_ADR, 0x00010000) /* Dev 0x01, Func 0x0 */ 95.933 - Name (_SUN, 0x00000001) 95.934 - 95.935 - Method (_PS0, 0) 95.936 - { 95.937 - Store (0x08, \_GPE.DPT1) 95.938 - Store (0x80, \_GPE.DPT2) 95.939 - } 95.940 - 95.941 - Method (_PS3, 0) 95.942 - { 95.943 - Store (0x08, \_GPE.DPT1) 95.944 - Store (0x83, \_GPE.DPT2) 95.945 - } 95.946 - 95.947 - Method (_EJ0, 1) 95.948 - { 95.949 - Store (0x08, \_GPE.DPT1) 95.950 - Store (0x88, \_GPE.DPT2) 95.951 - Store (0x01, \_GPE.PH08) /* eject */ 95.952 - } 95.953 - 95.954 - Method (_STA, 0) 95.955 - { 95.956 - Store (0x08, \_GPE.DPT1) 95.957 - Store (0x89, \_GPE.DPT2) 95.958 - And (\_GPE.PH08, 0x0f, Local1) 95.959 - Return (Local1) /* IN status as the _STA */ 95.960 - } 95.961 - } 95.962 - 95.963 - Device(S09) 95.964 - { 95.965 - Name (_ADR, 0x00010001) /* Dev 0x01, Func 0x1 */ 95.966 - Name (_SUN, 0x00000001) 95.967 - 95.968 - Method (_PS0, 0) 95.969 - { 95.970 - Store (0x09, \_GPE.DPT1) 95.971 - Store (0x80, \_GPE.DPT2) 95.972 - } 95.973 - 95.974 - Method (_PS3, 0) 95.975 - { 95.976 - Store (0x09, \_GPE.DPT1) 95.977 - Store (0x83, \_GPE.DPT2) 95.978 - } 95.979 - 95.980 - Method (_EJ0, 1) 95.981 - { 95.982 - Store (0x09, \_GPE.DPT1) 95.983 - Store (0x88, \_GPE.DPT2) 95.984 - Store (0x10, \_GPE.PH08) /* eject */ 95.985 - } 95.986 - 95.987 - Method (_STA, 0) 95.988 - { 95.989 - Store (0x09, \_GPE.DPT1) 95.990 - Store (0x89, \_GPE.DPT2) 95.991 - ShiftRight (0x4, \_GPE.PH08, Local1) 95.992 - Return (Local1) /* IN status as the _STA */ 95.993 - } 95.994 - } 95.995 - 95.996 - Device(S0A) 95.997 - { 95.998 - Name (_ADR, 0x00010002) /* Dev 0x01, Func 0x2 */ 95.999 - Name (_SUN, 0x00000001) 95.1000 - 
95.1001 - Method (_PS0, 0) 95.1002 - { 95.1003 - Store (0x0a, \_GPE.DPT1) 95.1004 - Store (0x80, \_GPE.DPT2) 95.1005 - } 95.1006 - 95.1007 - Method (_PS3, 0) 95.1008 - { 95.1009 - Store (0x0a, \_GPE.DPT1) 95.1010 - Store (0x83, \_GPE.DPT2) 95.1011 - } 95.1012 - 95.1013 - Method (_EJ0, 1) 95.1014 - { 95.1015 - Store (0x0a, \_GPE.DPT1) 95.1016 - Store (0x88, \_GPE.DPT2) 95.1017 - Store (0x01, \_GPE.PH0A) /* eject */ 95.1018 - } 95.1019 - 95.1020 - Method (_STA, 0) 95.1021 - { 95.1022 - Store (0x0a, \_GPE.DPT1) 95.1023 - Store (0x89, \_GPE.DPT2) 95.1024 - And (\_GPE.PH0A, 0x0f, Local1) 95.1025 - Return (Local1) /* IN status as the _STA */ 95.1026 - } 95.1027 - } 95.1028 - 95.1029 - Device(S0B) 95.1030 - { 95.1031 - Name (_ADR, 0x00010003) /* Dev 0x01, Func 0x3 */ 95.1032 - Name (_SUN, 0x00000001) 95.1033 - 95.1034 - Method (_PS0, 0) 95.1035 - { 95.1036 - Store (0x0b, \_GPE.DPT1) 95.1037 - Store (0x80, \_GPE.DPT2) 95.1038 - } 95.1039 - 95.1040 - Method (_PS3, 0) 95.1041 - { 95.1042 - Store (0x0b, \_GPE.DPT1) 95.1043 - Store (0x83, \_GPE.DPT2) 95.1044 - } 95.1045 - 95.1046 - Method (_EJ0, 1) 95.1047 - { 95.1048 - Store (0x0b, \_GPE.DPT1) 95.1049 - Store (0x88, \_GPE.DPT2) 95.1050 - Store (0x10, \_GPE.PH0A) /* eject */ 95.1051 - } 95.1052 - 95.1053 - Method (_STA, 0) 95.1054 - { 95.1055 - Store (0x0b, \_GPE.DPT1) 95.1056 - Store (0x89, \_GPE.DPT2) 95.1057 - ShiftRight (0x4, \_GPE.PH0A, Local1) 95.1058 - Return (Local1) /* IN status as the _STA */ 95.1059 - } 95.1060 - } 95.1061 - 95.1062 - Device(S0C) 95.1063 - { 95.1064 - Name (_ADR, 0x00010004) /* Dev 0x01, Func 0x4 */ 95.1065 - Name (_SUN, 0x00000001) 95.1066 - 95.1067 - Method (_PS0, 0) 95.1068 - { 95.1069 - Store (0x0c, \_GPE.DPT1) 95.1070 - Store (0x80, \_GPE.DPT2) 95.1071 - } 95.1072 - 95.1073 - Method (_PS3, 0) 95.1074 - { 95.1075 - Store (0x0c, \_GPE.DPT1) 95.1076 - Store (0x83, \_GPE.DPT2) 95.1077 - } 95.1078 - 95.1079 - Method (_EJ0, 1) 95.1080 - { 95.1081 - Store (0x0c, \_GPE.DPT1) 95.1082 - Store (0x88, 
\_GPE.DPT2) 95.1083 - Store (0x01, \_GPE.PH0C) /* eject */ 95.1084 - } 95.1085 - 95.1086 - Method (_STA, 0) 95.1087 - { 95.1088 - Store (0x0c, \_GPE.DPT1) 95.1089 - Store (0x89, \_GPE.DPT2) 95.1090 - And (\_GPE.PH0C, 0x0f, Local1) 95.1091 - Return (Local1) /* IN status as the _STA */ 95.1092 - } 95.1093 - } 95.1094 - 95.1095 - Device(S0D) 95.1096 - { 95.1097 - Name (_ADR, 0x00010005) /* Dev 0x01, Func 0x5 */ 95.1098 - Name (_SUN, 0x00000001) 95.1099 - 95.1100 - Method (_PS0, 0) 95.1101 - { 95.1102 - Store (0x0d, \_GPE.DPT1) 95.1103 - Store (0x80, \_GPE.DPT2) 95.1104 - } 95.1105 - 95.1106 - Method (_PS3, 0) 95.1107 - { 95.1108 - Store (0x0d, \_GPE.DPT1) 95.1109 - Store (0x83, \_GPE.DPT2) 95.1110 - } 95.1111 - 95.1112 - Method (_EJ0, 1) 95.1113 - { 95.1114 - Store (0x0d, \_GPE.DPT1) 95.1115 - Store (0x88, \_GPE.DPT2) 95.1116 - Store (0x10, \_GPE.PH0C) /* eject */ 95.1117 - } 95.1118 - 95.1119 - Method (_STA, 0) 95.1120 - { 95.1121 - Store (0x0d, \_GPE.DPT1) 95.1122 - Store (0x89, \_GPE.DPT2) 95.1123 - ShiftRight (0x4, \_GPE.PH0C, Local1) 95.1124 - Return (Local1) /* IN status as the _STA */ 95.1125 - } 95.1126 - } 95.1127 - 95.1128 - Device(S0E) 95.1129 - { 95.1130 - Name (_ADR, 0x00010006) /* Dev 0x01, Func 0x6 */ 95.1131 - Name (_SUN, 0x00000001) 95.1132 - 95.1133 - Method (_PS0, 0) 95.1134 - { 95.1135 - Store (0x0e, \_GPE.DPT1) 95.1136 - Store (0x80, \_GPE.DPT2) 95.1137 - } 95.1138 - 95.1139 - Method (_PS3, 0) 95.1140 - { 95.1141 - Store (0x0e, \_GPE.DPT1) 95.1142 - Store (0x83, \_GPE.DPT2) 95.1143 - } 95.1144 - 95.1145 - Method (_EJ0, 1) 95.1146 - { 95.1147 - Store (0x0e, \_GPE.DPT1) 95.1148 - Store (0x88, \_GPE.DPT2) 95.1149 - Store (0x01, \_GPE.PH0E) /* eject */ 95.1150 - } 95.1151 - 95.1152 - Method (_STA, 0) 95.1153 - { 95.1154 - Store (0x0e, \_GPE.DPT1) 95.1155 - Store (0x89, \_GPE.DPT2) 95.1156 - And (\_GPE.PH0E, 0x0f, Local1) 95.1157 - Return (Local1) /* IN status as the _STA */ 95.1158 - } 95.1159 - } 95.1160 - 95.1161 - Device(S0F) 95.1162 - { 95.1163 - 
Name (_ADR, 0x00010007) /* Dev 0x01, Func 0x7 */ 95.1164 - Name (_SUN, 0x00000001) 95.1165 - 95.1166 - Method (_PS0, 0) 95.1167 - { 95.1168 - Store (0x0f, \_GPE.DPT1) 95.1169 - Store (0x80, \_GPE.DPT2) 95.1170 - } 95.1171 - 95.1172 - Method (_PS3, 0) 95.1173 - { 95.1174 - Store (0x0f, \_GPE.DPT1) 95.1175 - Store (0x83, \_GPE.DPT2) 95.1176 - } 95.1177 - 95.1178 - Method (_EJ0, 1) 95.1179 - { 95.1180 - Store (0x0f, \_GPE.DPT1) 95.1181 - Store (0x88, \_GPE.DPT2) 95.1182 - Store (0x10, \_GPE.PH0E) /* eject */ 95.1183 - } 95.1184 - 95.1185 - Method (_STA, 0) 95.1186 - { 95.1187 - Store (0x0f, \_GPE.DPT1) 95.1188 - Store (0x89, \_GPE.DPT2) 95.1189 - ShiftRight (0x4, \_GPE.PH0E, Local1) 95.1190 - Return (Local1) /* IN status as the _STA */ 95.1191 - } 95.1192 - } 95.1193 - 95.1194 - Device(S10) 95.1195 - { 95.1196 - Name (_ADR, 0x00020000) /* Dev 0x02, Func 0x0 */ 95.1197 - Name (_SUN, 0x00000002) 95.1198 - 95.1199 - Method (_PS0, 0) 95.1200 - { 95.1201 - Store (0x10, \_GPE.DPT1) 95.1202 - Store (0x80, \_GPE.DPT2) 95.1203 - } 95.1204 - 95.1205 - Method (_PS3, 0) 95.1206 - { 95.1207 - Store (0x10, \_GPE.DPT1) 95.1208 - Store (0x83, \_GPE.DPT2) 95.1209 - } 95.1210 - 95.1211 - Method (_EJ0, 1) 95.1212 - { 95.1213 - Store (0x10, \_GPE.DPT1) 95.1214 - Store (0x88, \_GPE.DPT2) 95.1215 - Store (0x01, \_GPE.PH10) /* eject */ 95.1216 - } 95.1217 - 95.1218 - Method (_STA, 0) 95.1219 - { 95.1220 - Store (0x10, \_GPE.DPT1) 95.1221 - Store (0x89, \_GPE.DPT2) 95.1222 - And (\_GPE.PH10, 0x0f, Local1) 95.1223 - Return (Local1) /* IN status as the _STA */ 95.1224 - } 95.1225 - } 95.1226 - 95.1227 - Device(S11) 95.1228 - { 95.1229 - Name (_ADR, 0x00020001) /* Dev 0x02, Func 0x1 */ 95.1230 - Name (_SUN, 0x00000002) 95.1231 - 95.1232 - Method (_PS0, 0) 95.1233 - { 95.1234 - Store (0x11, \_GPE.DPT1) 95.1235 - Store (0x80, \_GPE.DPT2) 95.1236 - } 95.1237 - 95.1238 - Method (_PS3, 0) 95.1239 - { 95.1240 - Store (0x11, \_GPE.DPT1) 95.1241 - Store (0x83, \_GPE.DPT2) 95.1242 - } 95.1243 - 95.1244 
- Method (_EJ0, 1) 95.1245 - { 95.1246 - Store (0x11, \_GPE.DPT1) 95.1247 - Store (0x88, \_GPE.DPT2) 95.1248 - Store (0x10, \_GPE.PH10) /* eject */ 95.1249 - } 95.1250 - 95.1251 - Method (_STA, 0) 95.1252 - { 95.1253 - Store (0x11, \_GPE.DPT1) 95.1254 - Store (0x89, \_GPE.DPT2) 95.1255 - ShiftRight (0x4, \_GPE.PH10, Local1) 95.1256 - Return (Local1) /* IN status as the _STA */ 95.1257 - } 95.1258 - } 95.1259 - 95.1260 - Device(S12) 95.1261 - { 95.1262 - Name (_ADR, 0x00020002) /* Dev 0x02, Func 0x2 */ 95.1263 - Name (_SUN, 0x00000002) 95.1264 - 95.1265 - Method (_PS0, 0) 95.1266 - { 95.1267 - Store (0x12, \_GPE.DPT1) 95.1268 - Store (0x80, \_GPE.DPT2) 95.1269 - } 95.1270 - 95.1271 - Method (_PS3, 0) 95.1272 - { 95.1273 - Store (0x12, \_GPE.DPT1) 95.1274 - Store (0x83, \_GPE.DPT2) 95.1275 - } 95.1276 - 95.1277 - Method (_EJ0, 1) 95.1278 - { 95.1279 - Store (0x12, \_GPE.DPT1) 95.1280 - Store (0x88, \_GPE.DPT2) 95.1281 - Store (0x01, \_GPE.PH12) /* eject */ 95.1282 - } 95.1283 - 95.1284 - Method (_STA, 0) 95.1285 - { 95.1286 - Store (0x12, \_GPE.DPT1) 95.1287 - Store (0x89, \_GPE.DPT2) 95.1288 - And (\_GPE.PH12, 0x0f, Local1) 95.1289 - Return (Local1) /* IN status as the _STA */ 95.1290 - } 95.1291 - } 95.1292 - 95.1293 - Device(S13) 95.1294 - { 95.1295 - Name (_ADR, 0x00020003) /* Dev 0x02, Func 0x3 */ 95.1296 - Name (_SUN, 0x00000002) 95.1297 - 95.1298 - Method (_PS0, 0) 95.1299 - { 95.1300 - Store (0x13, \_GPE.DPT1) 95.1301 - Store (0x80, \_GPE.DPT2) 95.1302 - } 95.1303 - 95.1304 - Method (_PS3, 0) 95.1305 - { 95.1306 - Store (0x13, \_GPE.DPT1) 95.1307 - Store (0x83, \_GPE.DPT2) 95.1308 - } 95.1309 - 95.1310 - Method (_EJ0, 1) 95.1311 - { 95.1312 - Store (0x13, \_GPE.DPT1) 95.1313 - Store (0x88, \_GPE.DPT2) 95.1314 - Store (0x10, \_GPE.PH12) /* eject */ 95.1315 - } 95.1316 - 95.1317 - Method (_STA, 0) 95.1318 - { 95.1319 - Store (0x13, \_GPE.DPT1) 95.1320 - Store (0x89, \_GPE.DPT2) 95.1321 - ShiftRight (0x4, \_GPE.PH12, Local1) 95.1322 - Return (Local1) /* IN 
status as the _STA */ 95.1323 - } 95.1324 - } 95.1325 - 95.1326 - Device(S14) 95.1327 - { 95.1328 - Name (_ADR, 0x00020004) /* Dev 0x02, Func 0x4 */ 95.1329 - Name (_SUN, 0x00000002) 95.1330 - 95.1331 - Method (_PS0, 0) 95.1332 - { 95.1333 - Store (0x14, \_GPE.DPT1) 95.1334 - Store (0x80, \_GPE.DPT2) 95.1335 - } 95.1336 - 95.1337 - Method (_PS3, 0) 95.1338 - { 95.1339 - Store (0x14, \_GPE.DPT1) 95.1340 - Store (0x83, \_GPE.DPT2) 95.1341 - } 95.1342 - 95.1343 - Method (_EJ0, 1) 95.1344 - { 95.1345