debuggers.hg

changeset 20373:809b20f066fb

Refresh to c/s 21046. Also, update kdb as follows:
- update kdb_cmds for xen 4.0
- check if xen addr in xen text during bp so bp is not set in 64bit dom0
- don't check for guest_mode() in f_cur
- support more than 64 cpus to whatever NR_CPUS is
author Mukesh Rathor
date Fri Mar 19 18:36:57 2010 -0700 (2010-03-19)
parents 35f348b228ba
children 926cc93296dd
files .hgignore .hgtags Config.mk README buildconfigs/mk.linux-2.6-pvops buildconfigs/mk.linux-2.6-tip-latest config/NetBSD.mk config/StdGNU.mk docs/misc/cpuid-config-for-guest.txt docs/misc/pvrdtscp.c docs/misc/tscmode.txt docs/misc/xsm-flask.txt docs/xen-api/revision-history.tex docs/xen-api/xenapi-coversheet.tex docs/xen-api/xenapi-datamodel-graph.dot docs/xen-api/xenapi-datamodel.tex extras/mini-os/arch/ia64/mm.c extras/mini-os/arch/x86/ioremap.c extras/mini-os/arch/x86/mm.c extras/mini-os/blkfront.c extras/mini-os/console/xencons_ring.c extras/mini-os/fbfront.c extras/mini-os/fs-front.c extras/mini-os/include/ia64/arch_mm.h extras/mini-os/include/lib.h extras/mini-os/include/mm.h extras/mini-os/include/pcifront.h extras/mini-os/include/x86/arch_mm.h extras/mini-os/lib/printf.c extras/mini-os/lib/sys.c extras/mini-os/lib/xmalloc.c extras/mini-os/lib/xs.c extras/mini-os/main.c extras/mini-os/netfront.c extras/mini-os/pcifront.c extras/mini-os/xenbus/xenbus.c stubdom/Makefile stubdom/README stubdom/grub.patches/99minios stubdom/grub/config.h stubdom/grub/kexec.c stubdom/grub/mini-os.c stubdom/pciutils.patch stubdom/stubdom-dm tools/Makefile tools/Rules.mk tools/blktap/drivers/Makefile tools/blktap/drivers/blktapctrl.c tools/blktap/drivers/block-qcow2.c tools/blktap/lib/blktaplib.h tools/blktap/lib/xenbus.c tools/blktap2/drivers/Makefile tools/blktap2/drivers/block-aio.c tools/blktap2/drivers/block-qcow.c tools/blktap2/drivers/block-remus.c tools/blktap2/drivers/check_gcrypt tools/blktap2/drivers/disktypes.h tools/blktap2/drivers/hashtable.c tools/blktap2/drivers/hashtable_itr.c tools/blktap2/drivers/hashtable_itr.h tools/blktap2/drivers/hashtable_utility.c tools/blktap2/drivers/hashtable_utility.h tools/blktap2/drivers/io-optimize.c tools/blktap2/drivers/libaio-compat.h tools/blktap2/drivers/qcow2raw.c tools/blktap2/drivers/tapdisk-image.c tools/blktap2/drivers/tapdisk-image.h tools/blktap2/drivers/tapdisk-ipc.c tools/blktap2/drivers/tapdisk-ipc.h 
tools/blktap2/drivers/tapdisk-queue.c tools/blktap2/drivers/tapdisk-queue.h tools/blktap2/drivers/tapdisk-server.c tools/blktap2/drivers/tapdisk-server.h tools/blktap2/drivers/tapdisk-utils.c tools/blktap2/drivers/tapdisk-utils.h tools/blktap2/drivers/tapdisk-vbd.c tools/blktap2/drivers/tapdisk-vbd.h tools/blktap2/drivers/tapdisk.h tools/blktap2/drivers/tapdisk2.c tools/blktap2/include/Makefile tools/blktap2/include/blktaplib.h tools/blktap2/include/list.h tools/blktap2/lvm/Makefile tools/blktap2/vhd/Makefile tools/blktap2/vhd/lib/Makefile tools/blktap2/vhd/lib/libvhd.c tools/blktap2/vhd/lib/vhd-util-scan.c tools/console/client/main.c tools/examples/Makefile tools/examples/xend-config.sxp tools/examples/xmexample.hvm tools/firmware/hvmloader/acpi/Makefile tools/firmware/hvmloader/acpi/acpi2_0.h tools/firmware/hvmloader/acpi/build.c tools/firmware/hvmloader/acpi/dsdt.asl tools/firmware/hvmloader/acpi/dsdt.c tools/firmware/hvmloader/acpi/mk_dsdt.c tools/firmware/hvmloader/acpi/ssdt_pm.h tools/firmware/hvmloader/acpi/ssdt_tpm.asl tools/firmware/hvmloader/acpi/ssdt_tpm.h tools/firmware/hvmloader/acpi/static_tables.c tools/firmware/hvmloader/config.h tools/firmware/hvmloader/hvmloader.c tools/firmware/hvmloader/mp_tables.c tools/firmware/hvmloader/smbios.c tools/firmware/hvmloader/util.c tools/firmware/hvmloader/util.h tools/firmware/rombios/rombios.c tools/flask/Makefile tools/flask/libflask/Makefile tools/flask/libflask/flask_op.c tools/flask/libflask/include/flask.h tools/flask/libflask/include/libflask.h tools/flask/loadpolicy/Makefile tools/flask/loadpolicy/loadpolicy.c tools/flask/policy/Makefile tools/flask/policy/policy/flask/access_vectors tools/flask/policy/policy/modules/xen/xen.if tools/flask/policy/policy/modules/xen/xen.te tools/flask/utils/Makefile tools/flask/utils/getenforce.c tools/flask/utils/loadpolicy.c tools/flask/utils/setenforce.c tools/fs-back/Makefile tools/fs-back/fs-backend.c tools/fs-back/fs-backend.h tools/fs-back/fs-xenbus.c 
tools/hotplug/Linux/Makefile tools/hotplug/Linux/blktap tools/hotplug/Linux/block tools/hotplug/Linux/network-bridge tools/hotplug/Linux/xen-hotplug-cleanup tools/hotplug/Linux/xend.rules tools/hotplug/Makefile tools/hotplug/NetBSD/Makefile tools/include/xen-sys/Linux/privcmd.h tools/libfsimage/ext2fs/fsys_ext2fs.c tools/libfsimage/reiserfs/fsys_reiserfs.c tools/libxc/Makefile tools/libxc/ia64/xc_ia64_linux_save.c tools/libxc/xc_core.c tools/libxc/xc_core_x86.c tools/libxc/xc_core_x86.h tools/libxc/xc_cpuid_x86.c tools/libxc/xc_dom_bzimageloader.c tools/libxc/xc_dom_compat_linux.c tools/libxc/xc_dom_core.c tools/libxc/xc_domain.c tools/libxc/xc_domain_restore.c tools/libxc/xc_domain_save.c tools/libxc/xc_linux.c tools/libxc/xc_mem_event.c tools/libxc/xc_mem_paging.c tools/libxc/xc_memshr.c tools/libxc/xc_minios.c tools/libxc/xc_misc.c tools/libxc/xc_offline_page.c tools/libxc/xc_physdev.c tools/libxc/xc_pm.c tools/libxc/xc_private.c tools/libxc/xc_private.h tools/libxc/xc_resume.c tools/libxc/xc_tbuf.c tools/libxc/xenctrl.h tools/libxc/xenguest.h tools/libxc/xg_private.c tools/libxc/xg_private.h tools/libxc/xg_save_restore.h tools/libxl/Makefile tools/libxl/flexarray.c tools/libxl/flexarray.h tools/libxl/libxl.c tools/libxl/libxl.h tools/libxl/libxl_device.c tools/libxl/libxl_dom.c tools/libxl/libxl_exec.c tools/libxl/libxl_internal.c tools/libxl/libxl_internal.h tools/libxl/libxl_osdeps.h tools/libxl/libxl_utils.c tools/libxl/libxl_utils.h tools/libxl/libxl_xshelp.c tools/libxl/libxlu_cfg.c tools/libxl/libxlu_cfg_i.h tools/libxl/libxlu_cfg_l.c tools/libxl/libxlu_cfg_l.h tools/libxl/libxlu_cfg_l.l tools/libxl/libxlu_cfg_y.c tools/libxl/libxlu_cfg_y.h tools/libxl/libxlu_cfg_y.y tools/libxl/libxlu_internal.h tools/libxl/libxlutil.h tools/libxl/osdeps.c tools/libxl/xenguest.c tools/libxl/xl.c tools/memshr/Makefile tools/memshr/bidir-daemon.c tools/memshr/bidir-daemon.h tools/memshr/bidir-hash.c tools/memshr/bidir-hash.h tools/memshr/bidir-namedefs.h 
tools/memshr/interface.c tools/memshr/memshr-priv.h tools/memshr/memshr.h tools/memshr/shm.c tools/memshr/shm.h tools/misc/gtraceview.c tools/misc/xen-detect.c tools/misc/xen-hvmctx.c tools/misc/xenpm.c tools/pygrub/src/ExtLinuxConf.py tools/pygrub/src/GrubConf.py tools/pygrub/src/pygrub tools/python/Makefile tools/python/get-path tools/python/install-wrap tools/python/setup.py tools/python/xen/lowlevel/checkpoint/checkpoint.c tools/python/xen/lowlevel/checkpoint/checkpoint.h tools/python/xen/lowlevel/checkpoint/libcheckpoint.c tools/python/xen/lowlevel/flask/flask.c tools/python/xen/lowlevel/netlink/libnetlink.c tools/python/xen/lowlevel/netlink/libnetlink.h tools/python/xen/lowlevel/netlink/netlink.c tools/python/xen/lowlevel/xc/xc.c tools/python/xen/remus/blkdev.py tools/python/xen/remus/image.py tools/python/xen/remus/netlink.py tools/python/xen/remus/profile.py tools/python/xen/remus/qdisc.py tools/python/xen/remus/save.py tools/python/xen/remus/tapdisk.py tools/python/xen/remus/util.py tools/python/xen/remus/vbd.py tools/python/xen/remus/vdi.py tools/python/xen/remus/vif.py tools/python/xen/remus/vm.py tools/python/xen/util/pci.py tools/python/xen/util/utils.py tools/python/xen/util/vscsi_util.py tools/python/xen/util/vusb_util.py tools/python/xen/util/xsm/flask/flask.py tools/python/xen/xend/MemoryPool.py tools/python/xen/xend/XendAPI.py tools/python/xen/xend/XendCheckpoint.py tools/python/xen/xend/XendConfig.py tools/python/xen/xend/XendDSCSI.py tools/python/xen/xend/XendDevices.py tools/python/xen/xend/XendDomain.py tools/python/xen/xend/XendDomainInfo.py tools/python/xen/xend/XendNode.py tools/python/xen/xend/XendOptions.py tools/python/xen/xend/XendPSCSI.py tools/python/xen/xend/XendXSPolicy.py tools/python/xen/xend/XendXSPolicyAdmin.py tools/python/xen/xend/balloon.py tools/python/xen/xend/image.py tools/python/xen/xend/server/BlktapController.py tools/python/xen/xend/server/SrvDomain.py tools/python/xen/xend/server/pciif.py 
tools/python/xen/xend/server/pciquirk.py tools/python/xen/xend/server/udevevent.py tools/python/xen/xend/server/vfbif.py tools/python/xen/xend/server/vusbif.py tools/python/xen/xend/sxp.py tools/python/xen/xm/create.dtd tools/python/xen/xm/create.py tools/python/xen/xm/getenforce.py tools/python/xen/xm/main.py tools/python/xen/xm/opts.py tools/python/xen/xm/setenforce.py tools/python/xen/xm/xenapi_create.py tools/remus/Makefile tools/remus/README tools/remus/imqebt/Makefile tools/remus/imqebt/README tools/remus/imqebt/communication.c tools/remus/imqebt/ebtables-standalone.c tools/remus/imqebt/ebtables.c tools/remus/imqebt/extensions/Makefile tools/remus/imqebt/extensions/ebt_imq.c tools/remus/imqebt/extensions/ebt_standard.c tools/remus/imqebt/extensions/ebtable_filter.c tools/remus/imqebt/getethertype.c tools/remus/imqebt/include/ebtables_u.h tools/remus/imqebt/include/ethernetdb.h tools/remus/imqebt/include/linux/if_ether.h tools/remus/imqebt/include/linux/netfilter_bridge.h tools/remus/imqebt/include/linux/netfilter_bridge/ebt_imq.h tools/remus/imqebt/include/linux/netfilter_bridge/ebtables.h tools/remus/imqebt/include/linux/types.h tools/remus/imqebt/libebtc.c tools/remus/imqebt/useful_functions.c tools/remus/kmod/Kbuild tools/remus/kmod/Makefile tools/remus/kmod/ebt_imq.c tools/remus/kmod/ebt_imq.h tools/remus/kmod/sch_queue.c tools/remus/remus tools/xcutils/xc_save.c tools/xenmon/xenbaked.c tools/xenpaging/Makefile tools/xenpaging/bitops.h tools/xenpaging/file_ops.c tools/xenpaging/file_ops.h tools/xenpaging/mem_event.h tools/xenpaging/policy.h tools/xenpaging/policy_default.c tools/xenpaging/spinlock.h tools/xenpaging/xc.c tools/xenpaging/xc.h tools/xenpaging/xenpaging.c tools/xenpaging/xenpaging.h tools/xenstat/libxenstat/Makefile tools/xenstat/libxenstat/src/xenstat.c tools/xenstat/libxenstat/src/xenstat.h tools/xenstat/libxenstat/src/xenstat_linux.c tools/xenstat/libxenstat/src/xenstat_priv.h tools/xenstat/xentop/xentop.c tools/xenstore/xenstore_client.c 
tools/xenstore/xs_lib.c tools/xentrace/xentrace.c unmodified_drivers/linux-2.6/platform-pci/platform-pci.c xen/Makefile xen/Rules.mk xen/arch/ia64/linux-xen/acpi.c xen/arch/ia64/linux-xen/iosapic.c xen/arch/ia64/linux-xen/irq_ia64.c xen/arch/ia64/linux-xen/mca.c xen/arch/ia64/linux-xen/numa.c xen/arch/ia64/linux-xen/perfmon.c xen/arch/ia64/linux-xen/sn/kernel/irq.c xen/arch/ia64/linux-xen/unwind.c xen/arch/ia64/linux/irq_lsapic.c xen/arch/ia64/vmx/mmio.c xen/arch/ia64/vmx/vlsapic.c xen/arch/ia64/vmx/vmx_init.c xen/arch/ia64/vmx/vmx_support.c xen/arch/ia64/xen/dom0_ops.c xen/arch/ia64/xen/hypercall.c xen/arch/ia64/xen/irq.c xen/arch/ia64/xen/mm.c xen/arch/ia64/xen/pci.c xen/arch/ia64/xen/xen.lds.S xen/arch/ia64/xen/xentime.c xen/arch/x86/acpi/boot.c xen/arch/x86/acpi/cpu_idle.c xen/arch/x86/acpi/cpufreq/cpufreq.c xen/arch/x86/acpi/cpufreq/powernow.c xen/arch/x86/acpi/cpuidle_menu.c xen/arch/x86/acpi/power.c xen/arch/x86/acpi/suspend.c xen/arch/x86/apic.c xen/arch/x86/boot/Makefile xen/arch/x86/boot/build32.mk xen/arch/x86/boot/cmdline.S xen/arch/x86/bzimage.c xen/arch/x86/cpu/amd.c xen/arch/x86/cpu/common.c xen/arch/x86/cpu/intel_cacheinfo.c xen/arch/x86/cpu/mcheck/amd_f10.c xen/arch/x86/cpu/mcheck/amd_k8.c xen/arch/x86/cpu/mcheck/amd_nonfatal.c xen/arch/x86/cpu/mcheck/k7.c xen/arch/x86/cpu/mcheck/mce.c xen/arch/x86/cpu/mcheck/mce.h xen/arch/x86/cpu/mcheck/mce_intel.c xen/arch/x86/cpu/mcheck/mctelem.c xen/arch/x86/cpu/mcheck/non-fatal.c xen/arch/x86/debug.c xen/arch/x86/domain.c xen/arch/x86/domain_build.c xen/arch/x86/domctl.c xen/arch/x86/e820.c xen/arch/x86/genapic/bigsmp.c xen/arch/x86/genapic/default.c xen/arch/x86/genapic/probe.c xen/arch/x86/genapic/summit.c xen/arch/x86/genapic/x2apic.c xen/arch/x86/hpet.c xen/arch/x86/hvm/Makefile xen/arch/x86/hvm/asid.c xen/arch/x86/hvm/emulate.c xen/arch/x86/hvm/hpet.c xen/arch/x86/hvm/hvm.c xen/arch/x86/hvm/intercept.c xen/arch/x86/hvm/io.c xen/arch/x86/hvm/irq.c xen/arch/x86/hvm/mtrr.c xen/arch/x86/hvm/pmtimer.c 
xen/arch/x86/hvm/quirks.c xen/arch/x86/hvm/rtc.c xen/arch/x86/hvm/stdvga.c xen/arch/x86/hvm/svm/asid.c xen/arch/x86/hvm/svm/svm.c xen/arch/x86/hvm/svm/vmcb.c xen/arch/x86/hvm/vioapic.c xen/arch/x86/hvm/vlapic.c xen/arch/x86/hvm/vmsi.c xen/arch/x86/hvm/vmx/entry.S xen/arch/x86/hvm/vmx/vmcs.c xen/arch/x86/hvm/vmx/vmx.c xen/arch/x86/hvm/vmx/vpmu.c xen/arch/x86/i8259.c xen/arch/x86/io_apic.c xen/arch/x86/irq.c xen/arch/x86/microcode_amd.c xen/arch/x86/microcode_intel.c xen/arch/x86/mm.c xen/arch/x86/mm/Makefile xen/arch/x86/mm/guest_walk.c xen/arch/x86/mm/hap/guest_walk.c xen/arch/x86/mm/hap/hap.c xen/arch/x86/mm/hap/p2m-ept.c xen/arch/x86/mm/hap/private.h xen/arch/x86/mm/mem_event.c xen/arch/x86/mm/mem_paging.c xen/arch/x86/mm/mem_sharing.c xen/arch/x86/mm/p2m.c xen/arch/x86/mm/paging.c xen/arch/x86/mm/shadow/common.c xen/arch/x86/mm/shadow/multi.c xen/arch/x86/mm/shadow/multi.h xen/arch/x86/mm/shadow/private.h xen/arch/x86/mpparse.c xen/arch/x86/msi.c xen/arch/x86/nmi.c xen/arch/x86/numa.c xen/arch/x86/oprofile/nmi_int.c xen/arch/x86/oprofile/op_model_p4.c xen/arch/x86/oprofile/op_model_ppro.c xen/arch/x86/physdev.c xen/arch/x86/platform_hypercall.c xen/arch/x86/setup.c xen/arch/x86/smpboot.c xen/arch/x86/srat.c xen/arch/x86/sysctl.c xen/arch/x86/tboot.c xen/arch/x86/time.c xen/arch/x86/traps.c xen/arch/x86/x86_32/asm-offsets.c xen/arch/x86/x86_32/entry.S xen/arch/x86/x86_32/mm.c xen/arch/x86/x86_32/traps.c xen/arch/x86/x86_64/asm-offsets.c xen/arch/x86/x86_64/compat/entry.S xen/arch/x86/x86_64/compat/traps.c xen/arch/x86/x86_64/entry.S xen/arch/x86/x86_64/mm.c xen/arch/x86/x86_64/mmconfig-shared.c xen/arch/x86/x86_64/platform_hypercall.c xen/arch/x86/x86_64/traps.c xen/arch/x86/x86_emulate/x86_emulate.c xen/arch/x86/x86_emulate/x86_emulate.h xen/arch/x86/xen.lds.S xen/common/Makefile xen/common/bunzip2.c xen/common/compat/memory.c xen/common/decompress.c xen/common/decompress.h xen/common/domain.c xen/common/domctl.c xen/common/event_channel.c 
xen/common/grant_table.c xen/common/keyhandler.c xen/common/lib.c xen/common/libelf/libelf-dominfo.c xen/common/libelf/libelf-tools.c xen/common/memory.c xen/common/page_alloc.c xen/common/rangeset.c xen/common/sched_credit.c xen/common/sched_sedf.c xen/common/schedule.c xen/common/softirq.c xen/common/timer.c xen/common/tmem.c xen/common/tmem_xen.c xen/common/trace.c xen/common/unlzma.c xen/common/xenoprof.c xen/common/xmalloc_tlsf.c xen/drivers/acpi/pmstat.c xen/drivers/acpi/utilities/utglobal.c xen/drivers/char/console.c xen/drivers/char/ns16550.c xen/drivers/char/serial.c xen/drivers/cpufreq/cpufreq.c xen/drivers/cpufreq/cpufreq_ondemand.c xen/drivers/cpufreq/utility.c xen/drivers/passthrough/amd/iommu_acpi.c xen/drivers/passthrough/amd/iommu_init.c xen/drivers/passthrough/amd/iommu_intr.c xen/drivers/passthrough/amd/pci_amd_iommu.c xen/drivers/passthrough/io.c xen/drivers/passthrough/iommu.c xen/drivers/passthrough/pci.c xen/drivers/passthrough/vtd/dmar.c xen/drivers/passthrough/vtd/ia64/ats.c xen/drivers/passthrough/vtd/ia64/vtd.c xen/drivers/passthrough/vtd/intremap.c xen/drivers/passthrough/vtd/iommu.c xen/drivers/passthrough/vtd/iommu.h xen/drivers/passthrough/vtd/qinval.c xen/drivers/passthrough/vtd/utils.c xen/drivers/passthrough/vtd/vtd.h xen/drivers/passthrough/vtd/x86/ats.c xen/drivers/passthrough/vtd/x86/vtd.c xen/drivers/video/vesa.c xen/include/acpi/cpufreq/cpufreq.h xen/include/asm-ia64/config.h xen/include/asm-ia64/hvm/iommu.h xen/include/asm-ia64/hvm/support.h xen/include/asm-ia64/linux-xen/asm/hw_irq.h xen/include/asm-ia64/mm.h xen/include/asm-ia64/vmx.h xen/include/asm-x86/acpi.h xen/include/asm-x86/apic.h xen/include/asm-x86/apicdef.h xen/include/asm-x86/bug.h xen/include/asm-x86/config.h xen/include/asm-x86/cpufeature.h xen/include/asm-x86/domain.h xen/include/asm-x86/genapic.h xen/include/asm-x86/hap.h xen/include/asm-x86/hvm/asid.h xen/include/asm-x86/hvm/domain.h xen/include/asm-x86/hvm/hvm.h xen/include/asm-x86/hvm/iommu.h 
xen/include/asm-x86/hvm/support.h xen/include/asm-x86/hvm/svm/amd-iommu-proto.h xen/include/asm-x86/hvm/svm/asid.h xen/include/asm-x86/hvm/svm/intr.h xen/include/asm-x86/hvm/svm/vmcb.h xen/include/asm-x86/hvm/vcpu.h xen/include/asm-x86/hvm/vmx/vmcs.h xen/include/asm-x86/hvm/vmx/vmx.h xen/include/asm-x86/io_apic.h xen/include/asm-x86/irq.h xen/include/asm-x86/mach-generic/mach_apic.h xen/include/asm-x86/mem_event.h xen/include/asm-x86/mem_paging.h xen/include/asm-x86/mem_sharing.h xen/include/asm-x86/mm.h xen/include/asm-x86/mpspec.h xen/include/asm-x86/msi.h xen/include/asm-x86/msr-index.h xen/include/asm-x86/msr.h xen/include/asm-x86/mtrr.h xen/include/asm-x86/numa.h xen/include/asm-x86/p2m.h xen/include/asm-x86/page.h xen/include/asm-x86/paging.h xen/include/asm-x86/processor.h xen/include/asm-x86/shadow.h xen/include/asm-x86/shared.h xen/include/asm-x86/smp.h xen/include/asm-x86/system.h xen/include/asm-x86/time.h xen/include/asm-x86/traps.h xen/include/public/arch-x86/hvm/save.h xen/include/public/domctl.h xen/include/public/grant_table.h xen/include/public/hvm/hvm_info_table.h xen/include/public/hvm/ioreq.h xen/include/public/io/ring.h xen/include/public/io/usbif.h xen/include/public/mem_event.h xen/include/public/memory.h xen/include/public/physdev.h xen/include/public/platform.h xen/include/public/sysctl.h xen/include/public/trace.h xen/include/public/vcpu.h xen/include/public/xen.h xen/include/public/xsm/flask_op.h xen/include/xen/compat.h xen/include/xen/config.h xen/include/xen/cpuidle.h xen/include/xen/ctype.h xen/include/xen/decompress.h xen/include/xen/hvm/iommu.h xen/include/xen/init.h xen/include/xen/iommu.h xen/include/xen/irq.h xen/include/xen/keyhandler.h xen/include/xen/mm.h xen/include/xen/nodemask.h xen/include/xen/pci.h xen/include/xen/pci_regs.h xen/include/xen/rangeset.h xen/include/xen/sched-if.h xen/include/xen/sched.h xen/include/xen/shared.h xen/include/xen/smp.h xen/include/xen/softirq.h xen/include/xen/timer.h xen/include/xen/tmem.h 
xen/include/xen/tmem_xen.h xen/kdb/include/kdbinc.h xen/kdb/kdb_cmds.c xen/kdb/kdbmain.c xen/kdb/x86/udis86-1.6/kdb_dis.c xen/xsm/flask/avc.c xen/xsm/flask/flask_op.c xen/xsm/flask/hooks.c xen/xsm/flask/include/av_perm_to_string.h xen/xsm/flask/include/av_permissions.h xen/xsm/flask/include/avc.h xen/xsm/flask/include/security.h xen/xsm/flask/ss/policydb.c xen/xsm/flask/ss/policydb.h xen/xsm/flask/ss/services.c
line diff
     1.1 --- a/.hgignore	Mon Nov 02 19:35:54 2009 -0800
     1.2 +++ b/.hgignore	Fri Mar 19 18:36:57 2010 -0700
     1.3 @@ -137,7 +137,8 @@
     1.4  ^tools/firmware/etherboot/gpxe/.*$
     1.5  ^tools/firmware/extboot/extboot.img$
     1.6  ^tools/firmware/extboot/signrom$
     1.7 -^tools/firmware/hvmloader/acpi/acpigen$
     1.8 +^tools/firmware/hvmloader/acpi/dsdt.*\.c$
     1.9 +^tools/firmware/hvmloader/acpi/ssdt_.*\.h$
    1.10  ^tools/firmware/hvmloader/hvmloader$
    1.11  ^tools/firmware/hvmloader/roms\.h$
    1.12  ^tools/firmware/rombios/BIOS-bochs-[^/]*$
    1.13 @@ -147,6 +148,9 @@
    1.14  ^tools/firmware/vgabios/vbetables-gen$
    1.15  ^tools/firmware/vgabios/vbetables\.h$
    1.16  ^tools/flask/loadpolicy/flask-loadpolicy$
    1.17 +^tools/flask/utils/flask-getenforce$
    1.18 +^tools/flask/utils/flask-loadpolicy$
    1.19 +^tools/flask/utils/flask-setenforce$
    1.20  ^tools/fs-back/fs-backend$
    1.21  ^tools/hotplug/common/hotplugpath\.sh$
    1.22  ^tools/include/xen/.*$
    1.23 @@ -176,6 +180,8 @@
    1.24  ^tools/libxen/libxenapi-
    1.25  ^tools/libxen/test/test_bindings$
    1.26  ^tools/libxen/test/test_event_handling$
    1.27 +^tools/libxl/libxlu_cfg_y\.output$
    1.28 +^tools/libxl/xl$
    1.29  ^tools/libaio/src/.*\.ol$
    1.30  ^tools/libaio/src/.*\.os$
    1.31  ^tools/misc/cpuperf/cpuperf-perfcntr$
    1.32 @@ -196,9 +202,12 @@
    1.33  ^tools/misc/xen-hvmctx$
    1.34  ^tools/misc/gtraceview$
    1.35  ^tools/misc/gtracestat$
    1.36 +^tools/misc/xenlockprof$
    1.37  ^tools/pygrub/build/.*$
    1.38  ^tools/python/build/.*$
    1.39  ^tools/python/xen/util/path\.py$
    1.40 +^tools/remus/imqebt/imqebt$
    1.41 +^tools/remus/kmod/.*(\.cmd|\.mod|\.ko|\.mod\.c|\.symvers|\.xen)$
    1.42  ^tools/security/secpol_tool$
    1.43  ^tools/security/xen/.*$
    1.44  ^tools/security/xensec_tool$
    1.45 @@ -230,6 +239,7 @@
    1.46  ^tools/xenfb/vncfb$
    1.47  ^tools/xenmon/xentrace_setmask$
    1.48  ^tools/xenmon/xenbaked$
    1.49 +^tools/xenpaging/xenpaging$
    1.50  ^tools/xenpmd/xenpmd$
    1.51  ^tools/xenstat/xentop/xentop$
    1.52  ^tools/xenstore/testsuite/tmp/.*$
     2.1 --- a/.hgtags	Mon Nov 02 19:35:54 2009 -0800
     2.2 +++ b/.hgtags	Fri Mar 19 18:36:57 2010 -0700
     2.3 @@ -36,3 +36,9 @@ 1e99ba54035623731bc7318a8357aa6a118c5da1
     2.4  d611d9ac6d0271b53eb1d4e5d0c4ef20b269eea8 3.4.0-rc1
     2.5  087854cf3ed9e30ce6bcf7499c9675b759e1e9e7 3.4.0-rc2
     2.6  22e01301ff64c6f9f835f604523ac019f0f5e993 3.4.0-rc3
     2.7 +67b5ad8ae87e64159807374ad66d5b5b2fb2ca1f 4.0.0-rc1
     2.8 +e5e4573bcaba68a8b93a35768c825c1e8daa23be 4.0.0-rc2
     2.9 +7d565d58f49859a2161f0e74844773d3b3312634 4.0.0-rc3
    2.10 +912295f1b1f30307975c7514569f6f9c8faae4a7 4.0.0-rc4
    2.11 +92f2ee87e5018073edc08734b425bc60bcd80bcd 4.0.0-rc5
    2.12 +b4a1832a916f1e8f2aa2ad5b1efa155f9dd0cb4f 4.0.0-rc6
     3.1 --- a/Config.mk	Mon Nov 02 19:35:54 2009 -0800
     3.2 +++ b/Config.mk	Fri Mar 19 18:36:57 2010 -0700
     3.3 @@ -1,7 +1,7 @@
     3.4  # -*- mode: Makefile; -*-
     3.5  
     3.6  # A debug build of Xen and tools?
     3.7 -debug ?= y
     3.8 +debug ?= n
     3.9  
    3.10  XEN_COMPILE_ARCH    ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
    3.11                           -e s/i86pc/x86_32/ -e s/amd64/x86_64/)
    3.12 @@ -35,6 +35,9 @@ EXTRA_INCLUDES += $(EXTRA_PREFIX)/includ
    3.13  EXTRA_LIB += $(EXTRA_PREFIX)/$(LIBLEAFDIR)
    3.14  endif
    3.15  
    3.16 +BISON	?= bison
    3.17 +FLEX	?= flex
    3.18 +
    3.19  PYTHON      ?= python
    3.20  PYTHON_PREFIX_ARG ?= --prefix="$(PREFIX)"
    3.21  # The above requires that PREFIX contains *no spaces*. This variable is here
    3.22 @@ -153,9 +156,10 @@ QEMU_REMOTE=http://xenbits.xensource.com
    3.23  # CONFIG_QEMU ?= ../qemu-xen.git
    3.24  CONFIG_QEMU ?= $(QEMU_REMOTE)
    3.25  
    3.26 -QEMU_TAG ?= 71324566f3b95bb88105659439adaef1d5bd155c
    3.27 -# Tue Oct 13 18:16:42 2009 +0100
    3.28 -# passthrough: support passthrough in stubdoms
    3.29 +QEMU_TAG := xen-4.0.0-rc6
    3.30 +#QEMU_TAG ?= e5d14857cd67490bf956d97c8888c0be95ed3f78
    3.31 +# Thu Feb 18 15:36:29 2010 +0000
    3.32 +# When xen_platform_pci=0 also disable fixed Xen platform ioports
    3.33  
    3.34  OCAML_XENSTORED_REPO=http://xenbits.xensource.com/ext/xen-ocaml-tools.hg
    3.35  
     4.1 --- a/README	Mon Nov 02 19:35:54 2009 -0800
     4.2 +++ b/README	Fri Mar 19 18:36:57 2010 -0700
     4.3 @@ -1,10 +1,10 @@
     4.4  #################################
     4.5 - __  __            _____ _  _   
     4.6 - \ \/ /___ _ __   |___ /| || |  
     4.7 -  \  // _ \ '_ \    |_ \| || |_ 
     4.8 -  /  \  __/ | | |  ___) |__   _|
     4.9 - /_/\_\___|_| |_| |____(_) |_|  
    4.10 -                                 
    4.11 + __  __            _  _    ___  
    4.12 + \ \/ /___ _ __   | || |  / _ \ 
    4.13 +  \  // _ \ '_ \  | || |_| | | |
    4.14 +  /  \  __/ | | | |__   _| |_| |
    4.15 + /_/\_\___|_| |_|    |_|(_)___/ 
    4.16 +
    4.17  #################################
    4.18  
    4.19  http://www.xen.org/
    4.20 @@ -17,14 +17,12 @@ Systems Research Group of the University
    4.21  Laboratory, as part of the UK-EPSRC funded XenoServers project.  Xen
    4.22  is freely-distributable Open Source software, released under the GNU
    4.23  GPL. Since its initial public release, Xen has grown a large
    4.24 -development community, spearheaded by XenSource Inc, a company created
    4.25 -by the original Xen development team to build enterprise products
    4.26 -around Xen.
    4.27 +development community, spearheaded by xen.org (http://www.xen.org).
    4.28  
    4.29 -The 3.4 release offers excellent performance, hardware support and
    4.30 +The 4.0 release offers excellent performance, hardware support and
    4.31  enterprise-grade features such as x86_32-PAE, x86_64, SMP guests and
    4.32 -live relocation of VMs. Ports to Linux 2.6, Linux 2.4, NetBSD, FreeBSD
    4.33 -and Solaris are available from the community.
    4.34 +live relocation of VMs. Ports to Linux, NetBSD, FreeBSD and Solaris
    4.35 +are available from the community.
    4.36  
    4.37  This file contains some quick-start instructions to install Xen on
    4.38  your system. For full documentation, see the Xen User Manual. If this
    4.39 @@ -33,53 +31,8 @@ is a pre-built release then you can find
    4.40  If you have a source release, then 'make -C docs' will build the
    4.41  manual at docs/pdf/user.pdf.
    4.42  
    4.43 -Quick-Start Guide - Pre-Built Binary Release
    4.44 -============================================
    4.45 -
    4.46 -[NB. Unless noted otherwise, all the following steps should be
    4.47 -performed with root privileges.]
    4.48 -
    4.49 -1. Install the binary distribution onto your filesystem:
    4.50 -
    4.51 -    # sh ./install.sh
    4.52 -
    4.53 -   Among other things, this will install Xen and Xen-ready Linux
    4.54 -   kernel files in /boot, kernel modules and Python packages in /lib,
    4.55 -   and various control tools in standard 'bin' directories.
    4.56 -
    4.57 -2. Configure your bootloader to boot Xen and an initial Linux virtual
    4.58 -   machine. Note that Xen currently only works with GRUB and pxelinux
    4.59 -   derived boot loaders: less common alternatives such as LILO are
    4.60 -   *not* supported. You can most likely find your GRUB menu file at
    4.61 -   /boot/grub/menu.lst: edit this file to include an entry like the
    4.62 -   following:
    4.63 -
    4.64 -    title Xen 3.4 / XenLinux 2.6
    4.65 -       kernel /boot/xen-3.4.gz console=vga
    4.66 -       module /boot/vmlinuz-2.6-xen root=<root-dev> ro console=tty0
    4.67 -       module /boot/initrd-2.6-xen.img
    4.68 -
    4.69 -   NB: Not all kernel configs need an initial ram disk (initrd), but
    4.70 -   if you do specify one you'll need to use the 'module' grub directive
    4.71 -   rather than 'initrd'.
    4.72 -
    4.73 -   The linux command line takes all the usual options, such as
    4.74 -   root=<root-dev> to specify your usual root partition (e.g.,
    4.75 -   /dev/hda1).
    4.76 -
    4.77 -   The Xen command line takes a number of optional arguments described
    4.78 -   in the manual. The most common is 'dom0_mem=xxxM' which sets the
    4.79 -   amount of memory to allocate for use by your initial virtual
    4.80 -   machine (known as domain 0). Note that Xen itself reserves about
    4.81 -   32MB memory for internal use, which is not available for allocation
    4.82 -   to virtual machines.
    4.83 -
    4.84 -3. Reboot your system and select the "Xen 3.4 / XenLinux 2.6" menu
    4.85 -   option. After booting Xen, Linux will start and your initialisation
    4.86 -   scripts should execute in the usual way.
    4.87 -
    4.88 -Quick-Start Guide - Source Release
    4.89 -==================================
    4.90 +Quick-Start Guide
    4.91 +=================
    4.92  
    4.93  First, there are a number of prerequisites for building a Xen source
    4.94  release. Make sure you have all the following installed, either by
    4.95 @@ -96,6 +49,7 @@ provided by your Linux distributor:
    4.96      * bridge-utils package (/sbin/brctl)
    4.97      * iproute package (/sbin/ip)
    4.98      * hotplug or udev
    4.99 +    * GNU bison and GNU flex
   4.100  
   4.101  [NB. Unless noted otherwise, all the following steps should be
   4.102  performed with root privileges.]
     5.1 --- a/buildconfigs/mk.linux-2.6-pvops	Mon Nov 02 19:35:54 2009 -0800
     5.2 +++ b/buildconfigs/mk.linux-2.6-pvops	Fri Mar 19 18:36:57 2010 -0700
     5.3 @@ -5,7 +5,11 @@ IMAGE_TARGET ?= bzImage
     5.4  
     5.5  XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config
     5.6  
     5.7 +ifeq ($(GIT_HTTP),y)
     5.8  XEN_LINUX_GIT_URL ?= http://www.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git
     5.9 +else
    5.10 +XEN_LINUX_GIT_URL ?= git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git
    5.11 +endif
    5.12  XEN_LINUX_GIT_REMOTEBRANCH ?= xen/master
    5.13  
    5.14  EXTRAVERSION ?=
     6.1 --- a/buildconfigs/mk.linux-2.6-tip-latest	Mon Nov 02 19:35:54 2009 -0800
     6.2 +++ b/buildconfigs/mk.linux-2.6-tip-latest	Fri Mar 19 18:36:57 2010 -0700
     6.3 @@ -5,7 +5,11 @@ IMAGE_TARGET ?= bzImage vmlinux
     6.4  
     6.5  XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config
     6.6  
     6.7 +ifeq ($(GIT_HTTP),y)
     6.8  XEN_LINUX_GIT_URL ?= http://www.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip.git
     6.9 +else
    6.10 +XEN_LINUX_GIT_URL ?= git://git.kernel.org/pub/scm/linux/kernel/git/x86/linux-2.6-tip.git
    6.11 +endif
    6.12  XEN_LINUX_GIT_REMOTEBRANCH ?= auto-latest
    6.13  
    6.14  EXTRAVERSION ?=
     7.1 --- a/config/NetBSD.mk	Mon Nov 02 19:35:54 2009 -0800
     7.2 +++ b/config/NetBSD.mk	Fri Mar 19 18:36:57 2010 -0700
     7.3 @@ -6,3 +6,5 @@ CURSES_LIBS = -lcurses
     7.4  LIBLEAFDIR_x86_64 = lib
     7.5  LIBEXEC = $(PREFIX)/libexec
     7.6  PRIVATE_BINDIR = $(BINDIR)
     7.7 +
     7.8 +WGET = ftp
     8.1 --- a/config/StdGNU.mk	Mon Nov 02 19:35:54 2009 -0800
     8.2 +++ b/config/StdGNU.mk	Fri Mar 19 18:36:57 2010 -0700
     8.3 @@ -16,6 +16,8 @@ MSGMERGE   = msgmerge
     8.4  # Allow git to be wrappered in the environment
     8.5  GIT        ?= git
     8.6  
     8.7 +WGET       ?= wget -c
     8.8 +
     8.9  INSTALL      = install
    8.10  INSTALL_DIR  = $(INSTALL) -d -m0755 -p
    8.11  INSTALL_DATA = $(INSTALL) -m0644 -p
     9.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     9.2 +++ b/docs/misc/cpuid-config-for-guest.txt	Fri Mar 19 18:36:57 2010 -0700
     9.3 @@ -0,0 +1,23 @@
     9.4 +CPUID emulation for guest
     9.5 +-------------------------
     9.6 +
     9.7 +When HVM guest tries to execute CPUID, or PV guest tries to execute XEN_CPUID,
     9.8 +the xen hypervisor traps and emulates them.
     9.9 +
    9.10 +For HVM guest and PV DomU guest, xen's CPUID emulation can be adjusted using
    9.11 +the guest configuration file if necessary (e.g., to supply better support for
    9.12 +guest live migration). The CPUID syntax in guest configuration file is
    9.13 +described in detail in the examples like /etc/xen/xmexample.hvm,
    9.14 +/etc/xen/xmexample.hvm-stubdom.
    9.15 +
    9.16 +However, a user (or an administrator) must be aware that the CPUID in guest
    9.17 +configuration file can NOT be configured casually. The default CPUID
    9.18 +configuration should be safe, but illegal configuration can cause unexpected
    9.19 +behaviors of the guest -- and can even crash the guest.
    9.20 +
    9.21 +For example, we should not expose the MONITOR CPUID feature flag (ECX bit 3;
    9.22 +CPUID executed EAX = 1) to HVM guest, otherwise, on guest's attempt of
    9.23 +executing MWAIT, the VMExit handler in Xen would inject #UD (Invalid Opcode
    9.24 +Exception) into the HVM guest, and guest kernel would panic.
    9.25 +
    9.26 +/* We can add more unsafe CPUID configuration here in future. */
    10.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    10.2 +++ b/docs/misc/pvrdtscp.c	Fri Mar 19 18:36:57 2010 -0700
    10.3 @@ -0,0 +1,307 @@
    10.4 +/* pvrdtscp algorithm
    10.5 + *
    10.6 + * This sample code demonstrates the use of the paravirtualized rdtscp
    10.7 + * algorithm.  Using this algorithm, an application may communicate with
    10.8 + * the Xen hypervisor (version 4.0+) to obtain timestamp information which
    10.9 + * is both monotonically increasing and has a fixed 1 GHz rate, even across
   10.10 + * migrations between machines with different TSC rates and offsets.
   10.11 + * Further, the algorithm provides performance near the performance of a
   10.12 + * native rdtsc/rdtscp instruction -- much faster than emulation PROVIDED
   10.13 + * the application is running on a machine on which the rdtscp instruction
   10.14 + * is supported and TSC is "safe". The application must also be running in a
   10.15 + * PV domain.  (HVM domains may be supported at a later time.) On machines
   10.16 + * where TSC is unsafe or the rdtscp instruction is not supported, Xen
   10.17 + * (v4.0+) provides emulation which is slower but consistent with the pvrdtscp
   10.18 + * algorithm, thus providing support for the algorithm for live migration
   10.19 + * across all machines.
   10.20 + *
   10.21 + * More information can be found within the Xen (4.0+) source tree at
   10.22 + *  docs/misc/tscmode.txt
   10.23 + *
   10.24 + * Copyright (c) 2009 Oracle Corporation and/or its affiliates.
   10.25 + * All rights reserved
   10.26 + * Written by: Dan Magenheimer <dan.magenheimer@oracle.com>
   10.27 + * 
   10.28 + * This code is derived from code licensed under the GNU
   10.29 + * General Public License ("GPL") version 2 and is therefore itself
   10.30 + * also licensed under the GPL version 2.
   10.31 + *
   10.32 + * This code is known to compile and run on Oracle Enterprise Linux 5 Update 2
   10.33 + * using gcc version 4.1.2, but its purpose is to describe the pvrdtscp
   10.34 + * algorithm and its ABI to Xen version 4.0+ 
   10.35 + */
   10.36 +
   10.37 +#include <stdio.h>
   10.38 +#include <stdlib.h>
   10.39 +#include <string.h>
   10.40 +#include <sys/wait.h>
   10.41 +
   10.42 +#ifdef __LP64__
   10.43 +#define __X86_64__
   10.44 +typedef unsigned short u16;
   10.45 +typedef unsigned int u32;
   10.46 +typedef unsigned long u64;
   10.47 +typedef int i32;
   10.48 +typedef long i64;
   10.49 +#define NSEC_PER_SEC 1000000000
   10.50 +#else
   10.51 +#define __X86_32__
   10.52 +typedef unsigned int u16;
   10.53 +typedef unsigned long u32;
   10.54 +typedef unsigned long long u64;
   10.55 +typedef long i32;
   10.56 +typedef long long i64;
   10.57 +#define NSEC_PER_SEC 1000000000L
   10.58 +#endif
   10.59 +
   10.60 +static inline void hvm_cpuid(u32 idx, u32 sub,
   10.61 +				u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
   10.62 +{
   10.63 +	*eax = idx, *ecx = sub;
   10.64 +	asm("cpuid" : "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx)
   10.65 +	    : "0" (*eax), "2" (*ecx));
   10.66 +}
   10.67 +
   10.68 +static inline void pv_cpuid(u32 idx, u32 sub,
   10.69 +				u32 *eax, u32 *ebx, u32 *ecx, u32 *edx)
   10.70 +{
   10.71 +	*eax = idx, *ecx = sub;
   10.72 +	asm volatile ( "ud2a ; .ascii \"xen\"; cpuid" : "=a" (*eax),
   10.73 +            "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (*eax), "2" (*ecx));
   10.74 +}
   10.75 +
   10.76 +static inline u64 do_rdtscp(u32 *aux)
   10.77 +{
   10.78 +static u64 last = 0;
   10.79 +	u32 lo32, hi32;
   10.80 +	u64 val;
   10.81 +
   10.82 +	asm volatile(".byte 0x0f,0x01,0xf9":"=a"(lo32),"=d"(hi32),"=c" (*aux));
   10.83 +	val = lo32 | ((u64)hi32 << 32);
   10.84 +	return val;
   10.85 +}
   10.86 +
   10.87 +static inline int get_xen_tsc_mode(void)
   10.88 +{
   10.89 +	u32 val, dummy1, dummy2, dummy3;
   10.90 +	pv_cpuid(0x40000003,0,&dummy1,&val,&dummy2,&dummy3);
   10.91 +	return val;
   10.92 +}
   10.93 +
   10.94 +static inline int get_xen_vtsc(void)
   10.95 +{
   10.96 +	u32 val, dummy1, dummy2, dummy3;
   10.97 +	pv_cpuid(0x40000003,0,&val,&dummy1,&dummy2,&dummy3);
   10.98 +	return val & 1;
   10.99 +}
  10.100 +
  10.101 +static inline int get_xen_vtsc_khz(void)
  10.102 +{
  10.103 +	u32 val, dummy1, dummy2, dummy3;
  10.104 +	pv_cpuid(0x40000003,0,&dummy1,&dummy2,&val,&dummy3);
  10.105 +	return val;
  10.106 +}
  10.107 +
  10.108 +static inline u32 get_xen_cpu_khz(void)
  10.109 +{
  10.110 +	u32 cpu_khz, dummy1, dummy2, dummy3;
  10.111 +	pv_cpuid(0x40000003,2,&cpu_khz,&dummy1,&dummy2,&dummy3);
  10.112 +	return cpu_khz;
  10.113 +}
  10.114 +
  10.115 +static inline u32 get_xen_incarnation(void)
  10.116 +{
  10.117 +	u32 incarn, dummy1, dummy2, dummy3;
  10.118 +	pv_cpuid(0x40000003,0,&dummy1,&dummy2,&dummy3,&incarn);
  10.119 +	return incarn;
  10.120 +}
  10.121 +
  10.122 +static inline void get_xen_time_values(u64 *offset, u32 *mul_frac, u32 *shift)
  10.123 +{
  10.124 +	u32 off_lo, off_hi, sys_lo, sys_hi, dummy;
  10.125 +
  10.126 +	pv_cpuid(0x40000003,1,&off_lo,&off_hi,mul_frac,shift);
  10.127 +	*offset = off_lo | ((u64)off_hi << 32);
  10.128 +}
  10.129 +
  10.130 +static inline u64 scale_delta(u64 delta, u32 tsc_mul_frac, i32 tsc_shift)
  10.131 +{
  10.132 +    u64 product;
  10.133 +#ifdef __X86_32__
  10.134 +    u32 tmp1, tmp2;
  10.135 +#endif
  10.136 +
  10.137 +    if ( tsc_shift < 0 )
  10.138 +        delta >>= -tsc_shift;
  10.139 +    else
  10.140 +        delta <<= tsc_shift;
  10.141 +
  10.142 +#ifdef __X86_32__
  10.143 +    asm (
  10.144 +        "mul  %5       ; "
  10.145 +        "mov  %4,%%eax ; "
  10.146 +        "mov  %%edx,%4 ; "
  10.147 +        "mul  %5       ; "
  10.148 +        "xor  %5,%5    ; "
  10.149 +        "add  %4,%%eax ; "
  10.150 +        "adc  %5,%%edx ; "
  10.151 +        : "=A" (product), "=r" (tmp1), "=r" (tmp2)
  10.152 +        : "a" ((u32)delta), "1" ((u32)(delta >> 32)), "2" (tsc_mul_frac) );
  10.153 +#else
  10.154 +    asm (
  10.155 +        "mul %%rdx ; shrd $32,%%rdx,%%rax"
  10.156 +        : "=a" (product) : "0" (delta), "d" ((u64)tsc_mul_frac) );
  10.157 +#endif
  10.158 +
  10.159 +    return product;
  10.160 +}
  10.161 +
  10.162 +static inline u64 get_pvrdtscp_timestamp(int *discontinuity)
  10.163 +{
  10.164 +	static int firsttime = 1;
  10.165 +	static u64 last_pvrdtscp_timestamp = 0;
  10.166 +	static u32 last_tsc_aux;
  10.167 +	static u64 xen_ns_offset;
  10.168 +	static u32 xen_tsc_to_ns_mul_frac, xen_tsc_to_ns_shift;
  10.169 +	u32 this_tsc_aux;
  10.170 +	u64 timestamp, cur_tsc, cur_ns;
  10.171 +
  10.172 +	if (firsttime) {
  10.173 +		cur_tsc = do_rdtscp(&last_tsc_aux);
  10.174 +		get_xen_time_values(&xen_ns_offset, &xen_tsc_to_ns_mul_frac,
  10.175 +					&xen_tsc_to_ns_shift);
  10.176 +		cur_ns = scale_delta(cur_tsc, xen_tsc_to_ns_mul_frac,
  10.177 +					xen_tsc_to_ns_shift);
  10.178 +		timestamp = cur_ns - xen_ns_offset;
  10.179 +		last_pvrdtscp_timestamp = timestamp;
  10.180 +		firsttime = 0;
  10.181 +	}
  10.182 +	cur_tsc = do_rdtscp(&this_tsc_aux);
  10.183 +	*discontinuity = 0;
  10.184 +	while (this_tsc_aux != last_tsc_aux) {
  10.185 +		/* if tsc_aux changed, try again */
  10.186 +		last_tsc_aux = this_tsc_aux;
  10.187 +		get_xen_time_values(&xen_ns_offset, &xen_tsc_to_ns_mul_frac,
  10.188 +					&xen_tsc_to_ns_shift);
  10.189 +		cur_tsc = do_rdtscp(&this_tsc_aux);
  10.190 +		*discontinuity = 1;
  10.191 +	}
  10.192 +
  10.193 +	/* compute nsec from TSC and Xen time values */
  10.194 +	cur_ns = scale_delta(cur_tsc, xen_tsc_to_ns_mul_frac,
  10.195 +					xen_tsc_to_ns_shift);
  10.196 +	timestamp = cur_ns - xen_ns_offset;
  10.197 +
  10.198 +	/* enforce monotonicity just in case */
  10.199 +	if ((i64)(timestamp - last_pvrdtscp_timestamp) > 0)
  10.200 +		last_pvrdtscp_timestamp = timestamp;
  10.201 +	else {
  10.202 +		/* this should never happen but we'll check it anyway in
  10.203 +		 * case of some strange combination of scaling errors
  10.204 +		 * occurs across a very fast migration */
  10.205 +		printf("Time went backwards by %lluns\n",
  10.206 +		    (unsigned long long)(last_pvrdtscp_timestamp-timestamp));
  10.207 +		timestamp = ++last_pvrdtscp_timestamp;
  10.208 +	}
  10.209 +	return timestamp;
  10.210 +}
  10.211 +
  10.212 +#define HVM 1
  10.213 +#define PVM 0
  10.214 +
  10.215 +static int running_on_xen(int hvm, u16 *version_major, u16 *version_minor)
  10.216 +{
  10.217 +	u32 eax, ebx, ecx, edx, base;
  10.218 +	union { char csig[16]; u32 u[4]; } sig;
  10.219 +
  10.220 +	for (base=0x40000000; base < 0x40010000; base += 0x100) {
  10.221 +		if (hvm==HVM)
  10.222 +			hvm_cpuid(base,0,&eax,&ebx,&ecx,&edx);
  10.223 +		else
  10.224 +			pv_cpuid(base,0,&eax,&ebx,&ecx,&edx);
  10.225 +		sig.u[0] = ebx; sig.u[1] = ecx; sig.u[2] = edx;
  10.226 +		sig.csig[12] = '\0';
  10.227 +		if (!strcmp("XenVMMXenVMM",&sig.csig[0]) && (eax >= (base+2))) {
  10.228 +				if (hvm==HVM)
  10.229 +					hvm_cpuid(base+1,0,&eax,&ebx,&ecx,&edx);
  10.230 +				else
  10.231 +					pv_cpuid(base+1,0,&eax,&ebx,&ecx,&edx);
  10.232 +				*version_major = (eax >> 16) & 0xffff;
  10.233 +				*version_minor = eax & 0xffff;
  10.234 +				return 1;
  10.235 +		}
  10.236 +	}
  10.237 +	return 0;
  10.238 +}
  10.239 +
  10.240 +main(int ac, char **av)
  10.241 +{
  10.242 +	u32 dummy;
  10.243 +	u16 version_hi, version_lo;
  10.244 +	u64 ts, last_ts;
  10.245 +	int status, discontinuity = 0;
  10.246 +	pid_t pid;
  10.247 +
  10.248 +	if (running_on_xen(HVM,&version_hi,&version_lo)) {
  10.249 +		printf("running on Xen v%d.%d as an HVM domain, "
  10.250 +			"pvrdtsc not supported, exiting\n",
  10.251 +			(int)version_hi, (int)version_lo);
  10.252 +		exit(0);
  10.253 +	}
  10.254 +	pid = fork();
  10.255 +	if (pid == -1) {
  10.256 +		fprintf(stderr,"Huh? Fork failed\n");
  10.257 +		return 0;
  10.258 +	}
  10.259 +	else if (pid == 0) { /* child */
  10.260 +		pv_cpuid(0x40000000,0,&dummy,&dummy,&dummy,&dummy);
  10.261 +		exit(0);
  10.262 +	}
  10.263 +	waitpid(pid,&status,0);
  10.264 +	if (!WIFEXITED(status))
  10.265 +		exit(0);
  10.266 +	if (!running_on_xen(PVM,&version_hi,&version_lo)) {
  10.267 +		printf("not running on Xen, exiting\n");
  10.268 +		exit(0);
  10.269 +	}
  10.270 +	printf("running on Xen v%d.%d as a PV domain\n",
  10.271 +		(int)version_hi, (int)version_lo);
  10.272 +	if ( version_hi <= 3 ) {
  10.273 +		printf("pvrdtscp requires Xen version 4.0 or greater\n");
  10.274 +		/* exit(0); FIXME after xen-unstable is officially v4.0 */
  10.275 +	}
  10.276 +	if ( get_xen_tsc_mode() != 3 )
  10.277 +		printf("tsc_mode not pvrdtscp, set tsc_mode=3, exiting\n");
  10.278 +
  10.279 +	/* OK, we are on Xen, now loop forever checking timestamps */
  10.280 +	ts = get_pvrdtscp_timestamp(&discontinuity);
  10.281 +	printf("Starting with ts=%lluns 0x%llx (%llusec)\n",ts,ts,ts/NSEC_PER_SEC);
  10.282 +	printf("incarn=%d: vtsc=%d, vtsc_khz=%lu, phys cpu_khz=%lu\n",
  10.283 +				(unsigned long)get_xen_incarnation(),
  10.284 +				(unsigned long)get_xen_vtsc(),
  10.285 +				(unsigned long)get_xen_vtsc_khz(),
  10.286 +				(unsigned long)get_xen_cpu_khz());
  10.287 +	ts = get_pvrdtscp_timestamp(&discontinuity);
  10.288 +	last_ts = ts;
  10.289 +	while (1) {
  10.290 +		ts = get_pvrdtscp_timestamp(&discontinuity);
  10.291 +		if (discontinuity)
  10.292 +			printf("migrated/restored, incarn=%d: "
  10.293 +                               "vtsc now %d, vtsc_khz=%lu, phys cpu_khz=%lu\n",
  10.294 +				(unsigned long)get_xen_incarnation(),
  10.295 +				(unsigned long)get_xen_vtsc(),
  10.296 +				(unsigned long)get_xen_vtsc_khz(),
  10.297 +				(unsigned long)get_xen_cpu_khz());
  10.298 +		if (ts < last_ts)
  10.299 +			/* this should NEVER happen, especially since there
  10.300 +			 * is a check for it in get_pvrdtscp_timestamp() */
  10.301 +			printf("Time went backwards: %lluns (%llusec)\n",
  10.302 +				last_ts-ts,(last_ts-ts)/NSEC_PER_SEC);
  10.303 +		if (ts > last_ts + 200000000LL)
  10.304 +			/* this is OK, usually about 2sec for save/restore
  10.305 +			 * and a fraction of a second for live migrate */
  10.306 +			printf("Time jumped forward %lluns (%llusec)\n",
  10.307 +				ts-last_ts,(ts-last_ts)/NSEC_PER_SEC);
  10.308 +		last_ts = ts;
  10.309 +	}
  10.310 +}
    11.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    11.2 +++ b/docs/misc/tscmode.txt	Fri Mar 19 18:36:57 2010 -0700
    11.3 @@ -0,0 +1,299 @@
    11.4 +TSC_MODE HOW-TO
    11.5 +by: Dan Magenheimer <dan.magenheimer@oracle.com>
    11.6 +
    11.7 +OVERVIEW
    11.8 +
    11.9 +As of Xen 4.0, a new config option called tsc_mode may be specified
   11.10 +for each domain.  The default for tsc_mode handles the vast majority
   11.11 +of hardware and software environments.  This document is targeted
   11.12 +for Xen users and administrators that may need to select a non-default
   11.13 +tsc_mode.
   11.14 +
   11.15 +Proper selection of tsc_mode depends on an understanding not only of
   11.16 +the guest operating system (OS), but also of the application set that will
   11.17 +ever run on this guest OS.  This is because tsc_mode applies
   11.18 +equally to both the OS and ALL apps that are running on this
   11.19 +domain, now or in the future.
   11.20 +
   11.21 +Key questions to be answered for the OS and/or each application are:
   11.22 +- Does the OS/app use the rdtsc instruction at all?  (We will explain below
   11.23 +  how to determine this.)
   11.24 +- At what frequency is the rdtsc instruction executed by either the OS
   11.25 +  or any running apps?  If the sum exceeds about 10,000 rdtsc instructions
   11.26 +  per second per processor, we call this a "high-TSC-frequency"
   11.27 +  OS/app/environment.  (This is relatively rare, and developers of OS's
   11.28 +  and apps that are high-TSC-frequency are usually aware of it.)
   11.29 +- If the OS/app does use rdtsc, will it behave incorrectly if "time goes
   11.30 +  backwards" or if the frequency of the TSC suddenly changes?  If so,
   11.31 +  we call this a "TSC-sensitive" app or OS; otherwise it is "TSC-resilient".
   11.32 +
   11.33 +This last is the US$64,000 question as it may be very difficult
   11.34 +(or, for legacy apps, even impossible) to predict all possible
   11.35 +failure cases.  As a result, unless proven otherwise, any app
   11.36 +that uses rdtsc must be assumed to be TSC-sensitive and, as we
   11.37 +will see, this is the default starting in Xen 4.0.
   11.38 +
   11.39 +Xen's new tsc_mode parameter determines the circumstances under which
   11.40 +the family of rdtsc instructions are executed "natively" vs emulated.
   11.41 +Roughly speaking, native means rdtsc is fast but TSC-sensitive apps
   11.42 +may, under unpredictable circumstances, run incorrectly; emulated means
   11.43 +there is some performance degradation (unobservable in most cases),
   11.44 +but TSC-sensitive apps will always run correctly.  Prior to Xen 4.0,
   11.45 +all rdtsc instructions were native: "fast but potentially incorrect."
   11.46 +Starting at Xen 4.0, the default is that all rdtsc instructions are
   11.47 +"correct but potentially slow".  The tsc_mode parameter in 4.0 provides
   11.48 +an intelligent default but allows system administrators to adjust
   11.49 +how rdtsc instructions are executed differently for different domains.
   11.50 +
   11.51 +The non-default choices for tsc_mode are:
   11.52 +- tsc_mode=1 (always emulate). All rdtsc instructions are emulated;
   11.53 +   this is the best choice when TSC-sensitive apps are running and
   11.54 +   it is necessary to understand worst-case performance degradation
   11.55 +   for a specific hardware environment.
   11.56 +- tsc_mode=2 (never emulate).  This is the same as prior to Xen 4.0
   11.57 +   and is the best choice if it is certain that all apps running in
   11.58 +   this VM are TSC-resilient and highest performance is required.
   11.59 +- tsc_mode=3 (PVRDTSCP).  High-TSC-frequency apps may be paravirtualized
   11.60 +   (modified) to obtain both correctness and highest performance; any
   11.61 +   unmodified apps must be TSC-resilient.
   11.62 +
   11.63 +If tsc_mode is left unspecified (or set to tsc_mode=0), a hybrid
   11.64 +algorithm is utilized to ensure correctness while providing the
   11.65 +best performance possible given:
   11.66 +- the requirement of correctness,
   11.67 +- the underlying hardware, and
   11.68 +- whether or not the VM has been saved/restored/migrated
   11.69 +To understand this in more detail, the rest of this document must
   11.70 +be read.
   11.71 +
   11.72 +DETERMINING RDTSC FREQUENCY
   11.73 +
   11.74 +To determine the frequency of rdtsc instructions that are emulated,
   11.75 +an "xm" command can be used by a privileged user of domain0.  The
   11.76 +command:
   11.77 +
   11.78 +# xm debug-key s; xm dmesg | tail
   11.79 +
   11.80 +provides information about TSC usage in each domain where TSC
   11.81 +emulation is currently enabled.
   11.82 +
   11.83 +TSC HISTORY
   11.84 +
   11.85 +To understand tsc_mode completely, some background on TSC is required:
   11.86 +
   11.87 +The x86 "timestamp counter", or TSC, is a 64-bit register on each
   11.88 +processor that increases monotonically.  Historically, TSC incremented
   11.89 +every processor cycle, but on recent processors, it increases
   11.90 +at a constant rate even if the processor changes frequency (for example,
   11.91 +to reduce processor power usage).  TSC is known by x86 programmers
   11.92 +as the fastest, highest-precision measurement of the passage of time
   11.93 +so it is often used as a foundation for performance monitoring.
   11.94 +And since it is guaranteed to be monotonically increasing and, at
   11.95 +64 bits, is guaranteed to not wraparound within 10 years, it is
   11.96 +sometimes used as a random number or a unique sequence identifier,
   11.97 +such as to stamp transactions so they can be replayed in a specific
   11.98 +order.
   11.99 +
  11.100 +On most older SMP and early multi-core machines, TSC was not synchronized
  11.101 +between processors.  Thus if an application were to read the TSC on
  11.102 +one processor, then was moved by the OS to another processor, then read
  11.103 +TSC again, it might appear that "time went backwards".  This loss of
  11.104 +monotonicity resulted in many obscure application bugs when TSC-sensitive
  11.105 +apps were ported from a uniprocessor to an SMP environment; as a result,
  11.106 +many applications -- especially in the Windows world -- removed their
  11.107 +dependency on TSC and replaced their timestamp needs with OS-specific
  11.108 +functions, losing both performance and precision. On some more recent
  11.109 +generations of multi-core machines, especially multi-socket multi-core
  11.110 +machines, the TSC was synchronized but if one processor were to enter
  11.111 +certain low-power states, its TSC would stop, destroying the synchrony
  11.112 +and again causing obscure bugs.  This reinforced decisions to avoid use
  11.113 +of TSC altogether.  On the most recent generations of multi-core
  11.114 +machines, however, synchronization is provided across all processors
  11.115 +in all power states, even on multi-socket machines, and provide a
  11.116 +flag that indicates that TSC is synchronized and "invariant".  Thus
  11.117 +TSC is once again useful for applications, and even newer operating
  11.118 +systems are using and depending upon TSC for critical timekeeping
  11.119 +tasks when running on these recent machines.
  11.120 +
  11.121 +We will refer to hardware that ensures TSC is both synchronized and
  11.122 +invariant as "TSC-safe" and any hardware on which TSC is not (or
  11.123 +may not remain) synchronized as "TSC-unsafe".
  11.124 +
  11.125 +As a result of TSC's sordid history, two classes of applications use
  11.126 +TSC: old applications designed for single processors, and the most recent
  11.127 +enterprise applications which require high-frequency high-precision
  11.128 +timestamping.
  11.129 +
  11.130 +We will refer to apps that might break if running on a TSC-unsafe
  11.131 +machine as "TSC-sensitive"; apps that don't use TSC, or do use
  11.132 +TSC but use it in a way that monotonicity and frequency invariance
  11.133 +are unimportant as "TSC-resilient".
  11.134 +
  11.135 +The emergence of virtualization once again complicates the usage of
  11.136 +TSC.  When features such as save/restore or live migration are employed,
  11.137 +a guest OS and all its currently running applications may be invisibly
  11.138 +transported to an entirely different physical machine.  While TSC
  11.139 +may be "safe" on one machine, it is essentially impossible to precisely
  11.140 +synchronize TSC across a data center or even a pool of machines.  As
  11.141 +a result, when run in a virtualized environment, rare and obscure
  11.142 +"time going backwards" problems might once again occur for those
  11.143 +TSC-sensitive applications.  Worse, if a guest OS moves from, for
  11.144 +example, a 3GHz
  11.145 +machine to a 1.5GHz machine, attempts by an OS/app to measure time
  11.146 +intervals with TSC may without notice be incorrect by a factor of two.
  11.147 +
  11.148 +The rdtsc (read timestamp counter) instruction is used to read the
  11.149 +TSC register.  The rdtscp instruction is a variant of rdtsc on recent
  11.150 +processors.  We refer to these together as the rdtsc family of instructions,
  11.151 +or just "rdtsc".  Instructions in the rdtsc family are non-privileged, but
  11.152 +privileged software may set a cpuid bit to cause all rdtsc family
  11.153 +instructions to trap.  This trap can be detected by Xen, which can
  11.154 +then transparently "emulate" the results of the rdtsc instruction and
  11.155 +return control to the code following the rdtsc instruction.
  11.156 +
  11.157 +To provide a "safe" TSC, i.e. to ensure both TSC monotonicity and a
  11.158 +fixed rate, Xen provides rdtsc emulation whenever necessary or when
  11.159 +explicitly specified by a per-VM configuration option.  TSC emulation is
  11.160 +relatively slow -- roughly 15-20 times slower than the rdtsc instruction
  11.161 +when executed natively.  However, except when an OS or application uses
  11.162 +the rdtsc instruction at a high frequency (e.g. more than about 10,000 times
  11.163 +per second per processor), this performance degradation is not noticeable
  11.164 +(i.e. <0.3%).  And, TSC emulation is nearly always faster than
  11.165 +OS-provided alternatives (e.g. Linux's gettimeofday).  For environments
  11.166 +where it is certain that all apps are TSC-resilient (e.g.
  11.167 +"TSC-safeness" is not necessary) and highest performance is a
  11.168 +requirement, TSC emulation may be entirely disabled (tsc_mode==2).
  11.169 +
  11.170 +The default mode (tsc_mode==0) checks TSC-safeness of the underlying
  11.171 +hardware on which the virtual machine is launched.  If it is
  11.172 +TSC-safe, rdtsc will execute at hardware speed; if it is not, rdtsc
  11.173 +will be emulated.  Once a virtual machine is save/restored or migrated,
  11.174 +however, there are two possibilities:  For a paravirtualized (PV) domain,
  11.175 +TSC will always be emulated.  For a fully-virtualized (HVM) domain,
  11.176 +TSC remains native IF the source physical machine and target physical machine
  11.177 +have the same TSC frequency; else TSC is emulated.  Note that, though
  11.178 +emulated, the "apparent" TSC frequency will be the TSC frequency
  11.179 +of the initial physical machine, even after migration.
  11.180 +
  11.181 +For environments where both TSC-safeness AND highest performance
  11.182 +even across migration is a requirement, application code can be specially
  11.183 +modified to use an algorithm explicitly designed into Xen for this purpose.
  11.184 +This mode (tsc_mode==3) is called PVRDTSCP, because it requires
  11.185 +app paravirtualization (awareness by the app that it may be running
  11.186 +on top of Xen), and utilizes a variation of the rdtsc instruction
  11.187 +called rdtscp that is available on most recent generation processors.
  11.188 +(The rdtscp instruction differs from the rdtsc instruction in that it
  11.189 +reads not only the TSC but an additional register set by system software.)
  11.190 +When a pvrdtscp-modified app is running on a processor that is both TSC-safe
  11.191 +and supports the rdtscp instruction, information can be obtained
  11.192 +about migration and TSC frequency/offset adjustment to allow the
  11.193 +vast majority of timestamps to be obtained at top performance; when
  11.194 +running on a TSC-unsafe processor or a processor that doesn't support
  11.195 +the rdtscp instruction, rdtscp is emulated.
  11.196 +
  11.197 +PVRDTSCP (tsc_mode==3) has two limitations.  First, it applies to
  11.198 +all apps running in this virtual machine.  This means that all
  11.199 +apps must either be TSC-resilient or pvrdtscp-modified.  Second,
  11.200 +highest performance is only obtained on TSC-safe machines that
  11.201 +support the rdtscp instruction; when running on older machines,
  11.202 +rdtscp is emulated and thus slower.  For more information on PVRDTSCP,
  11.203 +see below.
  11.204 +
  11.205 +Finally, tsc_mode==1 always enables TSC emulation, regardless of
  11.206 +the underlying physical hardware. The "apparent" TSC frequency will
  11.207 +be the TSC frequency of the initial physical machine, even after migration.
  11.208 +This mode is useful to measure any performance degradation that
  11.209 +might be encountered by a tsc_mode==0 domain after migration occurs,
  11.210 +or a tsc_mode==3 domain when it is running on TSC-unsafe hardware.
  11.211 +
  11.212 +Note that while Xen ensures that an emulated TSC is "safe" across migration,
  11.213 +it does not ensure that it continues to tick at the same rate during
  11.214 +the actual migration.  As an oversimplified example, if TSC is ticking
  11.215 +once per second in a guest, and the guest is saved when the TSC is 1000,
  11.216 +then restored 30 seconds later, TSC is only guaranteed to be greater
  11.217 +than or equal to 1001, not precisely 1030.  This has some OS implications
  11.218 +as will be seen in the next section.
  11.219 +
  11.220 +TSC INVARIANT BIT and NO_MIGRATE
  11.221 +
  11.222 +Related to TSC emulation, the "TSC Invariant" bit is architecturally defined
  11.223 +in a cpuid bit on the most recent x86 processors.  If set, TSC invariance
  11.224 +ensures that the TSC is "safe", that is it will increment at a constant rate
  11.225 +regardless of power events, will be synchronized across all processors, and
  11.226 +was properly initialized to zero on all processors at boot-time
  11.227 +by system hardware/BIOS.  As long as system software never writes to TSC,
  11.228 +TSC will be safe and continuously incremented at a fixed rate and thus
  11.229 +can be used as a system "clocksource".
  11.230 +
  11.231 +This bit is used by some OS's, and specifically by Linux starting with
  11.232 +version 2.6.30(?), to select TSC as a system clocksource.  Once selected,
  11.233 +TSC remains the Linux system clocksource unless manually overridden.  In
  11.234 +a virtualized environment, since it is not possible to synchronize TSC
  11.235 +across all the machines in a pool or data center, a migration may "break"
  11.236 +TSC as a usable clocksource; while time will not go backwards, it may
  11.237 +not track wallclock time well enough to avoid certain time-sensitive
  11.238 +consequences.  As a result, Xen can only expose the TSC Invariant bit
  11.239 +to a guest OS if it is certain that the domain will never migrate.
  11.240 +As of Xen 4.0, the "no_migrate=1" VM configuration option may be specified
  11.241 +to disable migration.  If no_migrate is selected and the VM is running
  11.242 +on a physical machine with "TSC Invariant", Linux 2.6.30+ will safely
  11.243 +use TSC as the system clocksource.  But, attempts to migrate or, once
  11.244 +saved, restore this domain will fail.
  11.245 +
  11.246 +There is another cpuid-related complication: The x86 cpuid instruction is
  11.247 +non-privileged.  HVM domains are configured to always trap this instruction
  11.248 +to Xen, where Xen can "filter" the result.  In a PV OS, all cpuid instructions
  11.249 +have been replaced by a paravirtualized equivalent of the cpuid instruction
  11.250 +("pvcpuid") and also trap to Xen.  But apps in a PV guest that use a
  11.251 +cpuid instruction execute it directly, without a trap to Xen.  As a result,
  11.252 +an app may directly examine the physical TSC Invariant cpuid bit and make
  11.253 +decisions based on that bit.  This is still an unsolved problem, though
  11.254 +a workaround exists as part of the PVRDTSCP tsc_mode for apps that
  11.255 +can be modified.
  11.256 +
  11.257 +MORE ON PVRDTSCP
  11.258 +
  11.259 +Paravirtualized OS's use the "pvclock" algorithm to manage the passing
  11.260 +of time.  This sophisticated algorithm obtains information from a memory
  11.261 +page shared between Xen and the OS and selects information from this
  11.262 +page based on the current virtual CPU (vcpu) in order to properly adapt to
  11.263 +TSC-unsafe systems and changes that occur across migration.  Neither
  11.264 +this shared page nor the vcpu information is available to a userland
  11.265 +app so the pvclock algorithm cannot be directly used by an app, at least
  11.266 +without performance degradation roughly equal to the cost of just
  11.267 +emulating an rdtsc.
  11.268 +
  11.269 +As a result, as of 4.0, Xen provides capabilities for a userland app
  11.270 +to obtain key time values similar to the information accessible
  11.271 +to the PV OS pvclock algorithm.  The app uses the rdtscp instruction
  11.272 +which is defined in recent processors to obtain both the TSC and an
  11.273 +auxiliary value called TSC_AUX.  Xen is responsible for setting TSC_AUX
  11.274 +to the same value on all vcpus running any domain with tsc_mode==3;
  11.275 +further, Xen tools are responsible for monotonically incrementing TSC_AUX
  11.276 +anytime the domain is restored/migrated (thus changing key time values);
  11.277 +and, when the domain is running on a physical machine that either
  11.278 +is not TSC-safe or does not support the rdtscp instruction, Xen
  11.279 +is responsible for emulating the rdtscp instruction and for setting
  11.280 +TSC_AUX to zero on all processors.
  11.281 +
  11.282 +Xen also provides pvclock information via a "pvcpuid" instruction.
  11.283 +While this results in a slow trap, the information changes
  11.284 +(and thus must be reobtained via pvcpuid) ONLY when TSC_AUX
  11.285 +has changed, which should be very rare relative to a high
  11.286 +frequency of rdtscp instructions.
  11.287 +
  11.288 +Finally, Xen provides additional time-related information via
  11.289 +other pvcpuid instructions.  First, an app is capable of
  11.290 +determining if it is currently running on Xen, next
  11.291 +the tsc_mode setting of the domain in which it is running,
  11.292 +and finally whether the underlying hardware is TSC-safe and
  11.293 +supports the rdtscp instruction.
  11.294 +
  11.295 +As a result, a pvrdtscp-modified app has sufficient information
  11.296 +to compute the pvclock "elapsed nanoseconds" which can
  11.297 +be used as a timestamp.  And this can be done nearly as
  11.298 +fast as a native rdtsc instruction, much faster than emulation,
  11.299 +and also much faster than nearly all OS-provided time mechanisms.
  11.300 +While pvrdtscp is too complex for most apps, certain enterprise
  11.301 +TSC-sensitive high-TSC-frequency apps may find it useful to
  11.302 +obtain a significant performance gain.
    12.1 --- a/docs/misc/xsm-flask.txt	Mon Nov 02 19:35:54 2009 -0800
    12.2 +++ b/docs/misc/xsm-flask.txt	Fri Mar 19 18:36:57 2010 -0700
    12.3 @@ -168,6 +168,70 @@ adding them to xen.te, although manual r
    12.4  often lead to adding parameterized rules to the interfaces in xen.if 
    12.5  to address the general case.
    12.6  
    12.7 +Device Policy
    12.8 +-------------
    12.9 +
   12.10 +Flask is capable of labeling devices and enforcing policies associated with
   12.11 +them.  Compiling a policy that uses this functionality requires checkpolicy
   12.12 +(>= 2.0.20) and libsepol (>= 2.0.39).  To enable the building of the new
   12.13 +policies, the following changes need to be made to
   12.14 +tools/flask/policy/Makefile:
   12.15 +
   12.16 +########################################
   12.17 +#
   12.18 +# Build a binary policy locally
   12.19 +#
   12.20 +$(POLVER): policy.conf
   12.21 +        @echo "Compiling $(NAME) $(POLVER)"
   12.22 +       $(QUIET) $(CHECKPOLICY) $^ -o $@            (Comment out this line)
   12.23 +# Uncomment line below to enable policies for devices
   12.24 +#        $(QUIET) $(CHECKPOLICY) -t Xen $^ -o $@   (Uncomment this line)
   12.25 +
   12.26 +########################################
   12.27 +#
   12.28 +# Install a binary policy
   12.29 +#
   12.30 +$(LOADPATH): policy.conf
   12.31 +        @echo "Compiling and installing $(NAME) $(LOADPATH)"
   12.32 +       $(QUIET) $(CHECKPOLICY) $^ -o $@            (Comment out this line)
   12.33 +# Uncomment line below to enable policies for devices
   12.34 +#        $(QUIET) $(CHECKPOLICY) -t Xen $^ -o $@   (Uncomment this line)
   12.35 +
   12.36 +
   12.37 +Pirqs, PCI devices, I/O memory and ports can all be labeled.  The xen.te
   12.38 +policy contains commented-out lines that show how to label devices.
   12.39 +
   12.40 +Device Labeling
   12.41 +---------------
   12.42 +
   12.43 +The "lspci -vvn" command can be used to output all the devices and identifiers
   12.44 +associated with them.  For example, for an Intel e1000e ethernet card the
   12.45 +lspci output is:
   12.46 +
   12.47 +00:19.0 0200: 8086:10de (rev 02)
   12.48 +        Subsystem: 1028:0276
   12.49 +        Interrupt: pin A routed to IRQ 33
   12.50 +        Region 0: Memory at febe0000 (32-bit, non-prefetchable) [size=128K]
   12.51 +        Region 1: Memory at febd9000 (32-bit, non-prefetchable) [size=4K]
   12.52 +        Region 2: I/O ports at ecc0 [size=32]
   12.53 +        Kernel modules: e1000e
   12.54 +
   12.55 +The labeling can be done with these commands:
   12.56 +
   12.57 +pirqcon 33 system_u:object_r:nicP_t
   12.58 +iomemcon 0xfebe0-0xfebff system_u:object_r:nicP_t
   12.59 +iomemcon 0xfebd9 system_u:object_r:nicP_t
   12.60 +ioportcon 0xecc0-0xecdf system_u:object_r:nicP_t
   12.61 +pcidevicecon 0xc800 system_u:object_r:nicP_t
   12.62 +
   12.63 +Labeling of the PCI device is tricky since there is no output in lspci that
   12.64 +makes the information easily available.  The easiest way to obtain the
   12.65 +information is to look at the avc denial line for the correct hex value.
   12.66 +
   12.67 +(XEN) avc:  denied  { add_device } for domid=0 device=0xc800 <---
   12.68 +scontext=system_u:system_r:dom0_t tcontext=system_u:object_r:device_t
   12.69 +tclass=resource
   12.70 +
   12.71  Additional notes on XSM:FLASK
   12.72  -----------------------------
   12.73  
    13.1 --- a/docs/xen-api/revision-history.tex	Mon Nov 02 19:35:54 2009 -0800
    13.2 +++ b/docs/xen-api/revision-history.tex	Fri Mar 19 18:36:57 2010 -0700
    13.3 @@ -44,6 +44,12 @@
    13.4       Added description for \texttt{PV/kernel} and \texttt{PV/ramdisk}
    13.5       parameters using URIs.\tabularnewline
    13.6    \hline
    13.7 +  1.0.9 & 20th Nov. 09 & M. Kanno &
    13.8 +     Added definitions of new classes DSCSI\_HBA and PSCSI\_HBA.
    13.9 +     Updated the table and the diagram representing relationships
   13.10 +     between classes. Added host.PSCSI\_HBAs and VM.DSCSI\_HBAs
   13.11 +     fields.\tabularnewline
   13.12 +  \hline
   13.13   \end{tabular}
   13.14  \end{center}
   13.15  \end{flushleft}
    14.1 --- a/docs/xen-api/xenapi-coversheet.tex	Mon Nov 02 19:35:54 2009 -0800
    14.2 +++ b/docs/xen-api/xenapi-coversheet.tex	Fri Mar 19 18:36:57 2010 -0700
    14.3 @@ -17,12 +17,12 @@
    14.4  \newcommand{\coversheetlogo}{xen.eps}
    14.5  
    14.6  %% Document date
    14.7 -\newcommand{\datestring}{17th June 2009}
    14.8 +\newcommand{\datestring}{20th November 2009}
    14.9  
   14.10  \newcommand{\releasestatement}{Stable Release}
   14.11  
   14.12  %% Document revision
   14.13 -\newcommand{\revstring}{API Revision 1.0.8}
   14.14 +\newcommand{\revstring}{API Revision 1.0.9}
   14.15  
   14.16  %% Document authors
   14.17  \newcommand{\docauthors}{
    15.1 --- a/docs/xen-api/xenapi-datamodel-graph.dot	Mon Nov 02 19:35:54 2009 -0800
    15.2 +++ b/docs/xen-api/xenapi-datamodel-graph.dot	Fri Mar 19 18:36:57 2010 -0700
    15.3 @@ -14,7 +14,7 @@ fontname="Verdana";
    15.4  
    15.5  node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user;
    15.6  node [ shape=box ]; XSPolicy ACMPolicy DPCI PPCI host_cpu console VTPM;
    15.7 -node [ shape=box ]; DSCSI PSCSI;
    15.8 +node [ shape=box ]; DSCSI PSCSI DSCSI_HBA PSCSI_HBA;
    15.9  node [ shape=ellipse ]; VM_metrics VM_guest_metrics host_metrics;
   15.10  node [ shape=ellipse ]; PIF_metrics VIF_metrics VBD_metrics PBD_metrics;
   15.11  session -> host [ arrowhead="none" ]
   15.12 @@ -44,6 +44,11 @@ DPCI -> VM [ arrowhead="none", arrowtail
   15.13  DPCI -> PPCI [ arrowhead="none" ]
   15.14  PPCI -> host [ arrowhead="none", arrowtail="crow" ]
   15.15  DSCSI -> VM [ arrowhead="none", arrowtail="crow" ]
   15.16 +DSCSI_HBA -> VM [ arrowhead="none", arrowtail="crow" ]
   15.17 +DSCSI -> DSCSI_HBA [ arrowhead="none", arrowtail="crow" ]
   15.18  DSCSI -> PSCSI [ arrowhead="none" ]
   15.19 +DSCSI_HBA -> PSCSI_HBA [ arrowhead="crow", arrowtail="none" ]
   15.20  PSCSI -> host [ arrowhead="none", arrowtail="crow" ]
   15.21 +PSCSI_HBA -> host [ arrowhead="none", arrowtail="crow" ]
   15.22 +PSCSI -> PSCSI_HBA [ arrowhead="none", arrowtail="crow" ]
   15.23  }
    16.1 --- a/docs/xen-api/xenapi-datamodel.tex	Mon Nov 02 19:35:54 2009 -0800
    16.2 +++ b/docs/xen-api/xenapi-datamodel.tex	Fri Mar 19 18:36:57 2010 -0700
    16.3 @@ -49,7 +49,9 @@ Name & Description \\
    16.4  {\tt DPCI} & A pass-through PCI device \\
    16.5  {\tt PPCI} & A physical PCI device \\
    16.6  {\tt DSCSI} & A half-virtualized SCSI device \\
    16.7 +{\tt DSCSI\_HBA} & A half-virtualized SCSI host bus adapter \\
    16.8  {\tt PSCSI} & A physical SCSI device \\
    16.9 +{\tt PSCSI\_HBA} & A physical SCSI host bus adapter \\
   16.10  {\tt user} & A user of the system \\
   16.11  {\tt debug} & A basic class for testing \\
   16.12  {\tt XSPolicy} & A class for handling Xen Security Policies \\
   16.13 @@ -79,7 +81,11 @@ console.VM & VM.consoles & one-to-many\\
   16.14  DPCI.VM & VM.DPCIs & one-to-many\\
   16.15  PPCI.host & host.PPCIs & one-to-many\\
   16.16  DSCSI.VM & VM.DSCSIs & one-to-many\\
   16.17 +DSCSI.HBA & DSCSI\_HBA.DSCSIs & one-to-many\\
   16.18 +DSCSI\_HBA.VM & VM.DSCSI\_HBAs & one-to-many\\
   16.19  PSCSI.host & host.PSCSIs & one-to-many\\
   16.20 +PSCSI.HBA & PSCSI\_HBA.PSCSIs & one-to-many\\
   16.21 +PSCSI\_HBA.host & host.PSCSI\_HBAs & one-to-many\\
   16.22  host.resident\_VMs & VM.resident\_on & many-to-one\\
   16.23  host.host\_CPUs & host\_cpu.host & many-to-one\\
   16.24  \hline
   16.25 @@ -1414,6 +1420,7 @@ Quals & Field & Type & Description \\
   16.26  $\mathit{RO}_\mathit{run}$ &  {\tt VTPMs} & (VTPM ref) Set & virtual TPMs \\
   16.27  $\mathit{RO}_\mathit{run}$ &  {\tt DPCIs} & (DPCI ref) Set & pass-through PCI devices \\
   16.28  $\mathit{RO}_\mathit{run}$ &  {\tt DSCSIs} & (DSCSI ref) Set & half-virtualized SCSI devices \\
   16.29 +$\mathit{RO}_\mathit{run}$ &  {\tt DSCSI\_HBAs} & (DSCSI\_HBA ref) Set & half-virtualized SCSI host bus adapters \\
   16.30  $\mathit{RW}$ &  {\tt PV/bootloader} & string & name of or path to bootloader \\
   16.31  $\mathit{RW}$ &  {\tt PV/kernel} & string & URI of kernel \\
   16.32  $\mathit{RW}$ &  {\tt PV/ramdisk} & string & URI of initrd \\
   16.33 @@ -3570,6 +3577,38 @@ value of the field
   16.34  \vspace{0.3cm}
   16.35  \vspace{0.3cm}
   16.36  \vspace{0.3cm}
   16.37 +\subsubsection{RPC name:~get\_DSCSI\_HBAs}
   16.38 +
   16.39 +{\bf Overview:} 
   16.40 +Get the DSCSI\_HBAs field of the given VM.
   16.41 +
   16.42 + \noindent {\bf Signature:} 
   16.43 +\begin{verbatim} ((DSCSI_HBA ref) Set) get_DSCSI_HBAs (session_id s, VM ref self)\end{verbatim}
   16.44 +
   16.45 +
   16.46 +\noindent{\bf Arguments:}
   16.47 +
   16.48 +
   16.49 +\vspace{0.3cm}
   16.50 +\begin{tabular}{|c|c|p{7cm}|}
   16.51 + \hline
   16.52 +{\bf type} & {\bf name} & {\bf description} \\ \hline
   16.53 +{\tt VM ref } & self & reference to the object \\ \hline 
   16.54 +
   16.55 +\end{tabular}
   16.56 +
   16.57 +\vspace{0.3cm}
   16.58 +
   16.59 + \noindent {\bf Return Type:} 
   16.60 +{\tt 
   16.61 +(DSCSI\_HBA ref) Set
   16.62 +}
   16.63 +
   16.64 +
   16.65 +value of the field
   16.66 +\vspace{0.3cm}
   16.67 +\vspace{0.3cm}
   16.68 +\vspace{0.3cm}
   16.69  \subsubsection{RPC name:~get\_PV\_bootloader}
   16.70  
   16.71  {\bf Overview:} 
   16.72 @@ -5639,6 +5678,7 @@ Quals & Field & Type & Description \\
   16.73  $\mathit{RO}_\mathit{run}$ &  {\tt PBDs} & (PBD ref) Set & physical blockdevices \\
   16.74  $\mathit{RO}_\mathit{run}$ &  {\tt PPCIs} & (PPCI ref) Set & physical PCI devices \\
   16.75  $\mathit{RO}_\mathit{run}$ &  {\tt PSCSIs} & (PSCSI ref) Set & physical SCSI devices \\
   16.76 +$\mathit{RO}_\mathit{run}$ &  {\tt PSCSI\_HBAs} & (PSCSI\_HBA ref) Set & physical SCSI host bus adapters \\
   16.77  $\mathit{RO}_\mathit{run}$ &  {\tt host\_CPUs} & (host\_cpu ref) Set & The physical CPUs on this host \\
   16.78  $\mathit{RO}_\mathit{run}$ &  {\tt metrics} & host\_metrics ref & metrics associated with this host \\
   16.79  \hline
   16.80 @@ -6997,6 +7037,38 @@ value of the field
   16.81  \vspace{0.3cm}
   16.82  \vspace{0.3cm}
   16.83  \vspace{0.3cm}
   16.84 +\subsubsection{RPC name:~get\_PSCSI\_HBAs}
   16.85 +
   16.86 +{\bf Overview:} 
   16.87 +Get the PSCSI\_HBAs field of the given host.
   16.88 +
   16.89 + \noindent {\bf Signature:} 
   16.90 +\begin{verbatim} ((PSCSI_HBA ref) Set) get_PSCSI_HBAs (session_id s, host ref self)\end{verbatim}
   16.91 +
   16.92 +
   16.93 +\noindent{\bf Arguments:}
   16.94 +
   16.95 +
   16.96 +\vspace{0.3cm}
   16.97 +\begin{tabular}{|c|c|p{7cm}|}
   16.98 + \hline
   16.99 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.100 +{\tt host ref } & self & reference to the object \\ \hline 
  16.101 +
  16.102 +\end{tabular}
  16.103 +
  16.104 +\vspace{0.3cm}
  16.105 +
  16.106 + \noindent {\bf Return Type:} 
  16.107 +{\tt 
  16.108 +(PSCSI\_HBA ref) Set
  16.109 +}
  16.110 +
  16.111 +
  16.112 +value of the field
  16.113 +\vspace{0.3cm}
  16.114 +\vspace{0.3cm}
  16.115 +\vspace{0.3cm}
  16.116  \subsubsection{RPC name:~get\_host\_CPUs}
  16.117  
  16.118  {\bf Overview:} 
  16.119 @@ -15889,6 +15961,7 @@ Quals & Field & Type & Description \\
  16.120  $\mathit{RO}_\mathit{run}$ &  {\tt uuid} & string & unique identifier/object reference \\
  16.121  $\mathit{RO}_\mathit{inst}$ &  {\tt VM} & VM ref & the virtual machine \\
  16.122  $\mathit{RO}_\mathit{inst}$ &  {\tt PSCSI} & PSCSI ref & the physical SCSI device \\
  16.123 +$\mathit{RO}_\mathit{run}$ &  {\tt HBA} & DSCSI\_HBA ref & the half-virtualized SCSI host bus adapter \\
  16.124  $\mathit{RO}_\mathit{run}$ &  {\tt virtual\_host} & int & the virtual host number \\
  16.125  $\mathit{RO}_\mathit{run}$ &  {\tt virtual\_channel} & int & the virtual channel number \\
  16.126  $\mathit{RO}_\mathit{run}$ &  {\tt virtual\_target} & int & the virtual target number \\
  16.127 @@ -16015,6 +16088,38 @@ value of the field
  16.128  \vspace{0.3cm}
  16.129  \vspace{0.3cm}
  16.130  \vspace{0.3cm}
  16.131 +\subsubsection{RPC name:~get\_HBA}
  16.132 +
  16.133 +{\bf Overview:} 
  16.134 +Get the HBA field of the given DSCSI.
  16.135 +
  16.136 + \noindent {\bf Signature:} 
  16.137 +\begin{verbatim} (DSCSI_HBA ref) get_HBA (session_id s, DSCSI ref self)\end{verbatim}
  16.138 +
  16.139 +
  16.140 +\noindent{\bf Arguments:}
  16.141 +
  16.142 +
  16.143 +\vspace{0.3cm}
  16.144 +\begin{tabular}{|c|c|p{7cm}|}
  16.145 + \hline
  16.146 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.147 +{\tt DSCSI ref } & self & reference to the object \\ \hline 
  16.148 +
  16.149 +\end{tabular}
  16.150 +
  16.151 +\vspace{0.3cm}
  16.152 +
  16.153 + \noindent {\bf Return Type:} 
  16.154 +{\tt 
  16.155 +DSCSI\_HBA ref
  16.156 +}
  16.157 +
  16.158 +
  16.159 +value of the field
  16.160 +\vspace{0.3cm}
  16.161 +\vspace{0.3cm}
  16.162 +\vspace{0.3cm}
  16.163  \subsubsection{RPC name:~get\_virtual\_host}
  16.164  
  16.165  {\bf Overview:} 
  16.166 @@ -16210,7 +16315,9 @@ value of the field
  16.167  \subsubsection{RPC name:~create}
  16.168  
  16.169  {\bf Overview:} 
  16.170 -Create a new DSCSI instance, and return its handle.
  16.171 +Create a new DSCSI instance, and create a new DSCSI\_HBA instance as needed
  16.172 +that the new DSCSI instance connects to, and return the handle of the new
  16.173 +DSCSI instance.
  16.174  
  16.175   \noindent {\bf Signature:} 
  16.176  \begin{verbatim} (DSCSI ref) create (session_id s, DSCSI record args)\end{verbatim}
  16.177 @@ -16242,7 +16349,8 @@ reference to the newly created object
  16.178  \subsubsection{RPC name:~destroy}
  16.179  
  16.180  {\bf Overview:} 
  16.181 -Destroy the specified DSCSI instance.
  16.182 +Destroy the specified DSCSI instance, and destroy a DSCSI\_HBA instance as
  16.183 +needed that the specified DSCSI instance connects to.
  16.184  
  16.185   \noindent {\bf Signature:} 
  16.186  \begin{verbatim} void destroy (session_id s, DSCSI ref self)\end{verbatim}
  16.187 @@ -16337,6 +16445,372 @@ all fields from the object
  16.188  
  16.189  \vspace{1cm}
  16.190  \newpage
  16.191 +\section{Class: DSCSI\_HBA}
  16.192 +\subsection{Fields for class: DSCSI\_HBA}
  16.193 +\begin{longtable}{|lllp{0.38\textwidth}|}
  16.194 +\hline
  16.195 +\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf DSCSI\_HBA} \\
  16.196 +\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
  16.197 +half-virtualized SCSI host bus adapter.}} \\
  16.198 +\hline
  16.199 +Quals & Field & Type & Description \\
  16.200 +\hline
  16.201 +$\mathit{RO}_\mathit{run}$ &  {\tt uuid} & string & unique identifier/object reference \\
  16.202 +$\mathit{RO}_\mathit{inst}$ &  {\tt VM} & VM ref & the virtual machine \\
  16.203 +$\mathit{RO}_\mathit{run}$ &  {\tt PSCSI\_HBAs} & (PSCSI\_HBA ref) Set & the physical SCSI HBAs \\
  16.204 +$\mathit{RO}_\mathit{run}$ &  {\tt DSCSIs} & (DSCSI ref) Set & the half-virtualized SCSI devices which are connected to this DSCSI HBA \\
  16.205 +$\mathit{RO}_\mathit{inst}$ &  {\tt virtual\_host} & int & the virtual host number \\
  16.206 +$\mathit{RO}_\mathit{inst}$ &  {\tt assignment\_mode} & string & the assignment mode of the half-virtualized SCSI devices which are connected to this DSCSI HBA \\
  16.207 +\hline
  16.208 +\end{longtable}
  16.209 +\subsection{RPCs associated with class: DSCSI\_HBA}
  16.210 +\subsubsection{RPC name:~get\_all}
  16.211 +
  16.212 +{\bf Overview:} 
  16.213 +Return a list of all the DSCSI HBAs known to the system.
  16.214 +
  16.215 + \noindent {\bf Signature:} 
  16.216 +\begin{verbatim} ((DSCSI_HBA ref) Set) get_all (session_id s)\end{verbatim}
  16.217 +
  16.218 +
  16.219 +\vspace{0.3cm}
  16.220 +
  16.221 + \noindent {\bf Return Type:} 
  16.222 +{\tt 
  16.223 +(DSCSI\_HBA ref) Set
  16.224 +}
  16.225 +
  16.226 +
  16.227 +references to all objects
  16.228 +\vspace{0.3cm}
  16.229 +\vspace{0.3cm}
  16.230 +\vspace{0.3cm}
  16.231 +\subsubsection{RPC name:~get\_uuid}
  16.232 +
  16.233 +{\bf Overview:} 
  16.234 +Get the uuid field of the given DSCSI HBA.
  16.235 +
  16.236 + \noindent {\bf Signature:} 
  16.237 +\begin{verbatim} string get_uuid (session_id s, DSCSI_HBA ref self)\end{verbatim}
  16.238 +
  16.239 +
  16.240 +\noindent{\bf Arguments:}
  16.241 +
  16.242 +
  16.243 +\vspace{0.3cm}
  16.244 +\begin{tabular}{|c|c|p{7cm}|}
  16.245 + \hline
  16.246 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.247 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.248 +
  16.249 +\end{tabular}
  16.250 +
  16.251 +\vspace{0.3cm}
  16.252 +
  16.253 + \noindent {\bf Return Type:} 
  16.254 +{\tt
  16.255 +string
  16.256 +}
  16.257 +
  16.258 +
  16.259 +value of the field
  16.260 +\vspace{0.3cm}
  16.261 +\vspace{0.3cm}
  16.262 +\vspace{0.3cm}
  16.263 +\subsubsection{RPC name:~get\_VM}
  16.264 +
  16.265 +{\bf Overview:} 
  16.266 +Get the VM field of the given DSCSI HBA.
  16.267 +
  16.268 + \noindent {\bf Signature:} 
  16.269 +\begin{verbatim} (VM ref) get_VM (session_id s, DSCSI_HBA ref self)\end{verbatim}
  16.270 +
  16.271 +
  16.272 +\noindent{\bf Arguments:}
  16.273 +
  16.274 +
  16.275 +\vspace{0.3cm}
  16.276 +\begin{tabular}{|c|c|p{7cm}|}
  16.277 + \hline
  16.278 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.279 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.280 +
  16.281 +\end{tabular}
  16.282 +
  16.283 +\vspace{0.3cm}
  16.284 +
  16.285 + \noindent {\bf Return Type:} 
  16.286 +{\tt 
  16.287 +VM ref
  16.288 +}
  16.289 +
  16.290 +
  16.291 +value of the field
  16.292 +\vspace{0.3cm}
  16.293 +\vspace{0.3cm}
  16.294 +\vspace{0.3cm}
  16.295 +\subsubsection{RPC name:~get\_PSCSI\_HBAs}
  16.296 +
  16.297 +{\bf Overview:} 
  16.298 +Get the PSCSI\_HBAs field of the given DSCSI HBA.
  16.299 +
  16.300 + \noindent {\bf Signature:} 
  16.301 +\begin{verbatim} ((PSCSI_HBA ref) Set) get_PSCSI_HBAs (session_id s, DSCSI_HBA ref self)\end{verbatim}
  16.302 +
  16.303 +
  16.304 +\noindent{\bf Arguments:}
  16.305 +
  16.306 +
  16.307 +\vspace{0.3cm}
  16.308 +\begin{tabular}{|c|c|p{7cm}|}
  16.309 + \hline
  16.310 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.311 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.312 +
  16.313 +\end{tabular}
  16.314 +
  16.315 +\vspace{0.3cm}
  16.316 +
  16.317 + \noindent {\bf Return Type:} 
  16.318 +{\tt 
  16.319 +(PSCSI\_HBA ref) Set
  16.320 +}
  16.321 +
  16.322 +
  16.323 +value of the field
  16.324 +\vspace{0.3cm}
  16.325 +\vspace{0.3cm}
  16.326 +\vspace{0.3cm}
  16.327 +\subsubsection{RPC name:~get\_DSCSIs}
  16.328 +
  16.329 +{\bf Overview:} 
  16.330 +Get the DSCSIs field of the given DSCSI HBA.
  16.331 +
  16.332 + \noindent {\bf Signature:} 
  16.333 +\begin{verbatim} ((DSCSI ref) Set) get_DSCSIs (session_id s, DSCSI_HBA ref self)\end{verbatim}
  16.334 +
  16.335 +
  16.336 +\noindent{\bf Arguments:}
  16.337 +
  16.338 +
  16.339 +\vspace{0.3cm}
  16.340 +\begin{tabular}{|c|c|p{7cm}|}
  16.341 + \hline
  16.342 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.343 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.344 +
  16.345 +\end{tabular}
  16.346 +
  16.347 +\vspace{0.3cm}
  16.348 +
  16.349 + \noindent {\bf Return Type:} 
  16.350 +{\tt 
  16.351 +(DSCSI ref) Set
  16.352 +}
  16.353 +
  16.354 +
  16.355 +value of the field
  16.356 +\vspace{0.3cm}
  16.357 +\vspace{0.3cm}
  16.358 +\vspace{0.3cm}
  16.359 +\subsubsection{RPC name:~get\_virtual\_host}
  16.360 +
  16.361 +{\bf Overview:} 
  16.362 +Get the virtual\_host field of the given DSCSI HBA.
  16.363 +
  16.364 + \noindent {\bf Signature:} 
  16.365 +\begin{verbatim} int get_virtual_host (session_id s, DSCSI_HBA ref self)\end{verbatim}
  16.366 +
  16.367 +
  16.368 +\noindent{\bf Arguments:}
  16.369 +
  16.370 +
  16.371 +\vspace{0.3cm}
  16.372 +\begin{tabular}{|c|c|p{7cm}|}
  16.373 + \hline
  16.374 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.375 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.376 +
  16.377 +\end{tabular}
  16.378 +
  16.379 +\vspace{0.3cm}
  16.380 +
  16.381 + \noindent {\bf Return Type:} 
  16.382 +{\tt 
  16.383 +int
  16.384 +}
  16.385 +
  16.386 +
  16.387 +value of the field
  16.388 +\vspace{0.3cm}
  16.389 +\vspace{0.3cm}
  16.390 +\vspace{0.3cm}
  16.391 +\subsubsection{RPC name:~get\_assignment\_mode}
  16.392 +
  16.393 +{\bf Overview:} 
  16.394 +Get the assignment\_mode field of the given DSCSI HBA.
  16.395 +
  16.396 + \noindent {\bf Signature:} 
  16.397 +\begin{verbatim} string get_assignment_mode (session_id s, DSCSI_HBA ref self)\end{verbatim}
  16.398 +
  16.399 +
  16.400 +\noindent{\bf Arguments:}
  16.401 +
  16.402 +
  16.403 +\vspace{0.3cm}
  16.404 +\begin{tabular}{|c|c|p{7cm}|}
  16.405 + \hline
  16.406 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.407 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.408 +
  16.409 +\end{tabular}
  16.410 +
  16.411 +\vspace{0.3cm}
  16.412 +
  16.413 + \noindent {\bf Return Type:} 
  16.414 +{\tt 
  16.415 +string
  16.416 +}
  16.417 +
  16.418 +
  16.419 +value of the field
  16.420 +\vspace{0.3cm}
  16.421 +\vspace{0.3cm}
  16.422 +\vspace{0.3cm}
  16.423 +\subsubsection{RPC name:~create}
  16.424 +
  16.425 +{\bf Overview:} 
  16.426 +Create a new DSCSI\_HBA instance, and create new DSCSI instances of
  16.427 +half-virtualized SCSI devices which are connected to the half-virtualized
  16.428 +SCSI host bus adapter, and return the handle of the new DSCSI\_HBA instance.
  16.429 +
  16.430 + \noindent {\bf Signature:} 
  16.431 +\begin{verbatim} (DSCSI_HBA ref) create (session_id s, DSCSI_HBA record args)\end{verbatim}
  16.432 +
  16.433 +
  16.434 +\noindent{\bf Arguments:}
  16.435 +
  16.436 +
  16.437 +\vspace{0.3cm}
  16.438 +\begin{tabular}{|c|c|p{7cm}|}
  16.439 + \hline
  16.440 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.441 +{\tt DSCSI\_HBA record } & args & All constructor arguments \\ \hline 
  16.442 +
  16.443 +\end{tabular}
  16.444 +
  16.445 +\vspace{0.3cm}
  16.446 +
  16.447 + \noindent {\bf Return Type:} 
  16.448 +{\tt 
  16.449 +DSCSI\_HBA ref
  16.450 +}
  16.451 +
  16.452 +
  16.453 +reference to the newly created object
  16.454 +\vspace{0.3cm}
  16.455 +\vspace{0.3cm}
  16.456 +\vspace{0.3cm}
  16.457 +\subsubsection{RPC name:~destroy}
  16.458 +
  16.459 +{\bf Overview:} 
  16.460 +Destroy the specified DSCSI\_HBA instance, and destroy DSCSI instances of
  16.461 +half-virtualized SCSI devices which are connected to the half-virtualized SCSI
  16.462 +host bus adapter.
  16.463 +
  16.464 + \noindent {\bf Signature:} 
  16.465 +\begin{verbatim} void destroy (session_id s, DSCSI_HBA ref self)\end{verbatim}
  16.466 +
  16.467 +
  16.468 +\noindent{\bf Arguments:}
  16.469 +
  16.470 +
  16.471 +\vspace{0.3cm}
  16.472 +\begin{tabular}{|c|c|p{7cm}|}
  16.473 + \hline
  16.474 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.475 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.476 +
  16.477 +\end{tabular}
  16.478 +
  16.479 +\vspace{0.3cm}
  16.480 +
  16.481 + \noindent {\bf Return Type:} 
  16.482 +{\tt 
  16.483 +void
  16.484 +}
  16.485 +
  16.486 +
  16.487 +\vspace{0.3cm}
  16.488 +\vspace{0.3cm}
  16.489 +\vspace{0.3cm}
  16.490 +\subsubsection{RPC name:~get\_by\_uuid}
  16.491 +
  16.492 +{\bf Overview:} 
  16.493 +Get a reference to the DSCSI\_HBA instance with the specified UUID.
  16.494 +
  16.495 + \noindent {\bf Signature:} 
  16.496 +\begin{verbatim} (DSCSI_HBA ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
  16.497 +
  16.498 +
  16.499 +\noindent{\bf Arguments:}
  16.500 +
  16.501 +
  16.502 +\vspace{0.3cm}
  16.503 +\begin{tabular}{|c|c|p{7cm}|}
  16.504 + \hline
  16.505 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.506 +{\tt string } & uuid & UUID of object to return \\ \hline 
  16.507 +
  16.508 +\end{tabular}
  16.509 +
  16.510 +\vspace{0.3cm}
  16.511 +
  16.512 + \noindent {\bf Return Type:} 
  16.513 +{\tt 
  16.514 +DSCSI\_HBA ref
  16.515 +}
  16.516 +
  16.517 +
  16.518 +reference to the object
  16.519 +\vspace{0.3cm}
  16.520 +\vspace{0.3cm}
  16.521 +\vspace{0.3cm}
  16.522 +\subsubsection{RPC name:~get\_record}
  16.523 +
  16.524 +{\bf Overview:} 
  16.525 +Get a record containing the current state of the given DSCSI HBA.
  16.526 +
  16.527 + \noindent {\bf Signature:} 
  16.528 +\begin{verbatim} (DSCSI_HBA record) get_record (session_id s, DSCSI_HBA ref self)\end{verbatim}
  16.529 +
  16.530 +
  16.531 +\noindent{\bf Arguments:}
  16.532 +
  16.533 +
  16.534 +\vspace{0.3cm}
  16.535 +\begin{tabular}{|c|c|p{7cm}|}
  16.536 + \hline
  16.537 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.538 +{\tt DSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.539 +
  16.540 +\end{tabular}
  16.541 +
  16.542 +\vspace{0.3cm}
  16.543 +
  16.544 + \noindent {\bf Return Type:} 
  16.545 +{\tt 
  16.546 +DSCSI\_HBA record
  16.547 +}
  16.548 +
  16.549 +
  16.550 +all fields from the object
  16.551 +\vspace{0.3cm}
  16.552 +\vspace{0.3cm}
  16.553 +\vspace{0.3cm}
  16.554 +
  16.555 +\vspace{1cm}
  16.556 +\newpage
  16.557  \section{Class: PSCSI}
  16.558  \subsection{Fields for class: PSCSI}
  16.559  \begin{longtable}{|lllp{0.38\textwidth}|}
  16.560 @@ -16349,6 +16823,7 @@ Quals & Field & Type & Description \\
  16.561  \hline
  16.562  $\mathit{RO}_\mathit{run}$ &  {\tt uuid} & string & unique identifier/object reference \\
  16.563  $\mathit{RO}_\mathit{run}$ &  {\tt host} & host ref &  the physical machine to which this PSCSI is connected \\
  16.564 +$\mathit{RO}_\mathit{run}$ &  {\tt HBA} & PSCSI\_HBA ref &  the physical SCSI host bus adapter \\
  16.565  $\mathit{RO}_\mathit{run}$ &  {\tt physical\_host} & int & the physical host number \\
  16.566  $\mathit{RO}_\mathit{run}$ &  {\tt physical\_channel} & int & the physical channel number \\
  16.567  $\mathit{RO}_\mathit{run}$ &  {\tt physical\_target} & int & the physical target number \\
  16.568 @@ -16451,6 +16926,38 @@ value of the field
  16.569  \vspace{0.3cm}
  16.570  \vspace{0.3cm}
  16.571  \vspace{0.3cm}
  16.572 +\subsubsection{RPC name:~get\_HBA}
  16.573 +
  16.574 +{\bf Overview:} 
  16.575 +Get the HBA field of the given PSCSI.
  16.576 +
  16.577 + \noindent {\bf Signature:} 
  16.578 +\begin{verbatim} (PSCSI_HBA ref) get_HBA (session_id s, PSCSI ref self)\end{verbatim}
  16.579 +
  16.580 +
  16.581 +\noindent{\bf Arguments:}
  16.582 +
  16.583 +
  16.584 +\vspace{0.3cm}
  16.585 +\begin{tabular}{|c|c|p{7cm}|}
  16.586 + \hline
  16.587 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.588 +{\tt PSCSI ref } & self & reference to the object \\ \hline 
  16.589 +
  16.590 +\end{tabular}
  16.591 +
  16.592 +\vspace{0.3cm}
  16.593 +
  16.594 + \noindent {\bf Return Type:} 
  16.595 +{\tt 
  16.596 +PSCSI\_HBA ref
  16.597 +}
  16.598 +
  16.599 +
  16.600 +value of the field
  16.601 +\vspace{0.3cm}
  16.602 +\vspace{0.3cm}
  16.603 +\vspace{0.3cm}
  16.604  \subsubsection{RPC name:~get\_physical\_host}
  16.605  
  16.606  {\bf Overview:} 
  16.607 @@ -16966,6 +17473,239 @@ all fields from the object
  16.608  
  16.609  \vspace{1cm}
  16.610  \newpage
  16.611 +\section{Class: PSCSI\_HBA}
  16.612 +\subsection{Fields for class: PSCSI\_HBA}
  16.613 +\begin{longtable}{|lllp{0.38\textwidth}|}
  16.614 +\hline
  16.615 +\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf PSCSI\_HBA} \\
  16.616 +\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
  16.617 +physical SCSI host bus adapter.}} \\
  16.618 +\hline
  16.619 +Quals & Field & Type & Description \\
  16.620 +\hline
  16.621 +$\mathit{RO}_\mathit{run}$ &  {\tt uuid} & string & unique identifier/object reference \\
  16.622 +$\mathit{RO}_\mathit{run}$ &  {\tt host} & host ref &  the physical machine to which this PSCSI HBA is connected \\
  16.623 +$\mathit{RO}_\mathit{run}$ &  {\tt physical\_host} & int & the physical host number \\
  16.624 +$\mathit{RO}_\mathit{run}$ &  {\tt PSCSIs} & (PSCSI ref) Set & the physical SCSI devices which are connected to this PSCSI HBA \\
  16.625 +\hline
  16.626 +\end{longtable}
  16.627 +\subsection{RPCs associated with class: PSCSI\_HBA}
  16.628 +\subsubsection{RPC name:~get\_all}
  16.629 +
  16.630 +{\bf Overview:} 
  16.631 +Return a list of all the PSCSI HBAs known to the system.
  16.632 +
  16.633 + \noindent {\bf Signature:} 
  16.634 +\begin{verbatim} ((PSCSI_HBA ref) Set) get_all (session_id s)\end{verbatim}
  16.635 +
  16.636 +
  16.637 +\vspace{0.3cm}
  16.638 +
  16.639 + \noindent {\bf Return Type:} 
  16.640 +{\tt 
  16.641 +(PSCSI\_HBA ref) Set
  16.642 +}
  16.643 +
  16.644 +
  16.645 +references to all objects
  16.646 +\vspace{0.3cm}
  16.647 +\vspace{0.3cm}
  16.648 +\vspace{0.3cm}
  16.649 +\subsubsection{RPC name:~get\_uuid}
  16.650 +
  16.651 +{\bf Overview:} 
  16.652 +Get the uuid field of the given PSCSI HBA.
  16.653 +
  16.654 + \noindent {\bf Signature:} 
  16.655 +\begin{verbatim} string get_uuid (session_id s, PSCSI_HBA ref self)\end{verbatim}
  16.656 +
  16.657 +
  16.658 +\noindent{\bf Arguments:}
  16.659 +
  16.660 +
  16.661 +\vspace{0.3cm}
  16.662 +\begin{tabular}{|c|c|p{7cm}|}
  16.663 + \hline
  16.664 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.665 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.666 +
  16.667 +\end{tabular}
  16.668 +
  16.669 +\vspace{0.3cm}
  16.670 +
  16.671 + \noindent {\bf Return Type:} 
  16.672 +{\tt 
  16.673 +string
  16.674 +}
  16.675 +
  16.676 +
  16.677 +value of the field
  16.678 +\vspace{0.3cm}
  16.679 +\vspace{0.3cm}
  16.680 +\vspace{0.3cm}
  16.681 +\subsubsection{RPC name:~get\_host}
  16.682 +
  16.683 +{\bf Overview:} 
  16.684 +Get the host field of the given PSCSI HBA.
  16.685 +
  16.686 + \noindent {\bf Signature:} 
  16.687 +\begin{verbatim} (host ref) get_host (session_id s, PSCSI_HBA ref self)\end{verbatim}
  16.688 +
  16.689 +
  16.690 +\noindent{\bf Arguments:}
  16.691 +
  16.692 +
  16.693 +\vspace{0.3cm}
  16.694 +\begin{tabular}{|c|c|p{7cm}|}
  16.695 + \hline
  16.696 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.697 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.698 +
  16.699 +\end{tabular}
  16.700 +
  16.701 +\vspace{0.3cm}
  16.702 +
  16.703 + \noindent {\bf Return Type:} 
  16.704 +{\tt 
  16.705 +host ref
  16.706 +}
  16.707 +
  16.708 +
  16.709 +value of the field
  16.710 +\vspace{0.3cm}
  16.711 +\vspace{0.3cm}
  16.712 +\vspace{0.3cm}
  16.713 +\subsubsection{RPC name:~get\_physical\_host}
  16.714 +
  16.715 +{\bf Overview:} 
  16.716 +Get the physical\_host field of the given PSCSI HBA.
  16.717 +
  16.718 + \noindent {\bf Signature:} 
  16.719 +\begin{verbatim} int get_physical_host (session_id s, PSCSI_HBA ref self)\end{verbatim}
  16.720 +
  16.721 +
  16.722 +\noindent{\bf Arguments:}
  16.723 +
  16.724 +
  16.725 +\vspace{0.3cm}
  16.726 +\begin{tabular}{|c|c|p{7cm}|}
  16.727 + \hline
  16.728 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.729 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.730 +
  16.731 +\end{tabular}
  16.732 +
  16.733 +\vspace{0.3cm}
  16.734 +
  16.735 + \noindent {\bf Return Type:} 
  16.736 +{\tt 
  16.737 +int
  16.738 +}
  16.739 +
  16.740 +
  16.741 +value of the field
  16.742 +\vspace{0.3cm}
  16.743 +\vspace{0.3cm}
  16.744 +\vspace{0.3cm}
  16.745 +\subsubsection{RPC name:~get\_PSCSIs}
  16.746 +
  16.747 +{\bf Overview:} 
  16.748 +Get the PSCSIs field of the given PSCSI HBA.
  16.749 +
  16.750 + \noindent {\bf Signature:} 
  16.751 +\begin{verbatim} ((PSCSI ref) Set) get_PSCSIs (session_id s, PSCSI_HBA ref self)\end{verbatim}
  16.752 +
  16.753 +
  16.754 +\noindent{\bf Arguments:}
  16.755 +
  16.756 +
  16.757 +\vspace{0.3cm}
  16.758 +\begin{tabular}{|c|c|p{7cm}|}
  16.759 + \hline
  16.760 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.761 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.762 +
  16.763 +\end{tabular}
  16.764 +
  16.765 +\vspace{0.3cm}
  16.766 +
  16.767 + \noindent {\bf Return Type:} 
  16.768 +{\tt 
  16.769 +(PSCSI ref) Set
  16.770 +}
  16.771 +
  16.772 +
  16.773 +value of the field
  16.774 +\vspace{0.3cm}
  16.775 +\vspace{0.3cm}
  16.776 +\vspace{0.3cm}
  16.777 +\subsubsection{RPC name:~get\_by\_uuid}
  16.778 +
  16.779 +{\bf Overview:} 
  16.780 +Get a reference to the PSCSI HBA instance with the specified UUID.
  16.781 +
  16.782 + \noindent {\bf Signature:} 
  16.783 +\begin{verbatim} (PSCSI_HBA ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
  16.784 +
  16.785 +
  16.786 +\noindent{\bf Arguments:}
  16.787 +
  16.788 +
  16.789 +\vspace{0.3cm}
  16.790 +\begin{tabular}{|c|c|p{7cm}|}
  16.791 + \hline
  16.792 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.793 +{\tt string } & uuid & UUID of object to return \\ \hline 
  16.794 +
  16.795 +\end{tabular}
  16.796 +
  16.797 +\vspace{0.3cm}
  16.798 +
  16.799 + \noindent {\bf Return Type:} 
  16.800 +{\tt 
  16.801 +PSCSI\_HBA ref
  16.802 +}
  16.803 +
  16.804 +
  16.805 +reference to the object
  16.806 +\vspace{0.3cm}
  16.807 +\vspace{0.3cm}
  16.808 +\vspace{0.3cm}
  16.809 +\subsubsection{RPC name:~get\_record}
  16.810 +
  16.811 +{\bf Overview:} 
  16.812 +Get a record containing the current state of the given PSCSI HBA.
  16.813 +
  16.814 + \noindent {\bf Signature:} 
  16.815 +\begin{verbatim} (PSCSI_HBA record) get_record (session_id s, PSCSI_HBA ref self)\end{verbatim}
  16.816 +
  16.817 +
  16.818 +\noindent{\bf Arguments:}
  16.819 +
  16.820 +
  16.821 +\vspace{0.3cm}
  16.822 +\begin{tabular}{|c|c|p{7cm}|}
  16.823 + \hline
  16.824 +{\bf type} & {\bf name} & {\bf description} \\ \hline
  16.825 +{\tt PSCSI\_HBA ref } & self & reference to the object \\ \hline 
  16.826 +
  16.827 +\end{tabular}
  16.828 +
  16.829 +\vspace{0.3cm}
  16.830 +
  16.831 + \noindent {\bf Return Type:} 
  16.832 +{\tt 
  16.833 +PSCSI\_HBA record
  16.834 +}
  16.835 +
  16.836 +
  16.837 +all fields from the object
  16.838 +\vspace{0.3cm}
  16.839 +\vspace{0.3cm}
  16.840 +\vspace{0.3cm}
  16.841 +
  16.842 +\vspace{1cm}
  16.843 +\newpage
  16.844  \section{Class: user}
  16.845  \subsection{Fields for class: user}
  16.846  \begin{longtable}{|lllp{0.38\textwidth}|}
    17.1 --- a/extras/mini-os/arch/ia64/mm.c	Mon Nov 02 19:35:54 2009 -0800
    17.2 +++ b/extras/mini-os/arch/ia64/mm.c	Fri Mar 19 18:36:57 2010 -0700
    17.3 @@ -137,17 +137,17 @@ unsigned long allocate_ondemand(unsigned
    17.4  
    17.5  /* Helper function used in gnttab.c. */
    17.6  void do_map_frames(unsigned long addr,
    17.7 -        unsigned long *f, unsigned long n, unsigned long stride,
    17.8 -	unsigned long increment, domid_t id, int may_fail, unsigned long prot)
    17.9 +    const unsigned long *f, unsigned long n, unsigned long stride,
   17.10 +	unsigned long increment, domid_t id, int *err, unsigned long prot)
   17.11  {
   17.12  	/* TODO */
   17.13  	ASSERT(0);
   17.14  }
   17.15  
   17.16  void*
   17.17 -map_frames_ex(unsigned long* frames, unsigned long n, unsigned long stride,
   17.18 +map_frames_ex(const unsigned long* frames, unsigned long n, unsigned long stride,
   17.19  	unsigned long increment, unsigned long alignment, domid_t id,
   17.20 -	int may_fail, unsigned long prot)
   17.21 +	int *err, unsigned long prot)
   17.22  {
   17.23          /* TODO: incomplete! */
   17.24          ASSERT(n == 1 || (stride == 0 && increment == 1));
    18.1 --- a/extras/mini-os/arch/x86/ioremap.c	Mon Nov 02 19:35:54 2009 -0800
    18.2 +++ b/extras/mini-os/arch/x86/ioremap.c	Fri Mar 19 18:36:57 2010 -0700
    18.3 @@ -53,7 +53,7 @@ static void *__do_ioremap(unsigned long 
    18.4          }
    18.5      }   
    18.6      va = (unsigned long)map_frames_ex(&mfns, num_pages, 0, 1, 1,
    18.7 -                                      DOMID_IO, 0, prot);
    18.8 +                                      DOMID_IO, NULL, prot);
    18.9      return (void *)(va + offset);
   18.10      
   18.11  mfn_invalid:
    19.1 --- a/extras/mini-os/arch/x86/mm.c	Mon Nov 02 19:35:54 2009 -0800
    19.2 +++ b/extras/mini-os/arch/x86/mm.c	Fri Mar 19 18:36:57 2010 -0700
    19.3 @@ -568,10 +568,9 @@ unsigned long allocate_ondemand(unsigned
    19.4   */
    19.5  #define MAP_BATCH ((STACK_SIZE / 2) / sizeof(mmu_update_t))
    19.6  void do_map_frames(unsigned long va,
    19.7 -                   unsigned long *mfns, unsigned long n, 
    19.8 +                   const unsigned long *mfns, unsigned long n, 
    19.9                     unsigned long stride, unsigned long incr, 
   19.10 -                   domid_t id, int may_fail,
   19.11 -                   unsigned long prot)
   19.12 +                   domid_t id, int *err, unsigned long prot)
   19.13  {
   19.14      pgentry_t *pgt = NULL;
   19.15      unsigned long done = 0;
   19.16 @@ -585,12 +584,14 @@ void do_map_frames(unsigned long va,
   19.17      }
   19.18      DEBUG("va=%p n=0x%lx, mfns[0]=0x%lx stride=0x%lx incr=0x%lx prot=0x%lx\n",
   19.19            va, n, mfns[0], stride, incr, prot);
   19.20 - 
   19.21 +
   19.22 +    if ( err )
   19.23 +        memset(err, 0x00, n * sizeof(int));
   19.24      while ( done < n )
   19.25      {
   19.26          unsigned long todo;
   19.27  
   19.28 -        if ( may_fail )
   19.29 +        if ( err )
   19.30              todo = 1;
   19.31          else
   19.32              todo = n - done;
   19.33 @@ -615,8 +616,8 @@ void do_map_frames(unsigned long va,
   19.34              rc = HYPERVISOR_mmu_update(mmu_updates, todo, NULL, id);
   19.35              if ( rc < 0 )
   19.36              {
   19.37 -                if (may_fail)
   19.38 -                    mfns[done * stride] |= 0xF0000000;
   19.39 +                if (err)
   19.40 +                    err[done * stride] = rc;
   19.41                  else {
   19.42                      printk("Map %ld (%lx, ...) at %p failed: %d.\n",
   19.43                             todo, mfns[done * stride] + done * incr, va, rc);
   19.44 @@ -632,17 +633,17 @@ void do_map_frames(unsigned long va,
   19.45   * Map an array of MFNs contiguous into virtual address space. Virtual
   19.46   * addresses are allocated from the on demand area.
   19.47   */
   19.48 -void *map_frames_ex(unsigned long *mfns, unsigned long n, 
   19.49 +void *map_frames_ex(const unsigned long *mfns, unsigned long n, 
   19.50                      unsigned long stride, unsigned long incr,
   19.51                      unsigned long alignment,
   19.52 -                    domid_t id, int may_fail, unsigned long prot)
   19.53 +                    domid_t id, int *err, unsigned long prot)
   19.54  {
   19.55      unsigned long va = allocate_ondemand(n, alignment);
   19.56  
   19.57      if ( !va )
   19.58          return NULL;
   19.59  
   19.60 -    do_map_frames(va, mfns, n, stride, incr, id, may_fail, prot);
   19.61 +    do_map_frames(va, mfns, n, stride, incr, id, err, prot);
   19.62  
   19.63      return (void *)va;
   19.64  }
    20.1 --- a/extras/mini-os/blkfront.c	Mon Nov 02 19:35:54 2009 -0800
    20.2 +++ b/extras/mini-os/blkfront.c	Fri Mar 19 18:36:57 2010 -0700
    20.3 @@ -93,7 +93,7 @@ struct blkfront_dev *init_blkfront(char 
    20.4      char* message=NULL;
    20.5      struct blkif_sring *s;
    20.6      int retry=0;
    20.7 -    char* msg;
    20.8 +    char* msg = NULL;
    20.9      char* c;
   20.10      char* nodename = _nodename ? _nodename : "device/vbd/768";
   20.11  
   20.12 @@ -129,6 +129,7 @@ again:
   20.13      err = xenbus_transaction_start(&xbt);
   20.14      if (err) {
   20.15          printk("starting transaction\n");
   20.16 +        free(err);
   20.17      }
   20.18  
   20.19      err = xenbus_printf(xbt, nodename, "ring-ref","%u",
   20.20 @@ -159,6 +160,7 @@ again:
   20.21  
   20.22  
   20.23      err = xenbus_transaction_end(xbt, 0, &retry);
   20.24 +    if (err) free(err);
   20.25      if (retry) {
   20.26              goto again;
   20.27          printk("completing transaction\n");
   20.28 @@ -167,7 +169,8 @@ again:
   20.29      goto done;
   20.30  
   20.31  abort_transaction:
   20.32 -    xenbus_transaction_end(xbt, 1, &retry);
   20.33 +    free(err);
   20.34 +    err = xenbus_transaction_end(xbt, 1, &retry);
   20.35      goto error;
   20.36  
   20.37  done:
   20.38 @@ -208,7 +211,7 @@ done:
   20.39              msg = xenbus_wait_for_state_change(path, &state, &dev->events);
   20.40          if (msg != NULL || state != XenbusStateConnected) {
   20.41              printk("backend not available, state=%d\n", state);
   20.42 -            xenbus_unwatch_path(XBT_NIL, path);
   20.43 +            xenbus_unwatch_path_token(XBT_NIL, path, path);
   20.44              goto error;
   20.45          }
   20.46  
   20.47 @@ -238,6 +241,8 @@ done:
   20.48      return dev;
   20.49  
   20.50  error:
   20.51 +    free(msg);
   20.52 +    free(err);
   20.53      free_blkfront(dev);
   20.54      return NULL;
   20.55  }
   20.56 @@ -265,6 +270,7 @@ void shutdown_blkfront(struct blkfront_d
   20.57      state = xenbus_read_integer(path);
   20.58      while (err == NULL && state < XenbusStateClosing)
   20.59          err = xenbus_wait_for_state_change(path, &state, &dev->events);
   20.60 +    if (err) free(err);
   20.61  
   20.62      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) {
   20.63          printk("shutdown_blkfront: error changing state to %d: %s\n",
   20.64 @@ -272,8 +278,10 @@ void shutdown_blkfront(struct blkfront_d
   20.65          goto close;
   20.66      }
   20.67      state = xenbus_read_integer(path);
   20.68 -    if (state < XenbusStateClosed)
   20.69 -        xenbus_wait_for_state_change(path, &state, &dev->events);
   20.70 +    if (state < XenbusStateClosed) {
   20.71 +        err = xenbus_wait_for_state_change(path, &state, &dev->events);
   20.72 +        if (err) free(err);
   20.73 +    }
   20.74  
   20.75      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) {
   20.76          printk("shutdown_blkfront: error changing state to %d: %s\n",
   20.77 @@ -286,7 +294,8 @@ void shutdown_blkfront(struct blkfront_d
   20.78          err = xenbus_wait_for_state_change(path, &state, &dev->events);
   20.79  
   20.80  close:
   20.81 -    xenbus_unwatch_path(XBT_NIL, path);
   20.82 +    if (err) free(err);
   20.83 +    xenbus_unwatch_path_token(XBT_NIL, path, path);
   20.84  
   20.85      snprintf(path, sizeof(path), "%s/ring-ref", nodename);
   20.86      xenbus_rm(XBT_NIL, path);
    21.1 --- a/extras/mini-os/console/xencons_ring.c	Mon Nov 02 19:35:54 2009 -0800
    21.2 +++ b/extras/mini-os/console/xencons_ring.c	Fri Mar 19 18:36:57 2010 -0700
    21.3 @@ -67,8 +67,8 @@ int xencons_ring_send(struct consfront_d
    21.4  
    21.5  static void handle_input(evtchn_port_t port, struct pt_regs *regs, void *data)
    21.6  {
    21.7 +	struct consfront_dev *dev = (struct consfront_dev *) data;
    21.8  #ifdef HAVE_LIBC
    21.9 -	struct consfront_dev *dev = (struct consfront_dev *) data;
   21.10          int fd = dev ? dev->fd : -1;
   21.11  
   21.12          if (fd != -1)
   21.13 @@ -203,7 +203,7 @@ struct consfront_dev *init_consfront(cha
   21.14      char* err;
   21.15      char* message=NULL;
   21.16      int retry=0;
   21.17 -    char* msg;
   21.18 +    char* msg = NULL;
   21.19      char nodename[256];
   21.20      char path[256];
   21.21      static int consfrontends = 1;
   21.22 @@ -242,6 +242,7 @@ again:
   21.23      err = xenbus_transaction_start(&xbt);
   21.24      if (err) {
   21.25          printk("starting transaction\n");
   21.26 +        free(err);
   21.27      }
   21.28  
   21.29      err = xenbus_printf(xbt, nodename, "ring-ref","%u",
   21.30 @@ -278,6 +279,7 @@ again:
   21.31  
   21.32  
   21.33      err = xenbus_transaction_end(xbt, 0, &retry);
   21.34 +    if (err) free(err);
   21.35      if (retry) {
   21.36              goto again;
   21.37          printk("completing transaction\n");
   21.38 @@ -286,7 +288,8 @@ again:
   21.39      goto done;
   21.40  
   21.41  abort_transaction:
   21.42 -    xenbus_transaction_end(xbt, 1, &retry);
   21.43 +    free(err);
   21.44 +    err = xenbus_transaction_end(xbt, 1, &retry);
   21.45      goto error;
   21.46  
   21.47  done:
   21.48 @@ -312,7 +315,7 @@ done:
   21.49              msg = xenbus_wait_for_state_change(path, &state, &dev->events);
   21.50          if (msg != NULL || state != XenbusStateConnected) {
   21.51              printk("backend not available, state=%d\n", state);
   21.52 -            xenbus_unwatch_path(XBT_NIL, path);
   21.53 +            xenbus_unwatch_path_token(XBT_NIL, path, path);
   21.54              goto error;
   21.55          }
   21.56      }
   21.57 @@ -323,6 +326,8 @@ done:
   21.58      return dev;
   21.59  
   21.60  error:
   21.61 +    free(msg);
   21.62 +    free(err);
   21.63      free_consfront(dev);
   21.64      return NULL;
   21.65  }
    22.1 --- a/extras/mini-os/fbfront.c	Mon Nov 02 19:35:54 2009 -0800
    22.2 +++ b/extras/mini-os/fbfront.c	Fri Mar 19 18:36:57 2010 -0700
    22.3 @@ -71,7 +71,7 @@ struct kbdfront_dev *init_kbdfront(char 
    22.4      char* message=NULL;
    22.5      struct xenkbd_page *s;
    22.6      int retry=0;
    22.7 -    char* msg;
    22.8 +    char* msg = NULL;
    22.9      char* nodename = _nodename ? _nodename : "device/vkbd/0";
   22.10      struct kbdfront_dev *dev;
   22.11  
   22.12 @@ -80,6 +80,7 @@ struct kbdfront_dev *init_kbdfront(char 
   22.13      printk("******************* KBDFRONT for %s **********\n\n\n", nodename);
   22.14  
   22.15      dev = malloc(sizeof(*dev));
   22.16 +    memset(dev, 0, sizeof(*dev));
   22.17      dev->nodename = strdup(nodename);
   22.18  #ifdef HAVE_LIBC
   22.19      dev->fd = -1;
   22.20 @@ -101,6 +102,7 @@ again:
   22.21      err = xenbus_transaction_start(&xbt);
   22.22      if (err) {
   22.23          printk("starting transaction\n");
   22.24 +        free(err);
   22.25      }
   22.26  
   22.27      err = xenbus_printf(xbt, nodename, "page-ref","%u", virt_to_mfn(s));
   22.28 @@ -123,11 +125,13 @@ again:
   22.29  
   22.30      snprintf(path, sizeof(path), "%s/state", nodename);
   22.31      err = xenbus_switch_state(xbt, path, XenbusStateInitialised);
   22.32 -    if (err)
   22.33 +    if (err) {
   22.34          printk("error writing initialized: %s\n", err);
   22.35 -
   22.36 +        free(err);
   22.37 +    }
   22.38  
   22.39      err = xenbus_transaction_end(xbt, 0, &retry);
   22.40 +    if (err) free(err);
   22.41      if (retry) {
   22.42              goto again;
   22.43          printk("completing transaction\n");
   22.44 @@ -136,7 +140,8 @@ again:
   22.45      goto done;
   22.46  
   22.47  abort_transaction:
   22.48 -    xenbus_transaction_end(xbt, 1, &retry);
   22.49 +    free(err);
   22.50 +    err = xenbus_transaction_end(xbt, 1, &retry);
   22.51      goto error;
   22.52  
   22.53  done:
   22.54 @@ -165,7 +170,7 @@ done:
   22.55              err = xenbus_wait_for_state_change(path, &state, &dev->events);
   22.56          if (state != XenbusStateConnected) {
   22.57              printk("backend not available, state=%d\n", state);
   22.58 -            xenbus_unwatch_path(XBT_NIL, path);
   22.59 +            xenbus_unwatch_path_token(XBT_NIL, path, path);
   22.60              goto error;
   22.61          }
   22.62  
   22.63 @@ -175,7 +180,7 @@ done:
   22.64          if((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected))
   22.65              != NULL) {
   22.66              printk("error switching state: %s\n", err);
   22.67 -            xenbus_unwatch_path(XBT_NIL, path);
   22.68 +            xenbus_unwatch_path_token(XBT_NIL, path, path);
   22.69              goto error;
   22.70          }
   22.71      }
   22.72 @@ -185,6 +190,8 @@ done:
   22.73  
   22.74      return dev;
   22.75  error:
   22.76 +    free(msg);
   22.77 +    free(err);
   22.78      free_kbdfront(dev);
   22.79      return NULL;
   22.80  }
   22.81 @@ -246,6 +253,7 @@ void shutdown_kbdfront(struct kbdfront_d
   22.82      state = xenbus_read_integer(path);
   22.83      while (err == NULL && state < XenbusStateClosing)
   22.84          err = xenbus_wait_for_state_change(path, &state, &dev->events);
   22.85 +    if (err) free(err);
   22.86  
   22.87      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) {
   22.88          printk("shutdown_kbdfront: error changing state to %d: %s\n",
   22.89 @@ -253,8 +261,10 @@ void shutdown_kbdfront(struct kbdfront_d
   22.90          goto close_kbdfront;
   22.91      }
   22.92      state = xenbus_read_integer(path);
   22.93 -    if (state < XenbusStateClosed)
   22.94 -        xenbus_wait_for_state_change(path, &state, &dev->events);
   22.95 +    if (state < XenbusStateClosed) {
   22.96 +        err = xenbus_wait_for_state_change(path, &state, &dev->events);
   22.97 +        if (err) free(err);
   22.98 +    }
   22.99  
  22.100      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) {
  22.101          printk("shutdown_kbdfront: error changing state to %d: %s\n",
  22.102 @@ -265,7 +275,8 @@ void shutdown_kbdfront(struct kbdfront_d
  22.103      //xenbus_wait_for_value(path, "2", &dev->events);
  22.104  
  22.105  close_kbdfront:
  22.106 -    xenbus_unwatch_path(XBT_NIL, path);
  22.107 +    if (err) free(err);
  22.108 +    xenbus_unwatch_path_token(XBT_NIL, path, path);
  22.109  
  22.110      snprintf(path, sizeof(path), "%s/page-ref", nodename);
  22.111      xenbus_rm(XBT_NIL, path);
  22.112 @@ -403,6 +414,7 @@ struct fbfront_dev *init_fbfront(char *_
  22.113      printk("******************* FBFRONT for %s **********\n\n\n", nodename);
  22.114  
  22.115      dev = malloc(sizeof(*dev));
  22.116 +    memset(dev, 0, sizeof(*dev));
  22.117      dev->nodename = strdup(nodename);
  22.118  #ifdef HAVE_LIBC
  22.119      dev->fd = -1;
  22.120 @@ -444,6 +456,7 @@ again:
  22.121      err = xenbus_transaction_start(&xbt);
  22.122      if (err) {
  22.123          printk("starting transaction\n");
  22.124 +        free(err);
  22.125      }
  22.126  
  22.127      err = xenbus_printf(xbt, nodename, "page-ref","%u", virt_to_mfn(s));
  22.128 @@ -476,6 +489,7 @@ again:
  22.129      }
  22.130  
  22.131      err = xenbus_transaction_end(xbt, 0, &retry);
  22.132 +    if (err) free(err);
  22.133      if (retry) {
  22.134              goto again;
  22.135          printk("completing transaction\n");
  22.136 @@ -484,7 +498,8 @@ again:
  22.137      goto done;
  22.138  
  22.139  abort_transaction:
  22.140 -    xenbus_transaction_end(xbt, 1, &retry);
  22.141 +    free(err);
  22.142 +    err = xenbus_transaction_end(xbt, 1, &retry);
  22.143      goto error;
  22.144  
  22.145  done:
  22.146 @@ -513,7 +528,7 @@ done:
  22.147              err = xenbus_wait_for_state_change(path, &state, &dev->events);
  22.148          if (state != XenbusStateConnected) {
  22.149              printk("backend not available, state=%d\n", state);
  22.150 -            xenbus_unwatch_path(XBT_NIL, path);
  22.151 +            xenbus_unwatch_path_token(XBT_NIL, path, path);
  22.152              goto error;
  22.153          }
  22.154  
  22.155 @@ -526,7 +541,7 @@ done:
  22.156          if ((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected))
  22.157              != NULL) {
  22.158              printk("error switching state: %s\n", err);
  22.159 -            xenbus_unwatch_path(XBT_NIL, path);
  22.160 +            xenbus_unwatch_path_token(XBT_NIL, path, path);
  22.161              goto error;
  22.162          }
  22.163      }
  22.164 @@ -537,6 +552,7 @@ done:
  22.165      return dev;
  22.166  
  22.167  error:
  22.168 +    free(err);
  22.169      free_fbfront(dev);
  22.170      return NULL;
  22.171  }
  22.172 @@ -625,6 +641,7 @@ void shutdown_fbfront(struct fbfront_dev
  22.173      state = xenbus_read_integer(path);
  22.174      while (err == NULL && state < XenbusStateClosing)
  22.175          err = xenbus_wait_for_state_change(path, &state, &dev->events);
  22.176 +    if (err) free(err);
  22.177  
  22.178      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) {
  22.179          printk("shutdown_fbfront: error changing state to %d: %s\n",
  22.180 @@ -632,8 +649,10 @@ void shutdown_fbfront(struct fbfront_dev
  22.181          goto close_fbfront;
  22.182      }
  22.183      state = xenbus_read_integer(path);
  22.184 -    if (state < XenbusStateClosed)
  22.185 +    if (state < XenbusStateClosed) {
  22.186          xenbus_wait_for_state_change(path, &state, &dev->events);
  22.187 +        if (err) free(err);
  22.188 +    }
  22.189  
  22.190      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) {
  22.191          printk("shutdown_fbfront: error changing state to %d: %s\n",
  22.192 @@ -644,7 +663,8 @@ void shutdown_fbfront(struct fbfront_dev
  22.193      //xenbus_wait_for_value(path, "2", &dev->events);
  22.194  
  22.195  close_fbfront:
  22.196 -    xenbus_unwatch_path(XBT_NIL, path);
  22.197 +    if (err) free(err);
  22.198 +    xenbus_unwatch_path_token(XBT_NIL, path, path);
  22.199  
  22.200      snprintf(path, sizeof(path), "%s/page-ref", nodename);
  22.201      xenbus_rm(XBT_NIL, path);
    23.1 --- a/extras/mini-os/fs-front.c	Mon Nov 02 19:35:54 2009 -0800
    23.2 +++ b/extras/mini-os/fs-front.c	Fri Mar 19 18:36:57 2010 -0700
    23.3 @@ -1103,6 +1103,7 @@ again:
    23.4      err = xenbus_transaction_start(&xbt);
    23.5      if (err) {
    23.6          printk("starting transaction\n");
    23.7 +        free(err);
    23.8      }
    23.9      
   23.10      err = xenbus_printf(xbt, 
   23.11 @@ -1140,9 +1141,10 @@ again:
   23.12      }
   23.13  
   23.14      err = xenbus_printf(xbt, nodename, "state", STATE_READY, 0xdeadbeef);
   23.15 +    if (err) free(err);
   23.16  
   23.17 -    
   23.18      err = xenbus_transaction_end(xbt, 0, &retry);
   23.19 +    if (err) free(err);
   23.20      if (retry) {
   23.21              goto again;
   23.22          printk("completing transaction\n");
   23.23 @@ -1159,7 +1161,9 @@ again:
   23.24      goto done;
   23.25  
   23.26  abort_transaction:
   23.27 -    xenbus_transaction_end(xbt, 1, &retry);
   23.28 +    free(err);
   23.29 +    err = xenbus_transaction_end(xbt, 1, &retry);
   23.30 +    if (err) free(err);
   23.31  
   23.32  done:
   23.33  
   23.34 @@ -1189,8 +1193,9 @@ done:
   23.35      sprintf(token, "fs-front-%d", import->import_id);
   23.36      /* The token will not be unique if multiple imports are inited */
   23.37      xenbus_watch_path_token(XBT_NIL, r_nodename, r_nodename, &events);
   23.38 -    xenbus_wait_for_value(r_nodename, STATE_READY, &events);
   23.39 -    xenbus_unwatch_path(XBT_NIL, r_nodename);
   23.40 +    err = xenbus_wait_for_value(r_nodename, STATE_READY, &events);
   23.41 +    if (err) free(err);
   23.42 +    xenbus_unwatch_path_token(XBT_NIL, r_nodename, r_nodename);
   23.43      printk("Backend ready.\n");
   23.44     
   23.45      //create_thread("fs-tester", test_fs_import, import); 
    24.1 --- a/extras/mini-os/include/ia64/arch_mm.h	Mon Nov 02 19:35:54 2009 -0800
    24.2 +++ b/extras/mini-os/include/ia64/arch_mm.h	Fri Mar 19 18:36:57 2010 -0700
    24.3 @@ -35,9 +35,9 @@
    24.4  #define virt_to_mfn(x)	virt_to_pfn(x)
    24.5  #define virtual_to_mfn(x)      (ia64_tpa((uint64_t)(x)) >> PAGE_SHIFT)
    24.6  
    24.7 -#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, 0)
    24.8 +#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, 0)
    24.9  /* TODO */
   24.10 -#define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, 0, 0)
   24.11 +#define map_zero(n, a) map_frames_ex(NULL, n, 0, 0, a, DOMID_SELF, NULL, 0)
   24.12  #define do_map_zero(start, n) ASSERT(n == 0)
   24.13  
   24.14  #endif /* __ARCH_MM_H__ */
    25.1 --- a/extras/mini-os/include/lib.h	Mon Nov 02 19:35:54 2009 -0800
    25.2 +++ b/extras/mini-os/include/lib.h	Fri Mar 19 18:36:57 2010 -0700
    25.3 @@ -145,6 +145,7 @@ enum fd_type {
    25.4      FTYPE_BLK,
    25.5      FTYPE_KBD,
    25.6      FTYPE_FB,
    25.7 +    FTYPE_MEM,
    25.8  };
    25.9  
   25.10  #define MAX_EVTCHN_PORTS 16
    26.1 --- a/extras/mini-os/include/mm.h	Mon Nov 02 19:35:54 2009 -0800
    26.2 +++ b/extras/mini-os/include/mm.h	Fri Mar 19 18:36:57 2010 -0700
    26.3 @@ -65,12 +65,12 @@ void arch_init_p2m(unsigned long max_pfn
    26.4  
    26.5  unsigned long allocate_ondemand(unsigned long n, unsigned long alignment);
    26.6  /* map f[i*stride]+i*increment for i in 0..n-1, aligned on alignment pages */
    26.7 -void *map_frames_ex(unsigned long *f, unsigned long n, unsigned long stride,
    26.8 +void *map_frames_ex(const unsigned long *f, unsigned long n, unsigned long stride,
    26.9  	unsigned long increment, unsigned long alignment, domid_t id,
   26.10 -	int may_fail, unsigned long prot);
   26.11 +	int *err, unsigned long prot);
   26.12  void do_map_frames(unsigned long addr,
   26.13 -        unsigned long *f, unsigned long n, unsigned long stride,
   26.14 -	unsigned long increment, domid_t id, int may_fail, unsigned long prot);
   26.15 +        const unsigned long *f, unsigned long n, unsigned long stride,
   26.16 +	unsigned long increment, domid_t id, int *err, unsigned long prot);
   26.17  int unmap_frames(unsigned long va, unsigned long num_frames);
   26.18  unsigned long alloc_contig_pages(int order, unsigned int addr_bits);
   26.19  #ifdef HAVE_LIBC
    27.1 --- a/extras/mini-os/include/pcifront.h	Mon Nov 02 19:35:54 2009 -0800
    27.2 +++ b/extras/mini-os/include/pcifront.h	Fri Mar 19 18:36:57 2010 -0700
    27.3 @@ -1,6 +1,7 @@
    27.4  #include <mini-os/types.h>
    27.5  #include <xen/io/pciif.h>
    27.6  struct pcifront_dev;
    27.7 +void pcifront_watches(void *opaque);
    27.8  struct pcifront_dev *init_pcifront(char *nodename);
    27.9  void pcifront_op(struct pcifront_dev *dev, struct xen_pci_op *op);
   27.10  void pcifront_scan(struct pcifront_dev *dev, void (*fun)(unsigned int domain, unsigned int bus, unsigned slot, unsigned int fun));
    28.1 --- a/extras/mini-os/include/x86/arch_mm.h	Mon Nov 02 19:35:54 2009 -0800
    28.2 +++ b/extras/mini-os/include/x86/arch_mm.h	Fri Mar 19 18:36:57 2010 -0700
    28.3 @@ -224,9 +224,9 @@ static __inline__ paddr_t machine_to_phy
    28.4  })
    28.5  #define virtual_to_mfn(_virt)	   pte_to_mfn(virtual_to_pte(_virt))
    28.6  
    28.7 -#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, 0, L1_PROT)
    28.8 -#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, 0, L1_PROT_RO)
    28.9 -#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, 0, L1_PROT_RO)
   28.10 +#define map_frames(f, n) map_frames_ex(f, n, 1, 0, 1, DOMID_SELF, NULL, L1_PROT)
   28.11 +#define map_zero(n, a) map_frames_ex(&mfn_zero, n, 0, 0, a, DOMID_SELF, NULL, L1_PROT_RO)
   28.12 +#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, NULL, L1_PROT_RO)
   28.13  
   28.14  pgentry_t *need_pgt(unsigned long addr);
   28.15  int mfn_is_ram(unsigned long mfn);
    29.1 --- a/extras/mini-os/lib/printf.c	Mon Nov 02 19:35:54 2009 -0800
    29.2 +++ b/extras/mini-os/lib/printf.c	Fri Mar 19 18:36:57 2010 -0700
    29.3 @@ -62,7 +62,7 @@
    29.4  #include <mini-os/lib.h>
    29.5  #include <mini-os/mm.h>
    29.6  #include <mini-os/ctype.h>
    29.7 -#include <mini-os/limits.h>
    29.8 +#include <mini-os/posix/limits.h>
    29.9  
   29.10  /**
   29.11   * simple_strtoul - convert a string to an unsigned long
    30.1 --- a/extras/mini-os/lib/sys.c	Mon Nov 02 19:35:54 2009 -0800
    30.2 +++ b/extras/mini-os/lib/sys.c	Fri Mar 19 18:36:57 2010 -0700
    30.3 @@ -190,6 +190,11 @@ int open(const char *pathname, int flags
    30.4          printk("open(%s) -> %d\n", pathname, fd);
    30.5          return fd;
    30.6      }
    30.7 +    if (!strncmp(pathname, "/dev/mem", strlen("/dev/mem"))) {
    30.8 +        fd = alloc_fd(FTYPE_MEM);
    30.9 +        printk("open(/dev/mem) -> %d\n", fd);
   30.10 +        return fd;
   30.11 +    }
   30.12      if (!strncmp(pathname, "/dev/ptmx", strlen("/dev/ptmx")))
   30.13          return posix_openpt(flags);
   30.14      printk("open(%s, %x)", pathname, flags);
   30.15 @@ -1244,13 +1249,15 @@ void *mmap(void *start, size_t length, i
   30.16      ASSERT(prot == (PROT_READ|PROT_WRITE));
   30.17      ASSERT((fd == -1 && (flags == (MAP_SHARED|MAP_ANON) || flags == (MAP_PRIVATE|MAP_ANON)))
   30.18          || (fd != -1 && flags == MAP_SHARED));
   30.19 -    ASSERT(offset == 0);
   30.20  
   30.21      if (fd == -1)
   30.22          return map_zero(n, 1);
   30.23      else if (files[fd].type == FTYPE_XC) {
   30.24          unsigned long zero = 0;
   30.25 -        return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, 0, 0);
   30.26 +        return map_frames_ex(&zero, n, 0, 0, 1, DOMID_SELF, NULL, 0);
   30.27 +    } else if (files[fd].type == FTYPE_MEM) {
   30.28 +        unsigned long first_mfn = offset >> PAGE_SHIFT;
   30.29 +        return map_frames_ex(&first_mfn, n, 0, 1, 1, DOMID_IO, NULL, _PAGE_PRESENT|_PAGE_RW);
   30.30      } else ASSERT(0);
   30.31  }
   30.32  
    31.1 --- a/extras/mini-os/lib/xmalloc.c	Mon Nov 02 19:35:54 2009 -0800
    31.2 +++ b/extras/mini-os/lib/xmalloc.c	Fri Mar 19 18:36:57 2010 -0700
    31.3 @@ -187,6 +187,8 @@ void *_xmalloc(size_t size, size_t align
    31.4  
    31.5          /* Alloc a new page and return from that. */
    31.6          hdr = xmalloc_new_page(align_up(hdr_size, align) + size);
    31.7 +        if ( hdr == NULL )
    31.8 +            return NULL;
    31.9          data_begin = (uintptr_t)hdr + align_up(hdr_size, align);
   31.10      }
   31.11  
   31.12 @@ -279,14 +281,18 @@ void *_realloc(void *ptr, size_t size)
   31.13      void *new;
   31.14      struct xmalloc_hdr *hdr;
   31.15      struct xmalloc_pad *pad;
   31.16 +    size_t old_data_size;
   31.17  
   31.18      if (ptr == NULL)
   31.19          return _xmalloc(size, DEFAULT_ALIGN);
   31.20  
   31.21      pad = (struct xmalloc_pad *)ptr - 1;
   31.22      hdr = (struct xmalloc_hdr *)((char*)ptr - pad->hdr_size);
   31.23 -    if (hdr->size >= size) {
   31.24 -        maybe_split(hdr, size, hdr->size);
   31.25 +
   31.26 +    old_data_size = hdr->size - pad->hdr_size;
   31.27 +    if ( old_data_size >= size )
   31.28 +    {
   31.29 +	maybe_split(hdr, pad->hdr_size + size, hdr->size);
   31.30          return ptr;
   31.31      }
   31.32      
   31.33 @@ -294,7 +300,7 @@ void *_realloc(void *ptr, size_t size)
   31.34      if (new == NULL) 
   31.35          return NULL;
   31.36  
   31.37 -    memcpy(new, ptr, hdr->size);
   31.38 +    memcpy(new, ptr, old_data_size);
   31.39      xfree(ptr);
   31.40  
   31.41      return new;
    32.1 --- a/extras/mini-os/lib/xs.c	Mon Nov 02 19:35:54 2009 -0800
    32.2 +++ b/extras/mini-os/lib/xs.c	Fri Mar 19 18:36:57 2010 -0700
    32.3 @@ -49,6 +49,7 @@ void *xs_read(struct xs_handle *h, xs_tr
    32.4      msg = xenbus_read(t, path, &value);
    32.5      if (msg) {
    32.6  	printk("xs_read(%s): %s\n", path, msg);
    32.7 +	free(msg);
    32.8  	return NULL;
    32.9      }
   32.10  
   32.11 @@ -69,6 +70,7 @@ bool xs_write(struct xs_handle *h, xs_tr
   32.12      msg = xenbus_write(t, path, value);
   32.13      if (msg) {
   32.14  	printk("xs_write(%s): %s\n", path, msg);
   32.15 +	free(msg);
   32.16  	return false;
   32.17      }
   32.18      return true;
    33.1 --- a/extras/mini-os/main.c	Mon Nov 02 19:35:54 2009 -0800
    33.2 +++ b/extras/mini-os/main.c	Fri Mar 19 18:36:57 2010 -0700
    33.3 @@ -9,6 +9,7 @@
    33.4  #include <sched.h>
    33.5  #include <console.h>
    33.6  #include <netfront.h>
    33.7 +#include <pcifront.h>
    33.8  #include <time.h>
    33.9  #include <stdlib.h>
   33.10  #include <unistd.h>
   33.11 @@ -67,6 +68,7 @@ static void call_main(void *p)
   33.12  #endif
   33.13      init_fs_frontend();
   33.14  #endif
   33.15 +    create_thread("pcifront", pcifront_watches, NULL);
   33.16  
   33.17  #ifdef CONFIG_QEMU
   33.18      /* Fetch argc, argv from XenStore */
    34.1 --- a/extras/mini-os/netfront.c	Mon Nov 02 19:35:54 2009 -0800
    34.2 +++ b/extras/mini-os/netfront.c	Fri Mar 19 18:36:57 2010 -0700
    34.3 @@ -305,7 +305,7 @@ struct netfront_dev *init_netfront(char 
    34.4      struct netif_rx_sring *rxs;
    34.5      int retry=0;
    34.6      int i;
    34.7 -    char* msg;
    34.8 +    char* msg = NULL;
    34.9      char nodename[256];
   34.10      char path[256];
   34.11      struct netfront_dev *dev;
   34.12 @@ -377,6 +377,7 @@ again:
   34.13      err = xenbus_transaction_start(&xbt);
   34.14      if (err) {
   34.15          printk("starting transaction\n");
   34.16 +        free(err);
   34.17      }
   34.18  
   34.19      err = xenbus_printf(xbt, nodename, "tx-ring-ref","%u",
   34.20 @@ -413,6 +414,7 @@ again:
   34.21      }
   34.22  
   34.23      err = xenbus_transaction_end(xbt, 0, &retry);
   34.24 +    if (err) free(err);
   34.25      if (retry) {
   34.26              goto again;
   34.27          printk("completing transaction\n");
   34.28 @@ -421,7 +423,8 @@ again:
   34.29      goto done;
   34.30  
   34.31  abort_transaction:
   34.32 -    xenbus_transaction_end(xbt, 1, &retry);
   34.33 +    free(err);
   34.34 +    err = xenbus_transaction_end(xbt, 1, &retry);
   34.35      goto error;
   34.36  
   34.37  done:
   34.38 @@ -452,7 +455,7 @@ done:
   34.39              err = xenbus_wait_for_state_change(path, &state, &dev->events);
   34.40          if (state != XenbusStateConnected) {
   34.41              printk("backend not avalable, state=%d\n", state);
   34.42 -            xenbus_unwatch_path(XBT_NIL, path);
   34.43 +            xenbus_unwatch_path_token(XBT_NIL, path, path);
   34.44              goto error;
   34.45          }
   34.46  
   34.47 @@ -479,6 +482,8 @@ done:
   34.48  
   34.49      return dev;
   34.50  error:
   34.51 +    free(msg);
   34.52 +    free(err);
   34.53      free_netfront(dev);
   34.54      return NULL;
   34.55  }
   34.56 @@ -521,6 +526,7 @@ void shutdown_netfront(struct netfront_d
   34.57      state = xenbus_read_integer(path);
   34.58      while (err == NULL && state < XenbusStateClosing)
   34.59          err = xenbus_wait_for_state_change(path, &state, &dev->events);
   34.60 +    if (err) free(err);
   34.61  
   34.62      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) {
   34.63          printk("shutdown_netfront: error changing state to %d: %s\n",
   34.64 @@ -528,8 +534,10 @@ void shutdown_netfront(struct netfront_d
   34.65          goto close;
   34.66      }
   34.67      state = xenbus_read_integer(path);
   34.68 -    if (state < XenbusStateClosed)
   34.69 -        xenbus_wait_for_state_change(path, &state, &dev->events);
   34.70 +    if (state < XenbusStateClosed) {
   34.71 +        err = xenbus_wait_for_state_change(path, &state, &dev->events);
   34.72 +        if (err) free(err);
   34.73 +    }
   34.74  
   34.75      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) {
   34.76          printk("shutdown_netfront: error changing state to %d: %s\n",
   34.77 @@ -542,7 +550,8 @@ void shutdown_netfront(struct netfront_d
   34.78          err = xenbus_wait_for_state_change(path, &state, &dev->events);
   34.79  
   34.80  close:
   34.81 -    xenbus_unwatch_path(XBT_NIL, path);
   34.82 +    if (err) free(err);
   34.83 +    xenbus_unwatch_path_token(XBT_NIL, path, path);
   34.84  
   34.85      snprintf(path, sizeof(path), "%s/tx-ring-ref", nodename);
   34.86      xenbus_rm(XBT_NIL, path);
    35.1 --- a/extras/mini-os/pcifront.c	Mon Nov 02 19:35:54 2009 -0800
    35.2 +++ b/extras/mini-os/pcifront.c	Fri Mar 19 18:36:57 2010 -0700
    35.3 @@ -13,10 +13,12 @@
    35.4  #include <mini-os/xmalloc.h>
    35.5  #include <mini-os/wait.h>
    35.6  #include <mini-os/pcifront.h>
    35.7 +#include <mini-os/sched.h>
    35.8  
    35.9  #define PCI_DEVFN(slot, func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
   35.10  
   35.11  DECLARE_WAIT_QUEUE_HEAD(pcifront_queue);
   35.12 +static struct pcifront_dev *pcidev;
   35.13  
   35.14  struct pcifront_dev {
   35.15      domid_t dom;
   35.16 @@ -38,19 +40,103 @@ void pcifront_handler(evtchn_port_t port
   35.17  
   35.18  static void free_pcifront(struct pcifront_dev *dev)
   35.19  {
   35.20 -    mask_evtchn(dev->evtchn);
   35.21 +    if (!dev)
   35.22 +        dev = pcidev;
   35.23  
   35.24 -    free(dev->backend);
   35.25 +    mask_evtchn(dev->evtchn);
   35.26  
   35.27      gnttab_end_access(dev->info_ref);
   35.28      free_page(dev->info);
   35.29  
   35.30      unbind_evtchn(dev->evtchn);
   35.31  
   35.32 +    free(dev->backend);
   35.33      free(dev->nodename);
   35.34      free(dev);
   35.35  }
   35.36  
   35.37 +void pcifront_watches(void *opaque)
   35.38 +{
   35.39 +    XenbusState state;
   35.40 +    char *err = NULL, *msg = NULL;
   35.41 +    char *be_path, *be_state;
   35.42 +    char* nodename = opaque ? opaque : "device/pci/0";
   35.43 +    char path[strlen(nodename) + 9];
   35.44 +    char fe_state[strlen(nodename) + 7];
   35.45 +    xenbus_event_queue events = NULL;
   35.46 +
   35.47 +    snprintf(path, sizeof(path), "%s/backend", nodename);
   35.48 +    snprintf(fe_state, sizeof(fe_state), "%s/state", nodename);
   35.49 +
   35.50 +    while (1) {
   35.51 +        printk("pcifront_watches: waiting for backend path to happear %s\n", path);
   35.52 +        xenbus_watch_path_token(XBT_NIL, path, path, &events);
   35.53 +        while ((err = xenbus_read(XBT_NIL, path, &be_path)) != NULL) {
   35.54 +            free(err);
   35.55 +            xenbus_wait_for_watch(&events);
   35.56 +        }
   35.57 +        xenbus_unwatch_path_token(XBT_NIL, path, path);
   35.58 +        printk("pcifront_watches: waiting for backend to get into the right state %s\n", be_path);
   35.59 +        be_state = (char *) malloc(strlen(be_path) +  7);
   35.60 +        snprintf(be_state, strlen(be_path) +  7, "%s/state", be_path);
   35.61 +        xenbus_watch_path_token(XBT_NIL, be_state, be_state, &events);
   35.62 +        while ((err = xenbus_read(XBT_NIL, be_state, &msg)) != NULL || msg[0] > '4') {
   35.63 +            free(msg);
   35.64 +            free(err);
   35.65 +            xenbus_wait_for_watch(&events);
   35.66 +        }
   35.67 +        xenbus_unwatch_path_token(XBT_NIL, be_state, be_state);
   35.68 +        if (init_pcifront(NULL) == NULL) {
   35.69 +            free(be_state);
   35.70 +            free(be_path);
   35.71 +            continue;
   35.72 +        }
   35.73 +        xenbus_watch_path_token(XBT_NIL, be_state, be_state, &events);
   35.74 +        state = XenbusStateConnected;
   35.75 +        printk("pcifront_watches: waiting for backend events %s\n", be_state);
   35.76 +        while ((err = xenbus_wait_for_state_change(be_state, &state, &events)) == NULL &&
   35.77 +               (err = xenbus_read(XBT_NIL, pcidev->backend, &msg)) == NULL) {
   35.78 +            free(msg);
   35.79 +            printk("pcifront_watches: backend state changed: %s %d\n", be_state, state);
   35.80 +            if (state == XenbusStateReconfiguring) {
   35.81 +                printk("pcifront_watches: writing %s %d\n", fe_state, XenbusStateReconfiguring);
   35.82 +                if ((err = xenbus_switch_state(XBT_NIL, fe_state, XenbusStateReconfiguring)) != NULL) {
   35.83 +                    printk("pcifront_watches: error changing state to %d: %s\n",
   35.84 +                            XenbusStateReconfiguring, err);
   35.85 +                    if (!strcmp(err, "ENOENT")) {
   35.86 +                        xenbus_write(XBT_NIL, fe_state, "7");
   35.87 +                        free(err);
   35.88 +                    }
   35.89 +                }
   35.90 +            } else if (state == XenbusStateReconfigured) {
   35.91 +                printk("pcifront_watches: writing %s %d\n", fe_state, XenbusStateConnected);
   35.92 +                printk("pcifront_watches: changing state to %d\n", XenbusStateConnected);
   35.93 +                if ((err = xenbus_switch_state(XBT_NIL, fe_state, XenbusStateConnected)) != NULL) {
   35.94 +                    printk("pcifront_watches: error changing state to %d: %s\n",
   35.95 +                            XenbusStateConnected, err);
   35.96 +                    if (!strcmp(err, "ENOENT")) {
   35.97 +                        xenbus_write(XBT_NIL, fe_state, "4");
   35.98 +                        free(err);
   35.99 +                    }
  35.100 +                }
  35.101 +            } else if (state == XenbusStateClosing)
  35.102 +                break;
  35.103 +        }
  35.104 +        if (err)
  35.105 +            printk("pcifront_watches: done waiting err=%s\n", err);
  35.106 +        else
  35.107 +            printk("pcifront_watches: done waiting\n");
  35.108 +        xenbus_unwatch_path_token(XBT_NIL, be_state, be_state);
  35.109 +        shutdown_pcifront(pcidev);
  35.110 +        free(be_state);
  35.111 +        free(be_path);
  35.112 +        free(err);
  35.113 +        pcidev = NULL;
  35.114 +    }
  35.115 +
  35.116 +    xenbus_unwatch_path_token(XBT_NIL, path, path);
  35.117 +}
  35.118 +
  35.119  struct pcifront_dev *init_pcifront(char *_nodename)
  35.120  {
  35.121      xenbus_transaction_t xbt;
  35.122 @@ -65,6 +151,9 @@ struct pcifront_dev *init_pcifront(char 
  35.123  
  35.124      char path[strlen(nodename) + 1 + 10 + 1];
  35.125  
  35.126 +    if (!_nodename && pcidev)
  35.127 +        return pcidev;
  35.128 +
  35.129      printk("******************* PCIFRONT for %s **********\n\n\n", nodename);
  35.130  
  35.131      snprintf(path, sizeof(path), "%s/backend-id", nodename);
  35.132 @@ -92,6 +181,7 @@ again:
  35.133      err = xenbus_transaction_start(&xbt);
  35.134      if (err) {
  35.135          printk("starting transaction\n");
  35.136 +        free(err);
  35.137      }
  35.138  
  35.139      err = xenbus_printf(xbt, nodename, "pci-op-ref","%u",
  35.140 @@ -121,6 +211,7 @@ again:
  35.141      }
  35.142  
  35.143      err = xenbus_transaction_end(xbt, 0, &retry);
  35.144 +    if (err) free(err);
  35.145      if (retry) {
  35.146              goto again;
  35.147          printk("completing transaction\n");
  35.148 @@ -129,7 +220,8 @@ again:
  35.149      goto done;
  35.150  
  35.151  abort_transaction:
  35.152 -    xenbus_transaction_end(xbt, 1, &retry);
  35.153 +    free(err);
  35.154 +    err = xenbus_transaction_end(xbt, 1, &retry);
  35.155      goto error;
  35.156  
  35.157  done:
  35.158 @@ -157,7 +249,7 @@ done:
  35.159              err = xenbus_wait_for_state_change(path, &state, &dev->events);
  35.160          if (state != XenbusStateConnected) {
  35.161              printk("backend not avalable, state=%d\n", state);
  35.162 -            xenbus_unwatch_path(XBT_NIL, path);
  35.163 +            xenbus_unwatch_path_token(XBT_NIL, path, path);
  35.164              goto error;
  35.165          }
  35.166  
  35.167 @@ -165,7 +257,7 @@ done:
  35.168          if ((err = xenbus_switch_state(XBT_NIL, frontpath, XenbusStateConnected))
  35.169              != NULL) {
  35.170              printk("error switching state %s\n", err);
  35.171 -            xenbus_unwatch_path(XBT_NIL, path);
  35.172 +            xenbus_unwatch_path_token(XBT_NIL, path, path);
  35.173              goto error;
  35.174          }
  35.175      }
  35.176 @@ -173,25 +265,47 @@ done:
  35.177  
  35.178      printk("**************************\n");
  35.179  
  35.180 +    if (!_nodename)
  35.181 +        pcidev = dev;
  35.182 +
  35.183      return dev;
  35.184  
  35.185  error:
  35.186 +    free(err);
  35.187      free_pcifront(dev);
  35.188      return NULL;
  35.189  }
  35.190  
  35.191  void pcifront_scan(struct pcifront_dev *dev, void (*func)(unsigned int domain, unsigned int bus, unsigned slot, unsigned int fun))
  35.192  {
  35.193 -    char path[strlen(dev->backend) + 1 + 5 + 10 + 1];
  35.194 -    int i, n;
  35.195 -    char *s, *msg;
  35.196 +    char *path;
  35.197 +    int i, n, len;
  35.198 +    char *s, *msg = NULL, *err = NULL;
  35.199      unsigned int domain, bus, slot, fun;
  35.200  
  35.201 -    snprintf(path, sizeof(path), "%s/num_devs", dev->backend);
  35.202 +    if (!dev)
  35.203 +        dev = pcidev;
  35.204 +    if (!dev) {
  35.205 +        xenbus_event_queue events = NULL;
  35.206 +        char *fe_state = "device/pci/0/state";
  35.207 +        xenbus_watch_path_token(XBT_NIL, fe_state, fe_state, &events);
  35.208 +        while ((err = xenbus_read(XBT_NIL, fe_state, &msg)) != NULL || msg[0] != '4') {
  35.209 +            free(msg);
  35.210 +            free(err);
  35.211 +            printk("pcifront_scan: waiting for pcifront to become ready\n");
  35.212 +            xenbus_wait_for_watch(&events);
  35.213 +        }
  35.214 +        xenbus_unwatch_path_token(XBT_NIL, fe_state, fe_state);
  35.215 +        dev = pcidev;
  35.216 +    }
  35.217 +
  35.218 +    len = strlen(dev->backend) + 1 + 5 + 10 + 1;
  35.219 +    path = (char *) malloc(len);
  35.220 +    snprintf(path, len, "%s/num_devs", dev->backend);
  35.221      n = xenbus_read_integer(path);
  35.222  
  35.223      for (i = 0; i < n; i++) {
  35.224 -        snprintf(path, sizeof(path), "%s/dev-%d", dev->backend, i);
  35.225 +        snprintf(path, len, "%s/dev-%d", dev->backend, i);
  35.226          msg = xenbus_read(XBT_NIL, path, &s);
  35.227          if (msg) {
  35.228              printk("Error %s when reading the PCI root name at %s\n", msg, path);
  35.229 @@ -205,8 +319,10 @@ void pcifront_scan(struct pcifront_dev *
  35.230          }
  35.231          free(s);
  35.232  
  35.233 -        func(domain, bus, slot, fun);
  35.234 +        if (func)
  35.235 +            func(domain, bus, slot, fun);
  35.236      }
  35.237 +    free(path);
  35.238  }
  35.239  
  35.240  void shutdown_pcifront(struct pcifront_dev *dev)
  35.241 @@ -229,6 +345,7 @@ void shutdown_pcifront(struct pcifront_d
  35.242      state = xenbus_read_integer(path);
  35.243      while (err == NULL && state < XenbusStateClosing)
  35.244          err = xenbus_wait_for_state_change(path, &state, &dev->events);
  35.245 +    if (err) free(err);
  35.246  
  35.247      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateClosed)) != NULL) {
  35.248          printk("shutdown_pcifront: error changing state to %d: %s\n",
  35.249 @@ -236,8 +353,10 @@ void shutdown_pcifront(struct pcifront_d
  35.250          goto close_pcifront;
  35.251      }
  35.252      state = xenbus_read_integer(path);
  35.253 -    if (state < XenbusStateClosed)
  35.254 -        xenbus_wait_for_state_change(path, &state, &dev->events);
  35.255 +    if (state < XenbusStateClosed) {
  35.256 +        err = xenbus_wait_for_state_change(path, &state, &dev->events);
  35.257 +        free(err);
  35.258 +    }
  35.259  
  35.260      if ((err = xenbus_switch_state(XBT_NIL, nodename, XenbusStateInitialising)) != NULL) {
  35.261          printk("shutdown_pcifront: error changing state to %d: %s\n",
  35.262 @@ -250,7 +369,8 @@ void shutdown_pcifront(struct pcifront_d
  35.263          err = xenbus_wait_for_state_change(path, &state, &dev->events);
  35.264  
  35.265  close_pcifront:
  35.266 -    xenbus_unwatch_path(XBT_NIL, path);
  35.267 +    if (err) free(err);
  35.268 +    xenbus_unwatch_path_token(XBT_NIL, path, path);
  35.269  
  35.270      snprintf(path, sizeof(path), "%s/info-ref", nodename);
  35.271      xenbus_rm(XBT_NIL, path);
  35.272 @@ -271,6 +391,9 @@ int pcifront_physical_to_virtual (struct
  35.273      char *s, *msg = NULL;
  35.274      unsigned int dom1, bus1, slot1, fun1;
  35.275  
  35.276 +    if (!dev)
  35.277 +        dev = pcidev;
  35.278 +
  35.279      snprintf(path, sizeof(path), "%s/num_devs", dev->backend);
  35.280      n = xenbus_read_integer(path);
  35.281  
  35.282 @@ -312,6 +435,8 @@ int pcifront_physical_to_virtual (struct
  35.283  
  35.284  void pcifront_op(struct pcifront_dev *dev, struct xen_pci_op *op)
  35.285  {
  35.286 +    if (!dev)
  35.287 +        dev = pcidev;
  35.288      dev->info->op = *op;
  35.289      /* Make sure info is written before the flag */
  35.290      wmb();
  35.291 @@ -332,6 +457,8 @@ int pcifront_conf_read(struct pcifront_d
  35.292  {
  35.293      struct xen_pci_op op;
  35.294  
  35.295 +    if (!dev)
  35.296 +        dev = pcidev;
  35.297      if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0)
  35.298          return XEN_PCI_ERR_dev_not_found;
  35.299      memset(&op, 0, sizeof(op));
  35.300 @@ -360,6 +487,8 @@ int pcifront_conf_write(struct pcifront_
  35.301  {
  35.302      struct xen_pci_op op;
  35.303  
  35.304 +    if (!dev)
  35.305 +        dev = pcidev;
  35.306      if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0)
  35.307          return XEN_PCI_ERR_dev_not_found;
  35.308      memset(&op, 0, sizeof(op));
  35.309 @@ -384,6 +513,8 @@ int pcifront_enable_msi(struct pcifront_
  35.310  {
  35.311      struct xen_pci_op op;
  35.312  
  35.313 +    if (!dev)
  35.314 +        dev = pcidev;
  35.315      if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0)
  35.316          return XEN_PCI_ERR_dev_not_found;
  35.317      memset(&op, 0, sizeof(op));
  35.318 @@ -407,6 +538,8 @@ int pcifront_disable_msi(struct pcifront
  35.319  {
  35.320      struct xen_pci_op op;
  35.321  
  35.322 +    if (!dev)
  35.323 +        dev = pcidev;
  35.324      if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0)
  35.325          return XEN_PCI_ERR_dev_not_found;
  35.326      memset(&op, 0, sizeof(op));
  35.327 @@ -428,6 +561,8 @@ int pcifront_enable_msix(struct pcifront
  35.328  {
  35.329      struct xen_pci_op op;
  35.330  
  35.331 +    if (!dev)
  35.332 +        dev = pcidev;
  35.333      if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0)
  35.334          return XEN_PCI_ERR_dev_not_found;
  35.335      if (n > SH_INFO_MAX_VEC)
  35.336 @@ -460,6 +595,8 @@ int pcifront_disable_msix(struct pcifron
  35.337  {
  35.338      struct xen_pci_op op;
  35.339  
  35.340 +    if (!dev)
  35.341 +        dev = pcidev;
  35.342      if (pcifront_physical_to_virtual(dev, &dom, &bus, &slot, &fun) < 0)
  35.343          return XEN_PCI_ERR_dev_not_found;
  35.344      memset(&op, 0, sizeof(op));
    36.1 --- a/extras/mini-os/xenbus/xenbus.c	Mon Nov 02 19:35:54 2009 -0800
    36.2 +++ b/extras/mini-os/xenbus/xenbus.c	Fri Mar 19 18:36:57 2010 -0700
    36.3 @@ -96,7 +96,10 @@ void xenbus_wait_for_watch(xenbus_event_
    36.4      if (!queue)
    36.5          queue = &xenbus_events;
    36.6      ret = xenbus_wait_for_watch_return(queue);
    36.7 -    free(ret);
    36.8 +    if (ret)
    36.9 +        free(ret);
   36.10 +    else
   36.11 +        printk("unexpected path returned by watch\n");
   36.12  }
   36.13  
   36.14  char* xenbus_wait_for_value(const char* path, const char* value, xenbus_event_queue *queue)
   36.15 @@ -132,7 +135,8 @@ char *xenbus_switch_state(xenbus_transac
   36.16  
   36.17      do {
   36.18          if (xbt == XBT_NIL) {
   36.19 -            xenbus_transaction_start(&xbt);
   36.20 +            msg = xenbus_transaction_start(&xbt);
   36.21 +            if (msg) goto exit;
   36.22              xbt_flag = 1;
   36.23          }
   36.24  
    37.1 --- a/stubdom/Makefile	Mon Nov 02 19:35:54 2009 -0800
    37.2 +++ b/stubdom/Makefile	Fri Mar 19 18:36:57 2010 -0700
    37.3 @@ -265,9 +265,11 @@ TARGETS_MINIOS=$(addprefix mini-os-$(XEN
    37.4  
    37.5  .PHONY: libxc
    37.6  libxc: libxc-$(XEN_TARGET_ARCH)/libxenctrl.a libxc-$(XEN_TARGET_ARCH)/libxenguest.a
    37.7 -libxc-$(XEN_TARGET_ARCH)/libxenctrl.a libxc-$(XEN_TARGET_ARCH)/libxenguest.a:: cross-zlib
    37.8 +libxc-$(XEN_TARGET_ARCH)/libxenctrl.a: cross-zlib
    37.9  	CPPFLAGS="$(TARGET_CPPFLAGS)" CFLAGS="$(TARGET_CFLAGS)" $(MAKE) -C libxc-$(XEN_TARGET_ARCH)
   37.10  
   37.11 + libxc-$(XEN_TARGET_ARCH)/libxenguest.a: libxc-$(XEN_TARGET_ARCH)/libxenctrl.a
   37.12 +
   37.13  #######
   37.14  # ioemu
   37.15  #######
    38.1 --- a/stubdom/README	Mon Nov 02 19:35:54 2009 -0800
    38.2 +++ b/stubdom/README	Fri Mar 19 18:36:57 2010 -0700
    38.3 @@ -52,11 +52,17 @@ kernel = "pv-grub.gz"
    38.4  
    38.5  extra = "(hd0,0)/boot/grub/menu.lst"
    38.6  
    38.7 -you can also use a tftp path (dhcp will be automatically performed):
    38.8 +or you can provide the content of a menu.lst stored in dom0 by passing it as a
    38.9 +ramdisk:
   38.10 +
   38.11 +ramdisk = "/boot/domU-1-menu.lst"
   38.12 +
   38.13 +or you can also use a tftp path (dhcp will be automatically performed):
   38.14  
   38.15  extra = "(nd)/somepath/menu.lst"
   38.16  
   38.17 -or you can set it in option 150 of your dhcp server and leave extra empty
   38.18 +or you can set it in option 150 of your dhcp server and leave extra and ramdisk
   38.19 +empty (dhcp will be automatically performed)
   38.20  
   38.21  Limitations
   38.22  ===========
   38.23 @@ -69,6 +75,13 @@ export XEN_TARGET_ARCH=x86_32
   38.24  - bootsplash is supported, but the ioemu backend does not yet support restart
   38.25  for use by the booted kernel.
   38.26  
   38.27 +- PV-GRUB doesn't support virtualized partitions. For instance:
   38.28 +
   38.29 +disk = [ 'phy:hda7,hda7,w' ]
   38.30 +
   38.31 +will be seen by PV-GRUB as (hd0), not (hd0,6), since GRUB will not see any
   38.32 +partition table.
   38.33 +
   38.34  
   38.35                                  Your own stubdom
   38.36                                  ================
    39.1 --- a/stubdom/grub.patches/99minios	Mon Nov 02 19:35:54 2009 -0800
    39.2 +++ b/stubdom/grub.patches/99minios	Fri Mar 19 18:36:57 2010 -0700
    39.3 @@ -151,6 +151,14 @@ Index: grub/stage2/builtins.c
    39.4   
    39.5   /* print */
    39.6   static int
    39.7 +@@ -2910,6 +2910,7 @@
    39.8 +   switch (kernel_type)
    39.9 +     {
   39.10 +     case KERNEL_TYPE_MULTIBOOT:
   39.11 ++    case KERNEL_TYPE_PV:
   39.12 +       if (mb_cmdline + len + 1 > (char *) MB_CMDLINE_BUF + MB_CMDLINE_BUFLEN)
   39.13 + 	{
   39.14 + 	  errnum = ERR_WONT_FIT;
   39.15  @@ -3776,6 +3802,7 @@
   39.16   };
   39.17   
   39.18 @@ -1493,3 +1501,70 @@ diff -u -p -r1.5 fsys_xfs.c
   39.19   #else
   39.20   	/* This is slower but this works on all x86 architectures.  */
   39.21   	__asm__("xchgb %b0, %h0" \
   39.22 +--- grub.orig/stage2/gunzip.c	2010-03-07 23:03:34.000000000 +0100
   39.23 ++++ grub/stage2/gunzip.c	2010-03-07 23:05:36.000000000 +0100
   39.24 +@@ -141,7 +141,7 @@
   39.25 + static int gzip_filemax;
   39.26 + static int gzip_fsmax;
   39.27 + static int saved_filepos;
   39.28 +-static unsigned long gzip_crc;
   39.29 ++static unsigned int gzip_crc;
   39.30 + 
   39.31 + /* internal extra variables for use of inflate code */
   39.32 + static int block_type;
   39.33 +@@ -157,7 +157,7 @@
   39.34 +  *  Linear allocator.
   39.35 +  */
   39.36 + 
   39.37 +-static unsigned long linalloc_topaddr;
   39.38 ++static unsigned int linalloc_topaddr;
   39.39 + 
   39.40 + static void *
   39.41 + linalloc (int size)
   39.42 +@@ -253,7 +253,7 @@
   39.43 + 
   39.44 + typedef unsigned char uch;
   39.45 + typedef unsigned short ush;
   39.46 +-typedef unsigned long ulg;
   39.47 ++typedef unsigned int ulg;
   39.48 + 
   39.49 + /*
   39.50 +  *  Window Size
   39.51 +@@ -316,8 +316,8 @@
   39.52 +       return 0;
   39.53 +     }
   39.54 + 
   39.55 +-  gzip_crc = *((unsigned long *) buf);
   39.56 +-  gzip_fsmax = gzip_filemax = *((unsigned long *) (buf + 4));
   39.57 ++  gzip_crc = *((unsigned int *) buf);
   39.58 ++  gzip_fsmax = gzip_filemax = *((unsigned int *) (buf + 4));
   39.59 + 
   39.60 +   initialize_tables ();
   39.61 + 
   39.62 +diff -ur grub.orig/stage2/fsys_iso9660.c grub-upstream/stage2/fsys_iso9660.c
   39.63 +--- grub.orig/stage2/fsys_iso9660.c	2010-03-07 23:39:00.000000000 +0100
   39.64 ++++ grub/stage2/fsys_iso9660.c	2010-03-07 23:39:56.000000000 +0100
   39.65 +@@ -43,7 +43,7 @@
   39.66 + 
   39.67 + /* iso fs inode data in memory */
   39.68 + struct iso_inode_info {
   39.69 +-  unsigned long file_start;
   39.70 ++  unsigned int file_start;
   39.71 + };
   39.72 + 
   39.73 + #define ISO_SUPER	\
   39.74 +@@ -88,12 +88,12 @@
   39.75 +   if (byte_len <= 0)
   39.76 +     return 1;
   39.77 + 
   39.78 +-  sector += (byte_offset >> sector_size_lg2);
   39.79 +-  byte_offset &= (buf_geom.sector_size - 1);
   39.80 +   asm volatile ("shl%L0 %1,%0"
   39.81 + 		: "=r"(sector)
   39.82 + 		: "Ic"((int8_t)(ISO_SECTOR_BITS - sector_size_lg2)),
   39.83 + 		"0"(sector));
   39.84 ++  sector += (byte_offset >> sector_size_lg2);
   39.85 ++  byte_offset &= (buf_geom.sector_size - 1);
   39.86 + 
   39.87 + #if !defined(STAGE1_5)
   39.88 +   if (disk_read_hook && debug)
    40.1 --- a/stubdom/grub/config.h	Mon Nov 02 19:35:54 2009 -0800
    40.2 +++ b/stubdom/grub/config.h	Fri Mar 19 18:36:57 2010 -0700
    40.3 @@ -5,7 +5,7 @@
    40.4  #define debug _debug
    40.5  #define grub_halt(a) do_exit()
    40.6  #define printf grub_printf
    40.7 -void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline);
    40.8 +void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline, unsigned long flags);
    40.9  struct fbfront_dev *fb_open(void *fb, int width, int height, int depth);
   40.10  void fb_close(void);
   40.11  void pv_boot (void);
    41.1 --- a/stubdom/grub/kexec.c	Mon Nov 02 19:35:54 2009 -0800
    41.2 +++ b/stubdom/grub/kexec.c	Fri Mar 19 18:36:57 2010 -0700
    41.3 @@ -103,7 +103,7 @@ int kexec_allocate(struct xc_dom_image *
    41.4      return 0;
    41.5  }
    41.6  
    41.7 -void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline)
    41.8 +void kexec(void *kernel, long kernel_size, void *module, long module_size, char *cmdline, unsigned long flags)
    41.9  {
   41.10      struct xc_dom_image *dom;
   41.11      int rc;
   41.12 @@ -129,7 +129,7 @@ void kexec(void *kernel, long kernel_siz
   41.13      dom->ramdisk_blob = module;
   41.14      dom->ramdisk_size = module_size;
   41.15  
   41.16 -    dom->flags = 0;
   41.17 +    dom->flags = flags;
   41.18      dom->console_evtchn = start_info.console.domU.evtchn;
   41.19      dom->xenstore_evtchn = start_info.store_evtchn;
   41.20  
    42.1 --- a/stubdom/grub/mini-os.c	Mon Nov 02 19:35:54 2009 -0800
    42.2 +++ b/stubdom/grub/mini-os.c	Fri Mar 19 18:36:57 2010 -0700
    42.3 @@ -173,6 +173,8 @@ load_file(char *name, void **ptr, long *
    42.4  void *kernel_image, *module_image;
    42.5  long  kernel_size, module_size;
    42.6  char *kernel_arg, *module_arg;
    42.7 +void *multiboot_next_module;
    42.8 +struct xen_multiboot_mod_list *multiboot_next_module_header;
    42.9  
   42.10  kernel_t
   42.11  load_image (char *kernel, char *arg, kernel_t suggested_type,
   42.12 @@ -196,6 +198,8 @@ load_initrd (char *initrd)
   42.13      if (module_image)
   42.14          free(module_image);
   42.15      module_image = NULL;
   42.16 +    multiboot_next_module = NULL;
   42.17 +    multiboot_next_module_header = NULL;
   42.18      load_file (initrd, &module_image, &module_size);
   42.19      return ! errnum;
   42.20  }
   42.21 @@ -203,20 +207,76 @@ load_initrd (char *initrd)
   42.22  int
   42.23  load_module (char *module, char *arg)
   42.24  {
   42.25 -    if (module_image)
   42.26 +    void *new_module, *new_module_image;
   42.27 +    long new_module_size, rounded_new_module_size;
   42.28 +
   42.29 +    if (load_file (module, &new_module, &new_module_size))
   42.30 +        return 0;
   42.31 +    if (strlen(arg) >= PAGE_SIZE) {
   42.32 +        /* Too big module command line */
   42.33 +        errnum = ERR_WONT_FIT;
   42.34 +        return 0;
   42.35 +    }
   42.36 +    rounded_new_module_size = (new_module_size + PAGE_SIZE - 1) & PAGE_MASK;
   42.37 +
   42.38 +    if (module_image && !multiboot_next_module_header) {
   42.39 +        /* Initrd already loaded, drop it */
   42.40          free(module_image);
   42.41 -    module_image = NULL;
   42.42 -    load_file (module, &module_image, &module_size);
   42.43 -    if (module_arg)
   42.44 -        free(module_arg);
   42.45 -    module_arg = strdup(arg);
   42.46 -    return ! errnum;
   42.47 +        if (module_arg)
   42.48 +            free(module_arg);
   42.49 +        module_image = NULL;
   42.50 +    }
   42.51 +    if (!module_image)
   42.52 +        /* Reserve one page for the header */
   42.53 +        multiboot_next_module = (void*) PAGE_SIZE;
   42.54 +
   42.55 +    /* Allocate more room for the new module plus its arg */
   42.56 +    new_module_image = realloc(module_image,
   42.57 +            (multiboot_next_module - module_image) + rounded_new_module_size + PAGE_SIZE);
   42.58 +
   42.59 +    /* Update pointers */
   42.60 +    multiboot_next_module += new_module_image - module_image;
   42.61 +    multiboot_next_module_header = (void*) multiboot_next_module_header + (new_module_image - module_image);
   42.62 +    module_image = new_module_image;
   42.63 +
   42.64 +    if ((void*) (multiboot_next_module_header+1) - module_image > PAGE_SIZE) {
   42.65 +        /* Too many modules */
   42.66 +        ERR_WONT_FIT;
   42.67 +        return 0;
   42.68 +    }
   42.69 +
   42.70 +    /* Copy module */
   42.71 +    memcpy(multiboot_next_module, new_module, new_module_size);
   42.72 +    multiboot_next_module_header->mod_start = multiboot_next_module - module_image;
   42.73 +    multiboot_next_module_header->mod_end = multiboot_next_module_header->mod_start + new_module_size - 1;
   42.74 +    multiboot_next_module += rounded_new_module_size;
   42.75 +
   42.76 +    /* Copy cmdline */
   42.77 +    strcpy(multiboot_next_module, arg);
   42.78 +    multiboot_next_module_header->cmdline = multiboot_next_module - module_image;
   42.79 +    multiboot_next_module += PAGE_SIZE;
   42.80 +
   42.81 +    /* Pad */
   42.82 +    multiboot_next_module_header->pad = 0;
   42.83 +
   42.84 +    multiboot_next_module_header++;
   42.85 +
   42.86 +    return 1;
   42.87  }
   42.88  
   42.89  void
   42.90  pv_boot (void)
   42.91  {
   42.92 -    kexec(kernel_image, kernel_size, module_image, module_size, kernel_arg);
   42.93 +    unsigned long flags = 0;
   42.94 +    if (multiboot_next_module_header) {
   42.95 +        /* Termination entry */
   42.96 +        multiboot_next_module_header->mod_start = 0;
   42.97 +        /* Total size */
   42.98 +        module_size = multiboot_next_module - module_image;
   42.99 +        /* It's a multiboot module */
  42.100 +        flags |= SIF_MULTIBOOT_MOD;
  42.101 +    }
  42.102 +    kexec(kernel_image, kernel_size, module_image, module_size, kernel_arg, flags);
  42.103  }
  42.104  
  42.105  /*
    43.1 --- a/stubdom/pciutils.patch	Mon Nov 02 19:35:54 2009 -0800
    43.2 +++ b/stubdom/pciutils.patch	Fri Mar 19 18:36:57 2010 -0700
    43.3 @@ -23,14 +23,6 @@ diff -urN pciutils-2.2.9.orig/lib/access
    43.4     PCI_ACCESS_MAX
    43.5   };
    43.6   
    43.7 -@@ -63,6 +64,7 @@
    43.8 -   int fd_rw;				/* proc: fd opened read-write */
    43.9 -   struct pci_dev *cached_dev;		/* proc: device the fd is for */
   43.10 -   int fd_pos;				/* proc: current position */
   43.11 -+  void *minios;
   43.12 - };
   43.13 - 
   43.14 - /* Initialize PCI access */
   43.15  --- pciutils-2.2.9.orig/lib/internal.h	2006-09-09 11:52:47.000000000 +0100
   43.16  +++ pciutils-2.2.9/lib/internal.h	2008-07-01 10:46:24.968202000 +0100
   43.17  @@ -37,4 +37,4 @@
   43.18 @@ -72,7 +64,7 @@ diff -urN pciutils-2.2.9.orig/lib/access
   43.19   
   43.20  --- pciutils-2.2.9.orig/lib/minios.c	1970-01-01 01:00:00.000000000 +0100
   43.21  +++ pciutils-2.2.9/lib/minios.c	2008-07-01 12:31:40.554260000 +0100
   43.22 -@@ -0,0 +1,113 @@
   43.23 +@@ -0,0 +1,106 @@
   43.24  +/*
   43.25  + *	The PCI Library -- MiniOS PCI frontend access
   43.26  + *
   43.27 @@ -95,24 +87,17 @@ diff -urN pciutils-2.2.9.orig/lib/access
   43.28  +static void
   43.29  +minios_init(struct pci_access *a)
   43.30  +{
   43.31 -+  a->minios = init_pcifront(NULL);
   43.32 -+  if (!a->minios)
   43.33 -+    a->warning("minios_init open failed");
   43.34  +}
   43.35  +
   43.36  +static void
   43.37  +minios_cleanup(struct pci_access *a)
   43.38  +{
   43.39 -+  if (a->minios)
   43.40 -+    shutdown_pcifront(a->minios);
   43.41 ++  shutdown_pcifront(NULL);
   43.42  +}
   43.43  +
   43.44  +static void
   43.45  +minios_scan(struct pci_access *a)
   43.46  +{
   43.47 -+  if (!a->minios)
   43.48 -+    return;
   43.49 -+
   43.50  +  void func(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun)
   43.51  +  {
   43.52  +    struct pci_dev *d = pci_alloc_dev(a);
   43.53 @@ -125,7 +110,7 @@ diff -urN pciutils-2.2.9.orig/lib/access
   43.54  +    pci_link_dev(a, d);
   43.55  +  }
   43.56  +
   43.57 -+  pcifront_scan(a->minios, func);
   43.58 ++  pcifront_scan(NULL, func);
   43.59  +}
   43.60  +
   43.61  +static int
   43.62 @@ -134,17 +119,17 @@ diff -urN pciutils-2.2.9.orig/lib/access
   43.63  +  unsigned int val;
   43.64  +  switch (len) {
   43.65  +    case 1:
   43.66 -+      if (pcifront_conf_read(d->access->minios, d->domain, d->bus, d->dev, d->func, pos, len, &val))
   43.67 ++      if (pcifront_conf_read(NULL, d->domain, d->bus, d->dev, d->func, pos, len, &val))
   43.68  +        return 0;
   43.69  +      * buf = val;
   43.70  +      return 1;
   43.71  +    case 2:
   43.72 -+      if (pcifront_conf_read(d->access->minios, d->domain, d->bus, d->dev, d->func, pos, len, &val))
   43.73 ++      if (pcifront_conf_read(NULL, d->domain, d->bus, d->dev, d->func, pos, len, &val))
   43.74  +        return 0;
   43.75  +      *(u16 *) buf = cpu_to_le16((u16) val);
   43.76  +      return 1;
   43.77  +    case 4:
   43.78 -+      if (pcifront_conf_read(d->access->minios, d->domain, d->bus, d->dev, d->func, pos, len, &val))
   43.79 ++      if (pcifront_conf_read(NULL, d->domain, d->bus, d->dev, d->func, pos, len, &val))
   43.80  +        return 0;
   43.81  +      *(u32 *) buf = cpu_to_le32((u32) val);
   43.82  +      return 1;
   43.83 @@ -170,7 +155,7 @@ diff -urN pciutils-2.2.9.orig/lib/access
   43.84  +    default:
   43.85  +      return pci_generic_block_write(d, pos, buf, len);
   43.86  +  }
   43.87 -+  return !pcifront_conf_write(d->access->minios, d->domain, d->bus, d->dev, d->func, pos, len, val);
   43.88 ++  return !pcifront_conf_write(NULL, d->domain, d->bus, d->dev, d->func, pos, len, val);
   43.89  +}
   43.90  +
   43.91  +struct pci_methods pm_minios = {
    44.1 --- a/stubdom/stubdom-dm	Mon Nov 02 19:35:54 2009 -0800
    44.2 +++ b/stubdom/stubdom-dm	Fri Mar 19 18:36:57 2010 -0700
    44.3 @@ -80,8 +80,8 @@ done
    44.4  # Termination handler
    44.5  
    44.6  term() {
    44.7 -    kill %1
    44.8      [ -n "$vncpid" ] && kill -9 $vncpid
    44.9 +    rm -f /tmp/$domname-dm    # remove the per-domain console FIFO
   44.10      rm ${stubdom_configdir}/$domname-dm
   44.11      exit 0
   44.12  }
   44.13 @@ -154,11 +154,10 @@ do
   44.14      j=$(( $j + 1 ))
   44.15  done
   44.16  echo " ] " >> ${stubdom_configdir}/$domname-dm
   44.17 -creation="xm create -c ${stubdom_configdir}/$domname-dm target=$domid memory=32 extra=\"$extra\""
   44.18  
   44.19 -(while true ; do sleep 60 ; done) | /bin/sh -c "$creation" &
   44.20 -#xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" &
   44.21 -consolepid=$!
   44.22 +mkfifo /tmp/$domname-dm
   44.23 +xm create -c ${stubdom_configdir}/$domname-dm target=$domid memory=32 extra="$extra" < /tmp/$domname-dm &
   44.24 +exec 4>/tmp/$domname-dm
   44.25  
   44.26  
   44.27  ###########
   44.28 @@ -178,6 +177,6 @@ then
   44.29  fi
   44.30  
   44.31  # wait for SIGHUP or stubdom termination
   44.32 -wait $consolepid
   44.33 +wait
   44.34  
   44.35  term
    45.1 --- a/tools/Makefile	Mon Nov 02 19:35:54 2009 -0800
    45.2 +++ b/tools/Makefile	Fri Mar 19 18:36:57 2010 -0700
    45.3 @@ -1,4 +1,4 @@
    45.4 -XEN_ROOT = ../
    45.5 +XEN_ROOT = ..
    45.6  include $(XEN_ROOT)/tools/Rules.mk
    45.7  
    45.8  SUBDIRS-y :=
    45.9 @@ -21,6 +21,7 @@ SUBDIRS-$(VTPM_TOOLS) += vtpm_manager
   45.10  SUBDIRS-$(VTPM_TOOLS) += vtpm
   45.11  SUBDIRS-y += xenstat
   45.12  SUBDIRS-$(CONFIG_Linux) += libaio
   45.13 +SUBDIRS-$(CONFIG_Linux) += memshr 
   45.14  SUBDIRS-$(CONFIG_Linux) += blktap
   45.15  SUBDIRS-$(CONFIG_Linux) += blktap2
   45.16  SUBDIRS-$(CONFIG_NetBSD) += libaio
   45.17 @@ -32,6 +33,9 @@ SUBDIRS-$(CONFIG_Linux) += fs-back
   45.18  SUBDIRS-$(CONFIG_NetBSD) += fs-back
   45.19  SUBDIRS-$(CONFIG_IOEMU) += ioemu-dir
   45.20  SUBDIRS-y += xenpmd
   45.21 +SUBDIRS-y += libxl
   45.22 +SUBDIRS-y += remus
   45.23 +SUBDIRS-$(CONFIG_X86) += xenpaging
   45.24  
   45.25  # These don't cross-compile
   45.26  ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
    46.1 --- a/tools/Rules.mk	Mon Nov 02 19:35:54 2009 -0800
    46.2 +++ b/tools/Rules.mk	Fri Mar 19 18:36:57 2010 -0700
    46.3 @@ -49,8 +49,8 @@ check-$(CONFIG_X86) = $(call cc-ver-chec
    46.4                          "Xen requires at least gcc-3.4")
    46.5  $(eval $(check-y))
    46.6  
    46.7 -DEFAULT_PYTHON_PATH := $(shell $(XEN_ROOT)/tools/python/get-path)
    46.8 -PYTHON_PATH ?= $(DEFAULT_PYTHON_PATH)
    46.9 +_PYTHON_PATH := $(shell which $(PYTHON))
   46.10 +PYTHON_PATH ?= $(_PYTHON_PATH)
   46.11  INSTALL_PYTHON_PROG = \
   46.12  	$(XEN_ROOT)/tools/python/install-wrap "$(PYTHON_PATH)" $(INSTALL_PROG)
   46.13  
    47.1 --- a/tools/blktap/drivers/Makefile	Mon Nov 02 19:35:54 2009 -0800
    47.2 +++ b/tools/blktap/drivers/Makefile	Fri Mar 19 18:36:57 2010 -0700
    47.3 @@ -4,6 +4,7 @@ include $(XEN_ROOT)/tools/Rules.mk
    47.4  IBIN         = blktapctrl tapdisk
    47.5  QCOW_UTIL    = img2qcow qcow2raw qcow-create
    47.6  LIBAIO_DIR   = ../../libaio/src
    47.7 +MEMSHR_DIR   = ../../memshr
    47.8  
    47.9  CFLAGS   += -Werror
   47.10  CFLAGS   += -Wno-unused
   47.11 @@ -11,6 +12,7 @@ CFLAGS   += -I../lib
   47.12  CFLAGS   += $(CFLAGS_libxenctrl)
   47.13  CFLAGS   += $(CFLAGS_libxenstore)
   47.14  CFLAGS   += -I $(LIBAIO_DIR)
   47.15 +CFLAGS   += -I $(MEMSHR_DIR)
   47.16  CFLAGS   += -D_GNU_SOURCE
   47.17  
   47.18  ifeq ($(shell . ./check_gcrypt $(CC)),yes)
   47.19 @@ -21,7 +23,13 @@ CRYPT_LIB := -lcrypto
   47.20  $(warning === libgcrypt not installed: falling back to libcrypto ===)
   47.21  endif
   47.22  
   47.23 -LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib -lblktap
   47.24 +MEMSHRLIBS :=
   47.25 +ifeq ($(CONFIG_Linux), y)
   47.26 +CFLAGS += -DMEMSHR
   47.27 +MEMSHRLIBS += $(MEMSHR_DIR)/libmemshr.a
   47.28 +endif
   47.29 +
   47.30 +LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) $(MEMSHRLIBS) -L../lib -lblktap -lrt -lm -lpthread
   47.31  LDFLAGS_img := $(LIBAIO_DIR)/libaio.a $(CRYPT_LIB) -lpthread -lz
   47.32  
   47.33  BLK-OBJS-y  := block-aio.o
    48.1 --- a/tools/blktap/drivers/blktapctrl.c	Mon Nov 02 19:35:54 2009 -0800
    48.2 +++ b/tools/blktap/drivers/blktapctrl.c	Fri Mar 19 18:36:57 2010 -0700
    48.3 @@ -50,6 +50,8 @@
    48.4  #include <xs.h>
    48.5  #include <sys/time.h>
    48.6  #include <syslog.h>
    48.7 +#include <memshr.h>
    48.8 +#include <sys/stat.h>
    48.9                                                                       
   48.10  #include "blktaplib.h"
   48.11  #include "blktapctrl.h"
   48.12 @@ -858,6 +860,10 @@ int main(int argc, char *argv[])
   48.13  		goto open_failed;
   48.14  	}
   48.15  
   48.16 +#ifdef MEMSHR
   48.17 +	memshr_daemon_initialize();
   48.18 +#endif
   48.19 +
   48.20   retry:
   48.21  	/* Set up store connection and watch. */
   48.22  	h = xs_daemon_open();
    49.1 --- a/tools/blktap/drivers/block-qcow2.c	Mon Nov 02 19:35:54 2009 -0800
    49.2 +++ b/tools/blktap/drivers/block-qcow2.c	Fri Mar 19 18:36:57 2010 -0700
    49.3 @@ -30,6 +30,7 @@
    49.4  #include <stdio.h>
    49.5  #include <stdlib.h>
    49.6  #include <string.h>
    49.7 +#include <sys/stat.h>
    49.8  
    49.9  #include "tapdisk.h"
   49.10  #include "tapaio.h"
    50.1 --- a/tools/blktap/lib/blktaplib.h	Mon Nov 02 19:35:54 2009 -0800
    50.2 +++ b/tools/blktap/lib/blktaplib.h	Fri Mar 19 18:36:57 2010 -0700
    50.3 @@ -42,7 +42,7 @@
    50.4  #include <sys/types.h>
    50.5  #include <unistd.h>
    50.6  
    50.7 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, XC_PAGE_SIZE)
    50.8 +#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, XC_PAGE_SIZE)
    50.9  
   50.10  /* size of the extra VMA area to map in attached pages. */
   50.11  #define BLKTAP_VMA_PAGES BLK_RING_SIZE
    51.1 --- a/tools/blktap/lib/xenbus.c	Mon Nov 02 19:35:54 2009 -0800
    51.2 +++ b/tools/blktap/lib/xenbus.c	Fri Mar 19 18:36:57 2010 -0700
    51.3 @@ -107,6 +107,24 @@ static int get_be_id(const char *str)
    51.4  	return atoi(num);
    51.5  }
    51.6  
    51.7 +static int get_be_domid(const char *str)
    51.8 +{
    51.9 +	int len1, len2;
   51.10 +	const char *ptr;
   51.11 +	char *tptr, num[10];
   51.12 +
   51.13 +	len2 = strsep_len(str, '/', 3);
   51.14 +	if ( len2 < 0 ) return -1;
   51.15 +	len1 = strsep_len(str, '/', 2);
   51.16 +
   51.17 +	ptr = str + len1 + 1;
   51.18 +	strncpy(num, ptr, len2 - len1 - 1);
   51.19 +	tptr = num + (len2 - len1 - 1);
   51.20 +	*tptr = '\0';
   51.21 +
   51.22 +	return atoi(num);
   51.23 +}
   51.24 +
   51.25  static struct backend_info *be_lookup_be(const char *bepath)
   51.26  {
   51.27  	struct backend_info *be;
   51.28 @@ -150,6 +168,24 @@ static int backend_remove(struct xs_hand
   51.29  	return 0;
   51.30  }
   51.31  
   51.32 +static const char *get_image_path(const char *path)
   51.33 +{
   51.34 +	const char *tmp;
   51.35 +
   51.36 +	/* Strip off the image type */
   51.37 +	if (!strncmp(path, "tapdisk:", strlen("tapdisk:"))) {
   51.38 +		path += strlen("tapdisk:");
   51.39 +	} else if (!strncmp(path, "ioemu:", strlen("ioemu:"))) {
   51.40 +		path += strlen("ioemu:");
   51.41 +	}
   51.42 +
   51.43 +	tmp = strchr(path, ':');
   51.44 +	if (tmp != NULL)
   51.45 +		path = tmp + 1;
   51.46 +
   51.47 +	return path;
   51.48 +}
   51.49 +
   51.50  static int check_sharing(struct xs_handle *h, struct backend_info *be)
   51.51  {
   51.52  	char *dom_uuid;
   51.53 @@ -161,8 +197,12 @@ static int check_sharing(struct xs_handl
   51.54  	char **devices;
   51.55  	int i, j;
   51.56  	unsigned int num_dom, num_dev;
   51.57 -	blkif_info_t *info;
   51.58 +	blkif_info_t *info = be->blkif->info;
   51.59  	int ret = 0;
   51.60 +	const char *image_path[2];
   51.61 +	int be_domid = get_be_domid(be->backpath);
   51.62 +
   51.63 +	image_path[0] = get_image_path(info->params);
   51.64  
   51.65  	/* If the mode contains '!' or doesn't contain 'w' don't check anything */
   51.66  	xs_gather(h, be->backpath, "mode", NULL, &mode, NULL);
   51.67 @@ -178,7 +218,10 @@ static int check_sharing(struct xs_handl
   51.68  	free(path);
   51.69  
   51.70  	/* Iterate through the devices of all VMs */
   51.71 -	domains = xs_directory(h, XBT_NULL, "backend/tap", &num_dom);
   51.72 +	if (asprintf(&path, "/local/domain/%d/backend/tap", be_domid) == -1)
   51.73 +		goto fail;
   51.74 +	domains = xs_directory(h, XBT_NULL, path, &num_dom);
   51.75 +	free(path);
   51.76  	if (domains == NULL)
   51.77  		num_dom = 0;
   51.78  
   51.79 @@ -189,8 +232,11 @@ static int check_sharing(struct xs_handl
   51.80  			ret = -1;
   51.81  			break;
   51.82  		}
   51.83 +		cur_dom_uuid = NULL;
   51.84  		xs_gather(h, path, "vm", NULL, &cur_dom_uuid, NULL);
   51.85  		free(path);
   51.86 +		if (!cur_dom_uuid)
   51.87 +			continue;
   51.88  
   51.89  		if (!strcmp(cur_dom_uuid, dom_uuid)) {
   51.90  			free(cur_dom_uuid);
   51.91 @@ -198,7 +244,7 @@ static int check_sharing(struct xs_handl
   51.92  		}
   51.93  
   51.94  		/* Check the devices */
   51.95 -		if (asprintf(&path, "backend/tap/%s", domains[i]) == -1) {
   51.96 +		if (asprintf(&path, "/local/domain/%d/backend/tap/%s", be_domid, domains[i]) == -1) {
   51.97  			ret = -1;
   51.98  			free(cur_dom_uuid);
   51.99  			break;
  51.100 @@ -209,15 +255,18 @@ static int check_sharing(struct xs_handl
  51.101  		free(path);
  51.102  
  51.103  		for (j = 0; !ret && (j < num_dev); j++) {
  51.104 -			if (asprintf(&path, "backend/tap/%s/%s", domains[i], devices[j]) == -1) {
  51.105 +			if (asprintf(&path, "/local/domain/%d/backend/tap/%s/%s", be_domid, domains[i], devices[j]) == -1) {
  51.106  				ret = -1;
  51.107  				break;
  51.108  			}
  51.109 +			params = NULL;
  51.110  			xs_gather(h, path, "params", NULL, &params, NULL);
  51.111  			free(path);
  51.112 +			if (!params)
  51.113 +				continue;
  51.114  
  51.115 -			info =  be->blkif->info;
  51.116 -			if (strcmp(params, info->params)) {
  51.117 +			image_path[1] = get_image_path(params);
  51.118 +			if (!strcmp(image_path[0], image_path[1])) {
  51.119  				ret = -1;
  51.120  			}
  51.121  
  51.122 @@ -241,24 +290,12 @@ out:
  51.123  static int check_image(struct xs_handle *h, struct backend_info *be,
  51.124  	const char** errmsg)
  51.125  {
  51.126 -	const char *tmp;
  51.127  	const char *path;
  51.128  	int mode;
  51.129  	blkif_t *blkif = be->blkif;
  51.130  	blkif_info_t *info = blkif->info;
  51.131  
  51.132 -	/* Strip off the image type */
  51.133 -	path = info->params;
  51.134 -
  51.135 -	if (!strncmp(path, "tapdisk:", strlen("tapdisk:"))) {
  51.136 -		path += strlen("tapdisk:");
  51.137 -	} else if (!strncmp(path, "ioemu:", strlen("ioemu:"))) {
  51.138 -		path += strlen("ioemu:");
  51.139 -	}
  51.140 -
  51.141 -	tmp = strchr(path, ':');
  51.142 -	if (tmp != NULL)
  51.143 -		path = tmp + 1;
  51.144 +	path = get_image_path(info->params);
  51.145  
  51.146  	/* Check if the image exists and access is permitted */
  51.147  	mode = R_OK;
    52.1 --- a/tools/blktap2/drivers/Makefile	Mon Nov 02 19:35:54 2009 -0800
    52.2 +++ b/tools/blktap2/drivers/Makefile	Fri Mar 19 18:36:57 2010 -0700
    52.3 @@ -1,4 +1,4 @@
    52.4 -XEN_ROOT=../../../
    52.5 +XEN_ROOT=../../..
    52.6  BLKTAP_ROOT= ..
    52.7  include $(XEN_ROOT)/tools/Rules.mk
    52.8  
    52.9 @@ -14,7 +14,9 @@ CFLAGS    += -Wno-unused
   52.10  CFLAGS    += -fno-strict-aliasing
   52.11  CFLAGS    += -I../lib -I../../libxc
   52.12  CFLAGS    += -I../include -I../../include
   52.13 +CFLAGS    += $(CFLAGS_libxenctrl)
   52.14  CFLAGS    += -I $(LIBAIO_DIR)
   52.15 +CFLAGS    += -I $(MEMSHR_DIR)
   52.16  CFLAGS    += -D_GNU_SOURCE
   52.17  CFLAGS    += -DUSE_NFS_LOCKS
   52.18  
   52.19 @@ -36,7 +38,7 @@ else
   52.20  CRYPT_LIB += -lcrypto
   52.21  endif
   52.22  
   52.23 -LDFLAGS_img := $(CRYPT_LIB) -lpthread -lz
   52.24 +LDFLAGS_img := $(LDFLAGS_libxenctrl) $(CRYPT_LIB) -lpthread -lz -lm
   52.25  
   52.26  LIBS += -L$(LIBVHDDIR) -lvhd
   52.27  
   52.28 @@ -44,8 +46,23 @@ ifeq ($(CONFIG_Linux),y)
   52.29  LIBS += -luuid
   52.30  endif
   52.31  
   52.32 +REMUS-OBJS  := block-remus.o
   52.33 +REMUS-OBJS  += hashtable.o
   52.34 +REMUS-OBJS  += hashtable_itr.o
   52.35 +REMUS-OBJS  += hashtable_utility.o
   52.36 +
   52.37 +$(REMUS-OBJS): CFLAGS += -I$(XEN_XENSTORE)
   52.38 +
   52.39  LIBAIO_DIR = $(XEN_ROOT)/tools/libaio/src
   52.40 -tapdisk2 tapdisk-stream tapdisk-diff $(QCOW_UTIL): AIOLIBS := $(LIBAIO_DIR)/libaio.a
   52.41 +MEMSHR_DIR = $(XEN_ROOT)/tools/memshr
   52.42 +
   52.43 +MEMSHRLIBS :=
   52.44 +ifeq ($(CONFIG_Linux), __fixme__)
   52.45 +CFLAGS += -DMEMSHR
   52.46 +MEMSHRLIBS += $(MEMSHR_DIR)/libmemshr.a
   52.47 +endif
   52.48 +
   52.49 +tapdisk2 tapdisk-stream tapdisk-diff $(QCOW_UTIL): AIOLIBS := $(LIBAIO_DIR)/libaio.a 
   52.50  tapdisk-client tapdisk-stream tapdisk-diff $(QCOW_UTIL): CFLAGS  += -I$(LIBAIO_DIR) -I$(XEN_LIBXC)
   52.51  
   52.52  ifeq ($(VHD_STATIC),y)
   52.53 @@ -81,18 +98,19 @@ BLK-OBJS-y  += block-log.o
   52.54  BLK-OBJS-y  += block-qcow.o
   52.55  BLK-OBJS-y  += aes.o
   52.56  BLK-OBJS-y  += $(PORTABLE-OBJS-y)
   52.57 +BLK-OBJS-y  += $(REMUS-OBJS)
   52.58  
   52.59  all: $(IBIN) lock-util qcow-util
   52.60  
   52.61  
   52.62  tapdisk2: $(TAP-OBJS-y) $(BLK-OBJS-y) $(MISC-OBJS-y) tapdisk2.c
   52.63 -	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(LDFLAGS_img)
   52.64 +	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(MEMSHRLIBS) $(LDFLAGS_img)
   52.65  
   52.66  tapdisk-client: tapdisk-client.o
   52.67  	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS)  $(LDFLAGS_img)
   52.68  
   52.69  tapdisk-stream tapdisk-diff: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y)
   52.70 -	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS)  $(LDFLAGS_img)
   52.71 +	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(MEMSHRLIBS) $(LDFLAGS_img)
   52.72  
   52.73  td-util: td.o tapdisk-utils.o tapdisk-log.o $(PORTABLE-OBJS-y)
   52.74  	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS)  $(LDFLAGS_img)
   52.75 @@ -104,7 +122,7 @@ lock-util: lock.c
   52.76  qcow-util: img2qcow qcow2raw qcow-create
   52.77  
   52.78  img2qcow qcow2raw qcow-create: %: %.o $(TAP-OBJS-y) $(BLK-OBJS-y)
   52.79 -	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(LDFLAGS_img)
   52.80 +	$(CC) $(CFLAGS) -o $@ $^ $(LDFLAGS) $(LIBS) $(AIOLIBS) $(MEMSHRLIBS) $(LDFLAGS_img)
   52.81  
   52.82  install: all
   52.83  	$(INSTALL_DIR) -p $(DESTDIR)$(INST_DIR)
    53.1 --- a/tools/blktap2/drivers/block-aio.c	Mon Nov 02 19:35:54 2009 -0800
    53.2 +++ b/tools/blktap2/drivers/block-aio.c	Fri Mar 19 18:36:57 2010 -0700
    53.3 @@ -28,7 +28,6 @@
    53.4  
    53.5  
    53.6  #include <errno.h>
    53.7 -#include <libaio.h>
    53.8  #include <fcntl.h>
    53.9  #include <stdio.h>
   53.10  #include <stdlib.h>
    54.1 --- a/tools/blktap2/drivers/block-qcow.c	Mon Nov 02 19:35:54 2009 -0800
    54.2 +++ b/tools/blktap2/drivers/block-qcow.c	Fri Mar 19 18:36:57 2010 -0700
    54.3 @@ -1035,6 +1035,17 @@ void tdqcow_queue_read(td_driver_t *driv
    54.4  		}
    54.5  		
    54.6  		if(!cluster_offset) {
    54.7 +            int i;
    54.8 +            /* Forward entire request if possible. */
    54.9 +            for(i=0; i<nb_sectors; i++)
   54.10 +                if(get_cluster_offset(s, (sector+i) << 9, 0, 0, 0, 0))
   54.11 +                    goto coalesce_failed;
   54.12 +            treq.buf  = buf;
   54.13 +            treq.sec  = sector;
   54.14 +            treq.secs = nb_sectors;
   54.15 +			td_forward_request(treq);
   54.16 +            return;
   54.17 +coalesce_failed:            
   54.18  			treq.buf  = buf;
   54.19  			treq.sec  = sector;
   54.20  			treq.secs = n;
    55.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    55.2 +++ b/tools/blktap2/drivers/block-remus.c	Fri Mar 19 18:36:57 2010 -0700
    55.3 @@ -0,0 +1,1674 @@
    55.4 +/* block-remus.c
    55.5 + *
    55.6 + * This disk sends all writes to a backup via a network interface before
    55.7 + * passing them to an underlying device.
    55.8 + * The backup is a bit more complicated:
    55.9 + *  1. It applies all incoming writes to a ramdisk.
   55.10 + *  2. When a checkpoint request arrives, it moves the ramdisk to
   55.11 + *     a committing state and uses a new ramdisk for subsequent writes.
   55.12 + *     It also acknowledges the request, to let the sender know it can
   55.13 + *     release output.
   55.14 + *  3. The ramdisk flushes its contents to the underlying driver.
   55.15 + *  4. At failover, the backup waits for the in-flight ramdisk (if any) to
   55.16 + *     drain before letting the domain be activated.
   55.17 + *
   55.18 + * The driver determines whether it is the client or server by attempting
   55.19 + * to bind to the replication address. If the address is not local,
   55.20 + * the driver acts as client.
   55.21 + *
   55.22 + * The following messages are defined for the replication stream:
   55.23 + * 1. write request
   55.24 + *    "wreq"      4
   55.25 + *    num_sectors 4
   55.26 + *    sector      8
   55.27 + *    buffer      (num_sectors * sector_size)
   55.28 + * 2. submit request (may be used as a barrier)
   55.29 + *    "sreq"      4
   55.30 + * 3. commit request
   55.31 + *    "creq"      4
   55.32 + * After a commit request, the client must wait for a completion message:
   55.33 + * 4. completion
   55.34 + *    "done"      4
   55.35 + */
   55.36 +
   55.37 +/* due to architectural choices in tapdisk, block-buffer is forced to
   55.38 + * reimplement some code which is meant to be private */
   55.39 +#define TAPDISK
   55.40 +#include "tapdisk.h"
   55.41 +#include "tapdisk-server.h"
   55.42 +#include "tapdisk-driver.h"
   55.43 +#include "tapdisk-interface.h"
   55.44 +#include "hashtable.h"
   55.45 +#include "hashtable_itr.h"
   55.46 +#include "hashtable_utility.h"
   55.47 +
   55.48 +#include <errno.h>
   55.49 +#include <inttypes.h>
   55.50 +#include <fcntl.h>
   55.51 +#include <stdio.h>
   55.52 +#include <stdlib.h>
   55.53 +#include <string.h>
   55.54 +#include <sys/time.h>
   55.55 +#include <sys/types.h>
   55.56 +#include <sys/socket.h>
   55.57 +#include <netdb.h>
   55.58 +#include <netinet/in.h>
   55.59 +#include <arpa/inet.h>
   55.60 +#include <sys/param.h>
   55.61 +#include <sys/sysctl.h>
   55.62 +#include <unistd.h>
   55.63 +#include <sys/stat.h>
   55.64 +
   55.65 +/* timeout for reads and writes in ms */
   55.66 +#define HEARTBEAT_MS 1000
   55.67 +#define RAMDISK_HASHSIZE 128
   55.68 +
   55.69 +/* connect retry timeout (seconds) */
   55.70 +#define REMUS_CONNRETRY_TIMEOUT 10
   55.71 +
   55.72 +#define RPRINTF(_f, _a...) syslog (LOG_DEBUG, "remus: " _f, ## _a)
   55.73 +
   55.74 +enum tdremus_mode {
   55.75 +	mode_invalid = 0,
   55.76 +	mode_unprotected,
   55.77 +	mode_primary,
   55.78 +	mode_backup
   55.79 +};
   55.80 +
   55.81 +struct tdremus_req {
   55.82 +	uint64_t sector;
   55.83 +	int nb_sectors;
   55.84 +	char buf[4096];
   55.85 +};
   55.86 +
   55.87 +struct req_ring {
   55.88 +	/* waste one slot to distinguish between empty and full */
   55.89 +	struct tdremus_req requests[MAX_REQUESTS * 2 + 1];
   55.90 +	unsigned int head;
   55.91 +	unsigned int tail;
   55.92 +};
   55.93 +
   55.94 +/* TODO: This isn't very pretty, but to properly generate our own treqs (needed
   55.95 + * by the backup) we need to know our td_vbd_t and td_image_t (blktap2
   55.96 + * internals). As a proper fix, we should consider extending the tapdisk
   55.97 + * interface with a td_create_request() function, or something similar.
   55.98 + *
   55.99 + * For now, we just grab the vbd in the td_open() command, and the td_image_t
  55.100 + * from the first read request.
  55.101 + */
  55.102 +td_vbd_t *device_vbd = NULL;
  55.103 +td_image_t *remus_image = NULL;
  55.104 +
  55.105 +struct ramdisk {
  55.106 +	size_t sector_size;
  55.107 +	struct hashtable* h;
  55.108 +	/* when a ramdisk is flushed, h is given a new empty hash for writes
  55.109 +	 * while the old ramdisk (prev) is drained asynchronously. To avoid
  55.110 +	 * a race where a read request points to a sector in prev which has
  55.111 +	 * not yet been flushed, check prev on a miss in h */
  55.112 +	struct hashtable* prev;
  55.113 +	/* count of outstanding requests to the base driver */
  55.114 +	size_t inflight;
  55.115 +};
  55.116 +
  55.117 +/* the ramdisk intercepts the original callback for reads and writes.
  55.118 + * This holds the original data. */
  55.119 +/* Might be worth making this a static array in struct ramdisk to avoid
  55.120 + * a malloc per request */
  55.121 +
  55.122 +struct tdremus_state;
  55.123 +
  55.124 +struct ramdisk_cbdata {
  55.125 +	td_callback_t cb;
  55.126 +	void* private;
  55.127 +	char* buf;
  55.128 +	struct tdremus_state* state;
  55.129 +};
  55.130 +
  55.131 +struct ramdisk_write_cbdata {
  55.132 +	struct tdremus_state* state;
  55.133 +	char* buf;
  55.134 +};
  55.135 +
  55.136 +typedef void (*queue_rw_t) (td_driver_t *driver, td_request_t treq);
  55.137 +
  55.138 +/* poll_fd type for blktap2 fd system. taken from block_log.c */
  55.139 +typedef struct poll_fd {
  55.140 +	int        fd;
  55.141 +	event_id_t id;
  55.142 +} poll_fd_t;
  55.143 +
  55.144 +struct tdremus_state {
  55.145 +//  struct tap_disk* driver;
  55.146 +	void* driver_data;
  55.147 +
  55.148 +  /* XXX: this is needed so that the server can perform operations on
  55.149 +   * the driver from the stream_fd event handler. fix this. */
  55.150 +	td_driver_t *tdremus_driver;
  55.151 +
  55.152 +	/* TODO: we may wish to replace these two FIFOs with a unix socket */
  55.153 +	char*     ctl_path; /* receive flush instruction here */
  55.154 +	poll_fd_t ctl_fd;     /* io_fd slot for control FIFO */
  55.155 +	char*     msg_path; /* output completion message here */
  55.156 +	poll_fd_t msg_fd;
  55.157 +
  55.158 +  /* replication host */
  55.159 +	struct sockaddr_in sa;
  55.160 +	poll_fd_t server_fd;    /* server listen port */
  55.161 +	poll_fd_t stream_fd;     /* replication channel */
  55.162 +
  55.163 +	/* queue write requests, batch-replicate at submit */
  55.164 +	struct req_ring write_ring;
  55.165 +
  55.166 +	/* ramdisk data*/
  55.167 +	struct ramdisk ramdisk;
  55.168 +
  55.169 +	/* mode methods */
  55.170 +	enum tdremus_mode mode;
  55.171 +	int (*queue_flush)(td_driver_t *driver);
  55.172 +};
  55.173 +
  55.174 +typedef struct tdremus_wire {
  55.175 +	uint32_t op;
  55.176 +	uint64_t id;
  55.177 +	uint64_t sec;
  55.178 +	uint32_t secs;
  55.179 +} tdremus_wire_t;
  55.180 +
  55.181 +#define TDREMUS_READ "rreq"
  55.182 +#define TDREMUS_WRITE "wreq"
  55.183 +#define TDREMUS_SUBMIT "sreq"
  55.184 +#define TDREMUS_COMMIT "creq"
  55.185 +#define TDREMUS_DONE "done"
  55.186 +#define TDREMUS_FAIL "fail"
  55.187 +
  55.188 +/* primary read/write functions */
  55.189 +static void primary_queue_read(td_driver_t *driver, td_request_t treq);
  55.190 +static void primary_queue_write(td_driver_t *driver, td_request_t treq);
  55.191 +
  55.192 +/* backup read/write functions */
  55.193 +static void backup_queue_read(td_driver_t *driver, td_request_t treq);
  55.194 +static void backup_queue_write(td_driver_t *driver, td_request_t treq);
  55.195 +
  55.196 +/* unprotected read/write functions */
  55.197 +static void unprotected_queue_read(td_driver_t *driver, td_request_t treq);
  55.198 +static void unprotected_queue_write(td_driver_t *driver, td_request_t treq);
  55.199 +
  55.200 +static int tdremus_close(td_driver_t *driver);
  55.201 +
  55.202 +static int switch_mode(td_driver_t *driver, enum tdremus_mode mode);
  55.203 +static int ctl_respond(struct tdremus_state *s, const char *response);
  55.204 +
  55.205 +/* ring functions */
  55.206 +static inline unsigned int ring_next(struct req_ring* ring, unsigned int pos)
  55.207 +{
  55.208 +	if (++pos >= MAX_REQUESTS * 2 + 1)
  55.209 +		return 0;
  55.210 +
  55.211 +	return pos;
  55.212 +}
  55.213 +
  55.214 +static inline int ring_isempty(struct req_ring* ring)
  55.215 +{
  55.216 +	return ring->head == ring->tail;
  55.217 +}
  55.218 +
  55.219 +static inline int ring_isfull(struct req_ring* ring)
  55.220 +{
  55.221 +	return ring_next(ring, ring->tail) == ring->head;
  55.222 +}
  55.223 +
  55.224 +/* functions to create and submit treq's */
  55.225 +
  55.226 +static void
  55.227 +replicated_write_callback(td_request_t treq, int err)
  55.228 +{
  55.229 +	struct tdremus_state *s = (struct tdremus_state *) treq.cb_data;
  55.230 +	td_vbd_request_t *vreq;
  55.231 +
  55.232 +	vreq = (td_vbd_request_t *) treq.private;
  55.233 +
  55.234 +	/* the write failed for now, lets panic. this is very bad */
  55.235 +	if (err) {
  55.236 +		RPRINTF("ramdisk write failed, disk image is not consistent\n");
  55.237 +		exit(-1);
  55.238 +	}
  55.239 +
  55.240 +	/* The write succeeded. let's pull the vreq off whatever request list
  55.241 +	 * it is on and free() it */
  55.242 +	list_del(&vreq->next);
  55.243 +	free(vreq);
  55.244 +
  55.245 +	s->ramdisk.inflight--;
  55.246 +	if (!s->ramdisk.inflight && !s->ramdisk.prev) {
  55.247 +		/* TODO: the ramdisk has been flushed */
  55.248 +	}
  55.249 +}
  55.250 +
  55.251 +static inline int
  55.252 +create_write_request(struct tdremus_state *state, td_sector_t sec, int secs, char *buf)
  55.253 +{
  55.254 +	td_request_t treq;
  55.255 +	td_vbd_request_t *vreq;
  55.256 +
  55.257 +	treq.op      = TD_OP_WRITE;
  55.258 +	treq.buf     = buf;
  55.259 +	treq.sec     = sec;
  55.260 +	treq.secs    = secs;
  55.261 +	treq.image   = remus_image;
  55.262 +	treq.cb      = replicated_write_callback;
  55.263 +	treq.cb_data = state;
  55.264 +	treq.id      = 0;
  55.265 +	treq.sidx    = 0;
  55.266 +
  55.267 +	vreq         = calloc(1, sizeof(td_vbd_request_t));
  55.268 +	treq.private = vreq;
  55.269 +
  55.270 +	if(!vreq)
  55.271 +		return -1;
  55.272 +
  55.273 +	vreq->submitting = 1;
  55.274 +	INIT_LIST_HEAD(&vreq->next);
  55.275 +	tapdisk_vbd_move_request(treq.private, &device_vbd->pending_requests);
  55.276 +
  55.277 +	/* TODO:
  55.278 +	 * we should probably leave it up to the caller to forward the request */
  55.279 +	td_forward_request(treq);
  55.280 +
  55.281 +	vreq->submitting--;
  55.282 +
  55.283 +	return 0;
  55.284 +}
  55.285 +
  55.286 +
  55.287 +/* ramdisk methods */
  55.288 +static int ramdisk_flush(td_driver_t *driver, struct tdremus_state *s);
  55.289 +
  55.290 +/* http://www.concentric.net/~Ttwang/tech/inthash.htm */
  55.291 +static unsigned int uint64_hash(void* k)
  55.292 +{
  55.293 +	uint64_t key = *(uint64_t*)k;
  55.294 +
  55.295 +	key = (~key) + (key << 18);
  55.296 +	key = key ^ (key >> 31);
  55.297 +	key = key * 21;
  55.298 +	key = key ^ (key >> 11);
  55.299 +	key = key + (key << 6);
  55.300 +	key = key ^ (key >> 22);
  55.301 +
  55.302 +	return (unsigned int)key;
  55.303 +}
  55.304 +
  55.305 +static int rd_hash_equal(void* k1, void* k2)
  55.306 +{
  55.307 +	uint64_t key1, key2;
  55.308 +
  55.309 +	key1 = *(uint64_t*)k1;
  55.310 +	key2 = *(uint64_t*)k2;
  55.311 +
  55.312 +	return key1 == key2;
  55.313 +}
  55.314 +/* Copy nb_sectors sectors starting at `sector` into buf, searching ramdisk->h first and ramdisk->prev second. Returns 0, or -1 as soon as a sector is found in neither table. */
  55.315 +static int ramdisk_read(struct ramdisk* ramdisk, uint64_t sector,
  55.316 +			int nb_sectors, char* buf)
  55.317 +{
  55.318 +	int i;
  55.319 +	char* v;
  55.320 +	uint64_t key;
  55.321 +
  55.322 +	for (i = 0; i < nb_sectors; i++) {
  55.323 +		key = sector + i;
  55.324 +		if (!(v = hashtable_search(ramdisk->h, &key))) {
  55.325 +			/* check whether it is queued in a previous flush request */
  55.326 +			if (!(ramdisk->prev && (v = hashtable_search(ramdisk->prev, &key))))
  55.327 +				return -1;	/* sectors copied so far stay in buf */
  55.328 +		}
  55.329 +		memcpy(buf + i * ramdisk->sector_size, v, ramdisk->sector_size);
  55.330 +	}
  55.331 +
  55.332 +	return 0;
  55.333 +}
  55.334 +/* Store a private copy of the len bytes at buf under key `sector` in h, overwriting in place when the sector is already present. Returns 0, or -1 on allocation or insert failure. */
  55.335 +static int ramdisk_write_hash(struct hashtable* h, uint64_t sector, char* buf,
  55.336 +			      size_t len)
  55.337 +{
  55.338 +	char* v;
  55.339 +	uint64_t* key;
  55.340 +
  55.341 +	if ((v = hashtable_search(h, &sector))) {	/* already buffered: reuse the existing value */
  55.342 +		memcpy(v, buf, len);
  55.343 +		return 0;
  55.344 +	}
  55.345 +
  55.346 +	if (!(v = malloc(len))) {
  55.347 +		DPRINTF("ramdisk_write_hash: malloc failed\n");
  55.348 +		return -1;
  55.349 +	}
  55.350 +	memcpy(v, buf, len);
  55.351 +	if (!(key = malloc(sizeof(*key)))) {	/* the key is heap-allocated alongside the value */
  55.352 +		DPRINTF("ramdisk_write_hash: error allocating key\n");
  55.353 +		free(v);
  55.354 +		return -1;
  55.355 +	}
  55.356 +	*key = sector;
  55.357 +	if (!hashtable_insert(h, key, v)) {	/* a zero return is treated as failure */
  55.358 +		DPRINTF("ramdisk_write_hash failed on sector %" PRIu64 "\n", sector);
  55.359 +		free(key);
  55.360 +		free(v);
  55.361 +		return -1;
  55.362 +	}
  55.363 +
  55.364 +	return 0;
  55.365 +}
  55.366 +/* Buffer nb_sectors sectors from buf into ramdisk->h, one ramdisk_write_hash() call per sector. Returns 0, or the first non-zero result (earlier sectors stay stored). */
  55.367 +static inline int ramdisk_write(struct ramdisk* ramdisk, uint64_t sector,
  55.368 +				int nb_sectors, char* buf)
  55.369 +{
  55.370 +	int i, rc;
  55.371 +
  55.372 +	for (i = 0; i < nb_sectors; i++) {
  55.373 +		rc = ramdisk_write_hash(ramdisk->h, sector + i,
  55.374 +					buf + i * ramdisk->sector_size,
  55.375 +					ramdisk->sector_size);
  55.376 +		if (rc)
  55.377 +			return rc;
  55.378 +	}
  55.379 +
  55.380 +	return 0;
  55.381 +}
  55.382 +/* Write callback (cf. the commented-out trace below): frees the request buffer and its cbdata, decrements ramdisk.inflight and, while ramdisk.prev still exists, calls ramdisk_flush() again. res, sector, nb_sectors and id are not inspected. */
  55.383 +static int ramdisk_write_cb(td_driver_t *driver, int res, uint64_t sector,
  55.384 +			    int nb_sectors, int id, void* private)
  55.385 +{
  55.386 +	struct ramdisk_write_cbdata *cbdata = (struct ramdisk_write_cbdata*)private;
  55.387 +	struct tdremus_state *s = cbdata->state;	/* fetched before cbdata is freed below */
  55.388 +	int rc;	/* unused */
  55.389 +
  55.390 +	/*
  55.391 +	  RPRINTF("ramdisk write callback: rc %d, %d sectors @ %" PRIu64 "\n", res, nb_sectors,
  55.392 +	  sector);
  55.393 +	*/
  55.394 +
  55.395 +	free(cbdata->buf);
  55.396 +	free(cbdata);
  55.397 +
  55.398 +	s->ramdisk.inflight--;
  55.399 +	if (!s->ramdisk.inflight && !s->ramdisk.prev) {
  55.400 +		/* when this reaches 0 and prev is empty, the disk is flushed. */
  55.401 +		/*
  55.402 +		  RPRINTF("ramdisk flush complete\n");
  55.403 +		*/
  55.404 +	}
  55.405 +
  55.406 +	if (s->ramdisk.prev) {
  55.407 +		/* resubmit as much as possible in the remaining disk */
  55.408 +		/*
  55.409 +		  RPRINTF("calling ramdisk_flush from write callback\n");
  55.410 +		*/
  55.411 +		return ramdisk_flush(driver, s);
  55.412 +	}
  55.413 +
  55.414 +	return 0;
  55.415 +}
  55.416 +/* qsort() comparator used by ramdisk_flush(): orders uint64_t values ascending. */
  55.417 +static int uint64_compare(const void* k1, const void* k2)
  55.418 +{
  55.419 +	uint64_t u1 = *(uint64_t*)k1;
  55.420 +	uint64_t u2 = *(uint64_t*)k2;
  55.421 +
  55.422 +	/* u1 - u2 is unsigned, so compare explicitly instead of returning the difference */
  55.423 +	return u1 < u2 ? -1 : u1 > u2 ? 1 : 0;
  55.424 +}
  55.425 +
  55.426 +/* set psectors to a malloc()ed array of the sector numbers in the hash (freed by the caller), returning
  55.427 + * the number of entries (or -1 on error). When the hash is empty, 0 is returned and *psectors is not written. */
  55.428 +static int ramdisk_get_sectors(struct hashtable* h, uint64_t** psectors)
  55.429 +{
  55.430 +	struct hashtable_itr* itr;
  55.431 +	uint64_t* sectors;
  55.432 +	int count;
  55.433 +
  55.434 +	if (!(count = hashtable_count(h)))
  55.435 +		return 0;
  55.436 +
  55.437 +	if (!(*psectors = malloc(count * sizeof(uint64_t)))) {
  55.438 +		DPRINTF("ramdisk_get_sectors: error allocating sector map\n");
  55.439 +		return -1;
  55.440 +	}
  55.441 +	sectors = *psectors;
  55.442 +
  55.443 +	itr = hashtable_iterator(h);	/* NOTE(review): result is not checked before use */
  55.444 +	count = 0;
  55.445 +	do {
  55.446 +		sectors[count++] = *(uint64_t*)hashtable_iterator_key(itr);
  55.447 +	} while (hashtable_iterator_advance(itr));
  55.448 +	free(itr);
  55.449 +
  55.450 +	return count;
  55.451 +}
  55.452 +/* Gather `count` consecutive sectors starting at `start` from ramdisk->prev into one
  55.453 + * valloc()ed buffer, freeing each per-sector copy once it has been merged (the table
  55.454 + * entries are removed by the caller).  Returns the buffer, or NULL on failure. */
  55.455 +static char* merge_requests(struct ramdisk* ramdisk, uint64_t start,
  55.456 +			    size_t count)
  55.457 +{
  55.458 +	char *buf, *sector;
  55.459 +	size_t i;	/* same type as count: no signed/unsigned comparison */
  55.460 +
  55.461 +	if (!(buf = valloc(count * ramdisk->sector_size))) {
  55.462 +		DPRINTF("merge_request: allocation failed\n");
  55.463 +		return NULL;
  55.464 +	}
  55.465 +
  55.466 +	for (i = 0; i < count; i++) {
  55.467 +		if (!(sector = hashtable_search(ramdisk->prev, &start))) {
  55.468 +			DPRINTF("merge_request: lookup failed on %"PRIu64"\n", start);
  55.469 +			free(buf);	/* do not leak the merge buffer on failure */
  55.470 +			return NULL;
  55.471 +		}
  55.472 +		memcpy(buf + i * ramdisk->sector_size, sector, ramdisk->sector_size);
  55.473 +		free(sector);
  55.474 +		start++;
  55.475 +	}
  55.476 +
  55.477 +	return buf;
  55.478 +}
  55.479 +
  55.480 +/* Queue the contents of s->ramdisk.prev for writing: the sector numbers are sorted,
  55.481 + * runs of consecutive sectors are merged into one buffer each and passed to
  55.482 + * create_write_request(), and the queued sectors are removed from the table.
  55.483 + * Returns 0, or a negative value on failure.  `driver` is not referenced in the body.
  55.484 + * The underlying driver may not handle having the whole ramdisk queued at
  55.485 + * once. We queue what we can and let the callbacks attempt to queue more. */
  55.486 +/* NOTE: may be called from callback, while dd->private still belongs to
  55.487 + * the underlying driver */
  55.488 +static int ramdisk_flush(td_driver_t *driver, struct tdremus_state* s)
  55.489 +{
  55.490 +	uint64_t* sectors;
  55.491 +	char* buf;
  55.492 +	uint64_t base, batchlen;
  55.493 +	int i, j, count = 0;
  55.494 +
  55.495 +	// RPRINTF("ramdisk flush\n");
  55.496 +
  55.497 +	if ((count = ramdisk_get_sectors(s->ramdisk.prev, &sectors)) <= 0)
  55.498 +		return count;
  55.499 +
  55.500 +	/*
  55.501 +	  RPRINTF("ramdisk: flushing %d sectors\n", count);
  55.502 +	*/
  55.503 +
  55.504 +	/* sort and merge sectors to improve disk performance */
  55.505 +	qsort(sectors, count, sizeof(*sectors), uint64_compare);
  55.506 +
  55.507 +	for (i = 0; i < count;) {
  55.508 +		base = sectors[i++];
  55.509 +		while (i < count && sectors[i] == sectors[i-1] + 1)	/* extend the run of consecutive sectors */
  55.510 +			i++;
  55.511 +		batchlen = sectors[i-1] - base + 1;
  55.512 +
  55.513 +		if (!(buf = merge_requests(&s->ramdisk, base, batchlen))) {
  55.514 +			RPRINTF("ramdisk_flush: merge_requests failed\n");
  55.515 +			free(sectors);
  55.516 +			return -1;
  55.517 +		}
  55.518 +
  55.519 +		/* NOTE: create_write_request() creates a treq AND forwards it down
  55.520 +		 * the driver chain */
  55.521 +		// RPRINTF("forwarding write request at %" PRIu64 ", length: %" PRIu64 "\n", base, batchlen);
  55.522 +		create_write_request(s, base, batchlen, buf);	/* NOTE(review): return value is not checked; inflight is still incremented below */
  55.523 +		//RPRINTF("write request at %" PRIu64 ", length: %" PRIu64 " forwarded\n", base, batchlen);
  55.524 +
  55.525 +		s->ramdisk.inflight++;
  55.526 +
  55.527 +		for (j = 0; j < batchlen; j++) {	/* drop the queued run from the table */
  55.528 +			hashtable_remove(s->ramdisk.prev, &base);
  55.529 +			base++;
  55.530 +		}
  55.531 +	}
  55.532 +
  55.533 +	if (!hashtable_count(s->ramdisk.prev)) {
  55.534 +		/* everything is in flight */
  55.535 +		hashtable_destroy(s->ramdisk.prev, 0);
  55.536 +		s->ramdisk.prev = NULL;
  55.537 +	}
  55.538 +
  55.539 +	free(sectors);
  55.540 +
  55.541 +	// RPRINTF("ramdisk flush done\n");
  55.542 +	return 0;
  55.543 +}
  55.540 +
  55.541 +/* Begin flushing the current ramdisk (s->ramdisk.h) to disk.  If an earlier flush is
  55.542 + * still draining, the new sectors are merged into its table.  Returns the result of
  55.543 + * ramdisk_flush(), 0 when there is nothing to flush, or a negative value on error. */
  55.544 +static int ramdisk_start_flush(td_driver_t *driver)
  55.545 +{
  55.546 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
  55.547 +	char* buf;
  55.548 +	int i, count;
  55.549 +	uint64_t* sectors;
  55.550 +
  55.551 +	if (!hashtable_count(s->ramdisk.h))
  55.552 +		return 0;	/* nothing to flush */
  55.553 +
  55.554 +	if (s->ramdisk.prev) {
  55.555 +		/* a flush request issued while a previous flush is still in progress
  55.556 +		 * will merge with the previous request. If you want the previous
  55.557 +		 * request to be consistent, wait for it to complete. */
  55.558 +		if ((count = ramdisk_get_sectors(s->ramdisk.h, &sectors)) < 0)
  55.559 +			return count;
  55.560 +
  55.561 +		for (i = 0; i < count; i++) {
  55.562 +			buf = hashtable_search(s->ramdisk.h, sectors + i);
  55.563 +			if (ramdisk_write_hash(s->ramdisk.prev, sectors[i], buf,
  55.564 +					       s->ramdisk.sector_size) < 0) {
  55.565 +				free(sectors);
  55.566 +				return -1;	/* s->ramdisk.h is left intact, nothing is dropped */
  55.567 +			}
  55.568 +		}
  55.569 +		free(sectors);
  55.570 +
  55.571 +		/* ramdisk_write_hash() stored copies, so free the original values too */
  55.572 +		hashtable_destroy(s->ramdisk.h, 1);
  55.573 +	} else
  55.574 +		s->ramdisk.prev = s->ramdisk.h;
  55.575 +
  55.576 +	/* We create a new hashtable so that new writes can be performed before
  55.577 +	 * the old hashtable is completely drained. */
  55.578 +	s->ramdisk.h = create_hashtable(RAMDISK_HASHSIZE, uint64_hash,
  55.579 +					rd_hash_equal);
  55.580 +
  55.581 +	return ramdisk_flush(driver, s);
  55.582 +}
  55.583 +
  55.584 +/* Set up the ramdisk: record the sector size from driver->info and create the sector hashtable unless one already exists. Always returns 0. */
  55.585 +static int ramdisk_start(td_driver_t *driver)
  55.586 +{
  55.587 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
  55.588 +
  55.589 +	if (s->ramdisk.h) {
  55.590 +		RPRINTF("ramdisk already allocated\n");
  55.591 +		return 0;
  55.592 +	}
  55.593 +
  55.594 +	s->ramdisk.sector_size = driver->info.sector_size;
  55.595 +	s->ramdisk.h = create_hashtable(RAMDISK_HASHSIZE, uint64_hash,
  55.596 +					rd_hash_equal);	/* NOTE(review): result is not checked before returning 0 */
  55.597 +
  55.598 +	DPRINTF("Ramdisk started, %zu bytes/sector\n", s->ramdisk.sector_size);
  55.599 +
  55.600 +	return 0;
  55.601 +}
  55.602 +
  55.603 +/* common client/server functions */
  55.604 +/* mayberead: read exactly len bytes from fd into buf, waiting in select() (timeout taken from HEARTBEAT_MS) whenever the read is incomplete. Returns 0 on success, -1 on end-of-file, read/select error or timeout. */
  55.605 +static int mread(int fd, void* buf, size_t len)
  55.606 +{
  55.607 +	fd_set rfds;
  55.608 +	int rc;
  55.609 +	size_t cur = 0;	/* bytes received so far */
  55.610 +	struct timeval tv = {	/* initialised once; not reset between select() calls */
  55.611 +		.tv_sec = HEARTBEAT_MS / 1000,
  55.612 +		.tv_usec = (HEARTBEAT_MS % 1000) * 1000
  55.613 +	};
  55.614 +
  55.615 +	if (!len)
  55.616 +		return 0;
  55.617 +
  55.618 +	/* read first. Only select if read is incomplete. */
  55.619 +	rc = read(fd, buf, len);
  55.620 +	while (rc < 0 || cur + rc < len) {
  55.621 +		if (!rc) {
  55.622 +			RPRINTF("end-of-file");
  55.623 +			return -1;
  55.624 +		}
  55.625 +		if (rc < 0 && errno != EAGAIN) {	/* only EAGAIN is retried */
  55.626 +			RPRINTF("error during read: %s\n", strerror(errno));
  55.627 +			return -1;
  55.628 +		}
  55.629 +		if (rc > 0)
  55.630 +			cur += rc;
  55.631 +
  55.632 +		FD_ZERO(&rfds);
  55.633 +		FD_SET(fd, &rfds);
  55.634 +		if (!(rc = select(fd + 1, &rfds, NULL, NULL, &tv))) {
  55.635 +			RPRINTF("time out during read\n");
  55.636 +			return -1;
  55.637 +		} else if (rc < 0) {
  55.638 +			RPRINTF("error during select: %d\n", errno);
  55.639 +			return -1;
  55.640 +		}
  55.641 +		rc = read(fd, buf + cur, len - cur);	/* continue after the bytes already received */
  55.642 +	}
  55.643 +	/*
  55.644 +	  RPRINTF("read %d bytes\n", cur + rc);
  55.645 +	*/
  55.646 +
  55.647 +	return 0;
  55.648 +}
  55.649 +
  55.650 +/* maybewrite: write exactly len bytes from buf to fd, waiting in select() (timeout
  55.651 + * taken from HEARTBEAT_MS) whenever the write is incomplete.
  55.652 + * Returns 0 on success, -1 on a zero-length write, write/select error or timeout. */
  55.653 +static int mwrite(int fd, void* buf, size_t len)
  55.654 +{
  55.655 +	fd_set wfds;
  55.656 +	size_t cur = 0;	/* bytes written so far */
  55.657 +	int rc;
  55.658 +	struct timeval tv = {
  55.659 +		.tv_sec = HEARTBEAT_MS / 1000,
  55.660 +		.tv_usec = (HEARTBEAT_MS % 1000) * 1000
  55.661 +	};
  55.662 +
  55.663 +	if (!len)
  55.664 +		return 0;
  55.665 +
  55.666 +	/* write first. Only select if write is incomplete. */
  55.667 +	rc = write(fd, buf, len);
  55.668 +	while (rc < 0 || cur + rc < len) {
  55.669 +		if (!rc) {
  55.670 +			RPRINTF("end-of-file");
  55.671 +			return -1;
  55.672 +		}
  55.673 +		if (rc < 0 && errno != EAGAIN) {
  55.674 +			RPRINTF("error during write: %s\n", strerror(errno));
  55.675 +			return -1;
  55.676 +		}
  55.677 +		if (rc > 0)
  55.678 +			cur += rc;
  55.679 +
  55.680 +		FD_ZERO(&wfds);
  55.681 +		FD_SET(fd, &wfds);
  55.682 +		if (!(rc = select(fd + 1, NULL, &wfds, NULL, &tv))) {
  55.683 +			RPRINTF("time out during write\n");
  55.684 +			return -1;
  55.685 +		} else if (rc < 0) {
  55.686 +			RPRINTF("error during select: %d\n", errno);
  55.687 +			return -1;
  55.688 +		}
  55.689 +		rc = write(fd, (char *)buf + cur, len - cur);	/* byte pointer: no arithmetic on void * */
  55.690 +	}
  55.691 +	/*
  55.692 +	  RPRINTF("wrote %d bytes\n", cur + rc);
  55.693 +	*/
  55.694 +
  55.695 +	return 0;
  55.696 +}
  55.697 +
  55.698 +/* Tear down the replication stream: unregister its event, close the socket and set stream_fd.fd to -2. */
  55.699 +static void inline close_stream_fd(struct tdremus_state *s)
  55.700 +{
  55.701 +	/* XXX: -2 is magic. replace with macro perhaps? (primary_queue_write's comment reads -2 as "the connection was lost") */
  55.702 +	tapdisk_server_unregister_event(s->stream_fd.id);
  55.703 +	close(s->stream_fd.fd);
  55.704 +	s->stream_fd.fd = -2;
  55.705 +}
  55.706 +
  55.707 +/* primary functions */
  55.708 +static void remus_client_event(event_id_t, char mode, void *private);
  55.709 +static void remus_connect_event(event_id_t id, char mode, void *private);
  55.710 +static void remus_retry_connect_event(event_id_t id, char mode, void *private);
  55.711 +/* Create a non-blocking client socket and register remus_retry_connect_event() as a timeout event that starts the connect. Returns 0, or -1 on failure (the socket is closed again). */
  55.712 +static int primary_do_connect(struct tdremus_state *state)
  55.713 +{
  55.714 +	event_id_t id;
  55.715 +	int fd;
  55.716 +	int flags;
  55.717 +
  55.718 +	RPRINTF("client connecting to %s:%d...\n", inet_ntoa(state->sa.sin_addr), ntohs(state->sa.sin_port));
  55.719 +
  55.720 +	if ((fd = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
  55.721 +		RPRINTF("could not create client socket: %d\n", errno);
  55.722 +		return -1;
  55.723 +	}
  55.724 +
  55.725 +	/* make socket nonblocking */
  55.726 +	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
  55.727 +		flags = 0;
  55.728 +	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1) {
  55.729 +		close(fd);	/* do not leak the socket */
  55.730 +		return -1;
  55.731 +	}
  55.732 +
  55.733 +	/* once we have created the socket and populated the address, we can now start
  55.734 +	 * our non-blocking connect. rather than duplicating code we trigger a timeout
  55.735 +	 * on the socket fd, which calls our nonblocking connect code */
  55.736 +	if((id = tapdisk_server_register_event(SCHEDULER_POLL_TIMEOUT, fd, 0, remus_retry_connect_event, state)) < 0) {
  55.737 +		RPRINTF("error registering timeout client connection event handler: %s\n", strerror(id));
  55.738 +		close(fd);	/* do not leak the socket */
  55.739 +		return -1;
  55.740 +	}
  55.741 +	state->stream_fd.fd = fd;
  55.742 +	state->stream_fd.id = id;
  55.743 +	return 0;
  55.744 +}
  55.745 +/* Connect to the backup at state->sa with a blocking connect(), retrying every second while the connection is refused; then make the socket non-blocking and register remus_client_event() for reads. Returns 0, or -1 on failure. */
  55.746 +static int primary_blocking_connect(struct tdremus_state *state)
  55.747 +{
  55.748 +	int fd;
  55.749 +	int id;
  55.750 +	int rc;
  55.751 +	int flags;
  55.752 +
  55.753 +	RPRINTF("client connecting to %s:%d...\n", inet_ntoa(state->sa.sin_addr), ntohs(state->sa.sin_port));
  55.754 +
  55.755 +	if ((fd = socket(PF_INET, SOCK_STREAM, 0)) < 0) {
  55.756 +		RPRINTF("could not create client socket: %d\n", errno);
  55.757 +		return -1;
  55.758 +	}
  55.759 +
  55.760 +	do {	/* loops only while connect() fails with ECONNREFUSED */
  55.761 +		if ((rc = connect(fd, (struct sockaddr *)&state->sa,
  55.762 +		    sizeof(state->sa))) < 0)
  55.763 +		{
  55.764 +			if (errno == ECONNREFUSED) {
  55.765 +				RPRINTF("connection refused -- retrying in 1 second\n");
  55.766 +				sleep(1);
  55.767 +			} else {
  55.768 +				RPRINTF("connection failed: %d\n", errno);
  55.769 +				close(fd);
  55.770 +				return -1;
  55.771 +			}
  55.772 +		}
  55.773 +	} while (rc < 0);
  55.774 +
  55.775 +	RPRINTF("client connected\n");
  55.776 +
  55.777 +	/* make socket nonblocking */
  55.778 +	if ((flags = fcntl(fd, F_GETFL, 0)) == -1)
  55.779 +		flags = 0;
  55.780 +	if (fcntl(fd, F_SETFL, flags | O_NONBLOCK) == -1)
  55.781 +	{
  55.782 +		RPRINTF("error making socket nonblocking\n");
  55.783 +		close(fd);
  55.784 +		return -1;
  55.785 +	}
  55.786 +
  55.787 +	if((id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, fd, 0, remus_client_event, state)) < 0) {
  55.788 +		RPRINTF("error registering client event handler: %s\n", strerror(id));
  55.789 +		close(fd);
  55.790 +		return -1;
  55.791 +	}
  55.792 +
  55.793 +	state->stream_fd.fd = fd;	/* published only after every step succeeded */
  55.794 +	state->stream_fd.id = id;
  55.795 +	return 0;
  55.796 +}
  55.797 +
  55.798 +/* Read handler installed by primary_start(): reads need no replication, so the request is forwarded unchanged. `driver` is not used. */
  55.799 +static void primary_queue_read(td_driver_t *driver, td_request_t treq)
  55.800 +{
  55.801 +	/* just pass read through */
  55.802 +	td_forward_request(treq);
  55.803 +}
  55.804 +
  55.805 +/* Replicate a write to the backup, then forward it down the local driver chain.
  55.806 + * Sent on the stream: the TDREMUS_WRITE tag, a uint32_t sector count and a uint64_t
  55.807 + * first sector (host byte order, no padding), then the data.  On any replication
  55.808 + * failure the driver switches to unprotected mode and completes treq with -EBUSY.
  55.809 + * TODO: mwrite() effectively blocks until all data has been copied into a system
  55.810 + * buffer or a timeout has occurred. We may wish to use tapdisk's nonblocking i/o
  55.811 + * interface, tapdisk_server_register_event(), to write asynchronously instead. */
  55.812 +static void primary_queue_write(td_driver_t *driver, td_request_t treq)
  55.813 +{
  55.814 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
  55.815 +
  55.816 +	char header[sizeof(uint32_t) + sizeof(uint64_t)];
  55.817 +	uint32_t sectors = treq.secs;
  55.818 +	uint64_t sector = treq.sec;
  55.819 +
  55.820 +	// RPRINTF("write: stream_fd.fd: %d\n", s->stream_fd.fd);
  55.821 +
  55.822 +	/* -1 means we haven't connected yet, -2 means the connection was lost */
  55.823 +	if(s->stream_fd.fd == -1) {
  55.824 +		RPRINTF("connecting to backup...\n");
  55.825 +		primary_blocking_connect(s);	/* on failure fd stays -1 and the mwrite() below fails */
  55.826 +	}
  55.827 +
  55.828 +	memcpy(header, &sectors, sizeof(sectors));	/* byte copies: header is an unaligned char array */
  55.829 +	memcpy(header + sizeof(sectors), &sector, sizeof(sector));
  55.830 +
  55.831 +	if (mwrite(s->stream_fd.fd, TDREMUS_WRITE, strlen(TDREMUS_WRITE)) < 0)
  55.832 +		goto fail;
  55.833 +	if (mwrite(s->stream_fd.fd, header, sizeof(header)) < 0)
  55.834 +		goto fail;
  55.835 +
  55.836 +	if (mwrite(s->stream_fd.fd, treq.buf, treq.secs * driver->info.sector_size) < 0)
  55.837 +		goto fail;
  55.838 +
  55.839 +	td_forward_request(treq);
  55.840 +
  55.841 +	return;
  55.842 +
  55.843 + fail:
  55.844 +	/* switch to unprotected mode and tell tapdisk to retry */
  55.845 +	RPRINTF("write request replication failed, switching to unprotected mode");
  55.846 +	switch_mode(s->tdremus_driver, mode_unprotected);
  55.847 +	td_complete_request(treq, -EBUSY);
  55.848 +}
  55.849 +
  55.850 +/* Flush handler installed by primary_start(): sends TDREMUS_COMMIT to the backup. Returns 0 (also when stream_fd.fd is -1), or -1 after closing the stream when the write fails. */
  55.851 +static int client_flush(td_driver_t *driver)
  55.852 +{
  55.853 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
  55.854 +
  55.855 +	// RPRINTF("committing output\n");
  55.856 +
  55.857 +	if (s->stream_fd.fd == -1)
  55.858 +		/* connection not yet established, nothing to flush */
  55.859 +		return 0;
  55.860 +
  55.861 +	if (mwrite(s->stream_fd.fd, TDREMUS_COMMIT, strlen(TDREMUS_COMMIT)) < 0) {
  55.862 +		RPRINTF("error flushing output");
  55.863 +		close_stream_fd(s);
  55.864 +		return -1;
  55.865 +	}
  55.866 +
  55.867 +	return 0;
  55.868 +}
  55.869 +/* Activate client mode: install the primary read/write handlers and client_flush, and mark the stream as not yet connected (fd and id set to -1). Always returns 0. */
  55.870 +static int primary_start(td_driver_t *driver)
  55.871 +{
  55.872 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
  55.873 +
  55.874 +	RPRINTF("activating client mode\n");
  55.875 +
  55.876 +	tapdisk_remus.td_queue_read = primary_queue_read;
  55.877 +	tapdisk_remus.td_queue_write = primary_queue_write;
  55.878 +	s->queue_flush = client_flush;
  55.879 +
  55.880 +	s->stream_fd.fd = -1;	/* primary_queue_write() connects when it sees -1 */
  55.881 +	s->stream_fd.id = -1;
  55.882 +
  55.883 +	return 0;
  55.884 +}
  55.885 +
  55.886 +/* timeout callback: issues a non-blocking connect() on s->stream_fd.fd, then re-registers either itself (retry after REMUS_CONNRETRY_TIMEOUT) or remus_connect_event() (wait for the socket to become writable) */
  55.887 +static void remus_retry_connect_event(event_id_t id, char mode, void *private)
  55.888 +{
  55.889 +	struct tdremus_state *s = (struct tdremus_state *)private;
  55.890 +
  55.891 +	/* do a non-blocking connect */
  55.892 +	if (connect(s->stream_fd.fd, (struct sockaddr *)&s->sa, sizeof(s->sa))
  55.893 +	    && errno != EINPROGRESS)
  55.894 +	{
  55.895 +		if(errno == ECONNREFUSED || errno == ENETUNREACH || errno == EAGAIN || errno == ECONNABORTED)
  55.896 +		{
  55.897 +			/* try again in a second */
  55.898 +			tapdisk_server_unregister_event(s->stream_fd.id);
  55.899 +			if((id = tapdisk_server_register_event(SCHEDULER_POLL_TIMEOUT, s->stream_fd.fd, REMUS_CONNRETRY_TIMEOUT, remus_retry_connect_event, s)) < 0) {
  55.900 +				RPRINTF("error registering timeout client connection event handler: %s\n", strerror(id));
  55.901 +				return;
  55.902 +			}
  55.903 +			s->stream_fd.id = id;
  55.904 +		}
  55.905 +		else
  55.906 +		{
  55.907 +			/* not recoverable */
  55.908 +			RPRINTF("error connection to server %s\n", strerror(errno));
  55.909 +			return;
  55.910 +		}
  55.911 +	}
  55.912 +	else
  55.913 +	{
  55.914 +		/* connect() either succeeded at once or returned EINPROGRESS (nonblocking connect); in both cases we wait for the fd to be writeable to determine if the connect worked */
  55.915 +
  55.916 +		tapdisk_server_unregister_event(s->stream_fd.id);
  55.917 +		if((id = tapdisk_server_register_event(SCHEDULER_POLL_WRITE_FD, s->stream_fd.fd, 0, remus_connect_event, s)) < 0) {
  55.918 +			RPRINTF("error registering client connection event handler: %s\n", strerror(id));
  55.919 +			return;
  55.920 +		}
  55.921 +		s->stream_fd.id = id;
  55.922 +	}
  55.923 +}
  55.924 +
  55.925 +/* callback when nonblocking connect() is finished: reads SO_ERROR, then either schedules a retry, gives up, or registers remus_client_event() and switches to mode_primary */
  55.926 +/* called only by primary in unprotected state */
  55.927 +static void remus_connect_event(event_id_t id, char mode, void *private)
  55.928 +{
  55.929 +	int socket_errno;
  55.930 +	socklen_t socket_errno_size;
  55.931 +	struct tdremus_state *s = (struct tdremus_state *)private;
  55.932 +
  55.933 +	/* check to see if the connect succeeded */
  55.934 +	socket_errno_size = sizeof(socket_errno);
  55.935 +	if (getsockopt(s->stream_fd.fd, SOL_SOCKET, SO_ERROR, &socket_errno, &socket_errno_size)) {
  55.936 +		RPRINTF("error getting socket errno\n");
  55.937 +		return;
  55.938 +	}
  55.939 +
  55.940 +	RPRINTF("socket connect returned %d\n", socket_errno);
  55.941 +
  55.942 +	if(socket_errno)
  55.943 +	{
  55.944 +		/* the connect did not succeed */
  55.945 +
  55.946 +		if(socket_errno == ECONNREFUSED || socket_errno == ENETUNREACH || socket_errno == ETIMEDOUT
  55.947 +		   || socket_errno == ECONNABORTED || socket_errno == EAGAIN)
  55.948 +		{
  55.949 +			/* we can probably assume that the backup is down. just try again later */
  55.950 +			tapdisk_server_unregister_event(s->stream_fd.id);
  55.951 +			if((id = tapdisk_server_register_event(SCHEDULER_POLL_TIMEOUT, s->stream_fd.fd, REMUS_CONNRETRY_TIMEOUT, remus_retry_connect_event, s)) < 0) {
  55.952 +				RPRINTF("error registering timeout client connection event handler: %s\n", strerror(id));
  55.953 +				return;
  55.954 +			}
  55.955 +			s->stream_fd.id = id;
  55.956 +		}
  55.957 +		else
  55.958 +		{
  55.959 +			RPRINTF("socket connect returned %d, giving up\n", socket_errno);
  55.960 +		}
  55.961 +	}
  55.962 +	else
  55.963 +	{
  55.964 +		/* the connect succeeded */
  55.965 +
  55.966 +		/* unregister this function and register a new event handler */
  55.967 +		tapdisk_server_unregister_event(s->stream_fd.id);
  55.968 +		if((id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, s->stream_fd.fd, 0, remus_client_event, s)) < 0) {
  55.969 +			RPRINTF("error registering client event handler: %s\n", strerror(id));
  55.970 +			return;
  55.971 +		}
  55.972 +		s->stream_fd.id = id;
  55.973 +
  55.974 +		/* switch from unprotected to protected client */
  55.975 +		switch_mode(s->tdremus_driver, mode_primary);
  55.976 +	}
  55.977 +}
  55.978 +
  55.979 +
  55.980 +/* we install this event handler on the primary once we have connected to the backup */
  55.981 +/* wait for "done" message to commit checkpoint: reads a 4-byte message; TDREMUS_DONE is passed to ctl_respond(), anything else (or a read failure) closes the stream */
  55.982 +static void remus_client_event(event_id_t id, char mode, void *private)
  55.983 +{
  55.984 +	struct tdremus_state *s = (struct tdremus_state *)private;
  55.985 +	char req[5];	/* 4 message bytes plus the terminating NUL */
  55.986 +	int rc;	/* unused */
  55.987 +
  55.988 +	if (mread(s->stream_fd.fd, req, sizeof(req) - 1) < 0) {
  55.989 +		/* replication stream closed or otherwise broken (timeout, reset, &c) */
  55.990 +		RPRINTF("error reading from backup\n");
  55.991 +		close_stream_fd(s);
  55.992 +		return;
  55.993 +	}
  55.994 +
  55.995 +	req[4] = '\0';
  55.996 +
  55.997 +	if (!strcmp(req, TDREMUS_DONE))
  55.998 +		/* checkpoint committed, inform msg_fd */
  55.999 +		ctl_respond(s, TDREMUS_DONE);
 55.1000 +	else {
 55.1001 +		RPRINTF("received unknown message: %s\n", req);
 55.1002 +		close_stream_fd(s);
 55.1003 +	}
 55.1004 +
 55.1005 +	return;
 55.1006 +}
 55.1007 +
 55.1008 +/* backup functions */
 55.1009 +static void remus_server_event(event_id_t id, char mode, void *private);
 55.1010 +
 55.1011 +/* accept callback for the listening socket: accepts one connection, registers remus_server_event() for it and stores the new fd and event id in s->stream_fd (nothing is returned) */
 55.1012 +static void remus_server_accept(event_id_t id, char mode, void* private)
 55.1013 +{
 55.1014 +	struct tdremus_state* s = (struct tdremus_state *) private;
 55.1015 +
 55.1016 +	int stream_fd;
 55.1017 +	event_id_t cid;
 55.1018 +
 55.1019 +	/* XXX: add address-based black/white list */
 55.1020 +	if ((stream_fd = accept(s->server_fd.fd, NULL, NULL)) < 0) {
 55.1021 +		RPRINTF("error accepting connection: %d\n", errno);
 55.1022 +		return;
 55.1023 +	}
 55.1024 +
 55.1025 +	/* TODO: check to see if we are already replicating. if so just close the
 55.1026 +	 * connection (or do something smarter) */
 55.1027 +	RPRINTF("server accepted connection\n");
 55.1028 +
 55.1029 +	/* add tapdisk event for replication stream */
 55.1030 +	cid = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, stream_fd, 0,
 55.1031 +					    remus_server_event, s);
 55.1032 +
 55.1033 +	if(cid < 0) {
 55.1034 +		RPRINTF("error registering connection event handler: %s\n", strerror(errno));
 55.1035 +		close(stream_fd);
 55.1036 +		return;
 55.1037 +	}
 55.1038 +
 55.1039 +	/* store replication file descriptor (any previous s->stream_fd value is overwritten) */
 55.1040 +	s->stream_fd.fd = stream_fd;
 55.1041 +	s->stream_fd.id = cid;
 55.1042 +}
 55.1043 +
 55.1044 +/* Create the server socket, bind it to s->sa, listen, and register remus_server_accept() for it. Returns 0 on success; a bind() failure returns -2 when errno != EADDRINUSE and -1 otherwise; every other failure returns -1. NOTE(review): this comment used to read "returns -2 if EADDRNOTAVAIL", which is not what the errno test below does - confirm which is intended. */
 55.1045 +static int remus_bind(struct tdremus_state* s)
 55.1046 +{
 55.1047 +//  struct sockaddr_in sa;
 55.1048 +	int opt;
 55.1049 +	int rc = -1;
 55.1050 +
 55.1051 +	if ((s->server_fd.fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
 55.1052 +		RPRINTF("could not create server socket: %d\n", errno);
 55.1053 +		return rc;
 55.1054 +	}
 55.1055 +	opt = 1;
 55.1056 +	if (setsockopt(s->server_fd.fd, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt)) < 0)
 55.1057 +		RPRINTF("Error setting REUSEADDR on %d: %d\n", s->server_fd.fd, errno);	/* logged only; setup continues */
 55.1058 +
 55.1059 +	if (bind(s->server_fd.fd, (struct sockaddr *)&s->sa, sizeof(s->sa)) < 0) {
 55.1060 +		RPRINTF("could not bind server socket %d to %s:%d: %d %s\n", s->server_fd.fd,
 55.1061 +			inet_ntoa(s->sa.sin_addr), ntohs(s->sa.sin_port), errno, strerror(errno));
 55.1062 +		if (errno != EADDRINUSE)
 55.1063 +			rc = -2;
 55.1064 +		goto err_sfd;
 55.1065 +	}
 55.1066 +	if (listen(s->server_fd.fd, 10)) {
 55.1067 +		RPRINTF("could not listen on socket: %d\n", errno);
 55.1068 +		goto err_sfd;
 55.1069 +	}
 55.1070 +
 55.1071 +	/* The socket is now bound to the address and listening so we may now register
 55.1072 +	 * the fd with tapdisk */
 55.1073 +
 55.1074 +	if((s->server_fd.id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
 55.1075 +							    s->server_fd.fd, 0,
 55.1076 +							    remus_server_accept, s)) < 0) {
 55.1077 +		RPRINTF("error registering server connection event handler: %s",
 55.1078 +			strerror(s->server_fd.id));
 55.1079 +		goto err_sfd;
 55.1080 +	}
 55.1081 +
 55.1082 +	return 0;
 55.1083 +
 55.1084 + err_sfd:
 55.1085 +	close(s->server_fd.fd);	/* common cleanup: the socket is closed and the fd reset to -1 */
 55.1086 +	s->server_fd.fd = -1;
 55.1087 +
 55.1088 +	return rc;
 55.1089 +}
 55.1090 +
 55.1091 +/* Report whether the latest checkpoint is still being applied: returns 1 while ramdisk.inflight is non-zero or ramdisk.prev exists, 0 otherwise. It only tests state; it does not wait. */
 55.1092 +static inline int server_writes_inflight(td_driver_t *driver)
 55.1093 +{
 55.1094 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
 55.1095 +
 55.1096 +	if (!s->ramdisk.inflight && !s->ramdisk.prev)
 55.1097 +		return 0;
 55.1098 +
 55.1099 +	return 1;
 55.1100 +}
 55.1101 +
 55.1102 +/* Read handler installed by backup_start(). Due to block device prefetching this
 55.1103 + * code may be called on the server side during normal replication. The EBUSY reply
 55.1104 + * described for that case is compiled out (#if 0); the read is currently forwarded.
 55.1105 + */
 55.1106 +void backup_queue_read(td_driver_t *driver, td_request_t treq)
 55.1107 +{
 55.1108 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;	/* not used below */
 55.1109 +
 55.1110 +	if(!remus_image)	/* remember the image of the first read seen */
 55.1111 +		remus_image = treq.image;
 55.1112 +
 55.1113 +#if 0
 55.1114 +	/* due to prefetching, we must return EBUSY on server reads. This
 55.1115 +	 * maintains a consistent disk image */
 55.1116 +	td_complete_request(treq, -EBUSY);
 55.1117 +#else
 55.1118 +	/* what exactly is the race that requires the response above? */
 55.1119 +	td_forward_request(treq);
 55.1120 +#endif
 55.1121 +}
 55.1122 +
 55.1123 +/* Write handler installed by backup_start(): switches the driver to unprotected mode and completes the request with -EBUSY */
 55.1124 +void backup_queue_write(td_driver_t *driver, td_request_t treq)
 55.1125 +{
 55.1126 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;	/* not used below */
 55.1127 +
 55.1128 +	/* on a server write, we know the domain has failed over. we must change our
 55.1129 +	 * state to unprotected and then have the unprotected queue_write function
 55.1130 +	 * handle the write
 55.1131 +	 */
 55.1132 +
 55.1133 +	switch_mode(driver, mode_unprotected);
 55.1134 +	/* TODO: call the appropriate write function rather than return EBUSY */
 55.1135 +	td_complete_request(treq, -EBUSY);
 55.1136 +}
 55.1137 +/* Activate backup mode: start the ramdisk and install the backup read/write handlers. Returns 0, or -1 if ramdisk_start() reports a negative result. */
 55.1138 +static int backup_start(td_driver_t *driver)
 55.1139 +{
 55.1140 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;	/* not used below */
 55.1141 +	int fd;	/* unused */
 55.1142 +
 55.1143 +	if (ramdisk_start(driver) < 0)
 55.1144 +		return -1;
 55.1145 +
 55.1146 +	tapdisk_remus.td_queue_read = backup_queue_read;
 55.1147 +	tapdisk_remus.td_queue_write = backup_queue_write;
 55.1148 +	/* TODO set flush function */
 55.1149 +	return 0;
 55.1150 +}
 55.1151 +/* Read one replicated write (uint32_t sector count, uint64_t first sector, then the data) from the stream and buffer it in the ramdisk. Returns 0, or -1 on failure. */
 55.1152 +static int server_do_wreq(td_driver_t *driver)
 55.1153 +{
 55.1154 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
 55.1155 +	char buf[4096];
 55.1156 +	int len;
 55.1157 +
 55.1158 +	char header[sizeof(uint32_t) + sizeof(uint64_t)];
 55.1159 +	uint32_t sectors;
 55.1160 +	uint64_t sector;
 55.1161 +
 55.1162 +	// RPRINTF("received write request\n");
 55.1163 +
 55.1164 +	if (mread(s->stream_fd.fd, header, sizeof(header)) < 0)
 55.1165 +		goto err;
 55.1166 +
 55.1167 +	/* header is a plain byte array, so decode it with memcpy() rather than
 55.1168 +	 * through cast pointers (safe for alignment and strict aliasing) */
 55.1169 +	memcpy(&sectors, header, sizeof(sectors));
 55.1170 +	memcpy(&sector, header + sizeof(sectors), sizeof(sector));
 55.1171 +	len = sectors * driver->info.sector_size;
 55.1172 +
 55.1173 +	if (len > sizeof(buf)) {
 55.1174 +		/* freak out! NOTE(review): the payload is left unread on the stream here */
 55.1175 +		RPRINTF("write request too large: %d/%u\n", len, (unsigned)sizeof(buf));
 55.1176 +		return -1;
 55.1177 +	}
 55.1178 +
 55.1179 +	if (mread(s->stream_fd.fd, buf, len) < 0)
 55.1180 +		goto err;
 55.1181 +
 55.1182 +	if (ramdisk_write(&s->ramdisk, sector, sectors, buf) < 0)
 55.1183 +		goto err;
 55.1184 +
 55.1185 +	return 0;
 55.1186 +
 55.1187 + err:
 55.1188 +	/* should start failover */
 55.1189 +	RPRINTF("backup write request error\n");
 55.1190 +	close_stream_fd(s);
 55.1191 +
 55.1192 +	return -1;
 55.1193 +}
 55.1194 +/* Handler for TDREMUS_SUBMIT requests: currently does nothing and returns 0. */
 55.1195 +static int server_do_sreq(td_driver_t *driver)
 55.1196 +{
 55.1197 +	/*
 55.1198 +	  RPRINTF("submit request received\n");
 55.1199 +	*/
 55.1200 +
 55.1201 +	return 0;
 55.1202 +}
 55.1203 +
 55.1204 +/* Handler for TDREMUS_COMMIT: at this point, the server can start applying the most recent
 55.1205 + * ramdisk. Starts the flush and replies TDREMUS_DONE; returns 0, or -1 if the reply is not fully written. */
 55.1206 +static int server_do_creq(td_driver_t *driver)
 55.1207 +{
 55.1208 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
 55.1209 +
 55.1210 +	// RPRINTF("committing buffer\n");
 55.1211 +
 55.1212 +	ramdisk_start_flush(driver);	/* NOTE(review): return value is not checked */
 55.1213 +
 55.1214 +	/* XXX this message should not be sent until flush completes! */
 55.1215 +	if (write(s->stream_fd.fd, TDREMUS_DONE, strlen(TDREMUS_DONE)) != 4)	/* 4 is presumably strlen(TDREMUS_DONE) - confirm */
 55.1216 +		return -1;
 55.1217 +
 55.1218 +	return 0;
 55.1219 +}
 55.1220 +
 55.1221 +
 55.1222 +/* called when data is pending on the replication stream: reads a 4-byte request tag and dispatches to server_do_wreq/sreq/creq; a read failure switches the driver to mode_unprotected */
 55.1223 +static void remus_server_event(event_id_t id, char mode, void *private)
 55.1224 +{
 55.1225 +	struct tdremus_state *s = (struct tdremus_state *)private;
 55.1226 +	td_driver_t *driver = s->tdremus_driver;
 55.1227 +	char req[5];	/* 4 tag bytes plus the terminating NUL */
 55.1228 +
 55.1229 +	// RPRINTF("replication data waiting\n");
 55.1230 +
 55.1231 +	/* TODO: add a get_connection_by_event_id() function.
 55.1232 +	 * for now we can assume that the fd is s->stream_fd */
 55.1233 +
 55.1234 +	if (mread(s->stream_fd.fd, req, sizeof(req) - 1) < 0) {
 55.1235 +		RPRINTF("error reading server event, activating backup\n");
 55.1236 +		switch_mode(driver, mode_unprotected);
 55.1237 +		return;
 55.1238 +	}
 55.1239 +
 55.1240 +	req[4] = '\0';
 55.1241 +
 55.1242 +	if (!strcmp(req, TDREMUS_WRITE))	/* handler return values are not inspected */
 55.1243 +		server_do_wreq(driver);
 55.1244 +	else if (!strcmp(req, TDREMUS_SUBMIT))
 55.1245 +		server_do_sreq(driver);
 55.1246 +	else if (!strcmp(req, TDREMUS_COMMIT))
 55.1247 +		server_do_creq(driver);
 55.1248 +	else
 55.1249 +		RPRINTF("unknown request received: %s\n", req);
 55.1250 +
 55.1251 +	return;
 55.1252 +
 55.1253 +}
 55.1254 +
 55.1255 +/* unprotected */
 55.1256 +
 55.1257 +void unprotected_queue_read(td_driver_t *driver, td_request_t treq)
 55.1258 +{
 55.1259 +	/* Read handler for unprotected mode.
 55.1260 +	 *
 55.1261 +	 * Reads are normally just passed through. While writes from a
 55.1262 +	 * previous ramdisk are still in flight the request is completed
 55.1263 +	 * with -EBUSY instead; if this becomes an issue we can do
 55.1264 +	 * something smarter. */
 55.1265 +	if (server_writes_inflight(driver)) {
 55.1266 +		td_complete_request(treq, -EBUSY);
 55.1267 +		return;
 55.1268 +	}
 55.1269 +
 55.1270 +	td_forward_request(treq);
 55.1271 +}
 55.1272 +
 55.1273 +/* For a recoverable remus solution we need to log unprotected writes here */
 55.1274 +void unprotected_queue_write(td_driver_t *driver, td_request_t treq)
 55.1275 +{
 55.1276 +	/* Write handler for unprotected mode: forward the write unless
 55.1277 +	 * writes from a previous ramdisk are still in flight. */
 55.1278 +	if (!server_writes_inflight(driver)) {
 55.1279 +		td_forward_request(treq);
 55.1280 +		return;
 55.1281 +	}
 55.1282 +
 55.1283 +	/* the queue has not drained yet: log it and complete the request
 55.1284 +	 * with -EBUSY rather than forwarding it */
 55.1285 +	RPRINTF("queue_write: waiting for queue to drain");
 55.1286 +	td_complete_request(treq, -EBUSY);
 55.1287 +}
 55.1288 +
 55.1289 +static int unprotected_start(td_driver_t *driver)
 55.1290 +{
 55.1291 +	struct tdremus_state *state = (struct tdremus_state *)driver->data;
 55.1292 +
 55.1293 +	RPRINTF("failure detected, activating passthrough\n");
 55.1294 +
 55.1295 +	/* drop the replication stream connection */
 55.1296 +	close_stream_fd(state);
 55.1297 +
 55.1298 +	/* unregister the server fd's event, then close the fd and mark it
 55.1299 +	 * as unopened */
 55.1300 +	tapdisk_server_unregister_event(state->server_fd.id);
 55.1301 +	close(state->server_fd.fd);
 55.1302 +	state->server_fd.fd = -1;
 55.1303 +
 55.1304 +	/* install the unprotected read/write handlers */
 55.1305 +	tapdisk_remus.td_queue_write = unprotected_queue_write;
 55.1306 +	tapdisk_remus.td_queue_read = unprotected_queue_read;
 55.1307 +
 55.1308 +	/* this transition always reports success */
 55.1309 +	return 0;
 55.1310 +}
 55.1311 +
 55.1312 +
 55.1313 +/* control */
 55.1314 +
 55.1315 +static inline int resolve_address(const char* addr, struct in_addr* ia)
 55.1316 +{
 55.1317 +	/* Look addr up with gethostbyname() and store the first address
 55.1318 +	 * of the result in ia->s_addr. Returns 0 on success, -1 on error. */
 55.1319 +	struct hostent *entry = gethostbyname(addr);
 55.1320 +
 55.1321 +	if (entry == NULL) {
 55.1322 +		RPRINTF("error resolving %s: %d\n", addr, h_errno);
 55.1323 +		return -1;
 55.1324 +	}
 55.1325 +
 55.1326 +	if (entry->h_addr_list[0] == NULL) {
 55.1327 +		RPRINTF("no address found for %s\n", addr);
 55.1328 +		return -1;
 55.1329 +	}
 55.1330 +
 55.1331 +	/* the address is taken over unchanged, i.e. it stays in network
 55.1332 +	 * byte order */
 55.1333 +	ia->s_addr = *(uint32_t *)entry->h_addr_list[0];
 55.1334 +	return 0;
 55.1335 +}
 55.1336 +
 55.1337 +static int get_args(td_driver_t *driver, const char* name)
 55.1338 +{
 55.1339 +	/* Parse name as "host:port", resolve it with getaddrinfo() and store
 55.1340 +	 * the first AF_INET result in state->sa.
 55.1341 +	 * Returns 0 on success, -ENOENT if the ':' separator is missing or no
 55.1342 +	 * usable address is found, -ENOMEM if the host copy cannot be made. */
 55.1343 +	struct tdremus_state *state = (struct tdremus_state *)driver->data;
 55.1344 +	char* host;
 55.1345 +	char* port;
 55.1346 +	int gai_status;
 55.1347 +	int valid_addr;
 55.1348 +	struct addrinfo gai_hints;
 55.1349 +	struct addrinfo *servinfo, *servinfo_itr;
 55.1350 +
 55.1351 +	memset(&gai_hints, 0, sizeof gai_hints);
 55.1352 +	gai_hints.ai_family = AF_UNSPEC;
 55.1353 +	gai_hints.ai_socktype = SOCK_STREAM;
 55.1354 +
 55.1355 +	/* everything before the first ':' is the host, the rest is the port */
 55.1356 +	port = strchr(name, ':');
 55.1357 +	if (!port) {
 55.1358 +		RPRINTF("missing host in %s\n", name);
 55.1359 +		return -ENOENT;
 55.1360 +	}
 55.1361 +	if (!(host = strndup(name, port - name))) {
 55.1362 +		RPRINTF("unable to allocate host\n");
 55.1363 +		return -ENOMEM;
 55.1364 +	}
 55.1365 +	port++;
 55.1366 +
 55.1367 +	gai_status = getaddrinfo(host, port, &gai_hints, &servinfo);
 55.1368 +	/* host is only needed for the lookup: release it on every path */
 55.1369 +	free(host);
 55.1370 +	if (gai_status != 0) {
 55.1371 +		RPRINTF("getaddrinfo error: %s\n", gai_strerror(gai_status));
 55.1372 +		return -ENOENT;
 55.1373 +	}
 55.1374 +
 55.1375 +	/* TODO: do something smarter here */
 55.1376 +	valid_addr = 0;
 55.1377 +	for (servinfo_itr = servinfo; servinfo_itr != NULL;
 55.1378 +	     servinfo_itr = servinfo_itr->ai_next) {
 55.1379 +		/* the result is copied as a struct sockaddr_in, so only
 55.1380 +		 * AF_INET entries are accepted; the first one wins */
 55.1381 +		if (servinfo_itr->ai_family == AF_INET) {
 55.1382 +			valid_addr = 1;
 55.1383 +			memset(&state->sa, 0, sizeof(state->sa));
 55.1384 +			state->sa = *(struct sockaddr_in *)servinfo_itr->ai_addr;
 55.1385 +			break;
 55.1386 +		}
 55.1387 +	}
 55.1388 +	freeaddrinfo(servinfo);
 55.1389 +
 55.1390 +	if (!valid_addr)
 55.1391 +		return -ENOENT;
 55.1392 +
 55.1393 +	RPRINTF("host: %s, port: %d\n", inet_ntoa(state->sa.sin_addr), ntohs(state->sa.sin_port));
 55.1394 +
 55.1395 +	return 0;
 55.1396 +}
 55.1397 +
 55.1398 +static int switch_mode(td_driver_t *driver, enum tdremus_mode mode)
 55.1399 +{
 55.1400 +	struct tdremus_state *state = (struct tdremus_state *)driver->data;
 55.1401 +	int rc;
 55.1402 +
 55.1403 +	if (state->mode == mode)
 55.1404 +		return 0;
 55.1405 +	/* a failed flush of the current queue forces unprotected mode */
 55.1406 +	if (state->queue_flush && (rc = state->queue_flush(driver)) < 0) {
 55.1407 +		RPRINTF("switch_mode: error flushing queue (old: %d, new: %d)", state->mode, mode);
 55.1408 +		mode = mode_unprotected;
 55.1409 +	}
 55.1410 +	switch (mode) {
 55.1411 +	case mode_unprotected:
 55.1412 +		rc = unprotected_start(driver);
 55.1413 +		break;
 55.1414 +	case mode_primary:
 55.1415 +		rc = primary_start(driver);
 55.1416 +		break;
 55.1417 +	case mode_backup:
 55.1418 +		rc = backup_start(driver);
 55.1419 +		break;
 55.1420 +	default:
 55.1421 +		RPRINTF("unknown mode requested: %d\n", mode);
 55.1422 +		rc = -1;
 55.1423 +	}
 55.1424 +	/* the new mode is only recorded when its start routine returned 0 */
 55.1425 +	if (rc == 0)
 55.1426 +		state->mode = mode;
 55.1427 +	return rc;
 55.1428 +}
 55.1429 +
 55.1430 +static void ctl_request(event_id_t id, char mode, void *private)
 55.1431 +{
 55.1432 +	struct tdremus_state *state = (struct tdremus_state *)private;
 55.1433 +	td_driver_t *driver = state->tdremus_driver;
 55.1434 +	char msg[80];
 55.1435 +	int rc;
 55.1436 +
 55.1437 +	/* leave room for the nul terminator appended below */
 55.1438 +	rc = read(state->ctl_fd.fd, msg, sizeof(msg) - 1);
 55.1439 +	if (rc < 0) {
 55.1440 +		RPRINTF("error reading from FIFO: %d\n", errno);
 55.1441 +		return;
 55.1442 +	}
 55.1443 +
 55.1444 +	if (rc == 0) {
 55.1445 +		RPRINTF("0-byte read received, reopening FIFO\n");
 55.1446 +		/*TODO: we may have to unregister/re-register with tapdisk_server */
 55.1447 +		close(state->ctl_fd.fd);
 55.1448 +		RPRINTF("FIFO closed\n");
 55.1449 +		state->ctl_fd.fd = open(state->ctl_path, O_RDWR);
 55.1450 +		if (state->ctl_fd.fd < 0)
 55.1451 +			RPRINTF("error reopening FIFO: %d\n", errno);
 55.1452 +		return;
 55.1453 +	}
 55.1454 +
 55.1455 +	msg[rc] = '\0';
 55.1456 +	if (strncmp(msg, "flush", 5) != 0) {
 55.1457 +		RPRINTF("unknown command: %s\n", msg);
 55.1458 +		return;
 55.1459 +	}
 55.1460 +
 55.1461 +	/* only a failed flush is reported back, via TDREMUS_FAIL */
 55.1462 +	if (state->queue_flush && (rc = state->queue_flush(driver))) {
 55.1463 +		RPRINTF("error passing flush request to backup");
 55.1464 +		ctl_respond(state, TDREMUS_FAIL);
 55.1465 +	}
 55.1466 +}
 55.1467 +
 55.1468 +static int ctl_respond(struct tdremus_state *s, const char *response)
 55.1469 +{
 55.1470 +	/* returns write()'s result; a failed write reopens the message FIFO */
 55.1471 +	int written = write(s->msg_fd.fd, response, strlen(response));
 55.1472 +
 55.1473 +	if (written >= 0)
 55.1474 +		return written;
 55.1475 +	RPRINTF("error writing notification: %d\n", errno);
 55.1476 +	close(s->msg_fd.fd);
 55.1477 +	if ((s->msg_fd.fd = open(s->msg_path, O_RDWR)) < 0)
 55.1478 +		RPRINTF("error reopening FIFO: %d\n", errno);
 55.1479 +	return written;
 55.1480 +}
 55.1481 +
 55.1482 +/* must be called after the underlying driver has been initialized */
 55.1483 +static int ctl_open(td_driver_t *driver, const char* name)
 55.1484 +{
 55.1485 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
 55.1486 +	int i, l;
 55.1487 +	/* Returns 0 on success, -1 on any failure (both path strings are freed again). */
 55.1488 +	/* first we must ensure that BLKTAP_CTRL_DIR exists */
 55.1489 +	if (mkdir(BLKTAP_CTRL_DIR, 0755) && errno != EEXIST)
 55.1490 +	{
 55.1491 +		DPRINTF("error creating directory %s: %d\n", BLKTAP_CTRL_DIR, errno);
 55.1492 +		return -1;
 55.1493 +	}
 55.1494 +
 55.1495 +	/* use the device name to create the control fifo path */
 55.1496 +	if (asprintf(&s->ctl_path, BLKTAP_CTRL_DIR "/remus_%s", name) < 0)
 55.1497 +		return -1;
 55.1498 +	/* scrub fifo pathname: ':' and '/' after the directory part become '_' */
 55.1499 +	for (i = strlen(BLKTAP_CTRL_DIR) + 1, l = strlen(s->ctl_path); i < l; i++) {
 55.1500 +		if (strchr(":/", s->ctl_path[i]))
 55.1501 +			s->ctl_path[i] = '_';
 55.1502 +	}
 55.1503 +	if (asprintf(&s->msg_path, "%s.msg", s->ctl_path) < 0) /* message FIFO = control path + ".msg" */
 55.1504 +		goto err_ctlfifo;
 55.1505 +	/* create both FIFOs; an already existing one (EEXIST) is accepted */
 55.1506 +	if (mkfifo(s->ctl_path, S_IRWXU|S_IRWXG|S_IRWXO) && errno != EEXIST) {
 55.1507 +		RPRINTF("error creating control FIFO %s: %d\n", s->ctl_path, errno);
 55.1508 +		goto err_msgfifo;
 55.1509 +	}
 55.1510 +
 55.1511 +	if (mkfifo(s->msg_path, S_IRWXU|S_IRWXG|S_IRWXO) && errno != EEXIST) {
 55.1512 +		RPRINTF("error creating message FIFO %s: %d\n", s->msg_path, errno);
 55.1513 +		goto err_msgfifo;
 55.1514 +	}
 55.1515 +
 55.1516 +	/* RDWR so that fd doesn't block select when no writer is present */
 55.1517 +	if ((s->ctl_fd.fd = open(s->ctl_path, O_RDWR)) < 0) {
 55.1518 +		RPRINTF("error opening control FIFO %s: %d\n", s->ctl_path, errno);
 55.1519 +		goto err_msgfifo;
 55.1520 +	}
 55.1521 +	/* the message FIFO is opened O_RDWR as well */
 55.1522 +	if ((s->msg_fd.fd = open(s->msg_path, O_RDWR)) < 0) {
 55.1523 +		RPRINTF("error opening message FIFO %s: %d\n", s->msg_path, errno);
 55.1524 +		goto err_openctlfifo;
 55.1525 +	}
 55.1526 +
 55.1527 +	RPRINTF("control FIFO %s\n", s->ctl_path);
 55.1528 +	RPRINTF("message FIFO %s\n", s->msg_path);
 55.1529 +
 55.1530 +	return 0;
 55.1531 +	/* error unwinding: each label falls through and undoes one more setup step */
 55.1532 + err_openctlfifo:
 55.1533 +	close(s->ctl_fd.fd); /* NOTE(review): fd value is not reset to -1 here */
 55.1534 + err_msgfifo:
 55.1535 +	free(s->msg_path); /* NOTE(review): FIFOs created above are not unlinked on these paths */
 55.1536 +	s->msg_path = NULL;
 55.1537 + err_ctlfifo:
 55.1538 +	free(s->ctl_path);
 55.1539 +	s->ctl_path = NULL;
 55.1540 +	return -1;
 55.1541 +}
 55.1542 +
 55.1543 +static void ctl_close(td_driver_t *driver)
 55.1544 +{
 55.1545 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
 55.1546 +	/* fds are -1 while unopened, so test >= 0. TODO: close *all* connections */
 55.1547 +	if (s->ctl_fd.fd >= 0)
 55.1548 +		close(s->ctl_fd.fd);
 55.1549 +	if (s->msg_fd.fd >= 0)	/* the message FIFO fd is released here too */
 55.1550 +		close(s->msg_fd.fd);
 55.1551 +	s->ctl_fd.fd = s->msg_fd.fd = -1;	/* a repeated call must not close again */
 55.1552 +	if (s->ctl_path) {
 55.1553 +		unlink(s->ctl_path);	/* remove the control FIFO from the filesystem */
 55.1554 +		free(s->ctl_path);
 55.1555 +		s->ctl_path = NULL;
 55.1556 +	}
 55.1557 +	if (s->msg_path) {
 55.1558 +		unlink(s->msg_path);	/* remove the message FIFO from the filesystem */
 55.1559 +		free(s->msg_path);
 55.1560 +		s->msg_path = NULL;
 55.1561 +	}
 55.1562 +}
 55.1563 +
 55.1564 +static int ctl_register(struct tdremus_state *s)
 55.1565 +{
 55.1566 +	RPRINTF("registering ctl fifo\n");
 55.1567 +
 55.1568 +	/* ask the tapdisk server to invoke ctl_request, with s as its
 55.1569 +	 * private argument, for read events on the control fd */
 55.1570 +	s->ctl_fd.id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD, s->ctl_fd.fd, 0, ctl_request, s);
 55.1571 +
 55.1572 +	/* a negative id signals that registration failed */
 55.1573 +	if (s->ctl_fd.id >= 0)
 55.1574 +		return 0;
 55.1575 +	RPRINTF("error registering ctrl FIFO %s: %d\n", s->ctl_path, s->ctl_fd.id);
 55.1576 +	return -1;
 55.1577 +}
 55.1578 +
 55.1579 +/* interface */
 55.1580 +
 55.1581 +static int tdremus_open(td_driver_t *driver, const char *name,
 55.1582 +			td_flag_t flags)
 55.1583 +{
 55.1584 +	struct tdremus_state *s = (struct tdremus_state *)driver->data;
 55.1585 +	int rc;
 55.1586 +
 55.1587 +	RPRINTF("opening %s\n", name);
 55.1588 +
 55.1589 +	/* get the underlying vbd for this driver stack; for tapdisk2 its id
 55.1590 +	 * is hard-coded as 0 (see tapdisk2.c) */
 55.1591 +	device_vbd = tapdisk_server_get_vbd(0);
 55.1592 +
 55.1593 +	/* start from a clean state in which every fd is marked unopened */
 55.1594 +	memset(s, 0, sizeof(*s));
 55.1595 +	s->server_fd.fd = -1;
 55.1596 +	s->stream_fd.fd = -1;
 55.1597 +	s->ctl_fd.fd = -1;
 55.1598 +	s->msg_fd.fd = -1;
 55.1599 +
 55.1600 +	/* TODO: this is only needed so that the server can send writes down
 55.1601 +	 * the driver stack from the stream_fd event handler */
 55.1602 +	s->tdremus_driver = driver;
 55.1603 +
 55.1604 +	/* parse name to get info etc */
 55.1605 +	if ((rc = get_args(driver, name)))
 55.1606 +		return rc;
 55.1607 +
 55.1608 +	if ((rc = ctl_open(driver, name))) {
 55.1609 +		RPRINTF("error setting up control channel\n");
 55.1610 +		return rc;	/* ctl_open unwinds its own partial setup */
 55.1611 +	}
 55.1612 +
 55.1613 +	/* the FIFOs exist from here on: release them if registration fails */
 55.1614 +	if ((rc = ctl_register(s))) {
 55.1615 +		RPRINTF("error registering control channel\n");
 55.1616 +		ctl_close(driver);
 55.1617 +		return rc;
 55.1618 +	}
 55.1619 +
 55.1620 +	/* remus_bind() == 0 selects backup mode, -2 selects primary mode;
 55.1621 +	 * any other result is treated as a failure below */
 55.1622 +	if (!(rc = remus_bind(s)))
 55.1623 +		rc = switch_mode(driver, mode_backup);
 55.1624 +	else if (rc == -2)
 55.1625 +		rc = switch_mode(driver, mode_primary);
 55.1626 +	if (!rc)
 55.1627 +		return 0;
 55.1628 +
 55.1629 +	tdremus_close(driver);
 55.1630 +	return -EIO;
 55.1631 +}
 55.1632 +
 55.1633 +static int tdremus_close(td_driver_t *driver)
 55.1634 +{
 55.1635 +	struct tdremus_state *state = (struct tdremus_state *)driver->data;
 55.1636 +
 55.1637 +	RPRINTF("closing\n");
 55.1638 +
 55.1639 +	/* free(NULL) is a no-op, so driver_data needs no guard */
 55.1640 +	free(state->driver_data);
 55.1641 +	state->driver_data = NULL;
 55.1642 +
 55.1643 +	/* fds hold -1 while closed: only live descriptors are released */
 55.1644 +	if (state->server_fd.fd >= 0) {
 55.1645 +		close(state->server_fd.fd);
 55.1646 +		state->server_fd.fd = -1;
 55.1647 +	}
 55.1648 +	if (state->stream_fd.fd >= 0)
 55.1649 +		close_stream_fd(state);
 55.1650 +
 55.1651 +	ctl_close(driver);
 55.1652 +	return 0;
 55.1653 +}
 55.1654 +
 55.1655 +static int tdremus_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
 55.1656 +{
 55.1657 +	/* we shouldn't have a parent... for now: always fail with -EINVAL */
 55.1658 +	return -EINVAL;
 55.1659 +}
 55.1660 +
 55.1661 +static int tdremus_validate_parent(td_driver_t *driver,
 55.1662 +				   td_driver_t *pdriver, td_flag_t flags)
 55.1663 +{	/* accepts any parent: none of the arguments is examined */
 55.1664 +	return 0;
 55.1665 +}
 55.1666 +
 55.1667 +struct tap_disk tapdisk_remus = {	/* entry points of the remus driver */
 55.1668 +	.disk_type          = "tapdisk_remus",
 55.1669 +	.private_data_size  = sizeof(struct tdremus_state),	/* driver->data is used as a struct tdremus_state */
 55.1670 +	.td_open            = tdremus_open,
 55.1671 +	.td_queue_read      = unprotected_queue_read,	/* initial handler; assigned again by unprotected_start() */
 55.1672 +	.td_queue_write     = unprotected_queue_write,	/* initial handler; assigned again by unprotected_start() */
 55.1673 +	.td_close           = tdremus_close,
 55.1674 +	.td_get_parent_id   = tdremus_get_parent_id,	/* always returns -EINVAL */
 55.1675 +	.td_validate_parent = tdremus_validate_parent,	/* always returns 0 */
 55.1676 +	.td_debug           = NULL,
 55.1677 +};
    56.1 --- a/tools/blktap2/drivers/check_gcrypt	Mon Nov 02 19:35:54 2009 -0800
    56.2 +++ b/tools/blktap2/drivers/check_gcrypt	Fri Mar 19 18:36:57 2010 -0700
    56.3 @@ -4,8 +4,7 @@ cat > .gcrypt.c << EOF
    56.4  #include <gcrypt.h>
    56.5  int main(void) 
    56.6  {
    56.7 -    char dummy[14];
    56.8 -    MD5("DUMMY", 5, dummy);
    56.9 +    gcry_md_hash_buffer(GCRY_MD_MD5, NULL, NULL, 0);
   56.10      return 0; 
   56.11  }
   56.12  EOF
    57.1 --- a/tools/blktap2/drivers/disktypes.h	Mon Nov 02 19:35:54 2009 -0800
    57.2 +++ b/tools/blktap2/drivers/disktypes.h	Fri Mar 19 18:36:57 2010 -0700
    57.3 @@ -49,6 +49,7 @@ extern struct tap_disk tapdisk_ram;
    57.4   extern struct tap_disk tapdisk_qcow; 
    57.5  extern struct tap_disk tapdisk_block_cache;
    57.6  extern struct tap_disk tapdisk_log;
    57.7 +extern struct tap_disk tapdisk_remus;
    57.8  
    57.9  #define MAX_DISK_TYPES        20
   57.10  
   57.11 @@ -61,6 +62,7 @@ extern struct tap_disk tapdisk_log;
   57.12  #define DISK_TYPE_QCOW        6
   57.13  #define DISK_TYPE_BLOCK_CACHE 7
   57.14  #define DISK_TYPE_LOG         9
   57.15 +#define DISK_TYPE_REMUS       10
   57.16  
   57.17  /*Define Individual Disk Parameters here */
   57.18  static disk_info_t null_disk = {
   57.19 @@ -167,6 +169,16 @@ static disk_info_t log_disk = {
   57.20  #endif
   57.21  };
   57.22  
   57.23 +static disk_info_t remus_disk = {
   57.24 +       DISK_TYPE_REMUS,
   57.25 +       "remus disk replicator (remus)",
   57.26 +       "remus",
   57.27 +       0,
   57.28 +#ifdef TAPDISK
   57.29 +       &tapdisk_remus,
   57.30 +#endif
   57.31 +};
   57.32 +
   57.33  /*Main disk info array */
   57.34  static disk_info_t *dtypes[] = {
   57.35         &aio_disk,
   57.36 @@ -179,6 +191,7 @@ static disk_info_t *dtypes[] = {
   57.37         &block_cache_disk,
   57.38         &null_disk,
   57.39         &log_disk,
   57.40 +       &remus_disk,
   57.41  };
   57.42  
   57.43  #endif
    58.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    58.2 +++ b/tools/blktap2/drivers/hashtable.c	Fri Mar 19 18:36:57 2010 -0700
    58.3 @@ -0,0 +1,274 @@
    58.4 +/* Copyright (C) 2004 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */
    58.5 +
    58.6 +#include "hashtable.h"
    58.7 +#include "hashtable_private.h"
    58.8 +#include <stdlib.h>
    58.9 +#include <stdio.h>
   58.10 +#include <string.h>
   58.11 +#include <math.h>
   58.12 +
   58.13 +/*
   58.14 +Credit for primes table: Aaron Krowne
   58.15 + http://br.endernet.org/~akrowne/
   58.16 + http://planetmath.org/encyclopedia/GoodHashTablePrimes.html
   58.17 +*/
   58.18 +static const unsigned int primes[] = {	/* permitted table sizes, in increasing order */
   58.19 +	53, 97, 193, 389,
   58.20 +	769, 1543, 3079, 6151,
   58.21 +	12289, 24593, 49157, 98317,
   58.22 +	196613, 393241, 786433, 1572869,
   58.23 +	3145739, 6291469, 12582917, 25165843,
   58.24 +	50331653, 100663319, 201326611, 402653189,
   58.25 +	805306457, 1610612741
   58.26 +};
   58.27 +const unsigned int prime_table_length = sizeof(primes)/sizeof(primes[0]); /* number of entries in primes[] */
   58.28 +const float max_load_factor = 0.65; /* loadlimit is ceil(table size * this factor) */
   58.29 +
   58.30 +/*****************************************************************************/
   58.31 +struct hashtable *
   58.32 +create_hashtable(unsigned int minsize,
   58.33 +                 unsigned int (*hashf) (void*),
   58.34 +                 int (*eqf) (void*,void*))
   58.35 +{
   58.36 +	struct hashtable *ht;
   58.37 +	unsigned int idx = 0, nbuckets = primes[0];
   58.38 +	/* Check requested hashtable isn't too large */
   58.39 +	if (minsize > (1u << 30)) return NULL;
   58.40 +	/* the bucket count is the first tabulated prime above minsize */
   58.41 +	while (idx < prime_table_length && primes[idx] <= minsize) idx++;
   58.42 +	if (idx < prime_table_length) nbuckets = primes[idx];
   58.43 +	ht = (struct hashtable *)malloc(sizeof(*ht));
   58.44 +	if (!ht) return NULL; /*oom*/
   58.45 +	ht->table = (struct entry **)malloc(nbuckets * sizeof(struct entry *));
   58.46 +	if (!ht->table) { free(ht); return NULL; } /*oom*/
   58.47 +	/* every bucket starts out as an empty chain */
   58.48 +	memset(ht->table, 0, nbuckets * sizeof(struct entry *));
   58.49 +	ht->hashfn       = hashf;
   58.50 +	ht->eqfn         = eqf;
   58.51 +	ht->entrycount   = 0;
   58.52 +	ht->primeindex   = idx;
   58.53 +	ht->tablelength  = nbuckets;
   58.54 +	ht->loadlimit    = (unsigned int) ceil(nbuckets * max_load_factor);
   58.55 +	return ht;
   58.56 +}
   58.57 +
   58.58 +/*****************************************************************************/
   58.59 +unsigned int
   58.60 +hash(struct hashtable *h, void *k)
   58.61 +{
   58.62 +	/* Post-mix the user-supplied hash to protect against poor hash
   58.63 +	 * functions - logic taken from java 1.4 hashtable source */
   58.64 +	unsigned int x = h->hashfn(k);
   58.65 +	x = x + ~(x << 9);
   58.66 +	x = x ^ ((x >> 14) | (x << 18)); /* >>> */
   58.67 +	x = x + (x << 4);
   58.68 +	x = x ^ ((x >> 10) | (x << 22)); /* >>> */
   58.69 +	return x;
   58.70 +}
   58.71 +
   58.72 +/*****************************************************************************/
   58.73 +static int
   58.74 +hashtable_expand(struct hashtable *h)
   58.75 +{
   58.76 +	/* Grow the table to the next size in primes[] (roughly double) to
   58.77 +	 * accommodate more entries, rehashing every chain. Returns -1 on
   58.78 +	 * success, 0 if already at the largest size or out of memory. */
   58.79 +	struct entry **newtable;
   58.80 +	struct entry *e;
   58.81 +	struct entry **pE;
   58.82 +	unsigned int newsize, i, index;
   58.83 +	/* Check we're not hitting max capacity */
   58.84 +	if (h->primeindex == (prime_table_length - 1)) return 0;
   58.85 +	newsize = primes[++(h->primeindex)];
   58.86 +	newtable = (struct entry **)malloc(sizeof(struct entry*) * newsize);
   58.87 +	if (NULL != newtable)
   58.88 +	{
   58.89 +		memset(newtable, 0, newsize * sizeof(struct entry *));
   58.90 +		/* This algorithm is not 'stable'. ie. it reverses the list
   58.91 +		 * when it transfers entries between the tables */
   58.92 +		for (i = 0; i < h->tablelength; i++) {
   58.93 +			while (NULL != (e = h->table[i])) {
   58.94 +				h->table[i] = e->next;
   58.95 +				index = indexFor(newsize,e->h);
   58.96 +				e->next = newtable[index];
   58.97 +				newtable[index] = e;
   58.98 +			}
   58.99 +		}
  58.100 +		free(h->table);
  58.101 +		h->table = newtable;
  58.102 +	}
  58.103 +	/* Plan B: realloc instead */
  58.104 +	else
  58.105 +	{
  58.106 +		newtable = (struct entry **)
  58.107 +			realloc(h->table, newsize * sizeof(struct entry *));
  58.108 +		if (NULL == newtable) { (h->primeindex)--; return 0; }
  58.109 +		h->table = newtable;
  58.110 +		/* Clear only the newly added tail: the destination is the
  58.111 +		 * address of the first new slot and the length is in bytes. */
  58.112 +		memset(&newtable[h->tablelength], 0, (newsize - h->tablelength) * sizeof(struct entry *));
  58.113 +		for (i = 0; i < h->tablelength; i++) {
  58.114 +			for (pE = &(newtable[i]), e = *pE; e != NULL; e = *pE) {
  58.115 +				index = indexFor(newsize,e->h);
  58.116 +				if (index == i) {
  58.117 +					/* entry stays in this bucket: step over it */
  58.118 +					pE = &(e->next);
  58.119 +				} else {
  58.120 +					*pE = e->next;
  58.121 +					e->next = newtable[index];
  58.122 +					newtable[index] = e;
  58.123 +				}
  58.124 +			}
  58.125 +		}
  58.126 +	}
  58.127 +	h->tablelength = newsize;
  58.128 +	h->loadlimit   = (unsigned int) ceil(newsize * max_load_factor);
  58.129 +	return -1;
  58.130 +}
  58.130 +
  58.131 +/*****************************************************************************/
  58.132 +unsigned int
  58.133 +hashtable_count(struct hashtable *h)
  58.134 +{
  58.135 +	return h->entrycount; /* entries currently stored; h is dereferenced unchecked */
  58.136 +}
  58.137 +
  58.138 +/*****************************************************************************/
  58.139 +int
  58.140 +hashtable_insert(struct hashtable *h, void *k, void *v)
  58.141 +{
  58.142 +	/* This method allows duplicate keys - but they shouldn't be used.
  58.143 +	 * Returns -1 (non-zero) on success and 0 when out of memory. */
  58.144 +	struct entry *node;
  58.145 +	unsigned int bucket;
  58.146 +	h->entrycount++;
  58.147 +	if (h->entrycount > h->loadlimit)
  58.148 +	{
  58.149 +		/* A failed expansion is tolerated on purpose: this one entry
  58.150 +		 * may still fit, and the next insert tries to expand again. */
  58.151 +		hashtable_expand(h);
  58.152 +	}
  58.153 +	node = (struct entry *)malloc(sizeof(*node));
  58.154 +	if (node == NULL) { h->entrycount--; return 0; } /*oom*/
  58.155 +	node->k = k;
  58.156 +	node->v = v;
  58.157 +	node->h = hash(h,k);
  58.158 +	bucket = indexFor(h->tablelength,node->h);
  58.159 +	node->next = h->table[bucket];
  58.160 +	h->table[bucket] = node;
  58.161 +	return -1;
  58.162 +}
  58.163 +
  58.164 +/*****************************************************************************/
  58.165 +void * /* returns value associated with key */
  58.166 +hashtable_search(struct hashtable *h, void *k)
  58.167 +{
  58.168 +	struct entry *cur;
  58.169 +	unsigned int hv = hash(h,k);
  58.170 +
  58.171 +	/* walk the chain of the bucket the key hashes to */
  58.172 +	for (cur = h->table[indexFor(h->tablelength,hv)]; cur != NULL; cur = cur->next)
  58.173 +	{
  58.174 +		/* Check hash value to short circuit heavier comparison */
  58.175 +		if (hv != cur->h) continue;
  58.176 +		if (h->eqfn(k, cur->k)) return cur->v;
  58.177 +	}
  58.178 +	/* NULL is returned when no entry matches */
  58.179 +	return NULL;
  58.180 +}
  58.181 +
  58.182 +/*****************************************************************************/
  58.183 +void * /* returns value associated with key */
  58.184 +hashtable_remove(struct hashtable *h, void *k)
  58.185 +{
  58.186 +	/* Unlink the first entry whose key matches k, release its key (via
  58.187 +	 * freekey) and its node, and return its value; NULL if none matches.
  58.188 +	 * TODO: consider compacting the table when the load factor drops
  58.189 +	 *       enough, or provide a 'compact' method. */
  58.190 +	struct entry *e;
  58.191 +	struct entry **pE;
  58.192 +	void *v;
  58.193 +	unsigned int hashvalue, index;
  58.194 +	hashvalue = hash(h,k);
  58.195 +	index = indexFor(h->tablelength,hashvalue); /* reuse the hash computed above */
  58.196 +	pE = &(h->table[index]);
  58.197 +	e = *pE;
  58.198 +	while (NULL != e)
  58.199 +	{
  58.200 +		/* Check hash value to short circuit heavier comparison */
  58.201 +		if ((hashvalue == e->h) && (h->eqfn(k, e->k)))
  58.202 +		{
  58.203 +			*pE = e->next;
  58.204 +			h->entrycount--;
  58.205 +			v = e->v;
  58.206 +			freekey(e->k);
  58.207 +			free(e);
  58.208 +			return v;
  58.209 +		}
  58.210 +		pE = &(e->next);
  58.211 +		e = e->next;
  58.212 +	}
  58.213 +	return NULL;
  58.214 +}
  58.215 +
  58.216 +/*****************************************************************************/
  58.217 +/* destroy */
  58.218 +void
  58.219 +hashtable_destroy(struct hashtable *h, int free_values)
  58.220 +{
  58.221 +	/* Release every entry together with its key, then the bucket
  58.222 +	 * array and the table itself. The stored values are released
  58.223 +	 * with free() only when free_values is non-zero; otherwise they
  58.224 +	 * are left untouched. */
  58.225 +	unsigned int bucket;
  58.226 +	struct entry *cur, *doomed;
  58.227 +
  58.228 +	for (bucket = 0; bucket < h->tablelength; bucket++)
  58.229 +	{
  58.230 +		cur = h->table[bucket];
  58.231 +		while (cur != NULL)
  58.232 +		{
  58.233 +			/* step past the entry before it is released */
  58.234 +			doomed = cur;
  58.235 +			cur = cur->next;
  58.236 +			freekey(doomed->k);
  58.237 +			if (free_values)
  58.238 +				free(doomed->v);
  58.239 +			free(doomed);
  58.240 +		}
  58.241 +	}
  58.242 +	free(h->table);
  58.243 +	free(h);
  58.244 +}
  58.245 +
  58.246 +/*
  58.247 + * Copyright (c) 2002, Christopher Clark
  58.248 + * All rights reserved.
  58.249 + *
  58.250 + * Redistribution and use in source and binary forms, with or without
  58.251 + * modification, are permitted provided that the following conditions
  58.252 + * are met:
  58.253 + *
  58.254 + * * Redistributions of source code must retain the above copyright
  58.255 + * notice, this list of conditions and the following disclaimer.
  58.256 + *
  58.257 + * * Redistributions in binary form must reproduce the above copyright
  58.258 + * notice, this list of conditions and the following disclaimer in the
  58.259 + * documentation and/or other materials provided with the distribution.
  58.260 + *
  58.261 + * * Neither the name of the original author; nor the names of any contributors
  58.262 + * may be used to endorse or promote products derived from this software
  58.263 + * without specific prior written permission.
  58.264 + *
  58.265 + *
  58.266 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  58.267 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  58.268 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  58.269 + * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
  58.270 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  58.271 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  58.272 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  58.273 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  58.274 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  58.275 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  58.276 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  58.277 + */
    59.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    59.2 +++ b/tools/blktap2/drivers/hashtable_itr.c	Fri Mar 19 18:36:57 2010 -0700
    59.3 @@ -0,0 +1,195 @@
    59.4 +/* Copyright (C) 2002, 2004 Christopher Clark  <firstname.lastname@cl.cam.ac.uk> */
    59.5 +
    59.6 +#include "hashtable.h"
    59.7 +#include "hashtable_private.h"
    59.8 +#include "hashtable_itr.h"
    59.9 +#include <stdlib.h> /* defines NULL */
   59.10 +
   59.11 +struct hashtable_itr {
   59.12 +	struct hashtable *h;	/* table being iterated */
   59.13 +	struct entry *e;	/* current entry; NULL when there is none */
   59.14 +	struct entry *parent;	/* predecessor of e in its chain; NULL if e heads the chain */
   59.15 +	unsigned int index;	/* bucket of e; set to h->tablelength when no entry is current */
   59.16 +};
   59.17 +
   59.18 +/*****************************************************************************/
   59.19 +/* hashtable_iterator    - iterator constructor */
   59.20 +
   59.21 +struct hashtable_itr *
   59.22 +hashtable_iterator(struct hashtable *h)
   59.23 +{
   59.24 +	unsigned int bucket;
   59.25 +	struct hashtable_itr *it;
   59.26 +	it = (struct hashtable_itr *)malloc(sizeof(*it));
   59.27 +	if (it == NULL)
   59.28 +		return NULL;
   59.29 +	/* start in the "no current entry" state: e is NULL and index
   59.30 +	 * is one past the last bucket */
   59.31 +	it->h = h;
   59.32 +	it->e = NULL;
   59.33 +	it->parent = NULL;
   59.34 +	it->index = h->tablelength;
   59.35 +	/* position on the head of the first non-empty bucket, if any;
   59.36 +	 * an empty table is not scanned at all */
   59.37 +	for (bucket = 0; h->entrycount != 0 && bucket < h->tablelength; bucket++)
   59.38 +	{
   59.39 +		if (h->table[bucket] == NULL) continue;
   59.40 +		it->e = h->table[bucket];
   59.41 +		it->index = bucket;
   59.42 +		break;
   59.43 +	}
   59.44 +	return it;
   59.45 +}
   59.46 +
   59.47 +/*****************************************************************************/
   59.48 +/* key      - return the key of the (key,value) pair at the current position */
   59.49 +/* value    - return the value of the (key,value) pair at the current position */
   59.50 +
   59.51 +void *
   59.52 +hashtable_iterator_key(struct hashtable_itr *i)
   59.53 +{ return i->e->k; } /* key of the current entry; i->e is dereferenced without a NULL check */
   59.54 +
   59.55 +void *
   59.56 +hashtable_iterator_value(struct hashtable_itr *i)
   59.57 +{ return i->e->v; } /* value of the current entry; i->e is dereferenced without a NULL check */
   59.58 +
   59.59 +/*****************************************************************************/
   59.60 +/* advance - advance the iterator to the next element
   59.61 + *           returns zero if advanced to end of table */
   59.62 +
   59.63 +int
   59.64 +hashtable_iterator_advance(struct hashtable_itr *itr)
   59.65 +{
   59.66 +	unsigned int bucket, nbuckets;
   59.67 +	struct entry *succ;
   59.68 +
   59.69 +	/* an iterator without a current entry cannot move */
   59.70 +	if (itr->e == NULL)
   59.71 +		return 0;
   59.72 +
   59.73 +	/* easy case: the current chain continues */
   59.74 +	succ = itr->e->next;
   59.75 +	if (succ != NULL)
   59.76 +	{
   59.77 +		itr->parent = itr->e;
   59.78 +		itr->e = succ;
   59.79 +		return -1;
   59.80 +	}
   59.81 +
   59.82 +	/* otherwise resume at the head of the next non-empty bucket; a
   59.83 +	 * chain head has no parent */
   59.84 +	itr->parent = NULL;
   59.85 +	nbuckets = itr->h->tablelength;
   59.86 +	for (bucket = ++(itr->index); bucket < nbuckets; bucket++)
   59.87 +	{
   59.88 +		succ = itr->h->table[bucket];
   59.89 +		if (succ == NULL) continue;
   59.90 +		itr->index = bucket;
   59.91 +		itr->e = succ;
   59.92 +		return -1;
   59.93 +	}
   59.94 +	/* ran off the end of the table: no entry is current any more */
   59.95 +	if (itr->index < nbuckets) itr->index = nbuckets;
   59.96 +	itr->e = NULL;
   59.97 +	return 0;
   59.98 +}
   59.99 +
  59.100 +/*****************************************************************************/
  59.101 +/* remove - remove the entry at the current iterator position
  59.102 + *          and advance the iterator, if there is a successive
  59.103 + *          element.
  59.104 + *          If you want the value, read it before you remove:
  59.105 + *          beware memory leaks if you don't.
  59.106 + *          Returns zero if end of iteration. */
  59.107 +
  59.108 +int
  59.109 +hashtable_iterator_remove(struct hashtable_itr *itr)
  59.110 +{
  59.111 +	struct entry *remember_e, *remember_parent;
  59.112 +	int ret;
  59.113 +
  59.114 +	/* Do the removal */
  59.115 +	if (NULL == (itr->parent))
  59.116 +	{
  59.117 +		/* element is head of a chain */
  59.118 +		itr->h->table[itr->index] = itr->e->next;
  59.119 +	} else {
  59.120 +		/* element is mid-chain */
  59.121 +		itr->parent->next = itr->e->next;
  59.122 +	}
  59.123 +	/* itr->e is now outside the hashtable */
  59.124 +	remember_e = itr->e;
  59.125 +	itr->h->entrycount--;
  59.126 +	freekey(remember_e->k);
  59.127 +
  59.128 +	/* Advance the iterator, correcting the parent */
  59.129 +	remember_parent = itr->parent;
  59.130 +	ret = hashtable_iterator_advance(itr);
  59.131 +	if (itr->parent == remember_e) { itr->parent = remember_parent; }
  59.132 +	free(remember_e);
  59.133 +	return ret;
  59.134 +}
  59.135 +
  59.136 +/*****************************************************************************/
  59.137 +int /* returns zero if not found */
  59.138 +hashtable_iterator_search(struct hashtable_itr *itr,
  59.139 +                          struct hashtable *h, void *k)
  59.140 +{
  59.141 +	struct entry *e, *parent;
  59.142 +	unsigned int hashvalue, index;
  59.143 +
  59.144 +	hashvalue = hash(h,k);
  59.145 +	index = indexFor(h->tablelength,hashvalue);
  59.146 +
  59.147 +	e = h->table[index];
  59.148 +	parent = NULL;
  59.149 +	while (NULL != e)
  59.150 +	{
  59.151 +		/* Check hash value to short circuit heavier comparison */
  59.152 +		if ((hashvalue == e->h) && (h->eqfn(k, e->k)))
  59.153 +		{
  59.154 +			itr->index = index;
  59.155 +			itr->e = e;
  59.156 +			itr->parent = parent;
  59.157 +			itr->h = h;
  59.158 +			return -1;
  59.159 +		}
  59.160 +		parent = e;
  59.161 +		e = e->next;
  59.162 +	}
  59.163 +	return 0;
  59.164 +}
  59.165 +
  59.166 +
  59.167 +/*
  59.168 + * Copyright (c) 2002, 2004, Christopher Clark
  59.169 + * All rights reserved.
  59.170 + *
  59.171 + * Redistribution and use in source and binary forms, with or without
  59.172 + * modification, are permitted provided that the following conditions
  59.173 + * are met:
  59.174 + *
  59.175 + * * Redistributions of source code must retain the above copyright
  59.176 + * notice, this list of conditions and the following disclaimer.
  59.177 + *
  59.178 + * * Redistributions in binary form must reproduce the above copyright
  59.179 + * notice, this list of conditions and the following disclaimer in the
  59.180 + * documentation and/or other materials provided with the distribution.
  59.181 + *
  59.182 + * * Neither the name of the original author; nor the names of any contributors
  59.183 + * may be used to endorse or promote products derived from this software
  59.184 + * without specific prior written permission.
  59.185 + *
  59.186 + *
  59.187 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  59.188 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  59.189 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  59.190 + * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
  59.191 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  59.192 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  59.193 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  59.194 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  59.195 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  59.196 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  59.197 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  59.198 + */
    60.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    60.2 +++ b/tools/blktap2/drivers/hashtable_itr.h	Fri Mar 19 18:36:57 2010 -0700
    60.3 @@ -0,0 +1,96 @@
    60.4 +/* Copyright (C) 2002, 2004 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */
    60.5 +
    60.6 +#ifndef __HASHTABLE_ITR_CWC22__
    60.7 +#define __HASHTABLE_ITR_CWC22__
    60.8 +#include "hashtable.h"
    60.9 +#include "hashtable_private.h" /* needed to enable inlining */
   60.10 +
   60.11 +struct hashtable_itr;
   60.12 +
   60.13 +/*****************************************************************************/
   60.14 +/* hashtable_iterator
   60.15 + */
   60.16 +
   60.17 +struct hashtable_itr *
   60.18 +hashtable_iterator(struct hashtable *h);
   60.19 +
   60.20 +/*****************************************************************************/
   60.21 +/* hashtable_iterator_key
   60.22 + * - return the key of the (key,value) pair at the current position */
   60.23 +
   60.24 +void *
   60.25 +hashtable_iterator_key(struct hashtable_itr *i);
   60.26 +
   60.27 +/*****************************************************************************/
   60.28 +/* value - return the value of the (key,value) pair at the current position */
   60.29 +
   60.30 +void *
   60.31 +hashtable_iterator_value(struct hashtable_itr *i);
   60.32 +
   60.33 +/*****************************************************************************/
   60.34 +/* advance - advance the iterator to the next element
   60.35 + *           returns zero if advanced to end of table */
   60.36 +
   60.37 +int
   60.38 +hashtable_iterator_advance(struct hashtable_itr *itr);
   60.39 +
   60.40 +/*****************************************************************************/
   60.41 +/* remove - remove current element and advance the iterator to the next element
   60.42 + *          NB: if you need the value to free it, read it before
   60.43 + *          removing. ie: beware memory leaks!
   60.44 + *          returns zero if advanced to end of table */
   60.45 +
   60.46 +int
   60.47 +hashtable_iterator_remove(struct hashtable_itr *itr);
   60.48 +
   60.49 +/*****************************************************************************/
   60.50 +/* search - overwrite the supplied iterator, to point to the entry
   60.51 + *          matching the supplied key.
   60.52 + *          h points to the hashtable to be searched.
   60.53 + *          returns zero if not found. */
   60.54 +int
   60.55 +hashtable_iterator_search(struct hashtable_itr *itr,
   60.56 +                          struct hashtable *h, void *k);
   60.57 +
   60.58 +#define DEFINE_HASHTABLE_ITERATOR_SEARCH(fnname, keytype) \
   60.59 +int fnname (struct hashtable_itr *i, struct hashtable *h, keytype *k) \
   60.60 +{ \
   60.61 +    return (hashtable_iterator_search(i,h,k)); \
   60.62 +}
   60.63 +
   60.64 +
   60.65 +
   60.66 +#endif /* __HASHTABLE_ITR_CWC22__*/
   60.67 +
   60.68 +/*
   60.69 + * Copyright (c) 2002, 2004, Christopher Clark
   60.70 + * All rights reserved.
   60.71 + *
   60.72 + * Redistribution and use in source and binary forms, with or without
   60.73 + * modification, are permitted provided that the following conditions
   60.74 + * are met:
   60.75 + *
   60.76 + * * Redistributions of source code must retain the above copyright
   60.77 + * notice, this list of conditions and the following disclaimer.
   60.78 + *
   60.79 + * * Redistributions in binary form must reproduce the above copyright
   60.80 + * notice, this list of conditions and the following disclaimer in the
   60.81 + * documentation and/or other materials provided with the distribution.
   60.82 + *
   60.83 + * * Neither the name of the original author; nor the names of any contributors
   60.84 + * may be used to endorse or promote products derived from this software
   60.85 + * without specific prior written permission.
   60.86 + *
   60.87 + *
   60.88 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   60.89 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   60.90 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   60.91 + * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
   60.92 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   60.93 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   60.94 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   60.95 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   60.96 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   60.97 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   60.98 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   60.99 + */
    61.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    61.2 +++ b/tools/blktap2/drivers/hashtable_utility.c	Fri Mar 19 18:36:57 2010 -0700
    61.3 @@ -0,0 +1,71 @@
    61.4 +/* Copyright (C) 2002 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */
    61.5 +
    61.6 +#include "hashtable.h"
    61.7 +#include "hashtable_private.h"
    61.8 +#include "hashtable_utility.h"
    61.9 +#include <stdlib.h>
   61.10 +#include <stdio.h>
   61.11 +#include <string.h>
   61.12 +
   61.13 +/*****************************************************************************/
   61.14 +/* hashtable_change
   61.15 + *
   61.16 + * function to change the value associated with a key, where there already
   61.17 + * exists a value bound to the key in the hashtable.
   61.18 + * Source due to Holger Schemel.
   61.19 + *
   61.20 + *  */
   61.21 +int
   61.22 +hashtable_change(struct hashtable *h, void *k, void *v)
   61.23 +{
   61.24 +	struct entry *e;
   61.25 +	unsigned int hashvalue, index;
   61.26 +	hashvalue = hash(h,k);
   61.27 +	index = indexFor(h->tablelength,hashvalue);
   61.28 +	e = h->table[index];
   61.29 +	while (NULL != e)
   61.30 +	{
   61.31 +		/* Check hash value to short circuit heavier comparison */
   61.32 +		if ((hashvalue == e->h) && (h->eqfn(k, e->k)))
   61.33 +		{
   61.34 +			free(e->v);
   61.35 +			e->v = v;
   61.36 +			return -1;
   61.37 +		}
   61.38 +		e = e->next;
   61.39 +	}
   61.40 +	return 0;
   61.41 +}
   61.42 +
   61.43 +/*
   61.44 + * Copyright (c) 2002, Christopher Clark
   61.45 + * All rights reserved.
   61.46 + *
   61.47 + * Redistribution and use in source and binary forms, with or without
   61.48 + * modification, are permitted provided that the following conditions
   61.49 + * are met:
   61.50 + *
   61.51 + * * Redistributions of source code must retain the above copyright
   61.52 + * notice, this list of conditions and the following disclaimer.
   61.53 + *
   61.54 + * * Redistributions in binary form must reproduce the above copyright
   61.55 + * notice, this list of conditions and the following disclaimer in the
   61.56 + * documentation and/or other materials provided with the distribution.
   61.57 + *
   61.58 + * * Neither the name of the original author; nor the names of any contributors
   61.59 + * may be used to endorse or promote products derived from this software
   61.60 + * without specific prior written permission.
   61.61 + *
   61.62 + *
   61.63 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   61.64 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   61.65 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   61.66 + * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
   61.67 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   61.68 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   61.69 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   61.70 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   61.71 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   61.72 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   61.73 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   61.74 + */
    62.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    62.2 +++ b/tools/blktap2/drivers/hashtable_utility.h	Fri Mar 19 18:36:57 2010 -0700
    62.3 @@ -0,0 +1,55 @@
    62.4 +/* Copyright (C) 2002 Christopher Clark <firstname.lastname@cl.cam.ac.uk> */
    62.5 +
    62.6 +#ifndef __HASHTABLE_CWC22_UTILITY_H__
    62.7 +#define __HASHTABLE_CWC22_UTILITY_H__
    62.8 +
    62.9 +/*****************************************************************************
   62.10 + * hashtable_change
   62.11 + *
   62.12 + * function to change the value associated with a key, where there already
   62.13 + * exists a value bound to the key in the hashtable.
   62.14 + * Source due to Holger Schemel.
   62.15 + *
   62.16 + * @name        hashtable_change
   62.17 + * @param   h   the hashtable
   62.18 + * @param   k   the key whose bound value is to be changed
   62.19 + * @param   v   the new value (the previously bound value is freed)
   62.20 + *
   62.21 + */
   62.22 +int
   62.23 +hashtable_change(struct hashtable *h, void *k, void *v);
   62.24 +
   62.25 +#endif /* __HASHTABLE_CWC22_UTILITY_H__ */
   62.26 +
   62.27 +/*
   62.28 + * Copyright (c) 2002, Christopher Clark
   62.29 + * All rights reserved.
   62.30 + *
   62.31 + * Redistribution and use in source and binary forms, with or without
   62.32 + * modification, are permitted provided that the following conditions
   62.33 + * are met:
   62.34 + *
   62.35 + * * Redistributions of source code must retain the above copyright
   62.36 + * notice, this list of conditions and the following disclaimer.
   62.37 + *
   62.38 + * * Redistributions in binary form must reproduce the above copyright
   62.39 + * notice, this list of conditions and the following disclaimer in the
   62.40 + * documentation and/or other materials provided with the distribution.
   62.41 + *
   62.42 + * * Neither the name of the original author; nor the names of any contributors
   62.43 + * may be used to endorse or promote products derived from this software
   62.44 + * without specific prior written permission.
   62.45 + *
   62.46 + *
   62.47 + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   62.48 + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   62.49 + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   62.50 + * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER
   62.51 + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
   62.52 + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
   62.53 + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
   62.54 + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
   62.55 + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
   62.56 + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
   62.57 + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   62.58 +*/
    63.1 --- a/tools/blktap2/drivers/io-optimize.c	Mon Nov 02 19:35:54 2009 -0800
    63.2 +++ b/tools/blktap2/drivers/io-optimize.c	Fri Mar 19 18:36:57 2010 -0700
    63.3 @@ -51,9 +51,16 @@ void
    63.4  opio_free(struct opioctx *ctx)
    63.5  {
    63.6  	free(ctx->opios);
    63.7 +	ctx->opios = NULL;
    63.8 +
    63.9  	free(ctx->free_opios);
   63.10 +	ctx->free_opios = NULL;
   63.11 +
   63.12  	free(ctx->iocb_queue);
   63.13 +	ctx->iocb_queue = NULL;
   63.14 +
   63.15  	free(ctx->event_queue);
   63.16 +	ctx->event_queue = NULL;
   63.17  }
   63.18  
   63.19  int
    64.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
    64.2 +++ b/tools/blktap2/drivers/libaio-compat.h	Fri Mar 19 18:36:57 2010 -0700
    64.3 @@ -0,0 +1,92 @@
    64.4 +/*
    64.5 + * Copyright (c) 2010, XenSource Inc.
    64.6 + * All rights reserved.
    64.7 + *
    64.8 + * This  library is  free  software; you  can  redistribute it  and/or
    64.9 + * modify it under the terms  of the GNU Lesser General Public License
   64.10 + * as published by  the Free Software Foundation; either  version 2 of
   64.11 + * the License, or (at your option) any later version.
   64.12 + *
   64.13 + * This library is distributed in the hope that it will be useful, but
   64.14 + * WITHOUT  ANY  WARRANTY;  without   even  the  implied  warranty  of
   64.15 + * MERCHANTABILITY or  FITNESS FOR A PARTICULAR PURPOSE.   See the GNU
   64.16 + * Lesser General Public License for more details.
   64.17 + *
   64.18 + * You should  have received a copy  of the GNU  Lesser General Public
   64.19 + * License along with this library; if not, write to the Free Software
   64.20 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
   64.21 + * USA
   64.22 + */
   64.23 +
   64.24 +/*
   64.25 + * kernel 2.6.21 added eventfd(2) support, kernel 2.6.22 eventfds for
   64.26 + * aio. libaio 0.3.107 updated the header file, but few systems have
   64.27 + * it. define a custom iocb_common struct instead, and work around a
   64.28 + * potentially missing sys/eventfd.h. this header should vanish over
   64.29 + * time.
   64.30 + */
   64.31 +
   64.32 +#ifndef __LIBAIO_COMPAT
   64.33 +#define __LIBAIO_COMPAT
   64.34 +
   64.35 +#include <libaio.h>
   64.36 +#include <unistd.h>
   64.37 +#include <sys/syscall.h>
   64.38 +
   64.39 +struct __compat_io_iocb_common {
   64.40 +	char             __pad_buf[8];
   64.41 +	char             __pad_nbytes[8];
   64.42 +	long long	offset;
   64.43 +	long long	__pad3;
   64.44 +	unsigned	flags;
   64.45 +	unsigned	resfd;
   64.46 +};
   64.47 +
   64.48 +static inline void __io_set_eventfd(struct iocb *iocb, int eventfd)
   64.49 +{
   64.50 +	struct __compat_io_iocb_common *c;
   64.51 +	c = (struct __compat_io_iocb_common*)&iocb->u.c;
   64.52 +	c->flags |= (1 << 0);
   64.53 +	c->resfd = eventfd;
   64.54 +}
   64.55 +
   64.56 +#ifndef SYS_eventfd
   64.57 +#ifndef __NR_eventfd
   64.58 +# if defined(__alpha__)
   64.59 +#  define __NR_eventfd		478
   64.60 +# elif defined(__arm__)
   64.61 +#  define __NR_eventfd		(__NR_SYSCALL_BASE+351)
   64.62 +# elif defined(__ia64__)
   64.63 +#  define __NR_eventfd		1309
   64.64 +# elif defined(__i386__)
   64.65 +#  define __NR_eventfd		323
   64.66 +# elif defined(__m68k__)
   64.67 +#  define __NR_eventfd		319
   64.68 +# elif 0 && defined(__mips__)
   64.69 +#  error __NR_eventfd?
   64.70 +#  define __NR_eventfd		(__NR_Linux + 319)
   64.71 +#  define __NR_eventfd		(__NR_Linux + 278)
   64.72 +#  define __NR_eventfd		(__NR_Linux + 282)
   64.73 +# elif defined(__hppa__)
   64.74 +#  define __NR_eventfd		(__NR_Linux + 304)
   64.75 +# elif defined(__PPC__) || defined(__powerpc64__)
   64.76 +#  define __NR_eventfd		307
   64.77 +# elif defined(__s390__) || defined(__s390x__)
   64.78 +#  define __NR_eventfd		318
   64.79 +# elif defined(__sparc__)
   64.80 +#  define __NR_eventfd		313
   64.81 +# elif defined(__x86_64__)
   64.82 +#  define __NR_eventfd		284
   64.83 +# endif
   64.84 +#else
   64.85 +# error __NR_eventfd?
   64.86 +#endif
   64.87 +#define SYS_eventfd __NR_eventfd
   64.88 +#endif
   64.89 +
   64.90 +static inline int tapdisk_sys_eventfd(int initval)
   64.91 +{
   64.92 +	return syscall(SYS_eventfd, initval, 0);
   64.93 +}
   64.94 +
   64.95 +#endif /* __LIBAIO_COMPAT */
    65.1 --- a/tools/blktap2/drivers/qcow2raw.c	Mon Nov 02 19:35:54 2009 -0800
    65.2 +++ b/tools/blktap2/drivers/qcow2raw.c	Fri Mar 19 18:36:57 2010 -0700
    65.3 @@ -101,12 +101,6 @@ static void print_bytes(void *ptr, int l
    65.4      return;
    65.5  }
    65.6  
    65.7 -void
    65.8 -queue_event(event_id_t id, char mode, void *private)
    65.9 -{
   65.10 -  tapdisk_complete_tiocbs(&server.aio_queue);
   65.11 -}
   65.12 -
   65.13  static void debug_output(uint64_t progress, uint64_t size)
   65.14  {
   65.15          //Output progress every PROGRESS_QUANT 
    66.1 --- a/tools/blktap2/drivers/tapdisk-image.c	Mon Nov 02 19:35:54 2009 -0800
    66.2 +++ b/tools/blktap2/drivers/tapdisk-image.c	Fri Mar 19 18:36:57 2010 -0700
    66.3 @@ -28,6 +28,9 @@
    66.4  #include <errno.h>
    66.5  #include <unistd.h>
    66.6  #include <stdlib.h>
    66.7 +#ifdef MEMSHR
    66.8 +#include <memshr.h>
    66.9 +#endif
   66.10  
   66.11  #include "tapdisk-image.h"
   66.12  #include "tapdisk-driver.h"
   66.13 @@ -52,10 +55,13 @@ tapdisk_image_allocate(char *file, int t
   66.14  		return NULL;
   66.15  	}
   66.16  
   66.17 -	image->type    = type;
   66.18 -	image->flags   = flags;
   66.19 -	image->storage = storage;
   66.20 -	image->private = private;
   66.21 +	image->type      = type;
   66.22 +	image->flags     = flags;
   66.23 +	image->storage   = storage;
   66.24 +	image->private   = private;
   66.25 +#ifdef MEMSHR
   66.26 +	image->memshr_id = memshr_vbd_image_get(file);
   66.27 +#endif
   66.28  	INIT_LIST_HEAD(&image->next);
   66.29  
   66.30  	return image;
   66.31 @@ -69,6 +75,9 @@ tapdisk_image_free(td_image_t *image)
   66.32  
   66.33  	list_del(&image->next);
   66.34  
   66.35 +#ifdef MEMSHR
   66.36 +	memshr_vbd_image_put(image->memshr_id);
   66.37 +#endif
   66.38  	free(image->name);
   66.39  	tapdisk_driver_free(image->driver);
   66.40  	free(image);
    67.1 --- a/tools/blktap2/drivers/tapdisk-image.h	Mon Nov 02 19:35:54 2009 -0800
    67.2 +++ b/tools/blktap2/drivers/tapdisk-image.h	Fri Mar 19 18:36:57 2010 -0700
    67.3 @@ -34,6 +34,7 @@
    67.4  struct td_image_handle {
    67.5  	int                          type;
    67.6  	char                        *name;
    67.7 +    uint16_t                     memshr_id;
    67.8  
    67.9  	td_flag_t                    flags;
   67.10  	int                          storage;
    68.1 --- a/tools/blktap2/drivers/tapdisk-ipc.c	Mon Nov 02 19:35:54 2009 -0800
    68.2 +++ b/tools/blktap2/drivers/tapdisk-ipc.c	Fri Mar 19 18:36:57 2010 -0700
    68.3 @@ -30,12 +30,86 @@
    68.4  #include <stdlib.h>
    68.5  #include <unistd.h>
    68.6  #include <string.h>
    68.7 +#include <fcntl.h>
    68.8  
    68.9  #include "tapdisk.h"
   68.10  #include "tapdisk-ipc.h"
   68.11  #include "tapdisk-vbd.h"
   68.12  #include "tapdisk-server.h"
   68.13  
   68.14 +static void
   68.15 +tapdisk_ipc_read_event(event_id_t id, char mode, void *private)
   68.16 +{
   68.17 +	td_ipc_t *ipc = private;
   68.18 +	tapdisk_ipc_read(ipc);
   68.19 +}
   68.20 +
   68.21 +static void
   68.22 +__tapdisk_ipc_init(td_ipc_t *ipc)
   68.23 +{
   68.24 +	ipc->rfd = -1;
   68.25 +	ipc->wfd = -1;
   68.26 +	ipc->rfd_event = -1;
   68.27 +}
   68.28 +
   68.29 +int
   68.30 +tapdisk_ipc_open(td_ipc_t *ipc, const char *read, const char *write)
   68.31 +{
   68.32 +	int err;
   68.33 +
   68.34 +	memset(ipc, 0, sizeof(td_ipc_t));
   68.35 +	__tapdisk_ipc_init(ipc);
   68.36 +
   68.37 +	if (read) {
   68.38 +		ipc->rfd = open(read, O_RDWR | O_NONBLOCK);
   68.39 +		if (ipc->rfd < 0) {
   68.40 +			err = -errno;
   68.41 +			EPRINTF("FD open failed %s: %d\n", read, err);
   68.42 +			goto fail;
   68.43 +		}
   68.44 +
   68.45 +		ipc->rfd_event = 
   68.46 +			tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
   68.47 +						      ipc->rfd, 0,
   68.48 +						      tapdisk_ipc_read_event,
   68.49 +						      ipc);
   68.50 +		if (ipc->rfd_event < 0) {
   68.51 +			err = ipc->rfd_event;
   68.52 +			goto fail;
   68.53 +		}
   68.54 +	}
   68.55 +
   68.56 +	if (write) {
   68.57 +		ipc->wfd = open(write, O_RDWR | O_NONBLOCK);
   68.58 +		if (ipc->wfd < 0) {
   68.59 +			err = -errno;
   68.60 +			EPRINTF("FD open failed %s, %d\n", write, err);
   68.61 +			goto fail;
   68.62 +		}
   68.63 +	}
   68.64 +
   68.65 +	return 0;
   68.66 +
   68.67 +fail:
   68.68 +	tapdisk_ipc_close(ipc);
   68.69 +	return err;
   68.70 +}
   68.71 +
   68.72 +void
   68.73 +tapdisk_ipc_close(td_ipc_t *ipc)
   68.74 +{
   68.75 +	if (ipc->rfd > 0)
   68.76 +		close(ipc->rfd);
   68.77 +
   68.78 +	if (ipc->wfd > 0)
   68.79 +		close(ipc->wfd);
   68.80 +
   68.81 +	if (ipc->rfd_event >= 0)
   68.82 +		tapdisk_server_unregister_event(ipc->rfd_event);
   68.83 +
   68.84 +	__tapdisk_ipc_init(ipc);
   68.85 +}
   68.86 +
   68.87  static int
   68.88  tapdisk_ipc_write_message(int fd, tapdisk_message_t *message, int timeout)
   68.89  {
    69.1 --- a/tools/blktap2/drivers/tapdisk-ipc.h	Mon Nov 02 19:35:54 2009 -0800
    69.2 +++ b/tools/blktap2/drivers/tapdisk-ipc.h	Fri Mar 19 18:36:57 2010 -0700
    69.3 @@ -29,13 +29,17 @@
    69.4  #define _TAPDISK_IPC_H_
    69.5  
    69.6  #include "tapdisk-message.h"
    69.7 +#include "scheduler.h"
    69.8  
    69.9  typedef struct td_ipc_handle {
   69.10  	int                         rfd;
   69.11  	int                         wfd;
   69.12 +	event_id_t		    rfd_event;
   69.13  	td_uuid_t                   uuid;
   69.14  } td_ipc_t;
   69.15  
   69.16 +int tapdisk_ipc_open(td_ipc_t *ipc, const char *read, const char *write);
   69.17 +void tapdisk_ipc_close(td_ipc_t *ipc);
   69.18  int tapdisk_ipc_read(td_ipc_t *ipc);
   69.19  int tapdisk_ipc_write(td_ipc_t *ipc, int type);
   69.20  int tapdisk_ipc_write_error(td_ipc_t *ipc, const char *message);
    70.1 --- a/tools/blktap2/drivers/tapdisk-queue.c	Mon Nov 02 19:35:54 2009 -0800
    70.2 +++ b/tools/blktap2/drivers/tapdisk-queue.c	Fri Mar 19 18:36:57 2010 -0700
    70.3 @@ -30,11 +30,18 @@
    70.4  #include <stdlib.h>
    70.5  #include <unistd.h>
    70.6  #include <libaio.h>
    70.7 +#ifdef __linux__
    70.8 +#include <linux/version.h>
    70.9 +#endif
   70.10  
   70.11  #include "tapdisk.h"
   70.12  #include "tapdisk-log.h"
   70.13  #include "tapdisk-queue.h"
   70.14  #include "tapdisk-filter.h"
   70.15 +#include "tapdisk-server.h"
   70.16 +#include "tapdisk-utils.h"
   70.17 +
   70.18 +#include "libaio-compat.h"
   70.19  #include "atomicio.h"
   70.20  
   70.21  #define WARN(_f, _a...) tlog_write(TLOG_WARN, _f, ##_a)
   70.22 @@ -46,7 +53,7 @@
   70.23   * so that we can concurrently poll on synchronous and async descriptors.
   70.24   * This is signalled by passing 1 as the io context to io_setup.
   70.25   */
   70.26 -#define REQUEST_ASYNC_FD 1
   70.27 +#define REQUEST_ASYNC_FD ((io_context_t)1)
   70.28  
   70.29  static inline void
   70.30  queue_tiocb(struct tqueue *queue, struct tiocb *tiocb)
   70.31 @@ -140,7 +147,7 @@ cancel_tiocbs(struct tqueue *queue, int 
   70.32  	 * use a private linked list to keep track
   70.33  	 * of the tiocbs we're cancelling. 
   70.34  	 */
   70.35 -	tiocb  = (struct tiocb *)queue->iocbs[0]->data;
   70.36 +	tiocb  = queue->iocbs[0]->data;
   70.37  	queued = queue->queued;
   70.38  	queue->queued = 0;
   70.39  
   70.40 @@ -164,8 +171,40 @@ fail_tiocbs(struct tqueue *queue, int su
   70.41  	return cancel_tiocbs(queue, err);
   70.42  }
   70.43  
   70.44 +/*
   70.45 + * rwio
   70.46 + */
   70.47 +
   70.48 +struct rwio {
   70.49 +	struct io_event *aio_events;
   70.50 +};
   70.51 +
   70.52 +static void
   70.53 +tapdisk_rwio_destroy(struct tqueue *queue)
   70.54 +{
   70.55 +	struct rwio *rwio = queue->tio_data;
   70.56 +
   70.57 +	if (rwio->aio_events) {
   70.58 +		free(rwio->aio_events);
   70.59 +		rwio->aio_events = NULL;
   70.60 +	}
   70.61 +}
   70.62 +
   70.63 +static int
   70.64 +tapdisk_rwio_setup(struct tqueue *queue, int size)
   70.65 +{
   70.66 +	struct rwio *rwio = queue->tio_data;
   70.67 +	int err;
   70.68 +
   70.69 +	rwio->aio_events = calloc(size, sizeof(struct io_event));
   70.70 +	if (!rwio->aio_events)
   70.71 +		return -errno;
   70.72 +
   70.73 +	return 0;
   70.74 +}
   70.75 +
   70.76  static inline ssize_t
   70.77 -iocb_rw(struct iocb *iocb)
   70.78 +tapdisk_rwio_rw(const struct iocb *iocb)
   70.79  {
   70.80  	int fd        = iocb->aio_fildes;
   70.81  	char *buf     = iocb->u.c.buf;
   70.82 @@ -176,7 +215,7 @@ iocb_rw(struct iocb *iocb)
   70.83  
   70.84  	if (lseek(fd, off, SEEK_SET) == (off_t)-1)
   70.85  		return -errno;
   70.86 -	
   70.87 +
   70.88  	if (atomicio(func, fd, buf, size) != size)
   70.89  		return -errno;
   70.90  
   70.91 @@ -184,8 +223,9 @@ iocb_rw(struct iocb *iocb)
   70.92  }
   70.93  
   70.94  static int
   70.95 -io_synchronous_rw(struct tqueue *queue)
   70.96 +tapdisk_rwio_submit(struct tqueue *queue)
   70.97  {
   70.98 +	struct rwio *rwio = queue->tio_data;
   70.99  	int i, merged, split;
  70.100  	struct iocb *iocb;
  70.101  	struct tiocb *tiocb;
  70.102 @@ -200,18 +240,18 @@ io_synchronous_rw(struct tqueue *queue)
  70.103  	queue->queued = 0;
  70.104  
  70.105  	for (i = 0; i < merged; i++) {
  70.106 -		ep      = queue->aio_events + i;
  70.107 +		ep      = rwio->aio_events + i;
  70.108  		iocb    = queue->iocbs[i];
  70.109  		ep->obj = iocb;
  70.110 -		ep->res = iocb_rw(iocb);
  70.111 +		ep->res = tapdisk_rwio_rw(iocb);
  70.112  	}
  70.113  
  70.114 -	split = io_split(&queue->opioctx, queue->aio_events, merged);
  70.115 -	tapdisk_filter_events(queue->filter, queue->aio_events, split);
  70.116 +	split = io_split(&queue->opioctx, rwio->aio_events, merged);
  70.117 +	tapdisk_filter_events(queue->filter, rwio->aio_events, split);
  70.118  
  70.119 -	for (i = split, ep = queue->aio_events; i-- > 0; ep++) {
  70.120 +	for (i = split, ep = rwio->aio_events; i-- > 0; ep++) {
  70.121  		iocb  = ep->obj;
  70.122 -		tiocb = (struct tiocb *)iocb->data;
  70.123 +		tiocb = iocb->data;
  70.124  		complete_tiocb(queue, tiocb, ep->res);
  70.125  	}
  70.126  
  70.127 @@ -220,52 +260,374 @@ io_synchronous_rw(struct tqueue *queue)
  70.128  	return split;
  70.129  }
  70.130  
  70.131 +static const struct tio td_tio_rwio = {
  70.132 +	.name        = "rwio",
  70.133 +	.data_size   = 0,
  70.134 +	.tio_setup   = NULL,
  70.135 +	.tio_destroy = NULL,
  70.136 +	.tio_submit  = tapdisk_rwio_submit
  70.137 +};
  70.138 +
  70.139 +/*
  70.140 + * libaio
  70.141 + */
  70.142 +
  70.143 +struct lio {
  70.144 +	io_context_t     aio_ctx;
  70.145 +	struct io_event *aio_events;
  70.146 +
  70.147 +	int              event_fd;
  70.148 +	int              event_id;
  70.149 +
  70.150 +	int              flags;
  70.151 +};
  70.152 +
  70.153 +#define LIO_FLAG_EVENTFD        (1<<0)
  70.154 +
  70.155 +static int
  70.156 +tapdisk_lio_check_resfd(void)
  70.157 +{
  70.158 +#if defined(__linux__)
  70.159 +	return tapdisk_linux_version() >= KERNEL_VERSION(2, 6, 22);
  70.160 +#else
  70.161 +	return 1;
  70.162 +#endif
  70.163 +}
  70.164 +
  70.165 +static void
  70.166 +tapdisk_lio_destroy_aio(struct tqueue *queue)
  70.167 +{
  70.168 +	struct lio *lio = queue->tio_data;
  70.169 +
  70.170 +	if (lio->event_fd >= 0) {
  70.171 +		close(lio->event_fd);
  70.172 +		lio->event_fd = -1;
  70.173 +	}
  70.174 +
  70.175 +	if (lio->aio_ctx) {
  70.176 +		io_destroy(lio->aio_ctx);
  70.177 +		lio->aio_ctx = 0;
  70.178 +	}
  70.179 +}
  70.180 +
  70.181 +static int
  70.182 +__lio_setup_aio_poll(struct tqueue *queue, int qlen)
  70.183 +{
  70.184 +	struct lio *lio = queue->tio_data;
  70.185 +	int err, fd;
  70.186 +
  70.187 +	lio->aio_ctx = REQUEST_ASYNC_FD;
  70.188 +
  70.189 +	fd = io_setup(qlen, &lio->aio_ctx);
  70.190 +	if (fd < 0) {
  70.191 +		lio->aio_ctx = 0;
  70.192 +		err = -errno;
  70.193 +
  70.194 +		if (err == -EINVAL)
  70.195 +			goto fail_fd;
  70.196 +
  70.197 +		goto fail;
  70.198 +	}
  70.199 +
  70.200 +	lio->event_fd = fd;
  70.201 +
  70.202 +	return 0;
  70.203 +
  70.204 +fail_fd:
  70.205 +	DPRINTF("Couldn't get fd for AIO poll support. This is probably "
  70.206 +		"because your kernel does not have the aio-poll patch "
  70.207 +		"applied.\n");
  70.208 +fail:
  70.209 +	return err;
  70.210 +}
  70.211 +
  70.212 +static int
  70.213 +__lio_setup_aio_eventfd(struct tqueue *queue, int qlen)
  70.214 +{
  70.215 +	struct lio *lio = queue->tio_data;
  70.216 +	int err;
  70.217 +
  70.218 +	err = io_setup(qlen, &lio->aio_ctx);
  70.219 +	if (err < 0) {
  70.220 +		lio->aio_ctx = 0;
  70.221 +		return err;
  70.222 +	}
  70.223 +
  70.224 +	lio->event_fd = tapdisk_sys_eventfd(0);
  70.225 +	if (lio->event_fd < 0)
  70.226 +		return  -errno;
  70.227 +
  70.228 +	lio->flags |= LIO_FLAG_EVENTFD;
  70.229 +
  70.230 +	return 0;
  70.231 +}
  70.232 +
  70.233 +static int
  70.234 +tapdisk_lio_setup_aio(struct tqueue *queue, int qlen)
  70.235 +{
  70.236 +	struct lio *lio = queue->tio_data;
  70.237 +	int err;
  70.238 +
  70.239 +	lio->aio_ctx  =  0;
  70.240 +	lio->event_fd = -1;
  70.241 +
  70.242 +	/*
  70.243 +	 * prefer the mainline eventfd(2) api, if available.
  70.244 +	 * if not, fall back to the poll fd patch.
  70.245 +	 */
  70.246 +
  70.247 +	err = !tapdisk_lio_check_resfd();
  70.248 +	if (!err)
  70.249 +		err = __lio_setup_aio_eventfd(queue, qlen);
  70.250 +	if (err)
  70.251 +		err = __lio_setup_aio_poll(queue, qlen);
  70.252 +
  70.253 +	if (err == -EAGAIN)
  70.254 +		goto fail_rsv;
  70.255 +fail:
  70.256 +	return err;
  70.257 +
  70.258 +fail_rsv:
  70.259 +	DPRINTF("Couldn't setup AIO context. If you are trying to "
  70.260 +		"concurrently use a large number of blktap-based disks, you may "
  70.261 +		"need to increase the system-wide aio request limit. "
  70.262 +		"(e.g. 'echo 1048576 > /proc/sys/fs/aio-max-nr')\n");
  70.263 +	goto fail;
  70.264 +}
  70.265 +
  70.266 +
  70.267 +static void
  70.268 +tapdisk_lio_destroy(struct tqueue *queue)
  70.269 +{
  70.270 +	struct lio *lio = queue->tio_data;
  70.271 +
  70.272 +	if (!lio)
  70.273 +		return;
  70.274 +
  70.275 +	if (lio->event_id >= 0) {
  70.276 +		tapdisk_server_unregister_event(lio->event_id);
  70.277 +		lio->event_id = -1;
  70.278 +	}
  70.279 +
  70.280 +	tapdisk_lio_destroy_aio(queue);
  70.281 +
  70.282 +	if (lio->aio_events) {
  70.283 +		free(lio->aio_events);
  70.284 +		lio->aio_events = NULL;
  70.285 +	}
  70.286 +}
  70.287 +
  70.288 +static void
  70.289 +tapdisk_lio_set_eventfd(struct tqueue *queue, int n, struct iocb **iocbs)
  70.290 +{
  70.291 +	struct lio *lio = queue->tio_data;
  70.292 +	int i;
  70.293 +
  70.294 +	if (lio->flags & LIO_FLAG_EVENTFD)
  70.295 +		for (i = 0; i < n; ++i)
  70.296 +			__io_set_eventfd(iocbs[i], lio->event_fd);
  70.297 +}
  70.298 +
  70.299 +static void
  70.300 +tapdisk_lio_ack_event(struct tqueue *queue)
  70.301 +{
  70.302 +	struct lio *lio = queue->tio_data;
  70.303 +	uint64_t val;
  70.304 +
  70.305 +	if (lio->flags & LIO_FLAG_EVENTFD)
  70.306 +		read(lio->event_fd, &val, sizeof(val));
  70.307 +}
  70.308 +
  70.309 +static void
  70.310 +tapdisk_lio_event(event_id_t id, char mode, void *private)
  70.311 +{
  70.312 +	struct tqueue *queue = private;
  70.313 +	struct lio *lio;
  70.314 +	int i, ret, split;
  70.315 +	struct iocb *iocb;
  70.316 +	struct tiocb *tiocb;
  70.317 +	struct io_event *ep;
  70.318 +
  70.319 +	tapdisk_lio_ack_event(queue);
  70.320 +
  70.321 +	lio   = queue->tio_data;
  70.322 +	ret   = io_getevents(lio->aio_ctx, 0,
  70.323 +			     queue->size, lio->aio_events, NULL);
  70.324 +	split = io_split(&queue->opioctx, lio->aio_events, ret);
  70.325 +	tapdisk_filter_events(queue->filter, lio->aio_events, split);
  70.326 +
  70.327 +	DBG("events: %d, tiocbs: %d\n", ret, split);
  70.328 +
  70.329 +	queue->iocbs_pending  -= ret;
  70.330 +	queue->tiocbs_pending -= split;
  70.331 +
  70.332 +	for (i = split, ep = lio->aio_events; i-- > 0; ep++) {
  70.333 +		iocb  = ep->obj;
  70.334 +		tiocb = iocb->data;
  70.335 +		complete_tiocb(queue, tiocb, ep->res);
  70.336 +	}
  70.337 +
  70.338 +	queue_deferred_tiocbs(queue);
  70.339 +}
  70.340 +
  70.341 +static int
  70.342 +tapdisk_lio_setup(struct tqueue *queue, int qlen)
  70.343 +{
  70.344 +	struct lio *lio = queue->tio_data;
  70.345 +	size_t sz;
  70.346 +	int err;
  70.347 +
  70.348 +	lio->event_id = -1;
  70.349 +
  70.350 +	err = tapdisk_lio_setup_aio(queue, qlen);
  70.351 +	if (err)
  70.352 +		goto fail;
  70.353 +
  70.354 +	lio->event_id =
  70.355 +		tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
  70.356 +					      lio->event_fd, 0,
  70.357 +					      tapdisk_lio_event,
  70.358 +					      queue);
  70.359 +	err = lio->event_id;
  70.360 +	if (err < 0)
  70.361 +		goto fail;
  70.362 +
  70.363 +	lio->aio_events = calloc(qlen, sizeof(struct io_event));
  70.364 +	if (!lio->aio_events) {
  70.365 +		err = -errno;
  70.366 +		goto fail;
  70.367 +	}
  70.368 +
  70.369 +	return 0;
  70.370 +
  70.371 +fail:
  70.372 +	tapdisk_lio_destroy(queue);
  70.373 +	return err;
  70.374 +}
  70.375 +
  70.376 +static int
  70.377 +tapdisk_lio_submit(struct tqueue *queue)
  70.378 +{
  70.379 +	struct lio *lio = queue->tio_data;
  70.380 +	int merged, submitted, err = 0;
  70.381 +
  70.382 +	if (!queue->queued)
  70.383 +		return 0;
  70.384 +
  70.385 +	tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
  70.386 +	merged    = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
  70.387 +	tapdisk_lio_set_eventfd(queue, merged, queue->iocbs);
  70.388 +	submitted = io_submit(lio->aio_ctx, merged, queue->iocbs);
  70.389 +
  70.390 +	DBG("queued: %d, merged: %d, submitted: %d\n",
  70.391 +	    queue->queued, merged, submitted);
  70.392 +
  70.393 +	if (submitted < 0) {
  70.394 +		err = submitted;
  70.395 +		submitted = 0;
  70.396 +	} else if (submitted < merged)
  70.397 +		err = -EIO;
  70.398 +
  70.399 +	queue->iocbs_pending  += submitted;
  70.400 +	queue->tiocbs_pending += queue->queued;
  70.401 +	queue->queued          = 0;
  70.402 +
  70.403 +	if (err)
  70.404 +		queue->tiocbs_pending -= 
  70.405 +			fail_tiocbs(queue, submitted, merged, err);
  70.406 +
  70.407 +	return submitted;
  70.408 +}
  70.409 +
  70.410 +static const struct tio td_tio_lio = {
  70.411 +	.name        = "lio",
  70.412 +	.data_size   = sizeof(struct lio),
  70.413 +	.tio_setup   = tapdisk_lio_setup,
  70.414 +	.tio_destroy = tapdisk_lio_destroy,
  70.415 +	.tio_submit  = tapdisk_lio_submit,
  70.416 +};
  70.417 +
  70.418 +static void
  70.419 +tapdisk_queue_free_io(struct tqueue *queue)
  70.420 +{
  70.421 +	if (queue->tio) {
  70.422 +		if (queue->tio->tio_destroy)
  70.423 +			queue->tio->tio_destroy(queue);
  70.424 +		queue->tio = NULL;
  70.425 +	}
  70.426 +
  70.427 +	if (queue->tio_data) {
  70.428 +		free(queue->tio_data);
  70.429 +		queue->tio_data = NULL;
  70.430 +	}
  70.431 +}
  70.432 +
  70.433 +static int
  70.434 +tapdisk_queue_init_io(struct tqueue *queue, int drv)
  70.435 +{
  70.436 +	const struct tio *tio;
  70.437 +	int err;
  70.438 +
  70.439 +	switch (drv) {
  70.440 +	case TIO_DRV_LIO:
  70.441 +		tio = &td_tio_lio;
  70.442 +		break;
  70.443 +	case TIO_DRV_RWIO:
  70.444 +		tio = &td_tio_rwio;
  70.445 +		break;
  70.446 +	default:
  70.447 +		err = -EINVAL;
  70.448 +		goto fail;
  70.449 +	}
  70.450 +
  70.451 +	queue->tio_data = calloc(1, tio->data_size);
  70.452 +	if (!queue->tio_data) {
  70.453 +		PERROR("malloc(%zu)", tio->data_size);
  70.454 +		err = -errno;
  70.455 +		goto fail;
  70.456 +	}
  70.457 +
  70.458 +	queue->tio = tio;
  70.459 +
  70.460 +	if (tio->tio_setup) {
  70.461 +		err = tio->tio_setup(queue, queue->size);
  70.462 +		if (err)
  70.463 +			goto fail;
  70.464 +	}
  70.465 +
  70.466 +	DPRINTF("I/O queue driver: %s\n", tio->name);
  70.467 +
  70.468 +	return 0;
  70.469 +
  70.470 +fail:
  70.471 +	tapdisk_queue_free_io(queue);
  70.472 +	return err;
  70.473 +}
  70.474 +
  70.475  int
  70.476  tapdisk_init_queue(struct tqueue *queue, int size,
  70.477 -		   int sync, struct tfilter *filter)
  70.478 +		   int drv, struct tfilter *filter)
  70.479  {
  70.480  	int i, err;
  70.481  
  70.482  	memset(queue, 0, sizeof(struct tqueue));
  70.483  
  70.484  	queue->size   = size;
  70.485 -	queue->sync   = sync;
  70.486  	queue->filter = filter;
  70.487  
  70.488 -	if (sync) {
  70.489 -		/* set up a pipe so we can return
  70.490 -		 * a poll fd that won't fire. */
  70.491 -		if (pipe(queue->dummy_pipe))
  70.492 -			return -errno;
  70.493 -		queue->poll_fd = queue->dummy_pipe[0];
  70.494 -	} else {
  70.495 -		queue->aio_ctx = (io_context_t)REQUEST_ASYNC_FD;
  70.496 -		queue->poll_fd = io_setup(size, &queue->aio_ctx);
  70.497 +	if (!size)
  70.498 +		return 0;
  70.499  
  70.500 -		if (queue->poll_fd < 0) {
  70.501 -			if (queue->poll_fd == -EAGAIN)
  70.502 -				DPRINTF("Couldn't setup AIO context.  If you "
  70.503 -					"are trying to concurrently use a "
  70.504 -					"large number of blktap-based disks, "
  70.505 -					"you may need to increase the "
  70.506 -					"system-wide aio request limit. "
  70.507 -					"(e.g. 'echo 1048576 > /proc/sys/fs/"
  70.508 -					"aio-max-nr')\n");
  70.509 -			else
  70.510 -				DPRINTF("Couldn't get fd for AIO poll "
  70.511 -					"support.  This is probably because "
  70.512 -					"your kernel does not have the "
  70.513 -					"aio-poll patch applied.\n");
  70.514 -			return queue->poll_fd;
  70.515 -		}
  70.516 +	err = tapdisk_queue_init_io(queue, drv);
  70.517 +	if (err)
  70.518 +		goto fail;
  70.519 +
  70.520 +	queue->iocbs = calloc(size, sizeof(struct iocb *));
  70.521 +	if (!queue->iocbs) {
  70.522 +		err = -errno;
  70.523 +		goto fail;
  70.524  	}
  70.525  
  70.526 -	err               = -ENOMEM;
  70.527 -	queue->iocbs      = calloc(size, sizeof(struct iocb *));
  70.528 -	queue->aio_events = calloc(size, sizeof(struct io_event));
  70.529 -	if (!queue->iocbs || !queue->aio_events)
  70.530 -		goto fail;
  70.531 -
  70.532  	err = opio_init(&queue->opioctx, size);
  70.533  	if (err)
  70.534  		goto fail;
  70.535 @@ -280,14 +642,11 @@ tapdisk_init_queue(struct tqueue *queue,
  70.536  void
  70.537  tapdisk_free_queue(struct tqueue *queue)
  70.538  {
  70.539 -	if (queue->sync) {
  70.540 -		close(queue->dummy_pipe[0]);
  70.541 -		close(queue->dummy_pipe[1]);
  70.542 -	} else
  70.543 -		io_destroy(queue->aio_ctx);
  70.544 +	tapdisk_queue_free_io(queue);
  70.545  
  70.546  	free(queue->iocbs);
  70.547 -	free(queue->aio_events);
  70.548 +	queue->iocbs = NULL;
  70.549 +
  70.550  	opio_free(&queue->opioctx);
  70.551  }
  70.552  
  70.553 @@ -297,9 +656,9 @@ tapdisk_debug_queue(struct tqueue *queue
  70.554  	struct tiocb *tiocb = queue->deferred.head;
  70.555  
  70.556  	WARN("TAPDISK QUEUE:\n");
  70.557 -	WARN("size: %d, sync: %d, queued: %d, iocbs_pending: %d, "
  70.558 +	WARN("size: %d, tio: %s, queued: %d, iocbs_pending: %d, "
  70.559  	     "tiocbs_pending: %d, tiocbs_deferred: %d, deferrals: %"PRIx64"\n",
  70.560 -	     queue->size, queue->sync, queue->queued, queue->iocbs_pending,
  70.561 +	     queue->size, queue->tio->name, queue->queued, queue->iocbs_pending,
  70.562  	     queue->tiocbs_pending, queue->tiocbs_deferred, queue->deferrals);
  70.563  
  70.564  	if (tiocb) {
  70.565 @@ -340,42 +699,14 @@ tapdisk_queue_tiocb(struct tqueue *queue
  70.566  		defer_tiocb(queue, tiocb);
  70.567  }
  70.568  
  70.569 +
  70.570  /*
  70.571   * fail_tiocbs may queue more tiocbs
  70.572   */
  70.573  int
  70.574  tapdisk_submit_tiocbs(struct tqueue *queue)
  70.575  {
  70.576 -	int merged, submitted, err = 0;
  70.577 -
  70.578 -	if (!queue->queued)
  70.579 -		return 0;
  70.580 -
  70.581 -	if (queue->sync)
  70.582 -		return io_synchronous_rw(queue);
  70.583 -
  70.584 -	tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
  70.585 -	merged    = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
  70.586 -	submitted = io_submit(queue->aio_ctx, merged, queue->iocbs);
  70.587 -
  70.588 -	DBG("queued: %d, merged: %d, submitted: %d\n",
  70.589 -	    queue->queued, merged, submitted);
  70.590 -
  70.591 -	if (submitted < 0) {
  70.592 -		err = submitted;
  70.593 -		submitted = 0;
  70.594 -	} else if (submitted < merged)
  70.595 -		err = -EIO;
  70.596 -
  70.597 -	queue->iocbs_pending  += submitted;
  70.598 -	queue->tiocbs_pending += queue->queued;
  70.599 -	queue->queued          = 0;
  70.600 -
  70.601 -	if (err)
  70.602 -		queue->tiocbs_pending -= 
  70.603 -			fail_tiocbs(queue, submitted, merged, err);
  70.604 -
  70.605 -	return submitted;
  70.606 +	return queue->tio->tio_submit(queue);
  70.607  }
  70.608  
  70.609  int
  70.610 @@ -390,35 +721,6 @@ tapdisk_submit_all_tiocbs(struct tqueue 
  70.611  	return submitted;
  70.612  }
  70.613  
  70.614 -int
  70.615 -tapdisk_complete_tiocbs(struct tqueue *queue)
  70.616 -{
  70.617 -	int i, ret, split;
  70.618 -	struct iocb *iocb;
  70.619 -	struct tiocb *tiocb;
  70.620 -	struct io_event *ep;
  70.621 -
  70.622 -	ret   = io_getevents(queue->aio_ctx, 0,
  70.623 -			     queue->size, queue->aio_events, NULL);
  70.624 -	split = io_split(&queue->opioctx, queue->aio_events, ret);
  70.625 -	tapdisk_filter_events(queue->filter, queue->aio_events, split);
  70.626 -
  70.627 -	DBG("events: %d, tiocbs: %d\n", ret, split);
  70.628 -
  70.629 -	queue->iocbs_pending  -= ret;
  70.630 -	queue->tiocbs_pending -= split;
  70.631 -
  70.632 -	for (i = split, ep = queue->aio_events; i-- > 0; ep++) {
  70.633 -		iocb  = ep->obj;
  70.634 -		tiocb = (struct tiocb *)iocb->data;
  70.635 -		complete_tiocb(queue, tiocb, ep->res);
  70.636 -	}
  70.637 -
  70.638 -	queue_deferred_tiocbs(queue);
  70.639 -
  70.640 -	return split;
  70.641 -}
  70.642 -
  70.643  /*
  70.644   * cancel_tiocbs may queue more tiocbs
  70.645   */
    71.1 --- a/tools/blktap2/drivers/tapdisk-queue.h	Mon Nov 02 19:35:54 2009 -0800
    71.2 +++ b/tools/blktap2/drivers/tapdisk-queue.h	Fri Mar 19 18:36:57 2010 -0700
    71.3 @@ -32,6 +32,7 @@
    71.4  #include <libaio.h>
    71.5  
    71.6  #include "io-optimize.h"
    71.7 +#include "scheduler.h"
    71.8  
    71.9  struct tiocb;
   71.10  struct tfilter;
   71.11 @@ -54,16 +55,14 @@ struct tlist {
   71.12  
   71.13  struct tqueue {
   71.14  	int                   size;
   71.15 -	int                   sync;
   71.16  
   71.17 -	int                   poll_fd;
   71.18 -	io_context_t          aio_ctx;
   71.19 +	const struct tio     *tio;
   71.20 +	void                 *tio_data;
   71.21 +
   71.22  	struct opioctx        opioctx;
   71.23 -	int                   dummy_pipe[2];
   71.24  
   71.25  	int                   queued;
   71.26  	struct iocb         **iocbs;
   71.27 -	struct io_event      *aio_events;
   71.28  
   71.29  	/* number of iocbs pending in the aio layer */
   71.30  	int                   iocbs_pending;
   71.31 @@ -85,6 +84,20 @@ struct tqueue {
   71.32  	uint64_t              deferrals;
   71.33  };
   71.34  
   71.35 +struct tio {
   71.36 +	const char           *name;
   71.37 +	size_t                data_size;
   71.38 +
   71.39 +	int  (*tio_setup)    (struct tqueue *queue, int qlen);
   71.40 +	void (*tio_destroy)  (struct tqueue *queue);
   71.41 +	int  (*tio_submit)   (struct tqueue *queue);
   71.42 +};
   71.43 +
   71.44 +enum {
   71.45 +	TIO_DRV_LIO     = 1,
   71.46 +	TIO_DRV_RWIO    = 2,
   71.47 +};
   71.48 +
   71.49  /*
   71.50   * Interface for request producer (i.e., tapdisk)
   71.51   * NB: the following functions may cause additional tiocbs to be queued:
   71.52 @@ -98,13 +111,12 @@ struct tqueue {
   71.53  #define tapdisk_queue_empty(q) ((q)->queued == 0)
   71.54  #define tapdisk_queue_full(q)  \
   71.55  	(((q)->tiocbs_pending + (q)->queued) >= (q)->size)
   71.56 -int tapdisk_init_queue(struct tqueue *, int size, int sync, struct tfilter *);
   71.57 +int tapdisk_init_queue(struct tqueue *, int size, int drv, struct tfilter *);
   71.58  void tapdisk_free_queue(struct tqueue *);
   71.59  void tapdisk_debug_queue(struct tqueue *);
   71.60  void tapdisk_queue_tiocb(struct tqueue *, struct tiocb *);
   71.61  int tapdisk_submit_tiocbs(struct tqueue *);
   71.62  int tapdisk_submit_all_tiocbs(struct tqueue *);
   71.63 -int tapdisk_complete_tiocbs(struct tqueue *);
   71.64  int tapdisk_cancel_tiocbs(struct tqueue *);
   71.65  int tapdisk_cancel_all_tiocbs(struct tqueue *);
   71.66  void tapdisk_prep_tiocb(struct tiocb *, int, int, char *, size_t,
    72.1 --- a/tools/blktap2/drivers/tapdisk-server.c	Mon Nov 02 19:35:54 2009 -0800
    72.2 +++ b/tools/blktap2/drivers/tapdisk-server.c	Fri Mar 19 18:36:57 2010 -0700
    72.3 @@ -26,7 +26,6 @@
    72.4   * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    72.5   */
    72.6  #include <stdio.h>
    72.7 -#include <fcntl.h>
    72.8  #include <errno.h>
    72.9  #include <unistd.h>
   72.10  #include <stdlib.h>
   72.11 @@ -222,63 +221,36 @@ tapdisk_server_send_error(const char *me
   72.12  		tapdisk_ipc_write_error(&vbd->ipc, message);
   72.13  }
   72.14  
   72.15 -static void
   72.16 -tapdisk_server_read_ipc_message(event_id_t id, char mode, void *private)
   72.17 +static int
   72.18 +tapdisk_server_init_ipc(const char *read, const char *write)
   72.19  {
   72.20 -	tapdisk_ipc_read(&server.ipc);
   72.21 +	return tapdisk_ipc_open(&server.ipc, read, write);
   72.22  }
   72.23  
   72.24  static void
   72.25 -tapdisk_server_aio_queue_event(event_id_t id, char mode, void *private)
   72.26 +tapdisk_server_close_ipc(void)
   72.27  {
   72.28 -	tapdisk_complete_tiocbs(&server.aio_queue);
   72.29 -}
   72.30 -
   72.31 -static void
   72.32 -tapdisk_server_free_aio_queue(void)
   72.33 -{
   72.34 -	tapdisk_server_unregister_event(server.aio_queue_event_id);
   72.35 -	tapdisk_free_queue(&server.aio_queue);
   72.36 +	tapdisk_ipc_close(&server.ipc);
   72.37  }
   72.38  
   72.39  static int
   72.40 -tapdisk_server_initialize_aio_queue(void)
   72.41 +tapdisk_server_init_aio(void)
   72.42  {
   72.43 -	int err;
   72.44 -	event_id_t id;
   72.45 -
   72.46 -	err = tapdisk_init_queue(&server.aio_queue,
   72.47 -				 TAPDISK_TIOCBS, 0, NULL);
   72.48 -	if (err)
   72.49 -		return err;
   72.50 +	return tapdisk_init_queue(&server.aio_queue, TAPDISK_TIOCBS,
   72.51 +				  TIO_DRV_LIO, NULL);
   72.52 +}
   72.53  
   72.54 -	id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
   72.55 -					   server.aio_queue.poll_fd, 0,
   72.56 -					   tapdisk_server_aio_queue_event,
   72.57 -					   NULL);
   72.58 -	if (id < 0) {
   72.59 -		tapdisk_free_queue(&server.aio_queue);
   72.60 -		return id;
   72.61 -	}
   72.62 -
   72.63 -	server.aio_queue_event_id = id;
   72.64 -
   72.65 -	return 0;
   72.66 +static void
   72.67 +tapdisk_server_close_aio(void)
   72.68 +{
   72.69 +	tapdisk_free_queue(&server.aio_queue);
   72.70  }
   72.71  
   72.72  static void
   72.73  tapdisk_server_close(void)
   72.74  {
   72.75 -	tapdisk_server_free_aio_queue();
   72.76 -
   72.77 -	if (server.control_event)
   72.78 -		scheduler_unregister_event(&server.scheduler, server.control_event);
   72.79 -
   72.80 -	if (server.ipc.rfd != -1)
   72.81 -		close(server.ipc.rfd);
   72.82 -
   72.83 -	if (server.ipc.wfd != -1)
   72.84 -		close(server.ipc.wfd);
   72.85 +	tapdisk_server_close_aio();
   72.86 +	tapdisk_server_close_ipc();
   72.87  }
   72.88  
   72.89  static void
   72.90 @@ -334,63 +306,26 @@ int
   72.91  tapdisk_server_initialize(const char *read, const char *write)
   72.92  {
   72.93  	int err;
   72.94 -	event_id_t event_id;
   72.95  
   72.96 -	event_id = 0;
   72.97  	memset(&server, 0, sizeof(tapdisk_server_t));
   72.98 -	server.ipc.rfd = server.ipc.wfd = -1;
   72.99 -
  72.100  	INIT_LIST_HEAD(&server.vbds);
  72.101  
  72.102 -	if (read) {
  72.103 -		server.ipc.rfd = open(read, O_RDWR | O_NONBLOCK);
  72.104 -		if (server.ipc.rfd < 0) {
  72.105 -			err = -errno;
  72.106 -			EPRINTF("FD open failed %s: %d\n", read, err);
  72.107 -			goto fail;
  72.108 -		}
  72.109 -	}
  72.110 -
  72.111 -	if (write) {
  72.112 -		server.ipc.wfd = open(write, O_RDWR | O_NONBLOCK);
  72.113 -		if (server.ipc.wfd < 0) {
  72.114 -			err = -errno;
  72.115 -			EPRINTF("FD open failed %s, %d\n", write, err);
  72.116 -			goto fail;
  72.117 -		}
  72.118 -	}
  72.119 -
  72.120  	scheduler_initialize(&server.scheduler);
  72.121  
  72.122 -	if (read) {
  72.123 -		event_id = scheduler_register_event(&server.scheduler,
  72.124 -						    SCHEDULER_POLL_READ_FD,
  72.125 -						    server.ipc.rfd, 0,
  72.126 -						    tapdisk_server_read_ipc_message,
  72.127 -						    NULL);
  72.128 -		if (event_id < 0) {
  72.129 -			err = event_id;
  72.130 -			goto fail;
  72.131 -		}
  72.132 -	}
  72.133 -
  72.134 -	err = tapdisk_server_initialize_aio_queue();
  72.135 +	err = tapdisk_server_init_ipc(read, write);
  72.136  	if (err)
  72.137  		goto fail;
  72.138  
  72.139 -	server.control_event = event_id;
  72.140 +	err = tapdisk_server_init_aio();
  72.141 +	if (err)
  72.142 +		goto fail;
  72.143 +
  72.144  	server.run = 1;
  72.145  
  72.146  	return 0;
  72.147  
  72.148  fail:
  72.149 -	if (server.ipc.rfd > 0)
  72.150 -		close(server.ipc.rfd);
  72.151 -	if (server.ipc.wfd > 0)
  72.152 -		close(server.ipc.wfd);
  72.153 -	if (event_id > 0)
  72.154 -		scheduler_unregister_event(&server.scheduler,
  72.155 -					   server.control_event);
  72.156 +	tapdisk_server_close_ipc();
  72.157  	return err;
  72.158  }
  72.159  
    73.1 --- a/tools/blktap2/drivers/tapdisk-server.h	Mon Nov 02 19:35:54 2009 -0800
    73.2 +++ b/tools/blktap2/drivers/tapdisk-server.h	Fri Mar 19 18:36:57 2010 -0700
    73.3 @@ -57,9 +57,7 @@ typedef struct tapdisk_server {
    73.4  	td_ipc_t                     ipc;
    73.5  	struct list_head             vbds;
    73.6  	scheduler_t                  scheduler;
    73.7 -	event_id_t                   control_event;
    73.8  	struct tqueue                aio_queue;
    73.9 -	event_id_t                   aio_queue_event_id;
   73.10  } tapdisk_server_t;
   73.11  
   73.12  #endif
    74.1 --- a/tools/blktap2/drivers/tapdisk-utils.c	Mon Nov 02 19:35:54 2009 -0800
    74.2 +++ b/tools/blktap2/drivers/tapdisk-utils.c	Fri Mar 19 18:36:57 2010 -0700
    74.3 @@ -33,6 +33,10 @@
    74.4  #include <sys/mman.h>
    74.5  #include <sys/ioctl.h>
    74.6  #include <sys/resource.h>
    74.7 +#include <sys/utsname.h>
    74.8 +#ifdef __linux__
    74.9 +#include <linux/version.h>
   74.10 +#endif
   74.11  
   74.12  #include "blk.h"
   74.13  #include "tapdisk.h"
   74.14 @@ -183,3 +187,31 @@ tapdisk_get_image_size(int fd, uint64_t 
   74.15  
   74.16  	return 0;
   74.17  }
   74.18 +
   74.19 +#ifdef __linux__
   74.20 +
   74.21 +int tapdisk_linux_version(void)
   74.22 +{
   74.23 +	struct utsname uts;
   74.24 +	unsigned int version, patchlevel, sublevel;
   74.25 +	int n, err;
   74.26 +
   74.27 +	err = uname(&uts);
   74.28 +	if (err)
   74.29 +		return -errno;
   74.30 +
   74.31 +	n = sscanf(uts.release, "%u.%u.%u", &version, &patchlevel, &sublevel);
   74.32 +	if (n != 3)
   74.33 +		return -ENOSYS;
   74.34 +
   74.35 +	return KERNEL_VERSION(version, patchlevel, sublevel);
   74.36 +}
   74.37 +
   74.38 +#else
   74.39 +
   74.40 +int tapdisk_linux_version(void)
   74.41 +{
   74.42 +	return -ENOSYS;
   74.43 +}
   74.44 +
   74.45 +#endif
    75.1 --- a/tools/blktap2/drivers/tapdisk-utils.h	Mon Nov 02 19:35:54 2009 -0800
    75.2 +++ b/tools/blktap2/drivers/tapdisk-utils.h	Fri Mar 19 18:36:57 2010 -0700
    75.3 @@ -38,5 +38,6 @@ int tapdisk_set_resource_limits(void);
    75.4  int tapdisk_namedup(char **, const char *);
    75.5  int tapdisk_parse_disk_type(const char *, char **, int *);
    75.6  int tapdisk_get_image_size(int, uint64_t *, uint32_t *);
    75.7 +int tapdisk_linux_version(void);
    75.8  
    75.9  #endif
    76.1 --- a/tools/blktap2/drivers/tapdisk-vbd.c	Mon Nov 02 19:35:54 2009 -0800
    76.2 +++ b/tools/blktap2/drivers/tapdisk-vbd.c	Fri Mar 19 18:36:57 2010 -0700
    76.3 @@ -34,6 +34,9 @@
    76.4  #include <libgen.h>
    76.5  #include <sys/mman.h>
    76.6  #include <sys/ioctl.h>
    76.7 +#ifdef MEMSHR
    76.8 +#include <memshr.h>
    76.9 +#endif
   76.10  
   76.11  #include "libvhd.h"
   76.12  #include "tapdisk-image.h"
   76.13 @@ -105,7 +108,12 @@ tapdisk_vbd_initialize(int rfd, int wfd,
   76.14  	/* default blktap ring completion */
   76.15  	vbd->callback = tapdisk_vbd_callback;
   76.16  	vbd->argument = vbd;
   76.17 +    
   76.18 +#ifdef MEMSHR
   76.19 +	memshr_vbd_initialize();
   76.20 +#endif
   76.21  
   76.22 +	INIT_LIST_HEAD(&vbd->driver_stack);
   76.23  	INIT_LIST_HEAD(&vbd->images);
   76.24  	INIT_LIST_HEAD(&vbd->new_requests);
   76.25  	INIT_LIST_HEAD(&vbd->pending_requests);
   76.26 @@ -541,6 +549,105 @@ regerr:
   76.27  	goto out;
   76.28  }
   76.29  
   76.30 +/* TODO: ugh, let's not call it parent info... */
   76.31 +static struct list_head *
   76.32 +tapdisk_vbd_open_level(td_vbd_t *vbd, char* params, int driver_type, td_disk_info_t *parent_info, td_flag_t flags)
   76.33 +{
   76.34 +	char *name;
   76.35 +	int type, err;
   76.36 +	td_image_t *image;
   76.37 +	td_disk_id_t id;
   76.38 +	struct  list_head *images;
   76.39 +	td_driver_t *driver;
   76.40 +
   76.41 +	images = calloc(1, sizeof(struct list_head));
   76.42 +	INIT_LIST_HEAD(images);
   76.43 +
   76.44 +	name   = params;
   76.45 +	type   = driver_type;
   76.46 +
   76.47 +	for (;;) {
   76.48 +		err   = -ENOMEM;
   76.49 +		image = tapdisk_image_allocate(name, type,
   76.50 +					       vbd->storage, flags, vbd);
   76.51 +
   76.52 +		/* free 'name' if it was created by td_get_parent_id() */
   76.53 +		if (name != params) {
   76.54 +			free(name);
   76.55 +			name = NULL;
   76.56 +		}
   76.57 +
   76.58 +		if (!image)
   76.59 +			return NULL;
   76.60 +
   76.61 +
   76.62 +		/* We have to do this to set the driver info for child drivers.  This conflicts with td_open. */
   76.63 +		driver = image->driver;
   76.64 +		if (!driver) {
   76.65 +			driver = tapdisk_driver_allocate(image->type,
   76.66 +							 image->name,
   76.67 +							 image->flags,
   76.68 +							 image->storage);
   76.69 +			if (!driver)
   76.70 +				return NULL;
   76.71 +		}
   76.72 +		/* the image has a driver, set the info and driver */
   76.73 +		image->driver = driver;
   76.74 +		image->info = driver->info;
   76.75 +
   76.76 +		/* XXX: we don't touch driver->refcount, broken? */
   76.77 +		/* XXX: we've replicated about 90% of td_open() gross! */
   76.78 +		/* XXX: this breaks if a driver modifies its info within a layer */
   76.79 +
   76.80 +		/* if the parent info is set, pass it to the child */
   76.81 +		if(parent_info)
   76.82 +		{
   76.83 +			image->driver->info = *parent_info;
   76.84 +		}
   76.85 +
   76.86 +		err = td_load(image);
   76.87 +		if (err) {
   76.88 +			if (err != -ENODEV)
   76.89 +				return NULL;
   76.90 +
   76.91 +			err = td_open(image);
   76.92 +			if (err)
   76.93 +				return NULL;
   76.94 +		}
   76.95 +
   76.96 +		/* TODO: non-sink drivers that don't care about their child
   76.97 +		 * currently return EINVAL. Could return TD_PARENT_OK or
   76.98 +		 * TD_ANY_PARENT */
   76.99 +
  76.100 +		err = td_get_parent_id(image, &id);
  76.101 +		if (err && (err != TD_NO_PARENT && err != -EINVAL)) {
  76.102 +			td_close(image);
  76.103 +			return NULL;
  76.104 +		}
  76.105 +
  76.106 +		if (!image->storage)
  76.107 +			image->storage = vbd->storage;
  76.108 +
  76.109 +		/* add this image to the end of the list */
  76.110 +		list_add_tail(&image->next, images);
  76.111 +
  76.112 +		image = NULL;
  76.113 +
  76.114 +		/* if the image does not have a parent we return the
  76.115 +		 * list of images generated by this level of the stack */
  76.116 +		if (err == TD_NO_PARENT || err == -EINVAL)
  76.117 +			break;
  76.118 +
  76.119 +		name   = id.name;
  76.120 +		type   = id.drivertype;
  76.121 +#if 0
  76.122 +		/* catch this by validate, not here */
  76.123 +		flags |= (TD_OPEN_RDONLY | TD_OPEN_SHAREABLE);
  76.124 +#endif
  76.125 +	}
  76.126 +	return images;
  76.127 +}
  76.128 +
  76.129  static int
  76.130  __tapdisk_vbd_open_vdi(td_vbd_t *vbd, td_flag_t extra_flags)
  76.131  {
  76.132 @@ -548,58 +655,35 @@ static int
  76.133  	int err, type;
  76.134  	td_flag_t flags;
  76.135  	td_disk_id_t id;
  76.136 -	td_image_t *image, *tmp;
  76.137 +	td_image_t *tmp;
  76.138  	struct tfilter *filter = NULL;
  76.139 +	td_vbd_driver_info_t *driver_info;
  76.140 +	struct list_head *images;
  76.141 +	td_disk_info_t *parent_info = NULL;
  76.142  
  76.143  	err = tapdisk_vbd_reactivate_volumes(vbd, 0);
  76.144  	if (err)
  76.145  		return err;
  76.146  
  76.147  	flags = (vbd->flags & ~TD_OPEN_SHAREABLE) | extra_flags;
  76.148 -	file  = vbd->name;
  76.149 -	type  = vbd->type;
  76.150 -
  76.151 -	for (;;) {
  76.152 -		err   = -ENOMEM;
  76.153 -		image = tapdisk_image_allocate(file, type,
  76.154 -					       vbd->storage, flags, vbd);
  76.155 -
  76.156 -		if (file != vbd->name) {
  76.157 -			free(file);
  76.158 -			file = NULL;
  76.159 -		}
  76.160 -
  76.161 -		if (!image)
  76.162 -			goto fail;
  76.163 -
  76.164 -		err = td_load(image);
  76.165 -		if (err) {
  76.166 -			if (err != -ENODEV)
  76.167 -				goto fail;
  76.168  
  76.169 -			err = td_open(image);
  76.170 -			if (err)
  76.171 -				goto fail;
  76.172 -		}
  76.173 -
  76.174 -		err = td_get_parent_id(image, &id);
  76.175 -		if (err && err != TD_NO_PARENT) {
  76.176 -			td_close(image);
  76.177 -			goto fail;
  76.178 -		}
  76.179 +	/* loop on each user specified driver.
  76.180 +	 * NOTE: driver_info is in reverse order. That is, the first
  76.181 +	 * item is the 'parent' or 'sink' driver */
  76.182 +	list_for_each_entry(driver_info, &vbd->driver_stack, next) {
  76.183 +		file = driver_info->params;
  76.184 +		type = driver_info->type;
  76.185 +		images = tapdisk_vbd_open_level(vbd, file, type, parent_info, flags);
  76.186 +		if (!images)
  76.187 +			return -EINVAL;
  76.188  
  76.189 -		if (!image->storage)
  76.190 -			image->storage = vbd->storage;
  76.191 -
  76.192 -		tapdisk_vbd_add_image(vbd, image);
  76.193 -		image = NULL;
  76.194 +		/* after each loop, append the created stack to the result stack */
  76.195 +		list_splice(images, &vbd->images);
  76.196 +		free(images);
  76.197  
  76.198 -		if (err == TD_NO_PARENT)
  76.199 -			break;
  76.200 -
  76.201 -		file   = id.name;
  76.202 -		type   = id.drivertype;
  76.203 -		flags |= (TD_OPEN_RDONLY | TD_OPEN_SHAREABLE);
  76.204 +		/* set the parent_info to the first diskinfo on the stack */
  76.205 +		tmp = tapdisk_vbd_first_image(vbd);
  76.206 +		parent_info = &tmp->info;
  76.207  	}
  76.208  
  76.209  	if (td_flag_test(vbd->flags, TD_OPEN_LOG_DIRTY)) {
  76.210 @@ -623,14 +707,91 @@ static int
  76.211  	return 0;
  76.212  
  76.213  fail:
  76.214 +
  76.215 +/* TODO: loop over vbd to free images? maybe do that in vbd_close_vdi */
  76.216 +#if 0
  76.217  	if (image)
  76.218  		tapdisk_image_free(image);
  76.219 +#endif
  76.220  
  76.221 +	/* TODO: handle partial stack creation? */
  76.222  	tapdisk_vbd_close_vdi(vbd);
  76.223  
  76.224  	return err;
  76.225  }
  76.226  
  76.227 +/* this parses the '|'-separated driver list in path and populates vbd->driver_stack */
  76.228 +int
  76.229 +tapdisk_vbd_parse_stack(td_vbd_t *vbd, const char *path)
  76.230 +{
  76.231 +	int err;
  76.232 +	char *params, *driver_str;
  76.233 +	td_vbd_driver_info_t *driver;
  76.234 +
  76.235 +	/* make a copy of path */
  76.236 +	/* TODO: check against MAX_NAME_LEN ? */
  76.237 +	err = tapdisk_namedup(&params, path);
  76.238 +	if(err)
  76.239 +		goto error;
  76.240 +
  76.241 +
  76.242 +	/* tokenize params based on pipe '|' */
  76.243 +	driver_str = strtok(params, "|");
  76.244 +	while(driver_str != NULL)
  76.245 +	{
  76.246 +		/* parse driver info and add to vbd */
  76.247 +		driver = calloc(1, sizeof(td_vbd_driver_info_t));
  76.248 +		INIT_LIST_HEAD(&driver->next);
  76.249 +		err = tapdisk_parse_disk_type(driver_str, &driver->params, &driver->type);
  76.250 +		if(err)
  76.251 +			goto error;
  76.252 +
  76.253 +		/* build the list backwards as the last driver will be the first
  76.254 +		 * driver to open in the stack */
  76.255 +		list_add(&driver->next, &vbd->driver_stack);
  76.256 +
  76.257 +		/* get next driver string */
  76.258 +		driver_str = strtok(NULL, "|");
  76.259 +	}
  76.260 +
  76.261 +	return 0;
  76.262 +
  76.263 +	/* error: free any driver_info's and params */
  76.264 + error:
  76.265 +	while(!list_empty(&vbd->driver_stack)) {
  76.266 +		driver = list_entry(vbd->driver_stack.next, td_vbd_driver_info_t, next);
  76.267 +		list_del(&driver->next);
  76.268 +		free(driver);
  76.269 +	}
  76.270 +
  76.271 +	return err;
  76.272 +}
  76.273 +
  76.274 +/* NOTE: driver type, etc. must be set */
  76.275 +static int
  76.276 +tapdisk_vbd_open_stack(td_vbd_t *vbd, uint16_t storage, td_flag_t flags)
  76.277 +{
  76.278 +	int i, err;
  76.279 +
  76.280 +	vbd->flags   = flags;
  76.281 +	vbd->storage = storage;
  76.282 +
  76.283 +	for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
  76.284 +		err = __tapdisk_vbd_open_vdi(vbd, 0);
  76.285 +		if (err != -EIO)
  76.286 +			break;
  76.287 +
  76.288 +		sleep(TD_VBD_EIO_SLEEP);
  76.289 +	}
  76.290 +	if (err)
  76.291 +		goto fail;
  76.292 +
  76.293 +	return 0;
  76.294 +
  76.295 + fail:
  76.296 +	return err;
  76.297 +}
  76.298 +
  76.299  int
  76.300  tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *path,
  76.301  		     uint16_t drivertype, uint16_t storage, td_flag_t flags)
  76.302 @@ -759,7 +920,7 @@ tapdisk_vbd_open(td_vbd_t *vbd, const ch
  76.303  {
  76.304  	int err;
  76.305  
  76.306 -	err = tapdisk_vbd_open_vdi(vbd, name, type, storage, flags);
  76.307 +	err = tapdisk_vbd_open_stack(vbd, storage, flags);
  76.308  	if (err)
  76.309  		goto out;
  76.310  
  76.311 @@ -1099,6 +1260,8 @@ tapdisk_vbd_kick(td_vbd_t *vbd)
  76.312  	int n;
  76.313  	td_ring_t *ring;
  76.314  
  76.315 +	tapdisk_vbd_check_state(vbd);
  76.316 +
  76.317  	ring = &vbd->ring;
  76.318  	if (!ring->sring)
  76.319  		return 0;
  76.320 @@ -1261,11 +1424,26 @@ tapdisk_vbd_complete_vbd_request(td_vbd_
  76.321  	}
  76.322  }
  76.323  
  76.324 +static uint64_t 
  76.325 +tapdisk_vbd_breq_get_sector(blkif_request_t *breq, td_request_t treq)
  76.326 +{
  76.327 +    int seg, nsects; 
  76.328 +    uint64_t sector_nr = breq->sector_number; 
  76.329 +    
  76.330 +    for(seg=0; seg < treq.sidx; seg++) {
  76.331 +        nsects = breq->seg[seg].last_sect - breq->seg[seg].first_sect + 1;
  76.332 +        sector_nr += nsects;
  76.333 +    }
  76.334 +
  76.335 +    return sector_nr;
  76.336 +}
  76.337 +
  76.338  static void
  76.339  __tapdisk_vbd_complete_td_request(td_vbd_t *vbd, td_vbd_request_t *vreq,
  76.340  				  td_request_t treq, int res)
  76.341  {
  76.342  	int err;
  76.343 +    td_image_t *image = treq.image;
  76.344  
  76.345  	err = (res <= 0 ? res : -res);
  76.346  	vbd->secs_pending  -= treq.secs;
  76.347 @@ -1283,6 +1461,22 @@ static void
  76.348  			    (treq.op == TD_OP_WRITE ? "write" : "read"),
  76.349  			    treq.secs, treq.sec);
  76.350  		}
  76.351 +	} else {
  76.352 +#ifdef MEMSHR
  76.353 +		if (treq.op == TD_OP_READ
  76.354 +		   && td_flag_test(image->flags, TD_OPEN_RDONLY)) {
  76.355 +			uint64_t hnd  = treq.memshr_hnd;
  76.356 +			uint16_t uid  = image->memshr_id;
  76.357 +			blkif_request_t *breq = &vreq->req;
  76.358 +			uint64_t sec  = tapdisk_vbd_breq_get_sector(breq, treq);
  76.359 +			int secs = breq->seg[treq.sidx].last_sect -
  76.360 +			    breq->seg[treq.sidx].first_sect + 1;
  76.361 +
  76.362 +			if (hnd != 0)
  76.363 +				memshr_vbd_complete_ro_request(hnd, uid,
  76.364 +								sec, secs);
  76.365 +		}
  76.366 +#endif
  76.367  	}
  76.368  
  76.369  	tapdisk_vbd_complete_vbd_request(vbd, vreq);
  76.370 @@ -1335,7 +1529,28 @@ static void
  76.371  		break;
  76.372  
  76.373  	case TD_OP_READ:
  76.374 -		td_queue_read(parent, treq);
  76.375 +#ifdef MEMSHR
  76.376 +		if(td_flag_test(parent->flags, TD_OPEN_RDONLY)) {
  76.377 +			int ret, seg = treq.sidx;
  76.378 +			blkif_request_t *breq = &vreq->req;
  76.379 +        
  76.380 +			ret = memshr_vbd_issue_ro_request(treq.buf,
  76.381 +			      breq->seg[seg].gref,
  76.382 +			      parent->memshr_id,
  76.383 +			      treq.sec,
  76.384 +			      treq.secs,
  76.385 +			      &treq.memshr_hnd);
  76.386 +			if(ret == 0) {
  76.387 +				/* Reset memshr handle. This'll prevent
  76.388 +				 * memshr_vbd_complete_ro_request being called
  76.389 +				 */
  76.390 +				treq.memshr_hnd = 0;
  76.391 +				td_complete_request(treq, 0);
  76.392 +			} else
  76.393 +				td_queue_read(parent, treq);
  76.394 +		} else
  76.395 +#endif
  76.396 +			td_queue_read(parent, treq);
  76.397  		break;
  76.398  	}
  76.399  
  76.400 @@ -1406,9 +1621,11 @@ tapdisk_vbd_issue_request(td_vbd_t *vbd,
  76.401  	gettimeofday(&vreq->last_try, NULL);
  76.402  	tapdisk_vbd_move_request(vreq, &vbd->pending_requests);
  76.403  
  76.404 +#if 0
  76.405  	err = tapdisk_vbd_check_queue(vbd);
  76.406  	if (err)
  76.407  		goto fail;
  76.408 +#endif
  76.409  
  76.410  	err = tapdisk_image_check_ring_request(image, req);
  76.411  	if (err)
    77.1 --- a/tools/blktap2/drivers/tapdisk-vbd.h	Mon Nov 02 19:35:54 2009 -0800
    77.2 +++ b/tools/blktap2/drivers/tapdisk-vbd.h	Fri Mar 19 18:36:57 2010 -0700
    77.3 @@ -53,6 +53,7 @@
    77.4  
    77.5  typedef struct td_ring              td_ring_t;
    77.6  typedef struct td_vbd_request       td_vbd_request_t;
    77.7 +typedef struct td_vbd_driver_info   td_vbd_driver_info_t;
    77.8  typedef struct td_vbd_handle        td_vbd_t;
    77.9  typedef void (*td_vbd_cb_t)        (void *, blkif_response_t *);
   77.10  
   77.11 @@ -79,12 +80,20 @@ struct td_vbd_request {
   77.12  	struct list_head            next;
   77.13  };
   77.14  
   77.15 +struct td_vbd_driver_info {
   77.16 +	char                       *params;
   77.17 +	int                         type;
   77.18 +	struct list_head            next;
   77.19 +};
   77.20 +
   77.21  struct td_vbd_handle {
   77.22  	char                       *name;
   77.23  
   77.24  	td_uuid_t                   uuid;
   77.25  	int                         type;
   77.26  
   77.27 +	struct list_head            driver_stack;
   77.28 +
   77.29  	int                         storage;
   77.30  
   77.31  	uint8_t                     reopened;
   77.32 @@ -164,6 +173,7 @@ tapdisk_vbd_next_image(td_image_t *image
   77.33  
   77.34  int tapdisk_vbd_initialize(int, int, td_uuid_t);
   77.35  void tapdisk_vbd_set_callback(td_vbd_t *, td_vbd_cb_t, void *);
   77.36 +int tapdisk_vbd_parse_stack(td_vbd_t *vbd, const char *path);
   77.37  int tapdisk_vbd_open(td_vbd_t *, const char *, uint16_t,
   77.38  		     uint16_t, const char *, td_flag_t);
   77.39  int tapdisk_vbd_close(td_vbd_t *);
    78.1 --- a/tools/blktap2/drivers/tapdisk.h	Mon Nov 02 19:35:54 2009 -0800
    78.2 +++ b/tools/blktap2/drivers/tapdisk.h	Fri Mar 19 18:36:57 2010 -0700
    78.3 @@ -131,6 +131,10 @@ struct td_request {
    78.4  	uint64_t                     id;
    78.5  	int                          sidx;
    78.6  	void                        *private;
    78.7 +    
    78.8 +#ifdef MEMSHR
    78.9 +	uint64_t                     memshr_hnd;
   78.10 +#endif
   78.11  };
   78.12  
   78.13  /* 
    79.1 --- a/tools/blktap2/drivers/tapdisk2.c	Mon Nov 02 19:35:54 2009 -0800
    79.2 +++ b/tools/blktap2/drivers/tapdisk2.c	Fri Mar 19 18:36:57 2010 -0700
    79.3 @@ -34,6 +34,9 @@
    79.4  #include <sys/stat.h>
    79.5  #include <sys/types.h>
    79.6  #include <sys/ioctl.h>
    79.7 +#ifdef MEMSHR
    79.8 +#include <memshr.h>
    79.9 +#endif
   79.10  
   79.11  #include "tapdisk.h"
   79.12  #include "blktap2.h"
   79.13 @@ -264,6 +267,13 @@ tapdisk2_open_device(int type, const cha
   79.14  		return err;
   79.15  	}
   79.16  
   79.17 +	err = tapdisk_vbd_parse_stack(vbd, name);
   79.18 +	if (err) {
   79.19 +		CHILD_ERR(err, "vbd_parse_stack failed: %d\n", err);
   79.20 +		return err;
   79.21 +	}
   79.22 +
   79.23 +	/* TODO: clean this up */
   79.24  	err = tapdisk_vbd_open(vbd, path, type,
   79.25  			       TAPDISK_STORAGE_TYPE_DEFAULT,
   79.26  			       devname, 0);
   79.27 @@ -404,13 +414,22 @@ main(int argc, char *argv[])
   79.28  
   79.29  	params = NULL;
   79.30  
   79.31 -	while ((c = getopt(argc, argv, "n:h")) != -1) {
   79.32 +	while ((c = getopt(argc, argv, "n:s:h")) != -1) {
   79.33  		switch (c) {
   79.34  		case 'n':
   79.35  			params = optarg;
   79.36  			break;
   79.37  		case 'h':
   79.38  			usage(argv[0], 0);
   79.39 +			break;
   79.40 +		case 's':
   79.41 +#ifdef MEMSHR
   79.42 +			memshr_set_domid(atoi(optarg));
   79.43 +#else
   79.44 +			fprintf(stderr, "MEMSHR support not compiled in.\n");
   79.45 +			exit(EXIT_FAILURE);
   79.46 +#endif
   79.47 +			break;
   79.48  		default:
   79.49  			usage(argv[0], EINVAL);
   79.50  		}
    80.1 --- a/tools/blktap2/include/Makefile	Mon Nov 02 19:35:54 2009 -0800
    80.2 +++ b/tools/blktap2/include/Makefile	Fri Mar 19 18:36:57 2010 -0700
    80.3 @@ -1,4 +1,4 @@
    80.4 -XEN_ROOT := ../../../
    80.5 +XEN_ROOT := ../../..
    80.6  include $(XEN_ROOT)/tools/Rules.mk
    80.7  
    80.8  .PHONY: all
    81.1 --- a/tools/blktap2/include/blktaplib.h	Mon Nov 02 19:35:54 2009 -0800
    81.2 +++ b/tools/blktap2/include/blktaplib.h	Fri Mar 19 18:36:57 2010 -0700
    81.3 @@ -43,8 +43,9 @@
    81.4  #endif
    81.5  
    81.6  #define EPRINTF(_f, _a...) syslog(LOG_ERR, "tap-err:%s: " _f, __func__, ##_a)
    81.7 +#define PERROR(_f, _a...)  EPRINTF(_f ": %s", ##_a, strerror(errno))
    81.8  
    81.9 -#define BLK_RING_SIZE __RING_SIZE((blkif_sring_t *)0, XC_PAGE_SIZE)
   81.10 +#define BLK_RING_SIZE __CONST_RING_SIZE(blkif, XC_PAGE_SIZE)
   81.11  
   81.12  /* size of the extra VMA area to map in attached pages. */
   81.13  #define BLKTAP_VMA_PAGES BLK_RING_SIZE
    82.1 --- a/tools/blktap2/include/list.h	Mon Nov 02 19:35:54 2009 -0800
    82.2 +++ b/tools/blktap2/include/list.h	Fri Mar 19 18:36:57 2010 -0700
    82.3 @@ -87,6 +87,26 @@ static inline int list_is_last(const str
    82.4  	return list->next == head;
    82.5  }
    82.6  
    82.7 +static inline void __list_splice(struct list_head *list,
    82.8 +				 struct list_head *head)
    82.9 +{
   82.10 +	struct list_head *first = list->next;
   82.11 +	struct list_head *last = list->prev;
   82.12 +	struct list_head *at = head->next;
   82.13 +
   82.14 +	first->prev = head;
   82.15 +	head->next = first;
   82.16 +
   82.17 +	last->next = at;
   82.18 +	at->prev = last;
   82.19 +}
   82.20 +
   82.21 +static inline void list_splice(struct list_head *list, struct list_head *head)
   82.22 +{
   82.23 +	if (!list_empty(list))
   82.24 +		__list_splice(list, head);
   82.25 +}
   82.26 +
   82.27  #define list_entry(ptr, type, member)                                   \
   82.28          ((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
   82.29  
    83.1 --- a/tools/blktap2/lvm/Makefile	Mon Nov 02 19:35:54 2009 -0800
    83.2 +++ b/tools/blktap2/lvm/Makefile	Fri Mar 19 18:36:57 2010 -0700
    83.3 @@ -1,5 +1,5 @@
    83.4 -XEN_ROOT = ../../../
    83.5 -BLKTAP_ROOT := ../
    83.6 +XEN_ROOT = ../../..
    83.7 +BLKTAP_ROOT := ..
    83.8  include $(XEN_ROOT)/tools/Rules.mk
    83.9  
   83.10  ifeq ($(LVM_UTIL_TEST),y)
    84.1 --- a/tools/blktap2/vhd/Makefile	Mon Nov 02 19:35:54 2009 -0800
    84.2 +++ b/tools/blktap2/vhd/Makefile	Fri Mar 19 18:36:57 2010 -0700
    84.3 @@ -1,5 +1,5 @@
    84.4 -XEN_ROOT=../../../
    84.5 -BLKTAP_ROOT := ../
    84.6 +XEN_ROOT=../../..
    84.7 +BLKTAP_ROOT := ..
    84.8  include $(XEN_ROOT)/tools/Rules.mk
    84.9  
   84.10  SUBDIRS-y         :=
    85.1 --- a/tools/blktap2/vhd/lib/Makefile	Mon Nov 02 19:35:54 2009 -0800
    85.2 +++ b/tools/blktap2/vhd/lib/Makefile	Fri Mar 19 18:36:57 2010 -0700
    85.3 @@ -1,12 +1,12 @@
    85.4 -XEN_ROOT=../../../../
    85.5 -BLKTAP_ROOT := ../../
    85.6 +XEN_ROOT=../../../..
    85.7 +BLKTAP_ROOT := ../..
    85.8  include $(XEN_ROOT)/tools/Rules.mk
    85.9  
   85.10  LIBVHD-MAJOR     = 1.0
   85.11  LIBVHD-MINOR     = 0
   85.12  LIBVHD-SONAME    = libvhd.so.$(LIBVHD-MAJOR)
   85.13  
   85.14 -LVM-UTIL-OBJ    := $(BLKTAP_ROOT)lvm/lvm-util.o
   85.15 +LVM-UTIL-OBJ    := $(BLKTAP_ROOT)/lvm/lvm-util.o
   85.16  
   85.17  LIBVHD-BUILD    := libvhd.a
   85.18  
    86.1 --- a/tools/blktap2/vhd/lib/libvhd.c	Mon Nov 02 19:35:54 2009 -0800
    86.2 +++ b/tools/blktap2/vhd/lib/libvhd.c	Fri Mar 19 18:36:57 2010 -0700
    86.3 @@ -36,6 +36,7 @@
    86.4  #include <libgen.h>
    86.5  #include <iconv.h>
    86.6  #include <sys/mman.h>
    86.7 +#include <sys/stat.h>
    86.8  
    86.9  #include "libvhd.h"
   86.10  #include "relative-path.h"
    87.1 --- a/tools/blktap2/vhd/lib/vhd-util-scan.c	Mon Nov 02 19:35:54 2009 -0800
    87.2 +++ b/tools/blktap2/vhd/lib/vhd-util-scan.c	Fri Mar 19 18:36:57 2010 -0700
    87.3 @@ -34,6 +34,7 @@
    87.4  #include <unistd.h>
    87.5  #include <fnmatch.h>
    87.6  #include <libgen.h>	/* for basename() */
    87.7 +#include <sys/stat.h>
    87.8  
    87.9  #include "list.h"
   87.10  #include "libvhd.h"
    88.1 --- a/tools/console/client/main.c	Mon Nov 02 19:35:54 2009 -0800
    88.2 +++ b/tools/console/client/main.c	Fri Mar 19 18:36:57 2010 -0700
    88.3 @@ -287,7 +287,13 @@ int main(int argc, char **argv)
    88.4  			exit(EINVAL);
    88.5  		}
    88.6  	}
    88.7 -	
    88.8 +
    88.9 +	if (optind >= argc) {
   88.10 +		fprintf(stderr, "DOMID should be specified\n");
   88.11 +		fprintf(stderr, "Try `%s --help' for more information.\n",
   88.12 +			argv[0]);
   88.13 +		exit(EINVAL);
   88.14 +	}
   88.15  	domid = strtol(argv[optind], &end, 10);
   88.16  	if (end && *end) {
   88.17  		fprintf(stderr, "Invalid DOMID `%s'\n", argv[optind]);
    89.1 --- a/tools/examples/Makefile	Mon Nov 02 19:35:54 2009 -0800
    89.2 +++ b/tools/examples/Makefile	Fri Mar 19 18:36:57 2010 -0700
    89.3 @@ -1,4 +1,4 @@
    89.4 -XEN_ROOT = ../../
    89.5 +XEN_ROOT = ../..
    89.6  include $(XEN_ROOT)/tools/Rules.mk
    89.7  
    89.8  # Init scripts.
    90.1 --- a/tools/examples/xend-config.sxp	Mon Nov 02 19:35:54 2009 -0800
    90.2 +++ b/tools/examples/xend-config.sxp	Fri Mar 19 18:36:57 2010 -0700
    90.3 @@ -192,6 +192,16 @@
    90.4  # If enable-dom0-ballooning = no, dom0 will never balloon out.
    90.5  (enable-dom0-ballooning yes)
    90.6  
    90.7 +# 32-bit paravirtual domains can only consume physical
    90.8 +# memory below 168GB. On systems with memory beyond that address,
    90.9 +# they'll be confined to memory below 128GB.
   90.10 +# Use total_available_memory (in GB) to specify the amount of memory reserved
   90.11 +# in the memory pool exclusively for 32-bit paravirtual domains.
   90.12 +# Additionally, you should pass dom0_mem = <-Value> as a parameter to the
   90.13 +# Xen hypervisor to reserve the memory for 32-bit paravirtual domains. The
   90.14 +# default is "0" (0GB).
   90.15 +(total_available_memory 0) 
   90.16 +
   90.17  # In SMP system, dom0 will use dom0-cpus # of CPUS
   90.18  # If dom0-cpus = 0, dom0 will take all cpus available
   90.19  (dom0-cpus 0)
    91.1 --- a/tools/examples/xmexample.hvm	Mon Nov 02 19:35:54 2009 -0800
    91.2 +++ b/tools/examples/xmexample.hvm	Fri Mar 19 18:36:57 2010 -0700
    91.3 @@ -25,6 +25,10 @@ memory = 128
    91.4  # Should be at least 2KB per MB of domain memory, plus a few MB per vcpu.
    91.5  # shadow_memory = 8
    91.6  
    91.7 +# Whether to transparently share this domain's memory with other domains.
    91.8 +# default = 0
    91.9 +# memory_sharing = 0
   91.10 +
   91.11  # A name for your domain. All domains must have different names.
   91.12  name = "ExampleHVMDomain"
   91.13  
   91.14 @@ -178,11 +182,16 @@ stdvga=0
   91.15  serial='pty'
   91.16  
   91.17  #----------------------------------------------------------------------------
   91.18 -#   tsc_native : TSC mode (0=emulate TSC, 1=native TSC)
   91.19 +#   tsc_mode : TSC mode (0=default, 1=native TSC, 2=never emulate, 3=pvrdtscp)
   91.20  #   emulate TSC provides synced TSC for all vcpus, but lose perfomrance.
   91.21  #   native TSC leverages hardware's TSC(no perf loss), but vcpu's TSC may lose
   91.22 -#   sync due to hardware's unreliable/unsynced TSC between CPUs.
   91.23 -tsc_native=1
   91.24 +#    sync due to hardware's unreliable/unsynced TSC between CPUs.
   91.25 +#   default intelligently uses native TSC on machines where it is safe, but
   91.26 +#    switches to emulated if necessary after save/restore/migration
   91.27 +#   pvrdtscp is for intelligent apps that use special Xen-only paravirtualized
   91.28 +#    cpuid instructions to obtain offset/scaling/migration info and maximize
   91.29 +#    performance within pools of machines that support the rdtscp instruction
   91.30 +tsc_mode=0
   91.31  
   91.32  #-----------------------------------------------------------------------------
   91.33  #   Qemu Monitor, default is disable
   91.34 @@ -335,6 +344,12 @@ tsc_native=1
   91.35  # 
   91.36  #pci_power_mgmt=0
   91.37  
   91.38 +#   Enable graphics passthrough:
   91.39 +#
   91.40 +#   If it's set, and a graphics device BDF is specified in the pci passthrough
   91.41 +# option, like pci=['xx:xx.x'], it enables graphics passthrough, default=0 (disabled)
   91.42 +#gfx_passthru=0
   91.43 +
   91.44  #-----------------------------------------------------------------------------
   91.45  #   Configure PVSCSI devices:
   91.46  #
    92.1 --- a/tools/firmware/hvmloader/acpi/Makefile	Mon Nov 02 19:35:54 2009 -0800
    92.2 +++ b/tools/firmware/hvmloader/acpi/Makefile	Fri Mar 19 18:36:57 2010 -0700
    92.3 @@ -18,8 +18,7 @@
    92.4  XEN_ROOT = ../../../..
    92.5  include $(XEN_ROOT)/tools/firmware/Rules.mk
    92.6  
    92.7 -C_SRC = build.c dsdt.c static_tables.c
    92.8 -H_SRC = $(wildcard *.h)
    92.9 +C_SRC = build.c dsdt_anycpu.c dsdt_15cpu.c static_tables.c
   92.10  OBJS  = $(patsubst %.c,%.o,$(C_SRC))
   92.11  
   92.12  CFLAGS += -I. -I.. $(CFLAGS_include)
   92.13 @@ -27,18 +26,20 @@ CFLAGS += -I. -I.. $(CFLAGS_include)
   92.14  vpath iasl $(PATH)
   92.15  all: acpi.a
   92.16  
   92.17 -ssdt_pm.h ssdt_tpm.h: %.h: %.asl
   92.18 -	$(MAKE) iasl
   92.19 -	iasl -tc $<
   92.20 -	mv $*.hex $@
   92.21 -	rm -f *.aml
   92.22 +ssdt_pm.h ssdt_tpm.h: %.h: %.asl iasl
   92.23 +	iasl -p $* -tc $<
   92.24 +	sed -e 's/AmlCode/$*/g' $*.hex >$@
   92.25 +	rm -f $*.hex $*.aml
   92.26  
   92.27 -dsdt.c: dsdt.asl
   92.28 -	$(MAKE) iasl
   92.29 -	iasl -tc dsdt.asl
   92.30 -	mv dsdt.hex dsdt.c
   92.31 -	echo "int DsdtLen=sizeof(AmlCode);" >> dsdt.c
   92.32 -	rm -f *.aml
   92.33 +# NB. awk invocation is a portable alternative to 'head -n -1'
   92.34 +dsdt_15cpu.c dsdt_anycpu.c: %.c: dsdt.asl mk_dsdt.c iasl
   92.35 +	$(HOSTCC) $(HOSTCFLAGS) $(CFLAGS_include) -o mk_$* mk_dsdt.c
   92.36 +	awk 'NR > 1 {print s} {s=$$0}' $< >$*.asl
   92.37 +	./mk_$* >>$*.asl
   92.38 +	iasl -p $* -tc $*.asl
   92.39 +	sed -e 's/AmlCode/$*/g' $*.hex >$@
   92.40 +	echo "int $*_len=sizeof($*);" >>$@
   92.41 +	rm -f $*.hex $*.aml $*.asl mk_$*
   92.42  
   92.43  iasl:
   92.44  	@echo
   92.45 @@ -48,14 +49,14 @@ iasl:
   92.46  	@echo 
   92.47  	@exit 1
   92.48  
   92.49 +build.o: ssdt_pm.h ssdt_tpm.h
   92.50 +
   92.51  acpi.a: $(OBJS)
   92.52  	$(AR) rc $@ $(OBJS)
   92.53  
   92.54 -%.o: %.c $(H_SRC)
   92.55 -	$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
   92.56 -
   92.57  clean:
   92.58  	rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz $(DEPS)
   92.59 +	rm -rf ssdt_*.h dsdt*.c *~ *.aml *.hex mk_dsdt mk_dsdt15 dsdt_*cpu.asl
   92.60  
   92.61  install: all
   92.62  
    93.1 --- a/tools/firmware/hvmloader/acpi/acpi2_0.h	Mon Nov 02 19:35:54 2009 -0800
    93.2 +++ b/tools/firmware/hvmloader/acpi/acpi2_0.h	Fri Mar 19 18:36:57 2010 -0700
    93.3 @@ -382,6 +382,7 @@ struct acpi_20_madt_intsrcovr {
    93.4  #pragma pack ()
    93.5  
    93.6  void acpi_build_tables(void);
    93.7 +extern uint32_t madt_csum_addr, madt_lapic0_addr;
    93.8  
    93.9  #endif /* _ACPI_2_0_H_ */
   93.10  
    94.1 --- a/tools/firmware/hvmloader/acpi/build.c	Mon Nov 02 19:35:54 2009 -0800
    94.2 +++ b/tools/firmware/hvmloader/acpi/build.c	Fri Mar 19 18:36:57 2010 -0700
    94.3 @@ -25,13 +25,28 @@
    94.4  #define align16(sz)        (((sz) + 15) & ~15)
    94.5  #define fixed_strcpy(d, s) strncpy((d), (s), sizeof(d))
    94.6  
    94.7 +/* MADT parameters for filling in bios_info structure for DSDT. */
    94.8 +uint32_t madt_csum_addr, madt_lapic0_addr;
    94.9 +
   94.10  extern struct acpi_20_rsdp Rsdp;
   94.11  extern struct acpi_20_rsdt Rsdt;
   94.12  extern struct acpi_20_xsdt Xsdt;
   94.13  extern struct acpi_20_fadt Fadt;
   94.14  extern struct acpi_20_facs Facs;
   94.15 -extern unsigned char AmlCode[];
   94.16 -extern int DsdtLen;
   94.17 +
   94.18 +/*
   94.19 + * Alternative DSDTs we get linked against. A cover-all DSDT for up to the
   94.20 + * implementation-defined maximum number of VCPUs, and an alternative for use
   94.21 + * when a guest can only have up to 15 VCPUs. Both are generated byte arrays;
   94.22 + * each <name>_len holds sizeof() of the corresponding array.
   94.23 + * The latter is required for Windows 2000, which experiences a BSOD of
   94.24 + * KMODE_EXCEPTION_NOT_HANDLED if it sees more than 15 processor objects.
   94.25 + */
   94.26 +extern unsigned char dsdt_anycpu[], dsdt_15cpu[];
   94.27 +extern int dsdt_anycpu_len, dsdt_15cpu_len;
   94.28 +
   94.29 +/* Number of processor objects in the chosen DSDT. */
   94.30 +static unsigned int nr_processor_objects;
   94.31  
   94.32  static void set_checksum(
   94.33      void *table, uint32_t checksum_offset, uint32_t length)
   94.34 @@ -111,7 +126,8 @@ static int construct_madt(struct acpi_20
   94.35      offset += sizeof(*io_apic);
   94.36  
   94.37      lapic = (struct acpi_20_madt_lapic *)(io_apic + 1);
   94.38 -    for ( i = 0; i < hvm_info->nr_vcpus; i++ )
   94.39 +    madt_lapic0_addr = (uint32_t)lapic;
   94.40 +    for ( i = 0; i < nr_processor_objects; i++ )
   94.41      {
   94.42          memset(lapic, 0, sizeof(*lapic));
   94.43          lapic->type    = ACPI_PROCESSOR_LOCAL_APIC;
   94.44 @@ -119,13 +135,16 @@ static int construct_madt(struct acpi_20
   94.45          /* Processor ID must match processor-object IDs in the DSDT. */
   94.46          lapic->acpi_processor_id = i;
   94.47          lapic->apic_id = LAPIC_ID(i);
   94.48 -        lapic->flags   = ACPI_LOCAL_APIC_ENABLED;
   94.49 +        lapic->flags = ((i < hvm_info->nr_vcpus) &&
   94.50 +                        test_bit(i, hvm_info->vcpu_online)
   94.51 +                        ? ACPI_LOCAL_APIC_ENABLED : 0);
   94.52          offset += sizeof(*lapic);
   94.53          lapic++;
   94.54      }
   94.55  
   94.56      madt->header.length = offset;
   94.57      set_checksum(madt, offsetof(struct acpi_header, checksum), offset);
   94.58 +    madt_csum_addr = (uint32_t)&madt->header.checksum;
   94.59  
   94.60      return align16(offset);
   94.61  }
   94.62 @@ -181,8 +200,8 @@ static int construct_secondary_tables(ui
   94.63      if ( battery_port_exists() ) 
   94.64      {
   94.65          table_ptrs[nr_tables++] = (unsigned long)&buf[offset];
   94.66 -        memcpy(&buf[offset], AmlCode_PM, sizeof(AmlCode_PM));
   94.67 -        offset += align16(sizeof(AmlCode_PM));
   94.68 +        memcpy(&buf[offset], ssdt_pm, sizeof(ssdt_pm));
   94.69 +        offset += align16(sizeof(ssdt_pm));
   94.70      }
   94.71  
   94.72      /* TPM TCPA and SSDT. */
   94.73 @@ -191,9 +210,9 @@ static int construct_secondary_tables(ui
   94.74           (tis_hdr[1] == tis_signature[1]) &&
   94.75           (tis_hdr[2] == tis_signature[2]) )
   94.76      {
   94.77 -        memcpy(&buf[offset], AmlCode_TPM, sizeof(AmlCode_TPM));
   94.78 +        memcpy(&buf[offset], ssdt_tpm, sizeof(ssdt_tpm));
   94.79          table_ptrs[nr_tables++] = (unsigned long)&buf[offset];
   94.80 -        offset += align16(sizeof(AmlCode_TPM));
   94.81 +        offset += align16(sizeof(ssdt_tpm));
   94.82  
   94.83          tcpa = (struct acpi_20_tcpa *)&buf[offset];
   94.84          memset(tcpa, 0, sizeof(*tcpa));
   94.85 @@ -244,8 +263,18 @@ static void __acpi_build_tables(uint8_t 
   94.86      offset += align16(sizeof(struct acpi_20_facs));
   94.87  
   94.88      dsdt = (unsigned char *)&buf[offset];
   94.89 -    memcpy(dsdt, &AmlCode, DsdtLen);
   94.90 -    offset += align16(DsdtLen);
   94.91 +    if ( hvm_info->nr_vcpus <= 15 )
   94.92 +    {
   94.93 +        memcpy(dsdt, &dsdt_15cpu, dsdt_15cpu_len);
   94.94 +        offset += align16(dsdt_15cpu_len);
   94.95 +        nr_processor_objects = 15;
   94.96 +    }
   94.97 +    else
   94.98 +    {
   94.99 +        memcpy(dsdt, &dsdt_anycpu, dsdt_anycpu_len);
  94.100 +        offset += align16(dsdt_anycpu_len);
  94.101 +        nr_processor_objects = HVM_MAX_VCPUS;
  94.102 +    }
  94.103  
  94.104      /*
  94.105       * N.B. ACPI 1.0 operating systems may not handle FADT with revision 2
    95.1 --- a/tools/firmware/hvmloader/acpi/dsdt.asl	Mon Nov 02 19:35:54 2009 -0800
    95.2 +++ b/tools/firmware/hvmloader/acpi/dsdt.asl	Fri Mar 19 18:36:57 2010 -0700
    95.3 @@ -27,31 +27,6 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
    95.4      Name (\APCL, 0x00010000)
    95.5      Name (\PUID, 0x00)
    95.6  
    95.7 -    Scope (\_PR)
    95.8 -    {
    95.9 -        Processor (PR00, 0x00, 0x0000, 0x00) {}
   95.10 -        Processor (PR01, 0x01, 0x0000, 0x00) {}
   95.11 -        Processor (PR02, 0x02, 0x0000, 0x00) {}
   95.12 -        Processor (PR03, 0x03, 0x0000, 0x00) {}
   95.13 -        Processor (PR04, 0x04, 0x0000, 0x00) {}
   95.14 -        Processor (PR05, 0x05, 0x0000, 0x00) {}
   95.15 -        Processor (PR06, 0x06, 0x0000, 0x00) {}
   95.16 -        Processor (PR07, 0x07, 0x0000, 0x00) {}
   95.17 -        Processor (PR08, 0x08, 0x0000, 0x00) {}
   95.18 -        Processor (PR09, 0x09, 0x0000, 0x00) {}
   95.19 -        Processor (PR0A, 0x0a, 0x0000, 0x00) {}
   95.20 -        Processor (PR0B, 0x0b, 0x0000, 0x00) {}
   95.21 -        Processor (PR0C, 0x0c, 0x0000, 0x00) {}
   95.22 -        Processor (PR0D, 0x0d, 0x0000, 0x00) {}
   95.23 -        Processor (PR0E, 0x0e, 0x0000, 0x00) {}
   95.24 -        /* No more than 15 Processor objects, as otherwise Windows 2000
   95.25 -         * experiences a BSOD of KMODE_EXCEPTION_NOT_HANDLED. If we require
   95.26 -         * more in some configurations then we should move \_PR scope into a
   95.27 -         * SSDT, statically compiled with a range of different numbers of
   95.28 -         * processors. We can then link the appropriate one into the RSDT/XSDT
   95.29 -         * at HVM guest boot time. */
   95.30 -    }
   95.31 -
   95.32      /*
   95.33       * S3 (suspend-to-ram), S4 (suspend-to-disc) and S5 (power-off) type codes:
   95.34       * must match piix4 emulation.
   95.35 @@ -87,14 +62,17 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
   95.36      Scope (\_SB)
   95.37      {
   95.38         /* BIOS_INFO_PHYSICAL_ADDRESS == 0xEA000 */
   95.39 -       OperationRegion(BIOS, SystemMemory, 0xEA000, 16)
   95.40 +       OperationRegion(BIOS, SystemMemory, 0xEA000, 24)
   95.41         Field(BIOS, ByteAcc, NoLock, Preserve) {
   95.42             UAR1, 1,
   95.43             UAR2, 1,
   95.44 +           LTP1, 1,
   95.45             HPET, 1,
   95.46             Offset(4),
   95.47             PMIN, 32,
   95.48 -           PLEN, 32
   95.49 +           PLEN, 32,
   95.50 +           MSUA, 32, /* MADT checksum address */
   95.51 +           MAPA, 32  /* MADT LAPIC0 address */
   95.52         }
   95.53  
   95.54          /* Fix HCT test for 0x400 pci memory:
   95.55 @@ -136,6 +114,25 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
   95.56                 })
   95.57             }
   95.58  
   95.59 +           /* Make Cirrus VGA S3 suspend/resume work in Windows XP/2003 */
   95.60 +           Device (VGA)
   95.61 +           {
   95.62 +               Name (_ADR, 0x00020000)
   95.63 +
   95.64 +               Method (_S1D, 0, NotSerialized)
   95.65 +               {
   95.66 +                   Return (0x00)
   95.67 +               }
   95.68 +               Method (_S2D, 0, NotSerialized)
   95.69 +               {
   95.70 +                   Return (0x00)
   95.71 +               }
   95.72 +               Method (_S3D, 0, NotSerialized)
   95.73 +               {
   95.74 +                   Return (0x00)
   95.75 +               }
   95.76 +           }
   95.77 +
   95.78             Method (_CRS, 0, NotSerialized)
   95.79             {
   95.80                 Name (PRT0, ResourceTemplate ()
   95.81 @@ -199,157 +196,6 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
   95.82                  Return (PRT0)
   95.83              }
   95.84  
   95.85 -            Name(BUFA, ResourceTemplate() {
   95.86 -                IRQ(Level, ActiveLow, Shared) { 5, 10, 11 }
   95.87 -            })
   95.88 -
   95.89 -            Name(BUFB, Buffer() {
   95.90 -                0x23, 0x00, 0x00, 0x18, /* IRQ descriptor */
   95.91 -                0x79, 0                 /* End tag, null checksum */
   95.92 -            })
   95.93 -
   95.94 -            CreateWordField(BUFB, 0x01, IRQV)
   95.95 -
   95.96 -            Device(LNKA) {
   95.97 -                Name(_HID, EISAID("PNP0C0F")) /* PCI interrupt link */
   95.98 -                Name(_UID, 1)
   95.99 -
  95.100 -                Method(_STA, 0) {
  95.101 -                    And(PIRA, 0x80, Local0)
  95.102 -                    If(LEqual(Local0, 0x80)) {
  95.103 -                        Return(0x09)   
  95.104 -                    } Else {
  95.105 -                        Return(0x0B) 
  95.106 -                    }
  95.107 -                }
  95.108 -
  95.109 -                Method(_PRS) {
  95.110 -                    Return(BUFA)
  95.111 -                }
  95.112 -
  95.113 -                Method(_DIS) {
  95.114 -                    Or(PIRA, 0x80, PIRA)
  95.115 -                }
  95.116 -
  95.117 -                Method(_CRS) {
  95.118 -                    And(PIRA, 0x0f, Local0)
  95.119 -                    ShiftLeft(0x1, Local0, IRQV)
  95.120 -                    Return(BUFB)
  95.121 -                }
  95.122 -
  95.123 -                Method(_SRS, 1) {
  95.124 -                    CreateWordField(ARG0, 0x01, IRQ1)
  95.125 -                    FindSetRightBit(IRQ1, Local0)
  95.126 -                    Decrement(Local0)
  95.127 -                    Store(Local0, PIRA)
  95.128 -                }
  95.129 -            }
  95.130 -
  95.131 -            Device(LNKB) {
  95.132 -                Name(_HID, EISAID("PNP0C0F")) /* PCI interrupt link */
  95.133 -                Name(_UID, 2)
  95.134 -
  95.135 -                Method(_STA, 0) {
  95.136 -                    And(PIRB, 0x80, Local0)
  95.137 -                    If(LEqual(Local0, 0x80)) {
  95.138 -                        Return(0x09) 
  95.139 -                    } Else {
  95.140 -                        Return(0x0B) 
  95.141 -                    }
  95.142 -                }
  95.143 -
  95.144 -                Method(_PRS) {
  95.145 -                    Return(BUFA) 
  95.146 -                }
  95.147 -
  95.148 -                Method(_DIS) {
  95.149 -                    Or(PIRB, 0x80, PIRB)
  95.150 -                }
  95.151 -
  95.152 -                Method(_CRS) {
  95.153 -                    And(PIRB, 0x0f, Local0) 
  95.154 -                    ShiftLeft(0x1, Local0, IRQV) 
  95.155 -                    Return(BUFB) 
  95.156 -                }
  95.157 -
  95.158 -                Method(_SRS, 1) {
  95.159 -                    CreateWordField(ARG0, 0x01, IRQ1) 
  95.160 -                    FindSetRightBit(IRQ1, Local0) 
  95.161 -                    Decrement(Local0)
  95.162 -                    Store(Local0, PIRB) 
  95.163 -                }
  95.164 -            }
  95.165 -
  95.166 -            Device(LNKC) {
  95.167 -                Name(_HID, EISAID("PNP0C0F")) /* PCI interrupt link */
  95.168 -                Name(_UID, 3)
  95.169 -
  95.170 -                Method(_STA, 0) {
  95.171 -                    And(PIRC, 0x80, Local0)
  95.172 -                    If(LEqual(Local0, 0x80)) {
  95.173 -                        Return(0x09) 
  95.174 -                    } Else {
  95.175 -                        Return(0x0B)
  95.176 -                    }
  95.177 -                }
  95.178 -
  95.179 -                Method(_PRS) { 
  95.180 -                    Return(BUFA)
  95.181 -                }
  95.182 -
  95.183 -                Method(_DIS) {
  95.184 -                    Or(PIRC, 0x80, PIRC)
  95.185 -                }
  95.186 -
  95.187 -                Method(_CRS) {
  95.188 -                    And(PIRC, 0x0f, Local0) 
  95.189 -                    ShiftLeft(0x1, Local0, IRQV) 
  95.190 -                    Return(BUFB) 
  95.191 -                }
  95.192 -
  95.193 -                Method(_SRS, 1) {
  95.194 -                    CreateWordField(ARG0, 0x01, IRQ1) 
  95.195 -                    FindSetRightBit(IRQ1, Local0) 
  95.196 -                    Decrement(Local0) 
  95.197 -                    Store(Local0, PIRC)
  95.198 -                }
  95.199 -            }
  95.200 -
  95.201 -            Device(LNKD) {
  95.202 -                Name(_HID, EISAID("PNP0C0F")) /* PCI interrupt link */
  95.203 -                Name(_UID, 4)
  95.204 -
  95.205 -                Method(_STA, 0) {
  95.206 -                    And(PIRD, 0x80, Local0)
  95.207 -                    If(LEqual(Local0, 0x80)) {
  95.208 -                        Return(0x09) 
  95.209 -                    } Else {
  95.210 -                        Return(0x0B) 
  95.211 -                    }
  95.212 -                }
  95.213 -
  95.214 -                Method(_PRS) { 
  95.215 -                    Return(BUFA) 
  95.216 -                }
  95.217 -
  95.218 -                Method(_DIS) {
  95.219 -                    Or(PIRD, 0x80, PIRD)
  95.220 -                }
  95.221 -
  95.222 -                Method(_CRS) {
  95.223 -                    And(PIRD, 0x0f, Local0) 
  95.224 -                    ShiftLeft(0x1, Local0, IRQV) 
  95.225 -                    Return(BUFB) 
  95.226 -                }
  95.227 -
  95.228 -                Method(_SRS, 1) {
  95.229 -                    CreateWordField(ARG0, 0x01, IRQ1) 
  95.230 -                    FindSetRightBit(IRQ1, Local0) 
  95.231 -                    Decrement(Local0) 
  95.232 -                    Store(Local0, PIRD) 
  95.233 -                }
  95.234 -            }
  95.235 -
  95.236              Device(HPET) {
  95.237                  Name(_HID,  EISAID("PNP0103"))
  95.238                  Name(_UID, 0)
  95.239 @@ -373,389 +219,6 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
  95.240                  })
  95.241              }
  95.242  
  95.243 -            Method(_PRT,0) {
  95.244 -                If(PICD) {
  95.245 -                    Return(PRTA)
  95.246 -                }  
  95.247 -                Return (PRTP)  
  95.248 -            }
  95.249 -
  95.250 -            Name(PRTP, Package() {
  95.251 -                /* Device 1, INTA - INTD */
  95.252 -                Package(){0x0001ffff, 0, \_SB.PCI0.LNKB, 0},
  95.253 -                Package(){0x0001ffff, 1, \_SB.PCI0.LNKC, 0},
  95.254 -                Package(){0x0001ffff, 2, \_SB.PCI0.LNKD, 0},
  95.255 -                Package(){0x0001ffff, 3, \_SB.PCI0.LNKA, 0},
  95.256 -                        
  95.257 -                /* Device 2, INTA - INTD */
  95.258 -                Package(){0x0002ffff, 0, \_SB.PCI0.LNKC, 0},
  95.259 -                Package(){0x0002ffff, 1, \_SB.PCI0.LNKD, 0},
  95.260 -                Package(){0x0002ffff, 2, \_SB.PCI0.LNKA, 0},
  95.261 -                Package(){0x0002ffff, 3, \_SB.PCI0.LNKB, 0},
  95.262 -                        
  95.263 -                /* Device 3, INTA - INTD */
  95.264 -                Package(){0x0003ffff, 0, \_SB.PCI0.LNKD, 0},
  95.265 -                Package(){0x0003ffff, 1, \_SB.PCI0.LNKA, 0},
  95.266 -                Package(){0x0003ffff, 2, \_SB.PCI0.LNKB, 0},
  95.267 -                Package(){0x0003ffff, 3, \_SB.PCI0.LNKC, 0},
  95.268 -                        
  95.269 -                /* Device 4, INTA - INTD */
  95.270 -                Package(){0x0004ffff, 0, \_SB.PCI0.LNKA, 0},
  95.271 -                Package(){0x0004ffff, 1, \_SB.PCI0.LNKB, 0},
  95.272 -                Package(){0x0004ffff, 2, \_SB.PCI0.LNKC, 0},
  95.273 -                Package(){0x0004ffff, 3, \_SB.PCI0.LNKD, 0},
  95.274 -                        
  95.275 -                /* Device 5, INTA - INTD */
  95.276 -                Package(){0x0005ffff, 0, \_SB.PCI0.LNKB, 0},
  95.277 -                Package(){0x0005ffff, 1, \_SB.PCI0.LNKC, 0},
  95.278 -                Package(){0x0005ffff, 2, \_SB.PCI0.LNKD, 0},
  95.279 -                Package(){0x0005ffff, 3, \_SB.PCI0.LNKA, 0},
  95.280 -                        
  95.281 -                /* Device 6, INTA - INTD */
  95.282 -                Package(){0x0006ffff, 0, \_SB.PCI0.LNKC, 0},
  95.283 -                Package(){0x0006ffff, 1, \_SB.PCI0.LNKD, 0},
  95.284 -                Package(){0x0006ffff, 2, \_SB.PCI0.LNKA, 0},
  95.285 -                Package(){0x0006ffff, 3, \_SB.PCI0.LNKB, 0},
  95.286 -                        
  95.287 -                /* Device 7, INTA - INTD */
  95.288 -                Package(){0x0007ffff, 0, \_SB.PCI0.LNKD, 0},
  95.289 -                Package(){0x0007ffff, 1, \_SB.PCI0.LNKA, 0},
  95.290 -                Package(){0x0007ffff, 2, \_SB.PCI0.LNKB, 0},
  95.291 -                Package(){0x0007ffff, 3, \_SB.PCI0.LNKC, 0},
  95.292 -                        
  95.293 -                /* Device 8, INTA - INTD */
  95.294 -                Package(){0x0008ffff, 0, \_SB.PCI0.LNKA, 0},
  95.295 -                Package(){0x0008ffff, 1, \_SB.PCI0.LNKB, 0},
  95.296 -                Package(){0x0008ffff, 2, \_SB.PCI0.LNKC, 0},
  95.297 -                Package(){0x0008ffff, 3, \_SB.PCI0.LNKD, 0},
  95.298 -                        
  95.299 -                /* Device 9, INTA - INTD */
  95.300 -                Package(){0x0009ffff, 0, \_SB.PCI0.LNKB, 0},
  95.301 -                Package(){0x0009ffff, 1, \_SB.PCI0.LNKC, 0},
  95.302 -                Package(){0x0009ffff, 2, \_SB.PCI0.LNKD, 0},
  95.303 -                Package(){0x0009ffff, 3, \_SB.PCI0.LNKA, 0},
  95.304 -                        
  95.305 -                /* Device 10, INTA - INTD */
  95.306 -                Package(){0x000affff, 0, \_SB.PCI0.LNKC, 0},
  95.307 -                Package(){0x000affff, 1, \_SB.PCI0.LNKD, 0},
  95.308 -                Package(){0x000affff, 2, \_SB.PCI0.LNKA, 0},
  95.309 -                Package(){0x000affff, 3, \_SB.PCI0.LNKB, 0},
  95.310 -                        
  95.311 -                /* Device 11, INTA - INTD */
  95.312 -                Package(){0x000bffff, 0, \_SB.PCI0.LNKD, 0},
  95.313 -                Package(){0x000bffff, 1, \_SB.PCI0.LNKA, 0},
  95.314 -                Package(){0x000bffff, 2, \_SB.PCI0.LNKB, 0},
  95.315 -                Package(){0x000bffff, 3, \_SB.PCI0.LNKC, 0},
  95.316 -                        
  95.317 -                /* Device 12, INTA - INTD */
  95.318 -                Package(){0x000cffff, 0, \_SB.PCI0.LNKA, 0},
  95.319 -                Package(){0x000cffff, 1, \_SB.PCI0.LNKB, 0},
  95.320 -                Package(){0x000cffff, 2, \_SB.PCI0.LNKC, 0},
  95.321 -                Package(){0x000cffff, 3, \_SB.PCI0.LNKD, 0},
  95.322 -                        
  95.323 -                /* Device 13, INTA - INTD */
  95.324 -                Package(){0x000dffff, 0, \_SB.PCI0.LNKB, 0},
  95.325 -                Package(){0x000dffff, 1, \_SB.PCI0.LNKC, 0},
  95.326 -                Package(){0x000dffff, 2, \_SB.PCI0.LNKD, 0},
  95.327 -                Package(){0x000dffff, 3, \_SB.PCI0.LNKA, 0},
  95.328 -                        
  95.329 -                /* Device 14, INTA - INTD */
  95.330 -                Package(){0x000effff, 0, \_SB.PCI0.LNKC, 0},
  95.331 -                Package(){0x000effff, 1, \_SB.PCI0.LNKD, 0},
  95.332 -                Package(){0x000effff, 2, \_SB.PCI0.LNKA, 0},
  95.333 -                Package(){0x000effff, 3, \_SB.PCI0.LNKB, 0},
  95.334 -                        
  95.335 -                /* Device 15, INTA - INTD */
  95.336 -                Package(){0x000fffff, 0, \_SB.PCI0.LNKD, 0},
  95.337 -                Package(){0x000fffff, 1, \_SB.PCI0.LNKA, 0},
  95.338 -                Package(){0x000fffff, 2, \_SB.PCI0.LNKB, 0},
  95.339 -                Package(){0x000fffff, 3, \_SB.PCI0.LNKC, 0},
  95.340 -
  95.341 -                /* Device 16, INTA - INTD */
  95.342 -                Package(){0x0010ffff, 0, \_SB.PCI0.LNKA, 0},
  95.343 -                Package(){0x0010ffff, 1, \_SB.PCI0.LNKB, 0},
  95.344 -                Package(){0x0010ffff, 2, \_SB.PCI0.LNKC, 0},
  95.345 -                Package(){0x0010ffff, 3, \_SB.PCI0.LNKD, 0},
  95.346 -
  95.347 -                /* Device 17, INTA - INTD */
  95.348 -                Package(){0x0011ffff, 0, \_SB.PCI0.LNKB, 0},
  95.349 -                Package(){0x0011ffff, 1, \_SB.PCI0.LNKC, 0},
  95.350 -                Package(){0x0011ffff, 2, \_SB.PCI0.LNKD, 0},
  95.351 -                Package(){0x0011ffff, 3, \_SB.PCI0.LNKA, 0},
  95.352 -
  95.353 -                /* Device 18, INTA - INTD */
  95.354 -                Package(){0x0012ffff, 0, \_SB.PCI0.LNKC, 0},
  95.355 -                Package(){0x0012ffff, 1, \_SB.PCI0.LNKD, 0},
  95.356 -                Package(){0x0012ffff, 2, \_SB.PCI0.LNKA, 0},
  95.357 -                Package(){0x0012ffff, 3, \_SB.PCI0.LNKB, 0},
  95.358 -
  95.359 -                /* Device 19, INTA - INTD */
  95.360 -                Package(){0x0013ffff, 0, \_SB.PCI0.LNKD, 0},
  95.361 -                Package(){0x0013ffff, 1, \_SB.PCI0.LNKA, 0},
  95.362 -                Package(){0x0013ffff, 2, \_SB.PCI0.LNKB, 0},
  95.363 -                Package(){0x0013ffff, 3, \_SB.PCI0.LNKC, 0},
  95.364 -
  95.365 -                /* Device 20, INTA - INTD */
  95.366 -                Package(){0x0014ffff, 0, \_SB.PCI0.LNKA, 0},
  95.367 -                Package(){0x0014ffff, 1, \_SB.PCI0.LNKB, 0},
  95.368 -                Package(){0x0014ffff, 2, \_SB.PCI0.LNKC, 0},
  95.369 -                Package(){0x0014ffff, 3, \_SB.PCI0.LNKD, 0},
  95.370 -
  95.371 -                /* Device 21, INTA - INTD */
  95.372 -                Package(){0x0015ffff, 0, \_SB.PCI0.LNKB, 0},
  95.373 -                Package(){0x0015ffff, 1, \_SB.PCI0.LNKC, 0},
  95.374 -                Package(){0x0015ffff, 2, \_SB.PCI0.LNKD, 0},
  95.375 -                Package(){0x0015ffff, 3, \_SB.PCI0.LNKA, 0},
  95.376 -
  95.377 -                /* Device 22, INTA - INTD */
  95.378 -                Package(){0x0016ffff, 0, \_SB.PCI0.LNKC, 0},
  95.379 -                Package(){0x0016ffff, 1, \_SB.PCI0.LNKD, 0},
  95.380 -                Package(){0x0016ffff, 2, \_SB.PCI0.LNKA, 0},
  95.381 -                Package(){0x0016ffff, 3, \_SB.PCI0.LNKB, 0},
  95.382 -
  95.383 -                /* Device 23, INTA - INTD */
  95.384 -                Package(){0x0017ffff, 0, \_SB.PCI0.LNKD, 0},
  95.385 -                Package(){0x0017ffff, 1, \_SB.PCI0.LNKA, 0},
  95.386 -                Package(){0x0017ffff, 2, \_SB.PCI0.LNKB, 0},
  95.387 -                Package(){0x0017ffff, 3, \_SB.PCI0.LNKC, 0},
  95.388 -
  95.389 -                /* Device 24, INTA - INTD */
  95.390 -                Package(){0x0018ffff, 0, \_SB.PCI0.LNKA, 0},
  95.391 -                Package(){0x0018ffff, 1, \_SB.PCI0.LNKB, 0},
  95.392 -                Package(){0x0018ffff, 2, \_SB.PCI0.LNKC, 0},
  95.393 -                Package(){0x0018ffff, 3, \_SB.PCI0.LNKD, 0},
  95.394 -
  95.395 -                /* Device 25, INTA - INTD */
  95.396 -                Package(){0x0019ffff, 0, \_SB.PCI0.LNKB, 0},
  95.397 -                Package(){0x0019ffff, 1, \_SB.PCI0.LNKC, 0},
  95.398 -                Package(){0x0019ffff, 2, \_SB.PCI0.LNKD, 0},
  95.399 -                Package(){0x0019ffff, 3, \_SB.PCI0.LNKA, 0},
  95.400 -
  95.401 -                /* Device 26, INTA - INTD */
  95.402 -                Package(){0x001affff, 0, \_SB.PCI0.LNKC, 0},
  95.403 -                Package(){0x001affff, 1, \_SB.PCI0.LNKD, 0},
  95.404 -                Package(){0x001affff, 2, \_SB.PCI0.LNKA, 0},
  95.405 -                Package(){0x001affff, 3, \_SB.PCI0.LNKB, 0},
  95.406 -
  95.407 -                /* Device 27, INTA - INTD */
  95.408 -                Package(){0x001bffff, 0, \_SB.PCI0.LNKD, 0},
  95.409 -                Package(){0x001bffff, 1, \_SB.PCI0.LNKA, 0},
  95.410 -                Package(){0x001bffff, 2, \_SB.PCI0.LNKB, 0},
  95.411 -                Package(){0x001bffff, 3, \_SB.PCI0.LNKC, 0},
  95.412 -
  95.413 -                /* Device 28, INTA - INTD */
  95.414 -                Package(){0x001cffff, 0, \_SB.PCI0.LNKA, 0},
  95.415 -                Package(){0x001cffff, 1, \_SB.PCI0.LNKB, 0},
  95.416 -                Package(){0x001cffff, 2, \_SB.PCI0.LNKC, 0},
  95.417 -                Package(){0x001cffff, 3, \_SB.PCI0.LNKD, 0},
  95.418 -
  95.419 -                /* Device 29, INTA - INTD */
  95.420 -                Package(){0x001dffff, 0, \_SB.PCI0.LNKB, 0},
  95.421 -                Package(){0x001dffff, 1, \_SB.PCI0.LNKC, 0},
  95.422 -                Package(){0x001dffff, 2, \_SB.PCI0.LNKD, 0},
  95.423 -                Package(){0x001dffff, 3, \_SB.PCI0.LNKA, 0},
  95.424 -
  95.425 -                /* Device 30, INTA - INTD */
  95.426 -                Package(){0x001effff, 0, \_SB.PCI0.LNKC, 0},
  95.427 -                Package(){0x001effff, 1, \_SB.PCI0.LNKD, 0},
  95.428 -                Package(){0x001effff, 2, \_SB.PCI0.LNKA, 0},
  95.429 -                Package(){0x001effff, 3, \_SB.PCI0.LNKB, 0},
  95.430 -
  95.431 -                /* Device 31, INTA - INTD */
  95.432 -                Package(){0x001fffff, 0, \_SB.PCI0.LNKD, 0},
  95.433 -                Package(){0x001fffff, 1, \_SB.PCI0.LNKA, 0},
  95.434 -                Package(){0x001fffff, 2, \_SB.PCI0.LNKB, 0},
  95.435 -                Package(){0x001fffff, 3, \_SB.PCI0.LNKC, 0},
  95.436 -            })
  95.437 -
  95.438 -            Name(PRTA, Package() {
  95.439 -                /* Device 1, INTA - INTD */
  95.440 -                Package(){0x0001ffff, 0, 0, 20},
  95.441 -                Package(){0x0001ffff, 1, 0, 21},
  95.442 -                Package(){0x0001ffff, 2, 0, 22},
  95.443 -                Package(){0x0001ffff, 3, 0, 23},
  95.444 -
  95.445 -                /* Device 2, INTA - INTD */
  95.446 -                Package(){0x0002ffff, 0, 0, 24},
  95.447 -                Package(){0x0002ffff, 1, 0, 25},
  95.448 -                Package(){0x0002ffff, 2, 0, 26},
  95.449 -                Package(){0x0002ffff, 3, 0, 27},
  95.450 -
  95.451 -                /* Device 3, INTA - INTD */
  95.452 -                Package(){0x0003ffff, 0, 0, 28},
  95.453 -                Package(){0x0003ffff, 1, 0, 29},
  95.454 -                Package(){0x0003ffff, 2, 0, 30},
  95.455 -                Package(){0x0003ffff, 3, 0, 31},
  95.456 -
  95.457 -                /* Device 4, INTA - INTD */
  95.458 -                Package(){0x0004ffff, 0, 0, 32},
  95.459 -                Package(){0x0004ffff, 1, 0, 33},
  95.460 -                Package(){0x0004ffff, 2, 0, 34},
  95.461 -                Package(){0x0004ffff, 3, 0, 35},
  95.462 -
  95.463 -                /* Device 5, INTA - INTD */
  95.464 -                Package(){0x0005ffff, 0, 0, 36},
  95.465 -                Package(){0x0005ffff, 1, 0, 37},
  95.466 -                Package(){0x0005ffff, 2, 0, 38},
  95.467 -                Package(){0x0005ffff, 3, 0, 39},
  95.468 -
  95.469 -                /* Device 6, INTA - INTD */
  95.470 -                Package(){0x0006ffff, 0, 0, 40},
  95.471 -                Package(){0x0006ffff, 1, 0, 41},
  95.472 -                Package(){0x0006ffff, 2, 0, 42},
  95.473 -                Package(){0x0006ffff, 3, 0, 43},
  95.474 -
  95.475 -                /* Device 7, INTA - INTD */
  95.476 -                Package(){0x0007ffff, 0, 0, 44},
  95.477 -                Package(){0x0007ffff, 1, 0, 45},
  95.478 -                Package(){0x0007ffff, 2, 0, 46},
  95.479 -                Package(){0x0007ffff, 3, 0, 47},
  95.480 -
  95.481 -                /* Device 8, INTA - INTD */
  95.482 -                Package(){0x0008ffff, 0, 0, 17},
  95.483 -                Package(){0x0008ffff, 1, 0, 18},
  95.484 -                Package(){0x0008ffff, 2, 0, 19},
  95.485 -                Package(){0x0008ffff, 3, 0, 20},
  95.486 -
  95.487 -                /* Device 9, INTA - INTD */
  95.488 -                Package(){0x0009ffff, 0, 0, 21},
  95.489 -                Package(){0x0009ffff, 1, 0, 22},
  95.490 -                Package(){0x0009ffff, 2, 0, 23},
  95.491 -                Package(){0x0009ffff, 3, 0, 24},
  95.492 -
  95.493 -                /* Device 10, INTA - INTD */
  95.494 -                Package(){0x000affff, 0, 0, 25},
  95.495 -                Package(){0x000affff, 1, 0, 26},
  95.496 -                Package(){0x000affff, 2, 0, 27},
  95.497 -                Package(){0x000affff, 3, 0, 28},
  95.498 -
  95.499 -                /* Device 11, INTA - INTD */
  95.500 -                Package(){0x000bffff, 0, 0, 29},
  95.501 -                Package(){0x000bffff, 1, 0, 30},
  95.502 -                Package(){0x000bffff, 2, 0, 31},
  95.503 -                Package(){0x000bffff, 3, 0, 32},
  95.504 -
  95.505 -                /* Device 12, INTA - INTD */
  95.506 -                Package(){0x000cffff, 0, 0, 33},
  95.507 -                Package(){0x000cffff, 1, 0, 34},
  95.508 -                Package(){0x000cffff, 2, 0, 35},
  95.509 -                Package(){0x000cffff, 3, 0, 36},
  95.510 -
  95.511 -                /* Device 13, INTA - INTD */
  95.512 -                Package(){0x000dffff, 0, 0, 37},
  95.513 -                Package(){0x000dffff, 1, 0, 38},
  95.514 -                Package(){0x000dffff, 2, 0, 39},
  95.515 -                Package(){0x000dffff, 3, 0, 40},
  95.516 -
  95.517 -                /* Device 14, INTA - INTD */
  95.518 -                Package(){0x000effff, 0, 0, 41},
  95.519 -                Package(){0x000effff, 1, 0, 42},
  95.520 -                Package(){0x000effff, 2, 0, 43},
  95.521 -                Package(){0x000effff, 3, 0, 44},
  95.522 -
  95.523 -                /* Device 15, INTA - INTD */
  95.524 -                Package(){0x000fffff, 0, 0, 45},
  95.525 -                Package(){0x000fffff, 1, 0, 46},
  95.526 -                Package(){0x000fffff, 2, 0, 47},
  95.527 -                Package(){0x000fffff, 3, 0, 16},
  95.528 -
  95.529 -                /* Device 16, INTA - INTD */
  95.530 -                Package(){0x0010ffff, 0, 0, 18},
  95.531 -                Package(){0x0010ffff, 1, 0, 19},
  95.532 -                Package(){0x0010ffff, 2, 0, 20},
  95.533 -                Package(){0x0010ffff, 3, 0, 21},
  95.534 -
  95.535 -                /* Device 17, INTA - INTD */
  95.536 -                Package(){0x0011ffff, 0, 0, 22},
  95.537 -                Package(){0x0011ffff, 1, 0, 23},
  95.538 -                Package(){0x0011ffff, 2, 0, 24},
  95.539 -                Package(){0x0011ffff, 3, 0, 25},
  95.540 -
  95.541 -                /* Device 18, INTA - INTD */
  95.542 -                Package(){0x0012ffff, 0, 0, 26},
  95.543 -                Package(){0x0012ffff, 1, 0, 27},
  95.544 -                Package(){0x0012ffff, 2, 0, 28},
  95.545 -                Package(){0x0012ffff, 3, 0, 29},
  95.546 -
  95.547 -                /* Device 19, INTA - INTD */
  95.548 -                Package(){0x0013ffff, 0, 0, 30},
  95.549 -                Package(){0x0013ffff, 1, 0, 31},
  95.550 -                Package(){0x0013ffff, 2, 0, 32},
  95.551 -                Package(){0x0013ffff, 3, 0, 33},
  95.552 -
  95.553 -                /* Device 20, INTA - INTD */
  95.554 -                Package(){0x0014ffff, 0, 0, 34},
  95.555 -                Package(){0x0014ffff, 1, 0, 35},
  95.556 -                Package(){0x0014ffff, 2, 0, 36},
  95.557 -                Package(){0x0014ffff, 3, 0, 37},
  95.558 -
  95.559 -                /* Device 21, INTA - INTD */
  95.560 -                Package(){0x0015ffff, 0, 0, 38},
  95.561 -                Package(){0x0015ffff, 1, 0, 39},
  95.562 -                Package(){0x0015ffff, 2, 0, 40},
  95.563 -                Package(){0x0015ffff, 3, 0, 41},
  95.564 -
  95.565 -                /* Device 22, INTA - INTD */
  95.566 -                Package(){0x0016ffff, 0, 0, 42},
  95.567 -                Package(){0x0016ffff, 1, 0, 43},
  95.568 -                Package(){0x0016ffff, 2, 0, 44},
  95.569 -                Package(){0x0016ffff, 3, 0, 45},
  95.570 -
  95.571 -                /* Device 23, INTA - INTD */
  95.572 -                Package(){0x0017ffff, 0, 0, 46},
  95.573 -                Package(){0x0017ffff, 1, 0, 47},
  95.574 -                Package(){0x0017ffff, 2, 0, 16},
  95.575 -                Package(){0x0017ffff, 3, 0, 17},
  95.576 -
  95.577 -                /* Device 24, INTA - INTD */
  95.578 -                Package(){0x0018ffff, 0, 0, 19},
  95.579 -                Package(){0x0018ffff, 1, 0, 20},
  95.580 -                Package(){0x0018ffff, 2, 0, 21},
  95.581 -                Package(){0x0018ffff, 3, 0, 22},
  95.582 -
  95.583 -                /* Device 25, INTA - INTD */
  95.584 -                Package(){0x0019ffff, 0, 0, 23},
  95.585 -                Package(){0x0019ffff, 1, 0, 24},
  95.586 -                Package(){0x0019ffff, 2, 0, 25},
  95.587 -                Package(){0x0019ffff, 3, 0, 26},
  95.588 -
  95.589 -                /* Device 26, INTA - INTD */
  95.590 -                Package(){0x001affff, 0, 0, 27},
  95.591 -                Package(){0x001affff, 1, 0, 28},
  95.592 -                Package(){0x001affff, 2, 0, 29},
  95.593 -                Package(){0x001affff, 3, 0, 30},
  95.594 -
  95.595 -                /* Device 27, INTA - INTD */
  95.596 -                Package(){0x001bffff, 0, 0, 31},
  95.597 -                Package(){0x001bffff, 1, 0, 32},
  95.598 -                Package(){0x001bffff, 2, 0, 33},
  95.599 -                Package(){0x001bffff, 3, 0, 34},
  95.600 -
  95.601 -                /* Device 28, INTA - INTD */
  95.602 -                Package(){0x001cffff, 0, 0, 35},
  95.603 -                Package(){0x001cffff, 1, 0, 36},
  95.604 -                Package(){0x001cffff, 2, 0, 37},
  95.605 -                Package(){0x001cffff, 3, 0, 38},
  95.606 -
  95.607 -                /* Device 29, INTA - INTD */
  95.608 -                Package(){0x001dffff, 0, 0, 39},
  95.609 -                Package(){0x001dffff, 1, 0, 40},
  95.610 -                Package(){0x001dffff, 2, 0, 41},
  95.611 -                Package(){0x001dffff, 3, 0, 42},
  95.612 -
  95.613 -                /* Device 30, INTA - INTD */
  95.614 -                Package(){0x001effff, 0, 0, 43},
  95.615 -                Package(){0x001effff, 1, 0, 44},
  95.616 -                Package(){0x001effff, 2, 0, 45},
  95.617 -                Package(){0x001effff, 3, 0, 46},
  95.618 -
  95.619 -                /* Device 31, INTA - INTD */
  95.620 -                Package(){0x001fffff, 0, 0, 47},
  95.621 -                Package(){0x001fffff, 1, 0, 16},
  95.622 -                Package(){0x001fffff, 2, 0, 17},
  95.623 -                Package(){0x001fffff, 3, 0, 18},
  95.624 -            })
  95.625 -            
  95.626              Device (ISA)
  95.627              {
  95.628                  Name (_ADR, 0x00010000) /* device 1, fn 0 */
  95.629 @@ -951,7 +414,11 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
  95.630                      Name (_UID, 0x02)
  95.631                      Method (_STA, 0, NotSerialized)
  95.632                      {
  95.633 -                        Return (0x0F)
  95.634 +                        If(LEqual(\_SB.LTP1, 0)) {
  95.635 +                            Return(0x00)
  95.636 +                        } Else {
  95.637 +                            Return(0x0F)
  95.638 +                        }
  95.639                      }
  95.640  
  95.641                      Name (_CRS, ResourceTemplate()
  95.642 @@ -961,9657 +428,6 @@ DefinitionBlock ("DSDT.aml", "DSDT", 2, 
  95.643                      })
  95.644                  } 
  95.645              }
  95.646 -
  95.647 -            /******************************************************************
  95.648 -             * Each PCI hotplug slot needs at least two methods to handle
  95.649 -             * the ACPI event:
  95.650 -             *  _EJ0: eject a device
  95.651 -             *  _STA: return a device's status, e.g. enabled or removed
  95.652 -             * Other methods are optional: 
  95.653 -             *  _PS0/3: put them here for debug purpose
  95.654 -             * 
  95.655 -             * Eject button would generate a general-purpose event, then the
  95.656 -             * control method for this event uses Notify() to inform OSPM which
  95.657 -             * action happened and on which device.
  95.658 -             *
  95.659 -             * Pls. refer "6.3 Device Insertion, Removal, and Status Objects"
  95.660 -             * in ACPI spec 3.0b for details.
  95.661 -             *
  95.662 -             * QEMU provides a simple hotplug controller with some I/O to
  95.663 -             * handle the hotplug action and status, which is beyond the ACPI
  95.664 -             * scope.
  95.665 -             */
  95.666 -            Device(S00)
  95.667 -            {
  95.668 -                Name (_ADR, 0x00000000) /* Dev 0x00, Func 0x0 */
  95.669 -                Name (_SUN, 0x00000000)
  95.670 -
  95.671 -                Method (_PS0, 0)
  95.672 -                {
  95.673 -                    Store (0x00, \_GPE.DPT1)
  95.674 -                    Store (0x80, \_GPE.DPT2)
  95.675 -                }
  95.676 -
  95.677 -                Method (_PS3, 0)
  95.678 -                {
  95.679 -                    Store (0x00, \_GPE.DPT1)
  95.680 -                    Store (0x83, \_GPE.DPT2)
  95.681 -                }
  95.682 -
  95.683 -                Method (_EJ0, 1)
  95.684 -                {
  95.685 -                    Store (0x00, \_GPE.DPT1)
  95.686 -                    Store (0x88, \_GPE.DPT2)
  95.687 -                    Store (0x01, \_GPE.PH00) /* eject */
  95.688 -                }
  95.689 -
  95.690 -                Method (_STA, 0)
  95.691 -                {
  95.692 -                    Store (0x00, \_GPE.DPT1)
  95.693 -                    Store (0x89, \_GPE.DPT2)
  95.694 -                    And (\_GPE.PH00, 0x0f, Local1)
  95.695 -                    Return (Local1) /* IN status as the _STA */
  95.696 -                }
  95.697 -            }
  95.698 -
  95.699 -            Device(S01)
  95.700 -            {
  95.701 -                Name (_ADR, 0x00000001) /* Dev 0x00, Func 0x1 */
  95.702 -                Name (_SUN, 0x00000000)
  95.703 -
  95.704 -                Method (_PS0, 0)
  95.705 -                {
  95.706 -                    Store (0x01, \_GPE.DPT1)
  95.707 -                    Store (0x80, \_GPE.DPT2)
  95.708 -                }
  95.709 -
  95.710 -                Method (_PS3, 0)
  95.711 -                {
  95.712 -                    Store (0x01, \_GPE.DPT1)
  95.713 -                    Store (0x83, \_GPE.DPT2)
  95.714 -                }
  95.715 -
  95.716 -                Method (_EJ0, 1)
  95.717 -                {
  95.718 -                    Store (0x01, \_GPE.DPT1)
  95.719 -                    Store (0x88, \_GPE.DPT2)
  95.720 -                    Store (0x10, \_GPE.PH00) /* eject */
  95.721 -                }
  95.722 -
  95.723 -                Method (_STA, 0)
  95.724 -                {
  95.725 -                    Store (0x01, \_GPE.DPT1)
  95.726 -                    Store (0x89, \_GPE.DPT2)
  95.727 -                    ShiftRight (0x4, \_GPE.PH00, Local1)
  95.728 -                    Return (Local1) /* IN status as the _STA */
  95.729 -                }
  95.730 -            }
  95.731 -
  95.732 -            Device(S02)
  95.733 -            {
  95.734 -                Name (_ADR, 0x00000002) /* Dev 0x00, Func 0x2 */
  95.735 -                Name (_SUN, 0x00000000)
  95.736 -
  95.737 -                Method (_PS0, 0)
  95.738 -                {
  95.739 -                    Store (0x02, \_GPE.DPT1)
  95.740 -                    Store (0x80, \_GPE.DPT2)
  95.741 -                }
  95.742 -
  95.743 -                Method (_PS3, 0)
  95.744 -                {
  95.745 -                    Store (0x02, \_GPE.DPT1)
  95.746 -                    Store (0x83, \_GPE.DPT2)
  95.747 -                }
  95.748 -
  95.749 -                Method (_EJ0, 1)
  95.750 -                {
  95.751 -                    Store (0x02, \_GPE.DPT1)
  95.752 -                    Store (0x88, \_GPE.DPT2)
  95.753 -                    Store (0x01, \_GPE.PH02) /* eject */
  95.754 -                }
  95.755 -
  95.756 -                Method (_STA, 0)
  95.757 -                {
  95.758 -                    Store (0x02, \_GPE.DPT1)
  95.759 -                    Store (0x89, \_GPE.DPT2)
  95.760 -                    And (\_GPE.PH02, 0x0f, Local1)
  95.761 -                    Return (Local1) /* IN status as the _STA */
  95.762 -                }
  95.763 -            }
  95.764 -
  95.765 -            Device(S03)
  95.766 -            {
  95.767 -                Name (_ADR, 0x00000003) /* Dev 0x00, Func 0x3 */
  95.768 -                Name (_SUN, 0x00000000)
  95.769 -
  95.770 -                Method (_PS0, 0)
  95.771 -                {
  95.772 -                    Store (0x03, \_GPE.DPT1)
  95.773 -                    Store (0x80, \_GPE.DPT2)
  95.774 -                }
  95.775 -
  95.776 -                Method (_PS3, 0)
  95.777 -                {
  95.778 -                    Store (0x03, \_GPE.DPT1)
  95.779 -                    Store (0x83, \_GPE.DPT2)
  95.780 -                }
  95.781 -
  95.782 -                Method (_EJ0, 1)
  95.783 -                {
  95.784 -                    Store (0x03, \_GPE.DPT1)
  95.785 -                    Store (0x88, \_GPE.DPT2)
  95.786 -                    Store (0x10, \_GPE.PH02) /* eject */
  95.787 -                }
  95.788 -
  95.789 -                Method (_STA, 0)
  95.790 -                {
  95.791 -                    Store (0x03, \_GPE.DPT1)
  95.792 -                    Store (0x89, \_GPE.DPT2)
  95.793 -                    ShiftRight (0x4, \_GPE.PH02, Local1)
  95.794 -                    Return (Local1) /* IN status as the _STA */
  95.795 -                }
  95.796 -            }
  95.797 -
  95.798 -            Device(S04)
  95.799 -            {
  95.800 -                Name (_ADR, 0x00000004) /* Dev 0x00, Func 0x4 */
  95.801 -                Name (_SUN, 0x00000000)
  95.802 -
  95.803 -                Method (_PS0, 0)
  95.804 -                {
  95.805 -                    Store (0x04, \_GPE.DPT1)
  95.806 -                    Store (0x80, \_GPE.DPT2)
  95.807 -                }
  95.808 -
  95.809 -                Method (_PS3, 0)
  95.810 -                {
  95.811 -                    Store (0x04, \_GPE.DPT1)
  95.812 -                    Store (0x83, \_GPE.DPT2)
  95.813 -                }
  95.814 -
  95.815 -                Method (_EJ0, 1)
  95.816 -                {
  95.817 -                    Store (0x04, \_GPE.DPT1)
  95.818 -                    Store (0x88, \_GPE.DPT2)
  95.819 -                    Store (0x01, \_GPE.PH04) /* eject */
  95.820 -                }
  95.821 -
  95.822 -                Method (_STA, 0)
  95.823 -                {
  95.824 -                    Store (0x04, \_GPE.DPT1)
  95.825 -                    Store (0x89, \_GPE.DPT2)
  95.826 -                    And (\_GPE.PH04, 0x0f, Local1)
  95.827 -                    Return (Local1) /* IN status as the _STA */
  95.828 -                }
  95.829 -            }
  95.830 -
  95.831 -            Device(S05)
  95.832 -            {
  95.833 -                Name (_ADR, 0x00000005) /* Dev 0x00, Func 0x5 */
  95.834 -                Name (_SUN, 0x00000000)
  95.835 -
  95.836 -                Method (_PS0, 0)
  95.837 -                {
  95.838 -                    Store (0x05, \_GPE.DPT1)
  95.839 -                    Store (0x80, \_GPE.DPT2)
  95.840 -                }
  95.841 -
  95.842 -                Method (_PS3, 0)
  95.843 -                {
  95.844 -                    Store (0x05, \_GPE.DPT1)
  95.845 -                    Store (0x83, \_GPE.DPT2)
  95.846 -                }
  95.847 -
  95.848 -                Method (_EJ0, 1)
  95.849 -                {
  95.850 -                    Store (0x05, \_GPE.DPT1)
  95.851 -                    Store (0x88, \_GPE.DPT2)
  95.852 -                    Store (0x10, \_GPE.PH04) /* eject */
  95.853 -                }
  95.854 -
  95.855 -                Method (_STA, 0)
  95.856 -                {
  95.857 -                    Store (0x05, \_GPE.DPT1)
  95.858 -                    Store (0x89, \_GPE.DPT2)
  95.859 -                    ShiftRight (0x4, \_GPE.PH04, Local1)
  95.860 -                    Return (Local1) /* IN status as the _STA */
  95.861 -                }
  95.862 -            }
  95.863 -
  95.864 -            Device(S06)
  95.865 -            {
  95.866 -                Name (_ADR, 0x00000006) /* Dev 0x00, Func 0x6 */
  95.867 -                Name (_SUN, 0x00000000)
  95.868 -
  95.869 -                Method (_PS0, 0)
  95.870 -                {
  95.871 -                    Store (0x06, \_GPE.DPT1)
  95.872 -                    Store (0x80, \_GPE.DPT2)
  95.873 -                }
  95.874 -
  95.875 -                Method (_PS3, 0)
  95.876 -                {
  95.877 -                    Store (0x06, \_GPE.DPT1)
  95.878 -                    Store (0x83, \_GPE.DPT2)
  95.879 -                }
  95.880 -
  95.881 -                Method (_EJ0, 1)
  95.882 -                {
  95.883 -                    Store (0x06, \_GPE.DPT1)
  95.884 -                    Store (0x88, \_GPE.DPT2)
  95.885 -                    Store (0x01, \_GPE.PH06) /* eject */
  95.886 -                }
  95.887 -
  95.888 -                Method (_STA, 0)
  95.889 -                {
  95.890 -                    Store (0x06, \_GPE.DPT1)
  95.891 -                    Store (0x89, \_GPE.DPT2)
  95.892 -                    And (\_GPE.PH06, 0x0f, Local1)
  95.893 -                    Return (Local1) /* IN status as the _STA */
  95.894 -                }
  95.895 -            }
  95.896 -
  95.897 -            Device(S07)
  95.898 -            {
  95.899 -                Name (_ADR, 0x00000007) /* Dev 0x00, Func 0x7 */
  95.900 -                Name (_SUN, 0x00000000)
  95.901 -
  95.902 -                Method (_PS0, 0)
  95.903 -                {
  95.904 -                    Store (0x07, \_GPE.DPT1)
  95.905 -                    Store (0x80, \_GPE.DPT2)
  95.906 -                }
  95.907 -
  95.908 -                Method (_PS3, 0)
  95.909 -                {
  95.910 -                    Store (0x07, \_GPE.DPT1)
  95.911 -                    Store (0x83, \_GPE.DPT2)
  95.912 -                }
  95.913 -
  95.914 -                Method (_EJ0, 1)
  95.915 -                {
  95.916 -                    Store (0x07, \_GPE.DPT1)
  95.917 -                    Store (0x88, \_GPE.DPT2)
  95.918 -                    Store (0x10, \_GPE.PH06) /* eject */
  95.919 -                }
  95.920 -
  95.921 -                Method (_STA, 0)
  95.922 -                {
  95.923 -                    Store (0x07, \_GPE.DPT1)
  95.924 -                    Store (0x89, \_GPE.DPT2)
  95.925 -                    ShiftRight (0x4, \_GPE.PH06, Local1)
  95.926 -                    Return (Local1) /* IN status as the _STA */
  95.927 -                }
  95.928 -            }
  95.929 -
  95.930 -            Device(S08)
  95.931 -            {
  95.932 -                Name (_ADR, 0x00010000) /* Dev 0x01, Func 0x0 */
  95.933 -                Name (_SUN, 0x00000001)
  95.934 -
  95.935 -                Method (_PS0, 0)
  95.936 -                {
  95.937 -                    Store (0x08, \_GPE.DPT1)
  95.938 -                    Store (0x80, \_GPE.DPT2)
  95.939 -                }
  95.940 -
  95.941 -                Method (_PS3, 0)
  95.942 -                {
  95.943 -                    Store (0x08, \_GPE.DPT1)
  95.944 -                    Store (0x83, \_GPE.DPT2)
  95.945 -                }
  95.946 -
  95.947 -                Method (_EJ0, 1)
  95.948 -                {
  95.949 -                    Store (0x08, \_GPE.DPT1)
  95.950 -                    Store (0x88, \_GPE.DPT2)
  95.951 -                    Store (0x01, \_GPE.PH08) /* eject */
  95.952 -                }
  95.953 -
  95.954 -                Method (_STA, 0)
  95.955 -                {
  95.956 -                    Store (0x08, \_GPE.DPT1)
  95.957 -                    Store (0x89, \_GPE.DPT2)
  95.958 -                    And (\_GPE.PH08, 0x0f, Local1)
  95.959 -                    Return (Local1) /* IN status as the _STA */
  95.960 -                }
  95.961 -            }
  95.962 -
  95.963 -            Device(S09)
  95.964 -            {
  95.965 -                Name (_ADR, 0x00010001) /* Dev 0x01, Func 0x1 */
  95.966 -                Name (_SUN, 0x00000001)
  95.967 -
  95.968 -                Method (_PS0, 0)
  95.969 -                {
  95.970 -                    Store (0x09, \_GPE.DPT1)
  95.971 -                    Store (0x80, \_GPE.DPT2)
  95.972 -                }
  95.973 -
  95.974 -                Method (_PS3, 0)
  95.975 -                {
  95.976 -                    Store (0x09, \_GPE.DPT1)
  95.977 -                    Store (0x83, \_GPE.DPT2)
  95.978 -                }
  95.979 -
  95.980 -                Method (_EJ0, 1)
  95.981 -                {
  95.982 -                    Store (0x09, \_GPE.DPT1)
  95.983 -                    Store (0x88, \_GPE.DPT2)
  95.984 -                    Store (0x10, \_GPE.PH08) /* eject */
  95.985 -                }
  95.986 -
  95.987 -                Method (_STA, 0)
  95.988 -                {
  95.989 -                    Store (0x09, \_GPE.DPT1)
  95.990 -                    Store (0x89, \_GPE.DPT2)
  95.991 -                    ShiftRight (0x4, \_GPE.PH08, Local1)
  95.992 -                    Return (Local1) /* IN status as the _STA */
  95.993 -                }
  95.994 -            }
  95.995 -
  95.996 -            Device(S0A)
  95.997 -            {
  95.998 -                Name (_ADR, 0x00010002) /* Dev 0x01, Func 0x2 */
  95.999 -                Name (_SUN, 0x00000001)
 95.1000 -
 95.1001 -                Method (_PS0, 0)
 95.1002 -                {
 95.1003 -                    Store (0x0a, \_GPE.DPT1)
 95.1004 -                    Store (0x80, \_GPE.DPT2)
 95.1005 -                }
 95.1006 -
 95.1007 -                Method (_PS3, 0)
 95.1008 -                {
 95.1009 -                    Store (0x0a, \_GPE.DPT1)
 95.1010 -                    Store (0x83, \_GPE.DPT2)
 95.1011 -                }
 95.1012 -
 95.1013 -                Method (_EJ0, 1)
 95.1014 -                {
 95.1015 -                    Store (0x0a, \_GPE.DPT1)
 95.1016 -                    Store (0x88, \_GPE.DPT2)
 95.1017 -                    Store (0x01, \_GPE.PH0A) /* eject */
 95.1018 -                }
 95.1019 -
 95.1020 -                Method (_STA, 0)
 95.1021 -                {
 95.1022 -                    Store (0x0a, \_GPE.DPT1)
 95.1023 -                    Store (0x89, \_GPE.DPT2)
 95.1024 -                    And (\_GPE.PH0A, 0x0f, Local1)
 95.1025 -                    Return (Local1) /* IN status as the _STA */
 95.1026 -                }
 95.1027 -            }
 95.1028 -
 95.1029 -            Device(S0B)
 95.1030 -            {
 95.1031 -                Name (_ADR, 0x00010003) /* Dev 0x01, Func 0x3 */
 95.1032 -                Name (_SUN, 0x00000001)
 95.1033 -
 95.1034 -                Method (_PS0, 0)
 95.1035 -                {
 95.1036 -                    Store (0x0b, \_GPE.DPT1)
 95.1037 -                    Store (0x80, \_GPE.DPT2)
 95.1038 -                }
 95.1039 -
 95.1040 -                Method (_PS3, 0)
 95.1041 -                {
 95.1042 -                    Store (0x0b, \_GPE.DPT1)
 95.1043 -                    Store (0x83, \_GPE.DPT2)
 95.1044 -                }
 95.1045 -
 95.1046 -                Method (_EJ0, 1)
 95.1047 -                {
 95.1048 -                    Store (0x0b, \_GPE.DPT1)
 95.1049 -                    Store (0x88, \_GPE.DPT2)
 95.1050 -                    Store (0x10, \_GPE.PH0A) /* eject */
 95.1051 -                }
 95.1052 -
 95.1053 -                Method (_STA, 0)
 95.1054 -                {
 95.1055 -                    Store (0x0b, \_GPE.DPT1)
 95.1056 -                    Store (0x89, \_GPE.DPT2)
 95.1057 -                    ShiftRight (0x4, \_GPE.PH0A, Local1)
 95.1058 -                    Return (Local1) /* IN status as the _STA */
 95.1059 -                }
 95.1060 -            }
 95.1061 -
 95.1062 -            Device(S0C)
 95.1063 -            {
 95.1064 -                Name (_ADR, 0x00010004) /* Dev 0x01, Func 0x4 */
 95.1065 -                Name (_SUN, 0x00000001)
 95.1066 -
 95.1067 -                Method (_PS0, 0)
 95.1068 -                {
 95.1069 -                    Store (0x0c, \_GPE.DPT1)
 95.1070 -                    Store (0x80, \_GPE.DPT2)
 95.1071 -                }
 95.1072 -
 95.1073 -                Method (_PS3, 0)
 95.1074 -                {
 95.1075 -                    Store (0x0c, \_GPE.DPT1)
 95.1076 -                    Store (0x83, \_GPE.DPT2)
 95.1077 -                }
 95.1078 -
 95.1079 -                Method (_EJ0, 1)
 95.1080 -                {
 95.1081 -                    Store (0x0c, \_GPE.DPT1)
 95.1082 -                    Store (0x88, \_GPE.DPT2)
 95.1083 -                    Store (0x01, \_GPE.PH0C) /* eject */
 95.1084 -                }
 95.1085 -
 95.1086 -                Method (_STA, 0)
 95.1087 -                {
 95.1088 -                    Store (0x0c, \_GPE.DPT1)
 95.1089 -                    Store (0x89, \_GPE.DPT2)
 95.1090 -                    And (\_GPE.PH0C, 0x0f, Local1)
 95.1091 -                    Return (Local1) /* IN status as the _STA */
 95.1092 -                }
 95.1093 -            }
 95.1094 -
 95.1095 -            Device(S0D)
 95.1096 -            {
 95.1097 -                Name (_ADR, 0x00010005) /* Dev 0x01, Func 0x5 */
 95.1098 -                Name (_SUN, 0x00000001)
 95.1099 -
 95.1100 -                Method (_PS0, 0)
 95.1101 -                {
 95.1102 -                    Store (0x0d, \_GPE.DPT1)
 95.1103 -                    Store (0x80, \_GPE.DPT2)
 95.1104 -                }
 95.1105 -
 95.1106 -                Method (_PS3, 0)
 95.1107 -                {
 95.1108 -                    Store (0x0d, \_GPE.DPT1)
 95.1109 -                    Store (0x83, \_GPE.DPT2)
 95.1110 -                }
 95.1111 -
 95.1112 -                Method (_EJ0, 1)
 95.1113 -                {
 95.1114 -                    Store (0x0d, \_GPE.DPT1)
 95.1115 -                    Store (0x88, \_GPE.DPT2)
 95.1116 -                    Store (0x10, \_GPE.PH0C) /* eject */
 95.1117 -                }
 95.1118 -
 95.1119 -                Method (_STA, 0)
 95.1120 -                {
 95.1121 -                    Store (0x0d, \_GPE.DPT1)
 95.1122 -                    Store (0x89, \_GPE.DPT2)
 95.1123 -                    ShiftRight (0x4, \_GPE.PH0C, Local1)
 95.1124 -                    Return (Local1) /* IN status as the _STA */
 95.1125 -                }
 95.1126 -            }
 95.1127 -
 95.1128 -            Device(S0E)
 95.1129 -            {
 95.1130 -                Name (_ADR, 0x00010006) /* Dev 0x01, Func 0x6 */
 95.1131 -                Name (_SUN, 0x00000001)
 95.1132 -
 95.1133 -                Method (_PS0, 0)
 95.1134 -                {
 95.1135 -                    Store (0x0e, \_GPE.DPT1)
 95.1136 -                    Store (0x80, \_GPE.DPT2)
 95.1137 -                }
 95.1138 -
 95.1139 -                Method (_PS3, 0)
 95.1140 -                {
 95.1141 -                    Store (0x0e, \_GPE.DPT1)
 95.1142 -                    Store (0x83, \_GPE.DPT2)
 95.1143 -                }
 95.1144 -
 95.1145 -                Method (_EJ0, 1)
 95.1146 -                {
 95.1147 -                    Store (0x0e, \_GPE.DPT1)
 95.1148 -                    Store (0x88, \_GPE.DPT2)
 95.1149 -                    Store (0x01, \_GPE.PH0E) /* eject */
 95.1150 -                }
 95.1151 -
 95.1152 -                Method (_STA, 0)
 95.1153 -                {
 95.1154 -                    Store (0x0e, \_GPE.DPT1)
 95.1155 -                    Store (0x89, \_GPE.DPT2)
 95.1156 -                    And (\_GPE.PH0E, 0x0f, Local1)
 95.1157 -                    Return (Local1) /* IN status as the _STA */
 95.1158 -                }
 95.1159 -            }
 95.1160 -
 95.1161 -            Device(S0F)
 95.1162 -            {
 95.1163 -                Name (_ADR, 0x00010007) /* Dev 0x01, Func 0x7 */
 95.1164 -                Name (_SUN, 0x00000001)
 95.1165 -
 95.1166 -                Method (_PS0, 0)
 95.1167 -                {
 95.1168 -                    Store (0x0f, \_GPE.DPT1)
 95.1169 -                    Store (0x80, \_GPE.DPT2)
 95.1170 -                }
 95.1171 -
 95.1172 -                Method (_PS3, 0)
 95.1173 -                {
 95.1174 -                    Store (0x0f, \_GPE.DPT1)
 95.1175 -                    Store (0x83, \_GPE.DPT2)
 95.1176 -                }
 95.1177 -
 95.1178 -                Method (_EJ0, 1)
 95.1179 -                {
 95.1180 -                    Store (0x0f, \_GPE.DPT1)
 95.1181 -                    Store (0x88, \_GPE.DPT2)
 95.1182 -                    Store (0x10, \_GPE.PH0E) /* eject */
 95.1183 -                }
 95.1184 -
 95.1185 -                Method (_STA, 0)
 95.1186 -                {
 95.1187 -                    Store (0x0f, \_GPE.DPT1)
 95.1188 -                    Store (0x89, \_GPE.DPT2)
 95.1189 -                    ShiftRight (0x4, \_GPE.PH0E, Local1)
 95.1190 -                    Return (Local1) /* IN status as the _STA */
 95.1191 -                }
 95.1192 -            }
 95.1193 -
 95.1194 -            Device(S10)
 95.1195 -            {
 95.1196 -                Name (_ADR, 0x00020000) /* Dev 0x02, Func 0x0 */
 95.1197 -                Name (_SUN, 0x00000002)
 95.1198 -
 95.1199 -                Method (_PS0, 0)
 95.1200 -                {
 95.1201 -                    Store (0x10, \_GPE.DPT1)
 95.1202 -                    Store (0x80, \_GPE.DPT2)
 95.1203 -                }
 95.1204 -
 95.1205 -                Method (_PS3, 0)
 95.1206 -                {
 95.1207 -                    Store (0x10, \_GPE.DPT1)
 95.1208 -                    Store (0x83, \_GPE.DPT2)
 95.1209 -                }
 95.1210 -
 95.1211 -                Method (_EJ0, 1)
 95.1212 -                {
 95.1213 -                    Store (0x10, \_GPE.DPT1)
 95.1214 -                    Store (0x88, \_GPE.DPT2)
 95.1215 -                    Store (0x01, \_GPE.PH10) /* eject */
 95.1216 -                }
 95.1217 -
 95.1218 -                Method (_STA, 0)
 95.1219 -                {
 95.1220 -                    Store (0x10, \_GPE.DPT1)
 95.1221 -                    Store (0x89, \_GPE.DPT2)
 95.1222 -                    And (\_GPE.PH10, 0x0f, Local1)
 95.1223 -                    Return (Local1) /* IN status as the _STA */
 95.1224 -                }
 95.1225 -            }
 95.1226 -
 95.1227 -            Device(S11)
 95.1228 -            {
 95.1229 -                Name (_ADR, 0x00020001) /* Dev 0x02, Func 0x1 */
 95.1230 -                Name (_SUN, 0x00000002)
 95.1231 -
 95.1232 -                Method (_PS0, 0)
 95.1233 -                {
 95.1234 -                    Store (0x11, \_GPE.DPT1)
 95.1235 -                    Store (0x80, \_GPE.DPT2)
 95.1236 -                }
 95.1237 -
 95.1238 -                Method (_PS3, 0)
 95.1239 -                {
 95.1240 -                    Store (0x11, \_GPE.DPT1)
 95.1241 -                    Store (0x83, \_GPE.DPT2)
 95.1242 -                }
 95.1243 -
 95.1244 -                Method (_EJ0, 1)
 95.1245 -                {
 95.1246 -                    Store (0x11, \_GPE.DPT1)
 95.1247 -                    Store (0x88, \_GPE.DPT2)
 95.1248 -                    Store (0x10, \_GPE.PH10) /* eject */
 95.1249 -                }
 95.1250 -
 95.1251 -                Method (_STA, 0)
 95.1252 -                {
 95.1253 -                    Store (0x11, \_GPE.DPT1)
 95.1254 -                    Store (0x89, \_GPE.DPT2)
 95.1255 -                    ShiftRight (0x4, \_GPE.PH10, Local1)
 95.1256 -                    Return (Local1) /* IN status as the _STA */
 95.1257 -                }
 95.1258 -            }
 95.1259 -
 95.1260 -            Device(S12)
 95.1261 -            {
 95.1262 -                Name (_ADR, 0x00020002) /* Dev 0x02, Func 0x2 */
 95.1263 -                Name (_SUN, 0x00000002)
 95.1264 -
 95.1265 -                Method (_PS0, 0)
 95.1266 -                {
 95.1267 -                    Store (0x12, \_GPE.DPT1)
 95.1268 -                    Store (0x80, \_GPE.DPT2)
 95.1269 -                }
 95.1270 -
 95.1271 -                Method (_PS3, 0)
 95.1272 -                {
 95.1273 -                    Store (0x12, \_GPE.DPT1)
 95.1274 -                    Store (0x83, \_GPE.DPT2)
 95.1275 -                }
 95.1276 -
 95.1277 -                Method (_EJ0, 1)
 95.1278 -                {
 95.1279 -                    Store (0x12, \_GPE.DPT1)
 95.1280 -                    Store (0x88, \_GPE.DPT2)
 95.1281 -                    Store (0x01, \_GPE.PH12) /* eject */
 95.1282 -                }
 95.1283 -
 95.1284 -                Method (_STA, 0)
 95.1285 -                {
 95.1286 -                    Store (0x12, \_GPE.DPT1)
 95.1287 -                    Store (0x89, \_GPE.DPT2)
 95.1288 -                    And (\_GPE.PH12, 0x0f, Local1)
 95.1289 -                    Return (Local1) /* IN status as the _STA */
 95.1290 -                }
 95.1291 -            }
 95.1292 -
 95.1293 -            Device(S13)
 95.1294 -            {
 95.1295 -                Name (_ADR, 0x00020003) /* Dev 0x02, Func 0x3 */
 95.1296 -                Name (_SUN, 0x00000002)
 95.1297 -
 95.1298 -                Method (_PS0, 0)
 95.1299 -                {
 95.1300 -                    Store (0x13, \_GPE.DPT1)
 95.1301 -                    Store (0x80, \_GPE.DPT2)
 95.1302 -                }
 95.1303 -
 95.1304 -                Method (_PS3, 0)
 95.1305 -                {
 95.1306 -                    Store (0x13, \_GPE.DPT1)
 95.1307 -                    Store (0x83, \_GPE.DPT2)
 95.1308 -                }
 95.1309 -
 95.1310 -                Method (_EJ0, 1)
 95.1311 -                {
 95.1312 -                    Store (0x13, \_GPE.DPT1)
 95.1313 -                    Store (0x88, \_GPE.DPT2)
 95.1314 -                    Store (0x10, \_GPE.PH12) /* eject */
 95.1315 -                }
 95.1316 -
 95.1317 -                Method (_STA, 0)
 95.1318 -                {
 95.1319 -                    Store (0x13, \_GPE.DPT1)
 95.1320 -                    Store (0x89, \_GPE.DPT2)
 95.1321 -                    ShiftRight (0x4, \_GPE.PH12, Local1)
 95.1322 -                    Return (Local1) /* IN status as the _STA */
 95.1323 -                }
 95.1324 -            }
 95.1325 -
 95.1326 -            Device(S14)
 95.1327 -            {
 95.1328 -                Name (_ADR, 0x00020004) /* Dev 0x02, Func 0x4 */
 95.1329 -                Name (_SUN, 0x00000002)
 95.1330 -
 95.1331 -                Method (_PS0, 0)
 95.1332 -                {
 95.1333 -                    Store (0x14, \_GPE.DPT1)
 95.1334 -                    Store (0x80, \_GPE.DPT2)
 95.1335 -                }
 95.1336 -
 95.1337 -                Method (_PS3, 0)
 95.1338 -                {
 95.1339 -                    Store (0x14, \_GPE.DPT1)
 95.1340 -                    Store (0x83, \_GPE.DPT2)
 95.1341 -                }
 95.1342 -
 95.1343 -                Method (_EJ0, 1)
 95.1344 -                {
 95.1345