# -*- mode: Makefile; -*-
-# A debug build of Xen and tools?
-debug ?= n
+# A debug build of Xen and tools? TEMPORARILY ENABLED
+debug ?= y
XEN_COMPILE_ARCH ?= $(shell uname -m | sed -e s/i.86/x86_32/ \
-e s/i86pc/x86_32/ -e s/amd64/x86_64/)
cc-option = $(shell if test -z "`$(1) $(2) -S -o /dev/null -xc \
/dev/null 2>&1`"; then echo "$(2)"; else echo "$(3)"; fi ;)
+# cc-option-add: Add an option to compilation flags, but only if supported.
+# Usage: $(call cc-option-add,CFLAGS,CC,-march=winchip-c6)
+cc-option-add = $(eval $(call cc-option-add-closure,$(1),$(2),$(3)))
+define cc-option-add-closure
+ ifneq ($$(call cc-option,$$($(2)),$(3),n),n)
+ $(1) += $(3)
+ endif
+endef
+
# cc-ver: Check compiler is at least specified version. Return boolean 'y'/'n'.
# Usage: ifeq ($(call cc-ver,$(CC),0x030400),y)
cc-ver = $(shell if [ $$((`$(1) -dumpversion | awk -F. \
# result of any cast expression causes a warning.
CFLAGS += -Wno-unused-value
-HOSTCFLAGS += $(call cc-option,$(HOSTCC),-Wdeclaration-after-statement,)
-CFLAGS += $(call cc-option,$(CC),-Wdeclaration-after-statement,)
+$(call cc-option-add,HOSTCFLAGS,HOSTCC,-Wdeclaration-after-statement)
+$(call cc-option-add,CFLAGS,CC,-Wdeclaration-after-statement)
LDFLAGS += $(foreach i, $(EXTRA_LIB), -L$(i))
CFLAGS += $(foreach i, $(EXTRA_INCLUDES), -I$(i))
FLASK_ENABLE ?= n
ACM_SECURITY ?= n
+XEN_EXTFILES_URL=http://xenbits.xensource.com/xen-extfiles
+# All the files at that location were downloaded from elsewhere on
+# the internet. The original download URL is preserved as a comment
+# near the place in the Xen Makefiles where the file is used.
+
QEMU_REMOTE=http://xenbits.xensource.com/git-http/qemu-xen-unstable.git
# Specify which qemu-dm to use. This may be `ioemu' to use the old
rm -rf $(D)/etc/hotplug/xen-backend.agent
rm -f $(D)/etc/udev/rules.d/xen-backend.rules
rm -f $(D)/etc/udev/xen-backend.rules
+ rm -f $(D)/etc/udev/rules.d/xend.rules
+ rm -f $(D)/etc/udev/xend.rules
rm -f $(D)/etc/sysconfig/xendomains
rm -rf $(D)/var/run/xen* $(D)/var/lib/xen*
rm -rf $(D)/boot/*xen*
#
TBOOT_TARFILE = tboot-20080613.tar.gz
-TBOOT_BASE_URL = http://downloads.sourceforge.net/tboot
+#TBOOT_BASE_URL = http://downloads.sourceforge.net/tboot
+TBOOT_BASE_URL = $(XEN_EXTFILES_URL)
.PHONY: build-tboot
build-tboot: download_tboot
# Then append the new value
case ${VALUE} in
- y|m) echo "${OPTION}=${VALUE}" >> "${CONFIG}" ;;
- n) echo "# ${OPTION} is not set" >> "${CONFIG}" ;;
- *) echo "Invalid value ${VALUE} for ${OPTION}" 1>&2 ; exit 1 ;;
+ n) echo "# ${OPTION} is not set" >> "${CONFIG}" ;;
+ y|m|*) echo "${OPTION}=${VALUE}" >> "${CONFIG}" ;;
esac
}
setopt CONFIG_PARAVIRT y
+setopt CONFIG_PARAVIRT_DEBUG y
setopt CONFIG_PARAVIRT_GUEST y
+
setopt CONFIG_XEN y
+setopt CONFIG_XEN_BLKDEV_FRONTEND y
+setopt CONFIG_XEN_NETDEV_FRONTEND y
+setopt CONFIG_XEN_KBDDEV_FRONTEND y
+setopt CONFIG_XEN_FBDEV_FRONTEND y
+setopt CONFIG_XEN_BALLOON y
+setopt CONFIG_XEN_SCRUB_PAGES y
+setopt CONFIG_XEN_DEV_EVTCHN y
+setopt CONFIG_XEN_BACKEND y
+setopt CONFIG_XEN_BLKDEV_BACKEND y
+setopt CONFIG_XEN_NETDEV_BACKEND y
+setopt CONFIG_XENFS y
+setopt CONFIG_XEN_COMPAT_XENFS y
+setopt CONFIG_HVC_XEN y
+setopt CONFIG_XEN_MAX_DOMAIN_MEMORY 32
+setopt CONFIG_XEN_DEBUG_FS y
+setopt CONFIG_XEN_DOM0 y
+
setopt CONFIG_VMI y
+
setopt CONFIG_KVM y
setopt CONFIG_KVM_INTEL y
setopt CONFIG_KVM_AMD y
+setopt CONFIG_KVM_CLOCK y
+setopt CONFIG_KVM_GUEST n
+setopt CONFIG_KVM_TRACE n
+
setopt CONFIG_LGUEST n
-setopt CONFIG_XEN_BLKDEV_FRONTEND y
-setopt CONFIG_XEN_NETDEV_FRONTEND y
-setopt CONFIG_HVC_XEN y
-setopt CONFIG_NUMA n
+
setopt CONFIG_LOCALVERSION_AUTO n
+# Should all be set one way or another in defconfig but aren't
+setopt CONFIG_NUMA n
+setopt CONFIG_X86_VSMP n
+setopt CONFIG_X86_UV n
+setopt CONFIG_CALGARY_IOMMU n
+setopt CONFIG_AMD_IOMMU n
+setopt CONFIG_MAXSMP n
+setopt CONFIG_SPARSEMEM_VMEMMAP n
+setopt CONFIG_I7300_IDLE n
+setopt CONFIG_DMAR n
+setopt CONFIG_INTR_REMAP n
+setopt CONFIG_GFS2_FS n
+setopt CONFIG_IOMMU_DEBUG n
+
case ${XEN_TARGET_ARCH} in
x86_32) setopt CONFIG_64BIT n ;;
- x86_64) setopt CONFIG_64BIT y ;;
+ x86_64)
+ setopt CONFIG_64BIT y
+ setopt CONFIG_IA32_EMULATION y
+ setopt CONFIG_IA32_AOUT n
+ setopt CONFIG_CRYPTO_AES_X86_64 n
+ setopt CONFIG_CRYPTO_SALSA20_X86_64 n
+ setopt CONFIG_CRYPTO_TWOFISH_X86_64 n
+ ;;
*) ;;
esac
endif
$(__NONINT_CONFIG) $(MAKE) -C $(LINUX_SRCDIR) ARCH=$(LINUX_ARCH) oldconfig O=$$(/bin/pwd)/$(LINUX_DIR)
@set -e ; if [ ! -f $(LINUX_DIR)/Makefile ] ; then \
- echo "***********************************"; \
+ echo "==================================="; \
echo "oldconfig did not create a Makefile"; \
echo "Generating $(LINUX_DIR)/Makefile "; \
- echo "***********************************"; \
+ echo "==================================="; \
( echo "# Automatically generated: don't edit"; \
echo ""; \
echo "VERSION = 2"; \
--- /dev/null
+XEN_LINUX_SOURCE ?= git-clone
+LINUX_VER ?= 2.6-pvops
+
+IMAGE_TARGET ?= bzImage
+
+XEN_LINUX_CONFIG_UPDATE := buildconfigs/enable-xen-config
+
+XEN_LINUX_GIT_URL ?= git://git.kernel.org/pub/scm/linux/kernel/git/jeremy/xen.git
+XEN_LINUX_GIT_REMOTENAME ?= xen
+XEN_LINUX_GIT_REMOTEBRANCH ?= xen/dom0/hackery
+
+EXTRAVERSION ?=
+
+include buildconfigs/mk.linux-2.6-common
# download a pristine Linux kernel tarball if there isn't one in LINUX_SRC_PATH
linux-%.tar.bz2:
@echo "Cannot find $@ in path $(LINUX_SRC_PATH)"
- wget $(XEN_LINUX_MIRROR)/$@ -O./$@
+ false wget $(XEN_LINUX_MIRROR)/$@ -O./$@
# XXX create a pristine tree for diff -Nurp convenience
# Override settings for this OS
CURSES_LIBS = -lcurses
+
+LIBLEAFDIR_x86_64 = lib
+LIBEXEC = $(PREFIX)/libexec
+PRIVATE_BINDIR = $(BINDIR)
BINDIR = $(PREFIX)/bin
INCLUDEDIR = $(PREFIX)/include
LIBLEAFDIR = lib
+LIBLEAFDIR_x86_32 = lib
LIBLEAFDIR_x86_64 = lib64
LIBDIR = $(PREFIX)/$(LIBLEAFDIR)
+LIBDIR_x86_32 = $(PREFIX)/$(LIBLEAFDIR_x86_32)
LIBDIR_x86_64 = $(PREFIX)/$(LIBLEAFDIR_x86_64)
+LIBEXEC = $(LIBDIR_x86_32)/xen/bin
MANDIR = $(PREFIX)/share/man
MAN1DIR = $(MANDIR)/man1
MAN8DIR = $(MANDIR)/man8
silent_which ()
{
which $1 1>/dev/null 2>/dev/null || {
- echo "*************************************************"
- echo "*************************************************"
- echo "* WARNING: Package '$1' is required"
- echo "* to build Xen documentation"
- echo "*************************************************"
- echo "*************************************************"
+ echo "================================================="
+ echo "================================================="
+ echo "= WARNING: Package '$1' is required"
+ echo "= to build Xen documentation"
+ echo "================================================="
+ echo "================================================="
}
which $1 1>/dev/null 2>/dev/null
}
so running curses based interfaces over the console B<is not
advised>. Vi tends to get very odd when using it over this interface.
+Use the key combination Ctrl+] to detach the domain console.
+
=item B<create> I<configfile> [I<OPTIONS>] [I<vars>]..
The create subcommand requires a config file and can optionally take a
e_ident[EI_OSABI] = ELFOSABI_SYSV = 0
e_type = ET_CORE = 4
ELFCLASS64 is always used independent of architecture.
-e_ident[EI_DATA] and e_flags are set according to the dumping system's
-architecture. Other members are set as usual.
+e_ident[EI_DATA] is set as follows:
+  For the x86 PV domain case, it is set according to the guest configuration
+  (i.e. if the guest is 32bit it is set to EM_386 even when dom0 is 64bit.)
+  For other domain cases (x86 HVM domains and ia64 domains),
+  it is set according to the dumping system's architecture.
+e_flags is set according to the dumping system's architecture.
+Other members are set as usual.
Sections
--------
The format version isn't bumped because analysis tools can distinguish it.
- .xen_ia64_mapped_regs section was made only for ia64 PV domain.
In case of IA64 HVM domain, this section doesn't exist.
+- elf header e_ident[EI_DATA]
+  In the x86 PV domain case, it is set according to the guest configuration.
+  I.e. in the 32-on-64 case, the file will be set to EM_386 instead of
+  EM_X86_64. This is the same as the 32-on-32 case, so there is no impact
+  on analysis tools.
module /boot/vmlinuz-2.6.18.8-xen root=LABEL=/ ro xencons=ttyS console=tty0 console=ttyS0, pciback.hide=(01:00.0)(03:00.0)
module /boot/initrd-2.6.18-xen.img
-12) reboot system
+  or use dynamic hiding via the PCI backend sysfs interface:
+    a) check whether a driver is bound to the device
+       ls -l /sys/bus/pci/devices/0000:01:00.0/driver
+       ... /sys/bus/pci/devices/0000:01:00.0/driver -> ../../../../bus/pci/drivers/igb
+    b) if so, unbind the driver from the device first
+       echo -n 0000:01:00.0 >/sys/bus/pci/drivers/igb/unbind
+    c) add the device to the PCI backend
+       echo -n 0000:01:00.0 >/sys/bus/pci/drivers/pciback/new_slot
+    d) let the PCI backend bind to the device
+       echo -n 0000:01:00.0 >/sys/bus/pci/drivers/pciback/bind
+
+12) reboot system (not required if you use the dynamic hiding method)
13) add a "pci" line in /etc/xen/hvm.conf for the assigned devices
pci = [ '01:00.0', '03:00.0' ]
15) start hvm guest and use "lspci" to see the passthru device and
Add "msi=1" option in kernel line of host grub.
+MSI-INTx translation for passthrough devices in HVM
+---------------------------------------------------
+
+If the assigned device uses a physical IRQ that is shared by more than
+one device among multiple domains, there may be significant impact on
+device performance. Unfortunately, this is quite a common case if the
+IO-APIC (INTx) IRQ is used. MSI can avoid this issue, but is only
+available if the guest enables it.
+
+With MSI-INTx translation turned on, Xen enables device MSI if it's
+available, regardless of whether the guest uses INTx or MSI. If the
+guest uses an INTx IRQ, Xen will inject a translated INTx IRQ into the
+guest's virtual ioapic whenever an MSI message is received. This reduces
+interrupt sharing in the system. If the guest OS enables MSI or MSI-X,
+the translation is automatically turned off.
+
+To enable or disable MSI-INTx translation globally, add "pci_msitranslate"
+in the config file:
+ pci_msitranslate = 1 (default is 1)
+
+To override for a specific device:
+ pci = [ '01:00.0,msitranslate=0', '03:00.0' ]
+
+
Caveat on Conventional PCI Device Passthrough
---------------------------------------------
[root@vt-vtd ~]# xm pci-attach HVMDomainVtd 0:2:0.0 7
+  To specify options for the device, use -o or --options=. The following command would disable MSI-INTx translation for the device:
+
+ [root@vt-vtd ~]# xm pci-attach -o msitranslate=0 0:2:0.0 7
+
+
VTd hotplug usage model:
------------------------
buffer specified by driver.
Such devices assigned to HVM domain currently do not work.
+
+
+Using SR-IOV with VT-d
+--------------------------------
+
+Single Root I/O Virtualization is a PCI Express feature, supported by some
+devices such as the Intel 82576, which allows you to create virtual PCI
+devices (Virtual Functions) and assign them to HVM guests.
+
+You can use a recent lspci (v3.1 and above) to check whether your PCIe
+device supports the SR-IOV capability.
+
+ $ lspci -s 01:00.0 -vvv
+
+ 01:00.0 Ethernet controller: Intel Corporation 82576 Gigabit Network Connection (rev 01)
+ Subsystem: Intel Corporation Gigabit ET Dual Port Server Adapter
+
+ ...
+
+ Capabilities: [160] Single Root I/O Virtualization (SR-IOV)
+ IOVCap: Migration-, Interrupt Message Number: 000
+ IOVCtl: Enable+ Migration- Interrupt- MSE+ ARIHierarchy+
+ IOVSta: Migration-
+ Initial VFs: 8, Total VFs: 8, Number of VFs: 7, Function Dependency Link: 00
+ VF offset: 128, stride: 2, Device ID: 10ca
+ Supported Page Size: 00000553, System Page Size: 00000001
+ VF Migration: offset: 00000000, BIR: 0
+ Kernel driver in use: igb
+
+
+The function that has the SR-IOV capability is also known as the Physical
+Function. You need the Physical Function driver (which runs in Dom0 and
+controls the allocation of physical resources) to enable the Virtual
+Functions. The following are the Virtual Functions associated with the
+above Physical Function.
+
+ $ lspci | grep -e 01:1[01].[0246]
+
+ 01:10.0 Ethernet controller: Intel Corporation Device 10ca (rev 01)
+ 01:10.2 Ethernet controller: Intel Corporation Device 10ca (rev 01)
+ 01:10.4 Ethernet controller: Intel Corporation Device 10ca (rev 01)
+ 01:10.6 Ethernet controller: Intel Corporation Device 10ca (rev 01)
+ 01:11.0 Ethernet controller: Intel Corporation Device 10ca (rev 01)
+ 01:11.2 Ethernet controller: Intel Corporation Device 10ca (rev 01)
+ 01:11.4 Ethernet controller: Intel Corporation Device 10ca (rev 01)
+
+We can tell that Physical Function 01:00.0 has 7 Virtual Functions (01:10.0,
+01:10.2, 01:10.4, 01:10.6, 01:11.0, 01:11.2, 01:11.4). The Virtual Function's
+PCI configuration space looks just like that of a normal PCI device.
+
+ $ lspci -s 01:10.0 -vvv
+
+ 01:10.0 Ethernet controller: Intel Corporation 82576 Gigabit Virtual Function
+ Subsystem: Intel Corporation Gigabit Virtual Function
+ Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- DisINTx-
+ Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast >TAbort- <TAbort- <MAbort- >SERR- <PERR- INTx-
+ Region 0: [virtual] Memory at d2840000 (64-bit, non-prefetchable) [size=16K]
+ Region 3: [virtual] Memory at d2860000 (64-bit, non-prefetchable) [size=16K]
+ Capabilities: [70] MSI-X: Enable+ Mask- TabSize=3
+ Vector table: BAR=3 offset=00000000
+ PBA: BAR=3 offset=00002000
+ Capabilities: [a0] Express (v2) Endpoint, MSI 00
+
+ ...
+
+
+The Virtual Functions only appear after the Physical Function driver
+is loaded. Once the Physical Function driver is unloaded, all Virtual
+Functions associated with this Physical Function disappear.
+
+A Virtual Function is essentially the same as a normal PCI device when
+used in a VT-d environment. You need to hide the Virtual Function, use
+the Virtual Function's bus, device and function number in the HVM guest
+configuration file, and then boot the HVM guest. You also need the
+Virtual Function driver, which is a normal PCI device driver, in the
+HVM guest to drive the Virtual Function. The PCIe SR-IOV specification
+requires that a Virtual Function support only MSI/MSI-X if it uses
+interrupts, so you also need to enable Xen/MSI support. Since Virtual
+Functions are dynamically allocated by the Physical Function driver,
+you might want to use the dynamic hiding method mentioned above.
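+
+For example, a minimal sketch that assigns Virtual Function 01:10.0 from
+the listing above to an HVM guest (the BDF numbers are illustrative and
+will differ on your system; unbind any dom0 driver bound to the Virtual
+Function first, as in the dynamic hiding steps above):
+
+    a) hide the Virtual Function via the PCI backend sysfs interface
+       echo -n 0000:01:10.0 >/sys/bus/pci/drivers/pciback/new_slot
+       echo -n 0000:01:10.0 >/sys/bus/pci/drivers/pciback/bind
+    b) reference it in the HVM guest configuration file and boot the guest
+       pci = [ '01:10.0' ]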
--- /dev/null
+Error handling in Xen
+---------------------
+
+1. domain_crash()
+-----------------
+Crash the specified domain due to buggy or unsupported behaviour of the
+guest. This should not be used where the hypervisor itself is in
+error, even if the scope of that error affects only a single
+domain. BUG() is a more appropriate failure method for hypervisor
+bugs. To repeat: domain_crash() is the correct response for erroneous
+or unsupported *guest* behaviour!
+
+Note that this should be used in most cases in preference to
+domain_crash_synchronous(): domain_crash() returns to the caller,
+allowing the crash to be deferred for the currently executing VCPU
+until certain resources (notably, spinlocks) have been released.
+
+Example usages:
+ * Unrecoverable guest kernel stack overflows
+ * Unsupported corners of HVM device models
+
+2. BUG()
+--------
+Crashes the host system with an informative file/line error message
+and a backtrace. Use this to check consistency assumptions within the
+hypervisor.
+
+Be careful not to use BUG() (or BUG_ON(), or ASSERT()) for failures
+*outside* the hypervisor software -- in particular, guest bugs (where
+domain_crash() is more appropriate) or non-critical BIOS or hardware
+errors (where retry or feature disable are more appropriate).
+
+Example usage: In arch/x86/hvm/i8254.c an I/O port handler includes
+the check BUG_ON(bytes != 1). We choose this extreme reaction to the
+unexpected error case because, although it could be handled by failing
+the I/O access or crashing the domain, it is indicative of an
+unexpected inconsistency in the hypervisor itself (since the I/O
+handler was only registered for single-byte accesses).
+
+
+3. BUG_ON()
+-----------
+BUG_ON(...) is merely a convenient short form for "if (...) BUG()". It
+is most commonly used as an 'always on' alternative to ASSERT().
+
+
+4. ASSERT()
+-----------
+Similar to BUG_ON(), except that it is only enabled for debug builds
+of the hypervisor. Typically ASSERT() is used only where the (usually
+small) overheads of an always-on debug check might be considered
+excessive. A good example might be within inner loops of time-critical
+functions, or where an assertion is extreme paranoia (considered
+*particularly* unlikely ever to fail).
+
+In general, if in doubt, use BUG_ON() in preference to ASSERT().
+
+
+5. panic()
+----------
+Like BUG() and ASSERT() this will crash and reboot the host
+system. However it does this after printing only an error message with
+no extra diagnostic information such as a backtrace. panic() is
+generally used where an unsupported system configuration is detected,
+particularly during boot, and where extra diagnostic information about
+CPU context would not be useful. It may also be used before exception
+handling is enabled during Xen bootstrap (on x86, BUG() and ASSERT()
+depend on Xen's exception-handling capabilities).
+
+Example usage: Most commonly for out-of-memory errors during
+bootstrap. The failure is unexpected since a host should always have
+enough memory to boot Xen, but if the failure does occur then the
+context of the failed memory allocation itself is not very
+interesting.
+
+
+6. Feature disable
+------------------
+A possible approach to dealing with boot-time errors, rather than
+crashing the hypervisor. It's particularly appropriate when parsing
+non-critical BIOS tables and detecting extended hardware features.
+
+
+7. BUILD_BUG_ON()
+-----------------
+Useful for assertions which can be evaluated at compile time. For
+example, making explicit assumptions about size and alignment of C
+structures.
--- /dev/null
+These notes are compiled from xen-devel questions and postings that have
+occurred since the inclusion of XSM. These notes are not intended to be
+definitive documentation but should address many common problems that arise
+when experimenting with XSM:FLASK.
+
+Xen XSM:FLASK configuration
+---------------------------
+
+1) cd xen-unstable.hg
+2) edit Config.mk in the toplevel xen directory as follows:
+
+ XSM_ENABLE ?= y
+ FLASK_ENABLE ?= y
+ ACM_SECURITY ?= n
+
+NB: Only one security module can be selected at a time. If no module is
+selected, then the default DUMMY module will be enforced. The DUMMY module
+only exercises the security framework and does not enforce any security
+policies. Changing the security module selection will require recompiling xen.
+These settings will also configure the corresponding toolchain support.
+
+3) make xen
+4) make tools
+
+
+Xen XSM:FLASK policy
+--------------------
+
+These instructions will enable the configuration and build of the sample policy.
+The sample policy provides the MINIMUM policy necessary to boot a
+paravirtualized dom0 and create a paravirtualized domU. Many of the
+default capabilities and usages supported by dom0/domU are disallowed by the
+sample policy. Further, the policy is comprised of a limited number of types and
+must be adjusted to meet the specific security goals of the installation.
+Modification of the policy is straightforward and is covered in a later section.
+
+NB: The policy is not automatically built as part of the tool support because
+of an external dependency on the checkpolicy compiler. The FLASK policy uses
+the same syntax and structure as SELinux, and compiling the policy relies on
+the SELinux policy toolchain. This toolchain is available in many
+distributions, as well as at the following URL:
+
+ http://userspace.selinuxproject.org/releases/20080909/stable/checkpolicy-1.34.7.tar.gz
+
+1) cd xen-unstable.hg/tools/flask/policy
+2) make policy
+3) cp policy.20 /boot/xenpolicy.20
+4) edit /etc/grub.conf, add a module line to the xen entry,
+
+ module /xenpolicy.20
+
+5) reboot, and select the updated xen entry
+
+NB: The module entry can be inserted on any line after the xen kernel line. Typical
+configurations use the last module entry or the module entry that immediately
+follows the xen kernel entry.
+
+Xen configuration of xend
+-------------------------
+
+1) cd /etc/xen
+2) edit xend-config.sxp
+3) uncomment the line containing the key:value pair entry,
+
+ #(xsm_module_name dummy)
+
+4) change the value entry to 'flask'
+
+ (xsm_module_name flask)
+
+5) restart xend
+
+Creating policy controlled domains
+----------------------------------
+
+1) Edit the domain config file and add the following entry,
+
+ access_control = ["policy=,label=system_u:object_r:domU_t"]
+
+NB: The 'policy' field is not used by XSM:FLASK. The 'label' must exist in the
+loaded policy. 'system_u:object_r:domU_t' is one of the existing labels from
+the sample policy and is shown for example purposes.
+
+2) Create the domain using the 'xm create' command.
+3) Use the 'xm list -l' command to list the running domains and their labels.
+
+Updating the XSM:FLASK policy
+-----------------------------
+
+It is recommended that the XSM:FLASK policy be tailored to meet the specific
+security goals of the platform. The policy is tailored by editing the xen.te
+file in the 'policy' subdirectory.
+
+1) cd xen-unstable.hg/tools/flask/policy
+2) edit policy/modules/xen/xen.te - make changes to support platform security goals.
+3) make policy
+4) cp policy.20 /boot/xenpolicy.20
+5) reboot
+
+Alternatively, one may reload the policy using the 'flask_loadpolicy' tool
+installed by the xen tools.
+
+1) flask_loadpolicy policy.20
+
+NB: The sample policy permits policy reloads as well as general manipulation of
+the Flask security server only from dom0. The policy can be tailored further to
+restrict policy reloads and other manipulations to boot-time only, by removing
+the corresponding statements from the policy.
+
+Enforcing the XSM:FLASK policy
+------------------------------
+
+By default, XSM:FLASK is compiled and installed in permissive mode. The
+following configuration will make an XSM:FLASK system start in enforcing mode.
+
+1) edit /etc/grub.conf
+2) append the parameter 'flask_enforcing=1' to the xen kernel line (see the sketch below).
+3) reboot, and select the updated xen entry
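+
+NB: A grub entry configured this way might look like the following sketch;
+the kernel image name and root device are illustrative and will differ per
+installation, and '/xenpolicy.20' is the policy module installed earlier.
+
+   title Xen (FLASK enforcing)
+       kernel /xen.gz flask_enforcing=1
+       module /vmlinuz-2.6-xen ro root=/dev/sda1
+       module /xenpolicy.20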
+
+
+Additional notes on XSM:FLASK
+-----------------------------
+
+1) xen command line parameters
+
+ a) flask_enforcing
+
+ The default value for flask_enforcing is '0'. This parameter causes the
+ platform to boot in permissive mode which means that the policy is loaded
+ but not enforced. This mode is often helpful for developing new systems
+ and policies as the policy violations are reported on the xen console and
+ may be viewed in dom0 through 'xm dmesg'.
+
+ To boot the platform into enforcing mode, which means that the policy is
+ loaded and enforced, append 'flask_enforcing=1' on the grub line.
+
+    This parameter may also be changed through the flask hypercall.
+
+ b) flask_enabled
+
+ The default value for flask_enabled is '1'. This parameter causes the
+ platform to enable the FLASK security module under the XSM framework.
+ The parameter may be enabled/disabled only once per boot. If the parameter
+ is set to '0', only a reboot can re-enable flask. When flask_enabled is '0'
+ the DUMMY module is enforced.
+
+    This parameter may also be changed through the flask hypercall, but this
+    may only be done once per boot.
\item [ dma\_bits=xxx ] Specify width of DMA addresses in bits. This
is used in NUMA systems to prevent this special DMA memory from
being exhausted in one node when remote nodes have available memory.
+\item [ vcpu\_migration\_delay=$<$minimum\_time$>$] Set the minimum delay,
+  in microseconds, before a vcpu may be migrated (default 0); for example,
+  vcpu\_migration\_delay=1000 imposes a 1ms minimum. This parameter helps
+  avoid aggressive vcpu migration; for comparison, the Linux kernel uses
+  0.5ms by default.
\end{description}
In addition, the following options may be specified on the Xen command
Mike Day, IBM & Daniel Veillard, Red Hat \\
Jim Fehlig, Novell & Tom Wilkie, University of Cambridge \\
Jon Harrop, XenSource & Yosuke Iwamatsu, NEC \\
+Masaki Kanno, FUJITSU \\
\end{tabular}
\end{large}
\end{flushleft}
\end{minipage}\\
\hline
+ 1.0.7 & 20th Oct. 08 & M. Kanno &
+ \begin{minipage}[t]{7cm}
+ \begin{flushleft}
+ Added definitions of new classes DSCSI and PSCSI. Updated the table
+ and the diagram representing relationships between classes.
+ Added host.PSCSIs and VM.DSCSIs fields.
+ \end{flushleft}
+ \end{minipage}\\
+ \hline
\end{tabular}
\end{center}
\newcommand{\coversheetlogo}{xen.eps}
%% Document date
-\newcommand{\datestring}{24th July 2008}
+\newcommand{\datestring}{20th October 2008}
\newcommand{\releasestatement}{Stable Release}
%% Document revision
-\newcommand{\revstring}{API Revision 1.0.6}
+\newcommand{\revstring}{API Revision 1.0.7}
%% Document authors
\newcommand{\docauthors}{
digraph "Xen-API Class Diagram" {
fontname="Verdana";
-node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user XSPolicy ACMPolicy;
-node [shape=ellipse]; PIF_metrics VIF_metrics VM_metrics VBD_metrics PBD_metrics VM_guest_metrics host_metrics;
-node [shape=box]; DPCI PPCI host_cpu console VTPM
+node [ shape=box ]; session VM host network VIF PIF SR VDI VBD PBD user;
+node [ shape=box ]; XSPolicy ACMPolicy DPCI PPCI host_cpu console VTPM;
+node [ shape=box ]; DSCSI PSCSI;
+node [ shape=ellipse ]; VM_metrics VM_guest_metrics host_metrics;
+node [ shape=ellipse ]; PIF_metrics VIF_metrics VBD_metrics PBD_metrics;
session -> host [ arrowhead="none" ]
session -> user [ arrowhead="none" ]
VM -> VM_metrics [ arrowhead="none" ]
DPCI -> VM [ arrowhead="none", arrowtail="crow" ]
DPCI -> PPCI [ arrowhead="none" ]
PPCI -> host [ arrowhead="none", arrowtail="crow" ]
+DSCSI -> VM [ arrowhead="none", arrowtail="crow" ]
+DSCSI -> PSCSI [ arrowhead="none" ]
+PSCSI -> host [ arrowhead="none", arrowtail="crow" ]
}
{\tt console} & A console \\
{\tt DPCI} & A pass-through PCI device \\
{\tt PPCI} & A physical PCI device \\
+{\tt DSCSI} & A half-virtualized SCSI device \\
+{\tt PSCSI} & A physical SCSI device \\
{\tt user} & A user of the system \\
{\tt debug} & A basic class for testing \\
{\tt XSPolicy} & A class for handling Xen Security Policies \\
console.VM & VM.consoles & one-to-many\\
DPCI.VM & VM.DPCIs & one-to-many\\
PPCI.host & host.PPCIs & one-to-many\\
+DSCSI.VM & VM.DSCSIs & one-to-many\\
+PSCSI.host & host.PSCSIs & one-to-many\\
host.resident\_VMs & VM.resident\_on & many-to-one\\
host.host\_CPUs & host\_cpu.host & many-to-one\\
\hline
$\mathit{RO}_\mathit{run}$ & {\tt crash\_dumps} & (crashdump ref) Set & crash dumps associated with this VM \\
$\mathit{RO}_\mathit{run}$ & {\tt VTPMs} & (VTPM ref) Set & virtual TPMs \\
$\mathit{RO}_\mathit{run}$ & {\tt DPCIs} & (DPCI ref) Set & pass-through PCI devices \\
+$\mathit{RO}_\mathit{run}$ & {\tt DSCSIs} & (DSCSI ref) Set & half-virtualized SCSI devices \\
$\mathit{RW}$ & {\tt PV/bootloader} & string & name of or path to bootloader \\
$\mathit{RW}$ & {\tt PV/kernel} & string & path to the kernel \\
$\mathit{RW}$ & {\tt PV/ramdisk} & string & path to the initrd \\
}
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_DSCSIs}
+
+{\bf Overview:}
+Get the DSCSIs field of the given VM.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} ((DSCSI ref) Set) get_DSCSIs (session_id s, VM ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt VM ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+(DSCSI ref) Set
+}
+
+
value of the field
\vspace{0.3cm}
\vspace{0.3cm}
$\mathit{RW}$ & {\tt crash\_dump\_sr} & SR ref & The SR in which VDIs for crash dumps are created \\
$\mathit{RO}_\mathit{run}$ & {\tt PBDs} & (PBD ref) Set & physical blockdevices \\
$\mathit{RO}_\mathit{run}$ & {\tt PPCIs} & (PPCI ref) Set & physical PCI devices \\
+$\mathit{RO}_\mathit{run}$ & {\tt PSCSIs} & (PSCSI ref) Set & physical SCSI devices \\
$\mathit{RO}_\mathit{run}$ & {\tt host\_CPUs} & (host\_cpu ref) Set & The physical CPUs on this host \\
$\mathit{RO}_\mathit{run}$ & {\tt metrics} & host\_metrics ref & metrics associated with this host \\
\hline
}
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_PSCSIs}
+
+{\bf Overview:}
+Get the PSCSIs field of the given host.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} ((PSCSI ref) Set) get_PSCSIs (session_id s, host ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt host ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+(PSCSI ref) Set
+}
+
+
value of the field
\vspace{0.3cm}
\vspace{0.3cm}
}
+all fields from the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+
+\vspace{1cm}
+\newpage
+\section{Class: DSCSI}
+\subsection{Fields for class: DSCSI}
+\begin{longtable}{|lllp{0.38\textwidth}|}
+\hline
+\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf DSCSI} \\
+\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
+half-virtualized SCSI device.}} \\
+\hline
+Quals & Field & Type & Description \\
+\hline
+$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\
+$\mathit{RO}_\mathit{inst}$ & {\tt VM} & VM ref & the virtual machine \\
+$\mathit{RO}_\mathit{inst}$ & {\tt PSCSI} & PSCSI ref & the physical SCSI device \\
+$\mathit{RO}_\mathit{run}$ & {\tt virtual\_host} & int & the virtual host number \\
+$\mathit{RO}_\mathit{run}$ & {\tt virtual\_channel} & int & the virtual channel number \\
+$\mathit{RO}_\mathit{run}$ & {\tt virtual\_target} & int & the virtual target number \\
+$\mathit{RO}_\mathit{run}$ & {\tt virtual\_lun} & int & the virtual logical unit number \\
+$\mathit{RO}_\mathit{inst}$ & {\tt virtual\_HCTL} & string & the virtual HCTL \\
+$\mathit{RO}_\mathit{run}$ & {\tt runtime\_properties} & (string $\rightarrow$ string) Map & Device runtime properties \\
+\hline
+\end{longtable}
+\subsection{RPCs associated with class: DSCSI}
+\subsubsection{RPC name:~get\_all}
+
+{\bf Overview:}
+Return a list of all the DSCSIs known to the system.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} ((DSCSI ref) Set) get_all (session_id s)\end{verbatim}
+
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+(DSCSI ref) Set
+}
+
+
+references to all objects
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_uuid}
+
+{\bf Overview:}
+Get the uuid field of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_uuid (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_VM}
+
+{\bf Overview:}
+Get the VM field of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (VM ref) get_VM (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+VM ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_PSCSI}
+
+{\bf Overview:}
+Get the PSCSI field of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (PSCSI ref) get_PSCSI (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+PSCSI ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_host}
+
+{\bf Overview:}
+Get the virtual\_host field of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_virtual_host (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_channel}
+
+{\bf Overview:}
+Get the virtual\_channel field of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_virtual_channel (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_target}
+
+{\bf Overview:}
+Get the virtual\_target field of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_virtual_target (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_lun}
+
+{\bf Overview:}
+Get the virtual\_lun field of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_virtual_lun (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_virtual\_HCTL}
+
+{\bf Overview:}
+Get the virtual\_HCTL field of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_virtual_HCTL (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_runtime\_properties}
+
+{\bf Overview:}
+Get the runtime\_properties field of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} ((string -> string) Map) get_runtime_properties (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+(string $\rightarrow$ string) Map
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~create}
+
+{\bf Overview:}
+Create a new DSCSI instance, and return its handle.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (DSCSI ref) create (session_id s, DSCSI record args)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI record } & args & All constructor arguments \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+DSCSI ref
+}
+
+
+reference to the newly created object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~destroy}
+
+{\bf Overview:}
+Destroy the specified DSCSI instance.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} void destroy (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+void
+}
+
+
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_by\_uuid}
+
+{\bf Overview:}
+Get a reference to the DSCSI instance with the specified UUID.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (DSCSI ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt string } & uuid & UUID of object to return \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+DSCSI ref
+}
+
+
+reference to the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_record}
+
+{\bf Overview:}
+Get a record containing the current state of the given DSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (DSCSI record) get_record (session_id s, DSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt DSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+DSCSI record
+}
+
+
+all fields from the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+
+\vspace{1cm}
+\newpage
+\section{Class: PSCSI}
+\subsection{Fields for class: PSCSI}
+\begin{longtable}{|lllp{0.38\textwidth}|}
+\hline
+\multicolumn{1}{|l}{Name} & \multicolumn{3}{l|}{\bf PSCSI} \\
+\multicolumn{1}{|l}{Description} & \multicolumn{3}{l|}{\parbox{11cm}{\em A
+physical SCSI device.}} \\
+\hline
+Quals & Field & Type & Description \\
+\hline
+$\mathit{RO}_\mathit{run}$ & {\tt uuid} & string & unique identifier/object reference \\
+$\mathit{RO}_\mathit{run}$ & {\tt host} & host ref & the physical machine to which this PSCSI is connected \\
+$\mathit{RO}_\mathit{run}$ & {\tt physical\_host} & int & the physical host number \\
+$\mathit{RO}_\mathit{run}$ & {\tt physical\_channel} & int & the physical channel number \\
+$\mathit{RO}_\mathit{run}$ & {\tt physical\_target} & int & the physical target number \\
+$\mathit{RO}_\mathit{run}$ & {\tt physical\_lun} & int & the physical logical unit number \\
+$\mathit{RO}_\mathit{run}$ & {\tt physical\_HCTL} & string & the physical HCTL \\
+$\mathit{RO}_\mathit{run}$ & {\tt vendor\_name} & string & the vendor name \\
+$\mathit{RO}_\mathit{run}$ & {\tt model} & string & the model \\
+$\mathit{RO}_\mathit{run}$ & {\tt type\_id} & int & the SCSI type ID \\
+$\mathit{RO}_\mathit{run}$ & {\tt type} & string & the SCSI type \\
+$\mathit{RO}_\mathit{run}$ & {\tt dev\_name} & string & the SCSI device name (e.g. sda or st0) \\
+$\mathit{RO}_\mathit{run}$ & {\tt sg\_name} & string & the SCSI generic device name (e.g. sg0) \\
+$\mathit{RO}_\mathit{run}$ & {\tt revision} & string & the revision \\
+$\mathit{RO}_\mathit{run}$ & {\tt scsi\_id} & string & the SCSI ID \\
+$\mathit{RO}_\mathit{run}$ & {\tt scsi\_level} & int & the SCSI level \\
+\hline
+\end{longtable}
+\subsection{RPCs associated with class: PSCSI}
+\subsubsection{RPC name:~get\_all}
+
+{\bf Overview:}
+Return a list of all the PSCSIs known to the system.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} ((PSCSI ref) Set) get_all (session_id s)\end{verbatim}
+
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+(PSCSI ref) Set
+}
+
+
+references to all objects
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_uuid}
+
+{\bf Overview:}
+Get the uuid field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_uuid (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_host}
+
+{\bf Overview:}
+Get the host field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (host ref) get_host (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+host ref
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_host}
+
+{\bf Overview:}
+Get the physical\_host field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_physical_host (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_channel}
+
+{\bf Overview:}
+Get the physical\_channel field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_physical_channel (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_target}
+
+{\bf Overview:}
+Get the physical\_target field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_physical_target (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_lun}
+
+{\bf Overview:}
+Get the physical\_lun field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_physical_lun (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_physical\_HCTL}
+
+{\bf Overview:}
+Get the physical\_HCTL field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_physical_HCTL (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_vendor\_name}
+
+{\bf Overview:}
+Get the vendor\_name field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_vendor_name (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_model}
+
+{\bf Overview:}
+Get the model field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_model (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_type\_id}
+
+{\bf Overview:}
+Get the type\_id field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_type_id (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_type}
+
+{\bf Overview:}
+Get the type field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_type (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_dev\_name}
+
+{\bf Overview:}
+Get the dev\_name field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_dev_name (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_sg\_name}
+
+{\bf Overview:}
+Get the sg\_name field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_sg_name (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_revision}
+
+{\bf Overview:}
+Get the revision field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_revision (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_scsi\_id}
+
+{\bf Overview:}
+Get the scsi\_id field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} string get_scsi_id (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+string
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_scsi\_level}
+
+{\bf Overview:}
+Get the scsi\_level field of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} int get_scsi_level (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+int
+}
+
+
+value of the field
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_by\_uuid}
+
+{\bf Overview:}
+Get a reference to the PSCSI instance with the specified UUID.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (PSCSI ref) get_by_uuid (session_id s, string uuid)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt string } & uuid & UUID of object to return \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+PSCSI ref
+}
+
+
+reference to the object
+\vspace{0.3cm}
+\vspace{0.3cm}
+\vspace{0.3cm}
+\subsubsection{RPC name:~get\_record}
+
+{\bf Overview:}
+Get a record containing the current state of the given PSCSI.
+
+ \noindent {\bf Signature:}
+\begin{verbatim} (PSCSI record) get_record (session_id s, PSCSI ref self)\end{verbatim}
+
+
+\noindent{\bf Arguments:}
+
+
+\vspace{0.3cm}
+\begin{tabular}{|c|c|p{7cm}|}
+ \hline
+{\bf type} & {\bf name} & {\bf description} \\ \hline
+{\tt PSCSI ref } & self & reference to the object \\ \hline
+
+\end{tabular}
+
+\vspace{0.3cm}
+
+ \noindent {\bf Return Type:}
+{\tt
+PSCSI record
+}
+
+
all fields from the object
\vspace{0.3cm}
\vspace{0.3cm}
$(OBJ_DIR)/$(TARGET)_app.o: $(APP_OBJS) app.lds
$(LD) -r -d $(LDFLAGS) -\( $^ -\) $(APP_LDLIBS) --undefined main -o $@
-$(OBJ_DIR)/$(TARGET): links $(OBJS) $(OBJ_DIR)/$(TARGET)_app.o arch_lib
- $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(OBJ_DIR)/$(TARGET)_app.o $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
+ifneq ($(APP_OBJS),)
+APP_O=$(OBJ_DIR)/$(TARGET)_app.o
+endif
+
+$(OBJ_DIR)/$(TARGET): links $(OBJS) $(APP_O) arch_lib
+ $(LD) -r $(LDFLAGS) $(HEAD_OBJ) $(APP_O) $(OBJS) $(LDARCHLIB) $(LDLIBS) -o $@.o
$(OBJCOPY) -w -G $(GLOBAL_PREFIX)* -G _start $@.o $@.o
$(LD) $(LDFLAGS) $(LDFLAGS_FINAL) $@.o $(EXTRA_OBJS) -o $@
gzip -f -9 -c $@ >$@.gz
-# Build for Big Endian?
-BIGENDIAN ?= n
ARCH_CFLAGS := -mfixed-range=f2-f5,f12-f15,f32-f127 -mconstant-gp
ARCH_CFLAGS += -O2
ARCH_LDFLAGS = -warn-common
-# Next lines are for big endian code !
-ifeq ($(BIGENDIAN),y)
-ARCH_CFLAGS += -mbig-endian -Wa,-mbe -Wa,-mlp64
-ARCH_CFLAGS += -DBIG_ENDIAN
-ARCH_ASFLAGS += -Wa,-mbe
-ARCH_ASFLAGS += -DBIG_ENDIAN
-ARCH_LDFLAGS = -EB -d
-endif
-
{
struct callback_register event =
{
- .type = SWAP(CALLBACKTYPE_event),
- .address = SWAP((unsigned long)&hypervisor_callback),
+ .type = CALLBACKTYPE_event,
+ .address = (unsigned long)&hypervisor_callback,
};
HYPERVISOR_callback_op(CALLBACKOP_register, &event);
}
init_start_info(start_info_t* xen_start_info)
{
/* Make a copy of the start_info structure */
- start_info.nr_pages = SWAP(xen_start_info->nr_pages);
- start_info.shared_info = SWAP(xen_start_info->shared_info);
- start_info.flags = SWAP(xen_start_info->flags);
- start_info.store_mfn = SWAP(xen_start_info->store_mfn);
- start_info.store_evtchn = SWAP(xen_start_info->store_evtchn);
- start_info.console.domU.mfn = SWAP(xen_start_info->console.domU.mfn);
+ start_info.nr_pages = xen_start_info->nr_pages;
+ start_info.shared_info = xen_start_info->shared_info;
+ start_info.flags = xen_start_info->flags;
+ start_info.store_mfn = xen_start_info->store_mfn;
+ start_info.store_evtchn = xen_start_info->store_evtchn;
+ start_info.console.domU.mfn = xen_start_info->console.domU.mfn;
start_info.console.domU.evtchn =
- SWAP(xen_start_info->console.domU.evtchn);
- start_info.pt_base = SWAP(xen_start_info->pt_base);
- start_info.nr_pt_frames = SWAP(xen_start_info->nr_pt_frames);
- start_info.mfn_list = SWAP(xen_start_info->mfn_list);
- start_info.mod_start = SWAP(xen_start_info->mod_start);
- start_info.mod_len = SWAP(xen_start_info->mod_len);
+ xen_start_info->console.domU.evtchn;
+ start_info.pt_base = xen_start_info->pt_base;
+ start_info.nr_pt_frames = xen_start_info->nr_pt_frames;
+ start_info.mfn_list = xen_start_info->mfn_list;
+ start_info.mod_start = xen_start_info->mod_start;
+ start_info.mod_len = xen_start_info->mod_len;
}
static void
init_boot_params(void)
{
- ia64BootParamG.command_line = SWAP(ia64_boot_paramP->command_line);
- ia64BootParamG.efi_systab = SWAP(ia64_boot_paramP->efi_systab);
- ia64BootParamG.efi_memmap = SWAP(ia64_boot_paramP->efi_memmap);
- ia64BootParamG.efi_memmap_size =
- SWAP(ia64_boot_paramP->efi_memmap_size);
- ia64BootParamG.efi_memdesc_size =
- SWAP(ia64_boot_paramP->efi_memdesc_size);
+ ia64BootParamG.command_line = ia64_boot_paramP->command_line;
+ ia64BootParamG.efi_systab = ia64_boot_paramP->efi_systab;
+ ia64BootParamG.efi_memmap = ia64_boot_paramP->efi_memmap;
+ ia64BootParamG.efi_memmap_size = ia64_boot_paramP->efi_memmap_size;
+ ia64BootParamG.efi_memdesc_size = ia64_boot_paramP->efi_memdesc_size;
ia64BootParamG.efi_memdesc_version =
- SWAP(ia64_boot_paramP->efi_memdesc_version);
+ ia64_boot_paramP->efi_memdesc_version;
ia64BootParamG.console_info.num_cols =
- SWAP(ia64_boot_paramP->console_info.num_cols);
+ ia64_boot_paramP->console_info.num_cols;
ia64BootParamG.console_info.num_rows =
- SWAP(ia64_boot_paramP->console_info.num_rows);
+ ia64_boot_paramP->console_info.num_rows;
ia64BootParamG.console_info.orig_x =
- SWAP(ia64_boot_paramP->console_info.orig_x);
+ ia64_boot_paramP->console_info.orig_x;
ia64BootParamG.console_info.orig_y =
- SWAP(ia64_boot_paramP->console_info.orig_y);
- ia64BootParamG.fpswa = SWAP(ia64_boot_paramP->fpswa);
- ia64BootParamG.initrd_start = SWAP(ia64_boot_paramP->initrd_start);
- ia64BootParamG.initrd_size = SWAP(ia64_boot_paramP->initrd_size);
- ia64BootParamG.domain_start = SWAP(ia64_boot_paramP->domain_start);
- ia64BootParamG.domain_size = SWAP(ia64_boot_paramP->domain_size);
+ ia64_boot_paramP->console_info.orig_y;
+ ia64BootParamG.fpswa = ia64_boot_paramP->fpswa;
+ ia64BootParamG.initrd_start = ia64_boot_paramP->initrd_start;
+ ia64BootParamG.initrd_size = ia64_boot_paramP->initrd_size;
+ ia64BootParamG.domain_start = ia64_boot_paramP->domain_start;
+ ia64BootParamG.domain_size = ia64_boot_paramP->domain_size;
/*
* Copy and parse the boot command line.
typedef struct
{
-#if !defined(BIG_ENDIAN)
uint64_t sof :7; /* 0-6 size of frame */
uint64_t sol :7; /* 7-13 size of locals (in + loc) */
uint64_t sor :4;
uint64_t rrb_pr :6;
uint64_t res :25; /* reserved */
uint64_t v :1; /* The v bit */
-#else /* !BIG_ENDIAN */
- uint64_t v :1; /* The v bit */
- uint64_t res :25; /* reserved */
- uint64_t rrb_pr :6;
- uint64_t rrb_fr :7;
- uint64_t rrb_gr :7;
- uint64_t sor :4;
- uint64_t sol :7; /* 7-13 size of locals (in + loc) */
- uint64_t sof :7; /* 0-6 size of frame */
-#endif /* BIG_ENDIAN */
} ifs_t;
void
printk("efi.getTime() failed\n");
return 0;
}
-
-#if defined(BIG_ENDIAN)
- tmP->Year = SWAP(tmP->Year);
- tmP->TimeZone = SWAP(tmP->TimeZone);
- tmP->Nanosecond = SWAP(tmP->Nanosecond);
-#endif
-
return 1;
}
static int
efi_guid_cmp(efi_guid_t* a_le, efi_guid_t* b)
{
-#if defined(BIG_ENDIAN)
- if(SWAP(a_le->Data1) != b->Data1)
- return 1;
- if(SWAP(a_le->Data2) != b->Data2)
- return 1;
- if(SWAP(a_le->Data3) != b->Data3)
- return 1;
- return memcmp(a_le->Data4, b->Data4, sizeof(uint8_t)*8);
-#else
return memcmp(a_le, b, sizeof(efi_guid_t));
-#endif
}
void
efiSysTableP = (efi_system_table_t*)__va(ia64BootParamG.efi_systab);
machineFwG.efi.efiSysTableP = efiSysTableP;
PRINT_BV("EfiSystemTable at: %p\n", efiSysTableP);
- fwP = (uint16_t*) __va(SWAP(efiSysTableP->FirmwareVendor));
+ fwP = (uint16_t*) __va(efiSysTableP->FirmwareVendor);
if (fwP) {
for (i = 0; i < (int)sizeof(fwVendor) - 1 && *fwP; ++i)
- fwVendor[i] = SWAP(*fwP++);
+ fwVendor[i] = *fwP++;
fwVendor[i] = '\0';
}
PRINT_BV(" EFI-FirmwareVendor : %s\n", fwVendor);
PRINT_BV(" EFI-FirmwareRevision : %d\n",
- SWAP(efiSysTableP->FirmwareRevision));
+ efiSysTableP->FirmwareRevision);
PRINT_BV(" EFI-SystemTable-Revision : %d.%d\n",
- SWAP(efiSysTableP->Hdr.Revision)>>16,
- SWAP(efiSysTableP->Hdr.Revision)&0xffff);
+ efiSysTableP->Hdr.Revision >> 16,
+ efiSysTableP->Hdr.Revision & 0xffff);
rsP = (efi_runtime_services_t*)
- __va(SWAP(efiSysTableP->RuntimeServices));
+ __va(efiSysTableP->RuntimeServices);
mdcnt = ia64BootParamG.efi_memmap_size /
ia64BootParamG.efi_memdesc_size;
memdP = (efi_memory_descriptor_t*) __va(ia64BootParamG.efi_memmap);
mdP = NextMemoryDescriptor(mdP, ia64BootParamG.efi_memdesc_size)) {
/* Relocate runtime memory segments for firmware. */
PRINT_BV(" %d. Type: %x Attributes: 0x%lx\n",
- i, SWAP(mdP->Type), SWAP(mdP->Attribute));
+ i, mdP->Type, mdP->Attribute);
PRINT_BV(" PhysStart: 0x%lx NumPages: 0x%lx\n",
- SWAP(mdP->PhysicalStart), SWAP(mdP->NumberOfPages));
- switch (SWAP(mdP->Type)) {
+ mdP->PhysicalStart, mdP->NumberOfPages);
+ switch (mdP->Type) {
case EfiRuntimeServicesData:
PRINT_BV(" -> EfiRuntimeServicesData\n");
break;
case EfiConventionalMemory:
PRINT_BV(" -> EfiConventionalMemory\n");
PRINT_BV(" start: 0x%lx end: 0x%lx\n",
- SWAP(mdP->PhysicalStart),
- SWAP(mdP->PhysicalStart)+
- SWAP(mdP->NumberOfPages)*EFI_PAGE_SIZE);
+ mdP->PhysicalStart,
+ mdP->PhysicalStart +
+ mdP->NumberOfPages * EFI_PAGE_SIZE);
if (numConvMem) {
printk(" Currently only one efi "
"memory chunk supported !!!\n");
break;
}
- machineFwG.mach_mem_start =
- SWAP(mdP->PhysicalStart);
+ machineFwG.mach_mem_start = mdP->PhysicalStart;
machineFwG.mach_mem_size =
- SWAP(mdP->NumberOfPages)*EFI_PAGE_SIZE;
+ mdP->NumberOfPages * EFI_PAGE_SIZE;
numConvMem++;
break;
case EfiMemoryMappedIOPortSpace:
break;
case EfiPalCode:
machineFwG.ia64_pal_base =
- __va(SWAP(mdP->PhysicalStart));
+ __va(mdP->PhysicalStart);
PRINT_BV(" -> EfiPalCode\n"
" start : %p\n",
machineFwG.ia64_pal_base);
* virtual addressing and the efi runtime functions
* may be called directly.
*/
- if (SWAP(mdP->Attribute) & EFI_MEMORY_RUNTIME) {
- if (SWAP(mdP->Attribute) & EFI_MEMORY_WB)
- mdP->VirtualStart =
- SWAP(__va(mdP->PhysicalStart));
+ if (mdP->Attribute & EFI_MEMORY_RUNTIME) {
+ if (mdP->Attribute & EFI_MEMORY_WB)
+ mdP->VirtualStart = __va(mdP->PhysicalStart);
else {
- if (SWAP(mdP->Attribute) & EFI_MEMORY_UC)
+ if (mdP->Attribute & EFI_MEMORY_UC)
printk("efi_init: RuntimeMemory with "
"UC attribute !!!!!!\n");
/*
}
/* Now switch efi runtime stuff to virtual addressing. */
status = ia64_call_efi_physical(
- (void*)__va(SWAP((uint64_t)rsP->SetVirtualAddressMap)),
+ (void*)__va((uint64_t)rsP->SetVirtualAddressMap),
ia64BootParamG.efi_memmap_size,
ia64BootParamG.efi_memdesc_size,
ia64BootParamG.efi_memdesc_version,
}
/* Getting efi function pointer for getEfiTime. */
machineFwG.efi.getTimeF =
- (efi_get_time_t)__va(SWAP((uint64_t)rsP->GetTime));
+ (efi_get_time_t)__va((uint64_t)rsP->GetTime);
/* Getting efi function pointer for resetSystem. */
machineFwG.efi.resetSystemF =
- (efi_reset_system_t)__va(SWAP((uint64_t)rsP->ResetSystem));
+ (efi_reset_system_t)__va((uint64_t)rsP->ResetSystem);
/* Scanning the Configuration table of the EfiSystemTable. */
PRINT_BV("NumberOfConfigTableEntries: %ld\n",
- SWAP(efiSysTableP->NumberOfTableEntries));
+ efiSysTableP->NumberOfTableEntries);
confP = (efi_configuration_table_t*)
- __va(SWAP(efiSysTableP->ConfigurationTable));
- for (i = 0; i < SWAP(efiSysTableP->NumberOfTableEntries); i++) {
+ __va(efiSysTableP->ConfigurationTable);
+ for (i = 0; i < efiSysTableP->NumberOfTableEntries; i++) {
if (!efi_guid_cmp(&confP[i].VendorGuid, &sal)) {
machineFwG.ia64_sal_tableP = (sal_system_table_t*)
- __va(SWAP((uint64_t) confP[i].VendorTable));
+ __va((uint64_t) confP[i].VendorTable);
PRINT_BV(" Found SalSystemTable at: 0x%lx\n",
(uint64_t) machineFwG.ia64_sal_tableP);
continue;
}
if (!efi_guid_cmp(&confP[i].VendorGuid, &acpi)) {
machineFwG.ia64_efi_acpi_table =
- __va(SWAP((uint64_t) confP[i].VendorTable));
+ __va((uint64_t) confP[i].VendorTable);
PRINT_BV(" Found AcpiTable at: 0x%lx\n",
(uint64_t) machineFwG.ia64_efi_acpi_table);
continue;
}
if (!efi_guid_cmp(&confP[i].VendorGuid, &acpi20)) {
machineFwG.ia64_efi_acpi20_table =
- __va(SWAP((uint64_t) confP[i].VendorTable));
+ __va((uint64_t) confP[i].VendorTable);
PRINT_BV(" Found Acpi20Table at: 0x%lx\n",
(uint64_t) machineFwG.ia64_efi_acpi20_table);
continue;
ld8 r14=[in0],8 // function address
;;
ld8 gp=[in0] // function gp value
-#if defined(BIG_ENDIAN)
- mux1 r14=r14,@rev // swap because mini-os is in BE
- mov ar.rsc=3
- ;;
-#endif
mov out0=in1
mov out1=in2
mov out2=in3
mov out4=in5
mov b6=r14
;;
-#if defined(BIG_ENDIAN)
- mux1 gp=gp,@rev // swap because mini-os is in BE
- rum IA64_PSR_BE
- ;;
-#endif
-
br.call.sptk.many rp=b6 // call EFI procedure
-
-#if defined(BIG_ENDIAN)
- ;;
- sum IA64_PSR_BE
- mov ar.rsc=IA64_RSE_EAGER
-#endif
mov gp=loc3 // restore kernel gp
mov r14=loc2 // psr to restore mode
;;
mov b0=palret
rsm psr.i // disable interrupts
;;
-#if defined(BIG_ENDIAN)
- rum IA64_PSR_BE // set psr.be==0
- ;;
-#endif
br.cond.sptk b6 // call into firmware
;;
-#if defined(BIG_ENDIAN)
- sum IA64_PSR_BE // set psr.be==1
- ;;
-#endif
ssm psr.i // enable interrupts
;;
2: mov psr.l=psrsave
;;
ld8 gp=[in0] // function gp value
;;
-#if defined(BIG_ENDIAN)
- mux1 r14=r14,@rev // swap if mini-os is in BE
- mux1 gp=gp,@rev // swap if mini-os is in BE
-#endif
- ;;
mov b6=r14
-
-#if defined(BIG_ENDIAN)
- rum IA64_PSR_BE
- ;;
-#endif
-
br.call.sptk.many rp=b6 // call EFI procedure
-#if defined(BIG_ENDIAN)
- sum IA64_PSR_BE
- ;;
-#endif
-
mov ar.pfs=loc0
mov gp=loc1
mov rp=loc2
START_INFO_PFN_ld r14=[r15] // load the start_info_pfn
add r16=7, r0
;;
-#if defined(BIG_ENDIAN)
- mux1 r14=r14,@rev // swap because mini-os is in BE
-#endif
- ;;
shl r15=r14,PAGE_SHIFT_XEN_16K // pfn << PAGE_SHIFT_XEN_16K
shl r16=r16,IA64_RR_IDX_POS // (7<<IA64_RR_IDX_POS)
;;
;;
ld8 r21=[r21] // XEN.ipsr
ld8 r22=[r22];; // XEN.iip
-#if defined(BIG_ENDIAN)
- mux1 r21=r21,@rev // swap because mini-os is in BE
- mux1 r22=r22,@rev // swap because mini-os is in BE
- ;;
-#endif
add r19=TF_IPSR,r18
add r20=TF_IIP,r18
;;
//bsw.1 // switch to bank 1 for saving these registers.
movl r30=XSI_BANKNUM // Switch to bank 1.
mov r31=1;;
-#if defined(BIG_ENDIAN)
- mux1 r31=r31,@rev // swap because mini-os is in BE
- ;;
-#endif
st4 [r30]=r31
;;
/*
movl r30=XSI_BANK1_R16;
movl r31=XSI_BANK1_R16+8;;
ld8 r16=[r30],16; ld8 r17=[r31],16;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r16=r16,@rev; mux1 r17=r17,@rev;;
-#endif
ld8 r18=[r30],16; ld8 r19=[r31],16;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r18=r18,@rev; mux1 r19=r19,@rev;;
-#endif
ld8 r20=[r30],16; ld8 r21=[r31],16;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r20=r20,@rev; mux1 r21=r21,@rev;;
-#endif
ld8 r22=[r30],16; ld8 r23=[r31],16;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r22=r22,@rev; mux1 r23=r23,@rev;;
-#endif
ld8 r24=[r30],16; ld8 r25=[r31],16;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r24=r24,@rev; mux1 r25=r25,@rev;;
-#endif
ld8 r26=[r30],16; ld8 r27=[r31],16;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r26=r26,@rev; mux1 r27=r27,@rev;;
-#endif
ld8 r28=[r30],16; ld8 r29=[r31],16;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r28=r28,@rev; mux1 r29=r29,@rev;;
-#endif
ld8 r30=[r30]; ld8 r31=[r31];;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r30=r30,@rev; mux1 r31=r31,@rev;;
-#endif
-
add r2=TF_GREG16,r14
add r3=TF_GREG17,r14
;;
;;
ld8 r21=[r8]
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r21=r21,@rev
- ;;
-#endif
st8 [r19]=r21 // store cr.ifs
dep.z r22=r21,0,38 // copy ifm part from ifs.ifm
;;
;;
ld8 r21=[r19] // load cr.ipsr
ld8 r22=[r20] // load cr.iip
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- ;;
- mux1 r21=r21,@rev
- mux1 r22=r22,@rev
- ;;
-#endif
movl r16=XSI_IPSR // XEN !!
;;
st8 [r16]=r21,XSI_IIP_OFS-XSI_IPSR_OFS // XEN.ipsr
ld8 r22=[r19] // ndirty
;;
shl r21=r22,16 // value for ar.rsc
- //mov r19=(MOS_IA64_RSC_BE << IA64_RSC_BE)
- ;;
- or r21=(MOS_IA64_RSC_BE << IA64_RSC_BE),r21
;;
mov ar.rsc=r21 // setup for loadrs
;;
ld8 r21=[r19] // load ar.pfs
ld8 r22=[r20] // load cr.ifs
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r22=r22,@rev
- ;;
-#endif
add r19=TF_RSC,r18
mov ar.pfs=r21
st8 [r16]=r22 // XEN.ifs
// bsw.1
movl r30=XSI_BANKNUM // Switch to bank 1.
mov r31=1;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r31=r31,@rev
- ;;
-#endif
st4 [r30]=r31
;;
add r2=TF_GREG16,r14
movl r2=XSI_BANK1_R16
movl r3=XSI_BANK1_R16+8
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r16=r16,@rev; mux1 r17=r17,@rev;;
-#endif
.mem.offset 0,0; st8.spill [r2]=r16,16
.mem.offset 8,0; st8.spill [r3]=r17,16
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r18=r18,@rev; mux1 r19=r19,@rev;;
-#endif
.mem.offset 0,0; st8.spill [r2]=r18,16
.mem.offset 8,0; st8.spill [r3]=r19,16
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r20=r20,@rev; mux1 r21=r21,@rev;;
-#endif
.mem.offset 0,0; st8.spill [r2]=r20,16
.mem.offset 8,0; st8.spill [r3]=r21,16
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r22=r22,@rev; mux1 r23=r23,@rev;;
-#endif
.mem.offset 0,0; st8.spill [r2]=r22,16
.mem.offset 8,0; st8.spill [r3]=r23,16
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r24=r24,@rev; mux1 r25=r25,@rev;;
-#endif
.mem.offset 0,0; st8.spill [r2]=r24,16
.mem.offset 8,0; st8.spill [r3]=r25,16
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r26=r26,@rev; mux1 r27=r27,@rev;;
-#endif
.mem.offset 0,0; st8.spill [r2]=r26,16
.mem.offset 8,0; st8.spill [r3]=r27,16
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r28=r28,@rev; mux1 r29=r29,@rev;;
-#endif
.mem.offset 0,0; st8.spill [r2]=r28,16
.mem.offset 8,0; st8.spill [r3]=r29,16
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r30=r30,@rev; mux1 r31=r31,@rev;;
-#endif
.mem.offset 0,0; st8.spill [r2]=r30,16
.mem.offset 8,0; st8.spill [r3]=r31,16
;;
add loc5=TF_IFA,in0
add loc6=TF_ISR,in0
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 loc3=loc3,@rev; mux1 loc4=loc4,@rev;;
-#endif
st8 [loc5]=loc3,TF_IIM-TF_IFA // store cr.ifa
st8 [loc6]=loc4 // store cr.isr
;;
ld8 loc3=[loc1] // load XEN.iim
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 loc3=loc3,@rev;;
-#endif
st8 [loc5]=loc3 // store cr.iim
;;
mov ar.pfs=loc0
mov out0=r18 // the trap frame
movl r22=XSI_PSR_IC
mov r23=1;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r23=r23,@rev;;
-#endif
st8 [r22]=r23 // ssm psr.ic
;;
br.call.sptk.few rp = do_hypervisor_callback
;;
movl r30=XSI_BANKNUM // bsw.1
mov r31=1;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r31=r31,@rev;;
-#endif
st4 [r30]=r31;;
/* Save extra interrupt registers to the trap frame. */
ld8 r23=[r23]
mov r25=1
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r25=r25,@rev; mux1 r23=r23,@rev;;
-#endif
st4 [r22]=r25 // ssm psr.ic
st1 [r23]=r0 // ssm psr.i
;;
ld8 r23=[r23]
mov r25=1
;;
-#if defined(BIG_ENDIAN) // swap because mini-os is in BE
- mux1 r25=r25,@rev;;
- mux1 r25=r25,@rev; mux1 r23=r23,@rev;;
-#endif
st1 [r23]=r25
st4 [r22]=r0 // note: clears both vpsr.i and vpsr.ic!
;;
.fini_array : { *(.fini_array) }
PROVIDE (__fini_array_end = .);
- .ctors : {
+ .ctors : AT(ADDR(.ctors) - (((5<<(61))+0x100000000) - (1 << 20)))
+ {
__CTOR_LIST__ = .;
QUAD((__CTOR_END__ - __CTOR_LIST__) / 8 - 2)
*(.ctors)
__CTOR_END__ = .;
}
- .dtors : {
+ .dtors : AT(ADDR(.dtors) - (((5<<(61))+0x100000000) - (1 << 20)))
+ {
__DTOR_LIST__ = .;
QUAD((__DTOR_END__ - __DTOR_LIST__) / 8 - 2)
*(.dtors)
ASSERT(n == 1 || (stride == 0 && increment == 1));
ASSERT(id == DOMID_SELF);
ASSERT(prot == 0);
- return (void*) __va(SWAP(frames[0]) << PAGE_SHIFT);
+ return (void*) __va(frames[0] << PAGE_SHIFT);
+}
+
+int unmap_frames(unsigned long virt_addr, unsigned long num_frames)
+{
+ /* TODO */
+ ASSERT(0);
+ return -1;
+}
+
+unsigned long alloc_contig_pages(int order, unsigned int addr_bits)
+{
+ /* TODO */
+ ASSERT(0);
+ return 0;
}
void arch_init_p2m(unsigned long max_pfn)
return;
}
p = (uint8_t *) (saltab + 1);
- for (i = 0; i < SWAP(saltab->sal_entry_count); i++) {
- switch (SWAP(*p)) {
+ for (i = 0; i < saltab->sal_entry_count; i++) {
+ switch (*p) {
case SAL_DESC_ENTRYPOINT: // 0
{
struct sal_entrypoint_descriptor *dp;
dp = (struct sal_entrypoint_descriptor*)p;
ia64_pal_entry =
- IA64_PHYS_TO_RR7(SWAP(dp->sale_pal_proc));
+ IA64_PHYS_TO_RR7(dp->sale_pal_proc);
PRINT_BV(" PAL Proc at 0x%lx\n", ia64_pal_entry);
sal_fdesc.func =
- IA64_PHYS_TO_RR7(SWAP(dp->sale_sal_proc));
- sal_fdesc.gp = IA64_PHYS_TO_RR7(SWAP(dp->sale_sal_gp));
+ IA64_PHYS_TO_RR7(dp->sale_sal_proc);
+ sal_fdesc.gp = IA64_PHYS_TO_RR7(dp->sale_sal_gp);
PRINT_BV(" SAL Proc at 0x%lx, GP at 0x%lx\n",
sal_fdesc.func, sal_fdesc.gp);
ia64_sal_entry = (sal_entry_t *) &sal_fdesc;
struct ia64_pal_result pal_res;
pal_res = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0);
- //sal_res = ia64_sal_call(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
-#if defined(BIG_ENDIAN)
-//#warning calculate_frequencies TODO
- /*
- * I have to do an own function with switching psr.be!
- * Currently it's running because it's a break into the hypervisor
- * behind the call.!
- */
-#endif
sal_res = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
if (sal_res.sal_status == 0 && pal_res.pal_status == 0) {
if (efi_get_time(&tm)) {
printk(" EFI-Time: %d.%d.%d %d:%d:%d\n", tm.Day,
tm.Month, tm.Year, tm.Hour, tm.Minute, tm.Second);
- os_time.tv_sec = _mktime(SWAP(tm.Year), SWAP(tm.Month),
- SWAP(tm.Day), SWAP(tm.Hour),
- SWAP(tm.Minute), SWAP(tm.Second));
+ os_time.tv_sec = _mktime(tm.Year, tm.Month,
+ tm.Day, tm.Hour, tm.Minute, tm.Second);
os_time.tv_nsec = tm.Nanosecond;
} else
printk("efi_get_time() failed\n");
#include <os.h>
+#include <mini-os/errno.h>
+#include <mini-os/lib.h>
#include <hypervisor.h>
#include <xen/xencomm.h>
#include <xen/grant_table.h>
#define xen_guest_handle(hnd) ((hnd).p)
+struct xencomm_handle;
/* Translate virtual address to physical address. */
uint64_t
return 0;
}
+/* Inline version. To be used only on linear space (kernel space). */
+static struct xencomm_handle *
+xencomm_create_inline(void *buffer)
+{
+ unsigned long paddr;
+
+ paddr = xencomm_vaddr_to_paddr((unsigned long)buffer);
+ return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
+}
+
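+/*
+ * Illustrative sketch (addresses are made up): an inline handle is simply
+ * the buffer's physical address with XENCOMM_INLINE_FLAG or'ed in, so a
+ * kernel buffer at paddr 0x4000 yields the handle value
+ * (0x4000 | XENCOMM_INLINE_FLAG). The hypercall wrappers below follow the
+ * same pattern:
+ *
+ *     struct xencomm_handle *h = xencomm_create_inline(&arg);
+ *     rc = _hypercall2(int, sched_op, cmd, h);
+ */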
#define min(a,b) (((a) < (b)) ? (a) : (b))
static int
xencomm_init_desc(struct xencomm_desc *desc, void *buffer, unsigned long bytes)
return -EINVAL;
}
- desc->address[i++] = SWAP(paddr);
+ desc->address[i++] = paddr;
recorded += chunksz;
}
if (recorded < bytes) {
/* mark remaining addresses invalid (just for safety) */
while (i < desc->nr_addrs)
- desc->address[i++] = SWAP(XENCOMM_INVALID);
- desc->magic = SWAP(XENCOMM_MAGIC);
+ desc->address[i++] = XENCOMM_INVALID;
+ desc->magic = XENCOMM_MAGIC;
return 0;
}
return -EINVAL;
rc = xencomm_create_mini
(xc_area, nbr_area,
- (void*)SWAP((uint64_t)
- xen_guest_handle(setup->frame_list)),
- SWAP(setup->nr_frames)
+ (void*)(uint64_t) xen_guest_handle(setup->frame_list),
+ setup->nr_frames
* sizeof(*xen_guest_handle(setup->frame_list)),
&desc1);
if (rc)
return rc;
set_xen_guest_handle(setup->frame_list,
- (void *)SWAP((uint64_t)desc1));
+ (void *)(uint64_t)desc1);
break;
}
case GNTTABOP_dump_table:
return rc;
}
+static inline int
+xencomm_arch_hypercall_grant_table_op(unsigned int cmd,
+ struct xencomm_handle *uop,
+ unsigned int count)
+{
+ return _hypercall3(int, grant_table_op, cmd, uop, count);
+}
+
int
xencomm_mini_hypercall_grant_table_op(unsigned int cmd, void *op,
unsigned int count)
{
struct sched_shutdown arg;
- arg.reason = (uint32_t)SWAP((uint32_t)SHUTDOWN_suspend);
+ arg.reason = (uint32_t)SHUTDOWN_suspend;
return xencomm_arch_hypercall_suspend(xencomm_create_inline(&arg));
}
+int
+HYPERVISOR_event_channel_op(int cmd, void *arg)
+{
+ int rc;
+ struct xencomm_handle *newArg;
+
+ newArg = xencomm_create_inline(arg);
+ rc = _hypercall2(int, event_channel_op, cmd, newArg);
+ if (unlikely(rc == -ENOSYS)) {
+ struct evtchn_op op;
+
+ op.cmd = cmd;
+ memcpy(&op.u, arg, sizeof(op.u));
+ rc = _hypercall1(int, event_channel_op_compat, &op);
+ }
+ return rc;
+}
+
+static int
+xencomm_arch_xen_version(int cmd, struct xencomm_handle *arg)
+{
+ return _hypercall2(int, xen_version, cmd, arg);
+}
+
+static int
+xencomm_arch_xen_feature(int cmd, struct xencomm_handle *arg)
+{
+ struct xencomm_handle *newArg;
+
+ newArg = xencomm_create_inline(arg);
+ return _hypercall2(int, xen_version, cmd, newArg);
+}
+
+int
+HYPERVISOR_xen_version(int cmd, void *arg)
+{
+ switch(cmd) {
+ case XENVER_version:
+ return xencomm_arch_xen_version(cmd, 0);
+ case XENVER_get_features:
+ return xencomm_arch_xen_feature(cmd, arg);
+ default:
+ return -1;
+ }
+}
+
+int
+HYPERVISOR_console_io(int cmd, int count, char *str)
+{
+ struct xencomm_handle *newStr;
+
+ newStr = xencomm_create_inline(str);
+ return _hypercall3(int, console_io, cmd, count, newStr);
+}
+
+int
+HYPERVISOR_sched_op_compat(int cmd, unsigned long arg)
+{
+ return _hypercall2(int, sched_op_compat, cmd, arg);
+}
+
+int
+HYPERVISOR_sched_op(int cmd, void *arg)
+{
+ struct xencomm_handle *newArg;
+
+ newArg = xencomm_create_inline(arg);
+ return _hypercall2(int, sched_op, cmd, newArg);
+}
+
+int
+HYPERVISOR_callback_op(int cmd, void *arg)
+{
+ struct xencomm_handle *newArg;
+
+ newArg = xencomm_create_inline(arg);
+ return _hypercall2(int, callback_op, cmd, newArg);
+}
+
+int
+HYPERVISOR_opt_feature(void *arg)
+{
+ struct xencomm_handle *new_arg;
+
+ new_arg = xencomm_create_inline(arg);
+
+ return _hypercall1(int, opt_feature, new_arg);
+}
+
+int
+HYPERVISOR_shutdown(unsigned int reason)
+{
+ struct sched_shutdown sched_shutdown = {
+ .reason = reason
+ };
+
+ int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
+
+ if (rc == -ENOSYS)
+ rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason);
+
+ return rc;
+}
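+/*
+ * Usage sketch (illustrative): callers pass one of the SHUTDOWN_* reason
+ * codes from xen/sched.h, e.g.
+ *
+ *     HYPERVISOR_shutdown(SHUTDOWN_poweroff);
+ *
+ * and the -ENOSYS path above falls back to the older sched_op_compat
+ * interface on hypervisors that lack sched_op.
+ */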
+
--- /dev/null
+/*
+ * Copyright (C) 2009, Netronome Systems, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#include <types.h>
+#include <lib.h>
+#include <xmalloc.h>
+#include <mm.h>
+#include <ioremap.h>
+
+/* Map a physical address range into virtual address space with provided
+ * flags. Return the virtual address it is mapped to. */
+static void *__do_ioremap(unsigned long phys_addr, unsigned long size,
+ unsigned long prot)
+{
+ unsigned long va;
+ unsigned long mfns, mfn;
+ unsigned long num_pages, offset;
+ int i;
+
+ /* allow non page aligned addresses but for mapping we need to align them */
+ offset = (phys_addr & ~PAGE_MASK);
+ num_pages = (offset + size + PAGE_SIZE - 1) / PAGE_SIZE;
+ phys_addr &= PAGE_MASK;
+ mfns = mfn = phys_addr >> PAGE_SHIFT;
+
+ /* sanity checks on list of MFNs */
+ for ( i = 0; i < num_pages; i++, mfn++ )
+ {
+ if ( mfn_is_ram(mfn) )
+ {
+ printk("ioremap: mfn 0x%ulx is RAM\n", mfn);
+ goto mfn_invalid;
+ }
+ }
+ va = (unsigned long)map_frames_ex(&mfns, num_pages, 0, 1, 1,
+ DOMID_IO, 0, prot);
+ return (void *)(va + offset);
+
+mfn_invalid:
+ return NULL;
+}
+
+void *ioremap(unsigned long phys_addr, unsigned long size)
+{
+ return __do_ioremap(phys_addr, size, IO_PROT);
+}
+
+void *ioremap_nocache(unsigned long phys_addr, unsigned long size)
+{
+ return __do_ioremap(phys_addr, size, IO_PROT_NOCACHE);
+}
+
+/* Un-map the io-remapped region. Currently no list of existing mappings is
+ * maintained, so the caller has to supply the size */
+void iounmap(void *virt_addr, unsigned long size)
+{
+ unsigned long num_pages;
+ unsigned long va = (unsigned long)virt_addr;
+
+ /* work out number of frames to unmap */
+ num_pages = ((va & ~PAGE_MASK) + size + PAGE_SIZE - 1) / PAGE_SIZE;
+
+ unmap_frames(va & PAGE_MASK, num_pages);
+}
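+/*
+ * Usage sketch (illustrative; the MMIO address below is made up):
+ *
+ *     void *regs = ioremap_nocache(0xfebf0000UL, 0x1000);
+ *     if ( regs )
+ *     {
+ *         ... access device registers through regs ...
+ *         iounmap(regs, 0x1000);
+ *     }
+ */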
+
+
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 indent-tabs-mode:nil -*- */
unsigned long *phys_to_machine_mapping;
unsigned long mfn_zero;
extern char stack[];
-extern void page_walk(unsigned long virt_addr);
+extern void page_walk(unsigned long va);
-void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
- unsigned long offset, unsigned long level)
+/*
+ * Make pt_pfn a new 'level' page table frame and hook it into the page
+ * table at offset in previous level MFN (prev_l_mfn). pt_pfn is a guest
+ * PFN.
+ */
+static void new_pt_frame(unsigned long *pt_pfn, unsigned long prev_l_mfn,
+ unsigned long offset, unsigned long level)
{
pgentry_t *tab = (pgentry_t *)start_info.pt_base;
unsigned long pt_page = (unsigned long)pfn_to_virt(*pt_pfn);
pgentry_t prot_e, prot_t;
mmu_update_t mmu_updates[1];
+ int rc;
prot_e = prot_t = 0;
- DEBUG("Allocating new L%d pt frame for pt_pfn=%lx, "
- "prev_l_mfn=%lx, offset=%lx",
- level, *pt_pfn, prev_l_mfn, offset);
+ DEBUG("Allocating new L%d pt frame for pfn=%lx, "
+ "prev_l_mfn=%lx, offset=%lx",
+ level, *pt_pfn, prev_l_mfn, offset);
/* We need to clear the page, otherwise we might fail to map it
as a page table page */
switch ( level )
{
case L1_FRAME:
- prot_e = L1_PROT;
- prot_t = L2_PROT;
- break;
+ prot_e = L1_PROT;
+ prot_t = L2_PROT;
+ break;
case L2_FRAME:
- prot_e = L2_PROT;
- prot_t = L3_PROT;
- break;
+ prot_e = L2_PROT;
+ prot_t = L3_PROT;
+ break;
#if defined(__x86_64__)
case L3_FRAME:
- prot_e = L3_PROT;
- prot_t = L4_PROT;
- break;
+ prot_e = L3_PROT;
+ prot_t = L4_PROT;
+ break;
#endif
default:
- printk("new_pt_frame() called with invalid level number %d\n", level);
- do_exit();
- break;
+ printk("new_pt_frame() called with invalid level number %d\n", level);
+ do_exit();
+ break;
}
- /* Update the entry */
+ /* Make PFN a page table page */
#if defined(__x86_64__)
tab = pte_to_virt(tab[l4_table_offset(pt_page)]);
#endif
tab = pte_to_virt(tab[l3_table_offset(pt_page)]);
mmu_updates[0].ptr = (tab[l2_table_offset(pt_page)] & PAGE_MASK) +
- sizeof(pgentry_t) * l1_table_offset(pt_page);
+ sizeof(pgentry_t) * l1_table_offset(pt_page);
mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT |
- (prot_e & ~_PAGE_RW);
- if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
+ (prot_e & ~_PAGE_RW);
+
+ if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 )
{
- printk("PTE for new page table page could not be updated\n");
- do_exit();
+ printk("ERROR: PTE for new page table page could not be updated\n");
+ printk(" mmu_update failed with rc=%d\n", rc);
+ do_exit();
}
-
- /* Now fill the new page table page with entries.
- Update the page directory as well. */
- mmu_updates[0].ptr = ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
+
+ /* Hook the new page table page into the hierarchy */
+ mmu_updates[0].ptr =
+ ((pgentry_t)prev_l_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
mmu_updates[0].val = (pgentry_t)pfn_to_mfn(*pt_pfn) << PAGE_SHIFT | prot_t;
- if(HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF) < 0)
+
+ if ( (rc = HYPERVISOR_mmu_update(mmu_updates, 1, NULL, DOMID_SELF)) < 0 )
{
- printk("ERROR: mmu_update failed\n");
- do_exit();
+ printk("ERROR: mmu_update failed with rc=%d\n", rc);
+ do_exit();
}
*pt_pfn += 1;
}
-/* Checks if a pagetable frame is needed (if weren't allocated by Xen) */
-static int need_pt_frame(unsigned long virt_address, int level)
+/*
+ * Checks if a pagetable frame is needed at 'level' to map a given
+ * address. Note, this function is specific to the initial page table
+ * building.
+ */
+static int need_pt_frame(unsigned long va, int level)
{
unsigned long hyp_virt_start = HYPERVISOR_VIRT_START;
#if defined(__x86_64__)
/* In general frames will _not_ be needed if they were already
allocated to map the hypervisor into our VA space */
#if defined(__x86_64__)
- if(level == L3_FRAME)
+ if ( level == L3_FRAME )
{
- if(l4_table_offset(virt_address) >=
- l4_table_offset(hyp_virt_start) &&
- l4_table_offset(virt_address) <=
- l4_table_offset(hyp_virt_end))
+ if ( l4_table_offset(va) >=
+ l4_table_offset(hyp_virt_start) &&
+ l4_table_offset(va) <=
+ l4_table_offset(hyp_virt_end))
return 0;
return 1;
- } else
+ }
+ else
#endif
- if(level == L2_FRAME)
+ if ( level == L2_FRAME )
{
#if defined(__x86_64__)
- if(l4_table_offset(virt_address) >=
- l4_table_offset(hyp_virt_start) &&
- l4_table_offset(virt_address) <=
- l4_table_offset(hyp_virt_end))
+ if ( l4_table_offset(va) >=
+ l4_table_offset(hyp_virt_start) &&
+ l4_table_offset(va) <=
+ l4_table_offset(hyp_virt_end))
#endif
- if(l3_table_offset(virt_address) >=
- l3_table_offset(hyp_virt_start) &&
- l3_table_offset(virt_address) <=
- l3_table_offset(hyp_virt_end))
+ if ( l3_table_offset(va) >=
+ l3_table_offset(hyp_virt_start) &&
+ l3_table_offset(va) <=
+ l3_table_offset(hyp_virt_end))
return 0;
return 1;
- } else
-
- /* Always need l1 frames */
- if(level == L1_FRAME)
- return 1;
+ }
+ else
+ /* Always need l1 frames */
+ if ( level == L1_FRAME )
+ return 1;
printk("ERROR: Unknown frame level %d, hypervisor %llx,%llx\n",
- level, hyp_virt_start, hyp_virt_end);
+ level, hyp_virt_start, hyp_virt_end);
return -1;
}
-void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
+/*
+ * Build the initial pagetable.
+ */
+static void build_pagetable(unsigned long *start_pfn, unsigned long *max_pfn)
{
unsigned long start_address, end_address;
unsigned long pfn_to_map, pt_pfn = *start_pfn;
static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
- unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
+ unsigned long pt_mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
unsigned long offset;
int count = 0;
+ int rc;
- pfn_to_map = (start_info.nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES;
+ pfn_to_map =
+ (start_info.nr_pt_frames - NOT_L1_FRAMES) * L1_PAGETABLE_ENTRIES;
- if (*max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START))
+ if ( *max_pfn >= virt_to_pfn(HYPERVISOR_VIRT_START) )
{
printk("WARNING: Mini-OS trying to use Xen virtual space. "
"Truncating memory from %dMB to ",
- ((unsigned long)pfn_to_virt(*max_pfn) - (unsigned long)&_text)>>20);
+ ((unsigned long)pfn_to_virt(*max_pfn) -
+ (unsigned long)&_text)>>20);
*max_pfn = virt_to_pfn(HYPERVISOR_VIRT_START - PAGE_SIZE);
printk("%dMB\n",
- ((unsigned long)pfn_to_virt(*max_pfn) - (unsigned long)&_text)>>20);
+ ((unsigned long)pfn_to_virt(*max_pfn) -
+ (unsigned long)&_text)>>20);
}
start_address = (unsigned long)pfn_to_virt(pfn_to_map);
/* We worked out the virtual memory range to map, now mapping loop */
printk("Mapping memory range 0x%lx - 0x%lx\n", start_address, end_address);
- while(start_address < end_address)
+ while ( start_address < end_address )
{
tab = (pgentry_t *)start_info.pt_base;
- mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
+ pt_mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
#if defined(__x86_64__)
offset = l4_table_offset(start_address);
/* Need new L3 pt frame */
- if(!(start_address & L3_MASK))
- if(need_pt_frame(start_address, L3_FRAME))
- new_pt_frame(&pt_pfn, mfn, offset, L3_FRAME);
+ if ( !(start_address & L3_MASK) )
+ if ( need_pt_frame(start_address, L3_FRAME) )
+ new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME);
page = tab[offset];
- mfn = pte_to_mfn(page);
- tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
+ pt_mfn = pte_to_mfn(page);
+ tab = to_virt(mfn_to_pfn(pt_mfn) << PAGE_SHIFT);
#endif
offset = l3_table_offset(start_address);
/* Need new L2 pt frame */
- if(!(start_address & L2_MASK))
- if(need_pt_frame(start_address, L2_FRAME))
- new_pt_frame(&pt_pfn, mfn, offset, L2_FRAME);
+ if ( !(start_address & L2_MASK) )
+ if ( need_pt_frame(start_address, L2_FRAME) )
+ new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME);
page = tab[offset];
- mfn = pte_to_mfn(page);
- tab = to_virt(mfn_to_pfn(mfn) << PAGE_SHIFT);
+ pt_mfn = pte_to_mfn(page);
+ tab = to_virt(mfn_to_pfn(pt_mfn) << PAGE_SHIFT);
offset = l2_table_offset(start_address);
/* Need new L1 pt frame */
- if(!(start_address & L1_MASK))
- if(need_pt_frame(start_address, L1_FRAME))
- new_pt_frame(&pt_pfn, mfn, offset, L1_FRAME);
+ if ( !(start_address & L1_MASK) )
+ if ( need_pt_frame(start_address, L1_FRAME) )
+ new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME);
page = tab[offset];
- mfn = pte_to_mfn(page);
+ pt_mfn = pte_to_mfn(page);
offset = l1_table_offset(start_address);
- mmu_updates[count].ptr = ((pgentry_t)mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
- mmu_updates[count].val = (pgentry_t)pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
+ mmu_updates[count].ptr =
+ ((pgentry_t)pt_mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
+ mmu_updates[count].val =
+ (pgentry_t)pfn_to_mfn(pfn_to_map++) << PAGE_SHIFT | L1_PROT;
count++;
- if (count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn)
+ if ( count == L1_PAGETABLE_ENTRIES || pfn_to_map == *max_pfn )
{
- if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0)
+ rc = HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF);
+ if ( rc < 0 )
{
- printk("PTE could not be updated\n");
+ printk("ERROR: build_pagetable(): PTE could not be updated\n");
+ printk(" mmu_update failed with rc=%d\n", rc);
do_exit();
}
count = 0;
*start_pfn = pt_pfn;
}
+/*
+ * Mark portion of the address space read only.
+ */
extern void shared_info;
static void set_readonly(void *text, void *etext)
{
- unsigned long start_address = ((unsigned long) text + PAGE_SIZE - 1) & PAGE_MASK;
+ unsigned long start_address =
+ ((unsigned long) text + PAGE_SIZE - 1) & PAGE_MASK;
unsigned long end_address = (unsigned long) etext;
static mmu_update_t mmu_updates[L1_PAGETABLE_ENTRIES + 1];
pgentry_t *tab = (pgentry_t *)start_info.pt_base, page;
unsigned long mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
unsigned long offset;
int count = 0;
+ int rc;
printk("setting %p-%p readonly\n", text, etext);
- while (start_address + PAGE_SIZE <= end_address) {
+ while ( start_address + PAGE_SIZE <= end_address )
+ {
tab = (pgentry_t *)start_info.pt_base;
mfn = pfn_to_mfn(virt_to_pfn(start_info.pt_base));
offset = l1_table_offset(start_address);
- if (start_address != (unsigned long)&shared_info) {
- mmu_updates[count].ptr = ((pgentry_t)mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
- mmu_updates[count].val = tab[offset] & ~_PAGE_RW;
- count++;
- } else
- printk("skipped %p\n", start_address);
+ if ( start_address != (unsigned long)&shared_info )
+ {
+ mmu_updates[count].ptr =
+ ((pgentry_t)mfn << PAGE_SHIFT) + sizeof(pgentry_t) * offset;
+ mmu_updates[count].val = tab[offset] & ~_PAGE_RW;
+ count++;
+ }
+ else
+ printk("skipped %p\n", start_address);
start_address += PAGE_SIZE;
- if (count == L1_PAGETABLE_ENTRIES || start_address + PAGE_SIZE > end_address)
+ if ( count == L1_PAGETABLE_ENTRIES ||
+ start_address + PAGE_SIZE > end_address )
{
- if(HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF) < 0)
+ rc = HYPERVISOR_mmu_update(mmu_updates, count, NULL, DOMID_SELF);
+ if ( rc < 0 )
{
- printk("PTE could not be updated\n");
+ printk("ERROR: set_readonly(): PTE could not be updated\n");
do_exit();
}
count = 0;
}
{
- mmuext_op_t op = {
- .cmd = MMUEXT_TLB_FLUSH_ALL,
- };
- int count;
- HYPERVISOR_mmuext_op(&op, 1, &count, DOMID_SELF);
+ mmuext_op_t op = {
+ .cmd = MMUEXT_TLB_FLUSH_ALL,
+ };
+ int count;
+ HYPERVISOR_mmuext_op(&op, 1, &count, DOMID_SELF);
}
}
-void mem_test(unsigned long *start_add, unsigned long *end_add)
+/*
+ * A useful memory-testing function. Writes a value derived from each
+ * address to every address in the range provided and reads it back. If
+ * verbose, prints a page walk for some VAs.
+ *
+ * If we hit MEM_TEST_MAX_ERRORS we might as well stop.
+ */
+#define MEM_TEST_MAX_ERRORS 10
+int mem_test(unsigned long *start_va, unsigned long *end_va, int verbose)
{
unsigned long mask = 0x10000;
unsigned long *pointer;
-
- for(pointer = start_add; pointer < end_add; pointer++)
+ int error_count = 0;
+
+ /* write values and print page walks */
+ if ( verbose && (((unsigned long)start_va) & 0xfffff) )
+ {
+ printk("MemTest Start: 0x%lx\n", start_va);
+ page_walk((unsigned long)start_va);
+ }
+ for ( pointer = start_va; pointer < end_va; pointer++ )
{
- if(!(((unsigned long)pointer) & 0xfffff))
+ if ( verbose && !(((unsigned long)pointer) & 0xfffff) )
{
printk("Writing to %lx\n", pointer);
page_walk((unsigned long)pointer);
}
*pointer = (unsigned long)pointer & ~mask;
}
-
- for(pointer = start_add; pointer < end_add; pointer++)
+ if ( verbose && (((unsigned long)end_va) & 0xfffff) )
{
- if(((unsigned long)pointer & ~mask) != *pointer)
+ printk("MemTest End: %lx\n", end_va-1);
+ page_walk((unsigned long)end_va-1);
+ }
+
+ /* verify values */
+ for ( pointer = start_va; pointer < end_va; pointer++ )
+ {
+ if ( ((unsigned long)pointer & ~mask) != *pointer )
+ {
printk("Read error at 0x%lx. Read: 0x%lx, should read 0x%lx\n",
- (unsigned long)pointer,
- *pointer,
- ((unsigned long)pointer & ~mask));
+ (unsigned long)pointer, *pointer,
+ ((unsigned long)pointer & ~mask));
+ error_count++;
+ if ( error_count >= MEM_TEST_MAX_ERRORS )
+ {
+ printk("mem_test: too many errors\n");
+ return -1;
+ }
+ }
}
-
+ return 0;
}
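+/*
+ * Usage sketch (illustrative; va and len are hypothetical): exercise a
+ * freshly mapped range and report failures.
+ *
+ *     if ( mem_test((unsigned long *)va,
+ *                   (unsigned long *)(va + len), 1) < 0 )
+ *         printk("mem_test reported too many errors\n");
+ */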
-static pgentry_t *get_pgt(unsigned long addr)
+
+/*
+ * get the PTE for virtual address va if it exists. Otherwise NULL.
+ */
+static pgentry_t *get_pgt(unsigned long va)
{
unsigned long mfn;
pgentry_t *tab;
mfn = virt_to_mfn(start_info.pt_base);
#if defined(__x86_64__)
- offset = l4_table_offset(addr);
- if (!(tab[offset] & _PAGE_PRESENT))
+ offset = l4_table_offset(va);
+ if ( !(tab[offset] & _PAGE_PRESENT) )
return NULL;
mfn = pte_to_mfn(tab[offset]);
tab = mfn_to_virt(mfn);
#endif
- offset = l3_table_offset(addr);
- if (!(tab[offset] & _PAGE_PRESENT))
+ offset = l3_table_offset(va);
+ if ( !(tab[offset] & _PAGE_PRESENT) )
return NULL;
mfn = pte_to_mfn(tab[offset]);
tab = mfn_to_virt(mfn);
- offset = l2_table_offset(addr);
- if (!(tab[offset] & _PAGE_PRESENT))
+ offset = l2_table_offset(va);
+ if ( !(tab[offset] & _PAGE_PRESENT) )
return NULL;
mfn = pte_to_mfn(tab[offset]);
tab = mfn_to_virt(mfn);
- offset = l1_table_offset(addr);
+ offset = l1_table_offset(va);
return &tab[offset];
}
-pgentry_t *need_pgt(unsigned long addr)
+
+/*
+ * return a valid PTE for a given virtual address. If PTE does not exist,
+ * allocate page-table pages.
+ */
+pgentry_t *need_pgt(unsigned long va)
{
- unsigned long mfn;
+ unsigned long pt_mfn;
pgentry_t *tab;
unsigned long pt_pfn;
unsigned offset;
tab = (pgentry_t *)start_info.pt_base;
- mfn = virt_to_mfn(start_info.pt_base);
+ pt_mfn = virt_to_mfn(start_info.pt_base);
#if defined(__x86_64__)
- offset = l4_table_offset(addr);
- if (!(tab[offset] & _PAGE_PRESENT)) {
+ offset = l4_table_offset(va);
+ if ( !(tab[offset] & _PAGE_PRESENT) )
+ {
pt_pfn = virt_to_pfn(alloc_page());
- new_pt_frame(&pt_pfn, mfn, offset, L3_FRAME);
+ new_pt_frame(&pt_pfn, pt_mfn, offset, L3_FRAME);
}
ASSERT(tab[offset] & _PAGE_PRESENT);
- mfn = pte_to_mfn(tab[offset]);
- tab = mfn_to_virt(mfn);
+ pt_mfn = pte_to_mfn(tab[offset]);
+ tab = mfn_to_virt(pt_mfn);
#endif
- offset = l3_table_offset(addr);
- if (!(tab[offset] & _PAGE_PRESENT)) {
+ offset = l3_table_offset(va);
+ if ( !(tab[offset] & _PAGE_PRESENT) )
+ {
pt_pfn = virt_to_pfn(alloc_page());
- new_pt_frame(&pt_pfn, mfn, offset, L2_FRAME);
+ new_pt_frame(&pt_pfn, pt_mfn, offset, L2_FRAME);
}
ASSERT(tab[offset] & _PAGE_PRESENT);
- mfn = pte_to_mfn(tab[offset]);
- tab = mfn_to_virt(mfn);
- offset = l2_table_offset(addr);
- if (!(tab[offset] & _PAGE_PRESENT)) {
+ pt_mfn = pte_to_mfn(tab[offset]);
+ tab = mfn_to_virt(pt_mfn);
+ offset = l2_table_offset(va);
+ if ( !(tab[offset] & _PAGE_PRESENT) )
+ {
pt_pfn = virt_to_pfn(alloc_page());
- new_pt_frame(&pt_pfn, mfn, offset, L1_FRAME);
+ new_pt_frame(&pt_pfn, pt_mfn, offset, L1_FRAME);
}
ASSERT(tab[offset] & _PAGE_PRESENT);
- mfn = pte_to_mfn(tab[offset]);
- tab = mfn_to_virt(mfn);
+ pt_mfn = pte_to_mfn(tab[offset]);
+ tab = mfn_to_virt(pt_mfn);
- offset = l1_table_offset(addr);
+ offset = l1_table_offset(va);
return &tab[offset];
}
+/*
+ * Reserve an area of virtual address space for mappings and heap.
+ */
static unsigned long demand_map_area_start;
#ifdef __x86_64__
#define DEMAND_MAP_PAGES ((128ULL << 30) / PAGE_SIZE)
#define DEMAND_MAP_PAGES ((2ULL << 30) / PAGE_SIZE)
#endif
-#ifdef HAVE_LIBC
+#ifndef HAVE_LIBC
+#define HEAP_PAGES 0
+#else
unsigned long heap, brk, heap_mapped, heap_end;
#ifdef __x86_64__
#define HEAP_PAGES ((128ULL << 30) / PAGE_SIZE)
demand_map_area_start = (unsigned long) pfn_to_virt(cur_pfn);
cur_pfn += DEMAND_MAP_PAGES;
- printk("Demand map pfns at %lx-%lx.\n", demand_map_area_start, pfn_to_virt(cur_pfn));
+ printk("Demand map pfns at %lx-%lx.\n",
+ demand_map_area_start, pfn_to_virt(cur_pfn));
#ifdef HAVE_LIBC
cur_pfn++;
#endif
}
-#define MAP_BATCH ((STACK_SIZE / 2) / sizeof(mmu_update_t))
-void do_map_frames(unsigned long addr,
- unsigned long *f, unsigned long n, unsigned long stride,
- unsigned long increment, domid_t id, int may_fail, unsigned long prot)
-{
- pgentry_t *pgt = NULL;
- unsigned long done = 0;
- unsigned long i;
- int rc;
-
- while (done < n) {
- unsigned long todo;
-
- if (may_fail)
- todo = 1;
- else
- todo = n - done;
-
- if (todo > MAP_BATCH)
- todo = MAP_BATCH;
-
- {
- mmu_update_t mmu_updates[todo];
-
- for (i = 0; i < todo; i++, addr += PAGE_SIZE, pgt++) {
- if (!pgt || !(addr & L1_MASK))
- pgt = need_pgt(addr);
- mmu_updates[i].ptr = virt_to_mach(pgt);
- mmu_updates[i].val = ((pgentry_t)(f[(done + i) * stride] + (done + i) * increment) << PAGE_SHIFT) | prot;
- }
-
- rc = HYPERVISOR_mmu_update(mmu_updates, todo, NULL, id);
- if (rc < 0) {
- if (may_fail)
- f[done * stride] |= 0xF0000000;
- else {
- printk("Map %ld (%lx, ...) at %p failed: %d.\n", todo, f[done * stride] + done * increment, addr, rc);
- do_exit();
- }
- }
- }
-
- done += todo;
- }
-}
-
unsigned long allocate_ondemand(unsigned long n, unsigned long alignment)
{
unsigned long x;
unsigned long y = 0;
/* Find a properly aligned run of n contiguous frames */
- for (x = 0; x <= DEMAND_MAP_PAGES - n; x = (x + y + 1 + alignment - 1) & ~(alignment - 1)) {
+ for ( x = 0;
+ x <= DEMAND_MAP_PAGES - n;
+ x = (x + y + 1 + alignment - 1) & ~(alignment - 1) )
+ {
unsigned long addr = demand_map_area_start + x * PAGE_SIZE;
pgentry_t *pgt = get_pgt(addr);
- for (y = 0; y < n; y++, addr += PAGE_SIZE) {
- if (!(addr & L1_MASK))
+ for ( y = 0; y < n; y++, addr += PAGE_SIZE )
+ {
+ if ( !(addr & L1_MASK) )
pgt = get_pgt(addr);
- if (pgt) {
- if (*pgt & _PAGE_PRESENT)
+ if ( pgt )
+ {
+ if ( *pgt & _PAGE_PRESENT )
break;
pgt++;
}
}
- if (y == n)
+ if ( y == n )
break;
}
- if (y != n) {
+ if ( y != n )
+ {
printk("Failed to find %ld frames!\n", n);
return 0;
}
return demand_map_area_start + x * PAGE_SIZE;
}
-void *map_frames_ex(unsigned long *f, unsigned long n, unsigned long stride,
- unsigned long increment, unsigned long alignment, domid_t id,
- int may_fail, unsigned long prot)
+/*
+ * Map an array of MFNs contiguously into virtual address space starting at
+ * va. Maps mfns[i*stride] + i*incr for i in 0..n-1.
+ */
+#define MAP_BATCH ((STACK_SIZE / 2) / sizeof(mmu_update_t))
+void do_map_frames(unsigned long va,
+ unsigned long *mfns, unsigned long n,
+ unsigned long stride, unsigned long incr,
+ domid_t id, int may_fail,
+ unsigned long prot)
{
- unsigned long addr = allocate_ondemand(n, alignment);
+ pgentry_t *pgt = NULL;
+ unsigned long done = 0;
+ unsigned long i;
+ int rc;
- if (!addr)
+ if ( !mfns )
+ {
+ printk("do_map_frames: no mfns supplied\n");
+ return;
+ }
+ DEBUG("va=%p n=0x%lx, mfns[0]=0x%lx stride=0x%lx incr=0x%lx prot=0x%lx\n",
+ va, n, mfns[0], stride, incr, prot);
+
+ while ( done < n )
+ {
+ unsigned long todo;
+
+ if ( may_fail )
+ todo = 1;
+ else
+ todo = n - done;
+
+ if ( todo > MAP_BATCH )
+ todo = MAP_BATCH;
+
+ {
+ mmu_update_t mmu_updates[todo];
+
+ for ( i = 0; i < todo; i++, va += PAGE_SIZE, pgt++)
+ {
+ if ( !pgt || !(va & L1_MASK) )
+ pgt = need_pgt(va);
+
+ mmu_updates[i].ptr = virt_to_mach(pgt) | MMU_NORMAL_PT_UPDATE;
+ mmu_updates[i].val = ((pgentry_t)(mfns[(done + i) * stride] +
+ (done + i) * incr)
+ << PAGE_SHIFT) | prot;
+ }
+
+ rc = HYPERVISOR_mmu_update(mmu_updates, todo, NULL, id);
+ if ( rc < 0 )
+ {
+ if (may_fail)
+ mfns[done * stride] |= 0xF0000000;
+ else {
+ printk("Map %ld (%lx, ...) at %p failed: %d.\n",
+ todo, mfns[done * stride] + done * incr, va, rc);
+ do_exit();
+ }
+ }
+ }
+ done += todo;
+ }
+}
+
+/*
+ * Map an array of MFNs contiguously into virtual address space. Virtual
+ * addresses are allocated from the on-demand area.
+ */
+void *map_frames_ex(unsigned long *mfns, unsigned long n,
+ unsigned long stride, unsigned long incr,
+ unsigned long alignment,
+ domid_t id, int may_fail, unsigned long prot)
+{
+ unsigned long va = allocate_ondemand(n, alignment);
+
+ if ( !va )
return NULL;
- /* Found it at x. Map it in. */
- do_map_frames(addr, f, n, stride, increment, id, may_fail, prot);
+ do_map_frames(va, mfns, n, stride, incr, id, may_fail, prot);
+
+ return (void *)va;
+}
+
+/*
+ * Unmap num_frames frames mapped at virtual address va.
+ */
+#define UNMAP_BATCH ((STACK_SIZE / 2) / sizeof(multicall_entry_t))
+int unmap_frames(unsigned long va, unsigned long num_frames)
+{
+ int n = UNMAP_BATCH;
+ multicall_entry_t call[n];
+ int ret;
+ int i;
+
+ ASSERT(!((unsigned long)va & ~PAGE_MASK));
+
+ DEBUG("va=%p, num=0x%lx\n", va, num_frames);
+
+ while ( num_frames ) {
+ if ( n > num_frames )
+ n = num_frames;
+
+ for ( i = 0; i < n; i++ )
+ {
+ int arg = 0;
+ /* simply update the PTE for the VA and invalidate TLB */
+ call[i].op = __HYPERVISOR_update_va_mapping;
+ call[i].args[arg++] = va;
+ call[i].args[arg++] = 0;
+#ifdef __i386__
+ call[i].args[arg++] = 0;
+#endif
+ call[i].args[arg++] = UVMF_INVLPG;
+
+ va += PAGE_SIZE;
+ }
+
+ ret = HYPERVISOR_multicall(call, n);
+ if ( ret )
+ {
+ printk("update_va_mapping hypercall failed with rc=%d.\n", ret);
+ return -ret;
+ }
- return (void *)addr;
+ for ( i = 0; i < n; i++ )
+ {
+ if ( call[i].result )
+ {
+ printk("update_va_mapping failed for with rc=%d.\n", ret);
+ return -(call[i].result);
+ }
+ }
+ num_frames -= n;
+ }
+ return 0;
}
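+/*
+ * Usage sketch (illustrative; mfn0/mfn1 are hypothetical): map two frames
+ * into the on-demand area, then unmap them again when done.
+ *
+ *     unsigned long mfns[2] = { mfn0, mfn1 };
+ *     void *va = map_frames_ex(mfns, 2, 1, 0, 1, DOMID_SELF, 0, L1_PROT);
+ *     if ( va )
+ *         unmap_frames((unsigned long)va, 2);
+ */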
+/*
+ * Allocate pages which are contiguous in machine memory.
+ * Returns a VA to where they are mapped or 0 on failure.
+ *
+ * addr_bits indicates if the region has restrictions on where it is
+ * located. Typical values are 32 (if for example PCI devices can't access
+ * 64bit memory) or 0 for no restrictions.
+ *
+ * Allocated pages can be freed using the page allocators free_pages()
+ * function.
+ *
+ * based on Linux function xen_create_contiguous_region()
+ */
+#define MAX_CONTIG_ORDER 9 /* 2MB */
+unsigned long alloc_contig_pages(int order, unsigned int addr_bits)
+{
+ unsigned long in_va, va;
+ unsigned long in_frames[1UL << order], out_frames, mfn;
+ multicall_entry_t call[1UL << order];
+ unsigned int i, num_pages = 1UL << order;
+ int ret, exch_success;
+
+ /* pass in num_pages extents of size 1 and
+ * request 1 extent of size 'order' */
+ struct xen_memory_exchange exchange = {
+ .in = {
+ .nr_extents = num_pages,
+ .extent_order = 0,
+ .domid = DOMID_SELF
+ },
+ .out = {
+ .nr_extents = 1,
+ .extent_order = order,
+ .address_bits = addr_bits,
+ .domid = DOMID_SELF
+ },
+ .nr_exchanged = 0
+ };
+
+ if ( order > MAX_CONTIG_ORDER )
+ {
+ printk("alloc_contig_pages: order too large 0x%x > 0x%x\n",
+ order, MAX_CONTIG_ORDER);
+ return 0;
+ }
+
+ /* Allocate some potentially discontiguous pages */
+ in_va = alloc_pages(order);
+ if ( !in_va )
+ {
+ printk("alloc_contig_pages: could not get enough pages (order=0x%x\n",
+ order);
+ return 0;
+ }
+
+ /* set up arguments for exchange hyper call */
+ set_xen_guest_handle(exchange.in.extent_start, in_frames);
+ set_xen_guest_handle(exchange.out.extent_start, &out_frames);
+
+ /* unmap current frames, keep a list of MFNs */
+ for ( i = 0; i < num_pages; i++ )
+ {
+ int arg = 0;
+
+ va = in_va + (PAGE_SIZE * i);
+ in_frames[i] = virt_to_mfn(va);
+
+ /* update P2M mapping */
+ phys_to_machine_mapping[virt_to_pfn(va)] = INVALID_P2M_ENTRY;
+
+ /* build multi call */
+ call[i].op = __HYPERVISOR_update_va_mapping;
+ call[i].args[arg++] = va;
+ call[i].args[arg++] = 0;
+#ifdef __i386__
+ call[i].args[arg++] = 0;
+#endif
+ call[i].args[arg++] = UVMF_INVLPG;
+ }
+
+ ret = HYPERVISOR_multicall(call, i);
+ if ( ret )
+ {
+ printk("Odd, update_va_mapping hypercall failed with rc=%d.\n", ret);
+ return 0;
+ }
+
+ /* try getting a contig range of MFNs */
+ out_frames = virt_to_pfn(in_va); /* PFNs to populate */
+ ret = HYPERVISOR_memory_op(XENMEM_exchange, &exchange);
+ if ( ret ) {
+ printk("mem exchanged order=0x%x failed with rc=%d, nr_exchanged=%d\n",
+ order, ret, exchange.nr_exchanged);
+ /* We still need to return the allocated pages above to the pool,
+ * i.e. map them back into the 1:1 mapping etc., so we continue, but
+ * in the end return the pages to the page allocator and return 0. */
+ exch_success = 0;
+ }
+ else
+ exch_success = 1;
+
+ /* map frames into 1:1 and update p2m */
+ for ( i = 0; i < num_pages; i++ )
+ {
+ int arg = 0;
+ pte_t pte;
+
+ va = in_va + (PAGE_SIZE * i);
+ mfn = i < exchange.nr_exchanged ? (out_frames + i) : in_frames[i];
+ pte = __pte(mfn << PAGE_SHIFT | L1_PROT);
+
+ /* update P2M mapping */
+ phys_to_machine_mapping[virt_to_pfn(va)] = mfn;
+
+ /* build multi call */
+ call[i].op = __HYPERVISOR_update_va_mapping;
+ call[i].args[arg++] = va;
+#ifdef __x86_64__
+ call[i].args[arg++] = (pgentry_t)pte.pte;
+#else
+ call[i].args[arg++] = pte.pte_low;
+ call[i].args[arg++] = pte.pte_high;
+#endif
+ call[i].args[arg++] = UVMF_INVLPG;
+ }
+ ret = HYPERVISOR_multicall(call, i);
+ if ( ret )
+ {
+ printk("update_va_mapping hypercall no. 2 failed with rc=%d.\n", ret);
+ return 0;
+ }
+
+ if ( !exch_success )
+ {
+ /* since the exchange failed we just free the pages as well */
+ free_pages((void *) in_va, order);
+ return 0;
+ }
+
+ return in_va;
+}
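+/*
+ * Usage sketch (illustrative): grab 16 machine-contiguous pages below 4GB,
+ * e.g. for a 32-bit DMA-capable device, and hand them back via the page
+ * allocator when done.
+ *
+ *     unsigned long va = alloc_contig_pages(4, 32);   (order 4 == 16 pages)
+ *     if ( va )
+ *         free_pages((void *)va, 4);
+ */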
+
+/*
+ * Check if a given MFN refers to real memory
+ */
+static long system_ram_end_mfn;
+int mfn_is_ram(unsigned long mfn)
+{
+ /* very crude check if a given MFN is memory or not. Probably should
+ * make this a little more sophisticated ;) */
+ return (mfn <= system_ram_end_mfn) ? 1 : 0;
+}
+
+
+/*
+ * Clear some of the bootstrap memory
+ */
static void clear_bootstrap(void)
{
pte_t nullpte = { };
+ int rc;
/* Use first page as the CoW zero page */
memset(&_text, 0, PAGE_SIZE);
mfn_zero = virt_to_mfn((unsigned long) &_text);
- if (HYPERVISOR_update_va_mapping(0, nullpte, UVMF_INVLPG))
- printk("Unable to unmap NULL page\n");
+ if ( (rc = HYPERVISOR_update_va_mapping(0, nullpte, UVMF_INVLPG)) )
+ printk("Unable to unmap NULL page. rc=%d\n", rc);
}
void arch_init_p2m(unsigned long max_pfn)
{
+#ifdef __x86_64__
#define L1_P2M_SHIFT 9
#define L2_P2M_SHIFT 18
#define L3_P2M_SHIFT 27
+#else
+#define L1_P2M_SHIFT 10
+#define L2_P2M_SHIFT 20
+#define L3_P2M_SHIFT 30
+#endif
#define L1_P2M_ENTRIES (1 << L1_P2M_SHIFT)
#define L2_P2M_ENTRIES (1 << (L2_P2M_SHIFT - L1_P2M_SHIFT))
#define L3_P2M_ENTRIES (1 << (L3_P2M_SHIFT - L2_P2M_SHIFT))
unsigned long pfn;
l3_list = (unsigned long *)alloc_page();
- for(pfn=0; pfn<max_pfn; pfn++)
+ for ( pfn=0; pfn<max_pfn; pfn++ )
{
- if(!(pfn % (L1_P2M_ENTRIES * L2_P2M_ENTRIES)))
+ if ( !(pfn % (L1_P2M_ENTRIES * L2_P2M_ENTRIES)) )
{
l2_list = (unsigned long*)alloc_page();
- if((pfn >> L3_P2M_SHIFT) > 0)
+ if ( (pfn >> L3_P2M_SHIFT) > 0 )
{
printk("Error: Too many pfns.\n");
do_exit();
}
l3_list[(pfn >> L2_P2M_SHIFT)] = virt_to_mfn(l2_list);
}
- if(!(pfn % (L1_P2M_ENTRIES)))
+ if ( !(pfn % (L1_P2M_ENTRIES)) )
{
l1_list = (unsigned long*)alloc_page();
l2_list[(pfn >> L1_P2M_SHIFT) & L2_P2M_MASK] =
void arch_init_mm(unsigned long* start_pfn_p, unsigned long* max_pfn_p)
{
-
unsigned long start_pfn, max_pfn;
- printk(" _text: %p\n", &_text);
- printk(" _etext: %p\n", &_etext);
- printk(" _erodata: %p\n", &_erodata);
- printk(" _edata: %p\n", &_edata);
- printk(" stack start: %p\n", stack);
- printk(" _end: %p\n", &_end);
+ printk(" _text: %p(VA)\n", &_text);
+ printk(" _etext: %p(VA)\n", &_etext);
+ printk(" _erodata: %p(VA)\n", &_erodata);
+ printk(" _edata: %p(VA)\n", &_edata);
+ printk("stack start: %p(VA)\n", stack);
+ printk(" _end: %p(VA)\n", &_end);
/* First page follows page table pages and 3 more pages (store page etc) */
start_pfn = PFN_UP(to_phys(start_info.pt_base)) +
- start_info.nr_pt_frames + 3;
+ start_info.nr_pt_frames + 3;
max_pfn = start_info.nr_pages;
-
- printk(" start_pfn: %lx\n", start_pfn);
- printk(" max_pfn: %lx\n", max_pfn);
+
+ /* We need room for demand mapping and heap, clip available memory */
+#if defined(__i386__)
+ {
+ unsigned long virt_pfns = 1 + DEMAND_MAP_PAGES + 1 + HEAP_PAGES;
+ if (max_pfn + virt_pfns >= 0x100000)
+ max_pfn = 0x100000 - virt_pfns - 1;
+ }
+#endif
+
+ printk(" start_pfn: %lx\n", start_pfn);
+ printk(" max_pfn: %lx\n", max_pfn);
build_pagetable(&start_pfn, &max_pfn);
clear_bootstrap();
set_readonly(&_text, &_erodata);
+ /* get the number of physical pages the system has. Used to check for
+ * system memory. */
+ system_ram_end_mfn = HYPERVISOR_memory_op(XENMEM_maximum_ram_page, NULL);
+
*start_pfn_p = start_pfn;
*max_pfn_p = max_pfn;
}
static
shared_info_t *map_shared_info(unsigned long pa)
{
- if ( HYPERVISOR_update_va_mapping(
- (unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG) )
+ int rc;
+
+ if ( (rc = HYPERVISOR_update_va_mapping(
+ (unsigned long)shared_info, __pte(pa | 7), UVMF_INVLPG)) )
{
- printk("Failed to map shared_info!!\n");
+ printk("Failed to map shared_info!! rc=%d\n", rc);
do_exit();
}
return (shared_info_t *)shared_info;
int cpu = 0;
shared_info_t *s = HYPERVISOR_shared_info;
vcpu_info_t *vcpu_info = &s->vcpu_info[cpu];
+ int rc;
- for (i = 0; i < NR_EVS; i++)
+ for ( i = 0; i < NR_EVS; i++ )
{
- if (i == start_info.console.domU.evtchn ||
- i == start_info.store_evtchn)
+ if ( i == start_info.console.domU.evtchn ||
+ i == start_info.store_evtchn)
continue;
- if (test_and_clear_bit(i, bound_ports))
+
+ if ( test_and_clear_bit(i, bound_ports) )
{
struct evtchn_close close;
printk("port %d still bound!\n", i);
mask_evtchn(i);
close.port = i;
- HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+ if ( rc )
+ printk("WARN: close_port %s failed rc=%d. ignored\n", i, rc);
clear_evtchn(i);
}
}
clear_evtchn(port);
- if (port >= NR_EVS) {
- printk("Port number too large: %d\n", port);
+ if ( port >= NR_EVS )
+ {
+ printk("WARN: do_event(): Port number too large: %d\n", port);
return 1;
}
evtchn_port_t bind_evtchn(evtchn_port_t port, evtchn_handler_t handler,
void *data)
{
- if(ev_actions[port].handler != default_handler)
+ if ( ev_actions[port].handler != default_handler )
printk("WARN: Handler for port %d already registered, replacing\n",
- port);
+ port);
ev_actions[port].data = data;
wmb();
void unbind_evtchn(evtchn_port_t port )
{
struct evtchn_close close;
+ int rc;
- if (ev_actions[port].handler == default_handler)
+ if ( ev_actions[port].handler == default_handler )
printk("WARN: No handler for port %d when unbinding\n", port);
mask_evtchn(port);
clear_evtchn(port);
clear_bit(port, bound_ports);
close.port = port;
- HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_close, &close);
+ if ( rc )
+ printk("WARN: close_port %s failed rc=%d. ignored\n", port, rc);
+
}
evtchn_port_t bind_virq(uint32_t virq, evtchn_handler_t handler, void *data)
{
evtchn_bind_virq_t op;
+ int rc;
/* Try to bind the virq to a port */
op.virq = virq;
op.vcpu = smp_processor_id();
- if ( HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &op) != 0 )
+ if ( (rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &op)) != 0 )
{
- printk("Failed to bind virtual IRQ %d\n", virq);
+ printk("Failed to bind virtual IRQ %d with rc=%d\n", virq, rc);
return -1;
}
bind_evtchn(op.port, handler, data);
return op.port;
}
-evtchn_port_t bind_pirq(uint32_t pirq, int will_share, evtchn_handler_t handler, void *data)
+evtchn_port_t bind_pirq(uint32_t pirq, int will_share,
+ evtchn_handler_t handler, void *data)
{
evtchn_bind_pirq_t op;
+ int rc;
/* Try to bind the pirq to a port */
op.pirq = pirq;
op.flags = will_share ? BIND_PIRQ__WILL_SHARE : 0;
- if ( HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &op) != 0 )
+ if ( (rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &op)) != 0 )
{
- printk("Failed to bind physical IRQ %d\n", pirq);
+ printk("Failed to bind physical IRQ %d with rc=%d\n", pirq, rc);
return -1;
}
bind_evtchn(op.port, handler, data);
asm volatile("movl %0,%%fs ; movl %0,%%gs" :: "r" (0));
wrmsrl(0xc0000101, &cpu0_pda); /* 0xc0000101 is MSR_GS_BASE */
cpu0_pda.irqcount = -1;
- cpu0_pda.irqstackptr = (void*) (((unsigned long)irqstack + 2 * STACK_SIZE) & ~(STACK_SIZE - 1));
+ cpu0_pda.irqstackptr = (void*) (((unsigned long)irqstack + 2 * STACK_SIZE)
+ & ~(STACK_SIZE - 1));
#endif
/* initialize event handler */
for ( i = 0; i < NR_EVS; i++ )
int evtchn_alloc_unbound(domid_t pal, evtchn_handler_t handler,
void *data, evtchn_port_t *port)
{
- int err;
+ int rc;
+
evtchn_alloc_unbound_t op;
op.dom = DOMID_SELF;
op.remote_dom = pal;
- err = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
- if (err)
- return err;
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_alloc_unbound, &op);
+ if ( rc )
+ {
+ printk("ERROR: alloc_unbound failed with rc=%d", rc);
+ return rc;
+ }
*port = bind_evtchn(op.port, handler, data);
- return err;
+ return rc;
}
/* Connect to a port so as to allow the exchange of notifications with
evtchn_handler_t handler, void *data,
evtchn_port_t *local_port)
{
- int err;
+ int rc;
evtchn_port_t port;
evtchn_bind_interdomain_t op;
op.remote_dom = pal;
op.remote_port = remote_port;
- err = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &op);
- if (err)
- return err;
+ rc = HYPERVISOR_event_channel_op(EVTCHNOP_bind_interdomain, &op);
+ if ( rc )
+ {
+ printk("ERROR: bind_interdomain failed with rc=%d", rc);
+ return rc;
+ }
port = op.local_port;
*local_port = bind_evtchn(port, handler, data);
- return err;
+ return rc;
}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
in_irq = 0;
}
-/* Small utility function to figure out our domain id */
-static domid_t get_self_id(void)
-{
- char *dom_id;
- domid_t ret;
-
- BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id));
- sscanf(dom_id, "%d", &ret);
-
- return ret;
-}
-
static void alloc_request_table(struct fs_import *import)
{
struct fs_request *requests;
unmask_evtchn(import->local_port);
- self_id = get_self_id();
+ self_id = xenbus_get_self_id();
/* Write the frontend info to a node in our Xenbus */
sprintf(nodename, "/local/domain/%d/device/vfs/%d",
self_id, import->import_id);
static inline void
set_bit(int num, volatile void *addr)
{
- uint32_t bit, b, old, new;
+ uint32_t bit, old, new;
volatile uint32_t *p;
p = (volatile uint32_t *) addr + (num >> 5);
- b = 1 << (num & 31);
- bit = SWAP(b);
+ bit = 1 << (num & 31);
do
{
old = *p;
static __inline__ void
clear_bit(int num, volatile void *addr)
{
- uint32_t mask, m, old, new;
+ uint32_t mask, old, new;
volatile uint32_t *p;
p = (volatile uint32_t *) addr + (num >> 5);
- m = ~(1 << (num & 31));
- mask = SWAP(m);
+ mask = ~(1 << (num & 31));
do {
old = *p;
new = old & mask;
static __inline__ int
test_bit(int num, const volatile void *addr)
{
- uint32_t val = SWAP(1);
+ uint32_t val = 1;
return val & (((const volatile uint32_t *) addr)[num >> 5] >> (num & 31));
}
static inline int
test_and_set_bit (int num, volatile void *addr)
{
- uint32_t bit, b, old, new;
+ uint32_t bit, old, new;
volatile uint32_t *m;
m = (volatile uint32_t *) addr + (num >> 5);
- b = 1 << (num & 31);
- bit = SWAP(b);
+ bit = 1 << (num & 31);
do {
old = *m;
new = old | bit;
static
inline int test_and_clear_bit(int num, volatile unsigned long * addr)
{
- uint32_t bit, b, old, new;
+ uint32_t bit, old, new;
volatile uint32_t* a;
a = (volatile uint32_t *) addr + (num >> 5);
- b = ~(1 << (num & 31));
- bit = SWAP(b);
+ bit = ~(1 << (num & 31));
do {
old = *a;
new = old & bit;
#ifndef __HYPERCALL_H__
#define __HYPERCALL_H__
-#include <mini-os/lib.h> /* memcpy() */
-#include <mini-os/errno.h> /* ENOSYS() */
#include <xen/event_channel.h>
#include <xen/sched.h>
#include <xen/version.h>
})
-extern unsigned long xencomm_vaddr_to_paddr(unsigned long vaddr);
-struct xencomm_handle;
-
-/* Inline version. To be used only on linear space (kernel space). */
-static inline struct xencomm_handle *
-xencomm_create_inline(void *buffer)
-{
- unsigned long paddr;
-
- paddr = xencomm_vaddr_to_paddr((unsigned long)buffer);
- return (struct xencomm_handle *)(paddr | XENCOMM_INLINE_FLAG);
-}
-
-static inline int
-xencomm_arch_event_channel_op(int cmd, void *arg)
-{
- int rc;
- struct xencomm_handle *newArg;
-
- newArg = xencomm_create_inline(arg);
- rc = _hypercall2(int, event_channel_op, cmd, newArg);
- if (unlikely(rc == -ENOSYS)) {
- struct evtchn_op op;
-
- op.cmd = SWAP(cmd);
- memcpy(&op.u, arg, sizeof(op.u));
- rc = _hypercall1(int, event_channel_op_compat, &op);
- }
- return rc;
-}
-#define HYPERVISOR_event_channel_op xencomm_arch_event_channel_op
-
-static inline int
-xencomm_arch_xen_version(int cmd, struct xencomm_handle *arg)
-{
- return _hypercall2(int, xen_version, cmd, arg);
-}
-
-static inline int
-xencomm_arch_xen_feature(int cmd, struct xencomm_handle *arg)
-{
- struct xencomm_handle *newArg;
-
- newArg = xencomm_create_inline(arg);
- return _hypercall2(int, xen_version, cmd, newArg);
-}
-
-static inline int
-HYPERVISOR_xen_version(int cmd, void *arg)
-{
- switch(cmd) {
- case XENVER_version:
- return xencomm_arch_xen_version(cmd, 0);
- case XENVER_get_features:
- return xencomm_arch_xen_feature(cmd, arg);
- default:
- return -1;
- }
-}
-
-static inline int
-xencomm_arch_console_io(int cmd, int count, char *str)
-{
- struct xencomm_handle *newStr;
-
- newStr = xencomm_create_inline(str);
- return _hypercall3(int, console_io, cmd, count, newStr);
-}
-
-
-#define HYPERVISOR_console_io xencomm_arch_console_io
-
-static inline int
-HYPERVISOR_sched_op_compat(int cmd, unsigned long arg)
-{
- return _hypercall2(int, sched_op_compat, cmd, arg);
-}
-
-static inline int
-xencomm_arch_sched_op(int cmd, void *arg)
-{
- struct xencomm_handle *newArg;
-
- newArg = xencomm_create_inline(arg);
- return _hypercall2(int, sched_op, cmd, newArg);
-}
-
-#define HYPERVISOR_sched_op xencomm_arch_sched_op
-
-static inline int
-xencomm_arch_callback_op(int cmd, void *arg)
-{
- struct xencomm_handle *newArg;
-
- newArg = xencomm_create_inline(arg);
- return _hypercall2(int, callback_op, cmd, newArg);
-}
-#define HYPERVISOR_callback_op xencomm_arch_callback_op
-
-static inline int
-xencomm_arch_hypercall_grant_table_op(unsigned int cmd,
- struct xencomm_handle *uop,
- unsigned int count)
-{
- return _hypercall3(int, grant_table_op, cmd, uop, count);
-}
+int HYPERVISOR_event_channel_op(int cmd, void *arg);
+
+int HYPERVISOR_xen_version(int cmd, void *arg);
+
+int HYPERVISOR_console_io(int cmd, int count, char *str);
+
+int HYPERVISOR_sched_op_compat(int cmd, unsigned long arg);
+
+int HYPERVISOR_sched_op(int cmd, void *arg);
+
+int HYPERVISOR_callback_op(int cmd, void *arg);
int HYPERVISOR_grant_table_op(unsigned int cmd, void *uop, unsigned int count);
-static inline int
-HYPERVISOR_opt_feature(void *arg)
-{
- struct xencomm_handle *new_arg;
- new_arg = xencomm_create_inline(arg);
- return _hypercall1(int, opt_feature, new_arg);
-}
+int HYPERVISOR_opt_feature(void *arg);
+int HYPERVISOR_suspend(unsigned long srec);
+int HYPERVISOR_shutdown(unsigned int reason);
#endif /* __HYPERCALL_H__ */
#define IA64_PSR_IA 0x0000200000000000
-/* Endianess of mini-os. */
-#if defined(BIG_ENDIAN)
-#define MOS_IA64_PSR_BE IA64_PSR_BE
-#else
-#define MOS_IA64_PSR_BE 0
-#endif
-
#define STARTUP_PSR (IA64_PSR_IT | IA64_PSR_PK | \
- IA64_PSR_DT | IA64_PSR_RT | MOS_IA64_PSR_BE | \
+ IA64_PSR_DT | IA64_PSR_RT | \
IA64_PSR_BN | IA64_PSR_CPL_KERN | IA64_PSR_AC)
#define MOS_SYS_PSR (IA64_PSR_IC | IA64_PSR_I | IA64_PSR_IT | \
- IA64_PSR_DT | IA64_PSR_RT | MOS_IA64_PSR_BE | \
+ IA64_PSR_DT | IA64_PSR_RT | \
IA64_PSR_BN | IA64_PSR_CPL_KERN | IA64_PSR_AC)
#define MOS_USR_PSR (IA64_PSR_IC | IA64_PSR_I | IA64_PSR_IT | \
- IA64_PSR_DT | IA64_PSR_RT | MOS_IA64_PSR_BE | \
+ IA64_PSR_DT | IA64_PSR_RT | \
IA64_PSR_BN | IA64_PSR_CPL_USER | IA64_PSR_AC)
/*
#define IA64_DCR_MBZ1_V 0xffffffffffffULL
- /* Endianess of DCR register. */
-#if defined(BIG_ENDIAN)
-#define MOS_IA64_DCR_BE (1 << IA64_DCR_BE)
-#else
-#define MOS_IA64_DCR_BE (0 << IA64_DCR_BE)
-#endif
-
-#define IA64_DCR_DEFAULT (MOS_IA64_DCR_BE)
+#define IA64_DCR_DEFAULT (IA64_DCR_BE)
/*
* Vector numbers for various ia64 interrupts.
#define IA64_RSC_MODE_LI (0x2) /* Load intensive */
#define IA64_RSC_MODE_EA (0x3) /* Eager */
-/* RSE endian mode. */
-#if defined(BIG_ENDIAN)
-#define MOS_IA64_RSC_BE 1 /* Big endian rse. */
-#else
-#define MOS_IA64_RSC_BE 0 /* Little endian rse. */
-#endif
-
-#define IA64_RSE_EAGER ((IA64_RSC_MODE_EA<<IA64_RSC_MODE) | \
- (MOS_IA64_RSC_BE << IA64_RSC_BE) )
-
-#define IA64_RSE_LAZY ((IA64_RSC_MODE_LY<<IA64_RSC_MODE) | \
- (MOS_IA64_RSC_BE << IA64_RSC_BE) )
+#define IA64_RSE_EAGER (IA64_RSC_MODE_EA<<IA64_RSC_MODE)
+#define IA64_RSE_LAZY (IA64_RSC_MODE_LY<<IA64_RSC_MODE)
*/
typedef struct
{
-#if defined(BIG_ENDIAN)
- uint64_t pte_ig :11; /* bits 53..63 */
- uint64_t pte_ed :1; /* bits 52..52 */
- uint64_t pte_rv2:2; /* bits 50..51 */
- uint64_t pte_ppn:38; /* bits 12..49 */
- uint64_t pte_ar :3; /* bits 9..11 */
- uint64_t pte_pl :2; /* bits 7..8 */
- uint64_t pte_d :1; /* bits 6..6 */
- uint64_t pte_a :1; /* bits 5..5 */
- uint64_t pte_ma :3; /* bits 2..4 */
- uint64_t pte_rv1:1; /* bits 1..1 */
- uint64_t pte_p :1; /* bits 0..0 */
-#else
uint64_t pte_p :1; /* bits 0..0 */
uint64_t pte_rv1:1; /* bits 1..1 */
uint64_t pte_ma :3; /* bits 2..4 */
uint64_t pte_rv2:2; /* bits 50..51 */
uint64_t pte_ed :1; /* bits 52..52 */
uint64_t pte_ig :11; /* bits 53..63 */
-#endif
} ia64_pte_t;
#if !defined(__ASSEMBLY__)
#include <mini-os/types.h>
-#include "endian.h"
#include "ia64_cpu.h"
#include "atomic.h"
#include "efi.h"
extern shared_info_t *HYPERVISOR_shared_info;
-static inline int
-HYPERVISOR_shutdown(unsigned int reason)
-{
- struct sched_shutdown sched_shutdown = {
- .reason = reason
- };
-
- int rc = HYPERVISOR_sched_op(SCHEDOP_shutdown, &sched_shutdown);
-
- if (rc == -ENOSYS)
- rc = HYPERVISOR_sched_op_compat(SCHEDOP_shutdown, reason);
-
- return rc;
-}
-
/*
* This code is from the originally os.h and should be put in a
do { \
vcpu_info_t *_vcpu; \
_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- _vcpu->evtchn_upcall_mask = SWAP(1); \
+ _vcpu->evtchn_upcall_mask = 1; \
barrier(); \
} while (0)
_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
_vcpu->evtchn_upcall_mask = 0; \
barrier(); /* unmask then check (avoid races) */ \
- if (unlikely(SWAP(_vcpu->evtchn_upcall_pending))) \
+ if (unlikely(_vcpu->evtchn_upcall_pending)) \
force_evtchn_callback(); \
} while (0)
do { \
vcpu_info_t *_vcpu; \
_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- (x) = SWAP(_vcpu->evtchn_upcall_mask); \
+ (x) = _vcpu->evtchn_upcall_mask; \
} while (0)
#define __restore_flags(x) \
_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
if ((_vcpu->evtchn_upcall_mask = (x)) == 0) { \
barrier(); /* unmask then check (avoid races) */ \
- if ( unlikely(SWAP(_vcpu->evtchn_upcall_pending)) ) \
+ if ( unlikely(_vcpu->evtchn_upcall_pending) ) \
force_evtchn_callback(); \
}\
} while (0)
do { \
vcpu_info_t *_vcpu; \
_vcpu = &HYPERVISOR_shared_info->vcpu_info[smp_processor_id()]; \
- (x) = SWAP(_vcpu->evtchn_upcall_mask); \
- _vcpu->evtchn_upcall_mask = SWAP(1); \
+ (x) = _vcpu->evtchn_upcall_mask; \
+ _vcpu->evtchn_upcall_mask = 1; \
barrier(); \
} while (0)
#define local_irq_enable() __sti()
#define irqs_disabled() \
- SWAP(HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].evtchn_upcall_mask)
+ (HYPERVISOR_shared_info->vcpu_info[smp_processor_id()].evtchn_upcall_mask)
/* This is a barrier for the compiler only, NOT the processor! */
#define barrier() __asm__ __volatile__("": : :"memory")
--- /dev/null
+/**
+ * Copyright (C) 2009 Netronome Systems, Inc. All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+
+#ifndef _IOREMAP_H_
+#define _IOREMAP_H_
+
+void *ioremap(unsigned long phys_addr, unsigned long size);
+void *ioremap_nocache(unsigned long phys_addr, unsigned long size);
+void iounmap(void *virt_addr, unsigned long size);
+
+#endif /* _IOREMAP_H_ */
+
+/* -*- Mode:C; c-basic-offset:4; tab-width:4 indent-tabs-mode:nil -*- */
void do_map_frames(unsigned long addr,
unsigned long *f, unsigned long n, unsigned long stride,
unsigned long increment, domid_t id, int may_fail, unsigned long prot);
+int unmap_frames(unsigned long va, unsigned long num_frames);
+unsigned long alloc_contig_pages(int order, unsigned int addr_bits);
#ifdef HAVE_LIBC
extern unsigned long heap, brk, heap_mapped, heap_end;
#endif
--- /dev/null
+/*
+ * This code is mostly taken from NetBSD net/if.h
+ * Changes: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
+ *
+ ******************************************************************************
+ *
+ * Copyright (c) 1999, 2000, 2001 The NetBSD Foundation, Inc.
+ * All rights reserved.
+ *
+ * This code is derived from software contributed to The NetBSD Foundation
+ * by William Studenmund and Jason R. Thorpe.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
+ * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+ * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
+ * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/*
+ * Copyright (c) 1982, 1986, 1989, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#ifndef _NET_IF_H_
+#define _NET_IF_H_
+
+/*
+ * Length of interface external name, including terminating '\0'.
+ * Note: this is the same size as a generic device's external name.
+ */
+#define IF_NAMESIZE 16
+
+struct if_nameindex {
+ unsigned int if_index; /* 1, 2, ... */
+ char *if_name; /* null terminated name: "le0", ... */
+};
+
+unsigned int if_nametoindex(const char *);
+char * if_indextoname(unsigned int, char *);
+struct if_nameindex * if_nameindex(void);
+void if_freenameindex(struct if_nameindex *);
+
+#endif /* !_NET_IF_H_ */
+
void exit_thread(void) __attribute__((noreturn));
void schedule(void);
+#ifdef __INSIDE_MINIOS__
#define current get_current()
-
+#endif
void wake(struct thread *thread);
void block(struct thread *thread);
#define DEFINE_WAIT(name) \
struct wait_queue name = { \
- .thread = current, \
+ .thread = get_current(), \
.thread_list = MINIOS_LIST_HEAD_INIT((name).thread_list), \
}
unsigned long flags; \
local_irq_save(flags); \
add_wait_queue(&wq, &w); \
- block(current); \
+ block(get_current()); \
local_irq_restore(flags); \
} while (0)
/* protect the list */ \
local_irq_save(flags); \
add_wait_queue(&wq, &__wait); \
- current->wakeup_time = deadline; \
- clear_runnable(current); \
+ get_current()->wakeup_time = deadline; \
+ clear_runnable(get_current()); \
local_irq_restore(flags); \
if((condition) || (deadline && NOW() >= deadline)) \
break; \
} \
local_irq_save(flags); \
/* need to wake up */ \
- wake(current); \
+ wake(get_current()); \
remove_wait_queue(&__wait); \
local_irq_restore(flags); \
} while(0)
#define L4_PROT (_PAGE_PRESENT|_PAGE_RW|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_USER)
#endif /* __i386__ || __x86_64__ */
+/* flags for ioremap */
+#define IO_PROT (L1_PROT)
+#define IO_PROT_NOCACHE (L1_PROT | _PAGE_PCD)
+
+/* for P2M */
+#define INVALID_P2M_ENTRY (~0UL)
+
#include "arch_limits.h"
#define PAGE_SIZE __PAGE_SIZE
#define PAGE_SHIFT __PAGE_SHIFT
#define do_map_zero(start, n) do_map_frames(start, &mfn_zero, n, 0, 0, DOMID_SELF, 0, L1_PROT_RO)
pgentry_t *need_pgt(unsigned long addr);
+int mfn_is_ram(unsigned long mfn);
#endif /* _ARCH_MM_H_ */
const char* fmt, ...)
__attribute__((__format__(printf, 4, 5)));
+/* Utility function to figure out our domain id */
+domid_t xenbus_get_self_id(void);
+
/* Reset the XenBus system. */
void fini_xenbus(void);
static struct pcifront_dev *pci_dev;
-static void pcifront_thread(void *p)
+static void print_pcidev(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun)
{
- void print(unsigned int domain, unsigned int bus, unsigned int slot, unsigned int fun)
- {
- unsigned int vendor, device, rev, class;
+ unsigned int vendor, device, rev, class;
- pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor);
- pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device);
- pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev);
- pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class);
+ pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x00, 2, &vendor);
+ pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x02, 2, &device);
+ pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x08, 1, &rev);
+ pcifront_conf_read(pci_dev, domain, bus, slot, fun, 0x0a, 2, &class);
- printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev);
- }
+ printk("%04x:%02x:%02x.%02x %04x: %04x:%04x (rev %02x)\n", domain, bus, slot, fun, class, vendor, device, rev);
+}
+static void pcifront_thread(void *p)
+{
pci_dev = init_pcifront(NULL);
if (!pci_dev)
return;
printk("PCI devices:\n");
- pcifront_scan(pci_dev, print);
+ pcifront_scan(pci_dev, print_pcidev);
}
static void fs_thread(void *p)
/* print out some useful information */
printk("Xen Minimal OS!\n");
- printk("start_info: %p\n", si);
- printk(" nr_pages: %lu", si->nr_pages);
- printk(" shared_inf: %08lx\n", si->shared_info);
- printk(" pt_base: %p", (void *)si->pt_base);
- printk(" mod_start: 0x%lx\n", si->mod_start);
- printk(" mod_len: %lu\n", si->mod_len);
- printk(" flags: 0x%x\n", (unsigned int)si->flags);
- printk(" cmd_line: %s\n",
+ printk(" start_info: %p(VA)\n", si);
+ printk(" nr_pages: 0x%lx\n", si->nr_pages);
+ printk(" shared_inf: 0x%08lx(MA)\n", si->shared_info);
+ printk(" pt_base: %p(VA)\n", (void *)si->pt_base);
+ printk("nr_pt_frames: 0x%lx\n", si->nr_pt_frames);
+ printk(" mfn_list: %p(VA)\n", (void *)si->mfn_list);
+ printk(" mod_start: 0x%lx(VA)\n", si->mod_start);
+ printk(" mod_len: %lu\n", si->mod_len);
+ printk(" flags: 0x%x\n", (unsigned int)si->flags);
+ printk(" cmd_line: %s\n",
si->cmd_line ? (const char *)si->cmd_line : "NULL");
/* Set up events. */
#include <sys/unistd.h>
#include <sys/stat.h>
#include <sys/mman.h>
+#include <net/if.h>
#include <time.h>
#include <errno.h>
#include <fcntl.h>
} else ASSERT(0);
}
-#define UNMAP_BATCH ((STACK_SIZE / 2) / sizeof(multicall_entry_t))
int munmap(void *start, size_t length)
{
int total = length / PAGE_SIZE;
- ASSERT(!((unsigned long)start & ~PAGE_MASK));
- while (total) {
- int n = UNMAP_BATCH;
- if (n > total)
- n = total;
- {
- int i;
- multicall_entry_t call[n];
- unsigned char (*data)[PAGE_SIZE] = start;
- int ret;
-
- for (i = 0; i < n; i++) {
- int arg = 0;
- call[i].op = __HYPERVISOR_update_va_mapping;
- call[i].args[arg++] = (unsigned long) &data[i];
- call[i].args[arg++] = 0;
-#ifdef __i386__
- call[i].args[arg++] = 0;
-#endif
- call[i].args[arg++] = UVMF_INVLPG;
- }
-
- ret = HYPERVISOR_multicall(call, n);
- if (ret) {
- errno = -ret;
- return -1;
- }
+ int ret;
- for (i = 0; i < n; i++) {
- if (call[i].result) {
- errno = call[i].result;
- return -1;
- }
- }
- }
- start = (char *)start + n * PAGE_SIZE;
- total -= n;
+ ret = unmap_frames((unsigned long)start, (unsigned long)total);
+ if (ret) {
+ errno = ret;
+ return -1;
}
return 0;
}
unsupported_function(int, tcgetattr, 0);
unsupported_function(int, poll, -1);
+/* net/if.h */
+unsupported_function_log(unsigned int, if_nametoindex, -1);
+unsupported_function_log(char *, if_indextoname, (char *) NULL);
+unsupported_function_log(struct if_nameindex *, if_nameindex, (struct if_nameindex *) NULL);
+unsupported_function_crash(if_freenameindex);
+
/* Linuxish abi for the Caml runtime, don't support */
unsupported_function_log(struct dirent *, readdir64, NULL);
unsupported_function_log(int, getrusage, -1);
DEF_CFLAGS += -O3
endif
+# Make the headers define our internal stuff
+DEF_CFLAGS += -D__INSIDE_MINIOS__
+
# Build the CFLAGS and ASFLAGS for compiling and assembling.
# DEF_... flags are the common mini-os flags,
# ARCH_... flags may be defined in arch/$(TARGET_ARCH_FAM)/rules.mk
return xenbus_write(xbt,fullpath,val);
}
+domid_t xenbus_get_self_id(void)
+{
+ char *dom_id;
+ domid_t ret;
+
+ BUG_ON(xenbus_read(XBT_NIL, "domid", &dom_id));
+ sscanf(dom_id, "%d", &ret);
+
+ return ret;
+}
+
static void do_ls_test(const char *pre)
{
char **dirs, *msg;
export debug=y
include $(XEN_ROOT)/Config.mk
-IOEMU_OPTIONS=--disable-sdl --disable-opengl --disable-vnc-tls --disable-brlapi --disable-kqemu
-ZLIB_URL?=http://www.zlib.net
+#ZLIB_URL?=http://www.zlib.net
+ZLIB_URL=$(XEN_EXTFILES_URL)
ZLIB_VERSION=1.2.3
-LIBPCI_URL?=http://www.kernel.org/pub/software/utils/pciutils
+
+#LIBPCI_URL?=http://www.kernel.org/pub/software/utils/pciutils
+LIBPCI_URL?=$(XEN_EXTFILES_URL)
LIBPCI_VERSION=2.2.9
-NEWLIB_URL?=ftp://sources.redhat.com/pub/newlib
+
+#NEWLIB_URL?=ftp://sources.redhat.com/pub/newlib
+NEWLIB_URL?=$(XEN_EXTFILES_URL)
NEWLIB_VERSION=1.16.0
-LWIP_URL?=http://download.savannah.gnu.org/releases/lwip
+
+#LWIP_URL?=http://download.savannah.gnu.org/releases/lwip
+LWIP_URL?=$(XEN_EXTFILES_URL)
LWIP_VERSION=1.3.0
-GRUB_URL?=http://alpha.gnu.org/gnu/grub
+
+#GRUB_URL?=http://alpha.gnu.org/gnu/grub
+GRUB_URL?=$(XEN_EXTFILES_URL)
GRUB_VERSION=0.97
WGET=wget -c
ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) include/xen && \
ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) include/xen && \
( [ -h include/xen/sys ] || ln -sf ../../$(XEN_ROOT)/tools/include/xen-sys/MiniOS include/xen/sys ) && \
+ ( [ -h include/xen/libelf ] || ln -sf ../../$(XEN_ROOT)/tools/include/xen/libelf include/xen/libelf ) && \
mkdir -p include/xen-foreign && \
ln -sf $(addprefix ../../,$(wildcard $(XEN_ROOT)/tools/include/xen-foreign/*)) include/xen-foreign/ && \
$(MAKE) -C include/xen-foreign/ && \
[ -f ioemu/config-host.mak ] || \
( $(absolutify_xen_root); \
cd ioemu ; \
- CONFIG_STUBDOM=yes XEN_TARGET_ARCH=$(XEN_TARGET_ARCH) CFLAGS="$(TARGET_CFLAGS)" sh ./xen-setup --cc=$(CC) --disable-gcc-check $(IOEMU_OPTIONS))
- CPPFLAGS= TARGET_CPPFLAGS="$(TARGET_CPPFLAGS)" $(MAKE) -C ioemu LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) TOOLS= CONFIG_STUBDOM=yes
+ LWIPDIR=$(CURDIR)/lwip-$(XEN_TARGET_ARCH) \
+ TARGET_CPPFLAGS="$(TARGET_CPPFLAGS)" \
+ TARGET_CFLAGS="$(TARGET_CFLAGS)" \
+ TARGET_LDFLAGS="$(TARGET_LDFLAGS)" \
+ ./xen-setup-stubdom )
+ $(MAKE) -C ioemu
######
# caml
$(INSTALL_DATA) README $(DESTDIR)$(DOCDIR)/README.stubdom
install-ioemu: ioemu-stubdom
- $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/bin"
- $(INSTALL_PROG) stubdom-dm "$(DESTDIR)/usr/lib/xen/bin"
- $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot"
- $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-ioemu/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/ioemu-stubdom.gz"
+ $(INSTALL_DIR) "$(DESTDIR)$(LIBEXEC)"
+ $(INSTALL_PROG) stubdom-dm "$(DESTDIR)$(LIBEXEC)"
+ $(INSTALL_DIR) "$(DESTDIR)$(LIBDIR_x86_32)/xen/boot"
+ $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-ioemu/mini-os.gz "$(DESTDIR)$(LIBDIR_x86_32)/xen/boot/ioemu-stubdom.gz"
install-grub: pv-grub
- $(INSTALL_DIR) "$(DESTDIR)/usr/lib/xen/boot"
- $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-grub/mini-os.gz "$(DESTDIR)/usr/lib/xen/boot/pv-grub-$(XEN_TARGET_ARCH).gz"
+ $(INSTALL_DIR) "$(DESTDIR)$(LIBDIR_x86_32)/xen/boot"
+ $(INSTALL_DATA) mini-os-$(XEN_TARGET_ARCH)-grub/mini-os.gz "$(DESTDIR)$(LIBDIR_x86_32)/xen/boot/pv-grub-$(XEN_TARGET_ARCH).gz"
#######
# clean
vfb = [ 'type=sdl' ]
+ By default, qemu will use SDL together with OpenGL for rendering; if
+ you do not want qemu to use OpenGL, also pass opengl=0:
+
+vfb = [ 'type=sdl, opengl=0' ]
+
* Using a VNC server in the stub domain
- In hvmconfig, set vnclisten to "172.30.206.1" for instance. Do not use a
{
struct timeval tv;
gettimeofday(&tv, NULL);
- return tv.tv_sec;
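+    /* Return the seconds within the current minute, BCD-encoded:
+     * the low nibble holds the units digit, the high nibble the tens digit. */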
+ return tv.tv_sec % 10 + ((tv.tv_sec / 10) % 6) * 0x10;
}
int currticks (void)
vncviewer=0
vncpid=
extra=
+videoram=4
while [ "$#" -gt 0 ];
do
if [ "$#" -ge 2 ];
extra="$extra -loadvm $2";
shift
;;
+ -videoram)
+ videoram="$2"
+ shift
+ ;;
esac
fi
case "$1" in
sleep 1
done
-creation="xm create -c $domname-dm target=$domid memory=32 extra=\"$extra\""
+creation="xm create -c $domname-dm target=$domid memory=32 videoram=$videoram extra=\"$extra\""
(while true ; do sleep 60 ; done) | /bin/sh -c "$creation" &
#xterm -geometry +0+0 -e /bin/sh -c "$creation ; echo ; echo press ENTER to shut down ; read" &
SUBDIRS-y += xenstore
SUBDIRS-y += misc
SUBDIRS-y += examples
+SUBDIRS-y += hotplug
SUBDIRS-y += xentrace
SUBDIRS-$(CONFIG_XCUTILS) += xcutils
SUBDIRS-$(CONFIG_X86) += firmware
SUBDIRS-$(VTPM_TOOLS) += vtpm_manager
SUBDIRS-$(VTPM_TOOLS) += vtpm
SUBDIRS-y += xenstat
-SUBDIRS-y += libaio
-SUBDIRS-y += blktap
+SUBDIRS-$(CONFIG_Linux) += libaio
+SUBDIRS-$(CONFIG_Linux) += blktap
SUBDIRS-y += libfsimage
SUBDIRS-$(LIBXENAPI_BINDINGS) += libxen
-SUBDIRS-y += fs-back
+SUBDIRS-$(CONFIG_Linux) += fs-back
SUBDIRS-$(CONFIG_IOEMU) += ioemu-dir
+SUBDIRS-y += xenpmd
# These don't cross-compile
ifeq ($(XEN_COMPILE_ARCH),$(XEN_TARGET_ARCH))
CFLAGS += -D__XEN_TOOLS__
+# Get gcc to generate the dependencies for us.
+CFLAGS += -MMD -MF .$(@F).d
+DEPS = .*.d
+
# Enable implicit LFS support *and* explicit LFS names.
CFLAGS += $(shell getconf LFS_CFLAGS)
CFLAGS += -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE
subdir-all-% subdir-clean-% subdir-install-%: .phony
$(MAKE) -C $* $(patsubst subdir-%-$*,%,$@)
-
CFLAGS += -I $(LIBAIO_DIR)
CFLAGS += -D_GNU_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEPS = .*.d
-
-ifeq ($(shell . ./check_gcrypt),"yes")
+ifeq ($(shell . ./check_gcrypt $(CC)),yes)
CFLAGS += -DUSE_GCRYPT
CRYPT_LIB := -lgcrypt
else
CRYPT_LIB := -lcrypto
-$(warning *** libgcrypt not installed: falling back to libcrypto ***)
+$(warning === libgcrypt not installed: falling back to libcrypto ===)
endif
LDFLAGS_blktapctrl := $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenstore) -L../lib -lblktap
* return 0 on success, -1 on error.
*/
-static int test_path(char *path, char **dev, int *type, blkif_t **blkif)
+static int test_path(char *path, char **dev, int *type, blkif_t **blkif,
+ int* use_ioemu)
{
char *ptr, handle[10];
int i, size, found = 0;
*type = MAX_DISK_TYPES + 1;
*blkif = NULL;
+ if (!strncmp(path, "tapdisk:", strlen("tapdisk:"))) {
+ *use_ioemu = 0;
+ path += strlen("tapdisk:");
+ } else if (!strncmp(path, "ioemu:", strlen("ioemu:"))) {
+ *use_ioemu = 1;
+ path += strlen("ioemu:");
+ } else {
+ // Use the default for the image type
+ *use_ioemu = -1;
+ }
+
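+    /*
+     * Example (hypothetical image path): a params string of
+     * "tapdisk:qcow:/srv/disk.qcow" forces the tapdisk backend,
+     * "ioemu:qcow:/srv/disk.qcow" forces qemu-dm, and a plain
+     * "qcow:/srv/disk.qcow" keeps the default backend for the qcow type.
+     */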
if ( (ptr = strstr(path, ":"))!=NULL) {
handle_len = (ptr - path);
memcpy(handle, path, handle_len);
}
if (found) {
+ if (*use_ioemu == -1)
+ *use_ioemu = dtypes[i]->use_ioemu;
*type = dtypes[i]->idnum;
if (dtypes[i]->single_handler == 1) {
*blkif = active_disks[dtypes[i]
->idnum]->blkif;
}
+
return 0;
}
}
entry->pprev = pprev;
}
+static int qemu_instance_has_disks(pid_t pid)
+{
+ int i;
+ int count = 0;
+ driver_list_entry_t *entry;
+
+ for (i = 0; i < MAX_DISK_TYPES; i++) {
+ entry = active_disks[i];
+ while (entry) {
+ if ((entry->blkif->tappid == pid) && dtypes[i]->use_ioemu)
+ count++;
+ entry = entry->next;
+ }
+ }
+
+ return (count != 0);
+}
+
static int del_disktype(blkif_t *blkif)
{
driver_list_entry_t *entry, **pprev;
DPRINTF("DEL_DISKTYPE: Freeing entry\n");
free(entry);
+ /*
+ * When using ioemu, all disks of one VM are connected to the same
+ * qemu-dm instance. We may close the file handle only if there is
+ * no other disk left for this domain.
+ */
+ if (dtypes[type]->use_ioemu)
+ return !qemu_instance_has_disks(blkif->tappid);
+
/* Caller should close() if no single controller, or list is empty. */
return (!dtypes[type]->single_handler || (active_disks[type] == NULL));
}
static int tapdisk_ioemu_pid = 0;
static int dom0_readfd = 0;
static int dom0_writefd = 0;
-
+ int refresh_pid = 0;
+
if (asprintf(&rdctldev, BLKTAP_CTRL_DIR "/qemu-read-%d", domid) < 0)
return -1;
if (tapdisk_ioemu_pid == 0 || kill(tapdisk_ioemu_pid, 0)) {
/* No device model and tapdisk-ioemu doesn't run yet */
DPRINTF("Launching tapdisk-ioemu\n");
- tapdisk_ioemu_pid = launch_tapdisk_ioemu();
+ launch_tapdisk_ioemu();
dom0_readfd = open_ctrl_socket(wrctldev);
dom0_writefd = open_ctrl_socket(rdctldev);
+
+ refresh_pid = 1;
}
DPRINTF("Using tapdisk-ioemu connection\n");
blkif->fds[READ] = dom0_readfd;
blkif->fds[WRITE] = dom0_writefd;
+
+ if (refresh_pid) {
+ get_tapdisk_pid(blkif);
+ tapdisk_ioemu_pid = blkif->tappid;
+ }
+
} else if (access(rdctldev, R_OK | W_OK) == 0) {
/* Use existing pipe to the device model */
DPRINTF("Using qemu-dm connection\n");
image_t *image;
blkif_t *exist = NULL;
static uint16_t next_cookie = 0;
+ int use_ioemu;
DPRINTF("Received a poll for a new vbd\n");
if ( ((blk=blkif->info) != NULL) && (blk->params != NULL) ) {
- if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0)
- return -1;
-
- if (test_path(blk->params, &ptr, &type, &exist) != 0) {
+ if (test_path(blk->params, &ptr, &type, &exist, &use_ioemu) != 0) {
DPRINTF("Error in blktap device string(%s).\n",
blk->params);
goto fail;
blkif->cookie = next_cookie++;
if (!exist) {
- if (type == DISK_TYPE_IOEMU) {
+ if (use_ioemu) {
if (connect_qemu(blkif, blkif->domid))
goto fail;
} else {
blkif->fds[WRITE] = exist->fds[WRITE];
}
- add_disktype(blkif, type);
- blkif->major = major;
- blkif->minor = minor;
-
image = (image_t *)malloc(sizeof(image_t));
blkif->prv = (void *)image;
blkif->ops = &tapdisk_ops;
goto fail;
}
+ if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0)
+ return -1;
+
+ blkif->major = major;
+ blkif->minor = minor;
+
+ add_disktype(blkif, type);
+
} else return -1;
return 0;
fail:
- ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, minor);
return -EINVAL;
}
}
if (del_disktype(blkif)) {
+ DPRINTF("Closing communication pipe to pid %d\n", blkif->tappid);
close(blkif->fds[WRITE]);
close(blkif->fds[READ]);
}
/* Open the disk file and initialize qcow state. */
static int tdqcow_open (struct disk_driver *dd, const char *name, td_flag_t flags)
{
- int fd, len, i, shift, ret, size, l1_table_size, o_flags;
+ int fd, len, i, shift, ret, size, l1_table_size, o_flags, l1_table_block;
int max_aio_reqs;
struct td_state *bs = dd->td_state;
struct tdqcow_state *s = (struct tdqcow_state *)dd->private;
- char *buf;
+ char *buf, *buf2;
QCowHeader *header;
QCowHeader_ext *exthdr;
uint32_t cksum;
(int) (s->l1_size * sizeof(uint64_t)),
l1_table_size);
- lseek(fd, s->l1_table_offset, SEEK_SET);
- if (read(fd, s->l1_table, l1_table_size) != l1_table_size)
+ lseek(fd, 0, SEEK_SET);
+ l1_table_block = l1_table_size + s->l1_table_offset;
+ l1_table_block = l1_table_block + 512 - (l1_table_block % 512);
+ ret = posix_memalign((void **)&buf2, 4096, l1_table_block);
+ if (ret != 0) goto fail;
+ if (read(fd, buf2, l1_table_block) != l1_table_block)
goto fail;
+ memcpy(s->l1_table, buf2 + s->l1_table_offset, l1_table_size);
for(i = 0; i < s->l1_size; i++) {
be64_to_cpus(&s->l1_table[i]);
DPRINTF("qcow: Converting image to big endian L1 table\n");
- lseek(fd, s->l1_table_offset, SEEK_SET);
- if (write(fd, s->l1_table, l1_table_size) != l1_table_size) {
+ memcpy(buf2 + s->l1_table_offset, s->l1_table, l1_table_size);
+ lseek(fd, 0, SEEK_SET);
+ if (write(fd, buf2, l1_table_block) != l1_table_block) {
DPRINTF("qcow: Failed to write new L1 table\n");
goto fail;
}
init_fds(dd);
if (!final_cluster)
- s->fd_end = s->l1_table_offset + l1_table_size;
+ s->fd_end = l1_table_block;
else {
s->fd_end = lseek(fd, 0, SEEK_END);
if (s->fd_end == (off_t)-1)
filename[len] = '\0';
id->name = strdup(filename);
- id->drivertype = DISK_TYPE_QCOW;
+ id->drivertype = DISK_TYPE_AIO;
err = 0;
out:
free(buf);
{
struct stat stats;
uint64_t psize, csize;
- struct tdqcow_state *c = (struct tdqcow_state *)child->private;
- struct tdqcow_state *p = (struct tdqcow_state *)parent->private;
- if (stat(p->name, &stats))
+ if (stat(parent->name, &stats))
return -EINVAL;
- if (get_filesize(p->name, &psize, &stats))
+ if (get_filesize(parent->name, &psize, &stats))
return -EINVAL;
- if (stat(c->name, &stats))
+ if (stat(child->name, &stats))
return -EINVAL;
- if (get_filesize(c->name, &csize, &stats))
+ if (get_filesize(child->name, &csize, &stats))
return -EINVAL;
if (csize != psize)
#include "tapdisk.h"
#include "tapaio.h"
#include "bswap.h"
+#include "blk.h"
#define USE_AIO
#endif
+static int get_filesize(char *filename, uint64_t *size, struct stat *st)
+{
+ int fd;
+ QCowHeader header;
+
+ /*Set to the backing file size*/
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return -1;
+ if (read(fd, &header, sizeof(header)) < sizeof(header)) {
+ close(fd);
+ return -1;
+ }
+ close(fd);
+
+ be32_to_cpus(&header.magic);
+ be32_to_cpus(&header.version);
+ be64_to_cpus(&header.size);
+ if (header.magic == QCOW_MAGIC && header.version == QCOW_VERSION) {
+ *size = header.size >> SECTOR_SHIFT;
+ return 0;
+ }
+
+ if(S_ISBLK(st->st_mode)) {
+ fd = open(filename, O_RDONLY);
+ if (fd < 0)
+ return -1;
+ if (blk_getimagesize(fd, size) != 0) {
+ close(fd);
+ return -1;
+ }
+ close(fd);
+ } else *size = (st->st_size >> SECTOR_SHIFT);
+ return 0;
+}
+
/**
* @return
* 0 if parent id successfully retrieved;
return TD_NO_PARENT;
id->name = strdup(s->backing_file);
- id->drivertype = DISK_TYPE_QCOW2;
+ id->drivertype = DISK_TYPE_AIO;
return 0;
}
static int qcow_validate_parent(struct disk_driver *child,
struct disk_driver *parent, td_flag_t flags)
{
- struct BDRVQcowState *cs = (struct BDRVQcowState*) child->private;
- struct BDRVQcowState *ps = (struct BDRVQcowState*) parent->private;
+ struct stat stats;
+ uint64_t psize, csize;
+
+ if (stat(parent->name, &stats))
+ return -EINVAL;
+ if (get_filesize(parent->name, &psize, &stats))
+ return -EINVAL;
- if (ps->total_sectors != cs->total_sectors) {
- DPRINTF("qcow_validate_parent(): %#"PRIx64" != %#"PRIx64"\n",
- ps->total_sectors, cs->total_sectors);
+ if (stat(child->name, &stats))
return -EINVAL;
- }
-
+ if (get_filesize(child->name, &csize, &stats))
+ return -EINVAL;
+
+ if (csize != psize)
+ return -EINVAL;
+
return 0;
}
+int qcow2_create(const char *filename, uint64_t total_size,
+ const char *backing_file, int flags)
+{
+ int fd, header_size, backing_filename_len, l1_size, i, shift, l2_bits;
+ int ret = 0;
+ QCowHeader header;
+ uint64_t tmp, offset;
+ QCowCreateState s1, *s = &s1;
+
+ memset(s, 0, sizeof(*s));
+
+ fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0644);
+ if (fd < 0)
+ return -1;
+ memset(&header, 0, sizeof(header));
+ header.magic = cpu_to_be32(QCOW_MAGIC);
+ header.version = cpu_to_be32(QCOW_VERSION);
+ header.size = cpu_to_be64(total_size * 512);
+ header_size = sizeof(header);
+ backing_filename_len = 0;
+ if (backing_file) {
+ header.backing_file_offset = cpu_to_be64(header_size);
+ backing_filename_len = strlen(backing_file);
+ header.backing_file_size = cpu_to_be32(backing_filename_len);
+ header_size += backing_filename_len;
+ }
+ s->cluster_bits = 12; /* 4 KB clusters */
+ s->cluster_size = 1 << s->cluster_bits;
+ header.cluster_bits = cpu_to_be32(s->cluster_bits);
+ header_size = (header_size + 7) & ~7;
+ if (flags & BLOCK_FLAG_ENCRYPT) {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_AES);
+ } else {
+ header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
+ }
+ l2_bits = s->cluster_bits - 3;
+ shift = s->cluster_bits + l2_bits;
+ l1_size = (((total_size * 512) + (1LL << shift) - 1) >> shift);
+ offset = align_offset(header_size, s->cluster_size);
+ s->l1_table_offset = offset;
+ header.l1_table_offset = cpu_to_be64(s->l1_table_offset);
+ header.l1_size = cpu_to_be32(l1_size);
+ offset += align_offset(l1_size * sizeof(uint64_t), s->cluster_size);
+
+ s->refcount_table = qemu_mallocz(s->cluster_size);
+ s->refcount_block = qemu_mallocz(s->cluster_size);
+
+ s->refcount_table_offset = offset;
+ header.refcount_table_offset = cpu_to_be64(offset);
+ header.refcount_table_clusters = cpu_to_be32(1);
+ offset += s->cluster_size;
+
+ s->refcount_table[0] = cpu_to_be64(offset);
+ s->refcount_block_offset = offset;
+ offset += s->cluster_size;
+
+ /* update refcounts */
+ create_refcount_update(s, 0, header_size);
+ create_refcount_update(s, s->l1_table_offset, l1_size * sizeof(uint64_t));
+ create_refcount_update(s, s->refcount_table_offset, s->cluster_size);
+ create_refcount_update(s, s->refcount_block_offset, s->cluster_size);
+
+ /* write all the data */
+ ret = write(fd, &header, sizeof(header));
+ if (ret < 0)
+ goto out;
+ if (backing_file) {
+ ret = write(fd, backing_file, backing_filename_len);
+ if (ret < 0)
+ goto out;
+ }
+ lseek(fd, s->l1_table_offset, SEEK_SET);
+ tmp = 0;
+ for(i = 0;i < l1_size; i++) {
+ ret = write(fd, &tmp, sizeof(tmp));
+ if (ret < 0)
+ goto out;
+ }
+ lseek(fd, s->refcount_table_offset, SEEK_SET);
+ ret = write(fd, s->refcount_table, s->cluster_size);
+ if (ret < 0)
+ goto out;
+
+ lseek(fd, s->refcount_block_offset, SEEK_SET);
+ ret = write(fd, s->refcount_block, s->cluster_size);
+ if (ret < 0)
+ goto out;
+ ret = 0;
+
+ out:
+ qemu_free(s->refcount_table);
+ qemu_free(s->refcount_block);
+ close(fd);
+ return ret;
+}
+
+
+
struct tap_disk tapdisk_qcow2 = {
"qcow2",
sizeof(BDRVQcowState),
{
fprintf(stderr, "Qcow-utils: v1.0.0\n");
fprintf(stderr,
- "usage: qcow-create [-h help] [-r reserve] <SIZE(MB)> <FILENAME> "
+ "usage: qcow-create [-h help] [-r reserve] [-f format] <SIZE(MB)> <FILENAME> "
"[<BACKING_FILENAME>]\n");
exit(-1);
}
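+/*
+ * Example invocation (hypothetical file names): create a 1024 MB qcow2
+ * image backed by an existing image file:
+ *
+ *     qcow-create -f qcow2 1024 /var/images/disk.qcow2 /var/images/base.img
+ */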
{
int ret = -1, c, backed = 0;
int sparse = 1;
+ char *fmt = "qcow";
uint64_t size;
char filename[MAX_NAME_LEN], bfilename[MAX_NAME_LEN];
+ char *tmpfile;
for(;;) {
- c = getopt(argc, argv, "hr");
+ c = getopt(argc, argv, "hrf");
if (c == -1)
break;
switch(c) {
help();
exit(0);
break;
+ case 'f':
+ fmt = argv[optind++];
+ break;
case 'r':
sparse = 0;
break;
}
}
- DFPRINTF("Creating file size %llu, name %s\n",(long long unsigned)size, filename);
- if (!backed)
- ret = qcow_create(filename,size,NULL,sparse);
- else
- ret = qcow_create(filename,size,bfilename,sparse);
+ tmpfile = backed ? bfilename: NULL;
+ if (!strcmp(fmt, "qcow")) {
+ ret = qcow_create(filename, size, tmpfile, sparse);
+ } else if(!strcmp(fmt, "qcow2")) {
+ ret = qcow2_create(filename, size, tmpfile, sparse);
+ } else {
+ fprintf(stderr,"Unsupport format:%s\n", fmt);
+ exit(-1);
+ }
+ DFPRINTF("Creating file size %llu, name %s\n",(long long unsigned)size, filename);
if (ret < 0)
DPRINTF("Unable to create QCOW file\n");
char handle[10]; /* xend handle, e.g. 'ram' */
int single_handler; /* is there a single controller for all */
/* instances of disk type? */
+ int use_ioemu; /* backend provider: 0 = tapdisk; 1 = ioemu */
+
#ifdef TAPDISK
struct tap_disk *drv;
#endif
extern struct tap_disk tapdisk_qcow;
extern struct tap_disk tapdisk_qcow2;
-#define MAX_DISK_TYPES 20
-
-#define DISK_TYPE_AIO 0
-#define DISK_TYPE_SYNC 1
-#define DISK_TYPE_VMDK 2
-#define DISK_TYPE_RAM 3
-#define DISK_TYPE_QCOW 4
-#define DISK_TYPE_QCOW2 5
-#define DISK_TYPE_IOEMU 6
-
/*Define Individual Disk Parameters here */
static disk_info_t aio_disk = {
"raw image (aio)",
"aio",
0,
+ 0,
#ifdef TAPDISK
&tapdisk_aio,
#endif
"raw image (sync)",
"sync",
0,
+ 0,
#ifdef TAPDISK
&tapdisk_sync,
#endif
"vmware image (vmdk)",
"vmdk",
1,
+ 0,
#ifdef TAPDISK
&tapdisk_vmdk,
#endif
"ramdisk image (ram)",
"ram",
1,
+ 0,
#ifdef TAPDISK
&tapdisk_ram,
#endif
"qcow disk (qcow)",
"qcow",
0,
+ 0,
#ifdef TAPDISK
&tapdisk_qcow,
#endif
"qcow2 disk (qcow2)",
"qcow2",
0,
+ 0,
#ifdef TAPDISK
&tapdisk_qcow2,
#endif
};
-static disk_info_t ioemu_disk = {
- DISK_TYPE_IOEMU,
- "ioemu disk",
- "ioemu",
- 1,
-#ifdef TAPDISK
- NULL
-#endif
-};
-
/*Main disk info array */
static disk_info_t *dtypes[] = {
&aio_disk,
&ram_disk,
&qcow_disk,
&qcow2_disk,
- &ioemu_disk,
};
typedef struct driver_list_entry {
int qcow_create(const char *filename, uint64_t total_size,
const char *backing_file, int flags);
+
+int qcow2_create(const char *filename, uint64_t total_size,
+ const char *backing_file, int flags);
#endif /*TAPDISK_H_*/
#define CTLMSG_PID 9
#define CTLMSG_PID_RSP 10
+/* disk driver types */
+#define MAX_DISK_TYPES 20
+
+#define DISK_TYPE_AIO 0
+#define DISK_TYPE_SYNC 1
+#define DISK_TYPE_VMDK 2
+#define DISK_TYPE_RAM 3
+#define DISK_TYPE_QCOW 4
+#define DISK_TYPE_QCOW2 5
+
/* xenstore/xenbus: */
#define DOMNAME "Domain-0"
int setup_probe_watch(struct xs_handle *h);
#include <poll.h>
#include <time.h>
#include <sys/time.h>
+#include <unistd.h>
#include "blktaplib.h"
#include "list.h"
#include "xs_api.h"
return 0;
}
+static int check_sharing(struct xs_handle *h, struct backend_info *be)
+{
+ char *dom_uuid;
+ char *cur_dom_uuid;
+ char *path;
+ char *mode;
+ char *params;
+ char **domains;
+ char **devices;
+ int i, j;
+ unsigned int num_dom, num_dev;
+ blkif_info_t *info;
+ int ret = 0;
+
+ /* If the mode contains '!' or doesn't contain 'w' don't check anything */
+ xs_gather(h, be->backpath, "mode", NULL, &mode, NULL);
+ if (strchr(mode, '!'))
+ goto out;
+ if (strchr(mode, 'w') == NULL)
+ goto out;
+
+ /* Get the UUID of the domain we want to attach to */
+ if (asprintf(&path, "/local/domain/%ld", be->frontend_id) == -1)
+ goto fail;
+ xs_gather(h, path, "vm", NULL, &dom_uuid, NULL);
+ free(path);
+
+ /* Iterate through the devices of all VMs */
+ domains = xs_directory(h, XBT_NULL, "backend/tap", &num_dom);
+ if (domains == NULL)
+ num_dom = 0;
+
+ for (i = 0; !ret && (i < num_dom); i++) {
+
+ /* If it's the same VM, no action needed */
+ if (asprintf(&path, "/local/domain/%s", domains[i]) == -1) {
+ ret = -1;
+ break;
+ }
+ xs_gather(h, path, "vm", NULL, &cur_dom_uuid, NULL);
+ free(path);
+
+ if (!strcmp(cur_dom_uuid, dom_uuid)) {
+ free(cur_dom_uuid);
+ continue;
+ }
+
+ /* Check the devices */
+ if (asprintf(&path, "backend/tap/%s", domains[i]) == -1) {
+ ret = -1;
+ free(cur_dom_uuid);
+ break;
+ }
+ devices = xs_directory(h, XBT_NULL, path, &num_dev);
+ if (devices == NULL)
+ num_dev = 0;
+ free(path);
+
+ for (j = 0; !ret && (j < num_dev); j++) {
+ if (asprintf(&path, "backend/tap/%s/%s", domains[i], devices[j]) == -1) {
+ ret = -1;
+ break;
+ }
+ xs_gather(h, path, "params", NULL, ¶ms, NULL);
+ free(path);
+
+ info = be->blkif->info;
+ if (strcmp(params, info->params)) {
+ ret = -1;
+ }
+
+ free(params);
+ }
+
+ free(cur_dom_uuid);
+ free(devices);
+ }
+ free(domains);
+ free(dom_uuid);
+ goto out;
+
+fail:
+ ret = -1;
+out:
+ free(mode);
+ return ret;
+}
+
+static int check_image(struct xs_handle *h, struct backend_info *be,
+ const char** errmsg)
+{
+ const char *tmp;
+ const char *path;
+ int mode;
+ blkif_t *blkif = be->blkif;
+ blkif_info_t *info = blkif->info;
+
+ /* Strip off the image type */
+ path = info->params;
+
+ if (!strncmp(path, "tapdisk:", strlen("tapdisk:"))) {
+ path += strlen("tapdisk:");
+ } else if (!strncmp(path, "ioemu:", strlen("ioemu:"))) {
+ path += strlen("ioemu:");
+ }
+
+ tmp = strchr(path, ':');
+ if (tmp != NULL)
+ path = tmp + 1;
+
+ /* Check if the image exists and access is permitted */
+ mode = R_OK;
+ if (!be->readonly)
+ mode |= W_OK;
+ if (access(path, mode)) {
+ if (errno == ENOENT)
+ *errmsg = "File not found.";
+ else
+ *errmsg = "Insufficient file permissions.";
+ return -1;
+ }
+
+ /* Check that the image is not attached to a different VM */
+ if (check_sharing(h, be)) {
+ *errmsg = "File already in use by other domain";
+ return -1;
+ }
+
+ return 0;
+}
+
static void ueblktap_setup(struct xs_handle *h, char *bepath)
{
struct backend_info *be;
int len, er, deverr;
long int pdev = 0, handle;
blkif_info_t *blk;
+ const char* errmsg = NULL;
be = be_lookup_be(bepath);
if (be == NULL)
be->pdev = pdev;
}
+ if (check_image(h, be, &errmsg))
+ goto fail;
+
er = blkif_init(be->blkif, handle, be->pdev, be->readonly);
if (er != 0) {
DPRINTF("Unable to open device %s\n",blk->params);
}
be->blkif->state = CONNECTED;
+ xs_printf(h, be->backpath, "hotplug-status", "connected");
+
DPRINTF("[SETUP] Complete\n\n");
goto close;
fail:
- if ( (be != NULL) && (be->blkif != NULL) )
+ if (be) {
+ if (errmsg == NULL)
+ errmsg = "Setting up the backend failed. See the log "
+ "files in /var/log/xen/ for details.";
+ xs_printf(h, be->backpath, "hotplug-error", errmsg);
+ xs_printf(h, be->backpath, "hotplug-status", "error");
+
backend_remove(h, be);
+ }
close:
if (path)
free(path);
len = strsep_len(bepath, '/', 7);
if (len < 0)
goto free_be;
- bepath[len] = '\0';
+ if (bepath[len] != '\0')
+ goto free_be;
be = malloc(sizeof(*be));
if (!be) {
.PHONY: clean
clean:
- $(RM) *.a *.so *.o *.rpm $(BIN)
+ $(RM) *.a *.so *.o *.rpm $(BIN) $(DEPS)
$(RM) client/*.o daemon/*.o
xenconsoled: $(patsubst %.c,%.o,$(wildcard daemon/*.c))
$(INSTALL_PROG) xenconsoled $(DESTDIR)/$(SBINDIR)
$(INSTALL_DIR) $(DESTDIR)$(PRIVATE_BINDIR)
$(INSTALL_PROG) xenconsole $(DESTDIR)$(PRIVATE_BINDIR)
+
+-include $(DEPS)
#include <err.h>
#include <errno.h>
#include <string.h>
+#ifdef __sun__
+#include <sys/stropts.h>
+#endif
#include "xs.h"
, program);
}
+#ifdef __sun__
+void cfmakeraw(struct termios *termios_p)
+{
+ termios_p->c_iflag &=
+ ~(IGNBRK|BRKINT|PARMRK|ISTRIP|INLCR|IGNCR|ICRNL|IXON);
+ termios_p->c_oflag &= ~OPOST;
+ termios_p->c_lflag &= ~(ECHO|ECHONL|ICANON|ISIG|IEXTEN);
+ termios_p->c_cflag &= ~(CSIZE|PARENB);
+ termios_p->c_cflag |= CS8;
+
+ termios_p->c_cc[VMIN] = 0;
+ termios_p->c_cc[VTIME] = 0;
+}
+#endif
+
static int get_pty_fd(struct xs_handle *xs, char *path, int seconds)
/* Check for a pty in xenstore, open it and return its fd.
* Assumes there is already a watch set in the store for this path. */
int xs_fd = xs_fileno(xs), pty_fd = -1;
int start, now;
unsigned int len = 0;
- char *pty_path, **watch_paths;;
+ char *pty_path, **watch_paths;
start = now = time(NULL);
do {
}
}
} while (pty_fd == -1 && (now = time(NULL)) < start + seconds);
+
+#ifdef __sun__
+ if (pty_fd != -1) {
+ struct termios term;
+
+ /*
+ * The pty may come from either xend (with pygrub) or
+ * xenconsoled. It may have tty semantics set up, or not.
+ * While it isn't strictly necessary to have those
+ * semantics here, it is good to have a consistent
+ * state that is the same as under Linux.
+ *
+ * If tcgetattr fails, they have not been set up,
+ * so go ahead and set them up now, by pushing the
+ * ptem and ldterm streams modules.
+ */
+ if (tcgetattr(pty_fd, &term) < 0) {
+ ioctl(pty_fd, I_PUSH, "ptem");
+ ioctl(pty_fd, I_PUSH, "ldterm");
+ }
+ }
+#endif
+
return pty_fd;
}
new_term = *old;
cfmakeraw(&new_term);
- tcsetattr(fd, TCSAFLUSH, &new_term);
+ tcsetattr(fd, TCSANOW, &new_term);
}
static void restore_term(int fd, struct termios *old)
{
- tcsetattr(fd, TCSAFLUSH, old);
+ tcsetattr(fd, TCSANOW, old);
}
static int console_loop(int fd, struct xs_handle *xs, char *pty_path)
if (FD_ISSET(xs_fileno(xs), &fds)) {
int newfd = get_pty_fd(xs, pty_path, 0);
- close(fd);
+ if (fd != -1)
+ close(fd);
if (newfd == -1)
/* Console PTY has become invalid */
return 0;
assert(dom->slave_fd == -1);
assert(dom->master_fd == -1);
- cfmakeraw(&term);
-
- if (openpty(&dom->master_fd, &dom->slave_fd, NULL, &term, NULL) < 0) {
+ if (openpty(&dom->master_fd, &dom->slave_fd, NULL, NULL, NULL) < 0) {
err = errno;
dolog(LOG_ERR, "Failed to create tty for domain-%d "
"(errno = %i, %s)",
return 0;
}
+ if (tcgetattr(dom->slave_fd, &term) < 0) {
+ err = errno;
+ dolog(LOG_ERR, "Failed to get tty attributes for domain-%d "
+ "(errno = %i, %s)",
+ dom->domid, err, strerror(err));
+ goto out;
+ }
+ cfmakeraw(&term);
+ if (tcsetattr(dom->slave_fd, TCSANOW, &term) < 0) {
+ err = errno;
+ dolog(LOG_ERR, "Failed to set tty attributes for domain-%d "
+ "(errno = %i, %s)",
+ dom->domid, err, strerror(err));
+ goto out;
+ }
+
if ((slave = ptsname(dom->master_fd)) == NULL) {
err = errno;
dolog(LOG_ERR, "Failed to get slave name for domain-%d "
version(argv[0]);
exit(0);
case 'v':
+#ifndef __sun__
syslog_option |= LOG_PERROR;
+#endif
syslog_mask = LOG_DEBUG;
break;
case 'i':
# -I. for config files.
# -I${srcdir} for our headers.
# -I$(srcdir)/../regformats for regdef.h.
-INCLUDE_CFLAGS = -I. -I${srcdir} -I$(srcdir)/../regformats -I$(INCLUDE_DIR) -I../../../../../libxc/
+INCLUDE_CFLAGS = -I. -I${srcdir} -I$(srcdir)/../regformats -I$(INCLUDE_DIR) -I../../../../../libxc/ -I../../../../../include/
# M{H,T}_CFLAGS, if defined, has host- and target-dependent CFLAGS
# from the config/ directory.
XEN_CONFIGS += xend-pci-quirks.sxp
XEN_CONFIGS += xend-pci-permissive.sxp
-# Xen script dir and scripts to go there.
-XEN_SCRIPT_DIR = /etc/xen/scripts
-XEN_SCRIPTS = network-bridge vif-bridge
-XEN_SCRIPTS += network-route vif-route
-XEN_SCRIPTS += network-nat vif-nat
-XEN_SCRIPTS += block
-XEN_SCRIPTS += block-enbd block-nbd
-XEN_SCRIPTS += blktap
-XEN_SCRIPTS += vtpm vtpm-delete
-XEN_SCRIPTS += xen-hotplug-cleanup
-XEN_SCRIPTS += external-device-migrate
-XEN_SCRIPTS += vscsi
-XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
-XEN_SCRIPT_DATA += xen-hotplug-common.sh xen-network-common.sh vif-common.sh
-XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh vtpm-hotplug-common.sh
-XEN_SCRIPT_DATA += vtpm-migration.sh vtpm-impl
-
-XEN_HOTPLUG_DIR = /etc/hotplug
-XEN_HOTPLUG_SCRIPTS = xen-backend.agent
-
-UDEV_RULES_DIR = /etc/udev
-UDEV_RULES = xen-backend.rules
-
-DI = $(if $(DISTDIR),$(shell readlink -f $(DISTDIR)),)
-DE = $(if $(DESTDIR),$(shell readlink -f $(DESTDIR)),)
-ifeq ($(findstring $(DI),$(DE)),$(DI))
-HOTPLUGS=install-hotplug install-udev
-else
-ifeq ($(shell [ -x /usr/bin/udevinfo ] && [ `/usr/bin/udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/'` -ge 059 ] && echo 1),1)
-HOTPLUGS=install-udev
-else
-HOTPLUGS=install-hotplug
-endif
-endif
-
.PHONY: all
all:
build:
.PHONY: install
-install: all install-readmes install-initd install-configs install-scripts $(HOTPLUGS)
+install: all install-readmes install-configs $(HOTPLUGS)
.PHONY: install-readmes
install-readmes:
$(INSTALL_DATA) $$i $(DESTDIR)$(XEN_CONFIG_DIR); \
done
-.PHONY: install-initd
-install-initd:
- [ -d $(DESTDIR)/etc/init.d ] || $(INSTALL_DIR) $(DESTDIR)/etc/init.d
- [ -d $(DESTDIR)/etc/sysconfig ] || $(INSTALL_DIR) $(DESTDIR)/etc/sysconfig
- $(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)/etc/init.d
- $(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)/etc/init.d
- $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)/etc/sysconfig/xendomains
-
.PHONY: install-configs
install-configs: $(XEN_CONFIGS)
[ -d $(DESTDIR)$(XEN_CONFIG_DIR) ] || \
$(INSTALL_DATA) $$i $(DESTDIR)$(XEN_CONFIG_DIR); \
done
-.PHONY: install-scripts
-install-scripts:
- [ -d $(DESTDIR)$(XEN_SCRIPT_DIR) ] || \
- $(INSTALL_DIR) $(DESTDIR)$(XEN_SCRIPT_DIR)
- set -e; for i in $(XEN_SCRIPTS); \
- do \
- $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
- done
- set -e; for i in $(XEN_SCRIPT_DATA); \
- do \
- $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
- done
-
.PHONY: install-hotplug
install-hotplug:
[ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \
#(xend-relocation-server no)
(xend-relocation-server yes)
#(xend-relocation-ssl-server no)
+#(xend-udev-event-server no)
#(xend-unix-path /var/lib/xend/xend-socket)
#============================================================================
import os, re
+
+arch_libdir = 'lib'
arch = os.uname()[4]
-if re.search('64', arch):
+if os.uname()[0] == 'Linux' and re.search('64', arch):
arch_libdir = 'lib64'
-else:
- arch_libdir = 'lib'
#----------------------------------------------------------------------------
# Kernel image file.
# Set keyboard layout, default is en-us keyboard.
#keymap='ja'
+#-----------------------------------------------------------------------------
+# Enable/disable xen platform PCI device, default=1 (enabled)
+#xen_platform_pci=1
+
#-----------------------------------------------------------------------------
# Configure guest CPUID responses:
#
# Look like a generic 686 :
# cpuid = [ '0:eax=0x3,ebx=0x0,ecx=0x0,edx=0x0',
# '1:eax=0x06b1,
-# ecx=xxxxxxxxxx0000xx00xxx0000000xx0,
-# edx=xx00000xxxxxxx0xxxxxxxxx0xxxxxx',
+# ecx=xxxxxxxxxxx0000xx00xxx0000000xx0,
+# edx=xxx00000xxxxxxx0xxxxxxxxx0xxxxxx',
# '4:eax=0x3,ebx=0x0,ecx=0x0,edx=0x0',
# '0x80000000:eax=0x3,ebx=0x0,ecx=0x0,edx=0x0']
# with the highest leaf
# 'x' -> we don't care (do not check)
# 's' -> the bit must be the same as on the host that started this VM
+#-----------------------------------------------------------------------------
+# Configure passthrough PCI{,-X,e} devices:
+#
+# pci=[ '[SSSS:]BB:DD.F[,option1[,option2[...]]]', ... ]
+#
+# [SSSS]:BB:DD.F "bus segment:bus:device.function"(1) of the device to
+# be assigned; the bus segment is optional. All fields are
+# in hexadecimal, and no field should be longer than shown
+# in the pattern. Successful assignment may require hardware
+# support and additional configuration
+# (e.g. VT-d; see docs/misc/vtd.txt for more details).
+#
+# (1) bus segment is sometimes also referred to as the PCI "domain",
+# not to be confused with a Xen domain.
+#
+#
+# optionN per-device options in "key=val" format. Currently
+# available options are:
+# - msitranslate=0|1
+# per-device override of pci_msitranslate, see below
+# - power_mgmt=0|1
+# per-device override of pci_power_mgmt, see below
+#
+#pci=[ '07:00.0', '07:00.1' ]
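+#
+# For illustration only (the second address is hypothetical), per-device
+# options are appended after the address:
+#
+#pci=[ '07:00.0,msitranslate=0', '08:00.1,power_mgmt=1' ]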
+
+# MSI-INTx translation for MSI capable devices:
+#
+# If it is set, Xen will enable MSI for a device that supports it even
+# if the guest does not use MSI. In that case, an IO-APIC-type interrupt
+# will be injected into the guest every time a corresponding MSI message
+# is received.
+# If the guest enables MSI or MSI-X, the translation is automatically
+# turned off.
+#
+#pci_msitranslate=1
+
+# PCI Power Management:
+#
+# If it is set, the guest OS will be able to program the D0-D3hot states of
+# the PCI device in order to reduce power consumption.
+#
+#pci_power_mgmt=0
#-----------------------------------------------------------------------------
# Configure PVSCSI devices:
disk = [ 'file:/var/images/min-el3-i386.img,hda,w', ',hdc:cdrom,r' ]
# Actual output via PVFB
-vfb = [ 'type=sdl' ]
+vfb = [ 'sdl=1' ]
#
# To create one using the SDL backend and sensible defaults:
#
-# vfb = [ 'type=sdl' ]
+# vfb = [ 'sdl=1' ]
#
# This uses environment variables XAUTHORITY and DISPLAY. You
# can override that:
#
-# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ]
+# vfb = [ 'sdl=1,xauthority=/home/bozo/.Xauthority,display=:1' ]
#
# To create one using the VNC backend and sensible defaults:
#
-# vfb = [ 'type=vnc' ]
+# vfb = [ 'vnc=1' ]
#
# The backend listens on 127.0.0.1 port 5900+N by default, where N is
# the domain ID. You can override both address and N:
#
-# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=1' ]
+# vfb = [ 'vnc=1,vnclisten=127.0.0.1,vncdisplay=1' ]
#
# Or you can bind the first unused port above 5900:
#
-# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ]
+# vfb = [ 'vnc=1,vnclisten=0.0.0.0,vncunused=1' ]
#
# You can override the password:
#
-# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ]
+# vfb = [ 'vnc=1,vncpasswd=MYPASSWD' ]
#
# Empty password disables authentication. Defaults to the vncpasswd
# configured in xend-config.sxp.
#============================================================================
import os, re
-arch = os.uname()[4]
arch_libdir = 'lib'
+arch = os.uname()[4]
#----------------------------------------------------------------------------
# Kernel image file.
#
# To create one using the SDL backend and sensible defaults:
#
-# vfb = [ 'type=sdl' ]
+# vfb = [ 'sdl=1' ]
#
# This uses environment variables XAUTHORITY and DISPLAY. You
# can override that:
#
-# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ]
+# vfb = [ 'sdl=1,xauthority=/home/bozo/.Xauthority,display=:1' ]
#
# To create one using the VNC backend and sensible defaults:
#
-# vfb = [ 'type=vnc' ]
+# vfb = [ 'vnc=1' ]
#
# The backend listens on 127.0.0.1 port 5900+N by default, where N is
# the domain ID. You can override both address and N:
#
-# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=1' ]
+# vfb = [ 'vnc=1,vnclisten=127.0.0.1,vncdisplay=1' ]
#
# Or you can bind the first unused port above 5900:
#
-# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ]
+# vfb = [ 'vnc=1,vnclisten=0.0.0.0,vncunused=1' ]
#
# You can override the password:
#
-# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ]
+# vfb = [ 'vnc=1,vncpasswd=MYPASSWD' ]
#
# Empty password disables authentication. Defaults to the vncpasswd
# configured in xend-config.sxp.
#
# To create one using the SDL backend and sensible defaults:
#
-# vfb = [ 'type=sdl' ]
+# vfb = [ 'sdl=1' ]
#
# This uses environment variables XAUTHORITY and DISPLAY. You
# can override that:
#
-# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ]
+# vfb = [ 'sdl=1,xauthority=/home/bozo/.Xauthority,display=:1' ]
#
# To create one using the VNC backend and sensible defaults:
#
-# vfb = [ 'type=vnc' ]
+# vfb = [ 'vnc=1' ]
#
# The backend listens on 127.0.0.1 port 5900+N by default, where N is
# the domain ID. You can override both address and N:
#
-# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ]
+# vfb = [ 'vnc=1,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ]
#
# Or you can bind the first unused port above 5900:
#
-# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ]
+# vfb = [ 'vnc=1,vnclisten=0.0.0.0,vncunused=1' ]
#
# You can override the password:
#
-# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ]
+# vfb = [ 'vnc=1,vncpasswd=MYPASSWD' ]
#
# Empty password disables authentication. Defaults to the vncpasswd
# configured in xend-config.sxp.
#
# To create one using the SDL backend and sensible defaults:
#
-# vfb = [ 'type=sdl' ]
+# vfb = [ 'sdl=1' ]
#
# This uses environment variables XAUTHORITY and DISPLAY. You
# can override that:
#
-# vfb = [ 'type=sdl,xauthority=/home/bozo/.Xauthority,display=:1' ]
+# vfb = [ 'sdl=1,xauthority=/home/bozo/.Xauthority,display=:1' ]
#
# To create one using the VNC backend and sensible defaults:
#
-# vfb = [ 'type=vnc' ]
+# vfb = [ 'vnc=1' ]
#
# The backend listens on 127.0.0.1 port 5900+N by default, where N is
# the domain ID. You can override both address and N:
#
-# vfb = [ 'type=vnc,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ]
+# vfb = [ 'vnc=1,vnclisten=127.0.0.1,vncdisplay=%d' % vmid ]
#
# Or you can bind the first unused port above 5900:
#
-# vfb = [ 'type=vnc,vnclisten=0.0.0.0,vncunused=1' ]
+# vfb = [ 'vnc=1,vnclisten=0.0.0.0,vncunused=1' ]
#
# You can override the password:
#
-# vfb = [ 'type=vnc,vncpasswd=MYPASSWD' ]
+# vfb = [ 'vnc=1,vncpasswd=MYPASSWD' ]
#
# Empty password disables authentication. Defaults to the vncpasswd
# configured in xend-config.sxp.
include $(XEN_ROOT)/tools/Rules.mk
# hvmloader is a 32-bit protected mode binary.
-# It belongs in /usr/lib, not /usr/lib64.
TARGET := hvmloader/hvmloader
-INST_DIR := $(DESTDIR)/usr/lib/xen/boot
+INST_DIR := $(DESTDIR)$(LIBDIR_x86_32)/xen/boot
SUBDIRS :=
SUBDIRS += rombios
.PHONY: all
all:
@set -e; if [ $$((`( bcc -v 2>&1 | grep version || echo 0.0.0 ) | cut -d' ' -f 3 | awk -F. '{ printf "0x%02x%02x%02x", $$1, $$2, $$3}'`)) -lt $$((0x00100e)) ] ; then \
- echo "***********************************************************"; \
- echo "Require dev86 package version >= 0.16.14 to build firmware!"; \
- echo "(visit http://www.cix.co.uk/~mayday for more information)"; \
- echo "***********************************************************"; \
- else \
- $(MAKE) subdirs-$@; \
+ echo "==========================================================================="; \
+ echo "Require dev86 rpm or bin86 & bcc debs version >= 0.16.14 to build firmware!"; \
+ echo "(visit http://www.debath.co.uk/dev86/ for more information)"; \
+ echo "==========================================================================="; \
+ false ; \
fi
+ $(MAKE) subdirs-$@
.PHONY: install
override XEN_TARGET_ARCH = x86_32
# User-supplied CFLAGS are not useful here.
-CFLAGS :=
+CFLAGS =
include $(XEN_ROOT)/tools/Rules.mk
CFLAGS += -Werror
# Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS += $(call cc-option,$(CC),-nopie,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
# Extra CFLAGS suitable for an embedded type of environment.
CFLAGS += -fno-builtin -msoft-float
#include "../rombios/32bit/32bitbios_flat.h"
-static void relocate_32bitbios(char *elfarray, uint32_t elfarraysize)
+static uint32_t relocate_32bitbios(char *elfarray, uint32_t elfarraysize)
{
Elf32_Ehdr *ehdr = (Elf32_Ehdr *)elfarray;
Elf32_Shdr *shdr = (Elf32_Shdr *)&elfarray[ehdr->e_shoff];
- char *secstrings = &elfarray[shdr[ehdr->e_shstrndx].sh_offset];
- char *jump_table;
uint32_t reloc_off, reloc_size;
char *highbiosarea;
- int i, jump_sec_idx = 0;
+ int i;
/*
* Step 1. General elf cleanup, and compute total relocation size.
/* By default all section data points into elf image data array. */
shdr[i].sh_addr = (Elf32_Addr)&elfarray[shdr[i].sh_offset];
- if ( !strcmp(".biosjumptable", secstrings + shdr[i].sh_name) )
- {
- /* We do not relocate the BIOS jump table to high memory. */
- shdr[i].sh_flags &= ~SHF_ALLOC;
- jump_sec_idx = i;
- }
-
/* Fix up a corner case of address alignment. */
if ( shdr[i].sh_addralign == 0 )
shdr[i].sh_addralign = 1;
*/
reloc_size = reloc_off;
printf("%d bytes of ROMBIOS high-memory extensions:\n", reloc_size);
- highbiosarea = (char *)(long)e820_malloc(reloc_size, 0);
+ highbiosarea = mem_alloc(reloc_size, 0);
BUG_ON(highbiosarea == NULL);
printf(" Relocating to 0x%x-0x%x ... ",
(uint32_t)&highbiosarea[0],
}
}
- /* Step 5. Find the ROMBIOS jump-table stub and copy in the real table. */
- for ( jump_table = (char *)ROMBIOS_BEGIN;
- jump_table != (char *)ROMBIOS_END;
- jump_table++ )
- if ( !strncmp(jump_table, "___JMPT", 7) )
- break;
- BUG_ON(jump_table == NULL);
- BUG_ON(jump_sec_idx == 0);
- memcpy(jump_table, (char *)shdr[jump_sec_idx].sh_addr,
- shdr[jump_sec_idx].sh_size);
-
printf("done\n");
+
+ return (uint32_t)highbiosarea;
}
-void highbios_setup(void)
+uint32_t highbios_setup(void)
{
- relocate_32bitbios((char *)highbios_array, sizeof(highbios_array));
+ return relocate_32bitbios((char *)highbios_array, sizeof(highbios_array));
}
.PHONY: clean
clean: subdirs-clean
rm -f roms.h acpi.h
- rm -f hvmloader hvmloader.tmp *.o
+ rm -f hvmloader hvmloader.tmp *.o $(DEPS)
+
+-include $(DEPS)
H_SRC = $(wildcard *.h)
OBJS = $(patsubst %.c,%.o,$(C_SRC))
-IASL_VER = acpica-unix-20080729
-IASL_URL = http://acpica.org/download/$(IASL_VER).tar.gz
-
CFLAGS += -I. -I.. $(CFLAGS_include)
vpath iasl $(PATH)
iasl:
@echo
- @echo "ACPI ASL compiler(iasl) is needed"
- @echo "Download Intel ACPI CA"
- @echo "If wget failed, please download and compile manually from"
+ @echo "ACPI ASL compiler (iasl) is needed"
+ @echo "Download and install Intel ACPI CA from"
@echo "http://acpica.org/downloads/"
@echo
- wget $(IASL_URL)
- tar xzf $(IASL_VER).tar.gz
- make -C $(IASL_VER)/compiler
- $(INSTALL_PROG) $(IASL_VER)/compiler/iasl $(DESTDIR)$(BINDIR)/iasl
+ @exit 1
acpi.a: $(OBJS)
$(AR) rc $@ $(OBJS)
$(CC) $(CPPFLAGS) $(CFLAGS) -c -o $@ $<
clean:
- rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz
+ rm -rf *.a *.o $(IASL_VER) $(IASL_VER).tar.gz $(DEPS)
install: all
+
+-include $(DEPS)
p[checksum_offset] = -sum;
}
-static int uart_exists(uint16_t uart_base)
-{
- uint16_t ier = uart_base + 1;
- uint8_t a, b, c;
-
- a = inb(ier);
- outb(ier, 0);
- b = inb(ier);
- outb(ier, 0xf);
- c = inb(ier);
- outb(ier, a);
-
- return ((b == 0) && (c == 0xf));
-}
-
-static int hpet_exists(unsigned long hpet_base)
-{
- uint32_t hpet_id = *(uint32_t *)hpet_base;
- return ((hpet_id >> 16) == 0x8086);
-}
-
static uint8_t battery_port_exists(void)
{
return (inb(0x88) == 0x1F);
}
-static int construct_bios_info_table(uint8_t *buf)
-{
- struct bios_info *bios_info = (struct bios_info *)buf;
-
- memset(bios_info, 0, sizeof(*bios_info));
-
- bios_info->com1_present = uart_exists(0x3f8);
- bios_info->com2_present = uart_exists(0x2f8);
-
- bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
-
- bios_info->pci_min = PCI_MEMBASE;
- bios_info->pci_len = PCI_MEMSIZE;
- bios_info->xen_pfiob = 0xdead;
-
- return align16(sizeof(*bios_info));
-}
-
static int construct_madt(struct acpi_20_madt *madt)
{
struct acpi_20_madt_intsrcovr *intsrcovr;
offset += sizeof(*io_apic);
lapic = (struct acpi_20_madt_lapic *)(io_apic + 1);
- for ( i = 0; i < get_vcpu_nr(); i++ )
+ for ( i = 0; i < hvm_info->nr_vcpus; i++ )
{
memset(lapic, 0, sizeof(*lapic));
lapic->type = ACPI_PROCESSOR_LOCAL_APIC;
struct acpi_20_tcpa *tcpa;
static const uint16_t tis_signature[] = {0x0001, 0x0001, 0x0001};
uint16_t *tis_hdr;
+ void *lasa;
/* MADT. */
- if ( (get_vcpu_nr() > 1) || get_apic_mode() )
+ if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
{
madt = (struct acpi_20_madt *)&buf[offset];
offset += construct_madt(madt);
tcpa->header.oem_revision = ACPI_OEM_REVISION;
tcpa->header.creator_id = ACPI_CREATOR_ID;
tcpa->header.creator_revision = ACPI_CREATOR_REVISION;
- tcpa->lasa = e820_malloc(ACPI_2_0_TCPA_LAML_SIZE, 0);
- if ( tcpa->lasa )
+ if ( (lasa = mem_alloc(ACPI_2_0_TCPA_LAML_SIZE, 0)) != NULL )
{
+ tcpa->lasa = virt_to_phys(lasa);
tcpa->laml = ACPI_2_0_TCPA_LAML_SIZE;
- memset((char *)(unsigned long)tcpa->lasa, 0, tcpa->laml);
+ memset(lasa, 0, tcpa->laml);
set_checksum(tcpa,
offsetof(struct acpi_header, checksum),
tcpa->header.length);
buf = (uint8_t *)ACPI_PHYSICAL_ADDRESS;
offset = 0;
- offset += construct_bios_info_table(&buf[offset]);
rsdp = (struct acpi_20_rsdp *)&buf[offset];
-
memcpy(rsdp, &Rsdp, sizeof(struct acpi_20_rsdp));
offset += align16(sizeof(struct acpi_20_rsdp));
rsdp->rsdt_address = (unsigned long)rsdt;
memset(buf, 0, high_sz);
/* Allocate data area and set up ACPI tables there. */
- buf = (uint8_t *)e820_malloc(high_sz, 0);
+ buf = mem_alloc(high_sz, 0);
__acpi_build_tables(buf, &low_sz, &high_sz);
printf(" - Lo data: %08lx-%08lx\n"
Scope (\_SB)
{
- /* ACPI_PHYSICAL_ADDRESS == 0xEA000 */
+ /* BIOS_INFO_PHYSICAL_ADDRESS == 0xEA000 */
OperationRegion(BIOS, SystemMemory, 0xEA000, 16)
Field(BIOS, ByteAcc, NoLock, Preserve) {
UAR1, 1,
Name (_ADR, 0x00)
Name (_BBN, 0x00)
+ /*
+ * Reserve the IO port ranges [0x10c0, 0x10c2] and [0xb044, 0xb047].
+ * Or else, for a hotplugged-in device, the port IO BAR assigned
+ * by guest OS may conflict with the ranges here.
+ */
+ Device(HP0)
+ {
+ Name(_HID, EISAID("PNP0C02"))
+ Name(_CRS, ResourceTemplate() {
+ IO (Decode16, 0x10c0, 0x10c0, 0x00, 0x03)
+ IO (Decode16, 0xb044, 0xb044, 0x00, 0x04)
+ })
+ }
+
Method (_CRS, 0, NotSerialized)
{
Name (PRT0, ResourceTemplate ()
Package(){0x000fffff, 1, \_SB.PCI0.LNKA, 0},
Package(){0x000fffff, 2, \_SB.PCI0.LNKB, 0},
Package(){0x000fffff, 3, \_SB.PCI0.LNKC, 0},
+
+ /* Device 16, INTA - INTD */
+ Package(){0x0010ffff, 0, \_SB.PCI0.LNKA, 0},
+ Package(){0x0010ffff, 1, \_SB.PCI0.LNKB, 0},
+ Package(){0x0010ffff, 2, \_SB.PCI0.LNKC, 0},
+ Package(){0x0010ffff, 3, \_SB.PCI0.LNKD, 0},
+
+ /* Device 17, INTA - INTD */
+ Package(){0x0011ffff, 0, \_SB.PCI0.LNKB, 0},
+ Package(){0x0011ffff, 1, \_SB.PCI0.LNKC, 0},
+ Package(){0x0011ffff, 2, \_SB.PCI0.LNKD, 0},
+ Package(){0x0011ffff, 3, \_SB.PCI0.LNKA, 0},
+
+ /* Device 18, INTA - INTD */
+ Package(){0x0012ffff, 0, \_SB.PCI0.LNKC, 0},
+ Package(){0x0012ffff, 1, \_SB.PCI0.LNKD, 0},
+ Package(){0x0012ffff, 2, \_SB.PCI0.LNKA, 0},
+ Package(){0x0012ffff, 3, \_SB.PCI0.LNKB, 0},
+
+ /* Device 19, INTA - INTD */
+ Package(){0x0013ffff, 0, \_SB.PCI0.LNKD, 0},
+ Package(){0x0013ffff, 1, \_SB.PCI0.LNKA, 0},
+ Package(){0x0013ffff, 2, \_SB.PCI0.LNKB, 0},
+ Package(){0x0013ffff, 3, \_SB.PCI0.LNKC, 0},
+
+ /* Device 20, INTA - INTD */
+ Package(){0x0014ffff, 0, \_SB.PCI0.LNKA, 0},
+ Package(){0x0014ffff, 1, \_SB.PCI0.LNKB, 0},
+ Package(){0x0014ffff, 2, \_SB.PCI0.LNKC, 0},
+ Package(){0x0014ffff, 3, \_SB.PCI0.LNKD, 0},
+
+ /* Device 21, INTA - INTD */
+ Package(){0x0015ffff, 0, \_SB.PCI0.LNKB, 0},
+ Package(){0x0015ffff, 1, \_SB.PCI0.LNKC, 0},
+ Package(){0x0015ffff, 2, \_SB.PCI0.LNKD, 0},
+ Package(){0x0015ffff, 3, \_SB.PCI0.LNKA, 0},
+
+ /* Device 22, INTA - INTD */
+ Package(){0x0016ffff, 0, \_SB.PCI0.LNKC, 0},
+ Package(){0x0016ffff, 1, \_SB.PCI0.LNKD, 0},
+ Package(){0x0016ffff, 2, \_SB.PCI0.LNKA, 0},
+ Package(){0x0016ffff, 3, \_SB.PCI0.LNKB, 0},
+
+ /* Device 23, INTA - INTD */
+ Package(){0x0017ffff, 0, \_SB.PCI0.LNKD, 0},
+ Package(){0x0017ffff, 1, \_SB.PCI0.LNKA, 0},
+ Package(){0x0017ffff, 2, \_SB.PCI0.LNKB, 0},
+ Package(){0x0017ffff, 3, \_SB.PCI0.LNKC, 0},
+
+ /* Device 24, INTA - INTD */
+ Package(){0x0018ffff, 0, \_SB.PCI0.LNKA, 0},
+ Package(){0x0018ffff, 1, \_SB.PCI0.LNKB, 0},
+ Package(){0x0018ffff, 2, \_SB.PCI0.LNKC, 0},
+ Package(){0x0018ffff, 3, \_SB.PCI0.LNKD, 0},
+
+ /* Device 25, INTA - INTD */
+ Package(){0x0019ffff, 0, \_SB.PCI0.LNKB, 0},
+ Package(){0x0019ffff, 1, \_SB.PCI0.LNKC, 0},
+ Package(){0x0019ffff, 2, \_SB.PCI0.LNKD, 0},
+ Package(){0x0019ffff, 3, \_SB.PCI0.LNKA, 0},
+
+ /* Device 26, INTA - INTD */
+ Package(){0x001affff, 0, \_SB.PCI0.LNKC, 0},
+ Package(){0x001affff, 1, \_SB.PCI0.LNKD, 0},
+ Package(){0x001affff, 2, \_SB.PCI0.LNKA, 0},
+ Package(){0x001affff, 3, \_SB.PCI0.LNKB, 0},
+
+ /* Device 27, INTA - INTD */
+ Package(){0x001bffff, 0, \_SB.PCI0.LNKD, 0},
+ Package(){0x001bffff, 1, \_SB.PCI0.LNKA, 0},
+ Package(){0x001bffff, 2, \_SB.PCI0.LNKB, 0},
+ Package(){0x001bffff, 3, \_SB.PCI0.LNKC, 0},
+
+ /* Device 28, INTA - INTD */
+ Package(){0x001cffff, 0, \_SB.PCI0.LNKA, 0},
+ Package(){0x001cffff, 1, \_SB.PCI0.LNKB, 0},
+ Package(){0x001cffff, 2, \_SB.PCI0.LNKC, 0},
+ Package(){0x001cffff, 3, \_SB.PCI0.LNKD, 0},
+
+ /* Device 29, INTA - INTD */
+ Package(){0x001dffff, 0, \_SB.PCI0.LNKB, 0},
+ Package(){0x001dffff, 1, \_SB.PCI0.LNKC, 0},
+ Package(){0x001dffff, 2, \_SB.PCI0.LNKD, 0},
+ Package(){0x001dffff, 3, \_SB.PCI0.LNKA, 0},
+
+ /* Device 30, INTA - INTD */
+ Package(){0x001effff, 0, \_SB.PCI0.LNKC, 0},
+ Package(){0x001effff, 1, \_SB.PCI0.LNKD, 0},
+ Package(){0x001effff, 2, \_SB.PCI0.LNKA, 0},
+ Package(){0x001effff, 3, \_SB.PCI0.LNKB, 0},
+
+ /* Device 31, INTA - INTD */
+ Package(){0x001fffff, 0, \_SB.PCI0.LNKD, 0},
+ Package(){0x001fffff, 1, \_SB.PCI0.LNKA, 0},
+ Package(){0x001fffff, 2, \_SB.PCI0.LNKB, 0},
+ Package(){0x001fffff, 3, \_SB.PCI0.LNKC, 0},
})
Name(PRTA, Package() {
Package(){0x000fffff, 1, 0, 46},
Package(){0x000fffff, 2, 0, 47},
Package(){0x000fffff, 3, 0, 16},
+
+ /* Device 16, INTA - INTD */
+ Package(){0x0010ffff, 0, 0, 18},
+ Package(){0x0010ffff, 1, 0, 19},
+ Package(){0x0010ffff, 2, 0, 20},
+ Package(){0x0010ffff, 3, 0, 21},
+
+ /* Device 17, INTA - INTD */
+ Package(){0x0011ffff, 0, 0, 22},
+ Package(){0x0011ffff, 1, 0, 23},
+ Package(){0x0011ffff, 2, 0, 24},
+ Package(){0x0011ffff, 3, 0, 25},
+
+ /* Device 18, INTA - INTD */
+ Package(){0x0012ffff, 0, 0, 26},
+ Package(){0x0012ffff, 1, 0, 27},
+ Package(){0x0012ffff, 2, 0, 28},
+ Package(){0x0012ffff, 3, 0, 29},
+
+ /* Device 19, INTA - INTD */
+ Package(){0x0013ffff, 0, 0, 30},
+ Package(){0x0013ffff, 1, 0, 31},
+ Package(){0x0013ffff, 2, 0, 32},
+ Package(){0x0013ffff, 3, 0, 33},
+
+ /* Device 20, INTA - INTD */
+ Package(){0x0014ffff, 0, 0, 34},
+ Package(){0x0014ffff, 1, 0, 35},
+ Package(){0x0014ffff, 2, 0, 36},
+ Package(){0x0014ffff, 3, 0, 37},
+
+ /* Device 21, INTA - INTD */
+ Package(){0x0015ffff, 0, 0, 38},
+ Package(){0x0015ffff, 1, 0, 39},
+ Package(){0x0015ffff, 2, 0, 40},
+ Package(){0x0015ffff, 3, 0, 41},
+
+ /* Device 22, INTA - INTD */
+ Package(){0x0016ffff, 0, 0, 42},
+ Package(){0x0016ffff, 1, 0, 43},
+ Package(){0x0016ffff, 2, 0, 44},
+ Package(){0x0016ffff, 3, 0, 45},
+
+ /* Device 23, INTA - INTD */
+ Package(){0x0017ffff, 0, 0, 46},
+ Package(){0x0017ffff, 1, 0, 47},
+ Package(){0x0017ffff, 2, 0, 16},
+ Package(){0x0017ffff, 3, 0, 17},
+
+ /* Device 24, INTA - INTD */
+ Package(){0x0018ffff, 0, 0, 19},
+ Package(){0x0018ffff, 1, 0, 20},
+ Package(){0x0018ffff, 2, 0, 21},
+ Package(){0x0018ffff, 3, 0, 22},
+
+ /* Device 25, INTA - INTD */
+ Package(){0x0019ffff, 0, 0, 23},
+ Package(){0x0019ffff, 1, 0, 24},
+ Package(){0x0019ffff, 2, 0, 25},
+ Package(){0x0019ffff, 3, 0, 26},
+
+ /* Device 26, INTA - INTD */
+ Package(){0x001affff, 0, 0, 27},
+ Package(){0x001affff, 1, 0, 28},
+ Package(){0x001affff, 2, 0, 29},
+ Package(){0x001affff, 3, 0, 30},
+
+ /* Device 27, INTA - INTD */
+ Package(){0x001bffff, 0, 0, 31},
+ Package(){0x001bffff, 1, 0, 32},
+ Package(){0x001bffff, 2, 0, 33},
+ Package(){0x001bffff, 3, 0, 34},
+
+ /* Device 28, INTA - INTD */
+ Package(){0x001cffff, 0, 0, 35},
+ Package(){0x001cffff, 1, 0, 36},
+ Package(){0x001cffff, 2, 0, 37},
+ Package(){0x001cffff, 3, 0, 38},
+
+ /* Device 29, INTA - INTD */
+ Package(){0x001dffff, 0, 0, 39},
+ Package(){0x001dffff, 1, 0, 40},
+ Package(){0x001dffff, 2, 0, 41},
+ Package(){0x001dffff, 3, 0, 42},
+
+ /* Device 30, INTA - INTD */
+ Package(){0x001effff, 0, 0, 43},
+ Package(){0x001effff, 1, 0, 44},
+ Package(){0x001effff, 2, 0, 45},
+ Package(){0x001effff, 3, 0, 46},
+
+ /* Device 31, INTA - INTD */
+ Package(){0x001fffff, 0, 0, 47},
+ Package(){0x001fffff, 1, 0, 16},
+ Package(){0x001fffff, 2, 0, 17},
+ Package(){0x001fffff, 3, 0, 18},
})
Device (ISA)
* handle the hotplug action and status, which is beyond the ACPI
* scope.
*/
+ Device(S00)
+ {
+ Name (_ADR, 0x00000000) /* Dev 0, Func 0 */
+ Name (_SUN, 0x00000000)
+
+ Method (_PS0, 0)
+ {
+ Store (0x00, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x00, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x00, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH00) /* eject php slot 0x00 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x00, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH00) /* IN status as the _STA */
+ }
+ }
- Device (S1F0)
+ Device(S01)
{
- Name (_ADR, 0x00060000) /* Dev 6, Func 0 */
+ Name (_ADR, 0x00010000) /* Dev 1, Func 0 */
Name (_SUN, 0x00000001)
Method (_PS0, 0)
{
+ Store (0x01, \_GPE.DPT1)
Store (0x80, \_GPE.DPT2)
}
Method (_PS3, 0)
{
+ Store (0x01, \_GPE.DPT1)
Store (0x83, \_GPE.DPT2)
}
Method (_EJ0, 1)
{
+ Store (0x01, \_GPE.DPT1)
Store (0x88, \_GPE.DPT2)
- Store (0x1, \_GPE.PHP1) /* eject php slot 1*/
+ Store (0x1, \_GPE.PH01) /* eject php slot 0x01 */
}
Method (_STA, 0)
{
+ Store (0x01, \_GPE.DPT1)
Store (0x89, \_GPE.DPT2)
- Return ( \_GPE.PHP1 ) /* IN status as the _STA */
+ Return (\_GPE.PH01) /* IN status as the _STA */
}
}
- Device (S2F0)
+ Device(S02)
{
- Name (_ADR, 0x00070000) /* Dev 7, Func 0 */
+ Name (_ADR, 0x00020000) /* Dev 2, Func 0 */
Name (_SUN, 0x00000002)
Method (_PS0, 0)
{
- Store (0x90, \_GPE.DPT2)
+ Store (0x02, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
}
Method (_PS3, 0)
{
- Store (0x93, \_GPE.DPT2)
+ Store (0x02, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
}
Method (_EJ0, 1)
{
- Store (0x98, \_GPE.DPT2)
- Store (0x1, \_GPE.PHP2) /* eject php slot 1*/
+ Store (0x02, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH02) /* eject php slot 0x02 */
}
Method (_STA, 0)
{
- Store (0x99, \_GPE.DPT2)
- Return ( \_GPE.PHP2 ) /* IN status as the _STA */
+ Store (0x02, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH02) /* IN status as the _STA */
}
}
- }
- }
- Scope (\_GPE)
- {
- OperationRegion (PHP, SystemIO, 0x10c0, 0x03)
- Field (PHP, ByteAcc, NoLock, Preserve)
- {
- PSTA, 8, /* hotplug controller status reg */
- PHP1, 8, /* hotplug slot 1 control reg */
- PHP2, 8 /* hotplug slot 2 control reg */
- }
- OperationRegion (DG1, SystemIO, 0xb044, 0x04)
- Field (DG1, ByteAcc, NoLock, Preserve)
- {
- DPT1, 8,
- DPT2, 8
- }
- Method (_L03, 0, NotSerialized)
- {
- /* detect slot and event(remove/add) */
- Name (SLT, 0x0)
- Name (EVT, 0x0)
- Store (PSTA, Local1)
- ShiftRight (Local1, 0x4, SLT)
- And (Local1, 0xf, EVT)
+ Device(S03)
+ {
+ Name (_ADR, 0x00030000) /* Dev 3, Func 0 */
+ Name (_SUN, 0x00000003)
- /* debug */
- Store (SLT, DPT1)
- Store (EVT, DPT2)
+ Method (_PS0, 0)
+ {
+ Store (0x03, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x03, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x03, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH03) /* eject php slot 0x03 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x03, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH03) /* IN status as the _STA */
+ }
+ }
+
+ Device(S04)
+ {
+ Name (_ADR, 0x00040000) /* Dev 4, Func 0 */
+ Name (_SUN, 0x00000004)
+
+ Method (_PS0, 0)
+ {
+ Store (0x04, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x04, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x04, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH04) /* eject php slot 0x04 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x04, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH04) /* IN status as the _STA */
+ }
+ }
+
+ Device(S05)
+ {
+ Name (_ADR, 0x00050000) /* Dev 5, Func 0 */
+ Name (_SUN, 0x00000005)
+
+ Method (_PS0, 0)
+ {
+ Store (0x05, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x05, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x05, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH05) /* eject php slot 0x05 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x05, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH05) /* IN status as the _STA */
+ }
+ }
+
+ Device(S06)
+ {
+ Name (_ADR, 0x00060000) /* Dev 6, Func 0 */
+ Name (_SUN, 0x00000006)
+
+ Method (_PS0, 0)
+ {
+ Store (0x06, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x06, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x06, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH06) /* eject php slot 0x06 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x06, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH06) /* IN status as the _STA */
+ }
+ }
+
+ Device(S07)
+ {
+ Name (_ADR, 0x00070000) /* Dev 7, Func 0 */
+ Name (_SUN, 0x00000007)
+
+ Method (_PS0, 0)
+ {
+ Store (0x07, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x07, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x07, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH07) /* eject php slot 0x07 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x07, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH07) /* IN status as the _STA */
+ }
+ }
+
+ Device(S08)
+ {
+ Name (_ADR, 0x00080000) /* Dev 8, Func 0 */
+ Name (_SUN, 0x00000008)
+
+ Method (_PS0, 0)
+ {
+ Store (0x08, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x08, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x08, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH08) /* eject php slot 0x08 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x08, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH08) /* IN status as the _STA */
+ }
+ }
- If ( LEqual(SLT, 0x1) )
+ Device(S09)
{
- Notify (\_SB.PCI0.S1F0, EVT)
+ Name (_ADR, 0x00090000) /* Dev 9, Func 0 */
+ Name (_SUN, 0x00000009)
+
+ Method (_PS0, 0)
+ {
+ Store (0x09, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x09, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x09, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH09) /* eject php slot 0x09 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x09, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH09) /* IN status as the _STA */
+ }
+ }
+
+ Device(S0A)
+ {
+ Name (_ADR, 0x000a0000) /* Dev 10, Func 0 */
+ Name (_SUN, 0x0000000a)
+
+ Method (_PS0, 0)
+ {
+ Store (0x0a, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x0a, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x0a, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH0A) /* eject php slot 0x0a */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x0a, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH0A) /* IN status as the _STA */
+ }
}
- ElseIf ( LEqual(SLT, 0x2) )
+
+ Device(S0B)
{
- Notify (\_SB.PCI0.S2F0, EVT)
+ Name (_ADR, 0x000b0000) /* Dev 11, Func 0 */
+ Name (_SUN, 0x0000000b)
+
+ Method (_PS0, 0)
+ {
+ Store (0x0b, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x0b, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x0b, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH0B) /* eject php slot 0x0b */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x0b, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH0B) /* IN status as the _STA */
+ }
+ }
+
+ Device(S0C)
+ {
+ Name (_ADR, 0x000c0000) /* Dev 12, Func 0 */
+ Name (_SUN, 0x0000000c)
+
+ Method (_PS0, 0)
+ {
+ Store (0x0c, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x0c, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x0c, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH0C) /* eject php slot 0x0c */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x0c, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH0C) /* IN status as the _STA */
+ }
+ }
+
+ Device(S0D)
+ {
+ Name (_ADR, 0x000d0000) /* Dev 13, Func 0 */
+ Name (_SUN, 0x0000000d)
+
+ Method (_PS0, 0)
+ {
+ Store (0x0d, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x0d, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x0d, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH0D) /* eject php slot 0x0d */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x0d, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH0D) /* IN status as the _STA */
+ }
+ }
+
+ Device(S0E)
+ {
+ Name (_ADR, 0x000e0000) /* Dev 14, Func 0 */
+ Name (_SUN, 0x0000000e)
+
+ Method (_PS0, 0)
+ {
+ Store (0x0e, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x0e, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x0e, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH0E) /* eject php slot 0x0e */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x0e, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH0E) /* IN status as the _STA */
+ }
+ }
+
+ Device(S0F)
+ {
+ Name (_ADR, 0x000f0000) /* Dev 15, Func 0 */
+ Name (_SUN, 0x0000000f)
+
+ Method (_PS0, 0)
+ {
+ Store (0x0f, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x0f, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x0f, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH0F) /* eject php slot 0x0f */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x0f, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH0F) /* IN status as the _STA */
+ }
+ }
+
+ Device(S10)
+ {
+ Name (_ADR, 0x00100000) /* Dev 16, Func 0 */
+ Name (_SUN, 0x00000010)
+
+ Method (_PS0, 0)
+ {
+ Store (0x10, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x10, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x10, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH10) /* eject php slot 0x10 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x10, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH10) /* IN status as the _STA */
+ }
+ }
+
+ Device(S11)
+ {
+ Name (_ADR, 0x00110000) /* Dev 17, Func 0 */
+ Name (_SUN, 0x00000011)
+
+ Method (_PS0, 0)
+ {
+ Store (0x11, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x11, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x11, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH11) /* eject php slot 0x11 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x11, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH11) /* IN status as the _STA */
+ }
+ }
+
+ Device(S12)
+ {
+ Name (_ADR, 0x00120000) /* Dev 18, Func 0 */
+ Name (_SUN, 0x00000012)
+
+ Method (_PS0, 0)
+ {
+ Store (0x12, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x12, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x12, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH12) /* eject php slot 0x12 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x12, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH12) /* IN status as the _STA */
+ }
+ }
+
+ Device(S13)
+ {
+ Name (_ADR, 0x00130000) /* Dev 19, Func 0 */
+ Name (_SUN, 0x00000013)
+
+ Method (_PS0, 0)
+ {
+ Store (0x13, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x13, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x13, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH13) /* eject php slot 0x13 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x13, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH13) /* IN status as the _STA */
+ }
+ }
+
+ Device(S14)
+ {
+ Name (_ADR, 0x00140000) /* Dev 20, Func 0 */
+ Name (_SUN, 0x00000014)
+
+ Method (_PS0, 0)
+ {
+ Store (0x14, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x14, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x14, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH14) /* eject php slot 0x14 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x14, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH14) /* IN status as the _STA */
+ }
+ }
+
+ Device(S15)
+ {
+ Name (_ADR, 0x00150000) /* Dev 21, Func 0 */
+ Name (_SUN, 0x00000015)
+
+ Method (_PS0, 0)
+ {
+ Store (0x15, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x15, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x15, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH15) /* eject php slot 0x15 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x15, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH15) /* IN status as the _STA */
+ }
+ }
+
+ Device(S16)
+ {
+ Name (_ADR, 0x00160000) /* Dev 22, Func 0 */
+ Name (_SUN, 0x00000016)
+
+ Method (_PS0, 0)
+ {
+ Store (0x16, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x16, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x16, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH16) /* eject php slot 0x16 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x16, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH16) /* IN status as the _STA */
+ }
+ }
+
+ Device(S17)
+ {
+ Name (_ADR, 0x00170000) /* Dev 23, Func 0 */
+ Name (_SUN, 0x00000017)
+
+ Method (_PS0, 0)
+ {
+ Store (0x17, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x17, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x17, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH17) /* eject php slot 0x17 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x17, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH17) /* IN status as the _STA */
+ }
+ }
+
+ Device(S18)
+ {
+ Name (_ADR, 0x00180000) /* Dev 24, Func 0 */
+ Name (_SUN, 0x00000018)
+
+ Method (_PS0, 0)
+ {
+ Store (0x18, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x18, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x18, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH18) /* eject php slot 0x18 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x18, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH18) /* IN status as the _STA */
+ }
+ }
+
+ Device(S19)
+ {
+ Name (_ADR, 0x00190000) /* Dev 25, Func 0 */
+ Name (_SUN, 0x00000019)
+
+ Method (_PS0, 0)
+ {
+ Store (0x19, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x19, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x19, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH19) /* eject php slot 0x19 */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x19, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH19) /* IN status as the _STA */
+ }
+ }
+
+ Device(S1A)
+ {
+ Name (_ADR, 0x001a0000) /* Dev 26, Func 0 */
+ Name (_SUN, 0x0000001a)
+
+ Method (_PS0, 0)
+ {
+ Store (0x1a, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x1a, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x1a, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH1A) /* eject php slot 0x1a */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x1a, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH1A) /* IN status as the _STA */
+ }
+ }
+
+ Device(S1B)
+ {
+ Name (_ADR, 0x001b0000) /* Dev 27, Func 0 */
+ Name (_SUN, 0x0000001b)
+
+ Method (_PS0, 0)
+ {
+ Store (0x1b, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x1b, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x1b, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH1B) /* eject php slot 0x1b */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x1b, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH1B) /* IN status as the _STA */
+ }
+ }
+
+ Device(S1C)
+ {
+ Name (_ADR, 0x001c0000) /* Dev 28, Func 0 */
+ Name (_SUN, 0x0000001c)
+
+ Method (_PS0, 0)
+ {
+ Store (0x1c, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x1c, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x1c, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH1C) /* eject php slot 0x1c */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x1c, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH1C) /* IN status as the _STA */
+ }
+ }
+
+ Device(S1D)
+ {
+ Name (_ADR, 0x001d0000) /* Dev 29, Func 0 */
+ Name (_SUN, 0x0000001d)
+
+ Method (_PS0, 0)
+ {
+ Store (0x1d, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x1d, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x1d, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH1D) /* eject php slot 0x1d */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x1d, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH1D) /* IN status as the _STA */
+ }
+ }
+
+ Device(S1E)
+ {
+ Name (_ADR, 0x001e0000) /* Dev 30, Func 0 */
+ Name (_SUN, 0x0000001e)
+
+ Method (_PS0, 0)
+ {
+ Store (0x1e, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x1e, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x1e, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH1E) /* eject php slot 0x1e */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x1e, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH1E) /* IN status as the _STA */
+ }
+ }
+
+ Device(S1F)
+ {
+ Name (_ADR, 0x001f0000) /* Dev 31, Func 0 */
+ Name (_SUN, 0x0000001f)
+
+ Method (_PS0, 0)
+ {
+ Store (0x1f, \_GPE.DPT1)
+ Store (0x80, \_GPE.DPT2)
+ }
+
+ Method (_PS3, 0)
+ {
+ Store (0x1f, \_GPE.DPT1)
+ Store (0x83, \_GPE.DPT2)
+ }
+
+ Method (_EJ0, 1)
+ {
+ Store (0x1f, \_GPE.DPT1)
+ Store (0x88, \_GPE.DPT2)
+ Store (0x1, \_GPE.PH1F) /* eject php slot 0x1f */
+ }
+
+ Method (_STA, 0)
+ {
+ Store (0x1f, \_GPE.DPT1)
+ Store (0x89, \_GPE.DPT2)
+ Return (\_GPE.PH1F) /* IN status as the _STA */
+ }
+ }
+ }
+ }
+
+ Scope (\_GPE)
+ {
+ OperationRegion (PHP, SystemIO, 0x10c0, 0x22)
+ Field (PHP, ByteAcc, NoLock, Preserve)
+ {
+ PSTA, 8, /* hotplug controller event reg */
+ PSTB, 8, /* hotplug controller slot reg */
+ PH00, 8, /* hotplug slot 0x00 control reg */
+ PH01, 8, /* hotplug slot 0x01 control reg */
+ PH02, 8, /* hotplug slot 0x02 control reg */
+ PH03, 8, /* hotplug slot 0x03 control reg */
+ PH04, 8, /* hotplug slot 0x04 control reg */
+ PH05, 8, /* hotplug slot 0x05 control reg */
+ PH06, 8, /* hotplug slot 0x06 control reg */
+ PH07, 8, /* hotplug slot 0x07 control reg */
+ PH08, 8, /* hotplug slot 0x08 control reg */
+ PH09, 8, /* hotplug slot 0x09 control reg */
+ PH0A, 8, /* hotplug slot 0x0a control reg */
+ PH0B, 8, /* hotplug slot 0x0b control reg */
+ PH0C, 8, /* hotplug slot 0x0c control reg */
+ PH0D, 8, /* hotplug slot 0x0d control reg */
+ PH0E, 8, /* hotplug slot 0x0e control reg */
+ PH0F, 8, /* hotplug slot 0x0f control reg */
+ PH10, 8, /* hotplug slot 0x10 control reg */
+ PH11, 8, /* hotplug slot 0x11 control reg */
+ PH12, 8, /* hotplug slot 0x12 control reg */
+ PH13, 8, /* hotplug slot 0x13 control reg */
+ PH14, 8, /* hotplug slot 0x14 control reg */
+ PH15, 8, /* hotplug slot 0x15 control reg */
+ PH16, 8, /* hotplug slot 0x16 control reg */
+ PH17, 8, /* hotplug slot 0x17 control reg */
+ PH18, 8, /* hotplug slot 0x18 control reg */
+ PH19, 8, /* hotplug slot 0x19 control reg */
+ PH1A, 8, /* hotplug slot 0x1a control reg */
+ PH1B, 8, /* hotplug slot 0x1b control reg */
+ PH1C, 8, /* hotplug slot 0x1c control reg */
+ PH1D, 8, /* hotplug slot 0x1d control reg */
+ PH1E, 8, /* hotplug slot 0x1e control reg */
+ PH1F, 8 /* hotplug slot 0x1f control reg */
+ }
+ OperationRegion (DG1, SystemIO, 0xb044, 0x04)
+ Field (DG1, ByteAcc, NoLock, Preserve)
+ {
+ DPT1, 8,
+ DPT2, 8
+ }
+ Method (_L03, 0, Serialized)
+ {
+ /* detect slot and event(remove/add) */
+ Name (SLT, 0x0)
+ Name (EVT, 0x0)
+ Store (PSTA, Local1)
+ And (Local1, 0xf, EVT)
+ Store (PSTB, Local1) /* XXX: Store (PSTB, SLT) ? */
+ And (Local1, 0xff, SLT)
+
+ /* debug */
+ Store (SLT, DPT1)
+ Store (EVT, DPT2)
+
+ Switch (SLT)
+ {
+ Case (0x00) {
+ Notify (\_SB.PCI0.S00, EVT)
+ }
+ Case (0x01) {
+ Notify (\_SB.PCI0.S01, EVT)
+ }
+ Case (0x02) {
+ Notify (\_SB.PCI0.S02, EVT)
+ }
+ Case (0x03) {
+ Notify (\_SB.PCI0.S03, EVT)
+ }
+ Case (0x04) {
+ Notify (\_SB.PCI0.S04, EVT)
+ }
+ Case (0x05) {
+ Notify (\_SB.PCI0.S05, EVT)
+ }
+ Case (0x06) {
+ Notify (\_SB.PCI0.S06, EVT)
+ }
+ Case (0x07) {
+ Notify (\_SB.PCI0.S07, EVT)
+ }
+ Case (0x08) {
+ Notify (\_SB.PCI0.S08, EVT)
+ }
+ Case (0x09) {
+ Notify (\_SB.PCI0.S09, EVT)
+ }
+ Case (0x0a) {
+ Notify (\_SB.PCI0.S0A, EVT)
+ }
+ Case (0x0b) {
+ Notify (\_SB.PCI0.S0B, EVT)
+ }
+ Case (0x0c) {
+ Notify (\_SB.PCI0.S0C, EVT)
+ }
+ Case (0x0d) {
+ Notify (\_SB.PCI0.S0D, EVT)
+ }
+ Case (0x0e) {
+ Notify (\_SB.PCI0.S0E, EVT)
+ }
+ Case (0x0f) {
+ Notify (\_SB.PCI0.S0F, EVT)
+ }
+ Case (0x10) {
+ Notify (\_SB.PCI0.S10, EVT)
+ }
+ Case (0x11) {
+ Notify (\_SB.PCI0.S11, EVT)
+ }
+ Case (0x12) {
+ Notify (\_SB.PCI0.S12, EVT)
+ }
+ Case (0x13) {
+ Notify (\_SB.PCI0.S13, EVT)
+ }
+ Case (0x14) {
+ Notify (\_SB.PCI0.S14, EVT)
+ }
+ Case (0x15) {
+ Notify (\_SB.PCI0.S15, EVT)
+ }
+ Case (0x16) {
+ Notify (\_SB.PCI0.S16, EVT)
+ }
+ Case (0x17) {
+ Notify (\_SB.PCI0.S17, EVT)
+ }
+ Case (0x18) {
+ Notify (\_SB.PCI0.S18, EVT)
+ }
+ Case (0x19) {
+ Notify (\_SB.PCI0.S19, EVT)
+ }
+ Case (0x1a) {
+ Notify (\_SB.PCI0.S1A, EVT)
+ }
+ Case (0x1b) {
+ Notify (\_SB.PCI0.S1B, EVT)
+ }
+ Case (0x1c) {
+ Notify (\_SB.PCI0.S1C, EVT)
+ }
+ Case (0x1d) {
+ Notify (\_SB.PCI0.S1D, EVT)
+ }
+ Case (0x1e) {
+ Notify (\_SB.PCI0.S1E, EVT)
+ }
+ Case (0x1f) {
+ Notify (\_SB.PCI0.S1F, EVT)
+ }
}
}
}
/*
*
* Intel ACPI Component Architecture
- * ASL Optimizing Compiler version 20060707 [Feb 16 2007]
- * Copyright (C) 2000 - 2006 Intel Corporation
+ * ASL Optimizing Compiler version 20090220 [Mar 9 2009]
+ * Copyright (C) 2000 - 2009 Intel Corporation
* Supports ACPI Specification Revision 3.0a
*
- * Compilation of "dsdt.asl" - Tue May 20 14:34:40 2008
+ * Compilation of "dsdt.asl" - Tue Mar 17 10:44:21 2009
*
* C source code output
*
*/
unsigned char AmlCode[] =
{
- 0x44,0x53,0x44,0x54,0x32,0x11,0x00,0x00, /* 00000000 "DSDT2..." */
- 0x02,0xEC,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 "..Xen..." */
+ 0x44,0x53,0x44,0x54,0x02,0x32,0x00,0x00, /* 00000000 "DSDT.2.." */
+ 0x02,0xC6,0x58,0x65,0x6E,0x00,0x00,0x00, /* 00000008 "..Xen..." */
0x48,0x56,0x4D,0x00,0x00,0x00,0x00,0x00, /* 00000010 "HVM....." */
0x00,0x00,0x00,0x00,0x49,0x4E,0x54,0x4C, /* 00000018 "....INTL" */
- 0x07,0x07,0x06,0x20,0x08,0x50,0x4D,0x42, /* 00000020 "... .PMB" */
+ 0x20,0x02,0x09,0x20,0x08,0x50,0x4D,0x42, /* 00000020 " .. .PMB" */
0x53,0x0B,0x00,0x0C,0x08,0x50,0x4D,0x4C, /* 00000028 "S....PML" */
0x4E,0x0A,0x08,0x08,0x49,0x4F,0x42,0x31, /* 00000030 "N...IOB1" */
0x00,0x08,0x49,0x4F,0x4C,0x31,0x00,0x08, /* 00000038 "..IOL1.." */
0x07,0x0A,0x07,0x00,0x00,0x08,0x50,0x49, /* 00000148 "......PI" */
0x43,0x44,0x00,0x14,0x0C,0x5F,0x50,0x49, /* 00000150 "CD..._PI" */
0x43,0x01,0x70,0x68,0x50,0x49,0x43,0x44, /* 00000158 "C.phPICD" */
- 0x10,0x42,0xF1,0x5F,0x53,0x42,0x5F,0x5B, /* 00000160 ".B._SB_[" */
- 0x80,0x42,0x49,0x4F,0x53,0x00,0x0C,0x00, /* 00000168 ".BIOS..." */
- 0xA0,0x0E,0x00,0x0A,0x10,0x5B,0x81,0x21, /* 00000170 ".....[.!" */
- 0x42,0x49,0x4F,0x53,0x01,0x55,0x41,0x52, /* 00000178 "BIOS.UAR" */
- 0x31,0x01,0x55,0x41,0x52,0x32,0x01,0x48, /* 00000180 "1.UAR2.H" */
- 0x50,0x45,0x54,0x01,0x00,0x1D,0x50,0x4D, /* 00000188 "PET...PM" */
- 0x49,0x4E,0x20,0x50,0x4C,0x45,0x4E,0x20, /* 00000190 "IN PLEN " */
- 0x5B,0x82,0x49,0x04,0x4D,0x45,0x4D,0x30, /* 00000198 "[.I.MEM0" */
- 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 000001A0 "._HID.A." */
- 0x0C,0x02,0x08,0x5F,0x43,0x52,0x53,0x11, /* 000001A8 "..._CRS." */
- 0x33,0x0A,0x30,0x8A,0x2B,0x00,0x00,0x0D, /* 000001B0 "3.0.+..." */
- 0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 000001B8 "........" */
+ 0x10,0x83,0xB7,0x02,0x5F,0x53,0x42,0x5F, /* 00000160 "...._SB_" */
+ 0x5B,0x80,0x42,0x49,0x4F,0x53,0x00,0x0C, /* 00000168 "[.BIOS.." */
+ 0x00,0xA0,0x0E,0x00,0x0A,0x10,0x5B,0x81, /* 00000170 "......[." */
+ 0x21,0x42,0x49,0x4F,0x53,0x01,0x55,0x41, /* 00000178 "!BIOS.UA" */
+ 0x52,0x31,0x01,0x55,0x41,0x52,0x32,0x01, /* 00000180 "R1.UAR2." */
+ 0x48,0x50,0x45,0x54,0x01,0x00,0x1D,0x50, /* 00000188 "HPET...P" */
+ 0x4D,0x49,0x4E,0x20,0x50,0x4C,0x45,0x4E, /* 00000190 "MIN PLEN" */
+ 0x20,0x5B,0x82,0x49,0x04,0x4D,0x45,0x4D, /* 00000198 " [.I.MEM" */
+ 0x30,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 000001A0 "0._HID.A" */
+ 0xD0,0x0C,0x02,0x08,0x5F,0x43,0x52,0x53, /* 000001A8 "...._CRS" */
+ 0x11,0x33,0x0A,0x30,0x8A,0x2B,0x00,0x00, /* 000001B0 ".3.0.+.." */
+ 0x0D,0x03,0x00,0x00,0x00,0x00,0x00,0x00, /* 000001B8 "........" */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 000001C0 "........" */
- 0x00,0xFF,0xFF,0x09,0x00,0x00,0x00,0x00, /* 000001C8 "........" */
+ 0x00,0x00,0xFF,0xFF,0x09,0x00,0x00,0x00, /* 000001C8 "........" */
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 000001D0 "........" */
- 0x00,0x00,0x00,0x0A,0x00,0x00,0x00,0x00, /* 000001D8 "........" */
- 0x00,0x79,0x00,0x5B,0x82,0x4E,0xE8,0x50, /* 000001E0 ".y.[.N.P" */
- 0x43,0x49,0x30,0x08,0x5F,0x48,0x49,0x44, /* 000001E8 "CI0._HID" */
- 0x0C,0x41,0xD0,0x0A,0x03,0x08,0x5F,0x55, /* 000001F0 ".A...._U" */
- 0x49,0x44,0x00,0x08,0x5F,0x41,0x44,0x52, /* 000001F8 "ID.._ADR" */
- 0x00,0x08,0x5F,0x42,0x42,0x4E,0x00,0x14, /* 00000200 ".._BBN.." */
- 0x4E,0x0C,0x5F,0x43,0x52,0x53,0x00,0x08, /* 00000208 "N._CRS.." */
- 0x50,0x52,0x54,0x30,0x11,0x42,0x07,0x0A, /* 00000210 "PRT0.B.." */
- 0x6E,0x88,0x0D,0x00,0x02,0x0E,0x00,0x00, /* 00000218 "n......." */
- 0x00,0x00,0x00,0xFF,0x00,0x00,0x00,0x00, /* 00000220 "........" */
- 0x01,0x47,0x01,0xF8,0x0C,0xF8,0x0C,0x01, /* 00000228 ".G......" */
- 0x08,0x88,0x0D,0x00,0x01,0x0C,0x03,0x00, /* 00000230 "........" */
- 0x00,0x00,0x00,0xF7,0x0C,0x00,0x00,0xF8, /* 00000238 "........" */
- 0x0C,0x88,0x0D,0x00,0x01,0x0C,0x03,0x00, /* 00000240 "........" */
- 0x00,0x00,0x0D,0xFF,0xFF,0x00,0x00,0x00, /* 00000248 "........" */
- 0xF3,0x87,0x17,0x00,0x00,0x0C,0x03,0x00, /* 00000250 "........" */
- 0x00,0x00,0x00,0x00,0x00,0x0A,0x00,0xFF, /* 00000258 "........" */
- 0xFF,0x0B,0x00,0x00,0x00,0x00,0x00,0x00, /* 00000260 "........" */
- 0x00,0x02,0x00,0x87,0x17,0x00,0x00,0x0C, /* 00000268 "........" */
- 0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 00000270 "........" */
- 0xF0,0xFF,0xFF,0xFF,0xF4,0x00,0x00,0x00, /* 00000278 "........" */
- 0x00,0x00,0x00,0x00,0x05,0x79,0x00,0x8A, /* 00000280 ".....y.." */
- 0x50,0x52,0x54,0x30,0x0A,0x5C,0x4D,0x4D, /* 00000288 "PRT0.\MM" */
- 0x49,0x4E,0x8A,0x50,0x52,0x54,0x30,0x0A, /* 00000290 "IN.PRT0." */
- 0x60,0x4D,0x4D,0x41,0x58,0x8A,0x50,0x52, /* 00000298 "`MMAX.PR" */
- 0x54,0x30,0x0A,0x68,0x4D,0x4C,0x45,0x4E, /* 000002A0 "T0.hMLEN" */
- 0x70,0x50,0x4D,0x49,0x4E,0x4D,0x4D,0x49, /* 000002A8 "pPMINMMI" */
- 0x4E,0x70,0x50,0x4C,0x45,0x4E,0x4D,0x4C, /* 000002B0 "NpPLENML" */
- 0x45,0x4E,0x72,0x4D,0x4D,0x49,0x4E,0x4D, /* 000002B8 "ENrMMINM" */
- 0x4C,0x45,0x4E,0x4D,0x4D,0x41,0x58,0x74, /* 000002C0 "LENMMAXt" */
- 0x4D,0x4D,0x41,0x58,0x01,0x4D,0x4D,0x41, /* 000002C8 "MMAX.MMA" */
- 0x58,0xA4,0x50,0x52,0x54,0x30,0x08,0x42, /* 000002D0 "X.PRT0.B" */
- 0x55,0x46,0x41,0x11,0x09,0x0A,0x06,0x23, /* 000002D8 "UFA....#" */
- 0x20,0x0C,0x18,0x79,0x00,0x08,0x42,0x55, /* 000002E0 " ..y..BU" */
- 0x46,0x42,0x11,0x09,0x0A,0x06,0x23,0x00, /* 000002E8 "FB....#." */
- 0x00,0x18,0x79,0x00,0x8B,0x42,0x55,0x46, /* 000002F0 "..y..BUF" */
- 0x42,0x01,0x49,0x52,0x51,0x56,0x5B,0x82, /* 000002F8 "B.IRQV[." */
- 0x48,0x08,0x4C,0x4E,0x4B,0x41,0x08,0x5F, /* 00000300 "H.LNKA._" */
- 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F, /* 00000308 "HID.A..." */
- 0x08,0x5F,0x55,0x49,0x44,0x01,0x14,0x1C, /* 00000310 "._UID..." */
- 0x5F,0x53,0x54,0x41,0x00,0x7B,0x50,0x49, /* 00000318 "_STA.{PI" */
- 0x52,0x41,0x0A,0x80,0x60,0xA0,0x08,0x93, /* 00000320 "RA..`..." */
- 0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1,0x04, /* 00000328 "`......." */
- 0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50,0x52, /* 00000330 "....._PR" */
- 0x53,0x00,0xA4,0x42,0x55,0x46,0x41,0x14, /* 00000338 "S..BUFA." */
- 0x11,0x5F,0x44,0x49,0x53,0x00,0x7D,0x50, /* 00000340 "._DIS.}P" */
- 0x49,0x52,0x41,0x0A,0x80,0x50,0x49,0x52, /* 00000348 "IRA..PIR" */
- 0x41,0x14,0x1A,0x5F,0x43,0x52,0x53,0x00, /* 00000350 "A.._CRS." */
- 0x7B,0x50,0x49,0x52,0x41,0x0A,0x0F,0x60, /* 00000358 "{PIRA..`" */
- 0x79,0x01,0x60,0x49,0x52,0x51,0x56,0xA4, /* 00000360 "y.`IRQV." */
- 0x42,0x55,0x46,0x42,0x14,0x1B,0x5F,0x53, /* 00000368 "BUFB.._S" */
- 0x52,0x53,0x01,0x8B,0x68,0x01,0x49,0x52, /* 00000370 "RS..h.IR" */
- 0x51,0x31,0x82,0x49,0x52,0x51,0x31,0x60, /* 00000378 "Q1.IRQ1`" */
- 0x76,0x60,0x70,0x60,0x50,0x49,0x52,0x41, /* 00000380 "v`p`PIRA" */
- 0x5B,0x82,0x49,0x08,0x4C,0x4E,0x4B,0x42, /* 00000388 "[.I.LNKB" */
- 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 00000390 "._HID.A." */
- 0x0C,0x0F,0x08,0x5F,0x55,0x49,0x44,0x0A, /* 00000398 "..._UID." */
- 0x02,0x14,0x1C,0x5F,0x53,0x54,0x41,0x00, /* 000003A0 "..._STA." */
- 0x7B,0x50,0x49,0x52,0x42,0x0A,0x80,0x60, /* 000003A8 "{PIRB..`" */
- 0xA0,0x08,0x93,0x60,0x0A,0x80,0xA4,0x0A, /* 000003B0 "...`...." */
- 0x09,0xA1,0x04,0xA4,0x0A,0x0B,0x14,0x0B, /* 000003B8 "........" */
- 0x5F,0x50,0x52,0x53,0x00,0xA4,0x42,0x55, /* 000003C0 "_PRS..BU" */
- 0x46,0x41,0x14,0x11,0x5F,0x44,0x49,0x53, /* 000003C8 "FA.._DIS" */
- 0x00,0x7D,0x50,0x49,0x52,0x42,0x0A,0x80, /* 000003D0 ".}PIRB.." */
- 0x50,0x49,0x52,0x42,0x14,0x1A,0x5F,0x43, /* 000003D8 "PIRB.._C" */
- 0x52,0x53,0x00,0x7B,0x50,0x49,0x52,0x42, /* 000003E0 "RS.{PIRB" */
- 0x0A,0x0F,0x60,0x79,0x01,0x60,0x49,0x52, /* 000003E8 "..`y.`IR" */
- 0x51,0x56,0xA4,0x42,0x55,0x46,0x42,0x14, /* 000003F0 "QV.BUFB." */
- 0x1B,0x5F,0x53,0x52,0x53,0x01,0x8B,0x68, /* 000003F8 "._SRS..h" */
- 0x01,0x49,0x52,0x51,0x31,0x82,0x49,0x52, /* 00000400 ".IRQ1.IR" */
- 0x51,0x31,0x60,0x76,0x60,0x70,0x60,0x50, /* 00000408 "Q1`v`p`P" */
- 0x49,0x52,0x42,0x5B,0x82,0x49,0x08,0x4C, /* 00000410 "IRB[.I.L" */
- 0x4E,0x4B,0x43,0x08,0x5F,0x48,0x49,0x44, /* 00000418 "NKC._HID" */
- 0x0C,0x41,0xD0,0x0C,0x0F,0x08,0x5F,0x55, /* 00000420 ".A...._U" */
- 0x49,0x44,0x0A,0x03,0x14,0x1C,0x5F,0x53, /* 00000428 "ID...._S" */
- 0x54,0x41,0x00,0x7B,0x50,0x49,0x52,0x43, /* 00000430 "TA.{PIRC" */
- 0x0A,0x80,0x60,0xA0,0x08,0x93,0x60,0x0A, /* 00000438 "..`...`." */
- 0x80,0xA4,0x0A,0x09,0xA1,0x04,0xA4,0x0A, /* 00000440 "........" */
- 0x0B,0x14,0x0B,0x5F,0x50,0x52,0x53,0x00, /* 00000448 "..._PRS." */
- 0xA4,0x42,0x55,0x46,0x41,0x14,0x11,0x5F, /* 00000450 ".BUFA.._" */
- 0x44,0x49,0x53,0x00,0x7D,0x50,0x49,0x52, /* 00000458 "DIS.}PIR" */
- 0x43,0x0A,0x80,0x50,0x49,0x52,0x43,0x14, /* 00000460 "C..PIRC." */
- 0x1A,0x5F,0x43,0x52,0x53,0x00,0x7B,0x50, /* 00000468 "._CRS.{P" */
- 0x49,0x52,0x43,0x0A,0x0F,0x60,0x79,0x01, /* 00000470 "IRC..`y." */
- 0x60,0x49,0x52,0x51,0x56,0xA4,0x42,0x55, /* 00000478 "`IRQV.BU" */
- 0x46,0x42,0x14,0x1B,0x5F,0x53,0x52,0x53, /* 00000480 "FB.._SRS" */
- 0x01,0x8B,0x68,0x01,0x49,0x52,0x51,0x31, /* 00000488 "..h.IRQ1" */
- 0x82,0x49,0x52,0x51,0x31,0x60,0x76,0x60, /* 00000490 ".IRQ1`v`" */
- 0x70,0x60,0x50,0x49,0x52,0x43,0x5B,0x82, /* 00000498 "p`PIRC[." */
- 0x49,0x08,0x4C,0x4E,0x4B,0x44,0x08,0x5F, /* 000004A0 "I.LNKD._" */
- 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F, /* 000004A8 "HID.A..." */
- 0x08,0x5F,0x55,0x49,0x44,0x0A,0x04,0x14, /* 000004B0 "._UID..." */
- 0x1C,0x5F,0x53,0x54,0x41,0x00,0x7B,0x50, /* 000004B8 "._STA.{P" */
- 0x49,0x52,0x44,0x0A,0x80,0x60,0xA0,0x08, /* 000004C0 "IRD..`.." */
- 0x93,0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1, /* 000004C8 ".`......" */
- 0x04,0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50, /* 000004D0 "......_P" */
- 0x52,0x53,0x00,0xA4,0x42,0x55,0x46,0x41, /* 000004D8 "RS..BUFA" */
- 0x14,0x11,0x5F,0x44,0x49,0x53,0x00,0x7D, /* 000004E0 ".._DIS.}" */
- 0x50,0x49,0x52,0x44,0x0A,0x80,0x50,0x49, /* 000004E8 "PIRD..PI" */
- 0x52,0x44,0x14,0x1A,0x5F,0x43,0x52,0x53, /* 000004F0 "RD.._CRS" */
- 0x00,0x7B,0x50,0x49,0x52,0x44,0x0A,0x0F, /* 000004F8 ".{PIRD.." */
- 0x60,0x79,0x01,0x60,0x49,0x52,0x51,0x56, /* 00000500 "`y.`IRQV" */
- 0xA4,0x42,0x55,0x46,0x42,0x14,0x1B,0x5F, /* 00000508 ".BUFB.._" */
- 0x53,0x52,0x53,0x01,0x8B,0x68,0x01,0x49, /* 00000510 "SRS..h.I" */
- 0x52,0x51,0x31,0x82,0x49,0x52,0x51,0x31, /* 00000518 "RQ1.IRQ1" */
- 0x60,0x76,0x60,0x70,0x60,0x50,0x49,0x52, /* 00000520 "`v`p`PIR" */
- 0x44,0x5B,0x82,0x44,0x05,0x48,0x50,0x45, /* 00000528 "D[.D.HPE" */
- 0x54,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 00000530 "T._HID.A" */
- 0xD0,0x01,0x03,0x08,0x5F,0x55,0x49,0x44, /* 00000538 "...._UID" */
- 0x00,0x14,0x18,0x5F,0x53,0x54,0x41,0x00, /* 00000540 "..._STA." */
- 0xA0,0x0C,0x93,0x5E,0x5E,0x5E,0x48,0x50, /* 00000548 "...^^^HP" */
- 0x45,0x54,0x00,0xA4,0x00,0xA1,0x04,0xA4, /* 00000550 "ET......" */
- 0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,0x11, /* 00000558 "..._CRS." */
- 0x1F,0x0A,0x1C,0x87,0x17,0x00,0x00,0x0D, /* 00000560 "........" */
- 0x01,0x00,0x00,0x00,0x00,0x00,0x00,0xD0, /* 00000568 "........" */
- 0xFE,0xFF,0x03,0xD0,0xFE,0x00,0x00,0x00, /* 00000570 "........" */
- 0x00,0x00,0x04,0x00,0x00,0x79,0x00,0x14, /* 00000578 ".....y.." */
- 0x16,0x5F,0x50,0x52,0x54,0x00,0xA0,0x0A, /* 00000580 "._PRT..." */
- 0x50,0x49,0x43,0x44,0xA4,0x50,0x52,0x54, /* 00000588 "PICD.PRT" */
- 0x41,0xA4,0x50,0x52,0x54,0x50,0x08,0x50, /* 00000590 "A.PRTP.P" */
- 0x52,0x54,0x50,0x12,0x49,0x36,0x3C,0x12, /* 00000598 "RTP.I6<." */
- 0x0D,0x04,0x0C,0xFF,0xFF,0x01,0x00,0x00, /* 000005A0 "........" */
- 0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0D,0x04, /* 000005A8 "LNKB...." */
- 0x0C,0xFF,0xFF,0x01,0x00,0x01,0x4C,0x4E, /* 000005B0 "......LN" */
- 0x4B,0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000005B8 "KC......" */
- 0xFF,0x01,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 000005C0 ".....LNK" */
- 0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 000005C8 "D......." */
- 0x01,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x41, /* 000005D0 "....LNKA" */
- 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x02, /* 000005D8 "........" */
- 0x00,0x00,0x4C,0x4E,0x4B,0x43,0x00,0x12, /* 000005E0 "..LNKC.." */
- 0x0D,0x04,0x0C,0xFF,0xFF,0x02,0x00,0x01, /* 000005E8 "........" */
- 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04, /* 000005F0 "LNKD...." */
- 0x0C,0xFF,0xFF,0x02,0x00,0x0A,0x02,0x4C, /* 000005F8 ".......L" */
- 0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C, /* 00000600 "NKA....." */
- 0xFF,0xFF,0x02,0x00,0x0A,0x03,0x4C,0x4E, /* 00000608 "......LN" */
- 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000610 "KB......" */
- 0xFF,0x03,0x00,0x00,0x4C,0x4E,0x4B,0x44, /* 00000618 "....LNKD" */
- 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x03, /* 00000620 "........" */
- 0x00,0x01,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000628 "..LNKA.." */
- 0x0E,0x04,0x0C,0xFF,0xFF,0x03,0x00,0x0A, /* 00000630 "........" */
- 0x02,0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0E, /* 00000638 ".LNKB..." */
- 0x04,0x0C,0xFF,0xFF,0x03,0x00,0x0A,0x03, /* 00000640 "........" */
- 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 00000648 "LNKC...." */
- 0x0C,0xFF,0xFF,0x04,0x00,0x00,0x4C,0x4E, /* 00000650 "......LN" */
- 0x4B,0x41,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000658 "KA......" */
- 0xFF,0x04,0x00,0x01,0x4C,0x4E,0x4B,0x42, /* 00000660 "....LNKB" */
- 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x04, /* 00000668 "........" */
- 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x43,0x00, /* 00000670 "...LNKC." */
- 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x04,0x00, /* 00000678 "........" */
- 0x0A,0x03,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000680 "..LNKD.." */
- 0x0D,0x04,0x0C,0xFF,0xFF,0x05,0x00,0x00, /* 00000688 "........" */
- 0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0D,0x04, /* 00000690 "LNKB...." */
- 0x0C,0xFF,0xFF,0x05,0x00,0x01,0x4C,0x4E, /* 00000698 "......LN" */
- 0x4B,0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000006A0 "KC......" */
- 0xFF,0x05,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 000006A8 ".....LNK" */
- 0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 000006B0 "D......." */
- 0x05,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x41, /* 000006B8 "....LNKA" */
- 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x06, /* 000006C0 "........" */
- 0x00,0x00,0x4C,0x4E,0x4B,0x43,0x00,0x12, /* 000006C8 "..LNKC.." */
- 0x0D,0x04,0x0C,0xFF,0xFF,0x06,0x00,0x01, /* 000006D0 "........" */
- 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04, /* 000006D8 "LNKD...." */
- 0x0C,0xFF,0xFF,0x06,0x00,0x0A,0x02,0x4C, /* 000006E0 ".......L" */
- 0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C, /* 000006E8 "NKA....." */
- 0xFF,0xFF,0x06,0x00,0x0A,0x03,0x4C,0x4E, /* 000006F0 "......LN" */
- 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 000006F8 "KB......" */
- 0xFF,0x07,0x00,0x00,0x4C,0x4E,0x4B,0x44, /* 00000700 "....LNKD" */
- 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x07, /* 00000708 "........" */
- 0x00,0x01,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000710 "..LNKA.." */
- 0x0E,0x04,0x0C,0xFF,0xFF,0x07,0x00,0x0A, /* 00000718 "........" */
- 0x02,0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0E, /* 00000720 ".LNKB..." */
- 0x04,0x0C,0xFF,0xFF,0x07,0x00,0x0A,0x03, /* 00000728 "........" */
- 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 00000730 "LNKC...." */
- 0x0C,0xFF,0xFF,0x08,0x00,0x00,0x4C,0x4E, /* 00000738 "......LN" */
- 0x4B,0x41,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000740 "KA......" */
- 0xFF,0x08,0x00,0x01,0x4C,0x4E,0x4B,0x42, /* 00000748 "....LNKB" */
- 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x08, /* 00000750 "........" */
- 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x43,0x00, /* 00000758 "...LNKC." */
- 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x08,0x00, /* 00000760 "........" */
- 0x0A,0x03,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000768 "..LNKD.." */
- 0x0D,0x04,0x0C,0xFF,0xFF,0x09,0x00,0x00, /* 00000770 "........" */
- 0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0D,0x04, /* 00000778 "LNKB...." */
- 0x0C,0xFF,0xFF,0x09,0x00,0x01,0x4C,0x4E, /* 00000780 "......LN" */
- 0x4B,0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000788 "KC......" */
- 0xFF,0x09,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 00000790 ".....LNK" */
- 0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000798 "D......." */
- 0x09,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x41, /* 000007A0 "....LNKA" */
- 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0A, /* 000007A8 "........" */
- 0x00,0x00,0x4C,0x4E,0x4B,0x43,0x00,0x12, /* 000007B0 "..LNKC.." */
- 0x0D,0x04,0x0C,0xFF,0xFF,0x0A,0x00,0x01, /* 000007B8 "........" */
- 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04, /* 000007C0 "LNKD...." */
- 0x0C,0xFF,0xFF,0x0A,0x00,0x0A,0x02,0x4C, /* 000007C8 ".......L" */
- 0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C, /* 000007D0 "NKA....." */
- 0xFF,0xFF,0x0A,0x00,0x0A,0x03,0x4C,0x4E, /* 000007D8 "......LN" */
- 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 000007E0 "KB......" */
- 0xFF,0x0B,0x00,0x00,0x4C,0x4E,0x4B,0x44, /* 000007E8 "....LNKD" */
- 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0B, /* 000007F0 "........" */
- 0x00,0x01,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 000007F8 "..LNKA.." */
- 0x0E,0x04,0x0C,0xFF,0xFF,0x0B,0x00,0x0A, /* 00000800 "........" */
- 0x02,0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0E, /* 00000808 ".LNKB..." */
- 0x04,0x0C,0xFF,0xFF,0x0B,0x00,0x0A,0x03, /* 00000810 "........" */
- 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 00000818 "LNKC...." */
- 0x0C,0xFF,0xFF,0x0C,0x00,0x00,0x4C,0x4E, /* 00000820 "......LN" */
- 0x4B,0x41,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000828 "KA......" */
- 0xFF,0x0C,0x00,0x01,0x4C,0x4E,0x4B,0x42, /* 00000830 "....LNKB" */
- 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0C, /* 00000838 "........" */
- 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x43,0x00, /* 00000840 "...LNKC." */
- 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0C,0x00, /* 00000848 "........" */
- 0x0A,0x03,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000850 "..LNKD.." */
- 0x0D,0x04,0x0C,0xFF,0xFF,0x0D,0x00,0x00, /* 00000858 "........" */
- 0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0D,0x04, /* 00000860 "LNKB...." */
- 0x0C,0xFF,0xFF,0x0D,0x00,0x01,0x4C,0x4E, /* 00000868 "......LN" */
- 0x4B,0x43,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000870 "KC......" */
- 0xFF,0x0D,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 00000878 ".....LNK" */
- 0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000880 "D......." */
- 0x0D,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x41, /* 00000888 "....LNKA" */
- 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0E, /* 00000890 "........" */
- 0x00,0x00,0x4C,0x4E,0x4B,0x43,0x00,0x12, /* 00000898 "..LNKC.." */
- 0x0D,0x04,0x0C,0xFF,0xFF,0x0E,0x00,0x01, /* 000008A0 "........" */
- 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0E,0x04, /* 000008A8 "LNKD...." */
- 0x0C,0xFF,0xFF,0x0E,0x00,0x0A,0x02,0x4C, /* 000008B0 ".......L" */
- 0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04,0x0C, /* 000008B8 "NKA....." */
- 0xFF,0xFF,0x0E,0x00,0x0A,0x03,0x4C,0x4E, /* 000008C0 "......LN" */
- 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 000008C8 "KB......" */
- 0xFF,0x0F,0x00,0x00,0x4C,0x4E,0x4B,0x44, /* 000008D0 "....LNKD" */
- 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0F, /* 000008D8 "........" */
- 0x00,0x01,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 000008E0 "..LNKA.." */
- 0x0E,0x04,0x0C,0xFF,0xFF,0x0F,0x00,0x0A, /* 000008E8 "........" */
- 0x02,0x4C,0x4E,0x4B,0x42,0x00,0x12,0x0E, /* 000008F0 ".LNKB..." */
- 0x04,0x0C,0xFF,0xFF,0x0F,0x00,0x0A,0x03, /* 000008F8 "........" */
- 0x4C,0x4E,0x4B,0x43,0x00,0x08,0x50,0x52, /* 00000900 "LNKC..PR" */
- 0x54,0x41,0x12,0x41,0x2F,0x3C,0x12,0x0B, /* 00000908 "TA.A/<.." */
- 0x04,0x0C,0xFF,0xFF,0x01,0x00,0x00,0x00, /* 00000910 "........" */
- 0x0A,0x14,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000918 "........" */
- 0x01,0x00,0x01,0x00,0x0A,0x15,0x12,0x0C, /* 00000920 "........" */
- 0x04,0x0C,0xFF,0xFF,0x01,0x00,0x0A,0x02, /* 00000928 "........" */
- 0x00,0x0A,0x16,0x12,0x0C,0x04,0x0C,0xFF, /* 00000930 "........" */
- 0xFF,0x01,0x00,0x0A,0x03,0x00,0x0A,0x17, /* 00000938 "........" */
- 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x02,0x00, /* 00000940 "........" */
- 0x00,0x00,0x0A,0x18,0x12,0x0B,0x04,0x0C, /* 00000948 "........" */
- 0xFF,0xFF,0x02,0x00,0x01,0x00,0x0A,0x19, /* 00000950 "........" */
- 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x02,0x00, /* 00000958 "........" */
- 0x0A,0x02,0x00,0x0A,0x1A,0x12,0x0C,0x04, /* 00000960 "........" */
- 0x0C,0xFF,0xFF,0x02,0x00,0x0A,0x03,0x00, /* 00000968 "........" */
- 0x0A,0x1B,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000970 "........" */
- 0x03,0x00,0x00,0x00,0x0A,0x1C,0x12,0x0B, /* 00000978 "........" */
- 0x04,0x0C,0xFF,0xFF,0x03,0x00,0x01,0x00, /* 00000980 "........" */
- 0x0A,0x1D,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000988 "........" */
- 0x03,0x00,0x0A,0x02,0x00,0x0A,0x1E,0x12, /* 00000990 "........" */
- 0x0C,0x04,0x0C,0xFF,0xFF,0x03,0x00,0x0A, /* 00000998 "........" */
- 0x03,0x00,0x0A,0x1F,0x12,0x0B,0x04,0x0C, /* 000009A0 "........" */
- 0xFF,0xFF,0x04,0x00,0x00,0x00,0x0A,0x20, /* 000009A8 "....... " */
- 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x04,0x00, /* 000009B0 "........" */
- 0x01,0x00,0x0A,0x21,0x12,0x0C,0x04,0x0C, /* 000009B8 "...!...." */
- 0xFF,0xFF,0x04,0x00,0x0A,0x02,0x00,0x0A, /* 000009C0 "........" */
- 0x22,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x04, /* 000009C8 ""......." */
- 0x00,0x0A,0x03,0x00,0x0A,0x23,0x12,0x0B, /* 000009D0 ".....#.." */
- 0x04,0x0C,0xFF,0xFF,0x05,0x00,0x00,0x00, /* 000009D8 "........" */
- 0x0A,0x24,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 000009E0 ".$......" */
- 0x05,0x00,0x01,0x00,0x0A,0x25,0x12,0x0C, /* 000009E8 ".....%.." */
- 0x04,0x0C,0xFF,0xFF,0x05,0x00,0x0A,0x02, /* 000009F0 "........" */
- 0x00,0x0A,0x26,0x12,0x0C,0x04,0x0C,0xFF, /* 000009F8 "..&....." */
- 0xFF,0x05,0x00,0x0A,0x03,0x00,0x0A,0x27, /* 00000A00 ".......'" */
- 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x06,0x00, /* 00000A08 "........" */
- 0x00,0x00,0x0A,0x28,0x12,0x0B,0x04,0x0C, /* 00000A10 "...(...." */
- 0xFF,0xFF,0x06,0x00,0x01,0x00,0x0A,0x29, /* 00000A18 ".......)" */
- 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x06,0x00, /* 00000A20 "........" */
- 0x0A,0x02,0x00,0x0A,0x2A,0x12,0x0C,0x04, /* 00000A28 "....*..." */
- 0x0C,0xFF,0xFF,0x06,0x00,0x0A,0x03,0x00, /* 00000A30 "........" */
- 0x0A,0x2B,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000A38 ".+......" */
- 0x07,0x00,0x00,0x00,0x0A,0x2C,0x12,0x0B, /* 00000A40 ".....,.." */
- 0x04,0x0C,0xFF,0xFF,0x07,0x00,0x01,0x00, /* 00000A48 "........" */
- 0x0A,0x2D,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000A50 ".-......" */
- 0x07,0x00,0x0A,0x02,0x00,0x0A,0x2E,0x12, /* 00000A58 "........" */
- 0x0C,0x04,0x0C,0xFF,0xFF,0x07,0x00,0x0A, /* 00000A60 "........" */
- 0x03,0x00,0x0A,0x2F,0x12,0x0B,0x04,0x0C, /* 00000A68 ".../...." */
- 0xFF,0xFF,0x08,0x00,0x00,0x00,0x0A,0x11, /* 00000A70 "........" */
- 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x08,0x00, /* 00000A78 "........" */
- 0x01,0x00,0x0A,0x12,0x12,0x0C,0x04,0x0C, /* 00000A80 "........" */
- 0xFF,0xFF,0x08,0x00,0x0A,0x02,0x00,0x0A, /* 00000A88 "........" */
- 0x13,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x08, /* 00000A90 "........" */
- 0x00,0x0A,0x03,0x00,0x0A,0x14,0x12,0x0B, /* 00000A98 "........" */
- 0x04,0x0C,0xFF,0xFF,0x09,0x00,0x00,0x00, /* 00000AA0 "........" */
- 0x0A,0x15,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000AA8 "........" */
- 0x09,0x00,0x01,0x00,0x0A,0x16,0x12,0x0C, /* 00000AB0 "........" */
- 0x04,0x0C,0xFF,0xFF,0x09,0x00,0x0A,0x02, /* 00000AB8 "........" */
- 0x00,0x0A,0x17,0x12,0x0C,0x04,0x0C,0xFF, /* 00000AC0 "........" */
- 0xFF,0x09,0x00,0x0A,0x03,0x00,0x0A,0x18, /* 00000AC8 "........" */
- 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0A,0x00, /* 00000AD0 "........" */
- 0x00,0x00,0x0A,0x19,0x12,0x0B,0x04,0x0C, /* 00000AD8 "........" */
- 0xFF,0xFF,0x0A,0x00,0x01,0x00,0x0A,0x1A, /* 00000AE0 "........" */
- 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0A,0x00, /* 00000AE8 "........" */
- 0x0A,0x02,0x00,0x0A,0x1B,0x12,0x0C,0x04, /* 00000AF0 "........" */
- 0x0C,0xFF,0xFF,0x0A,0x00,0x0A,0x03,0x00, /* 00000AF8 "........" */
- 0x0A,0x1C,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000B00 "........" */
- 0x0B,0x00,0x00,0x00,0x0A,0x1D,0x12,0x0B, /* 00000B08 "........" */
- 0x04,0x0C,0xFF,0xFF,0x0B,0x00,0x01,0x00, /* 00000B10 "........" */
- 0x0A,0x1E,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000B18 "........" */
- 0x0B,0x00,0x0A,0x02,0x00,0x0A,0x1F,0x12, /* 00000B20 "........" */
- 0x0C,0x04,0x0C,0xFF,0xFF,0x0B,0x00,0x0A, /* 00000B28 "........" */
- 0x03,0x00,0x0A,0x20,0x12,0x0B,0x04,0x0C, /* 00000B30 "... ...." */
- 0xFF,0xFF,0x0C,0x00,0x00,0x00,0x0A,0x21, /* 00000B38 ".......!" */
- 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0C,0x00, /* 00000B40 "........" */
- 0x01,0x00,0x0A,0x22,0x12,0x0C,0x04,0x0C, /* 00000B48 "..."...." */
- 0xFF,0xFF,0x0C,0x00,0x0A,0x02,0x00,0x0A, /* 00000B50 "........" */
- 0x23,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0C, /* 00000B58 "#......." */
- 0x00,0x0A,0x03,0x00,0x0A,0x24,0x12,0x0B, /* 00000B60 ".....$.." */
- 0x04,0x0C,0xFF,0xFF,0x0D,0x00,0x00,0x00, /* 00000B68 "........" */
- 0x0A,0x25,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000B70 ".%......" */
- 0x0D,0x00,0x01,0x00,0x0A,0x26,0x12,0x0C, /* 00000B78 ".....&.." */
- 0x04,0x0C,0xFF,0xFF,0x0D,0x00,0x0A,0x02, /* 00000B80 "........" */
- 0x00,0x0A,0x27,0x12,0x0C,0x04,0x0C,0xFF, /* 00000B88 "..'....." */
- 0xFF,0x0D,0x00,0x0A,0x03,0x00,0x0A,0x28, /* 00000B90 ".......(" */
- 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0E,0x00, /* 00000B98 "........" */
- 0x00,0x00,0x0A,0x29,0x12,0x0B,0x04,0x0C, /* 00000BA0 "...)...." */
- 0xFF,0xFF,0x0E,0x00,0x01,0x00,0x0A,0x2A, /* 00000BA8 ".......*" */
- 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0E,0x00, /* 00000BB0 "........" */
- 0x0A,0x02,0x00,0x0A,0x2B,0x12,0x0C,0x04, /* 00000BB8 "....+..." */
- 0x0C,0xFF,0xFF,0x0E,0x00,0x0A,0x03,0x00, /* 00000BC0 "........" */
- 0x0A,0x2C,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000BC8 ".,......" */
- 0x0F,0x00,0x00,0x00,0x0A,0x2D,0x12,0x0B, /* 00000BD0 ".....-.." */
- 0x04,0x0C,0xFF,0xFF,0x0F,0x00,0x01,0x00, /* 00000BD8 "........" */
- 0x0A,0x2E,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000BE0 "........" */
- 0x0F,0x00,0x0A,0x02,0x00,0x0A,0x2F,0x12, /* 00000BE8 "....../." */
- 0x0C,0x04,0x0C,0xFF,0xFF,0x0F,0x00,0x0A, /* 00000BF0 "........" */
- 0x03,0x00,0x0A,0x10,0x5B,0x82,0x46,0x37, /* 00000BF8 "....[.F7" */
- 0x49,0x53,0x41,0x5F,0x08,0x5F,0x41,0x44, /* 00000C00 "ISA_._AD" */
- 0x52,0x0C,0x00,0x00,0x01,0x00,0x5B,0x80, /* 00000C08 "R.....[." */
- 0x50,0x49,0x52,0x51,0x02,0x0A,0x60,0x0A, /* 00000C10 "PIRQ..`." */
- 0x04,0x10,0x2E,0x5C,0x00,0x5B,0x81,0x29, /* 00000C18 "...\.[.)" */
- 0x5C,0x2F,0x04,0x5F,0x53,0x42,0x5F,0x50, /* 00000C20 "\/._SB_P" */
- 0x43,0x49,0x30,0x49,0x53,0x41,0x5F,0x50, /* 00000C28 "CI0ISA_P" */
- 0x49,0x52,0x51,0x01,0x50,0x49,0x52,0x41, /* 00000C30 "IRQ.PIRA" */
- 0x08,0x50,0x49,0x52,0x42,0x08,0x50,0x49, /* 00000C38 ".PIRB.PI" */
- 0x52,0x43,0x08,0x50,0x49,0x52,0x44,0x08, /* 00000C40 "RC.PIRD." */
- 0x5B,0x82,0x46,0x0B,0x53,0x59,0x53,0x52, /* 00000C48 "[.F.SYSR" */
- 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 00000C50 "._HID.A." */
- 0x0C,0x02,0x08,0x5F,0x55,0x49,0x44,0x01, /* 00000C58 "..._UID." */
- 0x08,0x43,0x52,0x53,0x5F,0x11,0x4E,0x08, /* 00000C60 ".CRS_.N." */
- 0x0A,0x8A,0x47,0x01,0x10,0x00,0x10,0x00, /* 00000C68 "..G....." */
- 0x00,0x10,0x47,0x01,0x22,0x00,0x22,0x00, /* 00000C70 "..G."."." */
- 0x00,0x0C,0x47,0x01,0x30,0x00,0x30,0x00, /* 00000C78 "..G.0.0." */
- 0x00,0x10,0x47,0x01,0x44,0x00,0x44,0x00, /* 00000C80 "..G.D.D." */
- 0x00,0x1C,0x47,0x01,0x62,0x00,0x62,0x00, /* 00000C88 "..G.b.b." */
- 0x00,0x02,0x47,0x01,0x65,0x00,0x65,0x00, /* 00000C90 "..G.e.e." */
- 0x00,0x0B,0x47,0x01,0x72,0x00,0x72,0x00, /* 00000C98 "..G.r.r." */
- 0x00,0x0E,0x47,0x01,0x80,0x00,0x80,0x00, /* 00000CA0 "..G....." */
- 0x00,0x01,0x47,0x01,0x84,0x00,0x84,0x00, /* 00000CA8 "..G....." */
- 0x00,0x03,0x47,0x01,0x88,0x00,0x88,0x00, /* 00000CB0 "..G....." */
- 0x00,0x01,0x47,0x01,0x8C,0x00,0x8C,0x00, /* 00000CB8 "..G....." */
- 0x00,0x03,0x47,0x01,0x90,0x00,0x90,0x00, /* 00000CC0 "..G....." */
- 0x00,0x10,0x47,0x01,0xA2,0x00,0xA2,0x00, /* 00000CC8 "..G....." */
- 0x00,0x1C,0x47,0x01,0xE0,0x00,0xE0,0x00, /* 00000CD0 "..G....." */
- 0x00,0x10,0x47,0x01,0xA0,0x08,0xA0,0x08, /* 00000CD8 "..G....." */
- 0x00,0x04,0x47,0x01,0xC0,0x0C,0xC0,0x0C, /* 00000CE0 "..G....." */
- 0x00,0x10,0x47,0x01,0xD0,0x04,0xD0,0x04, /* 00000CE8 "..G....." */
- 0x00,0x02,0x79,0x00,0x14,0x0B,0x5F,0x43, /* 00000CF0 "..y..._C" */
- 0x52,0x53,0x00,0xA4,0x43,0x52,0x53,0x5F, /* 00000CF8 "RS..CRS_" */
- 0x5B,0x82,0x2B,0x50,0x49,0x43,0x5F,0x08, /* 00000D00 "[.+PIC_." */
- 0x5F,0x48,0x49,0x44,0x0B,0x41,0xD0,0x08, /* 00000D08 "_HID.A.." */
- 0x5F,0x43,0x52,0x53,0x11,0x18,0x0A,0x15, /* 00000D10 "_CRS...." */
- 0x47,0x01,0x20,0x00,0x20,0x00,0x01,0x02, /* 00000D18 "G. . ..." */
- 0x47,0x01,0xA0,0x00,0xA0,0x00,0x01,0x02, /* 00000D20 "G......." */
- 0x22,0x04,0x00,0x79,0x00,0x5B,0x82,0x47, /* 00000D28 ""..y.[.G" */
- 0x05,0x44,0x4D,0x41,0x30,0x08,0x5F,0x48, /* 00000D30 ".DMA0._H" */
- 0x49,0x44,0x0C,0x41,0xD0,0x02,0x00,0x08, /* 00000D38 "ID.A...." */
- 0x5F,0x43,0x52,0x53,0x11,0x41,0x04,0x0A, /* 00000D40 "_CRS.A.." */
- 0x3D,0x2A,0x10,0x04,0x47,0x01,0x00,0x00, /* 00000D48 "=*..G..." */
- 0x00,0x00,0x00,0x10,0x47,0x01,0x81,0x00, /* 00000D50 "....G..." */
- 0x81,0x00,0x00,0x03,0x47,0x01,0x87,0x00, /* 00000D58 "....G..." */
- 0x87,0x00,0x00,0x01,0x47,0x01,0x89,0x00, /* 00000D60 "....G..." */
- 0x89,0x00,0x00,0x03,0x47,0x01,0x8F,0x00, /* 00000D68 "....G..." */
- 0x8F,0x00,0x00,0x01,0x47,0x01,0xC0,0x00, /* 00000D70 "....G..." */
- 0xC0,0x00,0x00,0x20,0x47,0x01,0x80,0x04, /* 00000D78 "... G..." */
- 0x80,0x04,0x00,0x10,0x79,0x00,0x5B,0x82, /* 00000D80 "....y.[." */
- 0x25,0x54,0x4D,0x52,0x5F,0x08,0x5F,0x48, /* 00000D88 "%TMR_._H" */
- 0x49,0x44,0x0C,0x41,0xD0,0x01,0x00,0x08, /* 00000D90 "ID.A...." */
- 0x5F,0x43,0x52,0x53,0x11,0x10,0x0A,0x0D, /* 00000D98 "_CRS...." */
- 0x47,0x01,0x40,0x00,0x40,0x00,0x00,0x04, /* 00000DA0 "G.@.@..." */
- 0x22,0x01,0x00,0x79,0x00,0x5B,0x82,0x25, /* 00000DA8 ""..y.[.%" */
- 0x52,0x54,0x43,0x5F,0x08,0x5F,0x48,0x49, /* 00000DB0 "RTC_._HI" */
- 0x44,0x0C,0x41,0xD0,0x0B,0x00,0x08,0x5F, /* 00000DB8 "D.A...._" */
- 0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47, /* 00000DC0 "CRS....G" */
- 0x01,0x70,0x00,0x70,0x00,0x00,0x02,0x22, /* 00000DC8 ".p.p..."" */
- 0x00,0x01,0x79,0x00,0x5B,0x82,0x22,0x53, /* 00000DD0 "..y.[."S" */
- 0x50,0x4B,0x52,0x08,0x5F,0x48,0x49,0x44, /* 00000DD8 "PKR._HID" */
- 0x0C,0x41,0xD0,0x08,0x00,0x08,0x5F,0x43, /* 00000DE0 ".A...._C" */
- 0x52,0x53,0x11,0x0D,0x0A,0x0A,0x47,0x01, /* 00000DE8 "RS....G." */
- 0x61,0x00,0x61,0x00,0x00,0x01,0x79,0x00, /* 00000DF0 "a.a...y." */
- 0x5B,0x82,0x31,0x50,0x53,0x32,0x4D,0x08, /* 00000DF8 "[.1PS2M." */
- 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x0F, /* 00000E00 "_HID.A.." */
- 0x13,0x08,0x5F,0x43,0x49,0x44,0x0C,0x41, /* 00000E08 ".._CID.A" */
- 0xD0,0x0F,0x13,0x14,0x09,0x5F,0x53,0x54, /* 00000E10 "....._ST" */
- 0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43, /* 00000E18 "A....._C" */
- 0x52,0x53,0x11,0x08,0x0A,0x05,0x22,0x00, /* 00000E20 "RS...."." */
- 0x10,0x79,0x00,0x5B,0x82,0x42,0x04,0x50, /* 00000E28 ".y.[.B.P" */
- 0x53,0x32,0x4B,0x08,0x5F,0x48,0x49,0x44, /* 00000E30 "S2K._HID" */
- 0x0C,0x41,0xD0,0x03,0x03,0x08,0x5F,0x43, /* 00000E38 ".A...._C" */
- 0x49,0x44,0x0C,0x41,0xD0,0x03,0x0B,0x14, /* 00000E40 "ID.A...." */
- 0x09,0x5F,0x53,0x54,0x41,0x00,0xA4,0x0A, /* 00000E48 "._STA..." */
- 0x0F,0x08,0x5F,0x43,0x52,0x53,0x11,0x18, /* 00000E50 ".._CRS.." */
- 0x0A,0x15,0x47,0x01,0x60,0x00,0x60,0x00, /* 00000E58 "..G.`.`." */
- 0x00,0x01,0x47,0x01,0x64,0x00,0x64,0x00, /* 00000E60 "..G.d.d." */
- 0x00,0x01,0x22,0x02,0x00,0x79,0x00,0x5B, /* 00000E68 ".."..y.[" */
- 0x82,0x3A,0x46,0x44,0x43,0x30,0x08,0x5F, /* 00000E70 ".:FDC0._" */
- 0x48,0x49,0x44,0x0C,0x41,0xD0,0x07,0x00, /* 00000E78 "HID.A..." */
- 0x14,0x09,0x5F,0x53,0x54,0x41,0x00,0xA4, /* 00000E80 ".._STA.." */
- 0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53,0x11, /* 00000E88 "..._CRS." */
- 0x1B,0x0A,0x18,0x47,0x01,0xF0,0x03,0xF0, /* 00000E90 "...G...." */
- 0x03,0x01,0x06,0x47,0x01,0xF7,0x03,0xF7, /* 00000E98 "...G...." */
- 0x03,0x01,0x01,0x22,0x40,0x00,0x2A,0x04, /* 00000EA0 "..."@.*." */
- 0x00,0x79,0x00,0x5B,0x82,0x46,0x04,0x55, /* 00000EA8 ".y.[.F.U" */
- 0x41,0x52,0x31,0x08,0x5F,0x48,0x49,0x44, /* 00000EB0 "AR1._HID" */
- 0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55, /* 00000EB8 ".A...._U" */
- 0x49,0x44,0x01,0x14,0x19,0x5F,0x53,0x54, /* 00000EC0 "ID..._ST" */
- 0x41,0x00,0xA0,0x0D,0x93,0x5E,0x5E,0x5E, /* 00000EC8 "A....^^^" */
- 0x5E,0x55,0x41,0x52,0x31,0x00,0xA4,0x00, /* 00000ED0 "^UAR1..." */
- 0xA1,0x04,0xA4,0x0A,0x0F,0x08,0x5F,0x43, /* 00000ED8 "......_C" */
- 0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01, /* 00000EE0 "RS....G." */
- 0xF8,0x03,0xF8,0x03,0x08,0x08,0x22,0x10, /* 00000EE8 "......"." */
- 0x00,0x79,0x00,0x5B,0x82,0x47,0x04,0x55, /* 00000EF0 ".y.[.G.U" */
- 0x41,0x52,0x32,0x08,0x5F,0x48,0x49,0x44, /* 00000EF8 "AR2._HID" */
- 0x0C,0x41,0xD0,0x05,0x01,0x08,0x5F,0x55, /* 00000F00 ".A...._U" */
- 0x49,0x44,0x0A,0x02,0x14,0x19,0x5F,0x53, /* 00000F08 "ID...._S" */
- 0x54,0x41,0x00,0xA0,0x0D,0x93,0x5E,0x5E, /* 00000F10 "TA....^^" */
- 0x5E,0x5E,0x55,0x41,0x52,0x32,0x00,0xA4, /* 00000F18 "^^UAR2.." */
- 0x00,0xA1,0x04,0xA4,0x0A,0x0F,0x08,0x5F, /* 00000F20 "......._" */
- 0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47, /* 00000F28 "CRS....G" */
- 0x01,0xF8,0x02,0xF8,0x02,0x08,0x08,0x22, /* 00000F30 "......."" */
- 0x08,0x00,0x79,0x00,0x5B,0x82,0x36,0x4C, /* 00000F38 "..y.[.6L" */
- 0x54,0x50,0x31,0x08,0x5F,0x48,0x49,0x44, /* 00000F40 "TP1._HID" */
- 0x0C,0x41,0xD0,0x04,0x00,0x08,0x5F,0x55, /* 00000F48 ".A...._U" */
- 0x49,0x44,0x0A,0x02,0x14,0x09,0x5F,0x53, /* 00000F50 "ID...._S" */
- 0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08,0x5F, /* 00000F58 "TA....._" */
- 0x43,0x52,0x53,0x11,0x10,0x0A,0x0D,0x47, /* 00000F60 "CRS....G" */
- 0x01,0x78,0x03,0x78,0x03,0x08,0x08,0x22, /* 00000F68 ".x.x..."" */
- 0x80,0x00,0x79,0x00,0x5B,0x82,0x4D,0x07, /* 00000F70 "..y.[.M." */
- 0x53,0x31,0x46,0x30,0x08,0x5F,0x41,0x44, /* 00000F78 "S1F0._AD" */
- 0x52,0x0C,0x00,0x00,0x06,0x00,0x08,0x5F, /* 00000F80 "R......_" */
- 0x53,0x55,0x4E,0x01,0x14,0x13,0x5F,0x50, /* 00000F88 "SUN..._P" */
- 0x53,0x30,0x00,0x70,0x0A,0x80,0x5C,0x2E, /* 00000F90 "S0.p..\." */
- 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00000F98 "_GPEDPT2" */
- 0x14,0x13,0x5F,0x50,0x53,0x33,0x00,0x70, /* 00000FA0 ".._PS3.p" */
- 0x0A,0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00000FA8 "..\._GPE" */
- 0x44,0x50,0x54,0x32,0x14,0x1F,0x5F,0x45, /* 00000FB0 "DPT2.._E" */
- 0x4A,0x30,0x01,0x70,0x0A,0x88,0x5C,0x2E, /* 00000FB8 "J0.p..\." */
- 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00000FC0 "_GPEDPT2" */
- 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00000FC8 "p.\._GPE" */
- 0x50,0x48,0x50,0x31,0x14,0x1E,0x5F,0x53, /* 00000FD0 "PHP1.._S" */
- 0x54,0x41,0x00,0x70,0x0A,0x89,0x5C,0x2E, /* 00000FD8 "TA.p..\." */
- 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00000FE0 "_GPEDPT2" */
- 0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x50, /* 00000FE8 ".\._GPEP" */
- 0x48,0x50,0x31,0x5B,0x82,0x4E,0x07,0x53, /* 00000FF0 "HP1[.N.S" */
- 0x32,0x46,0x30,0x08,0x5F,0x41,0x44,0x52, /* 00000FF8 "2F0._ADR" */
- 0x0C,0x00,0x00,0x07,0x00,0x08,0x5F,0x53, /* 00001000 "......_S" */
- 0x55,0x4E,0x0A,0x02,0x14,0x13,0x5F,0x50, /* 00001008 "UN...._P" */
- 0x53,0x30,0x00,0x70,0x0A,0x90,0x5C,0x2E, /* 00001010 "S0.p..\." */
- 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00001018 "_GPEDPT2" */
- 0x14,0x13,0x5F,0x50,0x53,0x33,0x00,0x70, /* 00001020 ".._PS3.p" */
- 0x0A,0x93,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001028 "..\._GPE" */
- 0x44,0x50,0x54,0x32,0x14,0x1F,0x5F,0x45, /* 00001030 "DPT2.._E" */
- 0x4A,0x30,0x01,0x70,0x0A,0x98,0x5C,0x2E, /* 00001038 "J0.p..\." */
- 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00001040 "_GPEDPT2" */
- 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001048 "p.\._GPE" */
- 0x50,0x48,0x50,0x32,0x14,0x1E,0x5F,0x53, /* 00001050 "PHP2.._S" */
- 0x54,0x41,0x00,0x70,0x0A,0x99,0x5C,0x2E, /* 00001058 "TA.p..\." */
- 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00001060 "_GPEDPT2" */
- 0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x50, /* 00001068 ".\._GPEP" */
- 0x48,0x50,0x32,0x10,0x4E,0x0B,0x5F,0x47, /* 00001070 "HP2.N._G" */
- 0x50,0x45,0x5B,0x80,0x50,0x48,0x50,0x5F, /* 00001078 "PE[.PHP_" */
- 0x01,0x0B,0xC0,0x10,0x0A,0x03,0x5B,0x81, /* 00001080 "......[." */
- 0x15,0x50,0x48,0x50,0x5F,0x01,0x50,0x53, /* 00001088 ".PHP_.PS" */
- 0x54,0x41,0x08,0x50,0x48,0x50,0x31,0x08, /* 00001090 "TA.PHP1." */
- 0x50,0x48,0x50,0x32,0x08,0x5B,0x80,0x44, /* 00001098 "PHP2.[.D" */
- 0x47,0x31,0x5F,0x01,0x0B,0x44,0xB0,0x0A, /* 000010A0 "G1_..D.." */
- 0x04,0x5B,0x81,0x10,0x44,0x47,0x31,0x5F, /* 000010A8 ".[..DG1_" */
- 0x01,0x44,0x50,0x54,0x31,0x08,0x44,0x50, /* 000010B0 ".DPT1.DP" */
- 0x54,0x32,0x08,0x14,0x46,0x07,0x5F,0x4C, /* 000010B8 "T2..F._L" */
- 0x30,0x33,0x00,0x08,0x53,0x4C,0x54,0x5F, /* 000010C0 "03..SLT_" */
- 0x00,0x08,0x45,0x56,0x54,0x5F,0x00,0x70, /* 000010C8 "..EVT_.p" */
- 0x50,0x53,0x54,0x41,0x61,0x7A,0x61,0x0A, /* 000010D0 "PSTAaza." */
- 0x04,0x53,0x4C,0x54,0x5F,0x7B,0x61,0x0A, /* 000010D8 ".SLT_{a." */
- 0x0F,0x45,0x56,0x54,0x5F,0x70,0x53,0x4C, /* 000010E0 ".EVT_pSL" */
- 0x54,0x5F,0x44,0x50,0x54,0x31,0x70,0x45, /* 000010E8 "T_DPT1pE" */
- 0x56,0x54,0x5F,0x44,0x50,0x54,0x32,0xA0, /* 000010F0 "VT_DPT2." */
- 0x1B,0x93,0x53,0x4C,0x54,0x5F,0x01,0x86, /* 000010F8 "..SLT_.." */
- 0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F,0x50, /* 00001100 "\/._SB_P" */
- 0x43,0x49,0x30,0x53,0x31,0x46,0x30,0x45, /* 00001108 "CI0S1F0E" */
- 0x56,0x54,0x5F,0xA1,0x1E,0xA0,0x1C,0x93, /* 00001110 "VT_....." */
- 0x53,0x4C,0x54,0x5F,0x0A,0x02,0x86,0x5C, /* 00001118 "SLT_...\" */
- 0x2F,0x03,0x5F,0x53,0x42,0x5F,0x50,0x43, /* 00001120 "/._SB_PC" */
- 0x49,0x30,0x53,0x32,0x46,0x30,0x45,0x56, /* 00001128 "I0S2F0EV" */
+ 0x00,0x00,0x00,0x00,0x0A,0x00,0x00,0x00, /* 000001D8 "........" */
+ 0x00,0x00,0x79,0x00,0x5B,0x82,0x8E,0xAE, /* 000001E0 "..y.[..." */
+ 0x02,0x50,0x43,0x49,0x30,0x08,0x5F,0x48, /* 000001E8 ".PCI0._H" */
+ 0x49,0x44,0x0C,0x41,0xD0,0x0A,0x03,0x08, /* 000001F0 "ID.A...." */
+ 0x5F,0x55,0x49,0x44,0x00,0x08,0x5F,0x41, /* 000001F8 "_UID.._A" */
+ 0x44,0x52,0x00,0x08,0x5F,0x42,0x42,0x4E, /* 00000200 "DR.._BBN" */
+ 0x00,0x5B,0x82,0x2A,0x48,0x50,0x30,0x5F, /* 00000208 ".[.*HP0_" */
+ 0x08,0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0, /* 00000210 "._HID.A." */
+ 0x0C,0x02,0x08,0x5F,0x43,0x52,0x53,0x11, /* 00000218 "..._CRS." */
+ 0x15,0x0A,0x12,0x47,0x01,0xC0,0x10,0xC0, /* 00000220 "...G...." */
+ 0x10,0x00,0x03,0x47,0x01,0x44,0xB0,0x44, /* 00000228 "...G.D.D" */
+ 0xB0,0x00,0x04,0x79,0x00,0x14,0x4E,0x0C, /* 00000230 "...y..N." */
+ 0x5F,0x43,0x52,0x53,0x00,0x08,0x50,0x52, /* 00000238 "_CRS..PR" */
+ 0x54,0x30,0x11,0x42,0x07,0x0A,0x6E,0x88, /* 00000240 "T0.B..n." */
+ 0x0D,0x00,0x02,0x0E,0x00,0x00,0x00,0x00, /* 00000248 "........" */
+ 0x00,0xFF,0x00,0x00,0x00,0x00,0x01,0x47, /* 00000250 ".......G" */
+ 0x01,0xF8,0x0C,0xF8,0x0C,0x01,0x08,0x88, /* 00000258 "........" */
+ 0x0D,0x00,0x01,0x0C,0x03,0x00,0x00,0x00, /* 00000260 "........" */
+ 0x00,0xF7,0x0C,0x00,0x00,0xF8,0x0C,0x88, /* 00000268 "........" */
+ 0x0D,0x00,0x01,0x0C,0x03,0x00,0x00,0x00, /* 00000270 "........" */
+ 0x0D,0xFF,0xFF,0x00,0x00,0x00,0xF3,0x87, /* 00000278 "........" */
+ 0x17,0x00,0x00,0x0C,0x03,0x00,0x00,0x00, /* 00000280 "........" */
+ 0x00,0x00,0x00,0x0A,0x00,0xFF,0xFF,0x0B, /* 00000288 "........" */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x02, /* 00000290 "........" */
+ 0x00,0x87,0x17,0x00,0x00,0x0C,0x03,0x00, /* 00000298 "........" */
+ 0x00,0x00,0x00,0x00,0x00,0x00,0xF0,0xFF, /* 000002A0 "........" */
+ 0xFF,0xFF,0xF4,0x00,0x00,0x00,0x00,0x00, /* 000002A8 "........" */
+ 0x00,0x00,0x05,0x79,0x00,0x8A,0x50,0x52, /* 000002B0 "...y..PR" */
+ 0x54,0x30,0x0A,0x5C,0x4D,0x4D,0x49,0x4E, /* 000002B8 "T0.\MMIN" */
+ 0x8A,0x50,0x52,0x54,0x30,0x0A,0x60,0x4D, /* 000002C0 ".PRT0.`M" */
+ 0x4D,0x41,0x58,0x8A,0x50,0x52,0x54,0x30, /* 000002C8 "MAX.PRT0" */
+ 0x0A,0x68,0x4D,0x4C,0x45,0x4E,0x70,0x50, /* 000002D0 ".hMLENpP" */
+ 0x4D,0x49,0x4E,0x4D,0x4D,0x49,0x4E,0x70, /* 000002D8 "MINMMINp" */
+ 0x50,0x4C,0x45,0x4E,0x4D,0x4C,0x45,0x4E, /* 000002E0 "PLENMLEN" */
+ 0x72,0x4D,0x4D,0x49,0x4E,0x4D,0x4C,0x45, /* 000002E8 "rMMINMLE" */
+ 0x4E,0x4D,0x4D,0x41,0x58,0x74,0x4D,0x4D, /* 000002F0 "NMMAXtMM" */
+ 0x41,0x58,0x01,0x4D,0x4D,0x41,0x58,0xA4, /* 000002F8 "AX.MMAX." */
+ 0x50,0x52,0x54,0x30,0x08,0x42,0x55,0x46, /* 00000300 "PRT0.BUF" */
+ 0x41,0x11,0x09,0x0A,0x06,0x23,0x20,0x0C, /* 00000308 "A....# ." */
+ 0x18,0x79,0x00,0x08,0x42,0x55,0x46,0x42, /* 00000310 ".y..BUFB" */
+ 0x11,0x09,0x0A,0x06,0x23,0x00,0x00,0x18, /* 00000318 "....#..." */
+ 0x79,0x00,0x8B,0x42,0x55,0x46,0x42,0x01, /* 00000320 "y..BUFB." */
+ 0x49,0x52,0x51,0x56,0x5B,0x82,0x48,0x08, /* 00000328 "IRQV[.H." */
+ 0x4C,0x4E,0x4B,0x41,0x08,0x5F,0x48,0x49, /* 00000330 "LNKA._HI" */
+ 0x44,0x0C,0x41,0xD0,0x0C,0x0F,0x08,0x5F, /* 00000338 "D.A...._" */
+ 0x55,0x49,0x44,0x01,0x14,0x1C,0x5F,0x53, /* 00000340 "UID..._S" */
+ 0x54,0x41,0x00,0x7B,0x50,0x49,0x52,0x41, /* 00000348 "TA.{PIRA" */
+ 0x0A,0x80,0x60,0xA0,0x08,0x93,0x60,0x0A, /* 00000350 "..`...`." */
+ 0x80,0xA4,0x0A,0x09,0xA1,0x04,0xA4,0x0A, /* 00000358 "........" */
+ 0x0B,0x14,0x0B,0x5F,0x50,0x52,0x53,0x00, /* 00000360 "..._PRS." */
+ 0xA4,0x42,0x55,0x46,0x41,0x14,0x11,0x5F, /* 00000368 ".BUFA.._" */
+ 0x44,0x49,0x53,0x00,0x7D,0x50,0x49,0x52, /* 00000370 "DIS.}PIR" */
+ 0x41,0x0A,0x80,0x50,0x49,0x52,0x41,0x14, /* 00000378 "A..PIRA." */
+ 0x1A,0x5F,0x43,0x52,0x53,0x00,0x7B,0x50, /* 00000380 "._CRS.{P" */
+ 0x49,0x52,0x41,0x0A,0x0F,0x60,0x79,0x01, /* 00000388 "IRA..`y." */
+ 0x60,0x49,0x52,0x51,0x56,0xA4,0x42,0x55, /* 00000390 "`IRQV.BU" */
+ 0x46,0x42,0x14,0x1B,0x5F,0x53,0x52,0x53, /* 00000398 "FB.._SRS" */
+ 0x01,0x8B,0x68,0x01,0x49,0x52,0x51,0x31, /* 000003A0 "..h.IRQ1" */
+ 0x82,0x49,0x52,0x51,0x31,0x60,0x76,0x60, /* 000003A8 ".IRQ1`v`" */
+ 0x70,0x60,0x50,0x49,0x52,0x41,0x5B,0x82, /* 000003B0 "p`PIRA[." */
+ 0x49,0x08,0x4C,0x4E,0x4B,0x42,0x08,0x5F, /* 000003B8 "I.LNKB._" */
+ 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x0F, /* 000003C0 "HID.A..." */
+ 0x08,0x5F,0x55,0x49,0x44,0x0A,0x02,0x14, /* 000003C8 "._UID..." */
+ 0x1C,0x5F,0x53,0x54,0x41,0x00,0x7B,0x50, /* 000003D0 "._STA.{P" */
+ 0x49,0x52,0x42,0x0A,0x80,0x60,0xA0,0x08, /* 000003D8 "IRB..`.." */
+ 0x93,0x60,0x0A,0x80,0xA4,0x0A,0x09,0xA1, /* 000003E0 ".`......" */
+ 0x04,0xA4,0x0A,0x0B,0x14,0x0B,0x5F,0x50, /* 000003E8 "......_P" */
+ 0x52,0x53,0x00,0xA4,0x42,0x55,0x46,0x41, /* 000003F0 "RS..BUFA" */
+ 0x14,0x11,0x5F,0x44,0x49,0x53,0x00,0x7D, /* 000003F8 ".._DIS.}" */
+ 0x50,0x49,0x52,0x42,0x0A,0x80,0x50,0x49, /* 00000400 "PIRB..PI" */
+ 0x52,0x42,0x14,0x1A,0x5F,0x43,0x52,0x53, /* 00000408 "RB.._CRS" */
+ 0x00,0x7B,0x50,0x49,0x52,0x42,0x0A,0x0F, /* 00000410 ".{PIRB.." */
+ 0x60,0x79,0x01,0x60,0x49,0x52,0x51,0x56, /* 00000418 "`y.`IRQV" */
+ 0xA4,0x42,0x55,0x46,0x42,0x14,0x1B,0x5F, /* 00000420 ".BUFB.._" */
+ 0x53,0x52,0x53,0x01,0x8B,0x68,0x01,0x49, /* 00000428 "SRS..h.I" */
+ 0x52,0x51,0x31,0x82,0x49,0x52,0x51,0x31, /* 00000430 "RQ1.IRQ1" */
+ 0x60,0x76,0x60,0x70,0x60,0x50,0x49,0x52, /* 00000438 "`v`p`PIR" */
+ 0x42,0x5B,0x82,0x49,0x08,0x4C,0x4E,0x4B, /* 00000440 "B[.I.LNK" */
+ 0x43,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 00000448 "C._HID.A" */
+ 0xD0,0x0C,0x0F,0x08,0x5F,0x55,0x49,0x44, /* 00000450 "...._UID" */
+ 0x0A,0x03,0x14,0x1C,0x5F,0x53,0x54,0x41, /* 00000458 "...._STA" */
+ 0x00,0x7B,0x50,0x49,0x52,0x43,0x0A,0x80, /* 00000460 ".{PIRC.." */
+ 0x60,0xA0,0x08,0x93,0x60,0x0A,0x80,0xA4, /* 00000468 "`...`..." */
+ 0x0A,0x09,0xA1,0x04,0xA4,0x0A,0x0B,0x14, /* 00000470 "........" */
+ 0x0B,0x5F,0x50,0x52,0x53,0x00,0xA4,0x42, /* 00000478 "._PRS..B" */
+ 0x55,0x46,0x41,0x14,0x11,0x5F,0x44,0x49, /* 00000480 "UFA.._DI" */
+ 0x53,0x00,0x7D,0x50,0x49,0x52,0x43,0x0A, /* 00000488 "S.}PIRC." */
+ 0x80,0x50,0x49,0x52,0x43,0x14,0x1A,0x5F, /* 00000490 ".PIRC.._" */
+ 0x43,0x52,0x53,0x00,0x7B,0x50,0x49,0x52, /* 00000498 "CRS.{PIR" */
+ 0x43,0x0A,0x0F,0x60,0x79,0x01,0x60,0x49, /* 000004A0 "C..`y.`I" */
+ 0x52,0x51,0x56,0xA4,0x42,0x55,0x46,0x42, /* 000004A8 "RQV.BUFB" */
+ 0x14,0x1B,0x5F,0x53,0x52,0x53,0x01,0x8B, /* 000004B0 ".._SRS.." */
+ 0x68,0x01,0x49,0x52,0x51,0x31,0x82,0x49, /* 000004B8 "h.IRQ1.I" */
+ 0x52,0x51,0x31,0x60,0x76,0x60,0x70,0x60, /* 000004C0 "RQ1`v`p`" */
+ 0x50,0x49,0x52,0x43,0x5B,0x82,0x49,0x08, /* 000004C8 "PIRC[.I." */
+ 0x4C,0x4E,0x4B,0x44,0x08,0x5F,0x48,0x49, /* 000004D0 "LNKD._HI" */
+ 0x44,0x0C,0x41,0xD0,0x0C,0x0F,0x08,0x5F, /* 000004D8 "D.A...._" */
+ 0x55,0x49,0x44,0x0A,0x04,0x14,0x1C,0x5F, /* 000004E0 "UID...._" */
+ 0x53,0x54,0x41,0x00,0x7B,0x50,0x49,0x52, /* 000004E8 "STA.{PIR" */
+ 0x44,0x0A,0x80,0x60,0xA0,0x08,0x93,0x60, /* 000004F0 "D..`...`" */
+ 0x0A,0x80,0xA4,0x0A,0x09,0xA1,0x04,0xA4, /* 000004F8 "........" */
+ 0x0A,0x0B,0x14,0x0B,0x5F,0x50,0x52,0x53, /* 00000500 "...._PRS" */
+ 0x00,0xA4,0x42,0x55,0x46,0x41,0x14,0x11, /* 00000508 "..BUFA.." */
+ 0x5F,0x44,0x49,0x53,0x00,0x7D,0x50,0x49, /* 00000510 "_DIS.}PI" */
+ 0x52,0x44,0x0A,0x80,0x50,0x49,0x52,0x44, /* 00000518 "RD..PIRD" */
+ 0x14,0x1A,0x5F,0x43,0x52,0x53,0x00,0x7B, /* 00000520 ".._CRS.{" */
+ 0x50,0x49,0x52,0x44,0x0A,0x0F,0x60,0x79, /* 00000528 "PIRD..`y" */
+ 0x01,0x60,0x49,0x52,0x51,0x56,0xA4,0x42, /* 00000530 ".`IRQV.B" */
+ 0x55,0x46,0x42,0x14,0x1B,0x5F,0x53,0x52, /* 00000538 "UFB.._SR" */
+ 0x53,0x01,0x8B,0x68,0x01,0x49,0x52,0x51, /* 00000540 "S..h.IRQ" */
+ 0x31,0x82,0x49,0x52,0x51,0x31,0x60,0x76, /* 00000548 "1.IRQ1`v" */
+ 0x60,0x70,0x60,0x50,0x49,0x52,0x44,0x5B, /* 00000550 "`p`PIRD[" */
+ 0x82,0x44,0x05,0x48,0x50,0x45,0x54,0x08, /* 00000558 ".D.HPET." */
+ 0x5F,0x48,0x49,0x44,0x0C,0x41,0xD0,0x01, /* 00000560 "_HID.A.." */
+ 0x03,0x08,0x5F,0x55,0x49,0x44,0x00,0x14, /* 00000568 ".._UID.." */
+ 0x18,0x5F,0x53,0x54,0x41,0x00,0xA0,0x0C, /* 00000570 "._STA..." */
+ 0x93,0x5E,0x5E,0x5E,0x48,0x50,0x45,0x54, /* 00000578 ".^^^HPET" */
+ 0x00,0xA4,0x00,0xA1,0x04,0xA4,0x0A,0x0F, /* 00000580 "........" */
+ 0x08,0x5F,0x43,0x52,0x53,0x11,0x1F,0x0A, /* 00000588 "._CRS..." */
+ 0x1C,0x87,0x17,0x00,0x00,0x0D,0x01,0x00, /* 00000590 "........" */
+ 0x00,0x00,0x00,0x00,0x00,0xD0,0xFE,0xFF, /* 00000598 "........" */
+ 0x03,0xD0,0xFE,0x00,0x00,0x00,0x00,0x00, /* 000005A0 "........" */
+ 0x04,0x00,0x00,0x79,0x00,0x14,0x16,0x5F, /* 000005A8 "...y..._" */
+ 0x50,0x52,0x54,0x00,0xA0,0x0A,0x50,0x49, /* 000005B0 "PRT...PI" */
+ 0x43,0x44,0xA4,0x50,0x52,0x54,0x41,0xA4, /* 000005B8 "CD.PRTA." */
+ 0x50,0x52,0x54,0x50,0x08,0x50,0x52,0x54, /* 000005C0 "PRTP.PRT" */
+ 0x50,0x12,0x49,0x70,0x7C,0x12,0x0D,0x04, /* 000005C8 "P.Ip|..." */
+ 0x0C,0xFF,0xFF,0x01,0x00,0x00,0x4C,0x4E, /* 000005D0 "......LN" */
+ 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 000005D8 "KB......" */
+ 0xFF,0x01,0x00,0x01,0x4C,0x4E,0x4B,0x43, /* 000005E0 "....LNKC" */
+ 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x01, /* 000005E8 "........" */
+ 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44,0x00, /* 000005F0 "...LNKD." */
+ 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x01,0x00, /* 000005F8 "........" */
+ 0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000600 "..LNKA.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x02,0x00,0x00, /* 00000608 "........" */
+ 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 00000610 "LNKC...." */
+ 0x0C,0xFF,0xFF,0x02,0x00,0x01,0x4C,0x4E, /* 00000618 "......LN" */
+ 0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000620 "KD......" */
+ 0xFF,0x02,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 00000628 ".....LNK" */
+ 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000630 "A......." */
+ 0x02,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x42, /* 00000638 "....LNKB" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x03, /* 00000640 "........" */
+ 0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000648 "..LNKD.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x03,0x00,0x01, /* 00000650 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04, /* 00000658 "LNKA...." */
+ 0x0C,0xFF,0xFF,0x03,0x00,0x0A,0x02,0x4C, /* 00000660 ".......L" */
+ 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 00000668 "NKB....." */
+ 0xFF,0xFF,0x03,0x00,0x0A,0x03,0x4C,0x4E, /* 00000670 "......LN" */
+ 0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000678 "KC......" */
+ 0xFF,0x04,0x00,0x00,0x4C,0x4E,0x4B,0x41, /* 00000680 "....LNKA" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x04, /* 00000688 "........" */
+ 0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00,0x12, /* 00000690 "..LNKB.." */
+ 0x0E,0x04,0x0C,0xFF,0xFF,0x04,0x00,0x0A, /* 00000698 "........" */
+ 0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 000006A0 ".LNKC..." */
+ 0x04,0x0C,0xFF,0xFF,0x04,0x00,0x0A,0x03, /* 000006A8 "........" */
+ 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04, /* 000006B0 "LNKD...." */
+ 0x0C,0xFF,0xFF,0x05,0x00,0x00,0x4C,0x4E, /* 000006B8 "......LN" */
+ 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 000006C0 "KB......" */
+ 0xFF,0x05,0x00,0x01,0x4C,0x4E,0x4B,0x43, /* 000006C8 "....LNKC" */
+ 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x05, /* 000006D0 "........" */
+ 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44,0x00, /* 000006D8 "...LNKD." */
+ 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x05,0x00, /* 000006E0 "........" */
+ 0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 000006E8 "..LNKA.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x06,0x00,0x00, /* 000006F0 "........" */
+ 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 000006F8 "LNKC...." */
+ 0x0C,0xFF,0xFF,0x06,0x00,0x01,0x4C,0x4E, /* 00000700 "......LN" */
+ 0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000708 "KD......" */
+ 0xFF,0x06,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 00000710 ".....LNK" */
+ 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000718 "A......." */
+ 0x06,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x42, /* 00000720 "....LNKB" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x07, /* 00000728 "........" */
+ 0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000730 "..LNKD.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x07,0x00,0x01, /* 00000738 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04, /* 00000740 "LNKA...." */
+ 0x0C,0xFF,0xFF,0x07,0x00,0x0A,0x02,0x4C, /* 00000748 ".......L" */
+ 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 00000750 "NKB....." */
+ 0xFF,0xFF,0x07,0x00,0x0A,0x03,0x4C,0x4E, /* 00000758 "......LN" */
+ 0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000760 "KC......" */
+ 0xFF,0x08,0x00,0x00,0x4C,0x4E,0x4B,0x41, /* 00000768 "....LNKA" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x08, /* 00000770 "........" */
+ 0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00,0x12, /* 00000778 "..LNKB.." */
+ 0x0E,0x04,0x0C,0xFF,0xFF,0x08,0x00,0x0A, /* 00000780 "........" */
+ 0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 00000788 ".LNKC..." */
+ 0x04,0x0C,0xFF,0xFF,0x08,0x00,0x0A,0x03, /* 00000790 "........" */
+ 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04, /* 00000798 "LNKD...." */
+ 0x0C,0xFF,0xFF,0x09,0x00,0x00,0x4C,0x4E, /* 000007A0 "......LN" */
+ 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 000007A8 "KB......" */
+ 0xFF,0x09,0x00,0x01,0x4C,0x4E,0x4B,0x43, /* 000007B0 "....LNKC" */
+ 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x09, /* 000007B8 "........" */
+ 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44,0x00, /* 000007C0 "...LNKD." */
+ 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x09,0x00, /* 000007C8 "........" */
+ 0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 000007D0 "..LNKA.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x0A,0x00,0x00, /* 000007D8 "........" */
+ 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 000007E0 "LNKC...." */
+ 0x0C,0xFF,0xFF,0x0A,0x00,0x01,0x4C,0x4E, /* 000007E8 "......LN" */
+ 0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000007F0 "KD......" */
+ 0xFF,0x0A,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 000007F8 ".....LNK" */
+ 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000800 "A......." */
+ 0x0A,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x42, /* 00000808 "....LNKB" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0B, /* 00000810 "........" */
+ 0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000818 "..LNKD.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x0B,0x00,0x01, /* 00000820 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04, /* 00000828 "LNKA...." */
+ 0x0C,0xFF,0xFF,0x0B,0x00,0x0A,0x02,0x4C, /* 00000830 ".......L" */
+ 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 00000838 "NKB....." */
+ 0xFF,0xFF,0x0B,0x00,0x0A,0x03,0x4C,0x4E, /* 00000840 "......LN" */
+ 0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000848 "KC......" */
+ 0xFF,0x0C,0x00,0x00,0x4C,0x4E,0x4B,0x41, /* 00000850 "....LNKA" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0C, /* 00000858 "........" */
+ 0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00,0x12, /* 00000860 "..LNKB.." */
+ 0x0E,0x04,0x0C,0xFF,0xFF,0x0C,0x00,0x0A, /* 00000868 "........" */
+ 0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 00000870 ".LNKC..." */
+ 0x04,0x0C,0xFF,0xFF,0x0C,0x00,0x0A,0x03, /* 00000878 "........" */
+ 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04, /* 00000880 "LNKD...." */
+ 0x0C,0xFF,0xFF,0x0D,0x00,0x00,0x4C,0x4E, /* 00000888 "......LN" */
+ 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000890 "KB......" */
+ 0xFF,0x0D,0x00,0x01,0x4C,0x4E,0x4B,0x43, /* 00000898 "....LNKC" */
+ 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0D, /* 000008A0 "........" */
+ 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44,0x00, /* 000008A8 "...LNKD." */
+ 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x0D,0x00, /* 000008B0 "........" */
+ 0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 000008B8 "..LNKA.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x0E,0x00,0x00, /* 000008C0 "........" */
+ 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 000008C8 "LNKC...." */
+ 0x0C,0xFF,0xFF,0x0E,0x00,0x01,0x4C,0x4E, /* 000008D0 "......LN" */
+ 0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000008D8 "KD......" */
+ 0xFF,0x0E,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 000008E0 ".....LNK" */
+ 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 000008E8 "A......." */
+ 0x0E,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x42, /* 000008F0 "....LNKB" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x0F, /* 000008F8 "........" */
+ 0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000900 "..LNKD.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x0F,0x00,0x01, /* 00000908 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04, /* 00000910 "LNKA...." */
+ 0x0C,0xFF,0xFF,0x0F,0x00,0x0A,0x02,0x4C, /* 00000918 ".......L" */
+ 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 00000920 "NKB....." */
+ 0xFF,0xFF,0x0F,0x00,0x0A,0x03,0x4C,0x4E, /* 00000928 "......LN" */
+ 0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000930 "KC......" */
+ 0xFF,0x10,0x00,0x00,0x4C,0x4E,0x4B,0x41, /* 00000938 "....LNKA" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x10, /* 00000940 "........" */
+ 0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00,0x12, /* 00000948 "..LNKB.." */
+ 0x0E,0x04,0x0C,0xFF,0xFF,0x10,0x00,0x0A, /* 00000950 "........" */
+ 0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 00000958 ".LNKC..." */
+ 0x04,0x0C,0xFF,0xFF,0x10,0x00,0x0A,0x03, /* 00000960 "........" */
+ 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04, /* 00000968 "LNKD...." */
+ 0x0C,0xFF,0xFF,0x11,0x00,0x00,0x4C,0x4E, /* 00000970 "......LN" */
+ 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000978 "KB......" */
+ 0xFF,0x11,0x00,0x01,0x4C,0x4E,0x4B,0x43, /* 00000980 "....LNKC" */
+ 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x11, /* 00000988 "........" */
+ 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44,0x00, /* 00000990 "...LNKD." */
+ 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x11,0x00, /* 00000998 "........" */
+ 0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 000009A0 "..LNKA.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x12,0x00,0x00, /* 000009A8 "........" */
+ 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 000009B0 "LNKC...." */
+ 0x0C,0xFF,0xFF,0x12,0x00,0x01,0x4C,0x4E, /* 000009B8 "......LN" */
+ 0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 000009C0 "KD......" */
+ 0xFF,0x12,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 000009C8 ".....LNK" */
+ 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 000009D0 "A......." */
+ 0x12,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x42, /* 000009D8 "....LNKB" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x13, /* 000009E0 "........" */
+ 0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 000009E8 "..LNKD.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x13,0x00,0x01, /* 000009F0 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04, /* 000009F8 "LNKA...." */
+ 0x0C,0xFF,0xFF,0x13,0x00,0x0A,0x02,0x4C, /* 00000A00 ".......L" */
+ 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 00000A08 "NKB....." */
+ 0xFF,0xFF,0x13,0x00,0x0A,0x03,0x4C,0x4E, /* 00000A10 "......LN" */
+ 0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000A18 "KC......" */
+ 0xFF,0x14,0x00,0x00,0x4C,0x4E,0x4B,0x41, /* 00000A20 "....LNKA" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x14, /* 00000A28 "........" */
+ 0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00,0x12, /* 00000A30 "..LNKB.." */
+ 0x0E,0x04,0x0C,0xFF,0xFF,0x14,0x00,0x0A, /* 00000A38 "........" */
+ 0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 00000A40 ".LNKC..." */
+ 0x04,0x0C,0xFF,0xFF,0x14,0x00,0x0A,0x03, /* 00000A48 "........" */
+ 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04, /* 00000A50 "LNKD...." */
+ 0x0C,0xFF,0xFF,0x15,0x00,0x00,0x4C,0x4E, /* 00000A58 "......LN" */
+ 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000A60 "KB......" */
+ 0xFF,0x15,0x00,0x01,0x4C,0x4E,0x4B,0x43, /* 00000A68 "....LNKC" */
+ 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x15, /* 00000A70 "........" */
+ 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44,0x00, /* 00000A78 "...LNKD." */
+ 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x15,0x00, /* 00000A80 "........" */
+ 0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000A88 "..LNKA.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x16,0x00,0x00, /* 00000A90 "........" */
+ 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 00000A98 "LNKC...." */
+ 0x0C,0xFF,0xFF,0x16,0x00,0x01,0x4C,0x4E, /* 00000AA0 "......LN" */
+ 0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000AA8 "KD......" */
+ 0xFF,0x16,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 00000AB0 ".....LNK" */
+ 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000AB8 "A......." */
+ 0x16,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x42, /* 00000AC0 "....LNKB" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x17, /* 00000AC8 "........" */
+ 0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000AD0 "..LNKD.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x17,0x00,0x01, /* 00000AD8 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04, /* 00000AE0 "LNKA...." */
+ 0x0C,0xFF,0xFF,0x17,0x00,0x0A,0x02,0x4C, /* 00000AE8 ".......L" */
+ 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 00000AF0 "NKB....." */
+ 0xFF,0xFF,0x17,0x00,0x0A,0x03,0x4C,0x4E, /* 00000AF8 "......LN" */
+ 0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000B00 "KC......" */
+ 0xFF,0x18,0x00,0x00,0x4C,0x4E,0x4B,0x41, /* 00000B08 "....LNKA" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x18, /* 00000B10 "........" */
+ 0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00,0x12, /* 00000B18 "..LNKB.." */
+ 0x0E,0x04,0x0C,0xFF,0xFF,0x18,0x00,0x0A, /* 00000B20 "........" */
+ 0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 00000B28 ".LNKC..." */
+ 0x04,0x0C,0xFF,0xFF,0x18,0x00,0x0A,0x03, /* 00000B30 "........" */
+ 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04, /* 00000B38 "LNKD...." */
+ 0x0C,0xFF,0xFF,0x19,0x00,0x00,0x4C,0x4E, /* 00000B40 "......LN" */
+ 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000B48 "KB......" */
+ 0xFF,0x19,0x00,0x01,0x4C,0x4E,0x4B,0x43, /* 00000B50 "....LNKC" */
+ 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x19, /* 00000B58 "........" */
+ 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44,0x00, /* 00000B60 "...LNKD." */
+ 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x19,0x00, /* 00000B68 "........" */
+ 0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000B70 "..LNKA.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x1A,0x00,0x00, /* 00000B78 "........" */
+ 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 00000B80 "LNKC...." */
+ 0x0C,0xFF,0xFF,0x1A,0x00,0x01,0x4C,0x4E, /* 00000B88 "......LN" */
+ 0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000B90 "KD......" */
+ 0xFF,0x1A,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 00000B98 ".....LNK" */
+ 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000BA0 "A......." */
+ 0x1A,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x42, /* 00000BA8 "....LNKB" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x1B, /* 00000BB0 "........" */
+ 0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000BB8 "..LNKD.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x1B,0x00,0x01, /* 00000BC0 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04, /* 00000BC8 "LNKA...." */
+ 0x0C,0xFF,0xFF,0x1B,0x00,0x0A,0x02,0x4C, /* 00000BD0 ".......L" */
+ 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 00000BD8 "NKB....." */
+ 0xFF,0xFF,0x1B,0x00,0x0A,0x03,0x4C,0x4E, /* 00000BE0 "......LN" */
+ 0x4B,0x43,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000BE8 "KC......" */
+ 0xFF,0x1C,0x00,0x00,0x4C,0x4E,0x4B,0x41, /* 00000BF0 "....LNKA" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x1C, /* 00000BF8 "........" */
+ 0x00,0x01,0x4C,0x4E,0x4B,0x42,0x00,0x12, /* 00000C00 "..LNKB.." */
+ 0x0E,0x04,0x0C,0xFF,0xFF,0x1C,0x00,0x0A, /* 00000C08 "........" */
+ 0x02,0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0E, /* 00000C10 ".LNKC..." */
+ 0x04,0x0C,0xFF,0xFF,0x1C,0x00,0x0A,0x03, /* 00000C18 "........" */
+ 0x4C,0x4E,0x4B,0x44,0x00,0x12,0x0D,0x04, /* 00000C20 "LNKD...." */
+ 0x0C,0xFF,0xFF,0x1D,0x00,0x00,0x4C,0x4E, /* 00000C28 "......LN" */
+ 0x4B,0x42,0x00,0x12,0x0D,0x04,0x0C,0xFF, /* 00000C30 "KB......" */
+ 0xFF,0x1D,0x00,0x01,0x4C,0x4E,0x4B,0x43, /* 00000C38 "....LNKC" */
+ 0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x1D, /* 00000C40 "........" */
+ 0x00,0x0A,0x02,0x4C,0x4E,0x4B,0x44,0x00, /* 00000C48 "...LNKD." */
+ 0x12,0x0E,0x04,0x0C,0xFF,0xFF,0x1D,0x00, /* 00000C50 "........" */
+ 0x0A,0x03,0x4C,0x4E,0x4B,0x41,0x00,0x12, /* 00000C58 "..LNKA.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x1E,0x00,0x00, /* 00000C60 "........" */
+ 0x4C,0x4E,0x4B,0x43,0x00,0x12,0x0D,0x04, /* 00000C68 "LNKC...." */
+ 0x0C,0xFF,0xFF,0x1E,0x00,0x01,0x4C,0x4E, /* 00000C70 "......LN" */
+ 0x4B,0x44,0x00,0x12,0x0E,0x04,0x0C,0xFF, /* 00000C78 "KD......" */
+ 0xFF,0x1E,0x00,0x0A,0x02,0x4C,0x4E,0x4B, /* 00000C80 ".....LNK" */
+ 0x41,0x00,0x12,0x0E,0x04,0x0C,0xFF,0xFF, /* 00000C88 "A......." */
+ 0x1E,0x00,0x0A,0x03,0x4C,0x4E,0x4B,0x42, /* 00000C90 "....LNKB" */
+ 0x00,0x12,0x0D,0x04,0x0C,0xFF,0xFF,0x1F, /* 00000C98 "........" */
+ 0x00,0x00,0x4C,0x4E,0x4B,0x44,0x00,0x12, /* 00000CA0 "..LNKD.." */
+ 0x0D,0x04,0x0C,0xFF,0xFF,0x1F,0x00,0x01, /* 00000CA8 "........" */
+ 0x4C,0x4E,0x4B,0x41,0x00,0x12,0x0E,0x04, /* 00000CB0 "LNKA...." */
+ 0x0C,0xFF,0xFF,0x1F,0x00,0x0A,0x02,0x4C, /* 00000CB8 ".......L" */
+ 0x4E,0x4B,0x42,0x00,0x12,0x0E,0x04,0x0C, /* 00000CC0 "NKB....." */
+ 0xFF,0xFF,0x1F,0x00,0x0A,0x03,0x4C,0x4E, /* 00000CC8 "......LN" */
+ 0x4B,0x43,0x00,0x08,0x50,0x52,0x54,0x41, /* 00000CD0 "KC..PRTA" */
+ 0x12,0x41,0x61,0x7C,0x12,0x0B,0x04,0x0C, /* 00000CD8 ".Aa|...." */
+ 0xFF,0xFF,0x01,0x00,0x00,0x00,0x0A,0x14, /* 00000CE0 "........" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x01,0x00, /* 00000CE8 "........" */
+ 0x01,0x00,0x0A,0x15,0x12,0x0C,0x04,0x0C, /* 00000CF0 "........" */
+ 0xFF,0xFF,0x01,0x00,0x0A,0x02,0x00,0x0A, /* 00000CF8 "........" */
+ 0x16,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x01, /* 00000D00 "........" */
+ 0x00,0x0A,0x03,0x00,0x0A,0x17,0x12,0x0B, /* 00000D08 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x02,0x00,0x00,0x00, /* 00000D10 "........" */
+ 0x0A,0x18,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000D18 "........" */
+ 0x02,0x00,0x01,0x00,0x0A,0x19,0x12,0x0C, /* 00000D20 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x02,0x00,0x0A,0x02, /* 00000D28 "........" */
+ 0x00,0x0A,0x1A,0x12,0x0C,0x04,0x0C,0xFF, /* 00000D30 "........" */
+ 0xFF,0x02,0x00,0x0A,0x03,0x00,0x0A,0x1B, /* 00000D38 "........" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x03,0x00, /* 00000D40 "........" */
+ 0x00,0x00,0x0A,0x1C,0x12,0x0B,0x04,0x0C, /* 00000D48 "........" */
+ 0xFF,0xFF,0x03,0x00,0x01,0x00,0x0A,0x1D, /* 00000D50 "........" */
+ 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x03,0x00, /* 00000D58 "........" */
+ 0x0A,0x02,0x00,0x0A,0x1E,0x12,0x0C,0x04, /* 00000D60 "........" */
+ 0x0C,0xFF,0xFF,0x03,0x00,0x0A,0x03,0x00, /* 00000D68 "........" */
+ 0x0A,0x1F,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000D70 "........" */
+ 0x04,0x00,0x00,0x00,0x0A,0x20,0x12,0x0B, /* 00000D78 "..... .." */
+ 0x04,0x0C,0xFF,0xFF,0x04,0x00,0x01,0x00, /* 00000D80 "........" */
+ 0x0A,0x21,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000D88 ".!......" */
+ 0x04,0x00,0x0A,0x02,0x00,0x0A,0x22,0x12, /* 00000D90 "......"." */
+ 0x0C,0x04,0x0C,0xFF,0xFF,0x04,0x00,0x0A, /* 00000D98 "........" */
+ 0x03,0x00,0x0A,0x23,0x12,0x0B,0x04,0x0C, /* 00000DA0 "...#...." */
+ 0xFF,0xFF,0x05,0x00,0x00,0x00,0x0A,0x24, /* 00000DA8 ".......$" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x05,0x00, /* 00000DB0 "........" */
+ 0x01,0x00,0x0A,0x25,0x12,0x0C,0x04,0x0C, /* 00000DB8 "...%...." */
+ 0xFF,0xFF,0x05,0x00,0x0A,0x02,0x00,0x0A, /* 00000DC0 "........" */
+ 0x26,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x05, /* 00000DC8 "&......." */
+ 0x00,0x0A,0x03,0x00,0x0A,0x27,0x12,0x0B, /* 00000DD0 ".....'.." */
+ 0x04,0x0C,0xFF,0xFF,0x06,0x00,0x00,0x00, /* 00000DD8 "........" */
+ 0x0A,0x28,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000DE0 ".(......" */
+ 0x06,0x00,0x01,0x00,0x0A,0x29,0x12,0x0C, /* 00000DE8 ".....).." */
+ 0x04,0x0C,0xFF,0xFF,0x06,0x00,0x0A,0x02, /* 00000DF0 "........" */
+ 0x00,0x0A,0x2A,0x12,0x0C,0x04,0x0C,0xFF, /* 00000DF8 "..*....." */
+ 0xFF,0x06,0x00,0x0A,0x03,0x00,0x0A,0x2B, /* 00000E00 ".......+" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x07,0x00, /* 00000E08 "........" */
+ 0x00,0x00,0x0A,0x2C,0x12,0x0B,0x04,0x0C, /* 00000E10 "...,...." */
+ 0xFF,0xFF,0x07,0x00,0x01,0x00,0x0A,0x2D, /* 00000E18 ".......-" */
+ 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x07,0x00, /* 00000E20 "........" */
+ 0x0A,0x02,0x00,0x0A,0x2E,0x12,0x0C,0x04, /* 00000E28 "........" */
+ 0x0C,0xFF,0xFF,0x07,0x00,0x0A,0x03,0x00, /* 00000E30 "........" */
+ 0x0A,0x2F,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000E38 "./......" */
+ 0x08,0x00,0x00,0x00,0x0A,0x11,0x12,0x0B, /* 00000E40 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x08,0x00,0x01,0x00, /* 00000E48 "........" */
+ 0x0A,0x12,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000E50 "........" */
+ 0x08,0x00,0x0A,0x02,0x00,0x0A,0x13,0x12, /* 00000E58 "........" */
+ 0x0C,0x04,0x0C,0xFF,0xFF,0x08,0x00,0x0A, /* 00000E60 "........" */
+ 0x03,0x00,0x0A,0x14,0x12,0x0B,0x04,0x0C, /* 00000E68 "........" */
+ 0xFF,0xFF,0x09,0x00,0x00,0x00,0x0A,0x15, /* 00000E70 "........" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x09,0x00, /* 00000E78 "........" */
+ 0x01,0x00,0x0A,0x16,0x12,0x0C,0x04,0x0C, /* 00000E80 "........" */
+ 0xFF,0xFF,0x09,0x00,0x0A,0x02,0x00,0x0A, /* 00000E88 "........" */
+ 0x17,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x09, /* 00000E90 "........" */
+ 0x00,0x0A,0x03,0x00,0x0A,0x18,0x12,0x0B, /* 00000E98 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x0A,0x00,0x00,0x00, /* 00000EA0 "........" */
+ 0x0A,0x19,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000EA8 "........" */
+ 0x0A,0x00,0x01,0x00,0x0A,0x1A,0x12,0x0C, /* 00000EB0 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x0A,0x00,0x0A,0x02, /* 00000EB8 "........" */
+ 0x00,0x0A,0x1B,0x12,0x0C,0x04,0x0C,0xFF, /* 00000EC0 "........" */
+ 0xFF,0x0A,0x00,0x0A,0x03,0x00,0x0A,0x1C, /* 00000EC8 "........" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0B,0x00, /* 00000ED0 "........" */
+ 0x00,0x00,0x0A,0x1D,0x12,0x0B,0x04,0x0C, /* 00000ED8 "........" */
+ 0xFF,0xFF,0x0B,0x00,0x01,0x00,0x0A,0x1E, /* 00000EE0 "........" */
+ 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0B,0x00, /* 00000EE8 "........" */
+ 0x0A,0x02,0x00,0x0A,0x1F,0x12,0x0C,0x04, /* 00000EF0 "........" */
+ 0x0C,0xFF,0xFF,0x0B,0x00,0x0A,0x03,0x00, /* 00000EF8 "........" */
+ 0x0A,0x20,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000F00 ". ......" */
+ 0x0C,0x00,0x00,0x00,0x0A,0x21,0x12,0x0B, /* 00000F08 ".....!.." */
+ 0x04,0x0C,0xFF,0xFF,0x0C,0x00,0x01,0x00, /* 00000F10 "........" */
+ 0x0A,0x22,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000F18 "."......" */
+ 0x0C,0x00,0x0A,0x02,0x00,0x0A,0x23,0x12, /* 00000F20 "......#." */
+ 0x0C,0x04,0x0C,0xFF,0xFF,0x0C,0x00,0x0A, /* 00000F28 "........" */
+ 0x03,0x00,0x0A,0x24,0x12,0x0B,0x04,0x0C, /* 00000F30 "...$...." */
+ 0xFF,0xFF,0x0D,0x00,0x00,0x00,0x0A,0x25, /* 00000F38 ".......%" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0D,0x00, /* 00000F40 "........" */
+ 0x01,0x00,0x0A,0x26,0x12,0x0C,0x04,0x0C, /* 00000F48 "...&...." */
+ 0xFF,0xFF,0x0D,0x00,0x0A,0x02,0x00,0x0A, /* 00000F50 "........" */
+ 0x27,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0D, /* 00000F58 "'......." */
+ 0x00,0x0A,0x03,0x00,0x0A,0x28,0x12,0x0B, /* 00000F60 ".....(.." */
+ 0x04,0x0C,0xFF,0xFF,0x0E,0x00,0x00,0x00, /* 00000F68 "........" */
+ 0x0A,0x29,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000F70 ".)......" */
+ 0x0E,0x00,0x01,0x00,0x0A,0x2A,0x12,0x0C, /* 00000F78 ".....*.." */
+ 0x04,0x0C,0xFF,0xFF,0x0E,0x00,0x0A,0x02, /* 00000F80 "........" */
+ 0x00,0x0A,0x2B,0x12,0x0C,0x04,0x0C,0xFF, /* 00000F88 "..+....." */
+ 0xFF,0x0E,0x00,0x0A,0x03,0x00,0x0A,0x2C, /* 00000F90 ".......," */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x0F,0x00, /* 00000F98 "........" */
+ 0x00,0x00,0x0A,0x2D,0x12,0x0B,0x04,0x0C, /* 00000FA0 "...-...." */
+ 0xFF,0xFF,0x0F,0x00,0x01,0x00,0x0A,0x2E, /* 00000FA8 "........" */
+ 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x0F,0x00, /* 00000FB0 "........" */
+ 0x0A,0x02,0x00,0x0A,0x2F,0x12,0x0C,0x04, /* 00000FB8 "..../..." */
+ 0x0C,0xFF,0xFF,0x0F,0x00,0x0A,0x03,0x00, /* 00000FC0 "........" */
+ 0x0A,0x10,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00000FC8 "........" */
+ 0x10,0x00,0x00,0x00,0x0A,0x12,0x12,0x0B, /* 00000FD0 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x10,0x00,0x01,0x00, /* 00000FD8 "........" */
+ 0x0A,0x13,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00000FE0 "........" */
+ 0x10,0x00,0x0A,0x02,0x00,0x0A,0x14,0x12, /* 00000FE8 "........" */
+ 0x0C,0x04,0x0C,0xFF,0xFF,0x10,0x00,0x0A, /* 00000FF0 "........" */
+ 0x03,0x00,0x0A,0x15,0x12,0x0B,0x04,0x0C, /* 00000FF8 "........" */
+ 0xFF,0xFF,0x11,0x00,0x00,0x00,0x0A,0x16, /* 00001000 "........" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x11,0x00, /* 00001008 "........" */
+ 0x01,0x00,0x0A,0x17,0x12,0x0C,0x04,0x0C, /* 00001010 "........" */
+ 0xFF,0xFF,0x11,0x00,0x0A,0x02,0x00,0x0A, /* 00001018 "........" */
+ 0x18,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x11, /* 00001020 "........" */
+ 0x00,0x0A,0x03,0x00,0x0A,0x19,0x12,0x0B, /* 00001028 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x12,0x00,0x00,0x00, /* 00001030 "........" */
+ 0x0A,0x1A,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00001038 "........" */
+ 0x12,0x00,0x01,0x00,0x0A,0x1B,0x12,0x0C, /* 00001040 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x12,0x00,0x0A,0x02, /* 00001048 "........" */
+ 0x00,0x0A,0x1C,0x12,0x0C,0x04,0x0C,0xFF, /* 00001050 "........" */
+ 0xFF,0x12,0x00,0x0A,0x03,0x00,0x0A,0x1D, /* 00001058 "........" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x13,0x00, /* 00001060 "........" */
+ 0x00,0x00,0x0A,0x1E,0x12,0x0B,0x04,0x0C, /* 00001068 "........" */
+ 0xFF,0xFF,0x13,0x00,0x01,0x00,0x0A,0x1F, /* 00001070 "........" */
+ 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x13,0x00, /* 00001078 "........" */
+ 0x0A,0x02,0x00,0x0A,0x20,0x12,0x0C,0x04, /* 00001080 ".... ..." */
+ 0x0C,0xFF,0xFF,0x13,0x00,0x0A,0x03,0x00, /* 00001088 "........" */
+ 0x0A,0x21,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00001090 ".!......" */
+ 0x14,0x00,0x00,0x00,0x0A,0x22,0x12,0x0B, /* 00001098 ".....".." */
+ 0x04,0x0C,0xFF,0xFF,0x14,0x00,0x01,0x00, /* 000010A0 "........" */
+ 0x0A,0x23,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 000010A8 ".#......" */
+ 0x14,0x00,0x0A,0x02,0x00,0x0A,0x24,0x12, /* 000010B0 "......$." */
+ 0x0C,0x04,0x0C,0xFF,0xFF,0x14,0x00,0x0A, /* 000010B8 "........" */
+ 0x03,0x00,0x0A,0x25,0x12,0x0B,0x04,0x0C, /* 000010C0 "...%...." */
+ 0xFF,0xFF,0x15,0x00,0x00,0x00,0x0A,0x26, /* 000010C8 ".......&" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x15,0x00, /* 000010D0 "........" */
+ 0x01,0x00,0x0A,0x27,0x12,0x0C,0x04,0x0C, /* 000010D8 "...'...." */
+ 0xFF,0xFF,0x15,0x00,0x0A,0x02,0x00,0x0A, /* 000010E0 "........" */
+ 0x28,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x15, /* 000010E8 "(......." */
+ 0x00,0x0A,0x03,0x00,0x0A,0x29,0x12,0x0B, /* 000010F0 ".....).." */
+ 0x04,0x0C,0xFF,0xFF,0x16,0x00,0x00,0x00, /* 000010F8 "........" */
+ 0x0A,0x2A,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00001100 ".*......" */
+ 0x16,0x00,0x01,0x00,0x0A,0x2B,0x12,0x0C, /* 00001108 ".....+.." */
+ 0x04,0x0C,0xFF,0xFF,0x16,0x00,0x0A,0x02, /* 00001110 "........" */
+ 0x00,0x0A,0x2C,0x12,0x0C,0x04,0x0C,0xFF, /* 00001118 "..,....." */
+ 0xFF,0x16,0x00,0x0A,0x03,0x00,0x0A,0x2D, /* 00001120 ".......-" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x17,0x00, /* 00001128 "........" */
+ 0x00,0x00,0x0A,0x2E,0x12,0x0B,0x04,0x0C, /* 00001130 "........" */
+ 0xFF,0xFF,0x17,0x00,0x01,0x00,0x0A,0x2F, /* 00001138 "......./" */
+ 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x17,0x00, /* 00001140 "........" */
+ 0x0A,0x02,0x00,0x0A,0x10,0x12,0x0C,0x04, /* 00001148 "........" */
+ 0x0C,0xFF,0xFF,0x17,0x00,0x0A,0x03,0x00, /* 00001150 "........" */
+ 0x0A,0x11,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00001158 "........" */
+ 0x18,0x00,0x00,0x00,0x0A,0x13,0x12,0x0B, /* 00001160 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x18,0x00,0x01,0x00, /* 00001168 "........" */
+ 0x0A,0x14,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00001170 "........" */
+ 0x18,0x00,0x0A,0x02,0x00,0x0A,0x15,0x12, /* 00001178 "........" */
+ 0x0C,0x04,0x0C,0xFF,0xFF,0x18,0x00,0x0A, /* 00001180 "........" */
+ 0x03,0x00,0x0A,0x16,0x12,0x0B,0x04,0x0C, /* 00001188 "........" */
+ 0xFF,0xFF,0x19,0x00,0x00,0x00,0x0A,0x17, /* 00001190 "........" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x19,0x00, /* 00001198 "........" */
+ 0x01,0x00,0x0A,0x18,0x12,0x0C,0x04,0x0C, /* 000011A0 "........" */
+ 0xFF,0xFF,0x19,0x00,0x0A,0x02,0x00,0x0A, /* 000011A8 "........" */
+ 0x19,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x19, /* 000011B0 "........" */
+ 0x00,0x0A,0x03,0x00,0x0A,0x1A,0x12,0x0B, /* 000011B8 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x1A,0x00,0x00,0x00, /* 000011C0 "........" */
+ 0x0A,0x1B,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 000011C8 "........" */
+ 0x1A,0x00,0x01,0x00,0x0A,0x1C,0x12,0x0C, /* 000011D0 "........" */
+ 0x04,0x0C,0xFF,0xFF,0x1A,0x00,0x0A,0x02, /* 000011D8 "........" */
+ 0x00,0x0A,0x1D,0x12,0x0C,0x04,0x0C,0xFF, /* 000011E0 "........" */
+ 0xFF,0x1A,0x00,0x0A,0x03,0x00,0x0A,0x1E, /* 000011E8 "........" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x1B,0x00, /* 000011F0 "........" */
+ 0x00,0x00,0x0A,0x1F,0x12,0x0B,0x04,0x0C, /* 000011F8 "........" */
+ 0xFF,0xFF,0x1B,0x00,0x01,0x00,0x0A,0x20, /* 00001200 "....... " */
+ 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x1B,0x00, /* 00001208 "........" */
+ 0x0A,0x02,0x00,0x0A,0x21,0x12,0x0C,0x04, /* 00001210 "....!..." */
+ 0x0C,0xFF,0xFF,0x1B,0x00,0x0A,0x03,0x00, /* 00001218 "........" */
+ 0x0A,0x22,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00001220 "."......" */
+ 0x1C,0x00,0x00,0x00,0x0A,0x23,0x12,0x0B, /* 00001228 ".....#.." */
+ 0x04,0x0C,0xFF,0xFF,0x1C,0x00,0x01,0x00, /* 00001230 "........" */
+ 0x0A,0x24,0x12,0x0C,0x04,0x0C,0xFF,0xFF, /* 00001238 ".$......" */
+ 0x1C,0x00,0x0A,0x02,0x00,0x0A,0x25,0x12, /* 00001240 "......%." */
+ 0x0C,0x04,0x0C,0xFF,0xFF,0x1C,0x00,0x0A, /* 00001248 "........" */
+ 0x03,0x00,0x0A,0x26,0x12,0x0B,0x04,0x0C, /* 00001250 "...&...." */
+ 0xFF,0xFF,0x1D,0x00,0x00,0x00,0x0A,0x27, /* 00001258 ".......'" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x1D,0x00, /* 00001260 "........" */
+ 0x01,0x00,0x0A,0x28,0x12,0x0C,0x04,0x0C, /* 00001268 "...(...." */
+ 0xFF,0xFF,0x1D,0x00,0x0A,0x02,0x00,0x0A, /* 00001270 "........" */
+ 0x29,0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x1D, /* 00001278 ")......." */
+ 0x00,0x0A,0x03,0x00,0x0A,0x2A,0x12,0x0B, /* 00001280 ".....*.." */
+ 0x04,0x0C,0xFF,0xFF,0x1E,0x00,0x00,0x00, /* 00001288 "........" */
+ 0x0A,0x2B,0x12,0x0B,0x04,0x0C,0xFF,0xFF, /* 00001290 ".+......" */
+ 0x1E,0x00,0x01,0x00,0x0A,0x2C,0x12,0x0C, /* 00001298 ".....,.." */
+ 0x04,0x0C,0xFF,0xFF,0x1E,0x00,0x0A,0x02, /* 000012A0 "........" */
+ 0x00,0x0A,0x2D,0x12,0x0C,0x04,0x0C,0xFF, /* 000012A8 "..-....." */
+ 0xFF,0x1E,0x00,0x0A,0x03,0x00,0x0A,0x2E, /* 000012B0 "........" */
+ 0x12,0x0B,0x04,0x0C,0xFF,0xFF,0x1F,0x00, /* 000012B8 "........" */
+ 0x00,0x00,0x0A,0x2F,0x12,0x0B,0x04,0x0C, /* 000012C0 ".../...." */
+ 0xFF,0xFF,0x1F,0x00,0x01,0x00,0x0A,0x10, /* 000012C8 "........" */
+ 0x12,0x0C,0x04,0x0C,0xFF,0xFF,0x1F,0x00, /* 000012D0 "........" */
+ 0x0A,0x02,0x00,0x0A,0x11,0x12,0x0C,0x04, /* 000012D8 "........" */
+ 0x0C,0xFF,0xFF,0x1F,0x00,0x0A,0x03,0x00, /* 000012E0 "........" */
+ 0x0A,0x12,0x5B,0x82,0x46,0x37,0x49,0x53, /* 000012E8 "..[.F7IS" */
+ 0x41,0x5F,0x08,0x5F,0x41,0x44,0x52,0x0C, /* 000012F0 "A_._ADR." */
+ 0x00,0x00,0x01,0x00,0x5B,0x80,0x50,0x49, /* 000012F8 "....[.PI" */
+ 0x52,0x51,0x02,0x0A,0x60,0x0A,0x04,0x10, /* 00001300 "RQ..`..." */
+ 0x2E,0x5C,0x00,0x5B,0x81,0x29,0x5C,0x2F, /* 00001308 ".\.[.)\/" */
+ 0x04,0x5F,0x53,0x42,0x5F,0x50,0x43,0x49, /* 00001310 "._SB_PCI" */
+ 0x30,0x49,0x53,0x41,0x5F,0x50,0x49,0x52, /* 00001318 "0ISA_PIR" */
+ 0x51,0x01,0x50,0x49,0x52,0x41,0x08,0x50, /* 00001320 "Q.PIRA.P" */
+ 0x49,0x52,0x42,0x08,0x50,0x49,0x52,0x43, /* 00001328 "IRB.PIRC" */
+ 0x08,0x50,0x49,0x52,0x44,0x08,0x5B,0x82, /* 00001330 ".PIRD.[." */
+ 0x46,0x0B,0x53,0x59,0x53,0x52,0x08,0x5F, /* 00001338 "F.SYSR._" */
+ 0x48,0x49,0x44,0x0C,0x41,0xD0,0x0C,0x02, /* 00001340 "HID.A..." */
+ 0x08,0x5F,0x55,0x49,0x44,0x01,0x08,0x43, /* 00001348 "._UID..C" */
+ 0x52,0x53,0x5F,0x11,0x4E,0x08,0x0A,0x8A, /* 00001350 "RS_.N..." */
+ 0x47,0x01,0x10,0x00,0x10,0x00,0x00,0x10, /* 00001358 "G......." */
+ 0x47,0x01,0x22,0x00,0x22,0x00,0x00,0x0C, /* 00001360 "G."."..." */
+ 0x47,0x01,0x30,0x00,0x30,0x00,0x00,0x10, /* 00001368 "G.0.0..." */
+ 0x47,0x01,0x44,0x00,0x44,0x00,0x00,0x1C, /* 00001370 "G.D.D..." */
+ 0x47,0x01,0x62,0x00,0x62,0x00,0x00,0x02, /* 00001378 "G.b.b..." */
+ 0x47,0x01,0x65,0x00,0x65,0x00,0x00,0x0B, /* 00001380 "G.e.e..." */
+ 0x47,0x01,0x72,0x00,0x72,0x00,0x00,0x0E, /* 00001388 "G.r.r..." */
+ 0x47,0x01,0x80,0x00,0x80,0x00,0x00,0x01, /* 00001390 "G......." */
+ 0x47,0x01,0x84,0x00,0x84,0x00,0x00,0x03, /* 00001398 "G......." */
+ 0x47,0x01,0x88,0x00,0x88,0x00,0x00,0x01, /* 000013A0 "G......." */
+ 0x47,0x01,0x8C,0x00,0x8C,0x00,0x00,0x03, /* 000013A8 "G......." */
+ 0x47,0x01,0x90,0x00,0x90,0x00,0x00,0x10, /* 000013B0 "G......." */
+ 0x47,0x01,0xA2,0x00,0xA2,0x00,0x00,0x1C, /* 000013B8 "G......." */
+ 0x47,0x01,0xE0,0x00,0xE0,0x00,0x00,0x10, /* 000013C0 "G......." */
+ 0x47,0x01,0xA0,0x08,0xA0,0x08,0x00,0x04, /* 000013C8 "G......." */
+ 0x47,0x01,0xC0,0x0C,0xC0,0x0C,0x00,0x10, /* 000013D0 "G......." */
+ 0x47,0x01,0xD0,0x04,0xD0,0x04,0x00,0x02, /* 000013D8 "G......." */
+ 0x79,0x00,0x14,0x0B,0x5F,0x43,0x52,0x53, /* 000013E0 "y..._CRS" */
+ 0x00,0xA4,0x43,0x52,0x53,0x5F,0x5B,0x82, /* 000013E8 "..CRS_[." */
+ 0x2B,0x50,0x49,0x43,0x5F,0x08,0x5F,0x48, /* 000013F0 "+PIC_._H" */
+ 0x49,0x44,0x0B,0x41,0xD0,0x08,0x5F,0x43, /* 000013F8 "ID.A.._C" */
+ 0x52,0x53,0x11,0x18,0x0A,0x15,0x47,0x01, /* 00001400 "RS....G." */
+ 0x20,0x00,0x20,0x00,0x01,0x02,0x47,0x01, /* 00001408 " . ...G." */
+ 0xA0,0x00,0xA0,0x00,0x01,0x02,0x22,0x04, /* 00001410 "......"." */
+ 0x00,0x79,0x00,0x5B,0x82,0x47,0x05,0x44, /* 00001418 ".y.[.G.D" */
+ 0x4D,0x41,0x30,0x08,0x5F,0x48,0x49,0x44, /* 00001420 "MA0._HID" */
+ 0x0C,0x41,0xD0,0x02,0x00,0x08,0x5F,0x43, /* 00001428 ".A...._C" */
+ 0x52,0x53,0x11,0x41,0x04,0x0A,0x3D,0x2A, /* 00001430 "RS.A..=*" */
+ 0x10,0x04,0x47,0x01,0x00,0x00,0x00,0x00, /* 00001438 "..G....." */
+ 0x00,0x10,0x47,0x01,0x81,0x00,0x81,0x00, /* 00001440 "..G....." */
+ 0x00,0x03,0x47,0x01,0x87,0x00,0x87,0x00, /* 00001448 "..G....." */
+ 0x00,0x01,0x47,0x01,0x89,0x00,0x89,0x00, /* 00001450 "..G....." */
+ 0x00,0x03,0x47,0x01,0x8F,0x00,0x8F,0x00, /* 00001458 "..G....." */
+ 0x00,0x01,0x47,0x01,0xC0,0x00,0xC0,0x00, /* 00001460 "..G....." */
+ 0x00,0x20,0x47,0x01,0x80,0x04,0x80,0x04, /* 00001468 ". G....." */
+ 0x00,0x10,0x79,0x00,0x5B,0x82,0x25,0x54, /* 00001470 "..y.[.%T" */
+ 0x4D,0x52,0x5F,0x08,0x5F,0x48,0x49,0x44, /* 00001478 "MR_._HID" */
+ 0x0C,0x41,0xD0,0x01,0x00,0x08,0x5F,0x43, /* 00001480 ".A...._C" */
+ 0x52,0x53,0x11,0x10,0x0A,0x0D,0x47,0x01, /* 00001488 "RS....G." */
+ 0x40,0x00,0x40,0x00,0x00,0x04,0x22,0x01, /* 00001490 "@.@..."." */
+ 0x00,0x79,0x00,0x5B,0x82,0x25,0x52,0x54, /* 00001498 ".y.[.%RT" */
+ 0x43,0x5F,0x08,0x5F,0x48,0x49,0x44,0x0C, /* 000014A0 "C_._HID." */
+ 0x41,0xD0,0x0B,0x00,0x08,0x5F,0x43,0x52, /* 000014A8 "A...._CR" */
+ 0x53,0x11,0x10,0x0A,0x0D,0x47,0x01,0x70, /* 000014B0 "S....G.p" */
+ 0x00,0x70,0x00,0x00,0x02,0x22,0x00,0x01, /* 000014B8 ".p...".." */
+ 0x79,0x00,0x5B,0x82,0x22,0x53,0x50,0x4B, /* 000014C0 "y.[."SPK" */
+ 0x52,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 000014C8 "R._HID.A" */
+ 0xD0,0x08,0x00,0x08,0x5F,0x43,0x52,0x53, /* 000014D0 "...._CRS" */
+ 0x11,0x0D,0x0A,0x0A,0x47,0x01,0x61,0x00, /* 000014D8 "....G.a." */
+ 0x61,0x00,0x00,0x01,0x79,0x00,0x5B,0x82, /* 000014E0 "a...y.[." */
+ 0x31,0x50,0x53,0x32,0x4D,0x08,0x5F,0x48, /* 000014E8 "1PS2M._H" */
+ 0x49,0x44,0x0C,0x41,0xD0,0x0F,0x13,0x08, /* 000014F0 "ID.A...." */
+ 0x5F,0x43,0x49,0x44,0x0C,0x41,0xD0,0x0F, /* 000014F8 "_CID.A.." */
+ 0x13,0x14,0x09,0x5F,0x53,0x54,0x41,0x00, /* 00001500 "..._STA." */
+ 0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53, /* 00001508 "...._CRS" */
+ 0x11,0x08,0x0A,0x05,0x22,0x00,0x10,0x79, /* 00001510 "...."..y" */
+ 0x00,0x5B,0x82,0x42,0x04,0x50,0x53,0x32, /* 00001518 ".[.B.PS2" */
+ 0x4B,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 00001520 "K._HID.A" */
+ 0xD0,0x03,0x03,0x08,0x5F,0x43,0x49,0x44, /* 00001528 "...._CID" */
+ 0x0C,0x41,0xD0,0x03,0x0B,0x14,0x09,0x5F, /* 00001530 ".A....._" */
+ 0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F,0x08, /* 00001538 "STA....." */
+ 0x5F,0x43,0x52,0x53,0x11,0x18,0x0A,0x15, /* 00001540 "_CRS...." */
+ 0x47,0x01,0x60,0x00,0x60,0x00,0x00,0x01, /* 00001548 "G.`.`..." */
+ 0x47,0x01,0x64,0x00,0x64,0x00,0x00,0x01, /* 00001550 "G.d.d..." */
+ 0x22,0x02,0x00,0x79,0x00,0x5B,0x82,0x3A, /* 00001558 ""..y.[.:" */
+ 0x46,0x44,0x43,0x30,0x08,0x5F,0x48,0x49, /* 00001560 "FDC0._HI" */
+ 0x44,0x0C,0x41,0xD0,0x07,0x00,0x14,0x09, /* 00001568 "D.A....." */
+ 0x5F,0x53,0x54,0x41,0x00,0xA4,0x0A,0x0F, /* 00001570 "_STA...." */
+ 0x08,0x5F,0x43,0x52,0x53,0x11,0x1B,0x0A, /* 00001578 "._CRS..." */
+ 0x18,0x47,0x01,0xF0,0x03,0xF0,0x03,0x01, /* 00001580 ".G......" */
+ 0x06,0x47,0x01,0xF7,0x03,0xF7,0x03,0x01, /* 00001588 ".G......" */
+ 0x01,0x22,0x40,0x00,0x2A,0x04,0x00,0x79, /* 00001590 "."@.*..y" */
+ 0x00,0x5B,0x82,0x46,0x04,0x55,0x41,0x52, /* 00001598 ".[.F.UAR" */
+ 0x31,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 000015A0 "1._HID.A" */
+ 0xD0,0x05,0x01,0x08,0x5F,0x55,0x49,0x44, /* 000015A8 "...._UID" */
+ 0x01,0x14,0x19,0x5F,0x53,0x54,0x41,0x00, /* 000015B0 "..._STA." */
+ 0xA0,0x0D,0x93,0x5E,0x5E,0x5E,0x5E,0x55, /* 000015B8 "...^^^^U" */
+ 0x41,0x52,0x31,0x00,0xA4,0x00,0xA1,0x04, /* 000015C0 "AR1....." */
+ 0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52,0x53, /* 000015C8 "...._CRS" */
+ 0x11,0x10,0x0A,0x0D,0x47,0x01,0xF8,0x03, /* 000015D0 "....G..." */
+ 0xF8,0x03,0x08,0x08,0x22,0x10,0x00,0x79, /* 000015D8 "...."..y" */
+ 0x00,0x5B,0x82,0x47,0x04,0x55,0x41,0x52, /* 000015E0 ".[.G.UAR" */
+ 0x32,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 000015E8 "2._HID.A" */
+ 0xD0,0x05,0x01,0x08,0x5F,0x55,0x49,0x44, /* 000015F0 "...._UID" */
+ 0x0A,0x02,0x14,0x19,0x5F,0x53,0x54,0x41, /* 000015F8 "...._STA" */
+ 0x00,0xA0,0x0D,0x93,0x5E,0x5E,0x5E,0x5E, /* 00001600 "....^^^^" */
+ 0x55,0x41,0x52,0x32,0x00,0xA4,0x00,0xA1, /* 00001608 "UAR2...." */
+ 0x04,0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52, /* 00001610 "....._CR" */
+ 0x53,0x11,0x10,0x0A,0x0D,0x47,0x01,0xF8, /* 00001618 "S....G.." */
+ 0x02,0xF8,0x02,0x08,0x08,0x22,0x08,0x00, /* 00001620 ".....".." */
+ 0x79,0x00,0x5B,0x82,0x36,0x4C,0x54,0x50, /* 00001628 "y.[.6LTP" */
+ 0x31,0x08,0x5F,0x48,0x49,0x44,0x0C,0x41, /* 00001630 "1._HID.A" */
+ 0xD0,0x04,0x00,0x08,0x5F,0x55,0x49,0x44, /* 00001638 "...._UID" */
+ 0x0A,0x02,0x14,0x09,0x5F,0x53,0x54,0x41, /* 00001640 "...._STA" */
+ 0x00,0xA4,0x0A,0x0F,0x08,0x5F,0x43,0x52, /* 00001648 "....._CR" */
+ 0x53,0x11,0x10,0x0A,0x0D,0x47,0x01,0x78, /* 00001650 "S....G.x" */
+ 0x03,0x78,0x03,0x08,0x08,0x22,0x80,0x00, /* 00001658 ".x...".." */
+ 0x79,0x00,0x5B,0x82,0x49,0x0A,0x53,0x30, /* 00001660 "y.[.I.S0" */
+ 0x30,0x5F,0x08,0x5F,0x41,0x44,0x52,0x00, /* 00001668 "0_._ADR." */
+ 0x08,0x5F,0x53,0x55,0x4E,0x00,0x14,0x1F, /* 00001670 "._SUN..." */
+ 0x5F,0x50,0x53,0x30,0x00,0x70,0x00,0x5C, /* 00001678 "_PS0.p.\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001680 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00001688 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x1F, /* 00001690 "PEDPT2.." */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x00,0x5C, /* 00001698 "_PS3.p.\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000016A0 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F,0x47, /* 000016A8 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x2B, /* 000016B0 "PEDPT2.+" */
+ 0x5F,0x45,0x4A,0x30,0x01,0x70,0x00,0x5C, /* 000016B8 "_EJ0.p.\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000016C0 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x88,0x5C,0x2E,0x5F,0x47, /* 000016C8 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x70,0x01, /* 000016D0 "PEDPT2p." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x50,0x48, /* 000016D8 "\._GPEPH" */
+ 0x30,0x30,0x14,0x2A,0x5F,0x53,0x54,0x41, /* 000016E0 "00.*_STA" */
+ 0x00,0x70,0x00,0x5C,0x2E,0x5F,0x47,0x50, /* 000016E8 ".p.\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x89, /* 000016F0 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 000016F8 "\._GPEDP" */
+ 0x54,0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50, /* 00001700 "T2.\._GP" */
+ 0x45,0x50,0x48,0x30,0x30,0x5B,0x82,0x4D, /* 00001708 "EPH00[.M" */
+ 0x0A,0x53,0x30,0x31,0x5F,0x08,0x5F,0x41, /* 00001710 ".S01_._A" */
+ 0x44,0x52,0x0C,0x00,0x00,0x01,0x00,0x08, /* 00001718 "DR......" */
+ 0x5F,0x53,0x55,0x4E,0x01,0x14,0x1F,0x5F, /* 00001720 "_SUN..._" */
+ 0x50,0x53,0x30,0x00,0x70,0x01,0x5C,0x2E, /* 00001728 "PS0.p.\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00001730 "_GPEDPT1" */
+ 0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47,0x50, /* 00001738 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0x14,0x1F,0x5F, /* 00001740 "EDPT2.._" */
+ 0x50,0x53,0x33,0x00,0x70,0x01,0x5C,0x2E, /* 00001748 "PS3.p.\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00001750 "_GPEDPT1" */
+ 0x70,0x0A,0x83,0x5C,0x2E,0x5F,0x47,0x50, /* 00001758 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0x14,0x2B,0x5F, /* 00001760 "EDPT2.+_" */
+ 0x45,0x4A,0x30,0x01,0x70,0x01,0x5C,0x2E, /* 00001768 "EJ0.p.\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00001770 "_GPEDPT1" */
+ 0x70,0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50, /* 00001778 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0x70,0x01,0x5C, /* 00001780 "EDPT2p.\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x50,0x48,0x30, /* 00001788 "._GPEPH0" */
+ 0x31,0x14,0x2A,0x5F,0x53,0x54,0x41,0x00, /* 00001790 "1.*_STA." */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001798 "p.\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 000017A0 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000017A8 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000017B0 "2.\._GPE" */
+ 0x50,0x48,0x30,0x31,0x5B,0x82,0x42,0x0B, /* 000017B8 "PH01[.B." */
+ 0x53,0x30,0x32,0x5F,0x08,0x5F,0x41,0x44, /* 000017C0 "S02_._AD" */
+ 0x52,0x0C,0x00,0x00,0x02,0x00,0x08,0x5F, /* 000017C8 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x02,0x14,0x20,0x5F, /* 000017D0 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x02,0x5C, /* 000017D8 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000017E0 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 000017E8 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 000017F0 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x02, /* 000017F8 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001800 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00001808 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00001810 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00001818 ",_EJ0.p." */
+ 0x02,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001820 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00001828 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00001830 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001838 "p.\._GPE" */
+ 0x50,0x48,0x30,0x32,0x14,0x2B,0x5F,0x53, /* 00001840 "PH02.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x02,0x5C,0x2E, /* 00001848 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00001850 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00001858 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00001860 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x32, /* 00001868 "_GPEPH02" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x30,0x33,0x5F, /* 00001870 "[.B.S03_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00001878 "._ADR..." */
+ 0x03,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00001880 "..._SUN." */
+ 0x03,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00001888 ".. _PS0." */
+ 0x70,0x0A,0x03,0x5C,0x2E,0x5F,0x47,0x50, /* 00001890 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00001898 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 000018A0 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 000018A8 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x03,0x5C,0x2E,0x5F,0x47, /* 000018B0 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 000018B8 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 000018C0 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 000018C8 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x03,0x5C,0x2E,0x5F, /* 000018D0 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 000018D8 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000018E0 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 000018E8 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x33, /* 000018F0 "_GPEPH03" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 000018F8 ".+_STA.p" */
+ 0x0A,0x03,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001900 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00001908 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001910 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001918 "2.\._GPE" */
+ 0x50,0x48,0x30,0x33,0x5B,0x82,0x42,0x0B, /* 00001920 "PH03[.B." */
+ 0x53,0x30,0x34,0x5F,0x08,0x5F,0x41,0x44, /* 00001928 "S04_._AD" */
+ 0x52,0x0C,0x00,0x00,0x04,0x00,0x08,0x5F, /* 00001930 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x04,0x14,0x20,0x5F, /* 00001938 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x04,0x5C, /* 00001940 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001948 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00001950 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00001958 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x04, /* 00001960 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001968 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00001970 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00001978 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00001980 ",_EJ0.p." */
+ 0x04,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001988 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00001990 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00001998 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000019A0 "p.\._GPE" */
+ 0x50,0x48,0x30,0x34,0x14,0x2B,0x5F,0x53, /* 000019A8 "PH04.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x04,0x5C,0x2E, /* 000019B0 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 000019B8 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 000019C0 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 000019C8 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x34, /* 000019D0 "_GPEPH04" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x30,0x35,0x5F, /* 000019D8 "[.B.S05_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 000019E0 "._ADR..." */
+ 0x05,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 000019E8 "..._SUN." */
+ 0x05,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 000019F0 ".. _PS0." */
+ 0x70,0x0A,0x05,0x5C,0x2E,0x5F,0x47,0x50, /* 000019F8 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00001A00 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001A08 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00001A10 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x05,0x5C,0x2E,0x5F,0x47, /* 00001A18 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00001A20 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001A28 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00001A30 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x05,0x5C,0x2E,0x5F, /* 00001A38 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00001A40 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001A48 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00001A50 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x35, /* 00001A58 "_GPEPH05" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00001A60 ".+_STA.p" */
+ 0x0A,0x05,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001A68 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00001A70 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001A78 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001A80 "2.\._GPE" */
+ 0x50,0x48,0x30,0x35,0x5B,0x82,0x42,0x0B, /* 00001A88 "PH05[.B." */
+ 0x53,0x30,0x36,0x5F,0x08,0x5F,0x41,0x44, /* 00001A90 "S06_._AD" */
+ 0x52,0x0C,0x00,0x00,0x06,0x00,0x08,0x5F, /* 00001A98 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x06,0x14,0x20,0x5F, /* 00001AA0 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x06,0x5C, /* 00001AA8 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001AB0 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00001AB8 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00001AC0 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x06, /* 00001AC8 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001AD0 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00001AD8 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00001AE0 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00001AE8 ",_EJ0.p." */
+ 0x06,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001AF0 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00001AF8 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00001B00 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001B08 "p.\._GPE" */
+ 0x50,0x48,0x30,0x36,0x14,0x2B,0x5F,0x53, /* 00001B10 "PH06.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x06,0x5C,0x2E, /* 00001B18 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00001B20 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00001B28 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00001B30 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x36, /* 00001B38 "_GPEPH06" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x30,0x37,0x5F, /* 00001B40 "[.B.S07_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00001B48 "._ADR..." */
+ 0x07,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00001B50 "..._SUN." */
+ 0x07,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00001B58 ".. _PS0." */
+ 0x70,0x0A,0x07,0x5C,0x2E,0x5F,0x47,0x50, /* 00001B60 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00001B68 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001B70 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00001B78 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x07,0x5C,0x2E,0x5F,0x47, /* 00001B80 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00001B88 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001B90 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00001B98 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x07,0x5C,0x2E,0x5F, /* 00001BA0 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00001BA8 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001BB0 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00001BB8 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x37, /* 00001BC0 "_GPEPH07" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00001BC8 ".+_STA.p" */
+ 0x0A,0x07,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001BD0 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00001BD8 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001BE0 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001BE8 "2.\._GPE" */
+ 0x50,0x48,0x30,0x37,0x5B,0x82,0x42,0x0B, /* 00001BF0 "PH07[.B." */
+ 0x53,0x30,0x38,0x5F,0x08,0x5F,0x41,0x44, /* 00001BF8 "S08_._AD" */
+ 0x52,0x0C,0x00,0x00,0x08,0x00,0x08,0x5F, /* 00001C00 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x08,0x14,0x20,0x5F, /* 00001C08 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x08,0x5C, /* 00001C10 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001C18 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00001C20 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00001C28 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x08, /* 00001C30 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001C38 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00001C40 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00001C48 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00001C50 ",_EJ0.p." */
+ 0x08,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001C58 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00001C60 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00001C68 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001C70 "p.\._GPE" */
+ 0x50,0x48,0x30,0x38,0x14,0x2B,0x5F,0x53, /* 00001C78 "PH08.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x08,0x5C,0x2E, /* 00001C80 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00001C88 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00001C90 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00001C98 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x38, /* 00001CA0 "_GPEPH08" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x30,0x39,0x5F, /* 00001CA8 "[.B.S09_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00001CB0 "._ADR..." */
+ 0x09,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00001CB8 "..._SUN." */
+ 0x09,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00001CC0 ".. _PS0." */
+ 0x70,0x0A,0x09,0x5C,0x2E,0x5F,0x47,0x50, /* 00001CC8 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00001CD0 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001CD8 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00001CE0 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x09,0x5C,0x2E,0x5F,0x47, /* 00001CE8 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00001CF0 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001CF8 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00001D00 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x09,0x5C,0x2E,0x5F, /* 00001D08 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00001D10 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001D18 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00001D20 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x39, /* 00001D28 "_GPEPH09" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00001D30 ".+_STA.p" */
+ 0x0A,0x09,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001D38 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00001D40 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001D48 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001D50 "2.\._GPE" */
+ 0x50,0x48,0x30,0x39,0x5B,0x82,0x42,0x0B, /* 00001D58 "PH09[.B." */
+ 0x53,0x30,0x41,0x5F,0x08,0x5F,0x41,0x44, /* 00001D60 "S0A_._AD" */
+ 0x52,0x0C,0x00,0x00,0x0A,0x00,0x08,0x5F, /* 00001D68 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x0A,0x14,0x20,0x5F, /* 00001D70 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x0A,0x5C, /* 00001D78 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001D80 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00001D88 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00001D90 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x0A, /* 00001D98 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001DA0 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00001DA8 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00001DB0 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00001DB8 ",_EJ0.p." */
+ 0x0A,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001DC0 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00001DC8 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00001DD0 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001DD8 "p.\._GPE" */
+ 0x50,0x48,0x30,0x41,0x14,0x2B,0x5F,0x53, /* 00001DE0 "PH0A.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x0A,0x5C,0x2E, /* 00001DE8 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00001DF0 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00001DF8 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00001E00 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x41, /* 00001E08 "_GPEPH0A" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x30,0x42,0x5F, /* 00001E10 "[.B.S0B_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00001E18 "._ADR..." */
+ 0x0B,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00001E20 "..._SUN." */
+ 0x0B,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00001E28 ".. _PS0." */
+ 0x70,0x0A,0x0B,0x5C,0x2E,0x5F,0x47,0x50, /* 00001E30 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00001E38 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001E40 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00001E48 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x0B,0x5C,0x2E,0x5F,0x47, /* 00001E50 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00001E58 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001E60 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00001E68 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x0B,0x5C,0x2E,0x5F, /* 00001E70 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00001E78 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001E80 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00001E88 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x42, /* 00001E90 "_GPEPH0B" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00001E98 ".+_STA.p" */
+ 0x0A,0x0B,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001EA0 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00001EA8 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001EB0 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001EB8 "2.\._GPE" */
+ 0x50,0x48,0x30,0x42,0x5B,0x82,0x42,0x0B, /* 00001EC0 "PH0B[.B." */
+ 0x53,0x30,0x43,0x5F,0x08,0x5F,0x41,0x44, /* 00001EC8 "S0C_._AD" */
+ 0x52,0x0C,0x00,0x00,0x0C,0x00,0x08,0x5F, /* 00001ED0 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x0C,0x14,0x20,0x5F, /* 00001ED8 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x0C,0x5C, /* 00001EE0 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00001EE8 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00001EF0 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00001EF8 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x0C, /* 00001F00 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001F08 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00001F10 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00001F18 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00001F20 ",_EJ0.p." */
+ 0x0C,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001F28 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00001F30 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00001F38 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001F40 "p.\._GPE" */
+ 0x50,0x48,0x30,0x43,0x14,0x2B,0x5F,0x53, /* 00001F48 "PH0C.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x0C,0x5C,0x2E, /* 00001F50 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00001F58 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00001F60 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00001F68 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x43, /* 00001F70 "_GPEPH0C" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x30,0x44,0x5F, /* 00001F78 "[.B.S0D_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00001F80 "._ADR..." */
+ 0x0D,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00001F88 "..._SUN." */
+ 0x0D,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00001F90 ".. _PS0." */
+ 0x70,0x0A,0x0D,0x5C,0x2E,0x5F,0x47,0x50, /* 00001F98 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00001FA0 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00001FA8 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00001FB0 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x0D,0x5C,0x2E,0x5F,0x47, /* 00001FB8 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00001FC0 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00001FC8 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00001FD0 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x0D,0x5C,0x2E,0x5F, /* 00001FD8 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00001FE0 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00001FE8 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00001FF0 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x44, /* 00001FF8 "_GPEPH0D" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00002000 ".+_STA.p" */
+ 0x0A,0x0D,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002008 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00002010 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002018 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002020 "2.\._GPE" */
+ 0x50,0x48,0x30,0x44,0x5B,0x82,0x42,0x0B, /* 00002028 "PH0D[.B." */
+ 0x53,0x30,0x45,0x5F,0x08,0x5F,0x41,0x44, /* 00002030 "S0E_._AD" */
+ 0x52,0x0C,0x00,0x00,0x0E,0x00,0x08,0x5F, /* 00002038 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x0E,0x14,0x20,0x5F, /* 00002040 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x0E,0x5C, /* 00002048 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002050 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00002058 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00002060 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x0E, /* 00002068 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002070 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00002078 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00002080 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00002088 ",_EJ0.p." */
+ 0x0E,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002090 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00002098 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 000020A0 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000020A8 "p.\._GPE" */
+ 0x50,0x48,0x30,0x45,0x14,0x2B,0x5F,0x53, /* 000020B0 "PH0E.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x0E,0x5C,0x2E, /* 000020B8 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 000020C0 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 000020C8 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 000020D0 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x45, /* 000020D8 "_GPEPH0E" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x30,0x46,0x5F, /* 000020E0 "[.B.S0F_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 000020E8 "._ADR..." */
+ 0x0F,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 000020F0 "..._SUN." */
+ 0x0F,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 000020F8 ".. _PS0." */
+ 0x70,0x0A,0x0F,0x5C,0x2E,0x5F,0x47,0x50, /* 00002100 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00002108 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002110 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00002118 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x0F,0x5C,0x2E,0x5F,0x47, /* 00002120 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00002128 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002130 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00002138 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x0F,0x5C,0x2E,0x5F, /* 00002140 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00002148 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002150 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00002158 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x30,0x46, /* 00002160 "_GPEPH0F" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00002168 ".+_STA.p" */
+ 0x0A,0x0F,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002170 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00002178 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002180 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002188 "2.\._GPE" */
+ 0x50,0x48,0x30,0x46,0x5B,0x82,0x42,0x0B, /* 00002190 "PH0F[.B." */
+ 0x53,0x31,0x30,0x5F,0x08,0x5F,0x41,0x44, /* 00002198 "S10_._AD" */
+ 0x52,0x0C,0x00,0x00,0x10,0x00,0x08,0x5F, /* 000021A0 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x10,0x14,0x20,0x5F, /* 000021A8 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x10,0x5C, /* 000021B0 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000021B8 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 000021C0 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 000021C8 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x10, /* 000021D0 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 000021D8 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 000021E0 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 000021E8 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 000021F0 ",_EJ0.p." */
+ 0x10,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 000021F8 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00002200 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00002208 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002210 "p.\._GPE" */
+ 0x50,0x48,0x31,0x30,0x14,0x2B,0x5F,0x53, /* 00002218 "PH10.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x10,0x5C,0x2E, /* 00002220 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00002228 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00002230 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00002238 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x30, /* 00002240 "_GPEPH10" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x31,0x31,0x5F, /* 00002248 "[.B.S11_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00002250 "._ADR..." */
+ 0x11,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00002258 "..._SUN." */
+ 0x11,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00002260 ".. _PS0." */
+ 0x70,0x0A,0x11,0x5C,0x2E,0x5F,0x47,0x50, /* 00002268 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00002270 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002278 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00002280 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x11,0x5C,0x2E,0x5F,0x47, /* 00002288 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00002290 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002298 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 000022A0 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x11,0x5C,0x2E,0x5F, /* 000022A8 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 000022B0 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000022B8 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 000022C0 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x31, /* 000022C8 "_GPEPH11" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 000022D0 ".+_STA.p" */
+ 0x0A,0x11,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000022D8 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 000022E0 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000022E8 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000022F0 "2.\._GPE" */
+ 0x50,0x48,0x31,0x31,0x5B,0x82,0x42,0x0B, /* 000022F8 "PH11[.B." */
+ 0x53,0x31,0x32,0x5F,0x08,0x5F,0x41,0x44, /* 00002300 "S12_._AD" */
+ 0x52,0x0C,0x00,0x00,0x12,0x00,0x08,0x5F, /* 00002308 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x12,0x14,0x20,0x5F, /* 00002310 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x12,0x5C, /* 00002318 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002320 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00002328 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00002330 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x12, /* 00002338 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002340 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00002348 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00002350 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00002358 ",_EJ0.p." */
+ 0x12,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002360 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00002368 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00002370 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002378 "p.\._GPE" */
+ 0x50,0x48,0x31,0x32,0x14,0x2B,0x5F,0x53, /* 00002380 "PH12.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x12,0x5C,0x2E, /* 00002388 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00002390 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00002398 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 000023A0 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x32, /* 000023A8 "_GPEPH12" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x31,0x33,0x5F, /* 000023B0 "[.B.S13_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 000023B8 "._ADR..." */
+ 0x13,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 000023C0 "..._SUN." */
+ 0x13,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 000023C8 ".. _PS0." */
+ 0x70,0x0A,0x13,0x5C,0x2E,0x5F,0x47,0x50, /* 000023D0 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 000023D8 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 000023E0 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 000023E8 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x13,0x5C,0x2E,0x5F,0x47, /* 000023F0 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 000023F8 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002400 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00002408 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x13,0x5C,0x2E,0x5F, /* 00002410 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00002418 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002420 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00002428 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x33, /* 00002430 "_GPEPH13" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00002438 ".+_STA.p" */
+ 0x0A,0x13,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002440 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00002448 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002450 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002458 "2.\._GPE" */
+ 0x50,0x48,0x31,0x33,0x5B,0x82,0x42,0x0B, /* 00002460 "PH13[.B." */
+ 0x53,0x31,0x34,0x5F,0x08,0x5F,0x41,0x44, /* 00002468 "S14_._AD" */
+ 0x52,0x0C,0x00,0x00,0x14,0x00,0x08,0x5F, /* 00002470 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x14,0x14,0x20,0x5F, /* 00002478 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x14,0x5C, /* 00002480 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002488 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00002490 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00002498 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x14, /* 000024A0 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 000024A8 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 000024B0 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 000024B8 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 000024C0 ",_EJ0.p." */
+ 0x14,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 000024C8 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 000024D0 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 000024D8 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000024E0 "p.\._GPE" */
+ 0x50,0x48,0x31,0x34,0x14,0x2B,0x5F,0x53, /* 000024E8 "PH14.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x14,0x5C,0x2E, /* 000024F0 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 000024F8 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00002500 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00002508 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x34, /* 00002510 "_GPEPH14" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x31,0x35,0x5F, /* 00002518 "[.B.S15_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00002520 "._ADR..." */
+ 0x15,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00002528 "..._SUN." */
+ 0x15,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00002530 ".. _PS0." */
+ 0x70,0x0A,0x15,0x5C,0x2E,0x5F,0x47,0x50, /* 00002538 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00002540 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002548 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00002550 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x15,0x5C,0x2E,0x5F,0x47, /* 00002558 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00002560 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002568 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00002570 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x15,0x5C,0x2E,0x5F, /* 00002578 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00002580 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002588 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00002590 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x35, /* 00002598 "_GPEPH15" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 000025A0 ".+_STA.p" */
+ 0x0A,0x15,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000025A8 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 000025B0 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000025B8 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000025C0 "2.\._GPE" */
+ 0x50,0x48,0x31,0x35,0x5B,0x82,0x42,0x0B, /* 000025C8 "PH15[.B." */
+ 0x53,0x31,0x36,0x5F,0x08,0x5F,0x41,0x44, /* 000025D0 "S16_._AD" */
+ 0x52,0x0C,0x00,0x00,0x16,0x00,0x08,0x5F, /* 000025D8 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x16,0x14,0x20,0x5F, /* 000025E0 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x16,0x5C, /* 000025E8 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000025F0 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 000025F8 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00002600 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x16, /* 00002608 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002610 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00002618 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00002620 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00002628 ",_EJ0.p." */
+ 0x16,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002630 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00002638 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00002640 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002648 "p.\._GPE" */
+ 0x50,0x48,0x31,0x36,0x14,0x2B,0x5F,0x53, /* 00002650 "PH16.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x16,0x5C,0x2E, /* 00002658 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00002660 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00002668 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00002670 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x36, /* 00002678 "_GPEPH16" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x31,0x37,0x5F, /* 00002680 "[.B.S17_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00002688 "._ADR..." */
+ 0x17,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00002690 "..._SUN." */
+ 0x17,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00002698 ".. _PS0." */
+ 0x70,0x0A,0x17,0x5C,0x2E,0x5F,0x47,0x50, /* 000026A0 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 000026A8 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 000026B0 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 000026B8 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x17,0x5C,0x2E,0x5F,0x47, /* 000026C0 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 000026C8 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 000026D0 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 000026D8 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x17,0x5C,0x2E,0x5F, /* 000026E0 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 000026E8 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000026F0 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 000026F8 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x37, /* 00002700 "_GPEPH17" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00002708 ".+_STA.p" */
+ 0x0A,0x17,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002710 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00002718 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002720 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002728 "2.\._GPE" */
+ 0x50,0x48,0x31,0x37,0x5B,0x82,0x42,0x0B, /* 00002730 "PH17[.B." */
+ 0x53,0x31,0x38,0x5F,0x08,0x5F,0x41,0x44, /* 00002738 "S18_._AD" */
+ 0x52,0x0C,0x00,0x00,0x18,0x00,0x08,0x5F, /* 00002740 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x18,0x14,0x20,0x5F, /* 00002748 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x18,0x5C, /* 00002750 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002758 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00002760 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00002768 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x18, /* 00002770 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002778 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00002780 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00002788 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00002790 ",_EJ0.p." */
+ 0x18,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002798 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 000027A0 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 000027A8 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000027B0 "p.\._GPE" */
+ 0x50,0x48,0x31,0x38,0x14,0x2B,0x5F,0x53, /* 000027B8 "PH18.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x18,0x5C,0x2E, /* 000027C0 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 000027C8 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 000027D0 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 000027D8 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x38, /* 000027E0 "_GPEPH18" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x31,0x39,0x5F, /* 000027E8 "[.B.S19_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 000027F0 "._ADR..." */
+ 0x19,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 000027F8 "..._SUN." */
+ 0x19,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00002800 ".. _PS0." */
+ 0x70,0x0A,0x19,0x5C,0x2E,0x5F,0x47,0x50, /* 00002808 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00002810 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002818 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00002820 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x19,0x5C,0x2E,0x5F,0x47, /* 00002828 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00002830 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002838 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00002840 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x19,0x5C,0x2E,0x5F, /* 00002848 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00002850 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002858 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00002860 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x39, /* 00002868 "_GPEPH19" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00002870 ".+_STA.p" */
+ 0x0A,0x19,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002878 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00002880 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002888 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002890 "2.\._GPE" */
+ 0x50,0x48,0x31,0x39,0x5B,0x82,0x42,0x0B, /* 00002898 "PH19[.B." */
+ 0x53,0x31,0x41,0x5F,0x08,0x5F,0x41,0x44, /* 000028A0 "S1A_._AD" */
+ 0x52,0x0C,0x00,0x00,0x1A,0x00,0x08,0x5F, /* 000028A8 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x1A,0x14,0x20,0x5F, /* 000028B0 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x1A,0x5C, /* 000028B8 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000028C0 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 000028C8 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 000028D0 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x1A, /* 000028D8 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 000028E0 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 000028E8 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 000028F0 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 000028F8 ",_EJ0.p." */
+ 0x1A,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002900 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00002908 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00002910 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002918 "p.\._GPE" */
+ 0x50,0x48,0x31,0x41,0x14,0x2B,0x5F,0x53, /* 00002920 "PH1A.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x1A,0x5C,0x2E, /* 00002928 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00002930 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00002938 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00002940 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x41, /* 00002948 "_GPEPH1A" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x31,0x42,0x5F, /* 00002950 "[.B.S1B_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00002958 "._ADR..." */
+ 0x1B,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00002960 "..._SUN." */
+ 0x1B,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00002968 ".. _PS0." */
+ 0x70,0x0A,0x1B,0x5C,0x2E,0x5F,0x47,0x50, /* 00002970 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00002978 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002980 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00002988 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x1B,0x5C,0x2E,0x5F,0x47, /* 00002990 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00002998 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 000029A0 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 000029A8 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x1B,0x5C,0x2E,0x5F, /* 000029B0 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 000029B8 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000029C0 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 000029C8 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x42, /* 000029D0 "_GPEPH1B" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 000029D8 ".+_STA.p" */
+ 0x0A,0x1B,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000029E0 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 000029E8 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 000029F0 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 000029F8 "2.\._GPE" */
+ 0x50,0x48,0x31,0x42,0x5B,0x82,0x42,0x0B, /* 00002A00 "PH1B[.B." */
+ 0x53,0x31,0x43,0x5F,0x08,0x5F,0x41,0x44, /* 00002A08 "S1C_._AD" */
+ 0x52,0x0C,0x00,0x00,0x1C,0x00,0x08,0x5F, /* 00002A10 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x1C,0x14,0x20,0x5F, /* 00002A18 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x1C,0x5C, /* 00002A20 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002A28 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00002A30 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00002A38 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x1C, /* 00002A40 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002A48 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00002A50 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00002A58 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00002A60 ",_EJ0.p." */
+ 0x1C,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002A68 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00002A70 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00002A78 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002A80 "p.\._GPE" */
+ 0x50,0x48,0x31,0x43,0x14,0x2B,0x5F,0x53, /* 00002A88 "PH1C.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x1C,0x5C,0x2E, /* 00002A90 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00002A98 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00002AA0 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00002AA8 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x43, /* 00002AB0 "_GPEPH1C" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x31,0x44,0x5F, /* 00002AB8 "[.B.S1D_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00002AC0 "._ADR..." */
+ 0x1D,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00002AC8 "..._SUN." */
+ 0x1D,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00002AD0 ".. _PS0." */
+ 0x70,0x0A,0x1D,0x5C,0x2E,0x5F,0x47,0x50, /* 00002AD8 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00002AE0 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002AE8 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00002AF0 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x1D,0x5C,0x2E,0x5F,0x47, /* 00002AF8 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00002B00 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002B08 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00002B10 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x1D,0x5C,0x2E,0x5F, /* 00002B18 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00002B20 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002B28 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00002B30 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x44, /* 00002B38 "_GPEPH1D" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00002B40 ".+_STA.p" */
+ 0x0A,0x1D,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002B48 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00002B50 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002B58 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002B60 "2.\._GPE" */
+ 0x50,0x48,0x31,0x44,0x5B,0x82,0x42,0x0B, /* 00002B68 "PH1D[.B." */
+ 0x53,0x31,0x45,0x5F,0x08,0x5F,0x41,0x44, /* 00002B70 "S1E_._AD" */
+ 0x52,0x0C,0x00,0x00,0x1E,0x00,0x08,0x5F, /* 00002B78 "R......_" */
+ 0x53,0x55,0x4E,0x0A,0x1E,0x14,0x20,0x5F, /* 00002B80 "SUN... _" */
+ 0x50,0x53,0x30,0x00,0x70,0x0A,0x1E,0x5C, /* 00002B88 "PS0.p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002B90 "._GPEDPT" */
+ 0x31,0x70,0x0A,0x80,0x5C,0x2E,0x5F,0x47, /* 00002B98 "1p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x32,0x14,0x20, /* 00002BA0 "PEDPT2. " */
+ 0x5F,0x50,0x53,0x33,0x00,0x70,0x0A,0x1E, /* 00002BA8 "_PS3.p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002BB0 "\._GPEDP" */
+ 0x54,0x31,0x70,0x0A,0x83,0x5C,0x2E,0x5F, /* 00002BB8 "T1p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x32,0x14, /* 00002BC0 "GPEDPT2." */
+ 0x2C,0x5F,0x45,0x4A,0x30,0x01,0x70,0x0A, /* 00002BC8 ",_EJ0.p." */
+ 0x1E,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002BD0 ".\._GPED" */
+ 0x50,0x54,0x31,0x70,0x0A,0x88,0x5C,0x2E, /* 00002BD8 "PT1p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x32, /* 00002BE0 "_GPEDPT2" */
+ 0x70,0x01,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002BE8 "p.\._GPE" */
+ 0x50,0x48,0x31,0x45,0x14,0x2B,0x5F,0x53, /* 00002BF0 "PH1E.+_S" */
+ 0x54,0x41,0x00,0x70,0x0A,0x1E,0x5C,0x2E, /* 00002BF8 "TA.p..\." */
+ 0x5F,0x47,0x50,0x45,0x44,0x50,0x54,0x31, /* 00002C00 "_GPEDPT1" */
+ 0x70,0x0A,0x89,0x5C,0x2E,0x5F,0x47,0x50, /* 00002C08 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x32,0xA4,0x5C,0x2E, /* 00002C10 "EDPT2.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x45, /* 00002C18 "_GPEPH1E" */
+ 0x5B,0x82,0x42,0x0B,0x53,0x31,0x46,0x5F, /* 00002C20 "[.B.S1F_" */
+ 0x08,0x5F,0x41,0x44,0x52,0x0C,0x00,0x00, /* 00002C28 "._ADR..." */
+ 0x1F,0x00,0x08,0x5F,0x53,0x55,0x4E,0x0A, /* 00002C30 "..._SUN." */
+ 0x1F,0x14,0x20,0x5F,0x50,0x53,0x30,0x00, /* 00002C38 ".. _PS0." */
+ 0x70,0x0A,0x1F,0x5C,0x2E,0x5F,0x47,0x50, /* 00002C40 "p..\._GP" */
+ 0x45,0x44,0x50,0x54,0x31,0x70,0x0A,0x80, /* 00002C48 "EDPT1p.." */
+ 0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44,0x50, /* 00002C50 "\._GPEDP" */
+ 0x54,0x32,0x14,0x20,0x5F,0x50,0x53,0x33, /* 00002C58 "T2. _PS3" */
+ 0x00,0x70,0x0A,0x1F,0x5C,0x2E,0x5F,0x47, /* 00002C60 ".p..\._G" */
+ 0x50,0x45,0x44,0x50,0x54,0x31,0x70,0x0A, /* 00002C68 "PEDPT1p." */
+ 0x83,0x5C,0x2E,0x5F,0x47,0x50,0x45,0x44, /* 00002C70 ".\._GPED" */
+ 0x50,0x54,0x32,0x14,0x2C,0x5F,0x45,0x4A, /* 00002C78 "PT2.,_EJ" */
+ 0x30,0x01,0x70,0x0A,0x1F,0x5C,0x2E,0x5F, /* 00002C80 "0.p..\._" */
+ 0x47,0x50,0x45,0x44,0x50,0x54,0x31,0x70, /* 00002C88 "GPEDPT1p" */
+ 0x0A,0x88,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002C90 "..\._GPE" */
+ 0x44,0x50,0x54,0x32,0x70,0x01,0x5C,0x2E, /* 00002C98 "DPT2p.\." */
+ 0x5F,0x47,0x50,0x45,0x50,0x48,0x31,0x46, /* 00002CA0 "_GPEPH1F" */
+ 0x14,0x2B,0x5F,0x53,0x54,0x41,0x00,0x70, /* 00002CA8 ".+_STA.p" */
+ 0x0A,0x1F,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002CB0 "..\._GPE" */
+ 0x44,0x50,0x54,0x31,0x70,0x0A,0x89,0x5C, /* 00002CB8 "DPT1p..\" */
+ 0x2E,0x5F,0x47,0x50,0x45,0x44,0x50,0x54, /* 00002CC0 "._GPEDPT" */
+ 0x32,0xA4,0x5C,0x2E,0x5F,0x47,0x50,0x45, /* 00002CC8 "2.\._GPE" */
+ 0x50,0x48,0x31,0x46,0x10,0x4D,0x52,0x5F, /* 00002CD0 "PH1F.MR_" */
+ 0x47,0x50,0x45,0x5B,0x80,0x50,0x48,0x50, /* 00002CD8 "GPE[.PHP" */
+ 0x5F,0x01,0x0B,0xC0,0x10,0x0A,0x22,0x5B, /* 00002CE0 "_....."[" */
+ 0x81,0x41,0x0B,0x50,0x48,0x50,0x5F,0x01, /* 00002CE8 ".A.PHP_." */
+ 0x50,0x53,0x54,0x41,0x08,0x50,0x53,0x54, /* 00002CF0 "PSTA.PST" */
+ 0x42,0x08,0x50,0x48,0x30,0x30,0x08,0x50, /* 00002CF8 "B.PH00.P" */
+ 0x48,0x30,0x31,0x08,0x50,0x48,0x30,0x32, /* 00002D00 "H01.PH02" */
+ 0x08,0x50,0x48,0x30,0x33,0x08,0x50,0x48, /* 00002D08 ".PH03.PH" */
+ 0x30,0x34,0x08,0x50,0x48,0x30,0x35,0x08, /* 00002D10 "04.PH05." */
+ 0x50,0x48,0x30,0x36,0x08,0x50,0x48,0x30, /* 00002D18 "PH06.PH0" */
+ 0x37,0x08,0x50,0x48,0x30,0x38,0x08,0x50, /* 00002D20 "7.PH08.P" */
+ 0x48,0x30,0x39,0x08,0x50,0x48,0x30,0x41, /* 00002D28 "H09.PH0A" */
+ 0x08,0x50,0x48,0x30,0x42,0x08,0x50,0x48, /* 00002D30 ".PH0B.PH" */
+ 0x30,0x43,0x08,0x50,0x48,0x30,0x44,0x08, /* 00002D38 "0C.PH0D." */
+ 0x50,0x48,0x30,0x45,0x08,0x50,0x48,0x30, /* 00002D40 "PH0E.PH0" */
+ 0x46,0x08,0x50,0x48,0x31,0x30,0x08,0x50, /* 00002D48 "F.PH10.P" */
+ 0x48,0x31,0x31,0x08,0x50,0x48,0x31,0x32, /* 00002D50 "H11.PH12" */
+ 0x08,0x50,0x48,0x31,0x33,0x08,0x50,0x48, /* 00002D58 ".PH13.PH" */
+ 0x31,0x34,0x08,0x50,0x48,0x31,0x35,0x08, /* 00002D60 "14.PH15." */
+ 0x50,0x48,0x31,0x36,0x08,0x50,0x48,0x31, /* 00002D68 "PH16.PH1" */
+ 0x37,0x08,0x50,0x48,0x31,0x38,0x08,0x50, /* 00002D70 "7.PH18.P" */
+ 0x48,0x31,0x39,0x08,0x50,0x48,0x31,0x41, /* 00002D78 "H19.PH1A" */
+ 0x08,0x50,0x48,0x31,0x42,0x08,0x50,0x48, /* 00002D80 ".PH1B.PH" */
+ 0x31,0x43,0x08,0x50,0x48,0x31,0x44,0x08, /* 00002D88 "1C.PH1D." */
+ 0x50,0x48,0x31,0x45,0x08,0x50,0x48,0x31, /* 00002D90 "PH1E.PH1" */
+ 0x46,0x08,0x5B,0x80,0x44,0x47,0x31,0x5F, /* 00002D98 "F.[.DG1_" */
+ 0x01,0x0B,0x44,0xB0,0x0A,0x04,0x5B,0x81, /* 00002DA0 "..D...[." */
+ 0x10,0x44,0x47,0x31,0x5F,0x01,0x44,0x50, /* 00002DA8 ".DG1_.DP" */
+ 0x54,0x31,0x08,0x44,0x50,0x54,0x32,0x08, /* 00002DB0 "T1.DPT2." */
+ 0x14,0x49,0x44,0x5F,0x4C,0x30,0x33,0x08, /* 00002DB8 ".ID_L03." */
+ 0x08,0x5F,0x54,0x5F,0x30,0x00,0x08,0x53, /* 00002DC0 "._T_0..S" */
+ 0x4C,0x54,0x5F,0x00,0x08,0x45,0x56,0x54, /* 00002DC8 "LT_..EVT" */
+ 0x5F,0x00,0x70,0x50,0x53,0x54,0x41,0x61, /* 00002DD0 "_.pPSTAa" */
+ 0x7B,0x61,0x0A,0x0F,0x45,0x56,0x54,0x5F, /* 00002DD8 "{a..EVT_" */
+ 0x70,0x50,0x53,0x54,0x42,0x61,0x7B,0x61, /* 00002DE0 "pPSTBa{a" */
+ 0x0A,0xFF,0x53,0x4C,0x54,0x5F,0x70,0x53, /* 00002DE8 "..SLT_pS" */
+ 0x4C,0x54,0x5F,0x44,0x50,0x54,0x31,0x70, /* 00002DF0 "LT_DPT1p" */
+ 0x45,0x56,0x54,0x5F,0x44,0x50,0x54,0x32, /* 00002DF8 "EVT_DPT2" */
+ 0x70,0x53,0x4C,0x54,0x5F,0x5F,0x54,0x5F, /* 00002E00 "pSLT__T_" */
+ 0x30,0xA0,0x1B,0x93,0x5F,0x54,0x5F,0x30, /* 00002E08 "0..._T_0" */
+ 0x00,0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42, /* 00002E10 "..\/._SB" */
+ 0x5F,0x50,0x43,0x49,0x30,0x53,0x30,0x30, /* 00002E18 "_PCI0S00" */
+ 0x5F,0x45,0x56,0x54,0x5F,0xA1,0x4C,0x3D, /* 00002E20 "_EVT_.L=" */
+ 0xA0,0x1B,0x93,0x5F,0x54,0x5F,0x30,0x01, /* 00002E28 "..._T_0." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002E30 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x31,0x5F, /* 00002E38 "PCI0S01_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x3B,0xA0, /* 00002E40 "EVT_.M;." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x02, /* 00002E48 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002E50 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x32,0x5F, /* 00002E58 "PCI0S02_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x39,0xA0, /* 00002E60 "EVT_.M9." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x03, /* 00002E68 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002E70 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x33,0x5F, /* 00002E78 "PCI0S03_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x37,0xA0, /* 00002E80 "EVT_.M7." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x04, /* 00002E88 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002E90 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x34,0x5F, /* 00002E98 "PCI0S04_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x35,0xA0, /* 00002EA0 "EVT_.M5." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x05, /* 00002EA8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002EB0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x35,0x5F, /* 00002EB8 "PCI0S05_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x33,0xA0, /* 00002EC0 "EVT_.M3." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x06, /* 00002EC8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002ED0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x36,0x5F, /* 00002ED8 "PCI0S06_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x31,0xA0, /* 00002EE0 "EVT_.M1." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x07, /* 00002EE8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002EF0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x37,0x5F, /* 00002EF8 "PCI0S07_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x2F,0xA0, /* 00002F00 "EVT_.M/." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x08, /* 00002F08 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F10 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x38,0x5F, /* 00002F18 "PCI0S08_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x2D,0xA0, /* 00002F20 "EVT_.M-." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x09, /* 00002F28 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F30 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x39,0x5F, /* 00002F38 "PCI0S09_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x2B,0xA0, /* 00002F40 "EVT_.M+." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0A, /* 00002F48 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F50 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x41,0x5F, /* 00002F58 "PCI0S0A_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x29,0xA0, /* 00002F60 "EVT_.M)." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0B, /* 00002F68 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F70 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x42,0x5F, /* 00002F78 "PCI0S0B_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x27,0xA0, /* 00002F80 "EVT_.M'." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0C, /* 00002F88 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002F90 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x43,0x5F, /* 00002F98 "PCI0S0C_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x25,0xA0, /* 00002FA0 "EVT_.M%." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0D, /* 00002FA8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002FB0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x44,0x5F, /* 00002FB8 "PCI0S0D_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x23,0xA0, /* 00002FC0 "EVT_.M#." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0E, /* 00002FC8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002FD0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x45,0x5F, /* 00002FD8 "PCI0S0E_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x21,0xA0, /* 00002FE0 "EVT_.M!." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x0F, /* 00002FE8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00002FF0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x30,0x46,0x5F, /* 00002FF8 "PCI0S0F_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x1F,0xA0, /* 00003000 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x10, /* 00003008 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003010 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x30,0x5F, /* 00003018 "PCI0S10_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x1D,0xA0, /* 00003020 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x11, /* 00003028 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003030 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x31,0x5F, /* 00003038 "PCI0S11_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x1B,0xA0, /* 00003040 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x12, /* 00003048 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003050 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x32,0x5F, /* 00003058 "PCI0S12_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x19,0xA0, /* 00003060 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x13, /* 00003068 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003070 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x33,0x5F, /* 00003078 "PCI0S13_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x17,0xA0, /* 00003080 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x14, /* 00003088 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003090 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x34,0x5F, /* 00003098 "PCI0S14_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x15,0xA0, /* 000030A0 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x15, /* 000030A8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 000030B0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x35,0x5F, /* 000030B8 "PCI0S15_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x13,0xA0, /* 000030C0 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x16, /* 000030C8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 000030D0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x36,0x5F, /* 000030D8 "PCI0S16_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x11,0xA0, /* 000030E0 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x17, /* 000030E8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 000030F0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x37,0x5F, /* 000030F8 "PCI0S17_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x0F,0xA0, /* 00003100 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x18, /* 00003108 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003110 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x38,0x5F, /* 00003118 "PCI0S18_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x0D,0xA0, /* 00003120 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x19, /* 00003128 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003130 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x39,0x5F, /* 00003138 "PCI0S19_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x0B,0xA0, /* 00003140 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1A, /* 00003148 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003150 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x41,0x5F, /* 00003158 "PCI0S1A_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x09,0xA0, /* 00003160 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1B, /* 00003168 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003170 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x42,0x5F, /* 00003178 "PCI0S1B_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x07,0xA0, /* 00003180 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1C, /* 00003188 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 00003190 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x43,0x5F, /* 00003198 "PCI0S1C_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x4D,0x05,0xA0, /* 000031A0 "EVT_.M.." */
+ 0x1C,0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1D, /* 000031A8 ".._T_0.." */
+ 0x86,0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F, /* 000031B0 ".\/._SB_" */
+ 0x50,0x43,0x49,0x30,0x53,0x31,0x44,0x5F, /* 000031B8 "PCI0S1D_" */
+ 0x45,0x56,0x54,0x5F,0xA1,0x3D,0xA0,0x1C, /* 000031C0 "EVT_.=.." */
+ 0x93,0x5F,0x54,0x5F,0x30,0x0A,0x1E,0x86, /* 000031C8 "._T_0..." */
+ 0x5C,0x2F,0x03,0x5F,0x53,0x42,0x5F,0x50, /* 000031D0 "\/._SB_P" */
+ 0x43,0x49,0x30,0x53,0x31,0x45,0x5F,0x45, /* 000031D8 "CI0S1E_E" */
+ 0x56,0x54,0x5F,0xA1,0x1E,0xA0,0x1C,0x93, /* 000031E0 "VT_....." */
+ 0x5F,0x54,0x5F,0x30,0x0A,0x1F,0x86,0x5C, /* 000031E8 "_T_0...\" */
+ 0x2F,0x03,0x5F,0x53,0x42,0x5F,0x50,0x43, /* 000031F0 "/._SB_PC" */
+ 0x49,0x30,0x53,0x31,0x46,0x5F,0x45,0x56, /* 000031F8 "I0S1F_EV" */
0x54,0x5F,
};
int DsdtLen=sizeof(AmlCode);
.p_lvl2_lat = 0x0fff, /* >100, means we do not support C2 state */
.p_lvl3_lat = 0x0fff, /* >1000, means we do not support C3 state */
- .iapc_boot_arch = ACPI_LEGACY_DEVICES | ACPI_8042,
+ .iapc_boot_arch = ACPI_8042,
.flags = (ACPI_PROC_C1 | ACPI_SLP_BUTTON |
ACPI_WBINVD | ACPI_PWR_BUTTON |
ACPI_FIX_RTC | ACPI_TMR_VAL_EXT),
nr_var_ranges = (uint8_t)mtrr_cap;
if ( nr_var_ranges != 0 )
{
- /* A single UC range covering PCI space. */
- wrmsr(MSR_MTRRphysBase(0), PCI_MEMBASE);
- wrmsr(MSR_MTRRphysMask(0),
- ((uint64_t)(int32_t)PCI_MEMBASE & addr_mask) | (1u << 11));
- printf("var MTRRs ... ");
+ unsigned long base = pci_mem_start, size;
+ int i;
+
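+        /*
+         * Cover the hole with naturally-aligned power-of-two ranges: each
+         * chunk is the largest naturally aligned power-of-two block starting
+         * at 'base' that still fits below pci_mem_end. E.g. the default
+         * 0xf0000000-0xfc000000 hole is covered by a 128MB range followed
+         * by a 64MB range.
+         */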
+ for ( i = 0; (base != pci_mem_end) && (i < nr_var_ranges); i++ )
+ {
+ size = PAGE_SIZE;
+ while ( !(base & size) )
+ size <<= 1;
+ while ( ((base + size) < base) || ((base + size) > pci_mem_end) )
+ size >>= 1;
+
+ wrmsr(MSR_MTRRphysBase(i), base);
+ wrmsr(MSR_MTRRphysMask(i),
+ (~(uint64_t)(size-1) & addr_mask) | (1u << 11));
+
+ base += size;
+ }
+
+ printf("var MTRRs [%d/%d] ... ", i, nr_var_ranges);
}
wrmsr(MSR_MTRRdefType, mtrr_def);
#ifndef __HVMLOADER_CONFIG_H__
#define __HVMLOADER_CONFIG_H__
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1ul << PAGE_SHIFT)
+
#define IOAPIC_BASE_ADDRESS 0xfec00000
#define IOAPIC_ID 0x01
#define IOAPIC_VERSION 0x11
#define PCI_ISA_DEVFN 0x08 /* dev 1, fn 0 */
#define PCI_ISA_IRQ_MASK 0x0c20U /* ISA IRQs 5,10,11 are PCI connected */
-#define PCI_MEMBASE 0xf0000000
-#define PCI_MEMSIZE 0x0c000000
+/* MMIO hole: Hardcoded defaults, which can be dynamically expanded. */
+#define PCI_MEM_START 0xf0000000
+#define PCI_MEM_END 0xfc000000
+extern unsigned long pci_mem_start, pci_mem_end;
+
+/* We reserve 16MB for special BIOS mappings, etc. */
+#define RESERVED_MEMBASE 0xfc000000
+#define RESERVED_MEMSIZE 0x01000000
#define ROMBIOS_SEG 0xF000
#define ROMBIOS_BEGIN 0x000F0000
#define ROMBIOS_END (ROMBIOS_BEGIN + ROMBIOS_SIZE)
/* Memory map. */
+#define SCRATCH_PHYSICAL_ADDRESS 0x00010000
#define HYPERCALL_PHYSICAL_ADDRESS 0x00080000
#define VGABIOS_PHYSICAL_ADDRESS 0x000C0000
-#define ETHERBOOT_PHYSICAL_ADDRESS 0x000D0000
-#define SMBIOS_PHYSICAL_ADDRESS 0x000E9000
-#define SMBIOS_MAXIMUM_SIZE 0x00001000
-#define ACPI_PHYSICAL_ADDRESS 0x000EA000
+#define OPTIONROM_PHYSICAL_ADDRESS 0x000C8000
+#define OPTIONROM_PHYSICAL_END 0x000EA000
+#define BIOS_INFO_PHYSICAL_ADDRESS 0x000EA000
+#define ACPI_PHYSICAL_ADDRESS 0x000EA020
+#define E820_PHYSICAL_ADDRESS 0x000EA100
+#define SMBIOS_PHYSICAL_ADDRESS 0x000EB000
+#define SMBIOS_MAXIMUM_SIZE 0x00005000
#define ROMBIOS_PHYSICAL_ADDRESS 0x000F0000
-#define SCRATCH_PHYSICAL_ADDRESS 0x00010000
+
+/* Offsets from E820_PHYSICAL_ADDRESS. */
+#define E820_NR_OFFSET 0x0
+#define E820_OFFSET 0x8
/* Xen Platform Device */
+#define XEN_PF_IOBASE 0x10
#define PFFLAG_ROM_LOCK 1 /* Sets whether ROM memory area is RW or RO */
+/* Located at BIOS_INFO_PHYSICAL_ADDRESS. */
struct bios_info {
- uint8_t com1_present:1;
- uint8_t com2_present:1;
- uint8_t hpet_present:1;
- uint32_t pci_min, pci_len;
- uint16_t xen_pfiob;
+ uint8_t com1_present:1; /* 0[0] - System has COM1? */
+ uint8_t com2_present:1; /* 0[1] - System has COM2? */
+ uint8_t hpet_present:1; /* 0[2] - System has HPET? */
+ uint32_t pci_min, pci_len; /* 4, 8 - PCI I/O hole boundaries */
+ uint32_t bios32_entry; /* 12 - Entry point for 32-bit BIOS */
};
+#define BIOSINFO_OFF_bios32_entry 12
#endif /* __HVMLOADER_CONFIG_H__ */
uint32_t type;
} __attribute__((packed));
-#define HVM_E820_NR ((unsigned char *)HVM_E820_PAGE + HVM_E820_NR_OFFSET)
-#define HVM_E820 ((struct e820entry *)(HVM_E820_PAGE + HVM_E820_OFFSET))
+#define E820_NR ((uint16_t *)(E820_PHYSICAL_ADDRESS + E820_NR_OFFSET))
+#define E820 ((struct e820entry *)(E820_PHYSICAL_ADDRESS + E820_OFFSET))
#endif /* __HVMLOADER_E820_H__ */
#include "option_rom.h"
#include <xen/version.h>
#include <xen/hvm/params.h>
+#include <xen/memory.h>
asm (
" .text \n"
" .text \n"
);
+unsigned long pci_mem_start = PCI_MEM_START;
+unsigned long pci_mem_end = PCI_MEM_END;
+
static enum { VGA_none, VGA_std, VGA_cirrus } virtual_vga = VGA_none;
static void init_hypercalls(void)
static void pci_setup(void)
{
- uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd;
+ uint32_t base, devfn, bar_reg, bar_data, bar_sz, cmd, mmio_total = 0;
uint16_t class, vendor_id, device_id;
unsigned int bar, pin, link, isa_irq;
/* Resources assignable to PCI devices via BARs. */
struct resource {
uint32_t base, max;
- } *resource;
- struct resource mem_resource = { PCI_MEMBASE, PCI_MEMBASE + PCI_MEMSIZE };
- struct resource io_resource = { 0xc000, 0x10000 };
+ } *resource, mem_resource, io_resource;
/* Create a list of device BARs in descending order of size. */
struct bars {
bars[i].bar_reg = bar_reg;
bars[i].bar_sz = bar_sz;
+ if ( (bar_data & PCI_BASE_ADDRESS_SPACE) ==
+ PCI_BASE_ADDRESS_SPACE_MEMORY )
+ mmio_total += bar_sz;
+
nr_bars++;
/* Skip the upper-half of the address for a 64-bit BAR. */
printf("pci dev %02x:%x INT%c->IRQ%u\n",
devfn>>3, devfn&7, 'A'+pin-1, isa_irq);
}
+
+ /* Enable bus mastering. */
+ cmd = pci_readw(devfn, PCI_COMMAND);
+ cmd |= PCI_COMMAND_MASTER;
+ pci_writew(devfn, PCI_COMMAND, cmd);
}
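+    /*
+     * If the BARs do not all fit in the default MMIO hole, grow the hole by
+     * shifting pci_mem_start left (each shift discards its top set bit,
+     * moving the start of the hole down). Any RAM pages that now fall inside
+     * the enlarged hole are relocated above the 4GB boundary via
+     * XENMEM_add_to_physmap.
+     */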
+ while ( (mmio_total > (pci_mem_end - pci_mem_start)) &&
+ ((pci_mem_start << 1) != 0) )
+ pci_mem_start <<= 1;
+
+ while ( (pci_mem_start >> PAGE_SHIFT) < hvm_info->low_mem_pgend )
+ {
+ struct xen_add_to_physmap xatp;
+ if ( hvm_info->high_mem_pgend == 0 )
+ hvm_info->high_mem_pgend = 1ull << (32 - PAGE_SHIFT);
+ xatp.domid = DOMID_SELF;
+ xatp.space = XENMAPSPACE_gmfn;
+ xatp.idx = --hvm_info->low_mem_pgend;
+ xatp.gpfn = hvm_info->high_mem_pgend++;
+ if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ BUG();
+ }
+
+ mem_resource.base = pci_mem_start;
+ mem_resource.max = pci_mem_end;
+ io_resource.base = 0xc000;
+ io_resource.max = 0x10000;
+
/* Assign iomem and ioport resources in descending order of size. */
for ( i = 0; i < nr_bars; i++ )
{
}
/*
- * Scan the PCI bus for the first NIC supported by etherboot, and copy
- * the corresponding rom data to *copy_rom_dest. Returns the length of the
- * selected rom, or 0 if no NIC found.
+ * Scan the list of Option ROMs at @roms for one which supports
+ * PCI (@vendor_id, @device_id) found at slot @devfn. If one is found,
+ * copy it to @dest and return its size rounded up to a multiple of 2kB. This
+ * function will not copy ROMs beyond address OPTIONROM_PHYSICAL_END.
*/
-static int scan_etherboot_nic(void *copy_rom_dest)
+#define round_option_rom(x) (((x) + 2047) & ~2047)
+static int scan_option_rom(
+ uint8_t devfn, uint16_t vendor_id, uint16_t device_id,
+ void *roms, uint32_t dest)
{
struct option_rom_header *rom;
struct option_rom_pnp_header *pnph;
struct option_rom_pci_header *pcih;
- uint32_t devfn;
- uint16_t class, vendor_id, device_id;
uint8_t csum;
int i;
- for ( devfn = 0; devfn < 128; devfn++ )
- {
- class = pci_readw(devfn, PCI_CLASS_DEVICE);
- vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
- device_id = pci_readw(devfn, PCI_DEVICE_ID);
+ static uint32_t orom_ids[64];
+ static int nr_roms;
- if ( (vendor_id == 0xffff) && (device_id == 0xffff) )
- continue;
+ /* Avoid duplicate ROMs. */
+ for ( i = 0; i < nr_roms; i++ )
+ if ( orom_ids[i] == (vendor_id | ((uint32_t)device_id << 16)) )
+ return 0;
- /* We're only interested in NICs. */
- if ( class != 0x0200 )
- continue;
+ rom = roms;
+ for ( ; ; )
+ {
+ /* Invalid signature means we're out of option ROMs. */
+ if ( strncmp((char *)rom->signature, "\x55\xaa", 2) ||
+ (rom->rom_size == 0) )
+ break;
- rom = (struct option_rom_header *)etherboot;
- for ( ; ; )
- {
- /* Invalid signature means we're out of option ROMs. */
- if ( strncmp((char *)rom->signature, "\x55\xaa", 2) ||
- (rom->rom_size == 0) )
- break;
-
- /* Invalid checksum means we're out of option ROMs. */
- csum = 0;
- for ( i = 0; i < (rom->rom_size * 512); i++ )
- csum += ((uint8_t *)rom)[i];
- if ( csum != 0 )
- break;
-
- /* Check the PCI PnP header (if any) for a match. */
- pcih = (struct option_rom_pci_header *)
- ((char *)rom + rom->pci_header_offset);
- if ( (rom->pci_header_offset != 0) &&
- !strncmp((char *)pcih->signature, "PCIR", 4) &&
- (pcih->vendor_id == vendor_id) &&
- (pcih->device_id == device_id) )
- goto found;
-
- rom = (struct option_rom_header *)
- ((char *)rom + rom->rom_size * 512);
- }
+ /* Invalid checksum means we're out of option ROMs. */
+ csum = 0;
+ for ( i = 0; i < (rom->rom_size * 512); i++ )
+ csum += ((uint8_t *)rom)[i];
+ if ( csum != 0 )
+ break;
+
+ /* Check the PCI PnP header (if any) for a match. */
+ pcih = (struct option_rom_pci_header *)
+ ((char *)rom + rom->pci_header_offset);
+ if ( (rom->pci_header_offset != 0) &&
+ !strncmp((char *)pcih->signature, "PCIR", 4) &&
+ (pcih->vendor_id == vendor_id) &&
+ (pcih->device_id == device_id) )
+ goto found;
+
+ rom = (struct option_rom_header *)
+ ((char *)rom + rom->rom_size * 512);
}
return 0;
((char *)rom + pnph->next_header_offset))
: ((struct option_rom_pnp_header *)NULL));
- printf("Loading PXE ROM ...\n");
+ printf("Loading PCI Option ROM ...\n");
if ( (pnph != NULL) && (pnph->manufacturer_name_offset != 0) )
printf(" - Manufacturer: %s\n",
(char *)rom + pnph->manufacturer_name_offset);
if ( (pnph != NULL) && (pnph->product_name_offset != 0) )
printf(" - Product name: %s\n",
(char *)rom + pnph->product_name_offset);
- memcpy(copy_rom_dest, rom, rom->rom_size * 512);
- return rom->rom_size * 512;
+
+ if ( (dest + rom->rom_size * 512 + 1) > OPTIONROM_PHYSICAL_END )
+ {
+ printf("Option ROM size %x exceeds available space\n",
+ rom->rom_size * 512);
+ return 0;
+ }
+
+ orom_ids[nr_roms++] = vendor_id | ((uint32_t)device_id << 16);
+ memcpy((void *)dest, rom, rom->rom_size * 512);
+ *(uint8_t *)(dest + rom->rom_size * 512) = devfn;
+ return round_option_rom(rom->rom_size * 512 + 1);
}
-/* Replace possibly erroneous memory-size CMOS fields with correct values. */
-static void cmos_write_memory_size(void)
+/*
+ * Scan the PCI bus for the first NIC supported by etherboot, and copy
+ * the corresponding ROM data to copy_rom_dest. Returns the length of the
+ * selected ROM, or 0 if no NIC is found.
+ */
+static int scan_etherboot_nic(uint32_t copy_rom_dest)
{
- struct e820entry *map = HVM_E820;
- int i, nr = *HVM_E820_NR;
- uint32_t base_mem = 640, ext_mem = 0, alt_mem = 0;
+ uint8_t devfn;
+ uint16_t class, vendor_id, device_id;
- for ( i = 0; i < nr; i++ )
- if ( (map[i].addr >= 0x100000) && (map[i].type == E820_RAM) )
- break;
+ for ( devfn = 0; devfn < 128; devfn++ )
+ {
+ class = pci_readw(devfn, PCI_CLASS_DEVICE);
+ vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
+ device_id = pci_readw(devfn, PCI_DEVICE_ID);
+
+ /* We're only interested in NICs. */
+ if ( (vendor_id != 0xffff) &&
+ (device_id != 0xffff) &&
+ (class == 0x0200) )
+ return scan_option_rom(
+ devfn, vendor_id, device_id, etherboot, copy_rom_dest);
+ }
+
+ return 0;
+}
- if ( i != nr )
+/*
+ * Scan the PCI bus for devices that have an option ROM, and copy the
+ * corresponding ROM data to successive addresses starting at rom_base_addr.
+ */
+static int pci_load_option_roms(uint32_t rom_base_addr)
+{
+ uint32_t option_rom_addr, rom_phys_addr = rom_base_addr;
+ uint16_t vendor_id, device_id;
+ uint8_t devfn, class;
+
+ for ( devfn = 0; devfn < 128; devfn++ )
{
- alt_mem = ext_mem = map[i].addr + map[i].size;
- ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0;
- if ( ext_mem > 0xffff )
- ext_mem = 0xffff;
- alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0;
+ class = pci_readb(devfn, PCI_CLASS_DEVICE + 1);
+ vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
+ device_id = pci_readw(devfn, PCI_DEVICE_ID);
+
+ if ( (vendor_id == 0xffff) && (device_id == 0xffff) )
+ continue;
+
+ /*
+         * Currently we only scan option ROMs from mass storage devices and
+         * serial bus controllers (Fibre Channel included).
+ */
+ if ( (class != 0x1) && (class != 0xc) )
+ continue;
+
+ option_rom_addr = pci_readl(devfn, PCI_ROM_ADDRESS);
+ if ( !option_rom_addr )
+ continue;
+
+        /* Ensure the Expansion ROM BAR is enabled before copying */
+ pci_writel(devfn, PCI_ROM_ADDRESS, option_rom_addr | 0x1);
+
+ rom_phys_addr += scan_option_rom(
+ devfn, vendor_id, device_id,
+ (void *)(option_rom_addr & ~2047), rom_phys_addr);
+
+        /* Restore the original value of the Expansion ROM BAR */
+ pci_writel(devfn, PCI_ROM_ADDRESS, option_rom_addr);
}
+ return rom_phys_addr - rom_base_addr;
+}
+
+/* Replace possibly erroneous memory-size CMOS fields with correct values. */
+static void cmos_write_memory_size(void)
+{
+ uint32_t base_mem = 640, ext_mem, alt_mem;
+
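+    /*
+     * CMOS encoding: ext_mem is memory above 1MB in KB (capped at 0xffff),
+     * alt_mem is memory above 16MB in 64KB units.
+     */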
+ alt_mem = ext_mem = hvm_info->low_mem_pgend << PAGE_SHIFT;
+ ext_mem = (ext_mem > 0x0100000) ? (ext_mem - 0x0100000) >> 10 : 0;
+ if ( ext_mem > 0xffff )
+ ext_mem = 0xffff;
+ alt_mem = (alt_mem > 0x1000000) ? (alt_mem - 0x1000000) >> 16 : 0;
+
/* All BIOSes: conventional memory (CMOS *always* reports 640kB). */
cmos_outb(0x15, (uint8_t)(base_mem >> 0));
cmos_outb(0x16, (uint8_t)(base_mem >> 8));
cmos_outb(0x35, (uint8_t)( alt_mem >> 8));
}
-static uint16_t init_xen_platform_io_base(void)
+/*
+ * Set up an empty TSS area for virtual 8086 mode to use.
+ * The only important thing is that it mustn't have any bits set
+ * in the interrupt redirection bitmap, so all zeros will do.
+ */
+static void init_vm86_tss(void)
{
- struct bios_info *bios_info = (struct bios_info *)ACPI_PHYSICAL_ADDRESS;
- uint32_t devfn, bar_data;
- uint16_t vendor_id, device_id;
-
- bios_info->xen_pfiob = 0;
+ void *tss;
+ struct xen_hvm_param p;
+
+ tss = mem_alloc(128, 128);
+ memset(tss, 0, 128);
+ p.domid = DOMID_SELF;
+ p.index = HVM_PARAM_VM86_TSS;
+ p.value = virt_to_phys(tss);
+ hypercall_hvm_op(HVMOP_set_param, &p);
+ printf("vm86 TSS at %08lx\n", virt_to_phys(tss));
+}
- for ( devfn = 0; devfn < 128; devfn++ )
+/* Create an E820 table based on memory parameters provided in hvm_info. */
+static void build_e820_table(void)
+{
+ struct e820entry *e820 = E820;
+ unsigned int nr = 0;
+
+ /* 0x0-0x9FC00: Ordinary RAM. */
+ e820[nr].addr = 0x0;
+ e820[nr].size = 0x9FC00;
+ e820[nr].type = E820_RAM;
+ nr++;
+
+ /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
+ e820[nr].addr = 0x9FC00;
+ e820[nr].size = 0x400;
+ e820[nr].type = E820_RESERVED;
+ nr++;
+
+ /*
+     * The following regions are standard regions of the PC memory map.
+     * They are not covered by E820 entries, so the OS will not use them as RAM.
+ * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
+ * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
+ * TODO: free pages which turn out to be unused.
+ */
+
+ /*
+ * 0xE0000-0x0F0000: PC-specific area. We place various tables here.
+ * 0xF0000-0x100000: System BIOS.
+ * TODO: free pages which turn out to be unused.
+ */
+ e820[nr].addr = 0xE0000;
+ e820[nr].size = 0x20000;
+ e820[nr].type = E820_RESERVED;
+ nr++;
+
+ /* Low RAM goes here. Reserve space for special pages. */
+ BUG_ON((hvm_info->low_mem_pgend << PAGE_SHIFT) < (2u << 20));
+ e820[nr].addr = 0x100000;
+ e820[nr].size = (hvm_info->low_mem_pgend << PAGE_SHIFT) - e820[nr].addr;
+ e820[nr].type = E820_RAM;
+ nr++;
+
+ /*
+ * Explicitly reserve space for special pages.
+     * This space starts at RESERVED_MEMBASE and extends to cover various
+ * fixed hardware mappings (e.g., LAPIC, IOAPIC, default SVGA framebuffer).
+ */
+ e820[nr].addr = RESERVED_MEMBASE;
+ e820[nr].size = (uint32_t)-e820[nr].addr;
+ e820[nr].type = E820_RESERVED;
+ nr++;
+
+ if ( hvm_info->high_mem_pgend )
{
- vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
- device_id = pci_readw(devfn, PCI_DEVICE_ID);
- if ( (vendor_id != 0x5853) || (device_id != 0x0001) )
- continue;
- bar_data = pci_readl(devfn, PCI_BASE_ADDRESS_0);
- bios_info->xen_pfiob = bar_data & PCI_BASE_ADDRESS_IO_MASK;
+ e820[nr].addr = ((uint64_t)1 << 32);
+ e820[nr].size =
+ ((uint64_t)hvm_info->high_mem_pgend << PAGE_SHIFT) - e820[nr].addr;
+ e820[nr].type = E820_RAM;
+ nr++;
}
- return bios_info->xen_pfiob;
+ *E820_NR = nr;
}
int main(void)
{
- int vgabios_sz = 0, etherboot_sz = 0, rombios_sz, smbios_sz;
- uint32_t vga_ram = 0;
- uint16_t xen_pfiob;
+ int option_rom_sz = 0, vgabios_sz = 0, etherboot_sz = 0;
+ int rombios_sz, smbios_sz;
+ uint32_t etherboot_phys_addr, option_rom_phys_addr, bios32_addr;
+ struct bios_info *bios_info;
printf("HVM Loader\n");
printf("CPU speed is %u MHz\n", get_cpu_mhz());
+ apic_setup();
+ pci_setup();
+
smp_initialise();
perform_tests();
if ( rombios_sz > 0x10000 )
rombios_sz = 0x10000;
memcpy((void *)ROMBIOS_PHYSICAL_ADDRESS, rombios, rombios_sz);
- highbios_setup();
-
- apic_setup();
- pci_setup();
+ bios32_addr = highbios_setup();
- if ( (get_vcpu_nr() > 1) || get_apic_mode() )
+ if ( (hvm_info->nr_vcpus > 1) || hvm_info->apic_mode )
create_mp_tables();
switch ( virtual_vga )
printf("Loading Cirrus VGABIOS ...\n");
memcpy((void *)VGABIOS_PHYSICAL_ADDRESS,
vgabios_cirrusvga, sizeof(vgabios_cirrusvga));
- vgabios_sz = sizeof(vgabios_cirrusvga);
+ vgabios_sz = round_option_rom(sizeof(vgabios_cirrusvga));
break;
case VGA_std:
printf("Loading Standard VGABIOS ...\n");
memcpy((void *)VGABIOS_PHYSICAL_ADDRESS,
vgabios_stdvga, sizeof(vgabios_stdvga));
- vgabios_sz = sizeof(vgabios_stdvga);
+ vgabios_sz = round_option_rom(sizeof(vgabios_stdvga));
break;
default:
printf("No emulated VGA adaptor ...\n");
break;
}
- if ( virtual_vga != VGA_none )
- {
- vga_ram = e820_malloc(8 << 20, 4096);
- printf("VGA RAM at %08x\n", vga_ram);
- }
+ etherboot_phys_addr = VGABIOS_PHYSICAL_ADDRESS + vgabios_sz;
+ if ( etherboot_phys_addr < OPTIONROM_PHYSICAL_ADDRESS )
+ etherboot_phys_addr = OPTIONROM_PHYSICAL_ADDRESS;
+ etherboot_sz = scan_etherboot_nic(etherboot_phys_addr);
- etherboot_sz = scan_etherboot_nic((void*)ETHERBOOT_PHYSICAL_ADDRESS);
+ option_rom_phys_addr = etherboot_phys_addr + etherboot_sz;
+ option_rom_sz = pci_load_option_roms(option_rom_phys_addr);
- if ( get_acpi_enabled() )
+ if ( hvm_info->acpi_enabled )
{
printf("Loading ACPI ...\n");
acpi_build_tables();
}
+ init_vm86_tss();
+
cmos_write_memory_size();
printf("BIOS map:\n");
VGABIOS_PHYSICAL_ADDRESS + vgabios_sz - 1);
if ( etherboot_sz )
printf(" %05x-%05x: Etherboot ROM\n",
- ETHERBOOT_PHYSICAL_ADDRESS,
- ETHERBOOT_PHYSICAL_ADDRESS + etherboot_sz - 1);
+ etherboot_phys_addr,
+ etherboot_phys_addr + etherboot_sz - 1);
+ if ( option_rom_sz )
+ printf(" %05x-%05x: PCI Option ROMs\n",
+ option_rom_phys_addr,
+ option_rom_phys_addr + option_rom_sz - 1);
if ( smbios_sz )
printf(" %05x-%05x: SMBIOS tables\n",
SMBIOS_PHYSICAL_ADDRESS,
ROMBIOS_PHYSICAL_ADDRESS,
ROMBIOS_PHYSICAL_ADDRESS + rombios_sz - 1);
- xen_pfiob = init_xen_platform_io_base();
- if ( xen_pfiob && vga_ram )
- outl(xen_pfiob + 4, vga_ram);
+ build_e820_table();
+
+ bios_info = (struct bios_info *)BIOS_INFO_PHYSICAL_ADDRESS;
+ memset(bios_info, 0, sizeof(*bios_info));
+ bios_info->com1_present = uart_exists(0x3f8);
+ bios_info->com2_present = uart_exists(0x2f8);
+ bios_info->hpet_present = hpet_exists(ACPI_HPET_ADDRESS);
+ bios_info->pci_min = pci_mem_start;
+ bios_info->pci_len = pci_mem_end - pci_mem_start;
+ bios_info->bios32_entry = bios32_addr;
printf("Invoking ROMBIOS ...\n");
return 0;
int vcpu_nr, i;
uint8_t checksum;
- vcpu_nr = get_vcpu_nr();
+ vcpu_nr = hvm_info->nr_vcpus;
/* fill in the MP configuration table signature, "PCMP" */
mpct->signature[0] = 'P';
char *p;
int vcpu_nr, i, length;
- vcpu_nr = get_vcpu_nr();
+ vcpu_nr = hvm_info->nr_vcpus;
printf("Creating MP tables ...\n");
do_struct(smbios_type_16_init(p, memsize, nr_mem_devs));
for ( i = 0; i < nr_mem_devs; i++ )
{
- uint32_t dev_memsize = ((i == (nr_mem_devs - 1))
- ? (memsize & 0x3fff) : 0x4000);
+ uint32_t dev_memsize = 0x4000; /* all but last covers 16GB */
+ if ( (i == (nr_mem_devs - 1)) && ((memsize & 0x3fff) != 0) )
+ dev_memsize = memsize & 0x3fff; /* last dev is <16GB */
do_struct(smbios_type_17_init(p, dev_memsize, i));
do_struct(smbios_type_19_init(p, dev_memsize, i));
do_struct(smbios_type_20_init(p, dev_memsize, i));
static uint64_t
get_memsize(void)
{
- struct e820entry *map = HVM_E820;
- uint8_t num_entries = *HVM_E820_NR;
- uint64_t memsize = 0;
- int i;
+ uint64_t sz;
- /*
- * Walk through e820map, ignoring any entries that aren't marked
- * as usable or reserved.
- */
- for ( i = 0; i < num_entries; i++ )
- {
- if ( (map->type == E820_RAM) || (map->type == E820_RESERVED) )
- memsize += map->size;
- map++;
- }
+ sz = (uint64_t)hvm_info->low_mem_pgend << PAGE_SHIFT;
+ if ( hvm_info->high_mem_pgend )
+ sz += (hvm_info->high_mem_pgend << PAGE_SHIFT) - (1ull << 32);
/*
* Round up to the nearest MB. The user specifies domU pseudo-physical
* memory in megabytes, so not doing this could easily lead to reporting
* one less MB than the user specified.
*/
- return (memsize + (1 << 20) - 1) >> 20;
+ return (sz + (1ul << 20) - 1) >> 20;
}
int
/* SCRATCH_PHYSICAL_ADDRESS is a safe large memory area for scratch. */
len = write_smbios_tables((void *)SCRATCH_PHYSICAL_ADDRESS,
- get_vcpu_nr(), get_memsize(),
+ hvm_info->nr_vcpus, get_memsize(),
uuid, xen_version_str,
xen_major_version, xen_minor_version);
if ( len > SMBIOS_MAXIMUM_SIZE )
void smp_initialise(void)
{
- unsigned int i, nr_cpus = get_vcpu_nr();
+ unsigned int i, nr_cpus = hvm_info->nr_vcpus;
memcpy((void *)AP_BOOT_EIP, ap_boot_start, ap_boot_end - ap_boot_start);
#include <stdint.h>
#include <xen/xen.h>
#include <xen/memory.h>
-#include <xen/hvm/hvm_info_table.h>
void wrmsr(uint32_t idx, uint64_t v)
{
*p = '\0';
}
-static void e820_collapse(void)
+void *mem_alloc(uint32_t size, uint32_t align)
{
- int i = 0;
- struct e820entry *ent = (struct e820entry *)HVM_E820;
-
- while ( i < (*HVM_E820_NR-1) )
- {
- if ( (ent[i].type == ent[i+1].type) &&
- ((ent[i].addr + ent[i].size) == ent[i+1].addr) )
- {
- ent[i].size += ent[i+1].size;
- memcpy(&ent[i+1], &ent[i+2], (*HVM_E820_NR-i-2) * sizeof(*ent));
- (*HVM_E820_NR)--;
- }
- else
- {
- i++;
- }
- }
-}
-
-uint32_t e820_malloc(uint32_t size, uint32_t align)
-{
- uint32_t addr;
- int i;
- struct e820entry *ent = (struct e820entry *)HVM_E820;
+ static uint32_t reserve = RESERVED_MEMBASE - 1;
+ static int over_allocated;
+ struct xen_add_to_physmap xatp;
+ struct xen_memory_reservation xmr;
+ xen_pfn_t mfn;
+ uint32_t s, e;
- /* Align to at leats one kilobyte. */
+ /* Align to at least one kilobyte. */
if ( align < 1024 )
align = 1024;
- for ( i = *HVM_E820_NR - 1; i >= 0; i-- )
+ s = (reserve + align) & ~(align - 1);
+ e = s + size - 1;
+
+ BUG_ON((e < s) || (e >> PAGE_SHIFT) >= hvm_info->reserved_mem_pgstart);
+
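+    /*
+     * Allocations are carved sequentially from the reserved area starting at
+     * RESERVED_MEMBASE, which is not RAM by default. Each newly-touched page
+     * frame is either populated with a fresh page or, once the domain's
+     * allocation is exhausted, backed by a page relocated from the end of
+     * the ordinary RAM map.
+     */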
+ while ( (reserve >> PAGE_SHIFT) != (e >> PAGE_SHIFT) )
{
- addr = (ent[i].addr + ent[i].size - size) & ~(align-1);
- if ( (ent[i].type != E820_RAM) || /* not ram? */
- (addr < ent[i].addr) || /* too small or starts above 4gb? */
- ((addr + size) < addr) ) /* ends above 4gb? */
- continue;
+ reserve += PAGE_SIZE;
+ mfn = reserve >> PAGE_SHIFT;
- if ( addr != ent[i].addr )
+ /* Try to allocate a brand new page in the reserved area. */
+ if ( !over_allocated )
{
- memmove(&ent[i+1], &ent[i], (*HVM_E820_NR-i) * sizeof(*ent));
- (*HVM_E820_NR)++;
- ent[i].size = addr - ent[i].addr;
- ent[i+1].addr = addr;
- ent[i+1].size -= ent[i].size;
- i++;
+ xmr.domid = DOMID_SELF;
+ xmr.mem_flags = 0;
+ xmr.extent_order = 0;
+ xmr.nr_extents = 1;
+ set_xen_guest_handle(xmr.extent_start, &mfn);
+ if ( hypercall_memory_op(XENMEM_populate_physmap, &xmr) == 1 )
+ continue;
+ over_allocated = 1;
}
- ent[i].type = E820_RESERVED;
-
- e820_collapse();
-
- return addr;
+ /* Otherwise, relocate a page from the ordinary RAM map. */
+ if ( hvm_info->high_mem_pgend )
+ {
+ xatp.idx = --hvm_info->high_mem_pgend;
+ if ( xatp.idx == (1ull << (32 - PAGE_SHIFT)) )
+ hvm_info->high_mem_pgend = 0;
+ }
+ else
+ {
+ xatp.idx = --hvm_info->low_mem_pgend;
+ }
+ xatp.domid = DOMID_SELF;
+ xatp.space = XENMAPSPACE_gmfn;
+ xatp.gpfn = mfn;
+ if ( hypercall_memory_op(XENMEM_add_to_physmap, &xatp) != 0 )
+ BUG();
}
- return 0;
+ reserve = e;
+
+ return (void *)(unsigned long)s;
}
uint32_t ioapic_read(uint32_t reg)
asm volatile ( "ud2" );
}
-static int validate_hvm_info(struct hvm_info_table *t)
+static void validate_hvm_info(struct hvm_info_table *t)
{
- char signature[] = "HVM INFO";
uint8_t *ptr = (uint8_t *)t;
uint8_t sum = 0;
int i;
- /* strncmp(t->signature, "HVM INFO", 8) */
- for ( i = 0; i < 8; i++ )
+ if ( strncmp(t->signature, "HVM INFO", 8) )
{
- if ( signature[i] != t->signature[i] )
- {
- printf("Bad hvm info signature\n");
- return 0;
- }
+ printf("Bad hvm info signature\n");
+ BUG();
+ }
+
+ if ( t->length < sizeof(struct hvm_info_table) )
+ {
+ printf("Bad hvm info length\n");
+ BUG();
}
for ( i = 0; i < t->length; i++ )
sum += ptr[i];
- return (sum == 0);
+ if ( sum != 0 )
+ {
+ printf("Bad hvm info checksum\n");
+ BUG();
+ }
}
-static struct hvm_info_table *get_hvm_info_table(void)
+struct hvm_info_table *get_hvm_info_table(void)
{
static struct hvm_info_table *table;
struct hvm_info_table *t;
t = (struct hvm_info_table *)HVM_INFO_PADDR;
- if ( !validate_hvm_info(t) )
- {
- printf("Bad hvm info table\n");
- return NULL;
- }
+ validate_hvm_info(t);
table = t;
return table;
}
-int get_vcpu_nr(void)
-{
- struct hvm_info_table *t = get_hvm_info_table();
- return (t ? t->nr_vcpus : 1);
-}
-
-int get_acpi_enabled(void)
-{
- struct hvm_info_table *t = get_hvm_info_table();
- return (t ? t->acpi_enabled : 1);
-}
-
-int get_apic_mode(void)
-{
- struct hvm_info_table *t = get_hvm_info_table();
- return (t ? t->apic_mode : 1);
-}
-
uint16_t get_cpu_mhz(void)
{
struct xen_add_to_physmap xatp;
return cpu_mhz;
}
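+/*
+ * Probe for a UART: the Interrupt Enable Register must read back both a
+ * written 0x00 and a written 0x0f; the original IER value is restored
+ * afterwards.
+ */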
+int uart_exists(uint16_t uart_base)
+{
+ uint16_t ier = uart_base + 1;
+ uint8_t a, b, c;
+
+ a = inb(ier);
+ outb(ier, 0);
+ b = inb(ier);
+ outb(ier, 0xf);
+ c = inb(ier);
+ outb(ier, a);
+
+ return ((b == 0) && (c == 0xf));
+}
+
+int hpet_exists(unsigned long hpet_base)
+{
+ uint32_t hpet_id = *(uint32_t *)hpet_base;
+ return ((hpet_id >> 16) == 0x8086);
+}
+
/*
* Local variables:
* mode: C
#include <stdarg.h>
#include <stdint.h>
+#include <xen/hvm/hvm_info_table.h>
#undef offsetof
#define offsetof(t, m) ((unsigned long)&((t *)0)->m)
/* Get CPU speed in MHz. */
uint16_t get_cpu_mhz(void);
+/* Hardware detection. */
+int uart_exists(uint16_t uart_base);
+int hpet_exists(unsigned long hpet_base);
+
/* Do cpuid instruction, with operation 'idx' */
void cpuid(uint32_t idx, uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx);
})
/* HVM-builder info. */
-int get_vcpu_nr(void);
-int get_acpi_enabled(void);
-int get_apic_mode(void);
+struct hvm_info_table *get_hvm_info_table(void);
+#define hvm_info (get_hvm_info_table())
/* String and memory functions */
int strcmp(const char *cs, const char *ct);
int printf(const char *fmt, ...) __attribute__ ((format (printf, 1, 2)));
int vprintf(const char *fmt, va_list ap);
-/* Reserve a RAM region in the e820 table. */
-uint32_t e820_malloc(uint32_t size, uint32_t align);
+/* Allocate memory in a reserved region below 4GB. */
+void *mem_alloc(uint32_t size, uint32_t align);
+#define virt_to_phys(v) ((unsigned long)(v))
/* Prepare the 32bit BIOS */
-void highbios_setup(void);
+uint32_t highbios_setup(void);
/* Miscellaneous. */
void cacheattr_init(void);
*
* Author: Stefan Berger <stefanb@us.ibm.com>
*/
-#include "rombios_compat.h"
-#include "32bitprotos.h"
-
-/*
- the jumptable that will be copied into the rombios in the 0xf000 segment
- for every function that is to be called from the lower BIOS, make an entry
- here.
- */
-#define TABLE_ENTRY(idx, func) [idx] = (uint32_t)func
-uint32_t jumptable[IDX_LAST+1] __attribute__((section (".biosjumptable"))) =
-{
- TABLE_ENTRY(IDX_TCPA_ACPI_INIT, tcpa_acpi_init),
- TABLE_ENTRY(IDX_TCPA_EXTEND_ACPI_LOG, tcpa_extend_acpi_log),
- TABLE_ENTRY(IDX_TCGINTERRUPTHANDLER, TCGInterruptHandler),
-
- TABLE_ENTRY(IDX_TCPA_CALLING_INT19H, tcpa_calling_int19h),
- TABLE_ENTRY(IDX_TCPA_RETURNED_INT19H, tcpa_returned_int19h),
- TABLE_ENTRY(IDX_TCPA_ADD_EVENT_SEPARATORS, tcpa_add_event_separators),
- TABLE_ENTRY(IDX_TCPA_WAKE_EVENT, tcpa_wake_event),
- TABLE_ENTRY(IDX_TCPA_ADD_BOOTDEVICE, tcpa_add_bootdevice),
- TABLE_ENTRY(IDX_TCPA_START_OPTION_ROM_SCAN, tcpa_start_option_rom_scan),
- TABLE_ENTRY(IDX_TCPA_OPTION_ROM, tcpa_option_rom),
- TABLE_ENTRY(IDX_TCPA_IPL, tcpa_ipl),
- TABLE_ENTRY(IDX_TCPA_MEASURE_POST, tcpa_measure_post),
-
- TABLE_ENTRY(IDX_TCPA_INITIALIZE_TPM, tcpa_initialize_tpm),
-
- TABLE_ENTRY(IDX_GET_S3_WAKING_VECTOR, get_s3_waking_vector),
+#include "rombios_compat.h"
- TABLE_ENTRY(IDX_LAST , 0) /* keep last */
-};
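+/*
+ * 32bitprotos.h now lists the 32-bit entry points as X() macro entries.
+ * The assembly below builds the dispatch jumptable from that list: the
+ * caller passes a function index in %bx and we jump indirectly through
+ * the table.
+ */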
+asm (
+ " .text \n"
+ " movzwl %bx,%eax \n"
+ " jmp *jumptable(,%eax,4) \n"
+ " .data \n"
+ "jumptable: \n"
+#define X(idx, ret, fn, args...) " .long "#fn"\n"
+#include "32bitprotos.h"
+#undef X
+ );
XEN_ROOT = ../../../..
include $(XEN_ROOT)/tools/firmware/Rules.mk
-SOURCES = util.c
TARGET = 32bitbios_flat.h
-CFLAGS += $(CFLAGS_include) -I.. -DGCC_PROTOS
+CFLAGS += $(CFLAGS_include) -I..
SUBDIRS = tcgbios
-MODULES = tcgbios/tcgbiosext.o
-
.PHONY: all
all: subdirs-all
$(MAKE) $(TARGET)
.PHONY: clean
clean: subdirs-clean
- rm -rf *.o $(TARGET)
+ rm -rf *.o $(TARGET) $(DEPS)
+
+$(TARGET): 32bitbios_all.o
+ sh mkhex highbios_array 32bitbios_all.o > $@
-$(TARGET): 32bitbios.o $(MODULES) util.o
+32bitbios_all.o: 32bitbios.o tcgbios/tcgbiosext.o util.o pmm.o
$(LD) $(LDFLAGS_DIRECT) -s -r $^ -o 32bitbios_all.o
@nm 32bitbios_all.o | \
egrep '^ +U ' >/dev/null && { \
nm -u 32bitbios_all.o; \
exit 11; \
} || :
- sh mkhex highbios_array 32bitbios_all.o > $@
+
+-include $(DEPS)
--- /dev/null
+/*
+ * pmm.c - POST(Power On Self Test) Memory Manager
+ * according to the specification described in
+ * http://www.phoenix.com/NR/rdonlyres/873A00CF-33AC-4775-B77E-08E7B9754993/0/specspmm101.pdf
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ * Copyright (C) 2009 FUJITSU LIMITED
+ *
+ * Author: Kouya Shimura <kouya@jp.fujitsu.com>
+ */
+
+/*
+ * Algorithm:
+ *
+ * This is not a fast storage allocator but a simple one. There is no
+ * segregated management by block size, and nothing special is done to
+ * avoid fragmentation.
+ *
+ * The allocation algorithm is first-fit. All memory blocks are kept on
+ * a singly linked list in address order (i.e. there is no backward
+ * pointer). Allocation searches from the head (lowest address) of the
+ * heap for the first available block of equal or larger size. A larger
+ * block is split in two unless the remainder would be too small.
+ *
+ * For deallocation, the specified block is simply marked as available;
+ * nothing else is done, so fragmentation can occur. Runs of contiguous
+ * available blocks are coalesced during the search phase of a later
+ * allocation.
+ *
+ * The following is an outline of the algorithm. The actual code is more
+ * involved on account of alignment and handle checking.
+ *
+ * static memblk_t *
+ * alloc(heap_t *heap, uint32_t size)
+ * {
+ * static memblk_t *mb;
+ * for_each_memblk(heap, mb) // search memory blocks
+ * if (memblk_is_avail(mb))
+ * {
+ * collect_avail_memblks(heap, mb);
+ * if (size <= memblk_bufsize(mb))
+ * {
+ * split_memblk(mb, size);
+ * set_inuse(mb);
+ * return mb;
+ * }
+ * }
+ * return NULL;
+ * }
+ */
+
+#include <stdint.h>
+#include <stddef.h>
+#include <../hvmloader/config.h>
+#include <../hvmloader/e820.h>
+#include "util.h"
+
+#define DEBUG_PMM 0
+
+#define __stringify(a) #a
+#define stringify(a) __stringify(a)
+
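+/*
+ * Note the trailing 'else': it ensures that ASSERT(expr, action); forms a
+ * complete statement (the caller's ';' becomes the empty else branch), so
+ * the macro can be used safely within surrounding if/else constructs.
+ */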
+#define ASSERT(_expr, _action) \
+ if (!(_expr)) { \
+ printf("ASSERTION FAIL: %s %s:%d %s()\n", \
+ stringify(_expr), __FILE__, __LINE__, __func__); \
+ _action; \
+ } else
+
+#if DEBUG_PMM
+# define PMM_DEBUG(format, p...) printf("PMM " format, ##p)
+#else
+# define PMM_DEBUG(format, p...)
+#endif
+
+struct pmmAllocArgs {
+ uint16_t function;
+ uint32_t length;
+ uint32_t handle;
+ uint16_t flags;
+} __attribute__ ((packed));
+
+struct pmmFindArgs {
+ uint16_t function;
+ uint32_t handle;
+} __attribute__ ((packed));
+
+struct pmmDeallocateArgs {
+ uint16_t function;
+ uint32_t buffer;
+} __attribute__ ((packed));
+
+#define PMM_FUNCTION_ALLOCATE 0
+#define PMM_FUNCTION_FIND 1
+#define PMM_FUNCTION_DEALLOC 2
+
+#define PARAGRAPH_LENGTH 16 // unit of length
+
+#define PMM_HANDLE_ANONYMOUS 0xffffffff
+
+#define PMM_FLAGS_MEMORY_TYPE_MASK 0x0003
+#define PMM_FLAGS_MEMORY_INVALID 0
+#define PMM_FLAGS_MEMORY_CONVENTIONAL 1 // 0 to 1MB
+#define PMM_FLAGS_MEMORY_EXTENDED 2 // 1MB to 4GB
+#define PMM_FLAGS_MEMORY_ANY 3 // whichever is available
+#define PMM_FLAGS_ALIGINMENT 0x0004
+
+/* Error code */
+#define PMM_ENOMEM (0) // Out of memory, duplicate handle
+#define PMM_EINVAL (-1) // Invalid argument
+
+#define ALIGN_UP(addr, size) (((addr)+((size)-1))&(~((size)-1)))
+#define ALIGN_DOWN(addr, size) ((addr)&(~((size)-1)))
+
+typedef struct memblk {
+ uint32_t magic; // inuse or available
+    struct memblk *next;   // points to the memblk immediately following this one
+    uint32_t handle;       // identifier of this block
+    uint32_t __fill;       // padding for 16-byte alignment, unused
+ uint8_t buffer[0];
+} memblk_t;
+
+typedef struct heap {
+ memblk_t *head; // start address of heap
+ memblk_t *end; // end address of heap
+} heap_t;
+
+#define HEAP_NOT_INITIALIZED (memblk_t *)-1
+#define HEAP_ALIGNMENT 16
+
+/*
+ * PMM handles two memory heaps, the caller chooses either.
+ *
+ * - conventional memory (below 1MB)
+ * In HVM, the area is fixed. 0x00010000-0x0007FFFF
+ * (from SCRATCH_PHYSICAL_ADDRESS to HYPERCALL_PHYSICAL_ADDRESS)
+ *
+ * - extended memory (start at 1MB, below 4GB)
+ * In HVM, the area starts at memory address 0x00100000.
+ * The end address is variable. We read low RAM address from e820 table.
+ *
+ * The following struct must be located in the data segment, since the
+ * BSS in 32bitbios is not relocated.
+ */
+static struct {
+ heap_t heap; // conventional memory
+ heap_t ext_heap; // extended memory
+} pmm_data = { {HEAP_NOT_INITIALIZED, NULL}, {NULL, NULL} };
+
+/* These values are private use, not a spec in PMM */
+#define MEMBLK_MAGIC_INUSE 0x2A4D4D50 // 'PMM*'
+#define MEMBLK_MAGIC_AVAIL 0x5F4D4D50 // 'PMM_'
+
+#define memblk_is_inuse(_mb) ((_mb)->magic == MEMBLK_MAGIC_INUSE)
+#define memblk_is_avail(_mb) ((_mb)->magic == MEMBLK_MAGIC_AVAIL)
+
+static void set_inuse(memblk_t *mb, uint32_t handle)
+{
+ mb->magic = MEMBLK_MAGIC_INUSE;
+ mb->handle = handle;
+}
+
+static void set_avail(memblk_t *mb)
+{
+ mb->magic = MEMBLK_MAGIC_AVAIL;
+ mb->handle = PMM_HANDLE_ANONYMOUS;
+}
+
+#define MEMBLK_HEADER_SIZE ((int)(&((memblk_t *)0)->buffer))
+#define MIN_MEMBLK_SIZE (MEMBLK_HEADER_SIZE + PARAGRAPH_LENGTH)
+
+#define memblk_size(_mb) ((void *)((_mb)->next) - (void *)(_mb))
+#define memblk_buffer(_mb) ((uint32_t)(&(_mb)->buffer))
+#define memblk_bufsize(_mb) (memblk_size(_mb) - MEMBLK_HEADER_SIZE)
+
+#define buffer_memblk(_buf) (memblk_t *)((_buf) - MEMBLK_HEADER_SIZE)
+
+#define memblk_loop_mbondition(_h, _mb) \
+ (((_mb) < (_h)->end) && (/* avoid infinite loop */ (_mb) < (_mb)->next))
+
+#define for_each_memblk(_h, _mb) \
+ for ((_mb) = (_h)->head; \
+ memblk_loop_mbondition(_h, _mb); \
+ (_mb) = (_mb)->next)
+
+#define for_remain_memblk(_h, _mb) \
+ for (; \
+ memblk_loop_mbondition(_h, _mb); \
+ (_mb) = (_mb)->next)
+
+/*
+ * <-size->
+ * +==================+======+ +========+========+======+
+ * | avail | | | avail | avail | |
+ * | memblk |memblk|... | memblk | memblk |memblk|...
+ * +==================+======+ => +========+========+======+
+ * ^ | ^ | ^ | ^ | ^ | ^
+ * | |next | |next| |next | |next | |next|
+ * | \________________/ \____/ \______/ \______/ \____/
+ * | ^
+ * | |
+ * mb +- sb(return value)
+ */
+static memblk_t *
+split_memblk(memblk_t *mb, uint32_t size)
+{
+ memblk_t *sb = (void *)memblk_buffer(mb) + size;
+
+ /* Only split if the remaining fragment is big enough. */
+ if ( (memblk_bufsize(mb) - size) < MIN_MEMBLK_SIZE)
+ return mb;
+
+ sb->next = mb->next;
+ set_avail(sb);
+
+ mb->next = sb;
+ return sb;
+}
+
+/*
+ * +======+======+======+======+ +=================+======+
+ * |avail |avail |avail |inuse | | avail |inuse |
+ * |memblk|memblk|memblk|memblk|... | memblk |memblk|...
+ * +======+======+======+======+ => +=================+======+
+ * ^ | ^ | ^ | ^ | ^ | ^ | ^
+ * | |next| |next| |next| |next| |next | |next|
+ * | \____/ \____/ \____/ \____/ \_______________/ \____/
+ * |
+ * mb
+ */
+static void
+collect_avail_memblks(heap_t *heap, memblk_t *mb)
+{
+ memblk_t *nb = mb->next;
+
+ for_remain_memblk ( heap, nb )
+ if ( memblk_is_inuse(nb) )
+ break;
+ mb->next = nb;
+}
+
+static void
+pmm_init_heap(heap_t *heap, uint32_t from_addr, uint32_t to_addr)
+{
+ memblk_t *mb = (memblk_t *)ALIGN_UP(from_addr, HEAP_ALIGNMENT);
+
+ mb->next = (memblk_t *)ALIGN_DOWN(to_addr, HEAP_ALIGNMENT);
+ set_avail(mb);
+
+ heap->head = mb;
+ heap->end = mb->next;
+}
+
+static void
+pmm_initalize(void)
+{
+ int i, e820_nr = *E820_NR;
+ struct e820entry *e820 = E820;
+
+ /* Extended memory: RAM below 4GB, 0x100000-0xXXXXXXXX */
+ for ( i = 0; i < e820_nr; i++ )
+ {
+ if ( (e820[i].type == E820_RAM) && (e820[i].addr >= 0x00100000) )
+ {
+ pmm_init_heap(&pmm_data.ext_heap, e820[i].addr,
+ e820[i].addr + e820[i].size);
+ break;
+ }
+ }
+
+    /* Conventional memory: RAM below 1MB, 0x10000-0x7FFFF */
+ pmm_init_heap(&pmm_data.heap, SCRATCH_PHYSICAL_ADDRESS,
+ HYPERCALL_PHYSICAL_ADDRESS);
+}
+
+static uint32_t
+pmm_max_avail_length(heap_t *heap)
+{
+ memblk_t *mb;
+ uint32_t size, max = 0;
+
+ for_each_memblk ( heap, mb )
+ {
+ if ( !memblk_is_avail(mb) )
+ continue;
+ collect_avail_memblks(heap, mb);
+ size = memblk_bufsize(mb);
+ if ( size > max )
+ max = size;
+ }
+
+ return (max / PARAGRAPH_LENGTH);
+}
+
+static memblk_t *
+first_fit(heap_t *heap, uint32_t size, uint32_t handle, uint32_t flags)
+{
+ memblk_t *mb;
+ int32_t align = 0;
+
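+    /*
+     * When alignment is requested, align to the least-significant set bit
+     * of the requested size: e.g. a size of 0x6000 yields 0x2000 alignment.
+     */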
+ if ( flags & PMM_FLAGS_ALIGINMENT )
+ align = ((size ^ (size - 1)) >> 1) + 1;
+
+ for_each_memblk ( heap, mb )
+ {
+ if ( memblk_is_avail(mb) )
+ {
+ collect_avail_memblks(heap, mb);
+
+ if ( align )
+ {
+ uint32_t addr = memblk_buffer(mb);
+ uint32_t offset = ALIGN_UP(addr, align) - addr;
+
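+                /*
+                 * If the buffer is not already aligned, split the block so
+                 * that the trailing part's buffer starts on the required
+                 * boundary, and return that (still available) trailing
+                 * block to the caller.
+                 */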
+ if ( offset > 0 )
+ {
+ ASSERT(offset >= MEMBLK_HEADER_SIZE, continue);
+
+ if ( (offset + size) > memblk_bufsize(mb) )
+ continue;
+
+ mb = split_memblk(mb, offset - MEMBLK_HEADER_SIZE);
+ return mb;
+ }
+ }
+
+ if ( size <= memblk_bufsize(mb) )
+ return mb;
+ }
+ else
+ {
+ ASSERT(memblk_is_inuse(mb), return NULL);
+
+ /* Duplication check for handle. */
+ if ( (handle != PMM_HANDLE_ANONYMOUS) && (mb->handle == handle) )
+ return NULL;
+ }
+ }
+
+ return NULL;
+}
+
+static memblk_t *
+pmm_find_handle(heap_t *heap, uint32_t handle)
+{
+ memblk_t *mb;
+
+ if ( handle == PMM_HANDLE_ANONYMOUS )
+ return NULL;
+
+ for_each_memblk ( heap, mb )
+ if ( mb->handle == handle )
+ return mb;
+
+ return NULL;
+}
+
+/*
+ * Allocate a memory block of the specified type and size, and return
+ * the address of the memory block.
+ *
+ * 'handle' is a client-specified identifier to be associated with the
+ * allocated memory block. A handle of 0xFFFFFFFF indicates that no
+ * identifier should be associated with the block. Such a memory block
+ * is known as an "anonymous" memory block and cannot be found using
+ * the pmmFind function. If the specified handle is already associated
+ * with a currently allocated memory block, the error value 0x00000000
+ * is returned.
+ *
+ * If length is 0x00000000, no memory is allocated and the value
+ * returned is the size of the largest memory block available for the
+ * memory type specified in the flags parameter. The alignment bit in
+ * the flags register is ignored when calculating the largest memory
+ * block available.
+ *
+ * A return value of 0x00000000 indicates that an error occurred and
+ * no memory has been allocated.
+ */
+static uint32_t
+pmmAllocate(uint32_t length, uint32_t handle, uint16_t flags)
+{
+ heap_t *heap;
+ memblk_t *mb;
+ uint32_t size;
+
+ switch ( flags & PMM_FLAGS_MEMORY_TYPE_MASK )
+ {
+ case PMM_FLAGS_MEMORY_CONVENTIONAL:
+ heap = &pmm_data.heap;
+ break;
+
+ case PMM_FLAGS_MEMORY_EXTENDED:
+ case PMM_FLAGS_MEMORY_ANY: /* XXX: ignore conventional memory for now */
+ heap = &pmm_data.ext_heap;
+ break;
+
+ default:
+ return PMM_EINVAL;
+ }
+
+ /* return the largest memory block available */
+ if ( length == 0 )
+ return pmm_max_avail_length(heap);
+
+ size = length * PARAGRAPH_LENGTH;
+ mb = first_fit(heap, size, handle, flags);
+
+ if ( mb == NULL )
+ return PMM_ENOMEM;
+
+ /* duplication check for handle */
+ if ( handle != PMM_HANDLE_ANONYMOUS )
+ {
+ memblk_t *nb = mb->next;
+
+ for_remain_memblk(heap, nb)
+ if (nb->handle == handle)
+ return PMM_ENOMEM;
+ }
+
+ split_memblk(mb, size);
+ set_inuse(mb, handle);
+
+ return memblk_buffer(mb);
+}
+
+/*
+ * returns the address of the memory block associated with the
+ * specified handle.
+ *
+ * A return value of 0x00000000 indicates that the handle does not
+ * correspond to a currently allocated memory block.
+ */
+static uint32_t
+pmmFind(uint32_t handle)
+{
+ memblk_t *mb;
+
+ if ( handle == PMM_HANDLE_ANONYMOUS )
+ return 0;
+
+ mb = pmm_find_handle(&pmm_data.heap, handle);
+ if ( mb == NULL )
+ mb = pmm_find_handle(&pmm_data.ext_heap, handle);
+
+ return mb ? memblk_buffer(mb) : 0;
+}
+
+/*
+ * frees the specified memory block that was previously allocated by
+ * pmmAllocate.
+ *
+ * If the memory block was deallocated correctly, the return value is
+ * 0x00000000. If there was an error, the return value is non-zero.
+ */
+static uint32_t
+pmmDeallocate(uint32_t buffer)
+{
+ memblk_t *mb = buffer_memblk(buffer);
+
+ if ( !memblk_is_inuse(mb) )
+ return PMM_EINVAL;
+
+ set_avail(mb);
+ return 0;
+}
+
+
+union pmm_args {
+ uint16_t function;
+ struct pmmAllocArgs alloc;
+ struct pmmFindArgs find;
+ struct pmmDeallocateArgs dealloc;
+} __attribute__ ((packed));
+
+/*
+ * entry function of all PMM services.
+ *
+ * Values returned to the caller are placed in the DX:AX register
+ * pair. The flags and all registers, other than DX and AX, are
+ * preserved across calls to PMM services.
+ */
+uint32_t
+pmm(void *argp)
+{
+ union pmm_args *ap = argp;
+ uint32_t ret = PMM_EINVAL;
+
+ if ( pmm_data.heap.head == HEAP_NOT_INITIALIZED )
+ pmm_initalize();
+
+ switch ( ap->function )
+ {
+ case PMM_FUNCTION_ALLOCATE:
+ ret = pmmAllocate(ap->alloc.length, ap->alloc.handle, ap->alloc.flags);
+ PMM_DEBUG("Alloc length=%x handle=%x flags=%x ret=%x\n",
+ ap->alloc.length, ap->alloc.handle, ap->alloc.flags, ret);
+ break;
+
+ case PMM_FUNCTION_FIND:
+ ret = pmmFind(ap->find.handle);
+ PMM_DEBUG("Find handle=%x ret=%x\n", ap->find.handle, ret);
+ break;
+
+ case PMM_FUNCTION_DEALLOC:
+ ret = pmmDeallocate(ap->dealloc.buffer);
+ PMM_DEBUG("Dealloc buffer=%x ret=%x\n", ap->dealloc.buffer, ret);
+ break;
+
+ default:
+ PMM_DEBUG("Invalid function:%d\n", ap->function);
+ break;
+ }
+
+ return ret;
+}
*addr = val;
}
+#define X(idx, ret, fn, args...) ret fn (args);
+#include "32bitprotos.h"
+#undef X
+
#endif
include $(XEN_ROOT)/tools/firmware/Rules.mk
TARGET = tcgbiosext.o
-FILES = tcgbios tpm_drivers
-OBJECTS = $(foreach f,$(FILES),$(f).o)
-CFLAGS += $(CFLAGS_include) -I.. -I../.. -DGCC_PROTOS
-
-.PHONY: all clean
+CFLAGS += $(CFLAGS_include) -I.. -I../..
+.PHONY: all
all: $(TARGET)
+.PHONY: clean
clean:
- rm -rf *.o $(TARGET)
+ rm -rf *.o $(TARGET) $(DEPS)
-$(TARGET): $(OBJECTS)
+$(TARGET): tcgbios.o tpm_drivers.o
$(LD) $(LDFLAGS_DIRECT) -r $^ -o $@
+
+-include $(DEPS)
#include "util.h"
#include "tcgbios.h"
-#include "32bitprotos.h"
/* local structure and variables */
struct ptti_cust {
}
+/*
+ initialize the TCPA ACPI subsystem; find the ACPI tables and determine
+ where the TCPA table is.
+ */
void tcpa_acpi_init(void)
{
struct acpi_20_rsdt *rsdt;
}
+/*
+ * Extend the ACPI log with the given entry by copying the
+ * entry data into the log.
+ * Input:
+ *   Pointer to the structure to be copied into the log.
+ *
+ * Output:
+ *   The lower 16 bits of the return code contain the entry number;
+ *   if the entry number is '0', the upper 16 bits contain an error code.
+ */
uint32_t tcpa_extend_acpi_log(uint32_t entry_ptr)
{
uint32_t res = 0;
}
/*
- * add the boot device to the measurement log
+ * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to
+ * the list of measurements.
*/
void tcpa_add_bootdevice(uint32_t bootcd, uint32_t bootdrv)
{
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*
* Copyright (C) IBM Corporation, 2006
+ * Copyright (c) 2008, Citrix Systems, Inc.
*
* Author: Stefan Berger <stefanb@us.ibm.com>
+ * Author: Keir Fraser <keir.fraser@citrix.com>
*/
/*
* (4 bytes) even for uint16_t, so casting to 32bit from bcc is a good idea.
*/
-#define SEGMENT_OFFSET 0xf0000
-#define REAL_MODE_CODE_SEGMENT 0xf000
+/* At most 32 bytes in argument list to a 32-bit function. */
+#define MAX_ARG_BYTES 32
-#define START_PM_CODE USE32
-#define END_PM_CODE USE16
+#define REAL_MODE_CODE_OFFSET 0xf0000
-/* definition of used code/data segment descriptors */
-#define PM_NORMAL_CS (gdt_entry_pm_cs - gdt_base)
+/* Definitions of code/data segment descriptors. */
+#define PM_32BIT_CS (gdt_entry_pm_32bit_cs - gdt_base)
#define PM_16BIT_CS (gdt_entry_pm_16bit_cs - gdt_base)
#define PM_32BIT_DS (gdt_entry_pm_32bit_ds - gdt_base)
-
- ASM_START
-
- ; Switch into protected mode to allow access to 32 bit addresses.
- ; This function allows switching into protected mode.
- ; (the specs says big real mode, but that will not work)
- ;
- ; preserves all registers and prepares cs, ds, es, ss for usage
- ; in protected mode; while in prot.mode interrupts remain disabled
-switch_to_protmode:
- cli
-
- ; have to fix the stack for proper return address in 32 bit mode
- push WORD #(REAL_MODE_CODE_SEGMENT>>12) ;extended return address
- push bp ;pop@A1
- mov bp, sp
- push eax ;pop@A2
- mov eax, 2[bp] ; fix return address
- rol eax, #16
- mov 2[bp], eax
-
- mov eax, esp
- ror eax, #16 ; hi(esp)
-
- push bx ; preserve before function call
- push cx
- push dx
-
- push ax ; prepare stack for
- push es ; call
- push ds
- push cs
- push ss
- call _store_segment_registers
- add sp, #10 ; pop ax,es-ss
-
- pop dx ; restore after function call
- pop cx
- pop bx
-
- ; calculate protected-mode esp from ss:sp
- and esp, #0xffff
- xor eax, eax
- mov ax, ss
- rol eax, #4
- add eax, esp
- mov esp, eax
-
- seg cs
- lgdt my_gdtdesc ; switch to own table
-
- mov eax, cr0
- or al, #0x1 ; protected mode 'on'
- mov cr0, eax
-
- jmpf DWORD (SEGMENT_OFFSET | switch_to_protmode_goon_1), #PM_NORMAL_CS
-
- START_PM_CODE
-
-switch_to_protmode_goon_1:
- mov ax, #PM_32BIT_DS ; 32 bit segment that allows
- mov ds, ax ; to reach all 32 bit
- mov es, ax ; addresses
- mov ss, ax
-
- pop eax ;@A2
- pop bp ;@A1
- ret
-
- END_PM_CODE
-
-
+#define PM_16BIT_DS (gdt_entry_pm_16bit_ds - gdt_base)
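+
+; Descriptor type/flag bytes below (cf. Intel SDM Vol. 3, segment
+; descriptors): 0x9b/0x93 = present, DPL0, code/data, accessed;
+; 0xcf = 4KiB granularity with 32-bit default size; 0x00 in the 16-bit
+; entries = byte granularity, 16-bit default size.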
.align 16
gdt_base:
- ; see Intel SW Dev. Manuals section 3.4.5, Volume 3 for meaning of bits
.word 0,0
.byte 0,0,0,0
-
-gdt_entry_pm_cs:
- ; 32 bit code segment for protected mode
+gdt_entry_pm_32bit_cs:
.word 0xffff, 0x0000
- .byte 0x00, 0x9a, 0xcf, 0x00
-
+ .byte 0x00, 0x9b, 0xcf, 0x00
gdt_entry_pm_16bit_cs:
- ; temp. 16 bit code segment used while in protected mode
.word 0xffff, 0x0000
- .byte SEGMENT_OFFSET >> 16, 0x9a, 0x0, 0x0
-
+ .byte REAL_MODE_CODE_OFFSET >> 16, 0x9b, 0x0, 0x0
gdt_entry_pm_32bit_ds:
- ; (32 bit) data segment (r/w) reaching all possible areas in 32bit memory
- ; 4kb granularity
.word 0xffff, 0x0000
- .byte 0x0, 0x92, 0xcf, 0x0
+ .byte 0x0, 0x93, 0xcf, 0x0
+gdt_entry_pm_16bit_ds:
+ .word 0xffff, 0x0000
+ .byte 0x0, 0x93, 0x0, 0x0
gdt_entry_end:
-my_gdtdesc:
+protmode_gdtdesc:
.word (gdt_entry_end - gdt_base) - 1
- .long gdt_base | SEGMENT_OFFSET
-
+ .long gdt_base | REAL_MODE_CODE_OFFSET
-realmode_gdtdesc: ;to be used in real mode
+realmode_gdtdesc:
.word 0xffff
.long 0x0
+Upcall:
+ ; Do an upcall into 32 bit space
+ ;
+ ; Input:
+ ; bx: index of function to call
+ ; Output:
+ ; dx, ax: 32 bit result of call (even if 'void' is expected)
+
+ ; Save caller state, stack frame offsets listed below
+#define esp_off 0
+#define ss_off 4
+#define es_off 6
+#define ds_off 8
+#define flags_off 10
+#define retaddr_off 12
+#define args_off 14
+ pushf
+ cli
+ push ds
+ push es
+ push ss
+ push esp
+ ; Calculate protected-mode esp from ss:sp
+ and esp, #0xffff
+ xor eax, eax
+ mov ax, ss
+ shl eax, #4
+ add esp, eax
-switch_to_realmode:
- ; Implementation of switching from protected mode to real mode
- ; prepares cs, es, ds, ss to be used in real mode
- ; spills eax
- START_PM_CODE
-
- ; need to fix up the stack to return in 16 bit mode
- ; currently the 32 bit return address is on the stack
- pop eax
- push ax
-
- push bx ;pop@1
- push si ;pop@2
-
- call _ebda_ss_offset32 ; get the offset of the ss
- mov bx, ax ; entry within the ebda.
-
- jmpf switch_to_realmode_goon_1, #PM_16BIT_CS
-
- END_PM_CODE
-
-switch_to_realmode_goon_1:
+ ; Switch to protected mode
+ seg cs
+ lgdt protmode_gdtdesc
mov eax, cr0
- and al, #0xfe ; protected mode 'off'
+ or al, #0x1 ; protected mode on
mov cr0, eax
-
- jmpf switch_to_realmode_goon_2, #REAL_MODE_CODE_SEGMENT
-
-switch_to_realmode_goon_2:
-
- ; get orig. 'ss' without using the stack (no 'call'!)
- xor eax, eax ; clear upper 16 bits (and lower)
- mov ax, #0x40 ; where is the ebda located?
+ jmpf DWORD (REAL_MODE_CODE_OFFSET|upcall1), #PM_32BIT_CS
+upcall1:
+ USE32
+ mov ax, #PM_32BIT_DS
mov ds, ax
- mov si, #0xe
- seg ds
- mov ax, [si] ; ax = segment of ebda
-
- mov ds, ax ; segment of ebda
- seg ds
- mov ax, [bx] ; stack segment - bx has been set above
+ mov es, ax
mov ss, ax
- ; from esp and ss calculate real-mode sp
- rol eax, #4
+ ; Marshal arguments and call 32-bit function
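+ ; (Each PUSH below copies one dword of the caller's argument area onto
+ ;  the 32-bit stack; since ESP drops by 4 per push, the fixed offset
+ ;  walks backwards through the arguments, preserving their order.)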
+ mov ecx, #MAX_ARG_BYTES/4
+upcall2:
+ push MAX_ARG_BYTES-4+args_off[esp]
+ loop upcall2
+ mov eax, [BIOS_INFO_PHYSICAL_ADDRESS + BIOSINFO_OFF_bios32_entry]
+ call eax
+ add esp, #MAX_ARG_BYTES
+ mov ecx, eax ; Result in ecx
+
+ ; Restore real-mode stack pointer
+ xor eax, eax
+ mov ax, ss_off[esp]
+ mov bx, ax ; Real-mode ss in bx
+ shl eax, 4
sub esp, eax
- push dx ;preserve before call(s)
- push cx
- push bx
-
- call _get_register_ds ; get orig. 'ds'
+ ; Return to real mode
+ jmpf upcall3, #PM_16BIT_CS
+upcall3:
+ USE16
+ mov ax, #PM_16BIT_DS
mov ds, ax
- call _get_register_es ; get orig. 'es'
mov es, ax
- call _get_register_esp_hi ; fix the upper 16 bits of esp
- ror esp, #16
- mov sp, ax
- rol esp, #16
-
- pop bx
- pop cx
- pop dx
-
+ mov ss, ax
+ mov eax, cr0
+ and al, #0xfe ; protected mode off
+ mov cr0, eax
+ jmpf upcall4, #REAL_MODE_CODE_OFFSET>>4
+upcall4:
seg cs
lgdt realmode_gdtdesc
- sti ; allow interrupts
-
- pop si ;@2
- pop bx ;@1
-
+ ; Restore real-mode ss
+ mov ss, bx
+
+ ; Convert result into dx:ax format
+ mov eax, ecx
+ ror eax, #16
+ mov dx, ax
+ ror eax, #16
+
+ ; Restore caller state and return
+ pop esp
+ pop bx ; skip ss
+ pop es
+ pop ds
+ popf
ret
- ASM_END
-
-/*
- * Helper function to get the offset of the reg_ss within the ebda struct
- * Only 'C' can tell the offset.
- */
-Bit16u
-ebda_ss_offset32()
-{
- ASM_START
- START_PM_CODE // need to have this
- ASM_END // compiled for protected mode
- return &EbdaData->upcall.reg_ss; // 'C' knows the offset!
- ASM_START
- END_PM_CODE
- ASM_END
-}
-
-/*
- * Two often-used functions
- */
-Bit16u
-read_word_from_ebda(offset)
- Bit16u offset;
-{
- Bit16u ebda_seg = read_word(0x0040, 0x000E);
- return read_word(ebda_seg, offset);
-}
-
-Bit32u
-read_dword_from_ebda(offset)
- Bit16u offset;
-{
- Bit16u ebda_seg = read_word(0x0040, 0x000E);
- return read_dword(ebda_seg, offset);
-}
-
-/*
- * Store registers in the EBDA; used to keep the registers'
- * content in a well-defined place during protected mode execution
- */
- void
-store_segment_registers(ss, cs, ds, es, esp_hi)
- Bit16u ss, cs, ds, es, esp_hi;
-{
- Bit16u ebda_seg = read_word(0x0040, 0x000E);
- write_word(ebda_seg, &EbdaData->upcall.reg_ss, ss);
- write_word(ebda_seg, &EbdaData->upcall.reg_cs, cs);
- write_word(ebda_seg, &EbdaData->upcall.reg_ds, ds);
- write_word(ebda_seg, &EbdaData->upcall.reg_es, es);
- write_word(ebda_seg, &EbdaData->upcall.esp_hi, esp_hi);
-}
-
-
- void
-store_returnaddress(retaddr)
- Bit16u retaddr;
-{
- Bit16u ebda_seg = read_word(0x0040, 0x000E);
- write_word(ebda_seg, &EbdaData->upcall.retaddr, retaddr);
-}
-
-Bit16u
-get_returnaddress()
-{
- return read_word_from_ebda(&EbdaData->upcall.retaddr);
-}
-
-/*
- * get the segment register 'cs' value from the EBDA
- */
-Bit16u
-get_register_cs()
-{
- return read_word_from_ebda(&EbdaData->upcall.reg_cs);
-}
-
-/*
- * get the segment register 'ds' value from the EBDA
- */
-Bit16u
-get_register_ds()
-{
- return read_word_from_ebda(&EbdaData->upcall.reg_ds);
-}
-
-/*
- * get the segment register 'es' value from the EBDA
- */
-Bit16u
-get_register_es()
-{
- return read_word_from_ebda(&EbdaData->upcall.reg_es);
-}
-
-/*
- * get the upper 16 bits of the esp from the EBDA
- */
-Bit16u
-get_register_esp_hi()
-{
- return read_word_from_ebda(&EbdaData->upcall.esp_hi);
-}
-
-
-
-/********************************************************/
-
-
-ASM_START
-
-Upcall:
- ; do the upcall into 32 bit space
- ; clear the stack frame so that 32 bit space sees all the parameters
- ; on the stack as if they were prepared for it
- ; ---> take the 16 bit return address off the stack and remember it
- ;
- ; Input:
- ; bx: index of function to call
- ; Ouput:
- ; dx, ax: 32 bit result of call (even if 'void' is expected)
-
- push bp ;pop @1
- mov bp, sp
- push si ;pop @2
-
- mov ax, 2[bp] ; 16 bit return address
- push ax
- call _store_returnaddress ; store away
- pop ax
-
- ; XXX GDT munging requires ROM to be writable!
- call _enable_rom_write_access
-
- rol bx, #2
- mov si, #jmptable
- seg cs
- mov eax, dword ptr [si+bx] ; address to call from table
-
- pop si ;@2
- pop bp ;@1
-
- add sp, #2 ; remove 16bit return address from stack
-
- call switch_to_protmode
- START_PM_CODE
-
- call eax ; call 32bit function
- push eax ; preserve result
-
- call switch_to_realmode ; back to realmode
- END_PM_CODE
-
- pop eax ; get result
-
- push word 0x0000 ; placeholder for 16 bit return address
- push bp
- mov bp,sp
- push eax ; preserve work register
-
- call _disable_rom_write_access
-
- call _get_returnaddress
- mov 2[bp], ax ; 16bit return address onto stack
-
- pop eax
- pop bp
-
- ror eax, #16 ; result into dx/ax
- mov dx, ax ; hi(res) -> dx
- ror eax, #16
-
- ret
-
-
-/* macro for functions to declare their call into 32bit space */
MACRO DoUpcall
- mov bx, #?1
- jmp Upcall
+ mov bx, #?1
+ jmp Upcall
MEND
-
-ASM_END
-
+#define X(idx, ret, fn, args...) _ ## fn: DoUpcall(idx)
#include "32bitprotos.h"
-#include "32bitgateway.h"
-
-#include "tcgbios.c"
-
-Bit32u get_s3_waking_vector()
-{
- ASM_START
- DoUpcall(IDX_GET_S3_WAKING_VECTOR)
- ASM_END
-}
+#undef X
-#ifndef PROTOS_HIGHBIOS
-#define PROTOS_HIGHBIOS
-
-/* shared include file for bcc and gcc */
-
-/* bcc does not like 'enum' */
-#define IDX_TCGINTERRUPTHANDLER 0
-#define IDX_TCPA_ACPI_INIT 1
-#define IDX_TCPA_EXTEND_ACPI_LOG 2
-#define IDX_TCPA_CALLING_INT19H 3
-#define IDX_TCPA_RETURNED_INT19H 4
-#define IDX_TCPA_ADD_EVENT_SEPARATORS 5
-#define IDX_TCPA_WAKE_EVENT 6
-#define IDX_TCPA_ADD_BOOTDEVICE 7
-#define IDX_TCPA_START_OPTION_ROM_SCAN 8
-#define IDX_TCPA_OPTION_ROM 9
-#define IDX_TCPA_IPL 10
-#define IDX_TCPA_INITIALIZE_TPM 11
-#define IDX_TCPA_MEASURE_POST 12
-#define IDX_GET_S3_WAKING_VECTOR 13
-#define IDX_LAST 14 /* keep last! */
-
-#ifdef GCC_PROTOS
- #define PARMS(x...) x
-#else
- /* bcc doesn't want any parameter types in prototypes */
- #define PARMS(x...)
-#endif
-
-Bit32u TCGInterruptHandler( PARMS(pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr));
-
-void tcpa_acpi_init( PARMS(void) );
-Bit32u tcpa_extend_acpi_log( PARMS(Bit32u entry_ptr) );
-void tcpa_calling_int19h( PARMS(void) );
-void tcpa_returned_int19h( PARMS(void) );
-void tcpa_add_event_separators( PARMS(void) );
-void tcpa_wake_event( PARMS(void) );
-void tcpa_add_bootdevice( PARMS(Bit32u bootcd, Bit32u bootdrv) );
-void tcpa_start_option_rom_scan( PARMS(void) );
-void tcpa_option_rom( PARMS(Bit32u seg) );
-void tcpa_ipl( PARMS(Bit32u bootcd,Bit32u seg,Bit32u off,Bit32u count) );
-void tcpa_measure_post( PARMS(Bit32u from, Bit32u to) );
-Bit32u tcpa_initialize_tpm( PARMS(Bit32u physpres) );
-
-Bit32u get_s3_waking_vector( PARMS(void) );
-
-#endif
+X(0, Bit32u, TCGInterruptHandler,
+ pushad_regs_t *regs, Bit32u esds, Bit32u flags_ptr)
+X(1, void, tcpa_acpi_init, void)
+X(2, Bit32u, tcpa_extend_acpi_log, Bit32u entry_ptr)
+X(3, void, tcpa_calling_int19h,void)
+X(4, void, tcpa_returned_int19h, void)
+X(5, void, tcpa_add_event_separators, void)
+X(6, void, tcpa_wake_event, void)
+X(7, void, tcpa_add_bootdevice, Bit32u bootcd, Bit32u bootdrv)
+X(8, void, tcpa_start_option_rom_scan, void)
+X(9, void, tcpa_option_rom, Bit32u seg)
+X(10, void, tcpa_ipl, Bit32u bootcd, Bit32u seg, Bit32u off, Bit32u count)
+X(11, void, tcpa_measure_post, Bit32u from, Bit32u to)
+X(12, Bit32u, tcpa_initialize_tpm, Bit32u physpres)
+X(13, Bit32u, get_s3_waking_vector, void)
+X(14, Bit32u, pmm, void *argp)
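+
+/*
+ * Illustrative note (not part of the header): each consumer defines X()
+ * before including this file.  With the 16-bit definition
+ *     #define X(idx, ret, fn, args...) _ ## fn: DoUpcall(idx)
+ * entry 1, for example, expands to a thin upcall stub
+ *     _tcpa_acpi_init: DoUpcall(1)
+ * while the 32-bit sides use
+ *     #define X(idx, ret, fn, args...) ret fn (args);
+ * to turn the same list into ordinary C prototypes.
+ */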
rm -f as86-sym.txt ld86-sym.txt
rm -f rombios*.txt rombios*.sym usage biossums
rm -f BIOS-bochs-*
+ rm -f $(DEPS)
BIOS-bochs-latest: rombios.c biossums 32bitgateway.c tcgbios.c
gcc -DBX_SMP_PROCESSORS=1 -E -P $< > _rombios_.c
biossums: biossums.c
gcc -o biossums biossums.c
+-include $(DEPS)
/////////////////////////////////////////////////////////////////////////
-// $Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $
+// $Id: rombios.c,v 1.221 2008/12/07 17:32:29 sshwarts Exp $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2002 MandrakeSoft S.A.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
-// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
-// ROM BIOS for use with Bochs/Plex x86 emulation environment
+// ROM BIOS for use with Bochs/Plex86/QEMU emulation environment
#define uint8_t unsigned char
#define uint16_t unsigned short
//
// NOTES for El-Torito Boot (cbbochs@free.fr)
// - CD-ROM booting is only available if ATA/ATAPI Driver is available
-// - Current code is only able to boot mono-session cds
+// - Current code is only able to boot mono-session cds
// - Current code can not boot and emulate a hard-disk
// the bios will panic otherwise
-// - Current code also use memory in EBDA segement.
+// - Current code also uses memory in the EBDA segment.
// - I used cmos byte 0x3D to store extended information on boot-device
// - Code has to be modified modified to handle multiple cdrom drives
// - Here are the cdrom boot failure codes:
// 12 : can not read cd - boot image
//
// ATA driver
-// - EBDA segment.
+// - EBDA segment.
// I used memory starting at 0x121 in the segment
// - the translation policy is defined in cmos regs 0x39 & 0x3a
//
// TODO :
//
-// int74
+// int74
// - needs to be reworked. Uses direct [bp] offsets. (?)
//
// int13:
// - Implement remaining int13_cdemu functions (as defined by El-Torito specs)
// - cdrom drive is hardcoded to ide 0 device 1 in several places. see "FIXME ElTorito Hardcoded"
// - int13 Fix DL when emulating a cd. In that case DL is decremented before calling real int13.
-// This is ok. But DL should be reincremented afterwards.
+// This is ok. But DL should be reincremented afterwards.
// - Fix all "FIXME ElTorito Various"
// - should be able to boot any cdrom instead of the first one
//
// BCC Bug: find a generic way to handle the bug of #asm after an "if" (fixed in 0.16.7)
-#define DEBUG_ROMBIOS 0
+#include "rombios.h"
#define DEBUG_ATA 0
#define DEBUG_INT13_HD 0
#define BX_USE_ATADRV 1
#define BX_ELTORITO_BOOT 1
-#define BX_TCGBIOS 0 /* main switch for TCG BIOS ext. */
+#define BX_TCGBIOS 0 /* main switch for TCG BIOS ext. */
+
+#define BX_PMM 1 /* POST Memory Manager */
#define BX_MAX_ATA_INTERFACES 4
#define BX_MAX_ATA_DEVICES (BX_MAX_ATA_INTERFACES*2)
# define BIOS_BUILD_DATE "06/23/99"
#endif
+#define E820_SEG (Bit16u)(E820_PHYSICAL_ADDRESS >> 4)
+
// 1K of base memory used for Extended Bios Data Area (EBDA)
// EBDA is used for PS/2 mouse support, and IDE BIOS, etc.
#define EBDA_SEG 0x9FC0
#define EBDA_SIZE 1 // In KiB
#define BASE_MEM_IN_K (640 - EBDA_SIZE)
- // Define the application NAME
-#ifdef HVMASSIST
-# define BX_APPNAME "HVMAssist"
-#elif PLEX86
-# define BX_APPNAME "Plex86"
-#else
-# define BX_APPNAME "Bochs"
-#endif
+/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */
+#define IPL_TABLE_OFFSET 0x0300 /* offset from EBDA */
+#define IPL_TABLE_ENTRIES 8
+#define IPL_COUNT_OFFSET 0x0380 /* u16: number of valid table entries */
+#define IPL_SEQUENCE_OFFSET 0x0382 /* u16: next boot device */
+#define IPL_BOOTFIRST_OFFSET 0x0384 /* u16: user selected device */
+#define IPL_SIZE 0xff
+#define IPL_TYPE_FLOPPY 0x01
+#define IPL_TYPE_HARDDISK 0x02
+#define IPL_TYPE_CDROM 0x03
+#define IPL_TYPE_BEV 0x80
+
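+// Illustrative layout note: entry i of the IPL table lives at
+//   ebda_seg (from 40:0E) : IPL_TABLE_OFFSET + i * sizeof(ipl_entry_t)
+// i.e. 16 bytes per entry and at most IPL_TABLE_ENTRIES entries, with the
+// count/sequence/bootfirst words stored just past the table (see
+// init_boot_vectors and get_boot_vector below).
+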
// Sanity Checks
#if BX_USE_ATADRV && BX_CPU<3
# error APM BIOS can only be used with 386+ cpu
#endif
-#ifndef BX_SMP_PROCESSORS
-#define BX_SMP_PROCESSORS 1
-# warning BX_SMP_PROCESSORS not defined, defaulting to 1
-#endif
-
-#define PANIC_PORT 0x400
-#define PANIC_PORT2 0x401
-#define INFO_PORT 0x402
-#define DEBUG_PORT 0x403
+// define this if you want to make PCIBIOS working on a specific bridge only
+// undef enables PCIBIOS when at least one PCI device is found
+// i440FX is emulated by Bochs and QEMU
+#define PCI_FIXED_HOST_BRIDGE 0x12378086 ;; i440FX PCI bridge
// #20 is dec 20
// #$20 is hex 20 = 32
MACRO HALT
;; the HALT macro is called with the line number of the HALT call.
- ;; The line number is then sent to the PANIC_PORT, causing Bochs/Plex
+ ;; The line number is then sent to the PANIC_PORT, causing Bochs/Plex
;; to print a BX_PANIC message. This will normally halt the simulation
;; with a message such as "BIOS panic at rombios.c, line 4091".
;; However, users can choose to make panics non-fatal and continue.
void memsetb(seg,offset,value,count);
void memcpyb(dseg,doffset,sseg,soffset,count);
void memcpyd(dseg,doffset,sseg,soffset,count);
-
+
// memset of count bytes
- void
+ void
memsetb(seg,offset,value,count)
Bit16u seg;
Bit16u offset;
ASM_START
push bp
mov bp, sp
-
+
push ax
push cx
push es
push di
-
+
mov cx, 10[bp] ; count
- cmp cx, #0x00
+ test cx, cx
je memsetb_end
mov ax, 4[bp] ; segment
mov es, ax
cld
rep
stosb
-
+
memsetb_end:
pop di
pop es
pop cx
pop ax
-
+
pop bp
ASM_END
}
-
+
// memcpy of count bytes
- void
+ void
memcpyb(dseg,doffset,sseg,soffset,count)
Bit16u dseg;
Bit16u doffset;
ASM_START
push bp
mov bp, sp
-
+
push ax
push cx
push es
push di
push ds
push si
-
+
mov cx, 12[bp] ; count
- cmp cx, #0x0000
+ test cx, cx
je memcpyb_end
mov ax, 4[bp] ; dsegment
mov es, ax
cld
rep
movsb
-
+
memcpyb_end:
pop si
pop ds
pop es
pop cx
pop ax
-
+
pop bp
ASM_END
}
-#if 0
// memcpy of count dword
- void
+ void
memcpyd(dseg,doffset,sseg,soffset,count)
Bit16u dseg;
Bit16u doffset;
ASM_START
push bp
mov bp, sp
-
+
push ax
push cx
push es
push di
push ds
push si
-
+
mov cx, 12[bp] ; count
- cmp cx, #0x0000
+ test cx, cx
je memcpyd_end
mov ax, 4[bp] ; dsegment
mov es, ax
cld
rep
movsd
-
+
memcpyd_end:
pop si
pop ds
pop es
pop cx
pop ax
-
+
pop bp
ASM_END
}
-#endif
// read_dword and write_dword functions
static Bit32u read_dword();
static void write_dword();
-
+
Bit32u
read_dword(seg, offset)
Bit16u seg;
ASM_START
push bp
mov bp, sp
-
+
push bx
push ds
mov ax, 4[bp] ; segment
mov ds, ax
mov bx, 6[bp] ; offset
mov ax, [bx]
- inc bx
- inc bx
+ add bx, #2
mov dx, [bx]
;; ax = return value (word)
;; dx = return value (word)
pop ds
pop bx
-
+
pop bp
ASM_END
}
-
+
void
write_dword(seg, offset, data)
Bit16u seg;
ASM_START
push bp
mov bp, sp
-
+
push ax
push bx
push ds
mov bx, 6[bp] ; offset
mov ax, 8[bp] ; data word
mov [bx], ax ; write data word
- inc bx
- inc bx
+ add bx, #2
mov ax, 10[bp] ; data word
mov [bx], ax ; write data word
pop ds
pop bx
pop ax
-
+
pop bp
ASM_END
}
-
+
// Bit32u (unsigned long) and long helper functions
ASM_START
-
+
;; and function
landl:
landul:
- SEG SS
+ SEG SS
and ax,[di]
- SEG SS
+ SEG SS
and bx,2[di]
ret
-
+
;; add function
laddl:
laddul:
- SEG SS
+ SEG SS
add ax,[di]
- SEG SS
+ SEG SS
adc bx,2[di]
ret
-
+
;; cmp function
lcmpl:
lcmpul:
and eax, #0x0000FFFF
shl ebx, #16
- add eax, ebx
+ or eax, ebx
shr ebx, #16
SEG SS
cmp eax, dword ptr [di]
ret
-
+
;; sub function
lsubl:
lsubul:
SEG SS
sbb bx,2[di]
ret
-
+
;; mul function
lmull:
lmulul:
and eax, #0x0000FFFF
shl ebx, #16
- add eax, ebx
+ or eax, ebx
SEG SS
mul eax, dword ptr [di]
mov ebx, eax
shr ebx, #16
ret
-
+
;; dec function
ldecl:
ldecul:
SEG SS
dec dword ptr [bx]
ret
-
+
;; or function
lorl:
lorul:
SEG SS
or bx,2[di]
ret
-
+
;; inc function
lincl:
lincul:
SEG SS
inc dword ptr [bx]
ret
-
+
;; tst function
ltstl:
ltstul:
and eax, #0x0000FFFF
shl ebx, #16
- add eax, ebx
+ or eax, ebx
shr ebx, #16
test eax, eax
ret
-
+
;; sr function
lsrul:
mov cx,di
jcxz lsr_exit
and eax, #0x0000FFFF
shl ebx, #16
- add eax, ebx
+ or eax, ebx
lsr_loop:
shr eax, #1
loop lsr_loop
shr ebx, #16
lsr_exit:
ret
-
+
;; sl function
lsll:
lslul:
jcxz lsl_exit
and eax, #0x0000FFFF
shl ebx, #16
- add eax, ebx
- lsl_loop:
+ or eax, ebx
+ lsl_loop:
shl eax, #1
loop lsl_loop
mov ebx, eax
shr ebx, #16
lsl_exit:
ret
-
+
idiv_:
cwd
idiv bx
ldivul:
and eax, #0x0000FFFF
shl ebx, #16
- add eax, ebx
+ or eax, ebx
xor edx, edx
SEG SS
mov bx, 2[di]
Bit8u revision;
Bit8u checksum;
} dpte_t;
-
+
typedef struct {
Bit8u iface; // ISA or PCI
Bit16u iobase1; // IO Base 1
Bit8u device; // Detected type of attached devices (hd/cd/none)
Bit8u removable; // Removable device flag
Bit8u lock; // Locks for removable devices
- // Bit8u lba_capable; // LBA capable flag - always yes for bochs devices
- Bit8u mode; // transfert mode : PIO 16/32 bits - IRQ - ISADMA - PCIDMA
+ Bit8u mode; // transfer mode : PIO 16/32 bits - IRQ - ISADMA - PCIDMA
Bit16u blksize; // block size
Bit8u translation; // type of translation
chs_t lchs; // Logical CHS
chs_t pchs; // Physical CHS
- Bit32u sectors; // Total sectors count
+ Bit32u sectors_low; // Total sectors count
+ Bit32u sectors_high;
} ata_device_t;
typedef struct {
ata_device_t devices[BX_MAX_ATA_DEVICES];
//
// map between (bios hd id - 0x80) and ata channels
- Bit8u hdcount, hdidmap[BX_MAX_ATA_DEVICES];
+ Bit8u hdcount, hdidmap[BX_MAX_ATA_DEVICES];
// map between (bios cd id - 0xE0) and ata channels
- Bit8u cdcount, cdidmap[BX_MAX_ATA_DEVICES];
+ Bit8u cdcount, cdidmap[BX_MAX_ATA_DEVICES];
// Buffer for DPTE table
dpte_t dpte;
Bit32u trsfbytes;
} ata_t;
-
+
#if BX_ELTORITO_BOOT
- // ElTorito Device Emulation data
+ // ElTorito Device Emulation data
typedef struct {
Bit8u active;
Bit8u media;
Bit16u buffer_segment;
Bit16u load_segment;
Bit16u sector_count;
-
+
// Virtual device
chs_t vdevice;
} cdemu_t;
#endif // BX_ELTORITO_BOOT
-
-#include "32bitgateway.h"
+
+#define X(idx, ret, fn, arg...) ret fn ();
+#include "32bitprotos.h"
+#undef X
// for access to EBDA area
- // The EBDA structure should conform to
- // http://www.cybertrails.com/~fys/rombios.htm document
+ // The EBDA structure should conform to
+ // http://www.frontiernet.net/~fys/rombios.htm document
// I made the ata and cdemu structs begin at 0x121 in the EBDA seg
- // EBDA must be at most 768 bytes; it lives at 0x9fc00, and the boot
- // device tables are at 0x9ff00 -- 0x9ffff
+ // EBDA must be at most 768 bytes; it lives at EBDA_SEG, and the boot
+ // device tables are at EBDA_SEG:IPL_TABLE_OFFSET
typedef struct {
unsigned char ebda_size;
unsigned char cmos_shutdown_status;
// El Torito Emulation data
cdemu_t cdemu;
#endif // BX_ELTORITO_BOOT
-
- upcall_t upcall;
} ebda_data_t;
-
+
#define EBDA_CMOS_SHUTDOWN_STATUS_OFFSET 1
#define EbdaData ((ebda_data_t *) 0)
Bit32u lba1;
Bit32u lba2;
} int13ext_t;
-
+
#define Int13Ext ((int13ext_t *) 0)
// Disk Physical Table definition
Bit8u reserved3;
Bit8u checksum;
} dpt_t;
-
+
#define Int13DPT ((dpt_t *) 0)
#endif // BX_USE_ATADRV
} r16;
struct {
Bit32u filler[4];
- Bit8u bl, bh;
+ Bit8u bl, bh;
Bit16u filler1;
- Bit8u dl, dh;
+ Bit8u dl, dh;
Bit16u filler2;
Bit8u cl, ch;
Bit16u filler3;
flags_t flags;
} iret_addr_t;
+typedef struct {
+ Bit16u type;
+ Bit16u flags;
+ Bit32u vector;
+ Bit32u description;
+ Bit32u reserved;
+ } ipl_entry_t;
+
static Bit8u inb();
static void write_byte();
static void write_word();
static void bios_printf();
-static void copy_e820_table();
static Bit8u inhibit_mouse_int_and_events();
static void enable_mouse_int_and_events();
static void int70_function();
static void int74_function();
static Bit16u get_CS();
-//static Bit16u get_DS();
-//static void set_DS();
static Bit16u get_SS();
static unsigned int enqueue_key();
static unsigned int dequeue_key();
static void keyboard_panic();
static void shutdown_status_panic();
static void nmi_handler_msg();
+static void delay_ticks();
+static void delay_ticks_and_check_for_keystroke();
+static void interactive_bootkey();
static void print_bios_banner();
static void print_boot_device();
static void print_boot_failure();
#endif // BX_ELTORITO_BOOT
-static char bios_cvs_version_string[] = "$Revision: 1.138 $";
-static char bios_date_string[] = "$Date: 2005/05/07 15:55:26 $";
-
-static char CVSID[] = "$Id: rombios.c,v 1.138 2005/05/07 15:55:26 vruppert Exp $";
-
-/* Offset to skip the CVS $Id: prefix */
-#define bios_version_string (CVSID + 4)
+static char bios_cvs_version_string[] = "$Revision: 1.221 $ $Date: 2008/12/07 17:32:29 $";
-#define BIOS_PRINTF_HALT 1
-#define BIOS_PRINTF_SCREEN 2
-#define BIOS_PRINTF_INFO 4
-#define BIOS_PRINTF_DEBUG 8
-#define BIOS_PRINTF_ALL (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO)
-#define BIOS_PRINTF_DEBHALT (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO | BIOS_PRINTF_HALT)
-
-#define printf(format, p...) bios_printf(BIOS_PRINTF_SCREEN, format, ##p)
-
-// Defines the output macros.
-// BX_DEBUG goes to INFO port until we can easily choose debug info on a
-// per-device basis. Debug info are sent only in debug mode
-#if DEBUG_ROMBIOS
-# define BX_DEBUG(format, p...) bios_printf(BIOS_PRINTF_INFO, format, ##p)
-#else
-# define BX_DEBUG(format, p...)
-#endif
-#define BX_INFO(format, p...) bios_printf(BIOS_PRINTF_INFO, format, ##p)
-#define BX_PANIC(format, p...) bios_printf(BIOS_PRINTF_DEBHALT, format, ##p)
+#define BIOS_COPYRIGHT_STRING "(c) 2002 MandrakeSoft S.A. Written by Kevin Lawton & the Bochs team."
#if DEBUG_ATA
# define BX_DEBUG_ATA(a...) BX_DEBUG(a)
{ 0x5100, 0x5133, 0x7600, none, 0x20 }, /* 3 PgDn */
{ 0x5200, 0x5230, none, none, 0x20 }, /* 0 Ins */
{ 0x5300, 0x532e, none, none, 0x20 }, /* Del */
- { none, none, none, none, none }, /* ??? */
- { none, none, none, none, none }, /* ??? */
- { none, none, none, none, none }, /* ??? */
+ { none, none, none, none, none },
+ { none, none, none, none, none },
+ { 0x565c, 0x567c, none, none, none }, /* \| */
{ 0x8500, 0x8700, 0x8900, 0x8b00, none }, /* F11 */
{ 0x8600, 0x8800, 0x8a00, 0x8c00, none }, /* F12 */
};
ASM_END
}
-// Bit16u
-//get_DS()
-//{
-//ASM_START
-// mov ax, ds
-//ASM_END
-//}
-//
-// void
-//set_DS(ds_selector)
-// Bit16u ds_selector;
-//{
-//ASM_START
-// push bp
-// mov bp, sp
-//
-// push ax
-// mov ax, 4[bp] ; ds_selector
-// mov ds, ax
-// pop ax
-//
-// pop bp
-//ASM_END
-//}
-
Bit16u
get_SS()
{
#ifdef HVMASSIST
void
-copy_e820_table()
+fixup_base_mem_in_k()
{
- Bit8u nr_entries = read_byte(0x9000, 0x1e8);
- Bit32u base_mem;
- if (nr_entries > 32)
- nr_entries = 32;
- write_word(0xe000, 0x8, nr_entries);
- memcpyb(0xe000, 0x10, 0x9000, 0x2d0, nr_entries * 0x14);
/* Report the proper base memory size at address 0x0413: otherwise
* non-e820 code will clobber things if BASE_MEM_IN_K is bigger than
* the first e820 entry. Get the size by reading the second 64bit
* field of the first e820 slot. */
- base_mem = read_dword(0x9000, 0x2d0 + 8);
+ Bit32u base_mem = read_dword(E820_SEG, E820_OFFSET + 8);
write_word(0x40, 0x13, base_mem >> 10);
}
-void
-set_rom_write_access(action)
- Bit16u action;
-{
- Bit16u off = (Bit16u)&((struct bios_info *)0)->xen_pfiob;
-ASM_START
- mov si,.set_rom_write_access.off[bp]
- push ds
- mov ax,#(ACPI_PHYSICAL_ADDRESS >> 4)
- mov ds,ax
- mov dx,[si]
- pop ds
- mov ax,.set_rom_write_access.action[bp]
- out dx,al
-ASM_END
-}
-
void enable_rom_write_access()
{
- set_rom_write_access(0);
+ outb(XEN_PF_IOBASE, 0);
}
void disable_rom_write_access()
{
- set_rom_write_access(PFFLAG_ROM_LOCK);
+ outb(XEN_PF_IOBASE, PFFLAG_ROM_LOCK);
}
#endif /* HVMASSIST */
pop bp
ASM_END
}
-
+
void
send(action, c)
Bit16u action;
send(action, val - (nval * 10) + '0');
}
+ void
+put_luint(action, val, width, neg)
+ Bit16u action;
+ unsigned long val;
+ short width;
+ bx_bool neg;
+{
+ unsigned long nval = val / 10;
+ if (nval)
+ put_luint(action, nval, width - 1, neg);
+ else {
+ while (--width > 0) send(action, ' ');
+ if (neg) send(action, '-');
+ }
+ send(action, val - (nval * 10) + '0');
+}
+
+void put_str(action, segment, offset)
+ Bit16u action;
+ Bit16u segment;
+ Bit16u offset;
+{
+ Bit8u c;
+
+ while (c = read_byte(segment, offset)) {
+ send(action, c);
+ offset++;
+ }
+}
+
+ void
+delay_ticks(ticks)
+ Bit16u ticks;
+{
+ long ticks_to_wait, delta;
+ Bit32u prev_ticks, t;
+
+ /*
+ * The 0:046c wraps around at 'midnight' according to a 18.2Hz clock.
+ * We also have to be careful about interrupt storms.
+ */
+ASM_START
+ pushf
+ sti
+ASM_END
+ ticks_to_wait = ticks;
+ prev_ticks = read_dword(0x0, 0x46c);
+ do
+ {
+ASM_START
+ hlt
+ASM_END
+ t = read_dword(0x0, 0x46c);
+ if (t > prev_ticks)
+ {
+ delta = t - prev_ticks; /* The temp var is required or bcc screws up. */
+ ticks_to_wait -= delta;
+ }
+ else if (t < prev_ticks)
+ {
+ ticks_to_wait -= t; /* wrapped */
+ }
+
+ prev_ticks = t;
+ } while (ticks_to_wait > 0);
+ASM_START
+ cli
+ popf
+ASM_END
+}
+
+ Bit8u
+check_for_keystroke()
+{
+ASM_START
+ mov ax, #0x100
+ int #0x16
+ jz no_key
+ mov al, #1
+ jmp done
+no_key:
+ xor al, al
+done:
+ASM_END
+}
+
+ Bit8u
+get_keystroke()
+{
+ASM_START
+ mov ax, #0x0
+ int #0x16
+ xchg ah, al
+ASM_END
+}
+
+ void
+delay_ticks_and_check_for_keystroke(ticks, count)
+ Bit16u ticks, count;
+{
+ Bit16u i;
+ for (i = 1; i <= count; i++) {
+ delay_ticks(ticks);
+ if (check_for_keystroke())
+ break;
+ }
+}
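+
+// Illustrative usage (values are examples only): poll roughly three
+// seconds for a key at the 18.2Hz tick rate, bailing out early on a
+// keystroke:
+//   delay_ticks_and_check_for_keystroke(18, 3);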
+
//--------------------------------------------------------------------------
// bios_printf()
-// A compact variable argument printf function which prints its output via
-// an I/O port so that it can be logged by Bochs/Plex.
-// Currently, only %x is supported (or %02x, %04x, etc).
+// A compact variable argument printf function.
//
-// Supports %[format_width][format]
-// where format can be d,x,c,s
+// Supports %[format_width][length]format
+// where format can be x,X,u,d,s,S,c
+// and the optional length modifier is l (ell)
//--------------------------------------------------------------------------
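+// Example (illustrative only): print a 32-bit LBA and a signed count:
+//   bios_printf(BIOS_PRINTF_INFO, "lba=%08lx count=%ld\n", lba, count);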
void
bios_printf(action, s)
bx_bool in_format;
short i;
Bit16u *arg_ptr;
- Bit16u arg_seg, arg, nibble, shift_count, format_width;
+ Bit16u arg_seg, arg, nibble, hibyte, shift_count, format_width, hexadd;
arg_ptr = &s;
arg_seg = get_SS();
else {
arg_ptr++; // increment to next arg
arg = read_word(arg_seg, arg_ptr);
- if (c == 'x') {
+ if (c == 'x' || c == 'X') {
if (format_width == 0)
format_width = 4;
+ if (c == 'x')
+ hexadd = 'a';
+ else
+ hexadd = 'A';
for (i=format_width-1; i>=0; i--) {
nibble = (arg >> (4 * i)) & 0x000f;
- send (action, (nibble<=9)? (nibble+'0') : (nibble-10+'A'));
+ send (action, (nibble<=9)? (nibble+'0') : (nibble-10+hexadd));
}
}
else if (c == 'u') {
put_uint(action, arg, format_width, 0);
}
+ else if (c == 'l') {
+ s++;
+ c = read_byte(get_CS(), s); /* is it ld,lx,lu? */
+ arg_ptr++; /* increment to next arg */
+ hibyte = read_word(arg_seg, arg_ptr);
+ if (c == 'd') {
+ if (hibyte & 0x8000)
+ put_luint(action, 0L-(((Bit32u) hibyte << 16) | arg), format_width-1, 1);
+ else
+ put_luint(action, ((Bit32u) hibyte << 16) | arg, format_width, 0);
+ }
+ else if (c == 'u') {
+ put_luint(action, ((Bit32u) hibyte << 16) | arg, format_width, 0);
+ }
+ else if (c == 'x' || c == 'X')
+ {
+ if (format_width == 0)
+ format_width = 8;
+ if (c == 'x')
+ hexadd = 'a';
+ else
+ hexadd = 'A';
+ for (i=format_width-1; i>=0; i--) {
+ nibble = ((((Bit32u) hibyte <<16) | arg) >> (4 * i)) & 0x000f;
+ send (action, (nibble<=9)? (nibble+'0') : (nibble-10+hexadd));
+ }
+ }
+ }
else if (c == 'd') {
if (arg & 0x8000)
put_int(action, -arg, format_width - 1, 1);
put_int(action, arg, format_width, 0);
}
else if (c == 's') {
- bios_printf(action & (~BIOS_PRINTF_HALT), arg);
+ put_str(action, get_CS(), arg);
+ }
+ else if (c == 'S') {
+ hibyte = arg;
+ arg_ptr++;
+ arg = read_word(arg_seg, arg_ptr);
+ put_str(action, hibyte, arg);
}
else if (c == 'c') {
send(action, arg);
}
if (action & BIOS_PRINTF_HALT) {
- // freeze in a busy loop.
+ // freeze in a busy loop.
ASM_START
cli
halt2_loop:
max = 0x2000;
}
}
-
- // Due to timer issues, and if the IPS setting is > 15000000,
+
+ // Due to timer issues, and if the IPS setting is > 15000000,
// the incoming keys might not be flushed here. That will
// cause a panic a few lines below. See sourceforge bug report :
// [ 642031 ] FATAL: Keyboard RESET error:993
keyboard_panic(status)
Bit16u status;
{
- // If you're getting a 993 keyboard panic here,
+ // If you're getting a 993 keyboard panic here,
// please see the comment in keyboard_init
-
+
BX_PANIC("Keyboard error:%u\n",status);
}
-
#define CMOS_SHUTDOWN_S3 0xFE
//--------------------------------------------------------------------------
// machine_reset
BX_PANIC("Unimplemented shutdown status: %02x\n",(Bit8u)status);
}
+void s3_resume_panic()
+{
+ BX_PANIC("Returned from s3_resume.\n");
+}
+
//--------------------------------------------------------------------------
// print_bios_banner
// displays a the bios version
void
print_bios_banner()
{
- printf(BX_APPNAME" BIOS, %d cpu%s, ", BX_SMP_PROCESSORS, BX_SMP_PROCESSORS>1?"s":"");
- printf("%s %s\n", bios_cvs_version_string, bios_date_string);
+ printf(BX_APPNAME" BIOS - build: %s\n%s\nOptions: ",
+ BIOS_BUILD_DATE, bios_cvs_version_string);
+ printf(
+#if BX_APM
+ "apmbios "
+#endif
+#if BX_PCIBIOS
+ "pcibios "
+#endif
+#if BX_ELTORITO_BOOT
+ "eltorito "
+#endif
+#if BX_ROMBIOS32
+ "rombios32 "
+#endif
#if BX_TCGBIOS
- printf("TCG-enabled BIOS.\n");
+ "TCG-enabled "
#endif
- printf("\n");
+#if BX_PMM
+ "PMM "
+#endif
+ "\n\n");
}
-
//--------------------------------------------------------------------------
// BIOS Boot Specification 1.0.1 compatibility
//
-// Very basic support for the BIOS Boot Specification, which allows expansion
-// ROMs to register themselves as boot devices, instead of just stealing the
+// Very basic support for the BIOS Boot Specification, which allows expansion
+// ROMs to register themselves as boot devices, instead of just stealing the
// INT 19h boot vector.
-//
+//
// This is a hack: to do it properly requires a proper PnP BIOS and we aren't
-// one; we just lie to the option ROMs to make them behave correctly.
-// We also don't support letting option ROMs register as bootable disk
-// drives (BCVs), only as bootable devices (BEVs).
+// one; we just lie to the option ROMs to make them behave correctly.
+// We also don't support letting option ROMs register as bootable disk
+// drives (BCVs), only as bootable devices (BEVs).
//
// http://www.phoenix.com/en/Customer+Services/White+Papers-Specs/pc+industry+specifications.htm
//--------------------------------------------------------------------------
-/* 256 bytes at 0x9ff00 -- 0x9ffff is used for the IPL boot table. */
-#define IPL_SEG 0x9ff0
-#define IPL_TABLE_OFFSET 0x0000
-#define IPL_TABLE_ENTRIES 8
-#define IPL_COUNT_OFFSET 0x0080 /* u16: number of valid table entries */
-#define IPL_SEQUENCE_OFFSET 0x0082 /* u16: next boot device */
-
-struct ipl_entry {
- Bit16u type;
- Bit16u flags;
- Bit32u vector;
- Bit32u description;
- Bit32u reserved;
-};
+static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"};
-static void
-init_boot_vectors()
+static void
+init_boot_vectors()
{
- struct ipl_entry e;
+ ipl_entry_t e;
Bit16u count = 0;
Bit16u ss = get_SS();
+ Bit16u ebda_seg = read_word(0x0040, 0x000E);
/* Clear out the IPL table. */
- memsetb(IPL_SEG, IPL_TABLE_OFFSET, 0, 0xff);
+ memsetb(ebda_seg, IPL_TABLE_OFFSET, 0, IPL_SIZE);
+
+ /* User selected device not set */
+ write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, 0xFFFF);
/* Floppy drive */
- e.type = 1; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
- memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+ e.type = IPL_TYPE_FLOPPY; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+ memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
count++;
/* First HDD */
- e.type = 2; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
- memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+ e.type = IPL_TYPE_HARDDISK; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+ memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
count++;
#if BX_ELTORITO_BOOT
/* CDROM */
- e.type = 3; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
- memcpyb(IPL_SEG, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
+ e.type = IPL_TYPE_CDROM; e.flags = 0; e.vector = 0; e.description = 0; e.reserved = 0;
+ memcpyb(ebda_seg, IPL_TABLE_OFFSET + count * sizeof (e), ss, &e, sizeof (e));
count++;
-#endif
+#endif
/* Remember how many devices we have */
- write_word(IPL_SEG, IPL_COUNT_OFFSET, count);
+ write_word(ebda_seg, IPL_COUNT_OFFSET, count);
/* Not tried booting anything yet */
- write_word(IPL_SEG, IPL_SEQUENCE_OFFSET, 0xffff);
+ write_word(ebda_seg, IPL_SEQUENCE_OFFSET, 0xffff);
}
static Bit8u
get_boot_vector(i, e)
-Bit16u i; struct ipl_entry *e;
+Bit16u i; ipl_entry_t *e;
{
Bit16u count;
Bit16u ss = get_SS();
+ Bit16u ebda_seg = read_word(0x0040, 0x000E);
/* Get the count of boot devices, and refuse to overrun the array */
- count = read_word(IPL_SEG, IPL_COUNT_OFFSET);
+ count = read_word(ebda_seg, IPL_COUNT_OFFSET);
if (i >= count) return 0;
/* OK to read this device */
- memcpyb(ss, e, IPL_SEG, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e));
+ memcpyb(ss, e, ebda_seg, IPL_TABLE_OFFSET + i * sizeof (*e), sizeof (*e));
return 1;
}
+#if BX_ELTORITO_BOOT
+ void
+interactive_bootkey()
+{
+ ipl_entry_t e;
+ Bit16u count;
+ char description[33];
+ Bit8u scan_code;
+ Bit8u i;
+ Bit16u ss = get_SS();
+ Bit16u valid_choice = 0;
+ Bit16u ebda_seg = read_word(0x0040, 0x000E);
+
+ printf("\n\nPress F12 for boot menu.\n\n");
+
+ while (check_for_keystroke())
+ {
+ scan_code = get_keystroke();
+ if (scan_code != 0x86) /* F12 */
+ continue;
+
+ while (check_for_keystroke())
+ get_keystroke();
+
+ printf("Select boot device:\n\n");
+
+ count = read_word(ebda_seg, IPL_COUNT_OFFSET);
+ for (i = 0; i < count; i++)
+ {
+ memcpyb(ss, &e, ebda_seg, IPL_TABLE_OFFSET + i * sizeof (e), sizeof (e));
+ printf("%d. ", i+1);
+ switch(e.type)
+ {
+ case IPL_TYPE_FLOPPY:
+ case IPL_TYPE_HARDDISK:
+ case IPL_TYPE_CDROM:
+ printf("%s\n", drivetypes[e.type]);
+ break;
+ case IPL_TYPE_BEV:
+ printf("%s", drivetypes[4]);
+ if (e.description != 0)
+ {
+ memcpyb(ss, &description, (Bit16u)(e.description >> 16), (Bit16u)(e.description & 0xffff), 32);
+ description[32] = 0;
+ printf(" [%S]", ss, description);
+ }
+ printf("\n");
+ break;
+ }
+ }
+
+ count++;
+ while (!valid_choice) {
+ scan_code = get_keystroke();
+ if (scan_code == 0x01 || scan_code == 0x58) /* ESC or F12 */
+ {
+ valid_choice = 1;
+ }
+ else if (scan_code <= count)
+ {
+ valid_choice = 1;
+ scan_code -= 1;
+ /* Set user selected device */
+ write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, scan_code);
+ }
+ }
+
+ printf("\n");
+ break;
+ }
+}
+#endif // BX_ELTORITO_BOOT
//--------------------------------------------------------------------------
// print_boot_device
// displays the boot device
//--------------------------------------------------------------------------
-static char drivetypes[][10]={"", "Floppy","Hard Disk","CD-Rom", "Network"};
-
void
-print_boot_device(type)
- Bit16u type;
+print_boot_device(e)
+ ipl_entry_t *e;
{
- /* NIC appears as type 0x80 */
- if (type == 0x80 ) type = 0x4;
- if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n");
- printf("Booting from %s...\n", drivetypes[type]);
+ Bit16u type;
+ char description[33];
+ Bit16u ss = get_SS();
+ type = e->type;
+ /* NIC appears as type 0x80 */
+ if (type == IPL_TYPE_BEV) type = 0x4;
+ if (type == 0 || type > 0x4) BX_PANIC("Bad drive type\n");
+ printf("Booting from %s", drivetypes[type]);
+ /* print product string if BEV */
+ if (type == 4 && e->description != 0) {
+ /* first 32 bytes are significant */
+ memcpyb(ss, &description, (Bit16u)(e->description >> 16), (Bit16u)(e->description & 0xffff), 32);
+ /* terminate string */
+ description[32] = 0;
+ printf(" [%S]", ss, description);
+ }
+ printf("...\n");
}
//--------------------------------------------------------------------------
print_boot_failure(type, reason)
Bit16u type; Bit8u reason;
{
- if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n");
+ if (type == 0 || type > 0x3) BX_PANIC("Bad drive type\n");
printf("Boot from %s failed", drivetypes[type]);
if (type < 4) {
/* Report the reason too */
- if (reason==0)
- printf(": not a bootable disk");
- else
- printf(": could not read the boot disk");
+ if (reason==0)
+ printf(": not a bootable disk");
+ else
+ printf(": could not read the boot disk");
}
- printf("\n");
+ printf("\n\n");
}
//--------------------------------------------------------------------------
Bit16u code;
{
bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "CDROM boot failure code : %04x\n",code);
-
+
return;
}
-#define WAIT_HZ 18
-/**
- * Check for keystroke.
- * @returns True if keystroke available, False if not.
- */
-Bit8u check_for_keystroke()
+void
+nmi_handler_msg()
{
-ASM_START
- mov ax, #0x100
- int #0x16
- jz no_key
- mov al, #1
- jmp done
-no_key:
- xor al, al
-done:
-ASM_END
+ BX_PANIC("NMI Handler called\n");
}
-/**
- * Get keystroke.
- * @returns BIOS scan code.
- */
-Bit8u get_keystroke()
+void
+int18_panic_msg()
{
-ASM_START
- mov ax, #0x0
- int #0x16
- xchg ah, al
-ASM_END
+ BX_PANIC("INT18: BOOT FAILURE\n");
}
-/**
- * Waits (sleeps) for the given number of ticks.
- * Checks for keystroke.
- *
- * @returns BIOS scan code if available, 0 if not.
- * @param ticks Number of ticks to sleep.
- * @param stop_on_key Whether to stop immediately upon keypress.
- */
-Bit8u wait(ticks, stop_on_key)
- Bit16u ticks;
- Bit8u stop_on_key;
+void
+log_bios_start()
{
- long ticks_to_wait, delta;
- Bit32u prev_ticks, t;
- Bit8u scan_code = 0;
-
- /*
- * The 0:046c wraps around at 'midnight' according to a 18.2Hz clock.
- * We also have to be careful about interrupt storms.
- */
- ticks_to_wait = ticks;
- prev_ticks = read_dword(0x0, 0x46c);
- do
- {
- t = read_dword(0x0, 0x46c);
- if (t > prev_ticks)
- {
- delta = t - prev_ticks; /* The temp var is required or bcc screws up. */
- ticks_to_wait -= delta;
- }
- else if (t < prev_ticks)
- ticks_to_wait -= t; /* wrapped */
- prev_ticks = t;
-
- if (check_for_keystroke())
- {
- scan_code = get_keystroke();
- bios_printf(BIOS_PRINTF_DEBUG, "Key pressed: %x\n", scan_code);
- if (stop_on_key)
- return scan_code;
- }
- } while (ticks_to_wait > 0);
- return scan_code;
-}
-
-static void clearscreen() {
- /* Hide cursor, clear screen and move cursor to starting position */
-ASM_START
- push bx
- push cx
- push dx
-
- mov ax, #0x100
- mov cx, #0x1000
- int #0x10
-
- mov ax, #0x700
- mov bh, #7
- xor cx, cx
- mov dx, #0x184f
- int #0x10
-
- mov ax, #0x200
- xor bx, bx
- xor dx, dx
- int #0x10
-
- pop dx
- pop cx
- pop bx
-ASM_END
-}
-
-int bootmenu(selected)
- int selected;
-{
- Bit8u scode;
- int max;
-
- /* get the number of boot devices */
- max = read_word(IPL_SEG, IPL_COUNT_OFFSET);
-
- for(;;) {
- if (selected > max || selected < 1) selected = 1;
- clearscreen();
- bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\n\n\n\n\n\n");
- bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, " Select boot device\n\n");
- bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, " 1. Floppy\n");
- bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, " 2. Hard drive\n");
- bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, " 3. CD-ROM\n");
- if (max == 4)
- bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, " 4. Network\n");
- bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO, "\n\n Currently selected: %d\n", selected);
-
- do {
- scode = wait(WAIT_HZ, 1);
- } while (scode == 0);
- switch(scode) {
- case 0x02:
- case 0x03:
- case 0x04:
- selected = scode - 1;
- break;
- case 0x05:
- if (max == 4)
- selected = scode -1 ;
- else
- scode = 0;
- break;
- case 0x48:
- selected -= 1;
- if (selected < 1)
- selected = 1;
- scode = 0;
- break;
- case 0x50:
- selected += 1;
- if (selected > max)
- selected = max;
- scode = 0;
- break;
- case 0x1c:
- break;
- default:
- scode = 0;
- break;
- }
- if (scode != 0)
- break;
- }
-
- switch (selected) {
- case 1:
- return 0x3D;
- case 2:
- return 0x3E;
- case 3:
- return 0x3F;
- case 4:
- return 0x58;
- default:
- return 0;
- }
-}
-
-void interactive_bootkey()
-{
- Bit16u i;
- Bit8u scan = 0;
-
- bios_printf(BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO,
- "\n\nPress F10 to select boot device.\n");
-
- scan = wait(1, 0);
- if (scan == 0x44)
- scan = bootmenu(inb_cmos(0x3d) & 0x0f);
-
- /* set the default based on the keypress or menu */
- switch(scan) {
- case 0x3D:
- outb_cmos(0x3d, 0x01);
- break;
- case 0x3E:
- outb_cmos(0x3d, 0x02);
- break;
- case 0x3F:
- outb_cmos(0x3d, 0x03);
- break;
- case 0x58:
- outb_cmos(0x3d, 0x04);
- break;
- default:
- break;
- }
-}
-
-
-void
-nmi_handler_msg()
-{
- BX_PANIC("NMI Handler called\n");
-}
-
-void
-int18_panic_msg()
-{
- BX_PANIC("INT18: BOOT FAILURE\n");
-}
-
-void
-log_bios_start()
-{
-#if BX_DEBUG_SERIAL
- outb(BX_DEBUG_PORT+UART_LCR, 0x03); /* setup for serial logging: 8N1 */
-#endif
- BX_INFO("%s\n", bios_version_string);
+#if BX_DEBUG_SERIAL
+ outb(BX_DEBUG_PORT+UART_LCR, 0x03); /* setup for serial logging: 8N1 */
+#endif
+ BX_INFO("%s\n", bios_cvs_version_string);
}
bx_bool
outb(0xfedc, 0x00);
}
-void
+int
s3_resume()
{
Bit32u s3_wakeup_vector;
- Bit16u s3_wakeup_ip, s3_wakeup_cs;
- Bit8u cmos_shutdown_status;
+ Bit8u s3_resume_flag;
-ASM_START
- push ds
- push ax
- mov ax, #EBDA_SEG
- mov ds, ax
- mov al, [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET]
- mov .s3_resume.cmos_shutdown_status[bp], al
- pop ax
- pop ds
-ASM_END
+ s3_resume_flag = read_byte(0x40, 0xb0);
+#ifdef HVMASSIST
+ s3_wakeup_vector = get_s3_waking_vector();
+#else
+ s3_wakeup_vector = read_dword(0x40, 0xb2);
+#endif
- if (cmos_shutdown_status != CMOS_SHUTDOWN_S3)
- return;
+ BX_INFO("S3 resume called %x 0x%lx\n", s3_resume_flag, s3_wakeup_vector);
+ if (s3_resume_flag != CMOS_SHUTDOWN_S3 || !s3_wakeup_vector)
+ return 0;
- s3_wakeup_vector = get_s3_waking_vector();
- if (!s3_wakeup_vector)
- return;
+ write_byte(0x40, 0xb0, 0);
- s3_wakeup_ip = s3_wakeup_vector & 0xF;
- s3_wakeup_cs = s3_wakeup_vector >> 4;
+ /* setup wakeup vector */
+ write_word(0x40, 0xb6, (s3_wakeup_vector & 0xF)); /* IP */
+ write_word(0x40, 0xb8, (s3_wakeup_vector >> 4)); /* CS */
+ BX_INFO("S3 resume jump to %x:%x\n", (s3_wakeup_vector >> 4),
+ (s3_wakeup_vector & 0xF));
ASM_START
- push .s3_resume.s3_wakeup_cs[bp]
- push .s3_resume.s3_wakeup_ip[bp]
- retf
+ jmpf [0x04b6]
ASM_END
+ return 1;
}
#if BX_USE_ATADRV
// bits 7-4 of the device/head (CB_DH) reg
#define ATA_CB_DH_DEV0 0xa0 // select device 0
#define ATA_CB_DH_DEV1 0xb0 // select device 1
+#define ATA_CB_DH_LBA 0x40 // use LBA
// status reg (CB_STAT and CB_ASTAT) bits
#define ATA_CB_STAT_BSY 0x80 // busy
#define ATA_CMD_READ_SECTORS 0x20
#define ATA_CMD_READ_VERIFY_SECTORS 0x40
#define ATA_CMD_RECALIBRATE 0x10
+#define ATA_CMD_REQUEST_SENSE 0x03
#define ATA_CMD_SEEK 0x70
#define ATA_CMD_SET_FEATURES 0xEF
#define ATA_CMD_SET_MULTIPLE_MODE 0xC6
#define ATA_DATA_NO 0x00
#define ATA_DATA_IN 0x01
#define ATA_DATA_OUT 0x02
-
+
// ---------------------------------------------------------------------------
// ATA/ATAPI driver : initialization
// ---------------------------------------------------------------------------
Bit16u ebda_seg=read_word(0x0040,0x000E);
Bit8u channel, device;
- // Channels info init.
+ // Channels info init.
for (channel=0; channel<BX_MAX_ATA_INTERFACES; channel++) {
write_byte(ebda_seg,&EbdaData->ata.channels[channel].iface,ATA_IFACE_NONE);
write_word(ebda_seg,&EbdaData->ata.channels[channel].iobase1,0x0);
write_byte(ebda_seg,&EbdaData->ata.channels[channel].irq,0);
}
- // Devices info init.
+ // Devices info init.
for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_NONE);
write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_NONE);
write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads,0);
write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders,0);
write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt,0);
-
- write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors,0L);
+
+ write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low,0L);
+ write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high,0L);
}
- // hdidmap and cdidmap init.
+ // hdidmap and cdidmap init.
for (device=0; device<BX_MAX_ATA_DEVICES; device++) {
write_byte(ebda_seg,&EbdaData->ata.hdidmap[device],BX_MAX_ATA_DEVICES);
write_byte(ebda_seg,&EbdaData->ata.cdidmap[device],BX_MAX_ATA_DEVICES);
write_byte(ebda_seg,&EbdaData->ata.cdcount,0);
}
+#define TIMEOUT 0
+#define BSY 1
+#define NOT_BSY 2
+#define NOT_BSY_DRQ 3
+#define NOT_BSY_NOT_DRQ 4
+#define NOT_BSY_RDY 5
+
+#define IDE_TIMEOUT 32000u //32 seconds max for IDE ops
+
+int await_ide();
+static int await_ide(when_done,base,timeout)
+ Bit8u when_done;
+ Bit16u base;
+ Bit16u timeout;
+{
+ Bit32u time=0,last=0;
+ Bit16u status;
+ Bit8u result;
+ status = inb(base + ATA_CB_STAT); // for the times you're supposed to throw one away
+ for(;;) {
+ status = inb(base+ATA_CB_STAT);
+ time++;
+ if (when_done == BSY)
+ result = status & ATA_CB_STAT_BSY;
+ else if (when_done == NOT_BSY)
+ result = !(status & ATA_CB_STAT_BSY);
+ else if (when_done == NOT_BSY_DRQ)
+ result = !(status & ATA_CB_STAT_BSY) && (status & ATA_CB_STAT_DRQ);
+ else if (when_done == NOT_BSY_NOT_DRQ)
+ result = !(status & ATA_CB_STAT_BSY) && !(status & ATA_CB_STAT_DRQ);
+ else if (when_done == NOT_BSY_RDY)
+ result = !(status & ATA_CB_STAT_BSY) && (status & ATA_CB_STAT_RDY);
+ else if (when_done == TIMEOUT)
+ result = 0;
+
+ if (result) return 0;
+ if (time>>16 != last) // mod 2048 each 16 ms
+ {
+ last = time >>16;
+ BX_DEBUG_ATA("await_ide: (TIMEOUT,BSY,!BSY,!BSY_DRQ,!BSY_!DRQ,!BSY_RDY) %d time= %ld timeout= %d\n",when_done,time>>11, timeout);
+ }
+ if (status & ATA_CB_STAT_ERR)
+ {
+ BX_DEBUG_ATA("await_ide: ERROR (TIMEOUT,BSY,!BSY,!BSY_DRQ,!BSY_!DRQ,!BSY_RDY) %d time= %ld timeout= %d\n",when_done,time>>11, timeout);
+ return -1;
+ }
+ if ((timeout == 0) || ((time>>11) > timeout)) break;
+ }
+ BX_INFO("IDE time out\n");
+ return -1;
+}
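+
+// Illustrative usage (mirrors the call sites below): wait for BSY to be
+// asserted after a software reset, or for the controller to become ready:
+//   await_ide(BSY, iobase1, 20);
+//   await_ide(NOT_BSY_RDY, iobase1, IDE_TIMEOUT);
+// A return of 0 means the condition was met; -1 indicates error/timeout.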
+
// ---------------------------------------------------------------------------
// ATA/ATAPI driver : device detection
// ---------------------------------------------------------------------------
// Device detection
hdcount=cdcount=0;
-
+
for(device=0; device<BX_MAX_ATA_DEVICES; device++) {
Bit16u iobase1, iobase2;
Bit8u channel, slave, shift;
if ( (sc == 0x55) && (sn == 0xaa) ) {
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_UNKNOWN);
-
+
// reset the channel
- ata_reset (device);
-
+ ata_reset(device);
+
// check for ATA or ATAPI
outb(iobase1+ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
sc = inb(iobase1+ATA_CB_SC);
sn = inb(iobase1+ATA_CB_SN);
- if ( (sc==0x01) && (sn==0x01) ) {
+ if ((sc==0x01) && (sn==0x01)) {
cl = inb(iobase1+ATA_CB_CL);
ch = inb(iobase1+ATA_CB_CH);
st = inb(iobase1+ATA_CB_STAT);
- if ( (cl==0x14) && (ch==0xeb) ) {
+ if ((cl==0x14) && (ch==0xeb)) {
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATAPI);
- }
- else if ( (cl==0x00) && (ch==0x00) && (st!=0x00) ) {
+ } else if ((cl==0x00) && (ch==0x00) && (st!=0x00)) {
write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_ATA);
- }
+ } else if ((cl==0xff) && (ch==0xff)) {
+ write_byte(ebda_seg,&EbdaData->ata.devices[device].type,ATA_TYPE_NONE);
}
}
+ }
type=read_byte(ebda_seg,&EbdaData->ata.devices[device].type);
-
- // Now we send a IDENTIFY command to ATA device
+
+ // Now we send a IDENTIFY command to ATA device
if(type == ATA_TYPE_ATA) {
- Bit32u sectors;
+ Bit32u sectors_low, sectors_high;
Bit16u cylinders, heads, spt, blksize;
Bit8u translation, removable, mode;
write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
- if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE, 1, 0, 0, 0, 0L, get_SS(),buffer) !=0 )
+ if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE, 1, 0, 0, 0, 0L, 0L, get_SS(),buffer) !=0 )
BX_PANIC("ata-detect: Failed to detect ATA device\n");
removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
-#ifndef NO_PIO32
+#ifndef NO_PIO32
mode = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16;
#endif
-
blksize = read_word(get_SS(),buffer+10);
-
+
cylinders = read_word(get_SS(),buffer+(1*2)); // word 1
heads = read_word(get_SS(),buffer+(3*2)); // word 3
spt = read_word(get_SS(),buffer+(6*2)); // word 6
- sectors = read_dword(get_SS(),buffer+(60*2)); // word 60 and word 61
+ if (read_word(get_SS(),buffer+(83*2)) & (1 << 10)) { // word 83 - lba48 support
+ sectors_low = read_dword(get_SS(),buffer+(100*2)); // word 100 and word 101
+ sectors_high = read_dword(get_SS(),buffer+(102*2)); // word 102 and word 103
+ } else {
+ sectors_low = read_dword(get_SS(),buffer+(60*2)); // word 60 and word 61
+ sectors_high = 0;
+ }
write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_HD);
write_byte(ebda_seg,&EbdaData->ata.devices[device].removable, removable);
write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.heads, heads);
write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.cylinders, cylinders);
write_word(ebda_seg,&EbdaData->ata.devices[device].pchs.spt, spt);
- write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors, sectors);
+ write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low, sectors_low);
+ write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high, sectors_high);
BX_INFO("ata%d-%d: PCHS=%u/%d/%d translation=", channel, slave,cylinders, heads, spt);
translation = inb_cmos(0x39 + channel/2);
break;
case ATA_TRANSLATION_LBA:
spt = 63;
- sectors /= 63;
- heads = sectors / 1024;
+ sectors_low /= 63;
+ heads = sectors_low / 1024;
if (heads>128) heads = 255;
else if (heads>64) heads = 128;
else if (heads>32) heads = 64;
else if (heads>16) heads = 32;
else heads=16;
- cylinders = sectors / heads;
+ cylinders = sectors_low / heads;
break;
case ATA_TRANSLATION_RECHS:
// Take care not to overflow
write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.heads, heads);
write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.cylinders, cylinders);
write_word(ebda_seg,&EbdaData->ata.devices[device].lchs.spt, spt);
-
- // fill hdidmap
+
+ // fill hdidmap
write_byte(ebda_seg,&EbdaData->ata.hdidmap[hdcount], device);
hdcount++;
}
-
+
// Now we send a IDENTIFY command to ATAPI device
if(type == ATA_TYPE_ATAPI) {
-
+
Bit8u type, removable, mode;
Bit16u blksize;
write_byte(ebda_seg,&EbdaData->ata.devices[device].device,ATA_DEVICE_CDROM);
write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, ATA_MODE_PIO16);
- if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE_PACKET, 1, 0, 0, 0, 0L, get_SS(),buffer) != 0)
+ if (ata_cmd_data_in(device,ATA_CMD_IDENTIFY_DEVICE_PACKET, 1, 0, 0, 0, 0L, 0L, get_SS(),buffer) != 0)
BX_PANIC("ata-detect: Failed to detect ATAPI device\n");
type = read_byte(get_SS(),buffer+1) & 0x1f;
removable = (read_byte(get_SS(),buffer+0) & 0x80) ? 1 : 0;
-#ifndef NO_PIO32
+#ifndef NO_PIO32
mode = read_byte(get_SS(),buffer+96) ? ATA_MODE_PIO32 : ATA_MODE_PIO16;
#endif
blksize = 2048;
write_byte(ebda_seg,&EbdaData->ata.devices[device].mode, mode);
write_word(ebda_seg,&EbdaData->ata.devices[device].blksize, blksize);
- // fill cdidmap
+ // fill cdidmap
write_byte(ebda_seg,&EbdaData->ata.cdidmap[cdcount], device);
cdcount++;
}
-
+
{
Bit32u sizeinmb;
Bit16u ataversion;
Bit8u c, i, version, model[41];
-
+
switch (type) {
case ATA_TYPE_ATA:
- sizeinmb = read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors);
- sizeinmb >>= 11;
+ sizeinmb = (read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_high) << 21)
+ | (read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low) >> 11);
case ATA_TYPE_ATAPI:
// Read ATA/ATAPI version
ataversion=((Bit16u)(read_byte(get_SS(),buffer+161))<<8)|read_byte(get_SS(),buffer+160);
- for(version=15;version>0;version--) {
+ for(version=15;version>0;version--) {
if((ataversion&(1<<version))!=0)
break;
}
for(i=0;i<20;i++){
write_byte(get_SS(),model+(i*2),read_byte(get_SS(),buffer+(i*2)+54+1));
write_byte(get_SS(),model+(i*2)+1,read_byte(get_SS(),buffer+(i*2)+54));
- }
+ }
// Reformat
write_byte(get_SS(),model+40,0x00);
if(read_byte(get_SS(),model+i)==0x20)
write_byte(get_SS(),model+i,0x00);
else break;
+ }
+ if (i>36) {
+ write_byte(get_SS(),model+36,0x00);
+ for(i=35;i>32;i--){
+ write_byte(get_SS(),model+i,0x2E);
}
+ }
break;
}
case ATA_TYPE_ATA:
printf("ata%d %s: ",channel,slave?" slave":"master");
i=0; while(c=read_byte(get_SS(),model+i++)) printf("%c",c);
- if (sizeinmb < 1UL<<16)
- printf(" ATA-%d Hard-Disk (%04u MBytes)\n",version,(Bit16u)sizeinmb);
- else
- printf(" ATA-%d Hard-Disk (%04u GBytes)\n",version,(Bit16u)(sizeinmb>>10));
+ if (sizeinmb < (1UL<<16))
+ printf(" ATA-%d Hard-Disk (%4u MBytes)\n", version, (Bit16u)sizeinmb);
+ else
+ printf(" ATA-%d Hard-Disk (%4u GBytes)\n", version, (Bit16u)(sizeinmb>>10));
break;
case ATA_TYPE_ATAPI:
printf("ata%d %s: ",channel,slave?" slave":"master");
write_byte(ebda_seg,&EbdaData->ata.hdcount, hdcount);
write_byte(ebda_seg,&EbdaData->ata.cdcount, cdcount);
write_byte(0x40,0x75, hdcount);
-
+
printf("\n");
// FIXME : should use bios=cmos|auto|disable bits
// FIXME : should know about translation bits
- // FIXME : move hard_drive_post here
-
+ // FIXME : move hard_drive_post here
+
}
// ---------------------------------------------------------------------------
-// ATA/ATAPI driver : software reset
+// ATA/ATAPI driver : software reset
// ---------------------------------------------------------------------------
// ATA-3
// 8.2.1 Software reset - Device 0
{
Bit16u ebda_seg=read_word(0x0040,0x000E);
Bit16u iobase1, iobase2;
- Bit8u channel, slave, sn, sc;
+ Bit8u channel, slave, sn, sc;
+ Bit8u type;
Bit16u max;
channel = device / 2;
outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN | ATA_CB_DC_SRST);
// 8.2.1 (b) -- wait for BSY
- max=0xff;
- while(--max>0) {
- Bit8u status = inb(iobase1+ATA_CB_STAT);
- if ((status & ATA_CB_STAT_BSY) != 0) break;
- }
+ await_ide(BSY, iobase1, 20);
// 8.2.1 (f) -- clear SRST
outb(iobase2+ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
- if (read_byte(ebda_seg,&EbdaData->ata.devices[device].type) != ATA_TYPE_NONE) {
+ type=read_byte(ebda_seg,&EbdaData->ata.devices[device].type);
+ if (type != ATA_TYPE_NONE) {
// 8.2.1 (g) -- check for sc==sn==0x01
// select device
sn = inb(iobase1+ATA_CB_SN);
if ( (sc==0x01) && (sn==0x01) ) {
-
-// 8.2.1 (h) -- wait for not BSY
- max=0xff;
- while(--max>0) {
- Bit8u status = inb(iobase1+ATA_CB_STAT);
- if ((status & ATA_CB_STAT_BSY) == 0) break;
- }
- }
+ if (type == ATA_TYPE_ATA) //ATA
+ await_ide(NOT_BSY_RDY, iobase1, IDE_TIMEOUT);
+ else //ATAPI
+ await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
}
-// 8.2.1 (i) -- wait for DRDY
- max=0xfff;
- while(--max>0) {
- Bit8u status = inb(iobase1+ATA_CB_STAT);
- if ((status & ATA_CB_STAT_RDY) != 0) break;
+// 8.2.1 (h) -- wait for not BSY
+ await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
}
// Enable interrupts
}
// ---------------------------------------------------------------------------
-// ATA/ATAPI driver : execute a non data command
+// ATA/ATAPI driver : execute a non data command
// ---------------------------------------------------------------------------
Bit16u ata_cmd_non_data()
// 5 : more sectors to read/verify
// 6 : no sectors left to write
// 7 : more sectors to write
-Bit16u ata_cmd_data_in(device, command, count, cylinder, head, sector, lba, segment, offset)
+Bit16u ata_cmd_data_in(device, command, count, cylinder, head, sector, lba_low, lba_high, segment, offset)
Bit16u device, command, count, cylinder, head, sector, segment, offset;
-Bit32u lba;
+Bit32u lba_low, lba_high;
{
Bit16u ebda_seg=read_word(0x0040,0x000E);
Bit16u iobase1, iobase2, blksize;
// sector will be 0 only on lba access. Convert to lba-chs
if (sector == 0) {
- if ((count >= 1 << 8) || (lba + count >= 1UL << 28)) {
+ if ((count >= 1 << 8) || lba_high || (lba_low + count >= 1UL << 28)) {
outb(iobase1 + ATA_CB_FR, 0x00);
outb(iobase1 + ATA_CB_SC, (count >> 8) & 0xff);
- outb(iobase1 + ATA_CB_SN, lba >> 24);
- outb(iobase1 + ATA_CB_CL, 0);
- outb(iobase1 + ATA_CB_CH, 0);
+ outb(iobase1 + ATA_CB_SN, lba_low >> 24);
+ outb(iobase1 + ATA_CB_CL, lba_high & 0xff);
+ outb(iobase1 + ATA_CB_CH, lba_high >> 8);
command |= 0x04;
count &= (1UL << 8) - 1;
- lba &= (1UL << 24) - 1;
+ lba_low &= (1UL << 24) - 1;
}
- sector = (Bit16u) (lba & 0x000000ffL);
- lba >>= 8;
- cylinder = (Bit16u) (lba & 0x0000ffffL);
- lba >>= 16;
- head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
- }
+ sector = (Bit16u) (lba_low & 0x000000ffL);
+ cylinder = (Bit16u) ((lba_low>>8) & 0x0000ffffL);
+ head = ((Bit16u) ((lba_low>>24) & 0x0000000fL)) | ATA_CB_DH_LBA;
+ }
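The extended branch above turns a READ/WRITE SECTORS opcode into its EXT form (command |= 0x04) and programs each task-file register twice: the high-order bytes of the sector count and LBA first, then the low-order bytes. A hedged sketch of that register split, with an illustrative struct standing in for the actual outb() sequence in the patch:

#include <stdint.h>

/* Sketch: how a 48-bit LBA and 16-bit count are split across the "previous"
 * and "current" writes of the task-file registers, as done above. */
struct lba48_taskfile {
    uint8_t sc_prev, sn_prev, cl_prev, ch_prev;   /* first writes  */
    uint8_t sc_cur,  sn_cur,  cl_cur,  ch_cur;    /* second writes */
    uint8_t dh;                                   /* device/head with LBA bit */
};

static struct lba48_taskfile split_lba48(uint32_t lba_low, uint32_t lba_high,
                                         uint16_t count)
{
    struct lba48_taskfile t;

    t.sc_prev = (count >> 8) & 0xff;
    t.sn_prev = lba_low >> 24;              /* LBA bits 24-31 */
    t.cl_prev = lba_high & 0xff;            /* LBA bits 32-39 */
    t.ch_prev = (lba_high >> 8) & 0xff;     /* LBA bits 40-47 */

    t.sc_cur  = count & 0xff;
    t.sn_cur  = lba_low & 0xff;             /* LBA bits 0-7   */
    t.cl_cur  = (lba_low >> 8) & 0xff;      /* LBA bits 8-15  */
    t.ch_cur  = (lba_low >> 16) & 0xff;     /* LBA bits 16-23 */

    t.dh = 0x40;                            /* LBA addressing (ATA_CB_DH_LBA) */
    return t;
}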
outb(iobase1 + ATA_CB_FR, 0x00);
outb(iobase1 + ATA_CB_SC, count);
outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | (Bit8u) head );
outb(iobase1 + ATA_CB_CMD, command);
- while (1) {
- status = inb(iobase1 + ATA_CB_STAT);
- if ( !(status & ATA_CB_STAT_BSY) ) break;
- }
+ await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+ status = inb(iobase1 + ATA_CB_STAT);
if (status & ATA_CB_STAT_ERR) {
BX_DEBUG_ATA("ata_cmd_data_in : read error\n");
ASM_START
push bp
mov bp, sp
- mov di, _ata_cmd_data_in.offset + 2[bp]
- mov ax, _ata_cmd_data_in.segment + 2[bp]
- mov cx, _ata_cmd_data_in.blksize + 2[bp]
+ mov di, _ata_cmd_data_in.offset + 2[bp]
+ mov ax, _ata_cmd_data_in.segment + 2[bp]
+ mov cx, _ata_cmd_data_in.blksize + 2[bp]
;; adjust if there will be an overrun. 2K max sector size
- cmp di, #0xf800 ;;
+ cmp di, #0xf800 ;;
jbe ata_in_no_adjust
ata_in_adjust:
mov dx, _ata_cmd_data_in.iobase1 + 2[bp] ;; ATA data read port
- mov ah, _ata_cmd_data_in.mode + 2[bp]
+ mov ah, _ata_cmd_data_in.mode + 2[bp]
cmp ah, #ATA_MODE_PIO32
je ata_in_32
current++;
write_word(ebda_seg, &EbdaData->ata.trsfsectors,current);
count--;
+ await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
status = inb(iobase1 + ATA_CB_STAT);
if (count == 0) {
- if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
!= ATA_CB_STAT_RDY ) {
BX_DEBUG_ATA("ata_cmd_data_in : no sectors left (status %02x)\n", (unsigned) status);
return 4;
break;
}
else {
- if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
!= (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
BX_DEBUG_ATA("ata_cmd_data_in : more sectors left (status %02x)\n", (unsigned) status);
return 5;
// 5 : more sectors to read/verify
// 6 : no sectors left to write
// 7 : more sectors to write
-Bit16u ata_cmd_data_out(device, command, count, cylinder, head, sector, lba, segment, offset)
+Bit16u ata_cmd_data_out(device, command, count, cylinder, head, sector, lba_low, lba_high, segment, offset)
Bit16u device, command, count, cylinder, head, sector, segment, offset;
-Bit32u lba;
+Bit32u lba_low, lba_high;
{
Bit16u ebda_seg=read_word(0x0040,0x000E);
Bit16u iobase1, iobase2, blksize;
// sector will be 0 only on lba access. Convert to lba-chs
if (sector == 0) {
- if ((count >= 1 << 8) || (lba + count >= 1UL << 28)) {
+ if ((count >= 1 << 8) || lba_high || (lba_low + count >= 1UL << 28)) {
outb(iobase1 + ATA_CB_FR, 0x00);
outb(iobase1 + ATA_CB_SC, (count >> 8) & 0xff);
- outb(iobase1 + ATA_CB_SN, lba >> 24);
- outb(iobase1 + ATA_CB_CL, 0);
- outb(iobase1 + ATA_CB_CH, 0);
+ outb(iobase1 + ATA_CB_SN, lba_low >> 24);
+ outb(iobase1 + ATA_CB_CL, lba_high & 0xff);
+ outb(iobase1 + ATA_CB_CH, lba_high >> 8);
command |= 0x04;
count &= (1UL << 8) - 1;
- lba &= (1UL << 24) - 1;
+ lba_low &= (1UL << 24) - 1;
}
- sector = (Bit16u) (lba & 0x000000ffL);
- lba >>= 8;
- cylinder = (Bit16u) (lba & 0x0000ffffL);
- lba >>= 16;
- head = ((Bit16u) (lba & 0x0000000fL)) | 0x40;
- }
+ sector = (Bit16u) (lba_low & 0x000000ffL);
+ cylinder = (Bit16u) ((lba_low>>8) & 0x0000ffffL);
+ head = ((Bit16u) ((lba_low>>24) & 0x0000000fL)) | ATA_CB_DH_LBA;
+ }
outb(iobase1 + ATA_CB_FR, 0x00);
outb(iobase1 + ATA_CB_SC, count);
outb(iobase1 + ATA_CB_DH, (slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0) | (Bit8u) head );
outb(iobase1 + ATA_CB_CMD, command);
- while (1) {
- status = inb(iobase1 + ATA_CB_STAT);
- if ( !(status & ATA_CB_STAT_BSY) ) break;
- }
+ await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+ status = inb(iobase1 + ATA_CB_STAT);
if (status & ATA_CB_STAT_ERR) {
BX_DEBUG_ATA("ata_cmd_data_out : read error\n");
ASM_START
push bp
mov bp, sp
- mov si, _ata_cmd_data_out.offset + 2[bp]
- mov ax, _ata_cmd_data_out.segment + 2[bp]
- mov cx, _ata_cmd_data_out.blksize + 2[bp]
+ mov si, _ata_cmd_data_out.offset + 2[bp]
+ mov ax, _ata_cmd_data_out.segment + 2[bp]
+ mov cx, _ata_cmd_data_out.blksize + 2[bp]
;; adjust if there will be an overrun. 2K max sector size
- cmp si, #0xf800 ;;
+ cmp si, #0xf800 ;;
jbe ata_out_no_adjust
ata_out_adjust:
mov dx, _ata_cmd_data_out.iobase1 + 2[bp] ;; ATA data write port
- mov ah, _ata_cmd_data_out.mode + 2[bp]
+ mov ah, _ata_cmd_data_out.mode + 2[bp]
cmp ah, #ATA_MODE_PIO32
je ata_out_32
count--;
status = inb(iobase1 + ATA_CB_STAT);
if (count == 0) {
- if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
!= ATA_CB_STAT_RDY ) {
BX_DEBUG_ATA("ata_cmd_data_out : no sectors left (status %02x)\n", (unsigned) status);
return 6;
break;
}
else {
- if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
!= (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
BX_DEBUG_ATA("ata_cmd_data_out : more sectors left (status %02x)\n", (unsigned) status);
return 7;
if (status & ATA_CB_STAT_BSY) return 2;
outb(iobase2 + ATA_CB_DC, ATA_CB_DC_HD15 | ATA_CB_DC_NIEN);
- // outb(iobase1 + ATA_CB_FR, 0x00);
- // outb(iobase1 + ATA_CB_SC, 0x00);
- // outb(iobase1 + ATA_CB_SN, 0x00);
+ outb(iobase1 + ATA_CB_FR, 0x00);
+ outb(iobase1 + ATA_CB_SC, 0x00);
+ outb(iobase1 + ATA_CB_SN, 0x00);
outb(iobase1 + ATA_CB_CL, 0xfff0 & 0x00ff);
outb(iobase1 + ATA_CB_CH, 0xfff0 >> 8);
outb(iobase1 + ATA_CB_DH, slave ? ATA_CB_DH_DEV1 : ATA_CB_DH_DEV0);
outb(iobase1 + ATA_CB_CMD, ATA_CMD_PACKET);
// Device should be ready to receive command
- while (1) {
- status = inb(iobase1 + ATA_CB_STAT);
- if ( !(status & ATA_CB_STAT_BSY) ) break;
- }
+ await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+ status = inb(iobase1 + ATA_CB_STAT);
if (status & ATA_CB_STAT_ERR) {
BX_DEBUG_ATA("ata_cmd_packet : error, status is %02x\n",status);
// Send command to device
ASM_START
sti ;; enable higher priority interrupts
-
+
push bp
mov bp, sp
-
- mov si, _ata_cmd_packet.cmdoff + 2[bp]
- mov ax, _ata_cmd_packet.cmdseg + 2[bp]
- mov cx, _ata_cmd_packet.cmdlen + 2[bp]
+
+ mov si, _ata_cmd_packet.cmdoff + 2[bp]
+ mov ax, _ata_cmd_packet.cmdseg + 2[bp]
+ mov cx, _ata_cmd_packet.cmdlen + 2[bp]
mov es, ax ;; segment in es
mov dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data write port
ASM_END
if (inout == ATA_DATA_NO) {
+ await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
status = inb(iobase1 + ATA_CB_STAT);
}
else {
+ Bit16u loops = 0;
+ Bit8u sc;
while (1) {
+ if (loops == 0) {//first time through
+ status = inb(iobase2 + ATA_CB_ASTAT);
+ await_ide(NOT_BSY_DRQ, iobase1, IDE_TIMEOUT);
+ }
+ else
+ await_ide(NOT_BSY, iobase1, IDE_TIMEOUT);
+ loops++;
+
status = inb(iobase1 + ATA_CB_STAT);
+ sc = inb(iobase1 + ATA_CB_SC);
// Check if command completed
- if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_DRQ) ) ==0 ) break;
+ if(((inb(iobase1 + ATA_CB_SC)&0x7)==0x3) &&
+ ((status & (ATA_CB_STAT_RDY | ATA_CB_STAT_ERR)) == ATA_CB_STAT_RDY)) break;
if (status & ATA_CB_STAT_ERR) {
BX_DEBUG_ATA("ata_cmd_packet : error (status %02x)\n",status);
return 3;
}
- // Device must be ready to send data
- if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
- != (ATA_CB_STAT_RDY | ATA_CB_STAT_DRQ) ) {
- BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", status);
- return 4;
- }
-
// Normalize address
bufseg += (bufoff / 16);
bufoff %= 16;
-
+
// Get the byte count
lcount = ((Bit16u)(inb(iobase1 + ATA_CB_CH))<<8)+inb(iobase1 + ATA_CB_CL);
mov dx, _ata_cmd_packet.iobase1 + 2[bp] ;; ATA data read port
- mov cx, _ata_cmd_packet.lbefore + 2[bp]
+ mov cx, _ata_cmd_packet.lbefore + 2[bp]
jcxz ata_packet_no_before
- mov ah, _ata_cmd_packet.lmode + 2[bp]
+ mov ah, _ata_cmd_packet.lmode + 2[bp]
cmp ah, #ATA_MODE_PIO32
je ata_packet_in_before_32
pop eax
ata_packet_no_before:
- mov cx, _ata_cmd_packet.lcount + 2[bp]
+ mov cx, _ata_cmd_packet.lcount + 2[bp]
jcxz ata_packet_after
- mov di, _ata_cmd_packet.bufoff + 2[bp]
- mov ax, _ata_cmd_packet.bufseg + 2[bp]
+ mov di, _ata_cmd_packet.bufoff + 2[bp]
+ mov ax, _ata_cmd_packet.bufseg + 2[bp]
mov es, ax
- mov ah, _ata_cmd_packet.lmode + 2[bp]
+ mov ah, _ata_cmd_packet.lmode + 2[bp]
cmp ah, #ATA_MODE_PIO32
je ata_packet_in_32
insd ;; CX dwords transferred from port(DX) to ES:[DI]
ata_packet_after:
- mov cx, _ata_cmd_packet.lafter + 2[bp]
+ mov cx, _ata_cmd_packet.lafter + 2[bp]
jcxz ata_packet_done
- mov ah, _ata_cmd_packet.lmode + 2[bp]
+ mov ah, _ata_cmd_packet.lmode + 2[bp]
cmp ah, #ATA_MODE_PIO32
je ata_packet_in_after_32
}
// Final check, device must be ready
- if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
+ if ( (status & (ATA_CB_STAT_BSY | ATA_CB_STAT_RDY | ATA_CB_STAT_DF | ATA_CB_STAT_DRQ | ATA_CB_STAT_ERR) )
!= ATA_CB_STAT_RDY ) {
BX_DEBUG_ATA("ata_cmd_packet : not ready (status %02x)\n", (unsigned) status);
return 4;
// Start of ATA/ATAPI generic functions
// ---------------------------------------------------------------------------
- Bit16u
-atapi_get_sense(device)
+ Bit16u
+atapi_get_sense(device, seg, asc, ascq)
Bit16u device;
{
Bit8u atacmd[12];
- Bit8u buffer[16];
+ Bit8u buffer[18];
Bit8u i;
memsetb(get_SS(),atacmd,0,12);
- // Request SENSE
- atacmd[0]=0x03;
- atacmd[4]=0x20;
- if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 16L, ATA_DATA_IN, get_SS(), buffer) != 0)
+ // Request SENSE
+ atacmd[0]=ATA_CMD_REQUEST_SENSE;
+ atacmd[4]=sizeof(buffer);
+ if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 18L, ATA_DATA_IN, get_SS(), buffer) != 0)
return 0x0002;
- if ((buffer[0] & 0x7e) == 0x70) {
- return (((Bit16u)buffer[2]&0x0f)*0x100)+buffer[12];
- }
+ write_byte(seg,asc,buffer[12]);
+ write_byte(seg,ascq,buffer[13]);
return 0;
}
- Bit16u
+ Bit16u
atapi_is_ready(device)
Bit16u device;
{
- Bit8u atacmd[12];
- Bit8u buffer[];
+ Bit8u packet[12];
+ Bit8u buf[8];
+ Bit32u block_len;
+ Bit32u sectors;
+ Bit32u timeout; //measured in ms
+ Bit32u time;
+ Bit8u asc, ascq;
+ Bit8u in_progress;
+ Bit16u ebda_seg = read_word(0x0040,0x000E);
+ if (read_byte(ebda_seg,&EbdaData->ata.devices[device].type) != ATA_TYPE_ATAPI) {
+ printf("not implemented for non-ATAPI device\n");
+ return -1;
+ }
- memsetb(get_SS(),atacmd,0,12);
-
- // Test Unit Ready
- if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, get_SS(), buffer) != 0)
- return 0x000f;
+ BX_DEBUG_ATA("ata_detect_medium: begin\n");
+ memsetb(get_SS(),packet, 0, sizeof packet);
+ packet[0] = 0x25; /* READ CAPACITY */
+
+ /* Retry READ CAPACITY 50 times unless MEDIUM NOT PRESENT
+ * is reported by the device. If the device reports "IN PROGRESS",
+ * the timeout is extended to 30 seconds. */
+ timeout = 5000;
+ time = 0;
+ in_progress = 0;
+ while (time < timeout) {
+ if (ata_cmd_packet(device, sizeof(packet), get_SS(), packet, 0, 8L, ATA_DATA_IN, get_SS(), buf) == 0)
+ goto ok;
+
+ if (atapi_get_sense(device, get_SS(), &asc, &ascq) == 0) {
+ if (asc == 0x3a) { /* MEDIUM NOT PRESENT */
+ BX_DEBUG_ATA("Device reports MEDIUM NOT PRESENT\n");
+ return -1;
+ }
- if (atapi_get_sense(device) !=0 ) {
- memsetb(get_SS(),atacmd,0,12);
+ if (asc == 0x04 && ascq == 0x01 && !in_progress) {
+ /* IN PROGRESS OF BECOMING READY */
+ printf("Waiting for device to detect medium... ");
+ /* Allow 30 seconds more */
+ timeout = 30000;
+ in_progress = 1;
+ }
+ }
+ time += 100;
+ }
+ BX_DEBUG_ATA("read capacity failed\n");
+ return -1;
+ok:
- // try to send Test Unit Ready again
- if (ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 0L, ATA_DATA_NO, get_SS(), buffer) != 0)
- return 0x000f;
+ block_len = (Bit32u) buf[4] << 24
+ | (Bit32u) buf[5] << 16
+ | (Bit32u) buf[6] << 8
+ | (Bit32u) buf[7] << 0;
+ BX_DEBUG_ATA("block_len=%u\n", block_len);
- return atapi_get_sense(device);
- }
+ if (block_len!= 2048 && block_len!= 512)
+ {
+ printf("Unsupported sector size %u\n", block_len);
+ return -1;
+ }
+ write_dword(ebda_seg,&EbdaData->ata.devices[device].blksize, block_len);
+
+ sectors = (Bit32u) buf[0] << 24
+ | (Bit32u) buf[1] << 16
+ | (Bit32u) buf[2] << 8
+ | (Bit32u) buf[3] << 0;
+
+ BX_DEBUG_ATA("sectors=%u\n", sectors);
+ if (block_len == 2048)
+ sectors <<= 2; /* # of sectors in 512-byte "soft" sector */
+ if (sectors != read_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low))
+ printf("%dMB medium detected\n", sectors>>(20-9));
+ write_dword(ebda_seg,&EbdaData->ata.devices[device].sectors_low, sectors);
return 0;
}
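For reference, a small sketch of how the 8-byte READ CAPACITY response used above is decoded: bytes 0-3 are the big-endian LBA, bytes 4-7 the big-endian block length, and 2048-byte media are rescaled to 512-byte "soft" sectors. The helper name is an assumption; the BIOS code stores the resulting dword as the sector count, as shown above.

#include <stdint.h>

/* Sketch: decode an 8-byte READ CAPACITY (10) response. */
static int decode_read_capacity(const uint8_t buf[8],
                                uint32_t *sectors_512, uint32_t *block_len)
{
    uint32_t lba = ((uint32_t)buf[0] << 24) | ((uint32_t)buf[1] << 16) |
                   ((uint32_t)buf[2] << 8)  |  (uint32_t)buf[3];
    *block_len   = ((uint32_t)buf[4] << 24) | ((uint32_t)buf[5] << 16) |
                   ((uint32_t)buf[6] << 8)  |  (uint32_t)buf[7];

    if (*block_len != 2048 && *block_len != 512)
        return -1;                 /* unsupported sector size */

    *sectors_512 = lba;            /* the patch treats this dword as the count */
    if (*block_len == 2048)
        *sectors_512 <<= 2;        /* express capacity in 512-byte sectors */
    return 0;
}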
- Bit16u
+ Bit16u
atapi_is_cdrom(device)
Bit8u device;
{
//
// Returns ah: emulated drive, al: error code
//
- Bit16u
+ Bit16u
cdrom_boot()
{
Bit16u ebda_seg=read_word(0x0040,0x000E);
for (device=0; device<BX_MAX_ATA_DEVICES;device++) {
if (atapi_is_cdrom(device)) break;
}
-
+
// if not found
if(device >= BX_MAX_ATA_DEVICES) return 2;
+ if ((error = atapi_is_ready(device)) != 0)
+ BX_INFO("atapi_is_ready returned %d\n",error);
+
// Read the Boot Record Volume Descriptor
memsetb(get_SS(),atacmd,0,12);
atacmd[0]=0x28; // READ command
}
for(i=0;i<23;i++)
if(buffer[7+i]!=read_byte(0xf000,&eltorito[i]))return 6;
-
+
// ok, now we calculate the Boot catalog address
lba=buffer[0x4A]*0x1000000+buffer[0x49]*0x10000+buffer[0x48]*0x100+buffer[0x47];
atacmd[5]=(lba & 0x000000ff);
if((error = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, 2048L, ATA_DATA_IN, get_SS(), buffer)) != 0)
return 7;
-
+
// Validation entry
if(buffer[0x00]!=0x01)return 8; // Header
if(buffer[0x01]!=0x00)return 9; // Platform
write_byte(ebda_seg,&EbdaData->cdemu.media,buffer[0x21]);
if(buffer[0x21]==0){
- // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0.
+ // FIXME ElTorito Hardcoded. cdrom is hardcoded as device 0xE0.
// Win2000 cd boot needs to know it booted from cd
write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0xE0);
- }
+ }
else if(buffer[0x21]<4)
write_byte(ebda_seg,&EbdaData->cdemu.emulated_drive,0x00);
else
write_word(ebda_seg,&EbdaData->cdemu.load_segment,boot_segment);
write_word(ebda_seg,&EbdaData->cdemu.buffer_segment,0x0000);
-
+
nbsectors=buffer[0x27]*0x100+buffer[0x26];
write_word(ebda_seg,&EbdaData->cdemu.sector_count,nbsectors);
tcpa_ipl((Bit32u)1L,(Bit32u)boot_segment,(Bit32u)0L,(Bit32u)512L);
#endif
-
// Remember the media type
switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
case 0x01: // 1.2M floppy
case 0x04: // Harddrive
write_word(ebda_seg,&EbdaData->cdemu.vdevice.spt,read_byte(boot_segment,446+6)&0x3f);
write_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders,
- (read_byte(boot_segment,446+6)<<2) + read_byte(boot_segment,446+7) + 1);
+ (read_byte(boot_segment,446+6)<<2) + read_byte(boot_segment,446+7) + 1);
write_word(ebda_seg,&EbdaData->cdemu.vdevice.heads,read_byte(boot_segment,446+5) + 1);
break;
}
write_byte(ebda_seg, &EbdaData->ata.hdcount, read_byte(ebda_seg, &EbdaData->ata.hdcount) + 1);
}
-
+
// everything is ok, so from now on, the emulation is active
if(read_byte(ebda_seg,&EbdaData->cdemu.media)!=0)
write_byte(ebda_seg,&EbdaData->cdemu.active,0x01);
regs.u.r8.al = inb_cmos(0x30);
regs.u.r8.ah = inb_cmos(0x31);
- // limit to 15M
- if(regs.u.r16.ax > 0x3c00)
- regs.u.r16.ax = 0x3c00;
+ // According to Ralf Brown's interrupt list the limit should be 15M,
+ // but real machines mostly return max. 63M.
+ if(regs.u.r16.ax > 0xffc0)
+ regs.u.r16.ax = 0xffc0;
CLEAR_CF();
#endif
case 3: // Set Resolution
BX_DEBUG_INT15("case 3:\n");
- // BX:
+ // BH:
// 0 = 25 dpi, 1 count per millimeter
// 1 = 50 dpi, 2 counts per millimeter
// 2 = 100 dpi, 4 counts per millimeter
// 3 = 200 dpi, 8 counts per millimeter
- CLEAR_CF();
- regs.u.r8.ah = 0;
+ comm_byte = inhibit_mouse_int_and_events(); // disable IRQ12 and packets
+ if (regs.u.r8.bh < 4) {
+ ret = send_to_mouse_ctrl(0xE8); // set resolution command
+ if (ret == 0) {
+ ret = get_mouse_data(&mouse_data1);
+ if (mouse_data1 != 0xfa)
+ BX_PANIC("Mouse status returned %02x (should be ack)\n", (unsigned)mouse_data1);
+ ret = send_to_mouse_ctrl(regs.u.r8.bh);
+ ret = get_mouse_data(&mouse_data1);
+ if (mouse_data1 != 0xfa)
+ BX_PANIC("Mouse status returned %02x (should be ack)\n", (unsigned)mouse_data1);
+ CLEAR_CF();
+ regs.u.r8.ah = 0;
+ } else {
+ // error
+ SET_CF();
+ regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+ }
+ } else {
+ // error
+ SET_CF();
+ regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+ }
+ set_kbd_command_byte(comm_byte); // restore IRQ12 and serial enable
break;
case 4: // Get Device ID
break;
}
}
-#endif
+#endif // BX_USE_PS2_MOUSE
+
+
+void set_e820_range(ES, DI, start, end, type)
+ Bit16u ES;
+ Bit16u DI;
+ Bit32u start;
+ Bit32u end;
+ Bit16u type;
+{
+ write_word(ES, DI, start);
+ write_word(ES, DI+2, start >> 16);
+ write_word(ES, DI+4, 0x00);
+ write_word(ES, DI+6, 0x00);
+
+ end -= start;
+ write_word(ES, DI+8, end);
+ write_word(ES, DI+10, end >> 16);
+ write_word(ES, DI+12, 0x0000);
+ write_word(ES, DI+14, 0x0000);
+
+ write_word(ES, DI+16, type);
+ write_word(ES, DI+18, 0x0);
+}
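set_e820_range() above writes one 20-byte E820 entry at ES:DI: a 64-bit base, a 64-bit length (computed as end - start), and a 32-bit type, with the upper halves zeroed. A minimal host-side sketch of the same layout, assuming a hypothetical struct name:

#include <stdint.h>
#include <string.h>

/* Sketch: the 20-byte E820 entry layout emitted by set_e820_range().
 * Type 1 = usable RAM, 2 = reserved, 3 = ACPI reclaimable. */
struct e820_entry {
    uint64_t base;
    uint64_t length;
    uint32_t type;
} __attribute__((packed));

static void fill_e820_entry(void *dest, uint32_t start, uint32_t end, uint32_t type)
{
    struct e820_entry e;

    e.base   = start;
    e.length = end - start;        /* the BIOS passes 'end' and subtracts 'start' */
    e.type   = type;
    memcpy(dest, &e, sizeof(e));   /* 20 bytes, placed at ES:DI in the BIOS */
}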
void
int15_function32(regs, ES, DS, FLAGS)
{
Bit32u extended_memory_size=0; // 64bits long
Bit16u CX,DX;
+#ifdef HVMASSIST
+ Bit16u off, e820_table_size;
+ Bit32u base, type, size;
+#endif
BX_DEBUG_INT15("int15 AX=%04x\n",regs.u.r16.ax);
switch (regs.u.r8.ah) {
case 0x86:
- // Wait for CX:DX microseconds. currently using the
- // refresh request port 0x61 bit4, toggling every 15usec
+ // Wait for CX:DX microseconds. currently using the
+ // refresh request port 0x61 bit4, toggling every 15usec
CX = regs.u.r16.cx;
DX = regs.u.r16.dx;
ASM_START
+ sti
+
;; Get the count in eax
- mov ax, .int15_function32.CX [bp]
+ mov bx, sp
+SEG SS
+ mov ax, _int15_function32.CX [bx]
shl eax, #16
- mov ax, .int15_function32.DX [bp]
+SEG SS
+ mov ax, _int15_function32.DX [bx]
;; convert to numbers of 15usec ticks
mov ebx, #15
case 0xe8:
switch(regs.u.r8.al)
{
- case 0x20: {
- Bit16u e820_table_size = read_word(0xe000, 0x8) * 0x14;
+#ifdef HVMASSIST
+ case 0x20: {
+ e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
if (regs.u.r32.edx != 0x534D4150) /* SMAP */
goto int15_unimplemented;
if ((regs.u.r16.bx / 0x14) * 0x14 == regs.u.r16.bx) {
if (regs.u.r16.bx + 0x14 <= e820_table_size)
memcpyb(ES, regs.u.r16.di,
- 0xe000, 0x10 + regs.u.r16.bx, 0x14);
+ E820_SEG, E820_OFFSET + regs.u.r16.bx, 0x14);
regs.u.r32.ebx += 0x14;
if ((regs.u.r32.ebx + 0x14 - 1) > e820_table_size)
regs.u.r32.ebx = 0;
} else if (regs.u.r16.bx == 1) {
- Bit32u base, type;
- Bit16u off;
for (off = 0; off < e820_table_size; off += 0x14) {
- base = read_dword(0xe000, 0x10 + off);
- type = read_dword(0xe000, 0x20 + off);
+ base = read_dword(E820_SEG, E820_OFFSET + off);
+ type = read_dword(E820_SEG, E820_OFFSET + 0x10 + off);
if ((base >= 0x100000) && (type == 1))
break;
}
SET_CF();
break;
}
- memcpyb(ES, regs.u.r16.di, 0xe000, 0x10 + off, 0x14);
+ memcpyb(ES, regs.u.r16.di, E820_SEG, E820_OFFSET + off, 0x14);
regs.u.r32.ebx = 0;
} else { /* AX=E820, DX=534D4150, BX unrecognized */
goto int15_unimplemented;
}
case 0x01: {
- Bit16u off, e820_table_size = read_word(0xe000, 0x8) * 0x14;
- Bit32u base, type, size;
+ e820_table_size = read_word(E820_SEG, E820_NR_OFFSET) * 0x14;
// do we have any reason to fail here ?
CLEAR_CF();
// Get the amount of extended memory (above 1M)
regs.u.r8.cl = inb_cmos(0x30);
regs.u.r8.ch = inb_cmos(0x31);
-
+
// limit to 15M
if (regs.u.r16.cx > (15*1024))
regs.u.r16.cx = 15*1024;
// Find first RAM E820 entry >= 1MB.
for (off = 0; off < e820_table_size; off += 0x14) {
- base = read_dword(0xe000, 0x10 + off);
- type = read_dword(0xe000, 0x20 + off);
+ base = read_dword(E820_SEG, E820_OFFSET + off);
+ type = read_dword(E820_SEG, E820_OFFSET + 0x10 + off);
if ((base >= 0x100000) && (type == 1))
break;
}
// If there is RAM above 16MB, return amount in 64kB chunks.
regs.u.r16.dx = 0;
if (off != e820_table_size) {
- size = base + read_dword(0xe000, 0x18 + off);
+ size = base + read_dword(E820_SEG, E820_OFFSET + 0x8 + off);
if (size > 0x1000000) {
size -= 0x1000000;
regs.u.r16.dx = (Bit16u)(size >> 16);
regs.u.r16.bx = regs.u.r16.dx;
break;
}
- default: /* AH=0xE8?? but not implemented */
+ default: /* AH=0xE8?? but not implemented */
goto int15_unimplemented;
}
break;
regs.u.r8.ah = UNSUPPORTED_FUNCTION;
break;
}
+#else
+ case 0x20: // coded by osmaker aka K.J.
+ if(regs.u.r32.edx == 0x534D4150)
+ {
+ extended_memory_size = inb_cmos(0x35);
+ extended_memory_size <<= 8;
+ extended_memory_size |= inb_cmos(0x34);
+ extended_memory_size *= 64;
+ // greater than EFF00000???
+ if(extended_memory_size > 0x3bc000) {
+ extended_memory_size = 0x3bc000; // everything after this is reserved memory until we get to 0x100000000
+ }
+ extended_memory_size *= 1024;
+ extended_memory_size += (16L * 1024 * 1024);
+
+ if(extended_memory_size <= (16L * 1024 * 1024)) {
+ extended_memory_size = inb_cmos(0x31);
+ extended_memory_size <<= 8;
+ extended_memory_size |= inb_cmos(0x30);
+ extended_memory_size *= 1024;
+ extended_memory_size += (1L * 1024 * 1024);
+ }
+
+ switch(regs.u.r16.bx)
+ {
+ case 0:
+ set_e820_range(ES, regs.u.r16.di,
+ 0x0000000L, 0x0009f000L, 1);
+ regs.u.r32.ebx = 1;
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ break;
+ case 1:
+ set_e820_range(ES, regs.u.r16.di,
+ 0x0009f000L, 0x000a0000L, 2);
+ regs.u.r32.ebx = 2;
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ break;
+ case 2:
+ set_e820_range(ES, regs.u.r16.di,
+ 0x000e8000L, 0x00100000L, 2);
+ regs.u.r32.ebx = 3;
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ break;
+ case 3:
+#if BX_ROMBIOS32
+ set_e820_range(ES, regs.u.r16.di,
+ 0x00100000L,
+ extended_memory_size - ACPI_DATA_SIZE, 1);
+ regs.u.r32.ebx = 4;
+#else
+ set_e820_range(ES, regs.u.r16.di,
+ 0x00100000L,
+ extended_memory_size, 1);
+ regs.u.r32.ebx = 5;
+#endif
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ break;
+ case 4:
+ set_e820_range(ES, regs.u.r16.di,
+ extended_memory_size - ACPI_DATA_SIZE,
+ extended_memory_size, 3); // ACPI RAM
+ regs.u.r32.ebx = 5;
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ break;
+ case 5:
+ /* 256KB BIOS area at the end of 4 GB */
+ set_e820_range(ES, regs.u.r16.di,
+ 0xfffc0000L, 0x00000000L, 2);
+ regs.u.r32.ebx = 0;
+ regs.u.r32.eax = 0x534D4150;
+ regs.u.r32.ecx = 0x14;
+ CLEAR_CF();
+ return;
+ default: /* AX=E820, DX=534D4150, BX unrecognized */
+ goto int15_unimplemented;
+ break;
+ }
+ } else {
+ // if DX != 0x534D4150)
+ goto int15_unimplemented;
+ }
+ break;
+
+ case 0x01:
+ // do we have any reason to fail here ?
+ CLEAR_CF();
+
+ // my real system sets ax and bx to 0
+ // this is confirmed by Ralf Brown's interrupt list
+ // but syslinux v1.48 is known to behave
+ // strangely if ax is set to 0
+ // regs.u.r16.ax = 0;
+ // regs.u.r16.bx = 0;
+
+ // Get the amount of extended memory (above 1M)
+ regs.u.r8.cl = inb_cmos(0x30);
+ regs.u.r8.ch = inb_cmos(0x31);
+
+ // limit to 15M
+ if(regs.u.r16.cx > 0x3c00)
+ {
+ regs.u.r16.cx = 0x3c00;
+ }
+
+ // Get the amount of extended memory above 16M in 64k blocks
+ regs.u.r8.dl = inb_cmos(0x34);
+ regs.u.r8.dh = inb_cmos(0x35);
+
+ // Set configured memory equal to extended memory
+ regs.u.r16.ax = regs.u.r16.cx;
+ regs.u.r16.bx = regs.u.r16.dx;
+ break;
+ default: /* AH=0xE8?? but not implemented */
+ goto int15_unimplemented;
+ }
+ break;
+ int15_unimplemented:
+ // fall into the default
+ default:
+ BX_INFO("*** int 15h function AX=%04x, BX=%04x not yet supported!\n",
+ (unsigned) regs.u.r16.ax, (unsigned) regs.u.r16.bx);
+ SET_CF();
+ regs.u.r8.ah = UNSUPPORTED_FUNCTION;
+ break;
+ }
+#endif /* HVMASSIST */
}
void
int16_function(DI, SI, BP, SP, BX, DX, CX, AX, FLAGS)
Bit16u DI, SI, BP, SP, BX, DX, CX, AX, FLAGS;
{
- Bit8u scan_code, ascii_code, shift_flags, count;
+ Bit8u scan_code, ascii_code, shift_flags, led_flags, count;
Bit16u kbd_code, max;
BX_DEBUG_INT16("int16: AX=%04x BX=%04x CX=%04x DX=%04x \n", AX, BX, CX, DX);
+ shift_flags = read_byte(0x0040, 0x17);
+ led_flags = read_byte(0x0040, 0x97);
+ if ((((shift_flags >> 4) & 0x07) ^ (led_flags & 0x07)) != 0) {
+ASM_START
+ cli
+ASM_END
+ outb(0x60, 0xed);
+ while ((inb(0x64) & 0x01) == 0) outb(0x80, 0x21);
+ if ((inb(0x60) == 0xfa)) {
+ led_flags &= 0xf8;
+ led_flags |= ((shift_flags >> 4) & 0x07);
+ outb(0x60, led_flags & 0x07);
+ while ((inb(0x64) & 0x01) == 0) outb(0x80, 0x21);
+ inb(0x60);
+ write_byte(0x0040, 0x97, led_flags);
+ }
+ASM_START
+ sti
+ASM_END
+ }
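The block above resynchronises the keyboard LEDs with the lock bits kept in BDA 40:17, using the keyboard "set LEDs" command (0xED) followed by the LED bitmap (bit 0 Scroll Lock, bit 1 Num Lock, bit 2 Caps Lock). A hedged sketch of that handshake, assuming simple port-I/O helpers that are not part of the BIOS:

#include <stdint.h>

extern void    kbd_write(uint8_t byte);   /* assumed: write data to port 0x60 */
extern uint8_t kbd_read(void);            /* assumed: read reply from port 0x60 */

/* Sketch: push the lock-key state from BDA 40:17 out to the keyboard LEDs. */
static int sync_keyboard_leds(uint8_t shift_flags /* BDA 40:17 */)
{
    uint8_t leds = (shift_flags >> 4) & 0x07;   /* Scroll/Num/Caps bits */

    kbd_write(0xED);                /* "set LEDs" command */
    if (kbd_read() != 0xFA)         /* expect ACK */
        return -1;
    kbd_write(leds);
    if (kbd_read() != 0xFA)
        return -1;
    return 0;
}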
+
switch (GET_AH()) {
case 0x00: /* read keyboard input */
break;
case 0x09: /* GET KEYBOARD FUNCTIONALITY */
- // bit Bochs Description
+ // bit Bochs Description
// 7 0 reserved
// 6 0 INT 16/AH=20h-22h supported (122-key keyboard support)
// 5 1 INT 16/AH=10h-12h supported (enhanced keyboard support)
kbd_code |= (inb(0x60) << 8);
}
} while (--count>0);
- }
+ }
}
BX=kbd_code;
break;
case 0x12: /* get extended keyboard status */
shift_flags = read_byte(0x0040, 0x17);
SET_AL(shift_flags);
- shift_flags = read_byte(0x0040, 0x18);
+ shift_flags = read_byte(0x0040, 0x18) & 0x73;
+ shift_flags |= read_byte(0x0040, 0x96) & 0x0c;
SET_AH(shift_flags);
BX_DEBUG_INT16("int16: func 12 sending %04x\n",AX);
break;
case 0x6F:
if (GET_AL() == 0x08)
- SET_AH(0x02); // unsupported, aka normal keyboard
+ SET_AH(0x02); // unsupported, aka normal keyboard
default:
BX_INFO("KBD: unsupported int 16h function %02x\n", GET_AH());
Bit16u DI, SI, BP, SP, BX, DX, CX, AX;
{
Bit8u scancode, asciicode, shift_flags;
- Bit8u mf2_flags, mf2_state, led_flags;
+ Bit8u mf2_flags, mf2_state;
//
// DS has been set to F000 before call
shift_flags = read_byte(0x0040, 0x17);
mf2_flags = read_byte(0x0040, 0x18);
mf2_state = read_byte(0x0040, 0x96);
- led_flags = read_byte(0x0040, 0x97);
asciicode = 0;
switch (scancode) {
write_byte(0x0040, 0x17, shift_flags);
mf2_flags |= 0x40;
write_byte(0x0040, 0x18, mf2_flags);
- led_flags ^= 0x04;
- write_byte(0x0040, 0x97, led_flags);
break;
case 0xba: /* Caps Lock release */
mf2_flags &= ~0x40;
break;
case 0x2a: /* L Shift press */
- /*shift_flags &= ~0x40;*/
shift_flags |= 0x02;
write_byte(0x0040, 0x17, shift_flags);
- led_flags &= ~0x04;
- write_byte(0x0040, 0x97, led_flags);
break;
case 0xaa: /* L Shift release */
shift_flags &= ~0x02;
break;
case 0x36: /* R Shift press */
- /*shift_flags &= ~0x40;*/
shift_flags |= 0x01;
write_byte(0x0040, 0x17, shift_flags);
- led_flags &= ~0x04;
- write_byte(0x0040, 0x97, led_flags);
break;
case 0xb6: /* R Shift release */
shift_flags &= ~0x01;
break;
case 0x1d: /* Ctrl press */
- shift_flags |= 0x04;
- write_byte(0x0040, 0x17, shift_flags);
- if (mf2_state & 0x01) {
- mf2_flags |= 0x04;
- } else {
- mf2_flags |= 0x01;
+ if ((mf2_state & 0x01) == 0) {
+ shift_flags |= 0x04;
+ write_byte(0x0040, 0x17, shift_flags);
+ if (mf2_state & 0x02) {
+ mf2_state |= 0x04;
+ write_byte(0x0040, 0x96, mf2_state);
+ } else {
+ mf2_flags |= 0x01;
+ write_byte(0x0040, 0x18, mf2_flags);
}
- write_byte(0x0040, 0x18, mf2_flags);
+ }
break;
case 0x9d: /* Ctrl release */
- shift_flags &= ~0x04;
- write_byte(0x0040, 0x17, shift_flags);
- if (mf2_state & 0x01) {
- mf2_flags &= ~0x04;
- } else {
- mf2_flags &= ~0x01;
+ if ((mf2_state & 0x01) == 0) {
+ shift_flags &= ~0x04;
+ write_byte(0x0040, 0x17, shift_flags);
+ if (mf2_state & 0x02) {
+ mf2_state &= ~0x04;
+ write_byte(0x0040, 0x96, mf2_state);
+ } else {
+ mf2_flags &= ~0x01;
+ write_byte(0x0040, 0x18, mf2_flags);
}
- write_byte(0x0040, 0x18, mf2_flags);
+ }
break;
case 0x38: /* Alt press */
shift_flags |= 0x08;
write_byte(0x0040, 0x17, shift_flags);
- if (mf2_state & 0x01) {
- mf2_flags |= 0x08;
+ if (mf2_state & 0x02) {
+ mf2_state |= 0x08;
+ write_byte(0x0040, 0x96, mf2_state);
} else {
mf2_flags |= 0x02;
- }
- write_byte(0x0040, 0x18, mf2_flags);
+ write_byte(0x0040, 0x18, mf2_flags);
+ }
break;
case 0xb8: /* Alt release */
shift_flags &= ~0x08;
write_byte(0x0040, 0x17, shift_flags);
- if (mf2_state & 0x01) {
- mf2_flags &= ~0x08;
+ if (mf2_state & 0x02) {
+ mf2_state &= ~0x08;
+ write_byte(0x0040, 0x96, mf2_state);
} else {
mf2_flags &= ~0x02;
- }
- write_byte(0x0040, 0x18, mf2_flags);
+ write_byte(0x0040, 0x18, mf2_flags);
+ }
break;
case 0x45: /* Num Lock press */
- if ((mf2_state & 0x01) == 0) {
+ if ((mf2_state & 0x03) == 0) {
mf2_flags |= 0x20;
write_byte(0x0040, 0x18, mf2_flags);
shift_flags ^= 0x20;
- led_flags ^= 0x02;
write_byte(0x0040, 0x17, shift_flags);
- write_byte(0x0040, 0x97, led_flags);
- }
+ }
break;
case 0xc5: /* Num Lock release */
- if ((mf2_state & 0x01) == 0) {
+ if ((mf2_state & 0x03) == 0) {
mf2_flags &= ~0x20;
write_byte(0x0040, 0x18, mf2_flags);
- }
+ }
break;
case 0x46: /* Scroll Lock press */
mf2_flags |= 0x10;
write_byte(0x0040, 0x18, mf2_flags);
shift_flags ^= 0x10;
- led_flags ^= 0x01;
write_byte(0x0040, 0x17, shift_flags);
- write_byte(0x0040, 0x97, led_flags);
break;
case 0xc6: /* Scroll Lock release */
machine_reset();
/* Fall through */
default:
- if (scancode & 0x80) return; /* toss key releases ... */
+ if (scancode & 0x80) {
+ break; /* toss key releases ... */
+ }
if (scancode > MAX_SCAN_CODE) {
- BX_INFO("KBD: int09h_handler(): unknown scancode (%x) read!\n", scancode);
+ BX_INFO("KBD: int09h_handler(): unknown scancode read: 0x%02x!\n", scancode);
return;
- }
+ }
if (shift_flags & 0x08) { /* ALT */
asciicode = scan_to_scanascii[scancode].alt;
scancode = scan_to_scanascii[scancode].alt >> 8;
- }
- else if (shift_flags & 0x04) { /* CONTROL */
+ } else if (shift_flags & 0x04) { /* CONTROL */
asciicode = scan_to_scanascii[scancode].control;
scancode = scan_to_scanascii[scancode].control >> 8;
- }
- else if (shift_flags & 0x03) { /* LSHIFT + RSHIFT */
- /* check if lock state should be ignored
+ } else if (((mf2_state & 0x02) > 0) && ((scancode >= 0x47) && (scancode <= 0x53))) {
+ /* extended keys handling */
+ asciicode = 0xe0;
+ scancode = scan_to_scanascii[scancode].normal >> 8;
+ } else if (shift_flags & 0x03) { /* LSHIFT + RSHIFT */
+ /* check if lock state should be ignored
* because a SHIFT key are pressed */
-
+
if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
asciicode = scan_to_scanascii[scancode].normal;
scancode = scan_to_scanascii[scancode].normal >> 8;
- }
- else {
+ } else {
asciicode = scan_to_scanascii[scancode].shift;
scancode = scan_to_scanascii[scancode].shift >> 8;
- }
}
- else {
+ } else {
/* check if lock is on */
if (shift_flags & scan_to_scanascii[scancode].lock_flags) {
asciicode = scan_to_scanascii[scancode].shift;
scancode = scan_to_scanascii[scancode].shift >> 8;
- }
- else {
+ } else {
asciicode = scan_to_scanascii[scancode].normal;
scancode = scan_to_scanascii[scancode].normal >> 8;
- }
}
+ }
if (scancode==0 && asciicode==0) {
BX_INFO("KBD: int09h_handler(): scancode & asciicode are zero?\n");
- }
+ }
enqueue_key(scancode, asciicode);
break;
- }
- mf2_state &= ~0x01;
+ }
+ if ((scancode & 0x7f) != 0x1d) {
+ mf2_state &= ~0x01;
+ }
+ mf2_state &= ~0x02;
+ write_byte(0x0040, 0x96, mf2_state);
}
unsigned int
{
Bit16u buffer_start, buffer_end, buffer_head, buffer_tail, temp_tail;
- //BX_INFO("KBD: enqueue_key() called scan:%02x, ascii:%02x\n",
- // scan_code, ascii_code);
-
#if BX_CPU < 2
buffer_start = 0x001E;
buffer_end = 0x003E;
mouse_flags_2 = read_byte(ebda_seg, 0x0027);
if ( (mouse_flags_2 & 0x80) != 0x80 ) {
- // BX_PANIC("int74_function:\n");
return;
- }
+ }
package_count = mouse_flags_2 & 0x07;
index = mouse_flags_1 & 0x07;
#if BX_USE_ATADRV
void
-int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
- Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+int13_harddisk(EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+ Bit16u EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
{
- Bit32u lba;
+ Bit32u lba_low, lba_high;
Bit16u ebda_seg=read_word(0x0040,0x000E);
Bit16u cylinder, head, sector;
Bit16u segment, offset;
// Get the ata channel
device=read_byte(ebda_seg,&EbdaData->ata.hdidmap[GET_ELDL()-0x80]);
- // basic check : device has to be valid
+ // basic check : device has to be valid
if (device >= BX_MAX_ATA_DEVICES) {
BX_INFO("int13_harddisk: function %02x, unmapped device for ELDL=%02x\n", GET_AH(), GET_ELDL());
goto int13_fail;
}
-
+
switch (GET_AH()) {
case 0x00: /* disk controller reset */
break;
case 0x02: // read disk sectors
- case 0x03: // write disk sectors
+ case 0x03: // write disk sectors
case 0x04: // verify disk sectors
count = GET_AL();
segment = ES;
offset = BX;
- if ( (count > 128) || (count == 0) ) {
- BX_INFO("int13_harddisk: function %02x, count out of range!\n",GET_AH());
+ if ((count > 128) || (count == 0) || (sector == 0)) {
+ BX_INFO("int13_harddisk: function %02x, parameter out of range!\n",GET_AH());
goto int13_fail;
- }
+ }
nlc = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
nlh = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
BX_INFO("int13_harddisk: function %02x, parameters out of range %04x/%04x/%04x!\n", GET_AH(), cylinder, head, sector);
goto int13_fail;
}
-
+
// FIXME verify
if ( GET_AH() == 0x04 ) goto int13_success;
// if needed, translate lchs to lba, and execute command
if ( (nph != nlh) || (npspt != nlspt)) {
- lba = ((((Bit32u)cylinder * (Bit32u)nlh) + (Bit32u)head) * (Bit32u)nlspt) + (Bit32u)sector - 1;
+ lba_low = ((((Bit32u)cylinder * (Bit32u)nlh) + (Bit32u)head) * (Bit32u)nlspt) + (Bit32u)sector - 1;
+ lba_high = 0;
sector = 0; // this forces the command to be lba
}
if ( GET_AH() == 0x02 )
- status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, cylinder, head, sector, lba, segment, offset);
+ status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, cylinder, head, sector, lba_low, lba_high, segment, offset);
else
- status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, cylinder, head, sector, lba, segment, offset);
+ status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, cylinder, head, sector, lba_low, lba_high, segment, offset);
// Set nb of sector transferred
SET_AL(read_word(ebda_seg, &EbdaData->ata.trsfsectors));
break;
case 0x08: /* read disk drive parameters */
-
+
// Get logical geometry from table
nlc = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
nlh = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
SET_DL(count); /* FIXME returns 0, 1, or n hard drives */
// FIXME should set ES & DI
-
+
goto int13_success;
break;
case 0x10: /* check drive ready */
// should look at 40:8E also???
-
+
// Read the status from controller
status = inb(read_word(ebda_seg, &EbdaData->ata.channels[device/2].iobase1) + ATA_CB_STAT);
if ( (status & ( ATA_CB_STAT_BSY | ATA_CB_STAT_RDY )) == ATA_CB_STAT_RDY ) {
case 0x15: /* read disk drive size */
- // Get physical geometry from table
- npc = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.cylinders);
- nph = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
- npspt = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
+ // Get logical geometry from table
+ nlc = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.cylinders);
+ nlh = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.heads);
+ nlspt = read_word(ebda_seg, &EbdaData->ata.devices[device].lchs.spt);
// Compute sector count seen by int13
- lba = (Bit32u)(npc - 1) * (Bit32u)nph * (Bit32u)npspt;
- CX = lba >> 16;
- DX = lba & 0xffff;
+ lba_low = (Bit32u)(nlc - 1) * (Bit32u)nlh * (Bit32u)nlspt;
+ CX = lba_low >> 16;
+ DX = lba_low & 0xffff;
SET_AH(3); // hard disk accessible
goto int13_success_noah;
count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
-
- // Can't use 64 bits lba
- lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
- if (lba != 0L) {
- BX_PANIC("int13_harddisk: function %02x. Can't use 64bits lba\n",GET_AH());
+
+ // Get 32 msb lba and check
+ lba_high=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
+ if (lba_high > read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_high) ) {
+ BX_INFO("int13_harddisk: function %02x. LBA out of range\n",GET_AH());
goto int13_fail;
}
- // Get 32 bits lba and check
- lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
- if (lba >= read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors) ) {
+ // Get 32 lsb lba and check
+ lba_low=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
+ if (lba_high == read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_high)
+ && lba_low >= read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_low) ) {
BX_INFO("int13_harddisk: function %02x. LBA out of range\n",GET_AH());
goto int13_fail;
}
// If verify or seek
if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
goto int13_success;
-
+
// Execute the command
if ( GET_AH() == 0x42 )
- status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, 0, 0, 0, lba, segment, offset);
+ status=ata_cmd_data_in(device, ATA_CMD_READ_SECTORS, count, 0, 0, 0, lba_low, lba_high, segment, offset);
else
- status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 0, 0, 0, lba, segment, offset);
+ status=ata_cmd_data_out(device, ATA_CMD_WRITE_SECTORS, count, 0, 0, 0, lba_low, lba_high, segment, offset);
count=read_word(ebda_seg, &EbdaData->ata.trsfsectors);
write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
case 0x49: // IBM/MS extended media change
goto int13_success; // Always success for HD
break;
-
+
case 0x46: // IBM/MS eject media
SET_AH(0xb2); // Volume Not Removable
goto int13_fail_noah; // Always fail for HD
size=read_word(DS,SI+(Bit16u)&Int13DPT->size);
// Buffer is too small
- if(size < 0x1a)
+ if(size < 0x1a)
goto int13_fail;
// EDD 1.x
npc = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.cylinders);
nph = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.heads);
npspt = read_word(ebda_seg, &EbdaData->ata.devices[device].pchs.spt);
- lba = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors);
+ lba_low = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_low);
+ lba_high = read_dword(ebda_seg, &EbdaData->ata.devices[device].sectors_high);
blksize = read_word(ebda_seg, &EbdaData->ata.devices[device].blksize);
write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1a);
- write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x02); // geometry is valid
- write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, (Bit32u)npc);
+ if (lba_high || (lba_low/npspt)/nph > 0x3fff)
+ {
+ write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x00); // geometry is invalid
+ write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, 0x3fff);
+ }
+ else
+ {
+ write_word(DS, SI+(Bit16u)&Int13DPT->infos, 0x02); // geometry is valid
+ write_dword(DS, SI+(Bit16u)&Int13DPT->cylinders, (Bit32u)npc);
+ }
write_dword(DS, SI+(Bit16u)&Int13DPT->heads, (Bit32u)nph);
write_dword(DS, SI+(Bit16u)&Int13DPT->spt, (Bit32u)npspt);
- write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, lba); // FIXME should be Bit64
- write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0L);
- write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
+ write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, lba_low);
+ write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, lba_high);
+ write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
}
// EDD 2.x
write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
- write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
- write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
+ write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
+ write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
// Fill in dpte
channel = device / 2;
mode = read_byte(ebda_seg, &EbdaData->ata.devices[device].mode);
translation = read_byte(ebda_seg, &EbdaData->ata.devices[device].translation);
- options = (translation==ATA_TRANSLATION_NONE?0:1<<3); // chs translation
+ options = (translation==ATA_TRANSLATION_NONE?0:1)<<3; // chs translation
options |= (1<<4); // lba translation
- options |= (mode==ATA_MODE_PIO32?1:0<<7);
- options |= (translation==ATA_TRANSLATION_LBA?1:0<<9);
- options |= (translation==ATA_TRANSLATION_RECHS?3:0<<9);
+ options |= (mode==ATA_MODE_PIO32?1:0)<<7;
+ options |= (translation==ATA_TRANSLATION_LBA?1:0)<<9;
+ options |= (translation==ATA_TRANSLATION_RECHS?3:0)<<9;
write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
- write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
+ write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2 + ATA_CB_DC);
write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 2))<<4 );
write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
write_byte(ebda_seg, &EbdaData->ata.dpte.pio, 0 );
write_word(ebda_seg, &EbdaData->ata.dpte.options, options);
write_word(ebda_seg, &EbdaData->ata.dpte.reserved, 0);
- write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
-
+ if (size >=0x42)
+ write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
+ else
+ write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x10);
+
checksum=0;
- for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, (&EbdaData->ata.dpte) + i);
+ for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, ((Bit8u*)(&EbdaData->ata.dpte)) + i);
checksum = ~checksum;
write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
}
write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
}
- else {
+ else {
// FIXME PCI
}
write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
}
- else {
+ else {
// FIXME PCI
}
write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
case 0x0d: /* alternate disk reset */
case 0x11: /* recalibrate */
case 0x14: /* controller internal diagnostic */
- BX_INFO("int13h_harddisk function %02xh unimplemented, returns success\n", GET_AH());
+ BX_INFO("int13_harddisk: function %02xh unimplemented, returns success\n", GET_AH());
goto int13_success;
break;
case 0x18: // set media type for format
case 0x50: // IBM/MS send packet command
default:
- BX_INFO("int13_harddisk function %02xh unsupported, returns fail\n", GET_AH());
+ BX_INFO("int13_harddisk: function %02xh unsupported, returns fail\n", GET_AH());
goto int13_fail;
break;
}
Bit16u count, segment, offset, i, size;
BX_DEBUG_INT13_CD("int13_cdrom: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
- // BX_DEBUG_INT13_CD("int13_cdrom: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
-
+
SET_DISK_RET_STATUS(0x00);
/* basic check : device should be 0xE0+ */
BX_INFO("int13_cdrom: function %02x, unmapped device for ELDL=%02x\n", GET_AH(), GET_ELDL());
goto int13_fail;
}
-
+
switch (GET_AH()) {
// all those functions return SUCCESS
case 0x00: /* disk controller reset */
case 0x09: /* initialize drive parameters */
case 0x0c: /* seek to specified cylinder */
- case 0x0d: /* alternate disk reset */
- case 0x10: /* check drive ready */
- case 0x11: /* recalibrate */
+ case 0x0d: /* alternate disk reset */
+ case 0x10: /* check drive ready */
+ case 0x11: /* recalibrate */
case 0x14: /* controller internal diagnostic */
case 0x16: /* detect disk change */
goto int13_success;
/* set CF if error status read */
if (status) goto int13_fail_nostatus;
else goto int13_success_noah;
- break;
+ break;
case 0x15: /* read disk drive size */
SET_AH(0x02);
case 0x42: // IBM/MS extended read
case 0x44: // IBM/MS verify sectors
case 0x47: // IBM/MS extended seek
-
+
count=read_word(DS, SI+(Bit16u)&Int13Ext->count);
segment=read_word(DS, SI+(Bit16u)&Int13Ext->segment);
offset=read_word(DS, SI+(Bit16u)&Int13Ext->offset);
-
+
// Can't use 64 bits lba
lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba2);
if (lba != 0L) {
goto int13_fail;
}
- // Get 32 bits lba
+ // Get 32 bits lba
lba=read_dword(DS, SI+(Bit16u)&Int13Ext->lba1);
// If verify or seek
if (( GET_AH() == 0x44 ) || ( GET_AH() == 0x47 ))
goto int13_success;
-
+
memsetb(get_SS(),atacmd,0,12);
atacmd[0]=0x28; // READ command
atacmd[7]=(count & 0xff00) >> 8; // Sectors
atacmd[3]=(lba & 0x00ff0000) >> 16;
atacmd[4]=(lba & 0x0000ff00) >> 8;
atacmd[5]=(lba & 0x000000ff);
- status = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, count*2048L, ATA_DATA_IN, segment,offset);
+ status = ata_cmd_packet(device, 12, get_SS(), atacmd, 0, count*2048L, ATA_DATA_IN, segment,offset);
count = (Bit16u)(read_dword(ebda_seg, &EbdaData->ata.trsfbytes) >> 11);
write_word(DS, SI+(Bit16u)&Int13Ext->count, count);
case 0x46: // IBM/MS eject media
locks = read_byte(ebda_seg, &EbdaData->ata.devices[device].lock);
-
+
if (locks != 0) {
SET_AH(0xb1); // media locked
goto int13_fail_noah;
}
// FIXME should handle 0x31 no media in device
// FIXME should handle 0xb5 valid request failed
-
+
// Call removable media eject
ASM_START
push bp
mov bp, sp
mov ah, #0x52
- int 15
+ int #0x15
mov _int13_cdrom.status + 2[bp], ah
jnc int13_cdrom_rme_end
mov _int13_cdrom.status, #1
size = read_word(DS,SI+(Bit16u)&Int13Ext->size);
// Buffer is too small
- if(size < 0x1a)
+ if(size < 0x1a)
goto int13_fail;
// EDD 1.x
write_dword(DS, SI+(Bit16u)&Int13DPT->heads, 0xffffffff);
write_dword(DS, SI+(Bit16u)&Int13DPT->spt, 0xffffffff);
write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count1, 0xffffffff); // FIXME should be Bit64
- write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0xffffffff);
- write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
+ write_dword(DS, SI+(Bit16u)&Int13DPT->sector_count2, 0xffffffff);
+ write_word(DS, SI+(Bit16u)&Int13DPT->blksize, blksize);
}
// EDD 2.x
write_word(DS, SI+(Bit16u)&Int13DPT->size, 0x1e);
- write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
- write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
+ write_word(DS, SI+(Bit16u)&Int13DPT->dpte_segment, ebda_seg);
+ write_word(DS, SI+(Bit16u)&Int13DPT->dpte_offset, &EbdaData->ata.dpte);
// Fill in dpte
channel = device / 2;
options |= (mode==ATA_MODE_PIO32?1:0<<7);
write_word(ebda_seg, &EbdaData->ata.dpte.iobase1, iobase1);
- write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2);
+ write_word(ebda_seg, &EbdaData->ata.dpte.iobase2, iobase2 + ATA_CB_DC);
write_byte(ebda_seg, &EbdaData->ata.dpte.prefix, (0xe | (device % 2))<<4 );
write_byte(ebda_seg, &EbdaData->ata.dpte.unused, 0xcb );
write_byte(ebda_seg, &EbdaData->ata.dpte.irq, irq );
write_byte(ebda_seg, &EbdaData->ata.dpte.revision, 0x11);
checksum=0;
- for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, (&EbdaData->ata.dpte) + i);
+ for (i=0; i<15; i++) checksum+=read_byte(ebda_seg, ((Bit8u*)(&EbdaData->ata.dpte)) + i);
checksum = ~checksum;
write_byte(ebda_seg, &EbdaData->ata.dpte.checksum, checksum);
}
write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[2], 'A');
write_byte(DS, SI+(Bit16u)&Int13DPT->host_bus[3], 0);
}
- else {
+ else {
// FIXME PCI
}
write_byte(DS, SI+(Bit16u)&Int13DPT->iface_type[0], 'A');
write_word(DS, SI+(Bit16u)&Int13DPT->iface_path[2], 0);
write_dword(DS, SI+(Bit16u)&Int13DPT->iface_path[4], 0L);
}
- else {
+ else {
// FIXME PCI
}
write_byte(DS, SI+(Bit16u)&Int13DPT->device_path[0], device%2);
SET_AH(06);
goto int13_fail_nostatus;
break;
-
+
case 0x4e: // // IBM/MS set hardware configuration
// DMA, prefetch, PIO maximum not supported
switch (GET_AL()) {
BX_DEBUG_INT13_ET("int13_eltorito: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
// BX_DEBUG_INT13_ET("int13_eltorito: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), DS, ES, DI, SI);
-
+
switch (GET_AH()) {
// FIXME ElTorito Various. Should be implemented
Bit8u atacmd[12];
BX_DEBUG_INT13_ET("int13_cdemu: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
- //BX_DEBUG_INT13_ET("int13_cdemu: SS=%04x ES=%04x DI=%04x SI=%04x\n", get_SS(), ES, DI, SI);
-
+
/* at this point, we are emulating a floppy/harddisk */
-
- // Recompute the device number
+
+ // Recompute the device number
device = read_byte(ebda_seg,&EbdaData->cdemu.controller_index) * 2;
device += read_byte(ebda_seg,&EbdaData->cdemu.device_spec);
goto int13_fail;
}
-
switch (GET_AH()) {
// all those functions return SUCCESS
case 0x0c: /* seek to specified cylinder */
case 0x0d: /* alternate disk reset */ // FIXME ElTorito Various. should really reset ?
case 0x10: /* check drive ready */ // FIXME ElTorito Various. should check if ready ?
- case 0x11: /* recalibrate */
+ case 0x11: /* recalibrate */
case 0x14: /* controller internal diagnostic */
case 0x16: /* detect disk change */
goto int13_success;
case 0x02: // read disk sectors
case 0x04: // verify disk sectors
- vspt = read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
- vcylinders = read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders);
- vheads = read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads);
+ vspt = read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
+ vcylinders = read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders);
+ vheads = read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads);
ilba = read_dword(ebda_seg,&EbdaData->cdemu.ilba);
// calculate the virtual lba inside the image
vlba=((((Bit32u)cylinder*(Bit32u)vheads)+(Bit32u)head)*(Bit32u)vspt)+((Bit32u)(sector-1));
-
+
// In advance so we don't lose the count
SET_AL(nbsectors);
// start lba on cd
- slba = (Bit32u)vlba/4;
+ slba = (Bit32u)vlba/4;
before= (Bit16u)vlba%4;
// end lba on cd
elba = (Bit32u)(vlba+nbsectors-1)/4;
-
+
memsetb(get_SS(),atacmd,0,12);
atacmd[0]=0x28; // READ command
atacmd[7]=((Bit16u)(elba-slba+1) & 0xff00) >> 8; // Sectors
break;
case 0x08: /* read disk drive parameters */
- vspt=read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
- vcylinders=read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders) - 1;
- vheads=read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads) - 1;
-
+ vspt=read_word(ebda_seg,&EbdaData->cdemu.vdevice.spt);
+ vcylinders=read_word(ebda_seg,&EbdaData->cdemu.vdevice.cylinders) - 1;
+ vheads=read_word(ebda_seg,&EbdaData->cdemu.vdevice.heads) - 1;
+
SET_AL( 0x00 );
SET_BL( 0x00 );
SET_CH( vcylinders & 0xff );
SET_DH( vheads );
SET_DL( 0x02 ); // FIXME ElTorito Various. should send the real count of drives 1 or 2
// FIXME ElTorito Harddisk. should send the HD count
-
+
switch(read_byte(ebda_seg,&EbdaData->cdemu.media)) {
case 0x01: SET_BL( 0x02 ); break;
case 0x02: SET_BL( 0x04 ); break;
case 0x45: // IBM/MS lock/unlock drive
case 0x46: // IBM/MS eject media
case 0x47: // IBM/MS extended seek
- case 0x48: // IBM/MS get drive parameters
+ case 0x48: // IBM/MS get drive parameters
case 0x49: // IBM/MS extended media change
case 0x4e: // ? - set hardware configuration
case 0x50: // ? - send packet command
}
void
-int13_harddisk(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
- Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
+int13_harddisk(EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
+ Bit16u EHAX, DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
{
Bit8u drive, num_sectors, sector, head, status, mod;
Bit8u drive_map;
}
if ( (num_sectors > 128) || (num_sectors == 0) )
- BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
+ BX_PANIC("int13_harddisk: num_sectors out of range!\n");
if (head > 15)
BX_PANIC("hard drive BIOS:(read/verify) head > 15\n");
}
if ( (num_sectors > 128) || (num_sectors == 0) )
- BX_PANIC("int13_harddisk(): num_sectors out of range!\n");
+ BX_PANIC("int13_harddisk: num_sectors out of range!\n");
if (head > 15)
BX_PANIC("hard drive BIOS:(read) head > 15\n");
case 0x08: /* read disk drive parameters */
BX_DEBUG_INT13_HD("int13_f08\n");
-
+
drive = GET_ELDL ();
get_hd_geometry(drive, &hd_cylinders, &hd_heads, &hd_sectors);
break;
case 0x18: // set media type for format
- case 0x41: // IBM/MS
- case 0x42: // IBM/MS
- case 0x43: // IBM/MS
- case 0x44: // IBM/MS
+ case 0x41: // IBM/MS
+ case 0x42: // IBM/MS
+ case 0x43: // IBM/MS
+ case 0x44: // IBM/MS
case 0x45: // IBM/MS lock/unlock drive
case 0x46: // IBM/MS eject media
case 0x47: // IBM/MS extended seek
hd_type = inb_cmos(0x12) & 0x0f;
if (hd_type != 0x0f)
BX_INFO(panic_msg_reg12h,1);
- hd_type = inb_cmos(0x1a); // HD0: extended type
+ hd_type = inb_cmos(0x1a); // HD1: extended type
if (hd_type != 47)
BX_INFO(panic_msg_reg19h,0,0x1a);
iobase = 0x24;
#endif //else BX_USE_ATADRV
+#if BX_SUPPORT_FLOPPY
//////////////////////
// FLOPPY functions //
//////////////////////
+void floppy_reset_controller()
+{
+ Bit8u val8;
+
+ // Reset controller
+ val8 = inb(0x03f2);
+ outb(0x03f2, val8 & ~0x04);
+ outb(0x03f2, val8 | 0x04);
+
+ // Wait for controller to come out of reset
+ do {
+ val8 = inb(0x3f4);
+ } while ( (val8 & 0xc0) != 0x80 );
+}
+
+void floppy_prepare_controller(drive)
+ Bit16u drive;
+{
+ Bit8u val8, dor, prev_reset;
+
+ // set 40:3e bit 7 to 0
+ val8 = read_byte(0x0040, 0x003e);
+ val8 &= 0x7f;
+ write_byte(0x0040, 0x003e, val8);
+
+ // turn on motor of selected drive, DMA & int enabled, normal operation
+ prev_reset = inb(0x03f2) & 0x04;
+ if (drive)
+ dor = 0x20;
+ else
+ dor = 0x10;
+ dor |= 0x0c;
+ dor |= drive;
+ outb(0x03f2, dor);
+
+ // reset the disk motor timeout value of INT 08
+ write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
+
+ // wait for drive readiness
+ do {
+ val8 = inb(0x3f4);
+ } while ( (val8 & 0xc0) != 0x80 );
+
+ if (prev_reset == 0) {
+ // turn on interrupts
+ASM_START
+ sti
+ASM_END
+ // wait on 40:3e bit 7 to become 1
+ do {
+ val8 = read_byte(0x0040, 0x003e);
+ } while ( (val8 & 0x80) == 0 );
+ val8 &= 0x7f;
+ASM_START
+ cli
+ASM_END
+ write_byte(0x0040, 0x003e, val8);
+ }
+}
+
bx_bool
floppy_media_known(drive)
Bit16u drive;
retval = 1;
}
//
- // Extended floppy size uses special cmos setting
+ // Extended floppy size uses special cmos setting
else if ( drive_type == 6 ) {
// 160k 5.25" drive
config_data = 0x00; // 0000 0000
floppy_drive_recal(drive)
Bit16u drive;
{
- Bit8u val8, dor;
+ Bit8u val8;
Bit16u curr_cyl_offset;
- // set 40:3e bit 7 to 0
- val8 = read_byte(0x0000, 0x043e);
- val8 &= 0x7f;
- write_byte(0x0000, 0x043e, val8);
-
- // turn on motor of selected drive, DMA & int enabled, normal operation
- if (drive)
- dor = 0x20;
- else
- dor = 0x10;
- dor |= 0x0c;
- dor |= drive;
- outb(0x03f2, dor);
-
- // reset the disk motor timeout value of INT 08
- write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
- // check port 3f4 for drive readiness
- val8 = inb(0x3f4);
- if ( (val8 & 0xf0) != 0x80 )
- BX_PANIC("floppy recal:f07: ctrl not ready\n");
+ floppy_prepare_controller(drive);
// send Recalibrate command (2 bytes) to controller
outb(0x03f5, 0x07); // 07: Recalibrate
outb(0x03f5, drive); // 0=drive0, 1=drive1
- // turn on interrupts
+ // turn on interrupts
ASM_START
sti
ASM_END
// wait on 40:3e bit 7 to become 1
- val8 = (read_byte(0x0000, 0x043e) & 0x80);
- while ( val8 == 0 ) {
- val8 = (read_byte(0x0000, 0x043e) & 0x80);
- }
+ do {
+ val8 = (read_byte(0x0040, 0x003e) & 0x80);
+ } while ( val8 == 0 );
- val8 = 0; // separate asm from while() loop
- // turn off interrupts
+ val8 = 0; // separate asm from while() loop
+ // turn off interrupts
ASM_START
cli
ASM_END
// set 40:3e bit 7 to 0, and calibrated bit
- val8 = read_byte(0x0000, 0x043e);
+ val8 = read_byte(0x0040, 0x003e);
val8 &= 0x7f;
if (drive) {
val8 |= 0x02; // Drive 1 calibrated
curr_cyl_offset = 0x0095;
- }
- else {
+ } else {
val8 |= 0x01; // Drive 0 calibrated
curr_cyl_offset = 0x0094;
- }
+ }
write_byte(0x0040, 0x003e, val8);
write_byte(0x0040, curr_cyl_offset, 0); // current cylinder is 0
return(1);
}
-#if BX_SUPPORT_FLOPPY
void
int13_diskette_function(DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS)
Bit16u DS, ES, DI, SI, BP, ELDX, BX, DX, CX, AX, IP, CS, FLAGS;
Bit16u es, last_addr;
BX_DEBUG_INT13_FL("int13_diskette: AX=%04x BX=%04x CX=%04x DX=%04x ES=%04x\n", AX, BX, CX, DX, ES);
- // BX_DEBUG_INT13_FL("int13_diskette: SS=%04x DS=%04x ES=%04x DI=%04x SI=%04x\n",get_SS(), get_DS(), ES, DI, SI);
ah = GET_AH();
set_diskette_ret_status(1);
SET_CF();
return;
- }
+ }
drive_type = inb_cmos(0x10);
if (drive == 0)
set_diskette_ret_status(0x80);
SET_CF();
return;
- }
+ }
SET_AH(0);
set_diskette_ret_status(0);
CLEAR_CF(); // successful
SET_AH(val8);
if (val8) {
SET_CF();
- }
+ }
return;
case 0x02: // Read Diskette Sectors
head = GET_DH();
drive = GET_ELDL();
- if ( (drive > 1) || (head > 1) ||
- (num_sectors == 0) || (num_sectors > 72) ) {
-BX_INFO("floppy: drive>1 || head>1 ...\n");
+ if ((drive > 1) || (head > 1) || (sector == 0) ||
+ (num_sectors == 0) || (num_sectors > 72)) {
+ BX_INFO("int13_diskette: read/write/verify: parameter out of range\n");
SET_AH(1);
set_diskette_ret_status(1);
SET_AL(0); // no sectors read
SET_CF(); // error occurred
return;
- }
+ }
// see if drive exists
if (floppy_drive_exists(drive) == 0) {
SET_AL(0); // no sectors read
SET_CF(); // error occurred
return;
- }
+ }
// see if media in drive, and type is known
if (floppy_media_known(drive) == 0) {
SET_AL(0); // no sectors read
SET_CF(); // error occurred
return;
- }
}
+ }
if (ah == 0x02) {
// Read Diskette Sectors
if ( base_address < base_es ) {
// in case of carry, adjust page by 1
page++;
- }
+ }
base_count = (num_sectors * 512) - 1;
// check for 64K boundary overrun
SET_AL(0); // no sectors read
SET_CF(); // error occurred
return;
- }
+ }
BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
outb(0x000a, 0x06);
//--------------------------------------
// set up floppy controller for transfer
//--------------------------------------
-
- // set 40:3e bit 7 to 0
- val8 = read_byte(0x0000, 0x043e);
- val8 &= 0x7f;
- write_byte(0x0000, 0x043e, val8);
-
- // turn on motor of selected drive, DMA & int enabled, normal operation
- if (drive)
- dor = 0x20;
- else
- dor = 0x10;
- dor |= 0x0c;
- dor |= drive;
- outb(0x03f2, dor);
-
- // reset the disk motor timeout value of INT 08
- write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
- // check port 3f4 for drive readiness
- val8 = inb(0x3f4);
- if ( (val8 & 0xf0) != 0x80 )
- BX_PANIC("int13_diskette:f02: ctrl not ready\n");
+ floppy_prepare_controller(drive);
// send read-normal-data command (9 bytes) to controller
outb(0x03f5, 0xe6); // e6: read normal data
outb(0x03f5, head);
outb(0x03f5, sector);
outb(0x03f5, 2); // 512 byte sector size
- outb(0x03f5, 0); // last sector number possible on track
+ outb(0x03f5, sector + num_sectors - 1); // last sector to read on track
outb(0x03f5, 0); // Gap length
outb(0x03f5, 0xff); // Gap length
- // turn on interrupts
+ // turn on interrupts
ASM_START
sti
ASM_END
// wait on 40:3e bit 7 to become 1
- val8 = (read_byte(0x0000, 0x043e) & 0x80);
- while ( val8 == 0 ) {
- val8 = (read_byte(0x0000, 0x043e) & 0x80);
+ do {
+ val8 = read_byte(0x0040, 0x0040);
+ if (val8 == 0) {
+ floppy_reset_controller();
+ SET_AH(0x80); // drive not ready (timeout)
+ set_diskette_ret_status(0x80);
+ SET_AL(0); // no sectors read
+ SET_CF(); // error occurred
+ return;
}
+ val8 = (read_byte(0x0040, 0x003e) & 0x80);
+ } while ( val8 == 0 );
- val8 = 0; // separate asm from while() loop
- // turn off interrupts
+ val8 = 0; // separate asm from while() loop
+ // turn off interrupts
ASM_START
cli
ASM_END
// set 40:3e bit 7 to 0
- val8 = read_byte(0x0000, 0x043e);
+ val8 = read_byte(0x0040, 0x003e);
val8 &= 0x7f;
- write_byte(0x0000, 0x043e, val8);
+ write_byte(0x0040, 0x003e, val8);
// check port 3f4 for accessibility to status bytes
val8 = inb(0x3f4);
SET_AL(0); // no sectors read
SET_CF(); // error occurred
return;
- }
+ }
// ??? should track be new val from return_status[3] ?
set_diskette_current_cyl(drive, track);
SET_AH(0x00); // success
CLEAR_CF(); // success
return;
- }
- else if (ah == 0x03) {
+ } else if (ah == 0x03) {
// Write Diskette Sectors
//-----------------------------------
if ( base_address < base_es ) {
// in case of carry, adjust page by 1
page++;
- }
+ }
base_count = (num_sectors * 512) - 1;
// check for 64K boundary overrun
SET_AL(0); // no sectors read
SET_CF(); // error occurred
return;
- }
+ }
BX_DEBUG_INT13_FL("masking DMA-1 c2\n");
outb(0x000a, 0x06);
//--------------------------------------
// set up floppy controller for transfer
//--------------------------------------
+ floppy_prepare_controller(drive);
- // set 40:3e bit 7 to 0
- val8 = read_byte(0x0000, 0x043e);
- val8 &= 0x7f;
- write_byte(0x0000, 0x043e, val8);
-
- // turn on motor of selected drive, DMA & int enabled, normal operation
- if (drive)
- dor = 0x20;
- else
- dor = 0x10;
- dor |= 0x0c;
- dor |= drive;
- outb(0x03f2, dor);
-
- // reset the disk motor timeout value of INT 08
- write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
- // check port 3f4 for drive readiness
- val8 = inb(0x3f4);
- if ( (val8 & 0xf0) != 0x80 )
- BX_PANIC("int13_diskette:f03: ctrl not ready\n");
-
- // send read-normal-data command (9 bytes) to controller
+ // send write-normal-data command (9 bytes) to controller
outb(0x03f5, 0xc5); // c5: write normal data
outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
outb(0x03f5, track);
outb(0x03f5, head);
outb(0x03f5, sector);
outb(0x03f5, 2); // 512 byte sector size
- outb(0x03f5, 0); // last sector number possible on track
+ outb(0x03f5, sector + num_sectors - 1); // last sector to write on track
outb(0x03f5, 0); // Gap length
outb(0x03f5, 0xff); // Gap length
- // turn on interrupts
+ // turn on interrupts
ASM_START
sti
ASM_END
// wait on 40:3e bit 7 to become 1
- val8 = (read_byte(0x0000, 0x043e) & 0x80);
- while ( val8 == 0 ) {
- val8 = (read_byte(0x0000, 0x043e) & 0x80);
+ do {
+ val8 = read_byte(0x0040, 0x0040);
+ if (val8 == 0) {
+ floppy_reset_controller();
+ SET_AH(0x80); // drive not ready (timeout)
+ set_diskette_ret_status(0x80);
+ SET_AL(0); // no sectors written
+ SET_CF(); // error occurred
+ return;
}
+ val8 = (read_byte(0x0040, 0x003e) & 0x80);
+ } while ( val8 == 0 );
- val8 = 0; // separate asm from while() loop
- // turn off interrupts
+ val8 = 0; // separate asm from while() loop
+ // turn off interrupts
ASM_START
cli
ASM_END
// set 40:3e bit 7 to 0
- val8 = read_byte(0x0000, 0x043e);
+ val8 = read_byte(0x0040, 0x003e);
val8 &= 0x7f;
- write_byte(0x0000, 0x043e, val8);
+ write_byte(0x0040, 0x003e, val8);
// check port 3f4 for accessibility to status bytes
val8 = inb(0x3f4);
SET_AH(0x00); // success
CLEAR_CF(); // success
return;
- }
- else { // if (ah == 0x04)
+ } else { // if (ah == 0x04)
// Verify Diskette Sectors
// ??? should track be new val from return_status[3] ?
CLEAR_CF(); // success
SET_AH(0x00); // success
return;
- }
-
+ }
+ break;
case 0x05: // format diskette track
BX_DEBUG_INT13_FL("floppy f05\n");
SET_AH(1);
set_diskette_ret_status(1);
SET_CF(); // error occurred
- }
+ }
// see if drive exists
if (floppy_drive_exists(drive) == 0) {
set_diskette_ret_status(0x80);
SET_CF(); // error occurred
return;
- }
+ }
// see if media in drive, and type is known
if (floppy_media_known(drive) == 0) {
SET_AL(0); // no sectors read
SET_CF(); // error occurred
return;
- }
}
+ }
// set up DMA controller for transfer
page = (ES >> 12); // upper 4 bits
if ( base_address < base_es ) {
// in case of carry, adjust page by 1
page++;
- }
+ }
base_count = (num_sectors * 4) - 1;
// check for 64K boundary overrun
SET_AL(0); // no sectors read
SET_CF(); // error occurred
return;
- }
+ }
outb(0x000a, 0x06);
outb(0x000c, 0x00); // clear flip-flop
outb(0x000a, 0x02);
// set up floppy controller for transfer
- val8 = read_byte(0x0000, 0x043e);
- val8 &= 0x7f;
- write_byte(0x0000, 0x043e, val8);
- // turn on motor of selected drive, DMA & int enabled, normal operation
- if (drive)
- dor = 0x20;
- else
- dor = 0x10;
- dor |= 0x0c;
- dor |= drive;
- outb(0x03f2, dor);
+ floppy_prepare_controller(drive);
- // reset the disk motor timeout value of INT 08
- write_byte(0x40,0x40, BX_FLOPPY_ON_CNT);
-
- // check port 3f4 for drive readiness
- val8 = inb(0x3f4);
- if ( (val8 & 0xf0) != 0x80 )
- BX_PANIC("int13_diskette:f05: ctrl not ready\n");
-
- // send read-normal-data command (6 bytes) to controller
+ // send format-track command (6 bytes) to controller
outb(0x03f5, 0x4d); // 4d: format track
outb(0x03f5, (head << 2) | drive); // HD DR1 DR2
outb(0x03f5, 2); // 512 byte sector size
ASM_START
sti
ASM_END
+
// wait on 40:3e bit 7 to become 1
- val8 = (read_byte(0x0000, 0x043e) & 0x80);
- while ( val8 == 0 ) {
- val8 = (read_byte(0x0000, 0x043e) & 0x80);
+ do {
+ val8 = read_byte(0x0040, 0x0040);
+ if (val8 == 0) {
+ floppy_reset_controller();
+ SET_AH(0x80); // drive not ready (timeout)
+ set_diskette_ret_status(0x80);
+ SET_CF(); // error occurred
+ return;
}
- val8 = 0; // separate asm from while() loop
- // turn off interrupts
+ val8 = (read_byte(0x0040, 0x003e) & 0x80);
+ } while ( val8 == 0 );
+
+ val8 = 0; // separate asm from while() loop
+ // turn off interrupts
ASM_START
cli
ASM_END
// set 40:3e bit 7 to 0
- val8 = read_byte(0x0000, 0x043e);
+ val8 = read_byte(0x0040, 0x003e);
val8 &= 0x7f;
- write_byte(0x0000, 0x043e, val8);
+ write_byte(0x0040, 0x003e, val8);
// check port 3f4 for accessibility to status bytes
val8 = inb(0x3f4);
if ( (val8 & 0xc0) != 0xc0 )
Bit16u bootseg;
Bit16u bootip;
Bit16u status;
+ Bit16u bootfirst;
- struct ipl_entry e;
+ ipl_entry_t e;
// if BX_ELTORITO_BOOT is not defined, old behavior
// check bit 5 in CMOS reg 0x2d. load either 0x00 or 0x80 into DL
// CMOS reg 0x38 & 0xf0 : 3rd boot device
// boot device codes:
// 0x00 : not defined
- // 0x01 : first floppy
+ // 0x01 : first floppy
// 0x02 : first harddrive
// 0x03 : first cdrom
// 0x04 - 0x0f : PnP expansion ROMs (e.g. Etherboot)
bootdev |= ((inb_cmos(0x38) & 0xf0) << 4);
bootdev >>= 4 * seq_nr;
bootdev &= 0xf;
- if (bootdev == 0) BX_PANIC("No bootable device.\n");
-
+
+ /* Read user selected device */
+ bootfirst = read_word(ebda_seg, IPL_BOOTFIRST_OFFSET);
+ if (bootfirst != 0xFFFF) {
+ bootdev = bootfirst;
+ /* User selected device not set */
+ write_word(ebda_seg, IPL_BOOTFIRST_OFFSET, 0xFFFF);
+ /* Reset boot sequence */
+ write_word(ebda_seg, IPL_SEQUENCE_OFFSET, 0xFFFF);
+ } else if (bootdev == 0) BX_PANIC("No bootable device.\n");
+
/* Translate from CMOS runes to an IPL table offset by subtracting 1 */
bootdev -= 1;
-#else
+#else
if (seq_nr ==2) BX_PANIC("No more boot devices.");
- if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1))
+ if (!!(inb_cmos(0x2d) & 0x20) ^ (seq_nr == 1))
/* Boot from floppy if the bit is set or it's the second boot */
bootdev = 0x00;
- else
+ else
bootdev = 0x01;
#endif
/* Do the loading, and set up vector as a far pointer to the boot
* address, and bootdrv as the boot drive */
- print_boot_device(e.type);
+ print_boot_device(&e);
switch(e.type) {
- case 0x01: /* FDD */
- case 0x02: /* HDD */
+ case IPL_TYPE_FLOPPY: /* FDD */
+ case IPL_TYPE_HARDDISK: /* HDD */
- bootdrv = (e.type == 0x02) ? 0x80 : 0x00;
+ bootdrv = (e.type == IPL_TYPE_HARDDISK) ? 0x80 : 0x00;
bootseg = 0x07c0;
status = 0;
mov dl, _int18_function.bootdrv + 2[bp]
mov ax, _int18_function.bootseg + 2[bp]
mov es, ax ;; segment
- mov bx, #0x0000 ;; offset
+ xor bx, bx ;; offset
mov ah, #0x02 ;; function 2, read diskette sector
mov al, #0x01 ;; read 1 sector
mov ch, #0x00 ;; track 0
pop ax
pop bp
ASM_END
-
+
if (status != 0) {
print_boot_failure(e.type, 1);
return;
/* Always check the signature on a HDD boot sector; on FDD, only do
* the check if the CMOS doesn't tell us to skip it */
- if (e.type != 0x00 || !((inb_cmos(0x38) & 0x01))) {
+ if ((e.type != IPL_TYPE_FLOPPY) || !((inb_cmos(0x38) & 0x01))) {
if (read_word(bootseg,0x1fe) != 0xaa55) {
print_boot_failure(e.type, 0);
return;
break;
#if BX_ELTORITO_BOOT
- case 0x03: /* CD-ROM */
+ case IPL_TYPE_CDROM: /* CD-ROM */
status = cdrom_boot();
// If failure
break;
#endif
- case 0x80: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */
+ case IPL_TYPE_BEV: /* Expansion ROM with a Bootstrap Entry Vector (a far pointer) */
bootseg = e.vector >> 16;
bootip = e.vector & 0xffff;
break;
default: return;
}
-
+ /* Debugging info */
+ BX_INFO("Booting from %x:%x\n", bootseg, bootip);
+
/* Jump to the boot vector */
ASM_START
mov bp, sp
+// push cs
+// push #int18_handler
;; Build an iret stack frame that will take us to the boot vector.
;; iret pops ip, then cs, then flags, so push them in the opposite order.
pushf
- mov ax, _int18_function.bootseg + 0[bp]
+ mov ax, _int18_function.bootseg + 0[bp]
push ax
- mov ax, _int18_function.bootip + 0[bp]
+ mov ax, _int18_function.bootip + 0[bp]
push ax
;; Set the magic number in ax and the boot drive in dl.
mov ax, #0xaa55
} else if (regs.u.r8.bl == 0x83) {
BX_INFO("bad PCI vendor ID %04x\n", regs.u.r16.dx);
} else if (regs.u.r8.bl == 0x86) {
- BX_INFO("PCI device %04x:%04x not found\n", regs.u.r16.dx, regs.u.r16.cx);
+ if (regs.u.r8.al == 0x02) {
+ BX_INFO("PCI device %04x:%04x not found at index %d\n", regs.u.r16.dx, regs.u.r16.cx, regs.u.r16.si);
+ } else {
+ BX_INFO("no PCI device with class code 0x%02x%04x found at index %d\n", regs.u.r8.cl, regs.u.r16.dx, regs.u.r16.si);
+ }
}
regs.u.r8.ah = regs.u.r8.bl;
SetCF(iret_addr.flags);
// Done waiting.
Bit16u segment, offset;
- offset = read_word( 0x40, 0x98 );
- segment = read_word( 0x40, 0x9A );
+ segment = read_word( 0x40, 0x98 );
+ offset = read_word( 0x40, 0x9A );
    write_byte( 0x40, 0xA0, 0 );  // Turn off status byte.
outb_cmos( 0xB, registerB & 0x37 ); // Clear the Periodic Interrupt.
- write_byte( segment, offset, 0x80 ); // Write to specified flag byte.
+ write_byte(segment, offset, read_byte(segment, offset) | 0x80 ); // Write to specified flag byte.
} else {
// Continue waiting.
time -= 0x3D1;
#endif
int13_disk:
+ ;; int13_harddisk modifies high word of EAX
+ shr eax, #16
+ push ax
call _int13_harddisk
+ pop ax
+ shl eax, #16
int13_out:
pop ds
pop es
popa
- iret
+ iret
;----------
;- INT18h -
xor ax, ax
mov ss, ax
- ;; Get the boot sequence number out of the IPL memory
;; The first time we do this it will have been set to -1 so
;; we will start from device 0.
- mov bx, #IPL_SEG
+ mov ds, ax
+ mov bx, word ptr [0x40E] ;; EBDA segment
mov ds, bx ;; Set segment
mov bx, IPL_SEQUENCE_OFFSET ;; BX is now the sequence number
inc bx ;; ++
mov IPL_SEQUENCE_OFFSET, bx ;; Write it back
- mov ds, ax ;; and reset the segment to zero.
+ mov ds, ax ;; and reset the segment to zero.
;; Call the C code for the next boot device
push bx
+
call _int18_function
;; Boot failed: invoke the boot recovery function...
;- INT19h -
;----------
int19_relocated: ;; Boot function, relocated
+
;;
;; *** Warning: INT 19h resets the whole machine ***
;;
;; boot sequence will start, which is more or less the required behaviour.
;;
;; Reset SP and SS
+
mov ax, #0xfffe
mov sp, ax
xor ax, ax
mov ss, ax
+
call _machine_reset
;----------
;- POST: Floppy Drive -
;----------------------
floppy_drive_post:
- mov ax, #0x0000
+ xor ax, ax
mov ds, ax
mov al, #0x00
mov dx, #0x03f6
out dx, al
- mov ax, #0x0000
+ xor ax, ax
mov ds, ax
mov 0x0474, al /* hard disk status of last operation */
mov 0x0477, al /* hard disk port offset (XT only ???) */
SET_INT_VECTOR(0x76, #0xF000, #int76_handler)
;; INT 41h: hard disk 0 configuration pointer
;; INT 46h: hard disk 1 configuration pointer
- SET_INT_VECTOR(0x41, #EBDA_SEG, #0x003D)
- SET_INT_VECTOR(0x46, #EBDA_SEG, #0x004D)
+ SET_INT_VECTOR(0x41, word ptr [0x40E], #0x003D) /* EBDA:003D */
+ SET_INT_VECTOR(0x46, word ptr [0x40E], #0x004D) /* EBDA:004D */
;; move disk geometry data from CMOS to EBDA disk parameter table(s)
mov al, #0x12
;; 22 landing zone high D
;; 23 sectors/track E
- mov ax, #EBDA_SEG
+ xor ax, ax
+ mov ds, ax
+ mov ax, word ptr [0x40E] ;; EBDA segment
mov ds, ax
;;; Filling EBDA table for hard disk 0.
;; 0x2b landing zone high D
;; 0x2c sectors/track E
;;; Fill EBDA table for hard disk 1.
- mov ax, #EBDA_SEG
+ xor ax, ax
+ mov ds, ax
+ mov ax, word ptr [0x40E] ;; EBDA segment
mov ds, ax
mov al, #0x28
out #0x70, al
;--------------------
; relocated here because the primary POST area isn't big enough.
eoi_jmp_post:
- call eoi_both_pics
+ mov al, #0x20
+ out #0xA0, al ;; slave PIC EOI
+ mov al, #0x20
+ out #0x20, al ;; master PIC EOI
+jmp_post_0x467:
xor ax, ax
mov ds, ax
jmp far ptr [0x467]
+iret_post_0x467:
+ xor ax, ax
+ mov ds, ax
+
+ mov sp, [0x467]
+ mov ss, [0x469]
+ iret
+
+retf_post_0x467:
+ xor ax, ax
+ mov ds, ax
+
+ mov sp, [0x467]
+ mov ss, [0x469]
+ retf
+
+s3_post:
+#if BX_ROMBIOS32
+ call rombios32_init
+#endif
+ call _s3_resume
+ mov bl, #0x00
+ and ax, ax
+ jz normal_post
+ call _s3_resume_panic
;--------------------
eoi_both_pics:
#endif
-ASM_END
#include "32bitgateway.c"
+ASM_END
+#include "tcgbios.c"
ASM_START
;--------------------
.align 16
bios32_entry_point:
- pushf
- cmp eax, #0x49435024
+ pushfd
+ cmp eax, #0x49435024 ;; "$PCI"
jne unknown_service
mov eax, #0x80000000
mov dx, #0x0cf8
out dx, eax
mov dx, #0x0cfc
in eax, dx
- cmp eax, #0x12378086
+#ifdef PCI_FIXED_HOST_BRIDGE
+ cmp eax, #PCI_FIXED_HOST_BRIDGE
jne unknown_service
+#else
+ ;; say ok if a device is present
+ cmp eax, #0xffffffff
+ je unknown_service
+#endif
mov ebx, #0x000f0000
mov ecx, #0
mov edx, #pcibios_protected
unknown_service:
mov al, #0x80
bios32_end:
- popf
+#ifdef BX_QEMU
+ and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
+#endif
+ popfd
retf
.align 16
pcibios_protected:
- pushf
+ pushfd
cli
push esi
push edi
jne pci_pro_f02
mov bx, #0x0210
mov cx, #0
- mov edx, #0x20494350
+ mov edx, #0x20494350 ;; "PCI "
mov al, #0x01
jmp pci_pro_ok
pci_pro_f02: ;; find pci device
cmp al, #0x02
- jne pci_pro_f08
+ jne pci_pro_f03
shl ecx, #16
mov cx, dx
- mov bx, #0x0000
+ xor bx, bx
mov di, #0x00
pci_pro_devloop:
call pci_pro_select_reg
jne pci_pro_devloop
mov ah, #0x86
jmp pci_pro_fail
+pci_pro_f03: ;; find class code
+ cmp al, #0x03
+ jne pci_pro_f08
+ xor bx, bx
+ mov di, #0x08
+pci_pro_devloop2:
+ call pci_pro_select_reg
+ mov dx, #0x0cfc
+ in eax, dx
+ shr eax, #8
+ cmp eax, ecx
+ jne pci_pro_nextdev2
+ cmp si, #0
+ je pci_pro_ok
+ dec si
+pci_pro_nextdev2:
+ inc bx
+ cmp bx, #0x0100
+ jne pci_pro_devloop2
+ mov ah, #0x86
+ jmp pci_pro_fail
pci_pro_f08: ;; read configuration byte
cmp al, #0x08
jne pci_pro_f09
pci_pro_fail:
pop edi
pop esi
- sti
- popf
+#ifdef BX_QEMU
+ and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
+#endif
+ popfd
stc
retf
pci_pro_ok:
xor ah, ah
pop edi
pop esi
- sti
- popf
+#ifdef BX_QEMU
+ and dword ptr[esp+8],0xfffffffc ;; reset CS.RPL for kqemu
+#endif
+ popfd
clc
retf
out dx, eax
mov dx, #0x0cfc
in eax, dx
- cmp eax, #0x12378086
+#ifdef PCI_FIXED_HOST_BRIDGE
+ cmp eax, #PCI_FIXED_HOST_BRIDGE
je pci_present
+#else
+ ;; say ok if a device is present
+ cmp eax, #0xffffffff
+ jne pci_present
+#endif
pop dx
pop eax
mov ah, #0xff
mov ax, #0x0001
mov bx, #0x0210
mov cx, #0
- mov edx, #0x20494350
+ mov edx, #0x20494350 ;; "PCI "
mov edi, #0xf0000
mov di, #pcibios_protected
clc
push esi
push edi
cmp al, #0x02
- jne pci_real_f08
+ jne pci_real_f03
shl ecx, #16
mov cx, dx
- mov bx, #0x0000
+ xor bx, bx
mov di, #0x00
pci_real_devloop:
call pci_real_select_reg
jne pci_real_devloop
mov dx, cx
shr ecx, #16
- mov ah, #0x86
+ mov ax, #0x8602
+ jmp pci_real_fail
+pci_real_f03: ;; find class code
+ cmp al, #0x03
+ jne pci_real_f08
+ xor bx, bx
+ mov di, #0x08
+pci_real_devloop2:
+ call pci_real_select_reg
+ mov dx, #0x0cfc
+ in eax, dx
+ shr eax, #8
+ cmp eax, ecx
+ jne pci_real_nextdev2
+ cmp si, #0
+ je pci_real_ok
+ dec si
+pci_real_nextdev2:
+ inc bx
+ cmp bx, #0x0100
+ jne pci_real_devloop2
+ mov dx, cx
+ shr ecx, #16
+ mov ax, #0x8603
jmp pci_real_fail
pci_real_f08: ;; read configuration byte
cmp al, #0x08
jmp pci_real_ok
pci_real_f0d: ;; write configuration dword
cmp al, #0x0d
- jne pci_real_unknown
+ jne pci_real_f0e
call pci_real_select_reg
push dx
mov dx, #0x0cfc
out dx, eax
pop dx
jmp pci_real_ok
+pci_real_f0e: ;; get irq routing options
+ cmp al, #0x0e
+ jne pci_real_unknown
+ SEG ES
+ cmp word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
+ jb pci_real_too_small
+ SEG ES
+ mov word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
+ pushf
+ push ds
+ push es
+ push cx
+ push si
+ push di
+ cld
+ mov si, #pci_routing_table_structure_start
+ push cs
+ pop ds
+ SEG ES
+ mov cx, [di+2]
+ SEG ES
+ mov es, [di+4]
+ mov di, cx
+ mov cx, #pci_routing_table_structure_end - pci_routing_table_structure_start
+ rep
+ movsb
+ pop di
+ pop si
+ pop cx
+ pop es
+ pop ds
+ popf
+ mov bx, #(1 << 9) | (1 << 11) ;; irq 9 and 11 are used
+ jmp pci_real_ok
+pci_real_too_small:
+ SEG ES
+ mov word ptr [di], #pci_routing_table_structure_end - pci_routing_table_structure_start
+ mov ah, #0x89
+ jmp pci_real_fail
+
pci_real_unknown:
mov ah, #0x81
pci_real_fail:
out dx, eax
pop dx
ret
-
+
.align 16
pci_routing_table_structure:
db 0x24, 0x50, 0x49, 0x52 ;; "$PIR" signature
dw 32 + (6 * 16) ;; table size
db 0 ;; PCI interrupt router bus
db 0x08 ;; PCI interrupt router DevFunc
- dw 0x0000 ;; PCI exclusive IRQs
+ dw 0x0000 ;; PCI exclusive IRQs
dw 0x8086 ;; compatible PCI interrupt router vendor ID
- dw 0x7000 ;; compatible PCI interrupt router device ID
+ dw 0x122e ;; compatible PCI interrupt router device ID
dw 0,0 ;; Miniport data
db 0,0,0,0,0,0,0,0,0,0,0 ;; reserved
- db 0x07 ;; checksum
+ db 0x37 ;; checksum
+pci_routing_table_structure_start:
;; first slot entry PCI-to-ISA (embedded)
db 0 ;; pci bus number
db 0x08 ;; pci device number (bit 7-3)
db 0x61 ;; link value INTA#: pointer into PCI2ISA config space
- dw 0x0c20 ;; IRQ bitmap INTA#
+ dw 0x0c20 ;; IRQ bitmap INTA#
db 0x62 ;; link value INTB#
- dw 0x0c20 ;; IRQ bitmap INTB#
+ dw 0x0c20 ;; IRQ bitmap INTB#
db 0x63 ;; link value INTC#
- dw 0x0c20 ;; IRQ bitmap INTC#
+ dw 0x0c20 ;; IRQ bitmap INTC#
db 0x60 ;; link value INTD#
dw 0x0c20 ;; IRQ bitmap INTD#
db 0 ;; physical slot (0 = embedded)
db 0 ;; pci bus number
db 0x10 ;; pci device number (bit 7-3)
db 0x62 ;; link value INTA#
- dw 0x0c20 ;; IRQ bitmap INTA#
+ dw 0x0c20 ;; IRQ bitmap INTA#
db 0x63 ;; link value INTB#
- dw 0x0c20 ;; IRQ bitmap INTB#
+ dw 0x0c20 ;; IRQ bitmap INTB#
db 0x60 ;; link value INTC#
- dw 0x0c20 ;; IRQ bitmap INTC#
+ dw 0x0c20 ;; IRQ bitmap INTC#
db 0x61 ;; link value INTD#
dw 0x0c20 ;; IRQ bitmap INTD#
db 1 ;; physical slot (0 = embedded)
db 0 ;; pci bus number
db 0x18 ;; pci device number (bit 7-3)
db 0x63 ;; link value INTA#
- dw 0x0c20 ;; IRQ bitmap INTA#
+ dw 0x0c20 ;; IRQ bitmap INTA#
db 0x60 ;; link value INTB#
- dw 0x0c20 ;; IRQ bitmap INTB#
+ dw 0x0c20 ;; IRQ bitmap INTB#
db 0x61 ;; link value INTC#
- dw 0x0c20 ;; IRQ bitmap INTC#
+ dw 0x0c20 ;; IRQ bitmap INTC#
db 0x62 ;; link value INTD#
dw 0x0c20 ;; IRQ bitmap INTD#
db 2 ;; physical slot (0 = embedded)
db 0 ;; pci bus number
db 0x20 ;; pci device number (bit 7-3)
db 0x60 ;; link value INTA#
- dw 0x0c20 ;; IRQ bitmap INTA#
+ dw 0x0c20 ;; IRQ bitmap INTA#
db 0x61 ;; link value INTB#
- dw 0x0c20 ;; IRQ bitmap INTB#
+ dw 0x0c20 ;; IRQ bitmap INTB#
db 0x62 ;; link value INTC#
- dw 0x0c20 ;; IRQ bitmap INTC#
+ dw 0x0c20 ;; IRQ bitmap INTC#
db 0x63 ;; link value INTD#
dw 0x0c20 ;; IRQ bitmap INTD#
db 3 ;; physical slot (0 = embedded)
db 0 ;; pci bus number
db 0x28 ;; pci device number (bit 7-3)
db 0x61 ;; link value INTA#
- dw 0x0c20 ;; IRQ bitmap INTA#
+ dw 0x0c20 ;; IRQ bitmap INTA#
db 0x62 ;; link value INTB#
- dw 0x0c20 ;; IRQ bitmap INTB#
+ dw 0x0c20 ;; IRQ bitmap INTB#
db 0x63 ;; link value INTC#
- dw 0x0c20 ;; IRQ bitmap INTC#
+ dw 0x0c20 ;; IRQ bitmap INTC#
db 0x60 ;; link value INTD#
dw 0x0c20 ;; IRQ bitmap INTD#
db 4 ;; physical slot (0 = embedded)
db 0 ;; pci bus number
db 0x30 ;; pci device number (bit 7-3)
db 0x62 ;; link value INTA#
- dw 0x0c20 ;; IRQ bitmap INTA#
+ dw 0x0c20 ;; IRQ bitmap INTA#
db 0x63 ;; link value INTB#
- dw 0x0c20 ;; IRQ bitmap INTB#
+ dw 0x0c20 ;; IRQ bitmap INTB#
db 0x60 ;; link value INTC#
- dw 0x0c20 ;; IRQ bitmap INTC#
+ dw 0x0c20 ;; IRQ bitmap INTC#
db 0x61 ;; link value INTD#
dw 0x0c20 ;; IRQ bitmap INTD#
db 5 ;; physical slot (0 = embedded)
db 0 ;; reserved
+pci_routing_table_structure_end:
+
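For reference (not part of the patch itself): the $PIR checksum byte changes from 0x07 to 0x37
above because, per the PCI IRQ Routing Table specification, every byte of the table -- checksum
included -- must sum to zero modulo 256, and swapping the compatible router device ID from
0x7000 to 0x122e shifts that sum. A minimal validation sketch in C, assuming the Bit8u/Bit16u
typedefs defined elsewhere in rombios and a flat pointer to the table (size here is
32 + 6*16, as declared above):

/* Illustrative only: returns 1 when the $PIR table's bytes, including the
 * checksum byte, add up to zero modulo 256. */
static Bit8u pir_table_ok(tbl, size)
    Bit8u *tbl;
    Bit16u size;
{
    Bit16u i;
    Bit8u sum = 0;

    for (i = 0; i < size; i++)
        sum += tbl[i];
    return (sum == 0);
}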
+#if !BX_ROMBIOS32 && !defined(HVMASSIST)
+pci_irq_list:
+ db 11, 10, 9, 5;
+
+pcibios_init_sel_reg:
+ push eax
+ mov eax, #0x800000
+ mov ax, bx
+ shl eax, #8
+ and dl, #0xfc
+ or al, dl
+ mov dx, #0x0cf8
+ out dx, eax
+ pop eax
+ ret
+
+pcibios_init_iomem_bases:
+ push bp
+ mov bp, sp
+ mov eax, #0xe0000000 ;; base for memory init
+ push eax
+ mov ax, #0xc000 ;; base for i/o init
+ push ax
+ mov ax, #0x0010 ;; start at base address #0
+ push ax
+ mov bx, #0x0008
+pci_init_io_loop1:
+ mov dl, #0x00
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ in ax, dx
+ cmp ax, #0xffff
+ jz next_pci_dev
+ mov dl, #0x04 ;; disable i/o and memory space access
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ in al, dx
+ and al, #0xfc
+ out dx, al
+pci_init_io_loop2:
+ mov dl, [bp-8]
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ in eax, dx
+ test al, #0x01
+ jnz init_io_base
+ mov ecx, eax
+ mov eax, #0xffffffff
+ out dx, eax
+ in eax, dx
+ cmp eax, ecx
+ je next_pci_base
+ xor eax, #0xffffffff
+ mov ecx, eax
+ mov eax, [bp-4]
+ out dx, eax
+ add eax, ecx ;; calculate next free mem base
+ add eax, #0x01000000
+ and eax, #0xff000000
+ mov [bp-4], eax
+ jmp next_pci_base
+init_io_base:
+ mov cx, ax
+ mov ax, #0xffff
+ out dx, ax
+ in ax, dx
+ cmp ax, cx
+ je next_pci_base
+ xor ax, #0xfffe
+ mov cx, ax
+ mov ax, [bp-6]
+ out dx, ax
+ add ax, cx ;; calculate next free i/o base
+ add ax, #0x0100
+ and ax, #0xff00
+ mov [bp-6], ax
+next_pci_base:
+ mov al, [bp-8]
+ add al, #0x04
+ cmp al, #0x28
+ je enable_iomem_space
+ mov byte ptr[bp-8], al
+ jmp pci_init_io_loop2
+enable_iomem_space:
+ mov dl, #0x04 ;; enable i/o and memory space access if available
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ in al, dx
+ or al, #0x07
+ out dx, al
+next_pci_dev:
+ mov byte ptr[bp-8], #0x10
+ inc bx
+ cmp bx, #0x0100
+ jne pci_init_io_loop1
+ mov sp, bp
+ pop bp
+ ret
+
+pcibios_init_set_elcr:
+ push ax
+ push cx
+ mov dx, #0x04d0
+ test al, #0x08
+ jz is_master_pic
+ inc dx
+ and al, #0x07
+is_master_pic:
+ mov cl, al
+ mov bl, #0x01
+ shl bl, cl
+ in al, dx
+ or al, bl
+ out dx, al
+ pop cx
+ pop ax
+ ret
+
+pcibios_init_irqs:
+ push ds
+ push bp
+ mov ax, #0xf000
+ mov ds, ax
+ mov dx, #0x04d0 ;; reset ELCR1 + ELCR2
+ mov al, #0x00
+ out dx, al
+ inc dx
+ out dx, al
+ mov si, #pci_routing_table_structure
+ mov bh, [si+8]
+ mov bl, [si+9]
+ mov dl, #0x00
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ in eax, dx
+ cmp eax, [si+12] ;; check irq router
+ jne pci_init_end
+ mov dl, [si+34]
+ call pcibios_init_sel_reg
+ push bx ;; save irq router bus + devfunc
+ mov dx, #0x0cfc
+ mov ax, #0x8080
+ out dx, ax ;; reset PIRQ route control
+ add dx, #2
+ out dx, ax
+ mov ax, [si+6]
+ sub ax, #0x20
+ shr ax, #4
+ mov cx, ax
+ add si, #0x20 ;; set pointer to 1st entry
+ mov bp, sp
+ mov ax, #pci_irq_list
+ push ax
+ xor ax, ax
+ push ax
+pci_init_irq_loop1:
+ mov bh, [si]
+ mov bl, [si+1]
+pci_init_irq_loop2:
+ mov dl, #0x00
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ in ax, dx
+ cmp ax, #0xffff
+ jnz pci_test_int_pin
+ test bl, #0x07
+ jz next_pir_entry
+ jmp next_pci_func
+pci_test_int_pin:
+ mov dl, #0x3c
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfd
+ in al, dx
+ and al, #0x07
+ jz next_pci_func
+ dec al ;; determine pirq reg
+ mov dl, #0x03
+ mul al, dl
+ add al, #0x02
+ xor ah, ah
+ mov bx, ax
+ mov al, [si+bx]
+ mov dl, al
+ mov bx, [bp]
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ and al, #0x03
+ add dl, al
+ in al, dx
+ cmp al, #0x80
+ jb pirq_found
+ mov bx, [bp-2] ;; pci irq list pointer
+ mov al, [bx]
+ out dx, al
+ inc bx
+ mov [bp-2], bx
+ call pcibios_init_set_elcr
+pirq_found:
+ mov bh, [si]
+ mov bl, [si+1]
+ add bl, [bp-3] ;; pci function number
+ mov dl, #0x3c
+ call pcibios_init_sel_reg
+ mov dx, #0x0cfc
+ out dx, al
+next_pci_func:
+ inc byte ptr[bp-3]
+ inc bl
+ test bl, #0x07
+ jnz pci_init_irq_loop2
+next_pir_entry:
+ add si, #0x10
+ mov byte ptr[bp-3], #0x00
+ loop pci_init_irq_loop1
+ mov sp, bp
+ pop bx
+pci_init_end:
+ pop bp
+ pop ds
+ ret
+#endif // !BX_ROMBIOS32
#endif // BX_PCIBIOS
+#if BX_ROMBIOS32
+rombios32_init:
+ ;; save a20 and enable it
+ in al, 0x92
+ push ax
+ or al, #0x02
+ out 0x92, al
+
+ ;; save SS:SP to the BDA
+ xor ax, ax
+ mov ds, ax
+ mov 0x0469, ss
+ mov 0x0467, sp
+
+ SEG CS
+ lidt [pmode_IDT_info]
+ SEG CS
+ lgdt [rombios32_gdt_48]
+ ;; set PE bit in CR0
+ mov eax, cr0
+ or al, #0x01
+ mov cr0, eax
+ ;; start protected mode code: ljmpl 0x10:rombios32_init1
+ db 0x66, 0xea
+ dw rombios32_05
+ dw 0x000f ;; high 16 bit address
+ dw 0x0010
+
+use32 386
+rombios32_05:
+ ;; init data segments
+ mov eax, #0x18
+ mov ds, ax
+ mov es, ax
+ mov ss, ax
+ xor eax, eax
+ mov fs, ax
+ mov gs, ax
+ cld
+
+ ;; init the stack pointer to point below EBDA
+ mov ax, [0x040e]
+ shl eax, #4
+ mov esp, #-0x10
+ add esp, eax
+
+ ;; pass pointer to s3_resume_flag and s3_resume_vector to rombios32
+ push #0x04b0
+ push #0x04b2
+
+ ;; call rombios32 code
+ mov eax, #0x000e0000
+ call eax
+
+ ;; return to 16 bit protected mode first
+ db 0xea
+ dd rombios32_10
+ dw 0x20
+
+use16 386
+rombios32_10:
+ ;; restore data segment limits to 0xffff
+ mov ax, #0x28
+ mov ds, ax
+ mov es, ax
+ mov ss, ax
+ mov fs, ax
+ mov gs, ax
+
+ ;; reset PE bit in CR0
+ mov eax, cr0
+ and al, #0xFE
+ mov cr0, eax
+
+ ;; far jump to flush CPU queue after transition to real mode
+ JMP_AP(0xf000, rombios32_real_mode)
+
+rombios32_real_mode:
+ ;; restore IDT to normal real-mode defaults
+ SEG CS
+ lidt [rmode_IDT_info]
+
+ xor ax, ax
+ mov ds, ax
+ mov es, ax
+ mov fs, ax
+ mov gs, ax
+
+ ;; restore SS:SP from the BDA
+ mov ss, 0x0469
+ xor esp, esp
+ mov sp, 0x0467
+ ;; restore a20
+ pop ax
+ out 0x92, al
+ ret
+
+rombios32_gdt_48:
+ dw 0x30
+ dw rombios32_gdt
+ dw 0x000f
+
+rombios32_gdt:
+ dw 0, 0, 0, 0
+ dw 0, 0, 0, 0
+ dw 0xffff, 0, 0x9b00, 0x00cf ; 32 bit flat code segment (0x10)
+ dw 0xffff, 0, 0x9300, 0x00cf ; 32 bit flat data segment (0x18)
+ dw 0xffff, 0, 0x9b0f, 0x0000 ; 16 bit code segment base=0xf0000 limit=0xffff
+ dw 0xffff, 0, 0x9300, 0x0000 ; 16 bit data segment base=0x0 limit=0xffff
+#endif // BX_ROMBIOS32
+
+#if BX_PMM
+; according to POST Memory Manager Specification Version 1.01
+.align 16
+pmm_structure:
+ db 0x24,0x50,0x4d,0x4d ;; "$PMM" signature
+ db 0x01 ;; revision
+ db 16 ;; length
+ db (-((pmm_entry_point>>8)+pmm_entry_point+0x20f))&0xff;; checksum
+ dw pmm_entry_point,0xf000 ;; far call entrypoint
+ db 0,0,0,0,0 ;; reserved
+
+pmm_entry_point:
+ pushf
+ pushad
+; Calculate protected-mode address of PMM function args
+ xor eax, eax
+ mov ax, sp
+ xor ebx, ebx
+ mov bx, ss
+ shl ebx, 4
+ lea ebx, [eax+ebx+38] ;; ebx=(ss<<4)+sp+4(far call)+2(pushf)+32(pushad)
+ push ebx
+;
+; Stack layout at this point:
+;
+; : +0x0 +0x2 +0x4 +0x6 +0x8 +0xa +0xc +0xe
+; -----------------------------------------------------------------------
+; sp : [&arg1 ][edi ][esi ][ebp ]
+; sp+0x10: [esp ][ebx ][edx ][ecx ]
+; sp+0x20: [eax ][flags ][ip ][cs ][arg1 ][arg2, ...
+;
+ call _pmm
+ mov bx, sp
+SEG SS
+ mov [bx+0x20], ax
+SEG SS
+ mov [bx+0x18], dx
+ pop ebx
+ popad
+ popf
+ retf
+#endif // BX_PMM
+
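For reference (not part of the patch itself): a PMM client locates these services by scanning
the system BIOS on paragraph boundaries for the "$PMM" signature and verifying that the
structure's bytes sum to zero modulo 256, which is exactly what the hand-computed checksum
byte above guarantees. A rough sketch, assuming flat access to the BIOS image and the Bit8u
typedef used elsewhere in rombios:

/* Illustrative only: 'p' points at a candidate paragraph-aligned location;
 * offset 5 holds the structure length (16 here) and the checksum byte makes
 * all 'length' bytes sum to zero modulo 256. */
static Bit8u pmm_struct_ok(p)
    Bit8u *p;
{
    Bit8u sum = 0, i, len;

    if (p[0] != '$' || p[1] != 'P' || p[2] != 'M' || p[3] != 'M')
        return 0;
    len = p[5];
    for (i = 0; i < len; i++)
        sum += p[i];
    return (sum == 0);
}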
; parallel port detection: base address in DX, index in BX, timeout in CL
detect_parport:
push dx
ret
-;; We need a copy of this string, but we are not actually a PnP BIOS,
+;; We need a copy of this string, but we are not actually a PnP BIOS,
;; so make sure it is *not* aligned, so OSes will not see it if they scan.
.align 16
db 0
pnp_string:
.ascii "$PnP"
-
rom_scan:
;; Scan for existence of valid expansion ROMS.
;; Video ROM: from 0xC0000..0xC7FFF in 2k increments
- ;; General ROM: from 0xC8000..0xDFFFF in 2k increments
- ;; System ROM: only 0xE0000
+ ;; General ROM: from 0xC8000..0xE9FFF in 2k increments
+ ;; System ROM: only 0xF0000
;;
;; Header:
;; Offset Value
;; 3 ROM initialization entry point (FAR CALL)
#if BX_TCGBIOS
+ push ax
call _tcpa_start_option_rom_scan /* specs: 3.2.3.3 + 10.4.3 */
+ pop ax
#endif
- mov cx, #0xc000
+
rom_scan_loop:
+ push ax ;; Save AX
mov ds, cx
mov ax, #0x0004 ;; start with increment of 4 (512-byte) blocks = 2k
cmp [0], #0xAA55 ;; look for signature
push ecx ;; segment where option rom is located at
call _tcpa_option_rom /* specs: 3.2.3.3 */
add sp, #4 ;; pop segment
- pop ecx ;; original ecx
+ pop ecx ;; original ecx
pop ds
pop ax
#endif
- xor bx, bx ;; Restore DS back to 0000:
- mov ds, bx
push ax ;; Save AX
push di ;; Save DI
;; Push addr of ROM entry point
push cx ;; Push seg
push #0x0003 ;; Push offset
- ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
+ ;; Get the BDF into ax before invoking the option ROM
+ mov bl, [2]
+ mov al, bl
+ shr al, #7
+ cmp al, #1
+ jne fetch_bdf
+ mov ax, ds ;; Increment the DS since rom size larger than an segment
+ add ax, #0x1000
+ mov ds, ax
+fetch_bdf:
+ shl bx, #9
+ xor ax, ax
+ mov al, [bx]
+
+ ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
;; That should stop it grabbing INT 19h; we will use its BEV instead.
- mov ax, #0xf000
- mov es, ax
- lea di, pnp_string
+ mov bx, #0xf000
+ mov es, bx
+ lea di, pnp_string
+
+ xor bx, bx ;; Restore DS back to 0000:
+ mov ds, bx
mov bp, sp ;; Call ROM init routine using seg:off on stack
db 0xff ;; call_far ss:[bp+0]
add sp, #2 ;; Pop offset value
pop cx ;; Pop seg value (restore CX)
- ;; Look at the ROM's PnP Expansion header. Properly, we're supposed
- ;; to init all the ROMs and then go back and build an IPL table of
+ ;; Look at the ROM's PnP Expansion header. Properly, we're supposed
+ ;; to init all the ROMs and then go back and build an IPL table of
;; all the bootable devices, but we can get away with one pass.
mov ds, cx ;; ROM base
mov bx, 0x001a ;; 0x1A is the offset into ROM header that contains...
cmp ax, #0x5024 ;; we look for signature "$PnP"
jne no_bev
mov ax, 2[bx]
- cmp ax, #0x506e
+ cmp ax, #0x506e
jne no_bev
+
+ mov ax, 0x16[bx] ;; 0x16 is the offset of Boot Connection Vector
+ cmp ax, #0x0000
+ je no_bcv
+
+ ;; Option ROM has BCV. Run it now.
+ push cx ;; Push seg
+ push ax ;; Push offset
+
+ ;; Point ES:DI at "$PnP", which tells the ROM that we are a PnP BIOS.
+ mov bx, #0xf000
+ mov es, bx
+ lea di, pnp_string
+ /* jump to BCV function entry pointer */
+ mov bp, sp ;; Call ROM BCV routine using seg:off on stack
+ db 0xff ;; call_far ss:[bp+0]
+ db 0x5e
+ db 0
+ cli ;; In case expansion ROM BIOS turns IF on
+ add sp, #2 ;; Pop offset value
+ pop cx ;; Pop seg value (restore CX)
+ jmp no_bev
+
+no_bcv:
mov ax, 0x1a[bx] ;; 0x1A is also the offset into the expansion header of...
cmp ax, #0x0000 ;; the Bootstrap Entry Vector, or zero if there is none.
je no_bev
- ;; Found a device that thinks it can boot the system. Record its BEV.
- mov bx, #IPL_SEG ;; Go to the segment where the IPL table lives
+ ;; Found a device that thinks it can boot the system. Record its BEV and product name string.
+ mov di, 0x10[bx] ;; Pointer to the product name string or zero if none
+ xor bx, bx
mov ds, bx
+ mov bx, word ptr [0x40E] ;; EBDA segment
+ mov ds, bx ;; Go to the segment where the IPL table lives
mov bx, IPL_COUNT_OFFSET ;; Read the number of entries so far
cmp bx, #IPL_TABLE_ENTRIES
je no_bev ;; Get out if the table is full
shl bx, #0x4 ;; Turn count into offset (entries are 16 bytes)
- mov 0[bx], #0x80 ;; This entry is a BEV device
- mov 6[bx], cx ;; Build a far pointer from the segment...
- mov 4[bx], ax ;; and the offset
+ mov IPL_TABLE_OFFSET+0[bx], #IPL_TYPE_BEV ;; This entry is a BEV device
+ mov IPL_TABLE_OFFSET+6[bx], cx ;; Build a far pointer from the segment...
+ mov IPL_TABLE_OFFSET+4[bx], ax ;; and the offset
+ cmp di, #0x0000
+ je no_prod_str
+ mov 0xA[bx], cx ;; Build a far pointer from the segment...
+ mov 8[bx], di ;; and the offset
+no_prod_str:
shr bx, #0x4 ;; Turn the offset back into a count
inc bx ;; We have one more entry now
mov IPL_COUNT_OFFSET, bx ;; Remember that.
shl ax, #5 ;; convert 512-bytes blocks to 16-byte increments
;; because the segment selector is shifted left 4 bits.
add cx, ax
- cmp cx, #0xe000
+ pop ax ;; Restore AX
+ cmp cx, ax
jbe rom_scan_loop
xor ax, ax ;; Restore DS back to 0000:
#endif
-;; for 'C' strings and other data, insert them here with
-;; a the following hack:
-;; DATA_SEG_DEFS_HERE
+post_init_pic:
+ mov al, #0x11 ; send initialisation commands
+ out 0x20, al
+ out 0xa0, al
+ mov al, #0x08
+ out 0x21, al
+ mov al, #0x70
+ out 0xa1, al
+ mov al, #0x04
+ out 0x21, al
+ mov al, #0x02
+ out 0xa1, al
+ mov al, #0x01
+ out 0x21, al
+ out 0xa1, al
+ mov al, #0xb8
+ out 0x21, AL ;master pic: unmask IRQ 0, 1, 2, 6
+#if BX_USE_PS2_MOUSE
+ mov al, #0x8f
+#else
+ mov al, #0x9f
+#endif
+ out 0xa1, AL ;slave pic: unmask IRQ 12, 13, 14
+ ret
+;; the following area can be used to write dynamically generated tables
+ .align 16
+bios_table_area_start:
+ dd 0xaafb4442
+ dd bios_table_area_end - bios_table_area_start - 8;
;--------
;- POST -
;; Examine CMOS shutdown status.
mov al, bl
- mov dx, #EBDA_SEG
- mov ds, dx
- mov [EBDA_CMOS_SHUTDOWN_STATUS_OFFSET], AL
+
+ ;; 0x00, 0x09, 0x0D+ = normal startup
+ cmp AL, #0x00
+ jz normal_post
+ cmp AL, #0x0d
+ jae normal_post
+ cmp AL, #0x09
+ je normal_post
+
+ ;; 0x05 = eoi + jmp via [0x40:0x67] jump
+ cmp al, #0x05
+ je eoi_jmp_post
+
+ ;; 0x0A = jmp via [0x40:0x67] jump
+ cmp al, #0x0a
+ je jmp_post_0x467
+
+ ;; 0x0B = iret via [0x40:0x67]
+ cmp al, #0x0b
+ je iret_post_0x467
+
+ ;; 0x0C = retf via [0x40:0x67]
+ cmp al, #0x0c
+ je retf_post_0x467
+
+ ;; Examine CMOS shutdown status.
+ ;; 0x01,0x02,0x03,0x04,0x06,0x07,0x08 = Unimplemented shutdown status.
+ push bx
+ call _shutdown_status_panic
+
+#if 0
+ HALT(__LINE__)
+ ;
+ ;#if 0
+ ; 0xb0, 0x20, /* mov al, #0x20 */
+ ; 0xe6, 0x20, /* out 0x20, al ;send EOI to PIC */
+ ;#endif
+ ;
+ pop es
+ pop ds
+ popa
+ iret
+#endif
+
+normal_post:
+ ; case 0: normal startup
cli
mov ax, #0xfffe
mov sp, ax
- mov ax, #0x0000
+ xor ax, ax
mov ds, ax
mov ss, ax
+ ;; Save shutdown status
+ mov 0x04b0, bl
+
+ cmp bl, #0xfe
+ jz s3_post
+
;; zero out BIOS data area (40:00..40:ff)
mov es, ax
mov cx, #0x0080 ;; 128 words
call _log_bios_start
;; set all interrupts to default handler
- mov bx, #0x0000 ;; offset index
+ xor bx, bx ;; offset index
mov cx, #0x0100 ;; counter (256 interrupts)
mov ax, #dummy_iret_handler
mov dx, #0xF000
post_default_ints:
mov [bx], ax
- inc bx
- inc bx
+ add bx, #2
mov [bx], dx
- inc bx
- inc bx
+ add bx, #2
loop post_default_ints
;; set vector 0x79 to zero
mov ax, #BASE_MEM_IN_K
mov 0x0413, ax
-
;; Manufacturing Test 40:12
;; zerod out above
SET_INT_VECTOR(0x10, #0xF000, #int10_handler)
;; PIC
- mov al, #0x11 ; send initialisation commands
- out 0x20, al
- out 0xa0, al
- mov al, #0x08
- out 0x21, al
- mov al, #0x70
- out 0xa1, al
- mov al, #0x04
- out 0x21, al
- mov al, #0x02
- out 0xa1, al
- mov al, #0x01
- out 0x21, al
- out 0xa1, al
- mov al, #0xb8
- out 0x21, AL ;master pic: unmask IRQ 0, 1, 2, 6
-#if BX_USE_PS2_MOUSE
- mov al, #0x8f
-#else
- mov al, #0x9f
-#endif
- out 0xa1, AL ;slave pic: unmask IRQ 12, 13, 14
-
-#ifdef HVMASSIST
- call _enable_rom_write_access
- call _clobber_entry_point
- call _copy_e820_table
- call smbios_init
- call _disable_rom_write_access
-#endif
-
- call _init_boot_vectors
+ call post_init_pic
+ mov cx, #0xc000 ;; init vga bios
+ mov ax, #0xc780
call rom_scan
- call _print_bios_banner
+ call _print_bios_banner
+
+#if BX_ROMBIOS32
+ call rombios32_init
+#else
+#if BX_PCIBIOS && !defined(HVMASSIST)
+ call pcibios_init_iomem_bases
+ call pcibios_init_irqs
+#endif //BX_PCIBIOS
+#endif
;;
;; Floppy setup
;;
call floppy_drive_post
-#if BX_USE_ATADRV
-
;;
;; Hard Drive setup
;;
call hard_drive_post
+#if BX_USE_ATADRV
+
;;
;; ATA/ATAPI driver setup
;;
call _ata_init
call _ata_detect
;;
-#else // BX_USE_ATADRV
-
- ;;
- ;; Hard Drive setup
- ;;
- call hard_drive_post
#endif // BX_USE_ATADRV
;;
#endif // BX_ELTORITO_BOOT
- call _s3_resume
+#ifdef HVMASSIST
+ call _enable_rom_write_access
+ call _clobber_entry_point
+ call _fixup_base_mem_in_k
+ call smbios_init
+#endif
+
+ call _init_boot_vectors
+
+ mov cx, #(OPTIONROM_PHYSICAL_ADDRESS >> 4) ;; init option roms
+ mov ax, #(OPTIONROM_PHYSICAL_END >> 4)
+ call rom_scan
+
+#ifdef HVMASSIST
+ call _disable_rom_write_access
+#endif
+
+#if BX_ELTORITO_BOOT
call _interactive_bootkey
+#endif // BX_ELTORITO_BOOT
#if BX_TCGBIOS
call tcpa_post_part2
#endif
+ sti ;; enable interrupts
;; Start the boot sequence. See the comments in int19_relocated
;; for why we use INT 18h instead of INT 19h here.
int #0x18
iret
int75_handler:
- out 0xf0, al // clear irq13
+ out 0xf0, al // clear irq13
call eoi_both_pics // clear interrupt
int 2 // legacy nmi call
iret
int14_handler:
push ds
pusha
- mov ax, #0x0000
+ xor ax, ax
mov ds, ax
call _int14_function
popa
jz int09_finish
in al, #0x60 ;;read key from keyboard controller
- //test al, #0x80 ;;look for key release
- //jnz int09_process_key ;; dont pass releases to intercept?
-
- ;; check for extended key
- cmp al, #0xe0
- jne int09_call_int15_4f
-
- push ds
- xor ax, ax
- mov ds, ax
- mov al, BYTE [0x496] ;; mf2_state |= 0x01
- or al, #0x01
- mov BYTE [0x496], al
- pop ds
-
- in al, #0x60 ;;read another key from keyboard controller
-
sti
-
-int09_call_int15_4f:
push ds
pusha
#ifdef BX_CALL_INT15_4F
jnc int09_done
#endif
+ ;; check for extended key
+ cmp al, #0xe0
+ jne int09_check_pause
+ xor ax, ax
+ mov ds, ax
+ mov al, BYTE [0x496] ;; mf2_state |= 0x02
+ or al, #0x02
+ mov BYTE [0x496], al
+ jmp int09_done
+
+int09_check_pause: ;; check for pause key
+ cmp al, #0xe1
+ jne int09_process_key
+ xor ax, ax
+ mov ds, ax
+ mov al, BYTE [0x496] ;; mf2_state |= 0x01
+ or al, #0x01
+ mov BYTE [0x496], al
+ jmp int09_done
-//int09_process_key:
+int09_process_key:
mov bx, #0xf000
mov ds, bx
call _int09_function
iret
-
-
;----------------------------------------
;- INT 13h Diskette Service Entry Point -
;----------------------------------------
je int0e_loop2
int0e_normal:
push ds
- mov ax, #0x0000 ;; segment 0000
+ xor ax, ax ;; segment 0000
mov ds, ax
call eoi_master_pic
mov al, 0x043e
int17_handler:
push ds
pusha
- mov ax, #0x0000
+ xor ax, ax
mov ds, ax
call _int17_function
popa
;;
int70_handler:
push ds
- pusha
+ pushad
xor ax, ax
mov ds, ax
call _int70_function
- popa
+ popad
pop ds
iret
.org 0xff00
-.ascii "(c) 2002 MandrakeSoft S.A. Written by Kevin Lawton & the Bochs team."
+.ascii BIOS_COPYRIGHT_STRING
;------------------------------------------------
;- IRET Instruction for Dummy Interrupt Handler -
#ifdef HVMTEST
jmp 0xd000:0x0003;
#else
- jmp 0xf000:post
+ jmp 0xf000:post
#endif
.org 0xfff5 ; ASCII Date ROM was built - 8 characters in MM/DD/YY
.org 0xfa6e ;; Character Font for 320x200 & 640x200 Graphics (lower 128 characters)
ASM_END
/*
- * This font comes from the fntcol16.zip package (c) by Joseph Gil
+ * This font comes from the fntcol16.zip package (c) by Joseph Gil
* found at ftp://ftp.simtel.net/pub/simtelnet/msdos/screen/fntcol16.zip
* This font is public domain
- */
+ */
static Bit8u vgafont8[128*8]=
{
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
#ifdef HVMASSIST
ASM_START
-// space for addresses in 32bit BIOS area; currently 256/4 entries
-// are allocated
-.org 0xcb00
-jmptable:
-db 0x5F, 0x5F, 0x5F, 0x4A, 0x4D, 0x50, 0x54 ;; ___JMPT
-dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 64 bytes
-dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 128 bytes
-dw 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 ;; 192 bytes
-
//
// MP Tables
// just carve out some blank space for HVMLOADER to write the MP tables to
db 0,0,0,0,0,0,0 ; 31 bytes
ASM_END
-#else // !HVMASSIST
-
+#endif // HVMASSIST
ASM_START
-.org 0xcc00
+.org 0xcff0
+bios_table_area_end:
// bcc-generated data will be placed here
-
-// For documentation of this config structure, look on developer.intel.com and
-// search for multiprocessor specification. Note that when you change anything
-// you must update the checksum (a pain!). It would be better to construct this
-// with C structures, or at least fill in the checksum automatically.
-//
-// Maybe this structs could be moved elsewhere than d000
-
-#if (BX_SMP_PROCESSORS==1)
- // no structure necessary.
-#elif (BX_SMP_PROCESSORS==2)
-// define the Intel MP Configuration Structure for 2 processors at
-// APIC ID 0,1. I/O APIC at ID=2.
-.align 16
-mp_config_table:
- db 0x50, 0x43, 0x4d, 0x50 ;; "PCMP" signature
- dw (mp_config_end-mp_config_table) ;; table length
- db 4 ;; spec rev
- db 0x65 ;; checksum
- .ascii "BOCHSCPU" ;; OEM id = "BOCHSCPU"
- db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1 "
- db 0x20, 0x20, 0x20, 0x20
- db 0x20, 0x20, 0x20, 0x20
- dw 0,0 ;; oem table ptr
- dw 0 ;; oem table size
- dw 20 ;; entry count
- dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
- dw 0 ;; extended table length
- db 0 ;; extended table checksum
- db 0 ;; reserved
-mp_config_proc0:
- db 0 ;; entry type=processor
- db 0 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 3 ;; cpu flags: enabled, bootstrap processor
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc1:
- db 0 ;; entry type=processor
- db 1 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_isa_bus:
- db 1 ;; entry type=bus
- db 0 ;; bus ID
- db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20 ;; bus type="ISA "
-mp_config_ioapic:
- db 2 ;; entry type=I/O APIC
- db 2 ;; apic id=2. linux will set.
- db 0x11 ;; I/O APIC version number
- db 1 ;; flags=1=enabled
- dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
-mp_config_irqs:
- db 3 ;; entry type=I/O interrupt
- db 0 ;; interrupt type=vectored interrupt
- db 0,0 ;; flags po=0, el=0 (linux uses as default)
- db 0 ;; source bus ID is ISA
- db 0 ;; source bus IRQ
- db 2 ;; destination I/O APIC ID
- db 0 ;; destination I/O APIC interrrupt in
- ;; repeat pattern for interrupts 0-15
- db 3,0,0,0,0,1,2,1
- db 3,0,0,0,0,2,2,2
- db 3,0,0,0,0,3,2,3
- db 3,0,0,0,0,4,2,4
- db 3,0,0,0,0,5,2,5
- db 3,0,0,0,0,6,2,6
- db 3,0,0,0,0,7,2,7
- db 3,0,0,0,0,8,2,8
- db 3,0,0,0,0,9,2,9
- db 3,0,0,0,0,10,2,10
- db 3,0,0,0,0,11,2,11
- db 3,0,0,0,0,12,2,12
- db 3,0,0,0,0,13,2,13
- db 3,0,0,0,0,14,2,14
- db 3,0,0,0,0,15,2,15
-#elif (BX_SMP_PROCESSORS==4)
-// define the Intel MP Configuration Structure for 4 processors at
-// APIC ID 0,1,2,3. I/O APIC at ID=4.
-.align 16
-mp_config_table:
- db 0x50, 0x43, 0x4d, 0x50 ;; "PCMP" signature
- dw (mp_config_end-mp_config_table) ;; table length
- db 4 ;; spec rev
- db 0xdd ;; checksum
- .ascii "BOCHSCPU" ;; OEM id = "BOCHSCPU"
- db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1 "
- db 0x20, 0x20, 0x20, 0x20
- db 0x20, 0x20, 0x20, 0x20
- dw 0,0 ;; oem table ptr
- dw 0 ;; oem table size
- dw 22 ;; entry count
- dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
- dw 0 ;; extended table length
- db 0 ;; extended table checksum
- db 0 ;; reserved
-mp_config_proc0:
- db 0 ;; entry type=processor
- db 0 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 3 ;; cpu flags: enabled, bootstrap processor
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc1:
- db 0 ;; entry type=processor
- db 1 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc2:
- db 0 ;; entry type=processor
- db 2 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc3:
- db 0 ;; entry type=processor
- db 3 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_isa_bus:
- db 1 ;; entry type=bus
- db 0 ;; bus ID
- db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20 ;; bus type="ISA "
-mp_config_ioapic:
- db 2 ;; entry type=I/O APIC
- db 4 ;; apic id=4. linux will set.
- db 0x11 ;; I/O APIC version number
- db 1 ;; flags=1=enabled
- dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
-mp_config_irqs:
- db 3 ;; entry type=I/O interrupt
- db 0 ;; interrupt type=vectored interrupt
- db 0,0 ;; flags po=0, el=0 (linux uses as default)
- db 0 ;; source bus ID is ISA
- db 0 ;; source bus IRQ
- db 4 ;; destination I/O APIC ID
- db 0 ;; destination I/O APIC interrrupt in
- ;; repeat pattern for interrupts 0-15
- db 3,0,0,0,0,1,4,1
- db 3,0,0,0,0,2,4,2
- db 3,0,0,0,0,3,4,3
- db 3,0,0,0,0,4,4,4
- db 3,0,0,0,0,5,4,5
- db 3,0,0,0,0,6,4,6
- db 3,0,0,0,0,7,4,7
- db 3,0,0,0,0,8,4,8
- db 3,0,0,0,0,9,4,9
- db 3,0,0,0,0,10,4,10
- db 3,0,0,0,0,11,4,11
- db 3,0,0,0,0,12,4,12
- db 3,0,0,0,0,13,4,13
- db 3,0,0,0,0,14,4,14
- db 3,0,0,0,0,15,4,15
-#elif (BX_SMP_PROCESSORS==8)
-// define the Intel MP Configuration Structure for 8 processors at
-// APIC ID 0,1,2,3,4,5,6,7. I/O APIC at ID=8.
-.align 16
-mp_config_table:
- db 0x50, 0x43, 0x4d, 0x50 ;; "PCMP" signature
- dw (mp_config_end-mp_config_table) ;; table length
- db 4 ;; spec rev
- db 0xc3 ;; checksum
- .ascii "BOCHSCPU" ;; OEM id = "BOCHSCPU"
- db 0x30, 0x2e, 0x31, 0x20 ;; vendor id = "0.1 "
- db 0x20, 0x20, 0x20, 0x20
- db 0x20, 0x20, 0x20, 0x20
- dw 0,0 ;; oem table ptr
- dw 0 ;; oem table size
- dw 26 ;; entry count
- dw 0x0000, 0xfee0 ;; memory mapped address of local APIC
- dw 0 ;; extended table length
- db 0 ;; extended table checksum
- db 0 ;; reserved
-mp_config_proc0:
- db 0 ;; entry type=processor
- db 0 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 3 ;; cpu flags: enabled, bootstrap processor
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc1:
- db 0 ;; entry type=processor
- db 1 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc2:
- db 0 ;; entry type=processor
- db 2 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc3:
- db 0 ;; entry type=processor
- db 3 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc4:
- db 0 ;; entry type=processor
- db 4 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc5:
- db 0 ;; entry type=processor
- db 5 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc6:
- db 0 ;; entry type=processor
- db 6 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_proc7:
- db 0 ;; entry type=processor
- db 7 ;; local APIC id
- db 0x11 ;; local APIC version number
- db 1 ;; cpu flags: enabled
- db 0,6,0,0 ;; cpu signature
- dw 0x201,0 ;; feature flags
- dw 0,0 ;; reserved
- dw 0,0 ;; reserved
-mp_config_isa_bus:
- db 1 ;; entry type=bus
- db 0 ;; bus ID
- db 0x49, 0x53, 0x41, 0x20, 0x20, 0x20 ;; bus type="ISA "
-mp_config_ioapic:
- db 2 ;; entry type=I/O APIC
- db 8 ;; apic id=8
- db 0x11 ;; I/O APIC version number
- db 1 ;; flags=1=enabled
- dw 0x0000, 0xfec0 ;; memory mapped address of I/O APIC
-mp_config_irqs:
- db 3 ;; entry type=I/O interrupt
- db 0 ;; interrupt type=vectored interrupt
- db 0,0 ;; flags po=0, el=0 (linux uses as default)
- db 0 ;; source bus ID is ISA
- db 0 ;; source bus IRQ
- db 8 ;; destination I/O APIC ID
- db 0 ;; destination I/O APIC interrrupt in
- ;; repeat pattern for interrupts 0-15
- db 3,0,0,0,0,1,8,1
- db 3,0,0,0,0,2,8,2
- db 3,0,0,0,0,3,8,3
- db 3,0,0,0,0,4,8,4
- db 3,0,0,0,0,5,8,5
- db 3,0,0,0,0,6,8,6
- db 3,0,0,0,0,7,8,7
- db 3,0,0,0,0,8,8,8
- db 3,0,0,0,0,9,8,9
- db 3,0,0,0,0,10,8,10
- db 3,0,0,0,0,11,8,11
- db 3,0,0,0,0,12,8,12
- db 3,0,0,0,0,13,8,13
- db 3,0,0,0,0,14,8,14
- db 3,0,0,0,0,15,8,15
-#else
-# error Sorry, rombios only has configurations for 1, 2, 4 or 8 processors.
-#endif // if (BX_SMP_PROCESSORS==...)
-
-mp_config_end: // this label used to find length of mp structure
- db 0
-
-#if (BX_SMP_PROCESSORS>1)
-.align 16
-mp_floating_pointer_structure:
-db 0x5f, 0x4d, 0x50, 0x5f ; "_MP_" signature
-dw mp_config_table, 0xf ;; pointer to MP configuration table
-db 1 ;; length of this struct in 16-bit byte chunks
-db 4 ;; MP spec revision
-db 0xc1 ;; checksum
-db 0 ;; MP feature byte 1. value 0 means look at the config table
-db 0,0,0,0 ;; MP feature bytes 2-5.
-#endif
-
ASM_END
-#endif // HVMASSIST
--- /dev/null
+/////////////////////////////////////////////////////////////////////////
+// $Id: rombios.h,v 1.8 2008/12/04 18:48:33 sshwarts Exp $
+/////////////////////////////////////////////////////////////////////////
+//
+// Copyright (C) 2006 Volker Ruppert
+//
+// This library is free software; you can redistribute it and/or
+// modify it under the terms of the GNU Lesser General Public
+// License as published by the Free Software Foundation; either
+// version 2 of the License, or (at your option) any later version.
+//
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+// Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public
+// License along with this library; if not, write to the Free Software
+// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+/* define it to include QEMU specific code */
+//#define BX_QEMU
+#define LEGACY
+
+#ifndef LEGACY
+# define BX_ROMBIOS32 1
+#else
+# define BX_ROMBIOS32 0
+#endif
+#define DEBUG_ROMBIOS 0
+
+#define PANIC_PORT 0x400
+#define PANIC_PORT2 0x401
+#define INFO_PORT 0x402
+#define DEBUG_PORT 0x403
+
+#define BIOS_PRINTF_HALT 1
+#define BIOS_PRINTF_SCREEN 2
+#define BIOS_PRINTF_INFO 4
+#define BIOS_PRINTF_DEBUG 8
+#define BIOS_PRINTF_ALL (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO)
+#define BIOS_PRINTF_DEBHALT (BIOS_PRINTF_SCREEN | BIOS_PRINTF_INFO | BIOS_PRINTF_HALT)
+
+#define printf(format, p...) bios_printf(BIOS_PRINTF_SCREEN, format, ##p)
+
+// Defines the output macros.
+// BX_DEBUG goes to INFO port until we can easily choose debug info on a
+// per-device basis. Debug info is sent only in debug mode.
+#if DEBUG_ROMBIOS
+# define BX_DEBUG(format, p...) bios_printf(BIOS_PRINTF_INFO, format, ##p)
+#else
+# define BX_DEBUG(format, p...)
+#endif
+#define BX_INFO(format, p...) bios_printf(BIOS_PRINTF_INFO, format, ##p)
+#define BX_PANIC(format, p...) bios_printf(BIOS_PRINTF_DEBHALT, format, ##p)
+
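+// Illustrative usage (example only, not from the original sources): with
+// DEBUG_ROMBIOS set to 1 the BX_DEBUG call below is compiled in, otherwise it
+// expands to nothing; BX_INFO and BX_PANIC always go through bios_printf.
+// The message text and the variables shown are made up for the example.
+//   BX_DEBUG("ata: sending command %02x\n", command);
+//   BX_INFO("%s BIOS booted with %d MB RAM\n", BX_APPNAME, ram_mb);
+//   BX_PANIC("unsupported CPU\n");
+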
+#define ACPI_DATA_SIZE 0x00010000L
+#define PM_IO_BASE 0xb000
+#define SMB_IO_BASE 0xb100
+
+ // Define the application NAME
+#if defined(HVMASSIST)
+# define BX_APPNAME "HVMAssist"
+#elif defined(BX_QEMU)
+# define BX_APPNAME "QEMU"
+#elif defined(PLEX86)
+# define BX_APPNAME "Plex86"
+#else
+# define BX_APPNAME "Bochs"
+#endif
Support for TCPA ACPI logging
******************************************************************/
-/*
- * Extend the ACPI log with the given entry by copying the
- * entry data into the log.
- * Input
- * Pointer to the structure to be copied into the log
- *
- * Output:
- * lower 16 bits of return code contain entry number
- * if entry number is '0', then upper 16 bits contain error code.
- */
-Bit32u tcpa_extend_acpi_log(entry_ptr)
- Bit32u entry_ptr;
-{
- ASM_START
- DoUpcall(IDX_TCPA_EXTEND_ACPI_LOG)
- ASM_END
-}
-
-
-/*
- initialize the TCPA ACPI subsystem; find the ACPI tables and determine
- where the TCPA table is.
- */
- void
-tcpa_acpi_init()
-{
- ASM_START
- DoUpcall(IDX_TCPA_ACPI_INIT)
- ASM_END
-}
-
-
-/*
- * Add measurement to log about call of int 19h
- */
- void
-tcpa_calling_int19h()
-{
- ASM_START
- DoUpcall(IDX_TCPA_CALLING_INT19H)
- ASM_END
-}
-
-/*
- * Add measurement to log about retuning from int 19h
- */
- void
-tcpa_returned_int19h()
-{
- ASM_START
- DoUpcall(IDX_TCPA_RETURNED_INT19H)
- ASM_END
-}
-
-/*
- * Add event separators for PCRs 0 to 7; specs 8.2.3
- */
- void
-tcpa_add_event_separators()
-{
- ASM_START
- DoUpcall(IDX_TCPA_ADD_EVENT_SEPARATORS)
- ASM_END
-}
-
-
-/*
- * Add a wake event to the log
- */
- void
-tcpa_wake_event()
-{
- ASM_START
- DoUpcall(IDX_TCPA_WAKE_EVENT)
- ASM_END
-}
-
-
-/*
- * Add measurement to the log about option rom scan
- * 10.4.3 : action 14
- */
- void
-tcpa_start_option_rom_scan()
-{
- ASM_START
- DoUpcall(IDX_TCPA_START_OPTION_ROM_SCAN)
- ASM_END
-}
-
-
-/*
- * Add measurement to the log about an option rom
- */
- void
-tcpa_option_rom(seg)
- Bit32u seg;
-{
- ASM_START
- DoUpcall(IDX_TCPA_OPTION_ROM)
- ASM_END
-}
-
-/*
- * Add a measurement regarding the boot device (CDRom, Floppy, HDD) to
- * the list of measurements.
- */
-void
- tcpa_add_bootdevice(bootcd, bootdrv)
- Bit32u bootcd;
- Bit32u bootdrv;
-{
- ASM_START
- DoUpcall(IDX_TCPA_ADD_BOOTDEVICE)
- ASM_END
-}
-
-/*
- * Add a measurement to the log in support of 8.2.5.3
- * Creates two log entries
- *
- * Input parameter:
- * seg : segment where the IPL data are located
- */
- void
- tcpa_ipl(bootcd,seg,off,count)
- Bit32u bootcd;
- Bit32u seg;
- Bit32u off;
- Bit32u count;
-{
- ASM_START
- DoUpcall(IDX_TCPA_IPL)
- ASM_END
-}
-
-
-Bit32u
-tcpa_initialize_tpm(physpres)
- Bit32u physpres;
-{
- ASM_START
- DoUpcall(IDX_TCPA_INITIALIZE_TPM)
- ASM_END
-}
-
-void
-tcpa_measure_post(from, to)
- Bit32u from;
- Bit32u to;
-{
- ASM_START
- DoUpcall(IDX_TCPA_MEASURE_POST)
- ASM_END
-}
-
ASM_START
MACRO POST_MEASURE
push word #0x000f
ASM_END
}
-Bit32u
-TCGInterruptHandler(regs_ptr, es, ds, flags_ptr)
- Bit32u regs_ptr;
- Bit16u es;
- Bit16u ds;
- Bit32u flags_ptr;
-{
- ASM_START
- DoUpcall(IDX_TCGINTERRUPTHANDLER)
- ASM_END
-}
-
/*
* C-dispatcher for the TCG BIOS functions
*/
#include "vbe.h"
#include "vbetables.h"
-#define VBE_TOTAL_VIDEO_MEMORY_DIV_64K (VBE_DISPI_TOTAL_VIDEO_MEMORY_MB*1024/64)
-
// The current OEM Software Revision of this VBE Bios
#define VBE_OEM_SOFTWARE_REV 0x0002;
vbe_info_block.VideoModePtr_Off= DI + 34;
// VBE Total Memory (in 64b blocks)
- vbe_info_block.TotalMemory = VBE_TOTAL_VIDEO_MEMORY_DIV_64K;
+ outw(VBE_DISPI_IOPORT_INDEX, VBE_DISPI_INDEX_VIDEO_MEMORY_64K);
+ vbe_info_block.TotalMemory = inw(VBE_DISPI_IOPORT_DATA);
if (vbe2_info)
{
do
{
if ((cur_info->info.XResolution <= dispi_get_max_xres()) &&
- (cur_info->info.BitsPerPixel <= dispi_get_max_bpp())) {
+ (cur_info->info.BitsPerPixel <= dispi_get_max_bpp()) &&
+            (cur_info->info.XResolution * cur_info->info.YResolution * cur_info->info.BitsPerPixel <= vbe_info_block.TotalMemory << 19 )) { /* TotalMemory is in 64K blocks; <<19 converts to bits */
#ifdef DEBUG
printf("VBE found mode %x => %x\n", cur_info->mode,cur_mode);
#endif
cur_ptr+=2;
} else {
#ifdef DEBUG
- printf("VBE mode %x (xres=%x / bpp=%02x) not supported by display\n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
+          printf("VBE mode %x (xres=%x / bpp=%02x) not supported\n", cur_info->mode,cur_info->info.XResolution,cur_info->info.BitsPerPixel);
#endif
}
cur_info++;
info.WinFuncPtr = 0xC0000000UL;
*(Bit16u *)&(info.WinFuncPtr) = (Bit16u)(dispi_set_bank_farcall);
}
-
+ outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_H);
+ info.PhysBasePtr = inw(VBE_DISPI_IOPORT_DATA);
+ info.PhysBasePtr = info.PhysBasePtr << 16;
+#if 0
+ outw(VBE_DISPI_IOPORT_INDEX,VBE_DISPI_INDEX_LFB_ADDRESS_L);
+ info.PhysBasePtr |= inw(VBE_DISPI_IOPORT_DATA);
+#endif
result = 0x4f;
}
else
// like 0xE0000000
- #define VBE_DISPI_BANK_ADDRESS 0xA0000
- #define VBE_DISPI_BANK_SIZE_KB 64
+ #define VBE_DISPI_BANK_ADDRESS 0xA0000
+ #define VBE_DISPI_BANK_SIZE_KB 64
- #define VBE_DISPI_MAX_XRES 1024
- #define VBE_DISPI_MAX_YRES 768
+ #define VBE_DISPI_MAX_XRES 2560
+ #define VBE_DISPI_MAX_YRES 1600
- #define VBE_DISPI_IOPORT_INDEX 0x01CE
- #define VBE_DISPI_IOPORT_DATA 0x01CF
+ #define VBE_DISPI_IOPORT_INDEX 0x01CE
+ #define VBE_DISPI_IOPORT_DATA 0x01CF
- #define VBE_DISPI_INDEX_ID 0x0
- #define VBE_DISPI_INDEX_XRES 0x1
- #define VBE_DISPI_INDEX_YRES 0x2
- #define VBE_DISPI_INDEX_BPP 0x3
- #define VBE_DISPI_INDEX_ENABLE 0x4
- #define VBE_DISPI_INDEX_BANK 0x5
- #define VBE_DISPI_INDEX_VIRT_WIDTH 0x6
- #define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
- #define VBE_DISPI_INDEX_X_OFFSET 0x8
- #define VBE_DISPI_INDEX_Y_OFFSET 0x9
-
- #define VBE_DISPI_ID0 0xB0C0
- #define VBE_DISPI_ID1 0xB0C1
- #define VBE_DISPI_ID2 0xB0C2
- #define VBE_DISPI_ID3 0xB0C3
- #define VBE_DISPI_ID4 0xB0C4
-
- #define VBE_DISPI_DISABLED 0x00
- #define VBE_DISPI_ENABLED 0x01
- #define VBE_DISPI_GETCAPS 0x02
- #define VBE_DISPI_8BIT_DAC 0x20
- #define VBE_DISPI_LFB_ENABLED 0x40
- #define VBE_DISPI_NOCLEARMEM 0x80
-
- #define VBE_DISPI_LFB_PHYSICAL_ADDRESS 0xE0000000
+ #define VBE_DISPI_INDEX_ID 0x0
+ #define VBE_DISPI_INDEX_XRES 0x1
+ #define VBE_DISPI_INDEX_YRES 0x2
+ #define VBE_DISPI_INDEX_BPP 0x3
+ #define VBE_DISPI_INDEX_ENABLE 0x4
+ #define VBE_DISPI_INDEX_BANK 0x5
+ #define VBE_DISPI_INDEX_VIRT_WIDTH 0x6
+ #define VBE_DISPI_INDEX_VIRT_HEIGHT 0x7
+ #define VBE_DISPI_INDEX_X_OFFSET 0x8
+ #define VBE_DISPI_INDEX_Y_OFFSET 0x9
+ #define VBE_DISPI_INDEX_VIDEO_MEMORY_64K 0xa
+ #define VBE_DISPI_INDEX_LFB_ADDRESS_H 0xb
+ #define VBE_DISPI_INDEX_LFB_ADDRESS_L 0xc
+
+ #define VBE_DISPI_LFB_PHYSICAL_ADDRESS 0xE0000000
+ #define VBE_DISPI_ID0 0xB0C0
+ #define VBE_DISPI_ID1 0xB0C1
+ #define VBE_DISPI_ID2 0xB0C2
+ #define VBE_DISPI_ID3 0xB0C3
+ #define VBE_DISPI_ID4 0xB0C4
+
+ #define VBE_DISPI_DISABLED 0x00
+ #define VBE_DISPI_ENABLED 0x01
+ #define VBE_DISPI_GETCAPS 0x02
+ #define VBE_DISPI_8BIT_DAC 0x20
+ #define VBE_DISPI_LFB_ENABLED 0x40
+ #define VBE_DISPI_NOCLEARMEM 0x80
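+
+  // Illustrative access pattern (example only, mirroring the vbe.c change in
+  // this patch): a DISPI register is selected through the index port and then
+  // read or written through the data port, e.g. to query the amount of video
+  // memory in 64K blocks:
+  //   outw(VBE_DISPI_IOPORT_INDEX, VBE_DISPI_INDEX_VIDEO_MEMORY_64K);
+  //   total_64k = inw(VBE_DISPI_IOPORT_DATA);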
#endif
#include <stdlib.h>
#include <stdio.h>
-#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 8
+#define VBE_DISPI_TOTAL_VIDEO_MEMORY_MB 16
typedef struct {
int width;
{ 1600, 1200, 24 , 0x11F},
/* BOCHS/PLE, 86 'own' mode numbers */
-{ 320, 200, 32 , 0x140},
-{ 640, 400, 32 , 0x141},
-{ 640, 480, 32 , 0x142},
-{ 800, 600, 32 , 0x143},
-{ 1024, 768, 32 , 0x144},
-{ 1280, 1024, 32 , 0x145},
-{ 320, 200, 8 , 0x146},
-{ 1600, 1200, 32 , 0x147},
-{ 1152, 864, 8 , 0x148},
+{ 320, 200, 32 , 0x140},
+{ 640, 400, 32 , 0x141},
+{ 640, 480, 32 , 0x142},
+{ 800, 600, 32 , 0x143},
+{ 1024, 768, 32 , 0x144},
+{ 1280, 1024, 32 , 0x145},
+{ 320, 200, 8 , 0x146},
+{ 1600, 1200, 32 , 0x147},
+{ 1152, 864, 8 , 0x148},
{ 1152, 864, 15 , 0x149},
{ 1152, 864, 16 , 0x14a},
{ 1152, 864, 24 , 0x14b},
{ 1152, 864, 32 , 0x14c},
+{ 1280, 800, 16 , 0x178},
+{ 1280, 800, 24 , 0x179},
+{ 1280, 800, 32 , 0x17a},
+{ 1280, 960, 16 , 0x17b},
+{ 1280, 960, 24 , 0x17c},
+{ 1280, 960, 32 , 0x17d},
+{ 1440, 900, 16 , 0x17e},
+{ 1440, 900, 24 , 0x17f},
+{ 1440, 900, 32 , 0x180},
+{ 1400, 1050, 16 , 0x181},
+{ 1400, 1050, 24 , 0x182},
+{ 1400, 1050, 32 , 0x183},
+{ 1680, 1050, 16 , 0x184},
+{ 1680, 1050, 24 , 0x185},
+{ 1680, 1050, 32 , 0x186},
+{ 1920, 1200, 16 , 0x187},
+{ 1920, 1200, 24 , 0x188},
+{ 1920, 1200, 32 , 0x189},
+{ 2560, 1600, 16 , 0x18a},
+{ 2560, 1600, 24 , 0x18b},
+{ 2560, 1600, 32 , 0x18c},
{ 0, },
};
for (i=0; i<format_width; i++) {
nibble = (arg >> (4 * digit)) & 0x000f;
if (nibble <= 9)
- outb(0x0500, nibble + '0');
+ outb(0xe9, nibble + '0');
else
- outb(0x0500, (nibble - 10) + 'A');
+ outb(0xe9, (nibble - 10) + 'A');
digit--;
}
in_format = 0;
// }
}
else {
- outb(0x0500, c);
+ outb(0xe9, c);
}
s ++;
}
# Get gcc to generate the dependencies for us.
CFLAGS += -Wp,-MD,.$(@F).d
LDFLAGS += -L.
-DEPS = .*.d
LIB_OBJS := $(patsubst %.c,%.o,$(SRCS))
PIC_OBJS := $(patsubst %.c,%.opic,$(SRCS))
PROFILE=#-pg
BASECFLAGS=-Wall -g -Werror
-# Make gcc generate dependencies.
-BASECFLAGS += -Wp,-MD,.$(@F).d
-PROG_DEP = .*.d
BASECFLAGS+= $(PROFILE)
#BASECFLAGS+= -I$(XEN_ROOT)/tools
BASECFLAGS+= $(CFLAGS_libxenctrl)
clean:
rm -f *.o *.opic *.so
rm -f $(CLIENTS)
- $(RM) $(PROG_DEP)
+ $(RM) $(DEPS)
.PHONY: print-dir
print-dir:
$(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
$(INSTALL_PROG) $(CLIENTS) $(DESTDIR)$(SBINDIR)
--include $(PROG_DEP)
+-include $(DEPS)
# never delete any intermediate files.
.SECONDARY:
allow dom0_t pirq_t:event {vector};
allow dom0_t xen_t:mmu {memorymap};
-allow dom0_t dom0_t:mmu {pinpage map_read map_write adjust};
+allow dom0_t dom0_t:mmu {pinpage map_read map_write adjust updatemp};
allow dom0_t dom0_t:grant {query setup};
allow dom0_t dom0_t:domain {scheduler getdomaininfo getvcpuinfo getvcpuaffinity};
allow dom0_t dom0_t:event {send};
allow dom0_t domU_t:grant {copy};
+allow domU_t domU_t:grant {copy};
manage_domain(dom0_t, domU_t)
CFLAGS += $(INCLUDES) -I.
CFLAGS += -D_GNU_SOURCE
-# Get gcc to generate the dependencies for us.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEPS = .*.d
-
LIBS := -L. -L.. -L../lib
LIBS += $(LDFLAGS_libxenctrl)
LIBS += $(LDFLAGS_libxenstore)
-LIBS += -lpthread -lrt
+LIBS += -lrt
OBJS := fs-xenbus.o fs-ops.o
#undef NDEBUG
+#include <unistd.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <malloc.h>
-#include <pthread.h>
#include <xenctrl.h>
#include <aio.h>
#include <sys/mman.h>
#include <sys/select.h>
+#include <sys/socket.h>
#include <xen/io/ring.h>
+#include <xc_private.h>
+#include <err.h>
+#include "sys-queue.h"
#include "fs-backend.h"
+#include "fs-debug.h"
struct xs_handle *xsh = NULL;
static struct fs_export *fs_exports = NULL;
static int export_id = 0;
static int mount_id = 0;
+static int pipefds[2];
+static LIST_HEAD(mount_requests_head, fs_mount) mount_requests_head;
-static void dispatch_response(struct fs_mount *mount, int priv_req_id)
+static void free_mount_request(struct fs_mount *mount);
+
+static void dispatch_response(struct fs_request *request)
{
int i;
struct fs_op *op;
- struct fs_request *req = &mount->requests[priv_req_id];
for(i=0;;i++)
{
op = fsops[i];
/* We should dispatch a response before reaching the end of the array */
assert(op != NULL);
- if(op->type == req->req_shadow.type)
+ if(op->type == request->req_shadow.type)
{
- printf("Found op for type=%d\n", op->type);
+ FS_DEBUG("Found op for type=%d\n", op->type);
/* There needs to be a response handler */
assert(op->response_handler != NULL);
- op->response_handler(mount, req);
+ op->response_handler(request->mount, request);
break;
}
}
- req->active = 0;
- add_id_to_freelist(priv_req_id, mount->freelist);
+ request->active = 0;
+ add_id_to_freelist(request->id, request->mount->freelist);
}
-static void handle_aio_events(struct fs_mount *mount)
+static void handle_aio_event(struct fs_request *request)
{
- int fd, ret, count, i, notify;
- evtchn_port_t port;
- /* AIO control block for the evtchn file destriptor */
- struct aiocb evtchn_cb;
- const struct aiocb * cb_list[mount->nr_entries];
- int request_ids[mount->nr_entries];
-
- /* Prepare the AIO control block for evtchn */
- fd = xc_evtchn_fd(mount->evth);
- bzero(&evtchn_cb, sizeof(struct aiocb));
- evtchn_cb.aio_fildes = fd;
- evtchn_cb.aio_nbytes = sizeof(port);
- evtchn_cb.aio_buf = &port;
- assert(aio_read(&evtchn_cb) == 0);
-
-wait_again:
- /* Create list of active AIO requests */
- count = 0;
- for(i=0; i<mount->nr_entries; i++)
- if(mount->requests[i].active)
- {
- cb_list[count] = &mount->requests[i].aiocb;
- request_ids[count] = i;
- count++;
- }
- /* Add the event channel at the end of the list. Event channel needs to be
- * handled last as it exits this function. */
- cb_list[count] = &evtchn_cb;
- request_ids[count] = -1;
- count++;
-
- /* Block till an AIO requset finishes, or we get an event */
- while(1) {
- int ret = aio_suspend(cb_list, count, NULL);
- if (!ret)
- break;
- assert(errno == EINTR);
+ int ret, notify;
+
+ FS_DEBUG("handle_aio_event: mount %s request %d\n", request->mount->frontend, request->id);
+ if (request->active < 0) {
+ request->mount->nr_entries++;
+ if (!request->mount->nr_entries)
+ free_mount_request(request->mount);
+ return;
}
- for(i=0; i<count; i++)
- if(aio_error(cb_list[i]) != EINPROGRESS)
- {
- if(request_ids[i] >= 0)
- dispatch_response(mount, request_ids[i]);
- else
- goto read_event_channel;
- }
-
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&mount->ring, notify);
- printf("Pushed responces and notify=%d\n", notify);
- if(notify)
- xc_evtchn_notify(mount->evth, mount->local_evtchn);
-
- goto wait_again;
-read_event_channel:
- assert(aio_return(&evtchn_cb) == sizeof(evtchn_port_t));
- assert(xc_evtchn_unmask(mount->evth, mount->local_evtchn) >= 0);
-}
+ ret = aio_error(&request->aiocb);
+ if(ret != EINPROGRESS && ret != ECANCELED)
+ dispatch_response(request);
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&request->mount->ring, notify);
+    FS_DEBUG("Pushed responses and notify=%d\n", notify);
+ if(notify)
+ xc_evtchn_notify(request->mount->evth, request->mount->local_evtchn);
+}
static void allocate_request_array(struct fs_mount *mount)
{
for(i=0; i< nr_entries; i++)
{
requests[i].active = 0;
+ requests[i].mount = mount;
add_id_to_freelist(i, freelist);
}
mount->requests = requests;
}
-static void *handle_mount(void *data)
+static void handle_mount(struct fs_mount *mount)
{
int more, notify;
- struct fs_mount *mount = (struct fs_mount *)data;
-
- printf("Starting a thread for mount: %d\n", mount->mount_id);
- allocate_request_array(mount);
-
- for(;;)
- {
- int nr_consumed=0;
- RING_IDX cons, rp;
- struct fsif_request *req;
+ int nr_consumed=0;
+ RING_IDX cons, rp;
+ struct fsif_request *req;
- handle_aio_events(mount);
moretodo:
- rp = mount->ring.sring->req_prod;
- xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
+ rp = mount->ring.sring->req_prod;
+ xen_rmb(); /* Ensure we see queued requests up to 'rp'. */
- while ((cons = mount->ring.req_cons) != rp)
+ while ((cons = mount->ring.req_cons) != rp)
+ {
+ int i;
+ struct fs_op *op;
+
+ FS_DEBUG("Got a request at %d (of %d)\n",
+ cons, RING_SIZE(&mount->ring));
+ req = RING_GET_REQUEST(&mount->ring, cons);
+ FS_DEBUG("Request type=%d\n", req->type);
+ for(i=0;;i++)
{
- int i;
- struct fs_op *op;
-
- printf("Got a request at %d (of %d)\n",
- cons, RING_SIZE(&mount->ring));
- req = RING_GET_REQUEST(&mount->ring, cons);
- printf("Request type=%d\n", req->type);
- for(i=0;;i++)
+ op = fsops[i];
+ if(op == NULL)
{
- op = fsops[i];
- if(op == NULL)
- {
- /* We've reached the end of the array, no appropirate
- * handler found. Warn, ignore and continue. */
- printf("WARN: Unknown request type: %d\n", req->type);
- mount->ring.req_cons++;
- break;
- }
- if(op->type == req->type)
- {
- /* There needs to be a dispatch handler */
- assert(op->dispatch_handler != NULL);
- op->dispatch_handler(mount, req);
- break;
- }
- }
-
- nr_consumed++;
+                /* We've reached the end of the array, no appropriate
+ * handler found. Warn, ignore and continue. */
+ FS_DEBUG("WARN: Unknown request type: %d\n", req->type);
+ mount->ring.req_cons++;
+ break;
+ }
+ if(op->type == req->type)
+ {
+ /* There needs to be a dispatch handler */
+ assert(op->dispatch_handler != NULL);
+ op->dispatch_handler(mount, req);
+ break;
+ }
}
- printf("Backend consumed: %d requests\n", nr_consumed);
- RING_FINAL_CHECK_FOR_REQUESTS(&mount->ring, more);
- if(more) goto moretodo;
-
- RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&mount->ring, notify);
- printf("Pushed responces and notify=%d\n", notify);
- if(notify)
- xc_evtchn_notify(mount->evth, mount->local_evtchn);
+
+ nr_consumed++;
}
-
- printf("Destroying thread for mount: %d\n", mount->mount_id);
- xc_gnttab_munmap(mount->gnth, mount->ring.sring, 1);
+ FS_DEBUG("Backend consumed: %d requests\n", nr_consumed);
+ RING_FINAL_CHECK_FOR_REQUESTS(&mount->ring, more);
+ if(more) goto moretodo;
+
+ RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(&mount->ring, notify);
+    FS_DEBUG("Pushed responses and notify=%d\n", notify);
+ if(notify)
+ xc_evtchn_notify(mount->evth, mount->local_evtchn);
+}
+
+static void terminate_mount_request(struct fs_mount *mount) {
+ int count = 0, i;
+
+ FS_DEBUG("terminate_mount_request %s\n", mount->frontend);
+ xenbus_write_backend_state(mount, STATE_CLOSING);
+
+ for(i=0; i<mount->nr_entries; i++)
+ if(mount->requests[i].active) {
+ mount->requests[i].active = -1;
+ aio_cancel(mount->requests[i].aiocb.aio_fildes, &mount->requests[i].aiocb);
+ count--;
+ }
+ mount->nr_entries = count;
+
+ while (!xenbus_frontend_state_changed(mount, STATE_CLOSING));
+ xenbus_write_backend_state(mount, STATE_CLOSED);
+
+ xc_gnttab_munmap(mount->gnth, mount->ring.sring, mount->shared_ring_size);
xc_gnttab_close(mount->gnth);
xc_evtchn_unbind(mount->evth, mount->local_evtchn);
xc_evtchn_close(mount->evth);
+
+ if (!count)
+ free_mount_request(mount);
+}
+
+static void free_mount_request(struct fs_mount *mount) {
+ FS_DEBUG("free_mount_request %s\n", mount->frontend);
free(mount->frontend);
- pthread_exit(NULL);
+ free(mount->requests);
+ free(mount->freelist);
+ LIST_REMOVE (mount, entries);
+ free(mount);
}
static void handle_connection(int frontend_dom_id, int export_id, char *frontend)
{
struct fs_mount *mount;
struct fs_export *export;
- int evt_port;
- pthread_t handling_thread;
struct fsif_sring *sring;
uint32_t dom_ids[MAX_RING_SIZE];
int i;
- printf("Handling connection from dom=%d, for export=%d\n",
+ FS_DEBUG("Handling connection from dom=%d, for export=%d\n",
frontend_dom_id, export_id);
/* Try to find the export on the list */
export = fs_exports;
}
if(!export)
{
- printf("Could not find the export (the id is unknown).\n");
+ FS_DEBUG("Could not find the export (the id is unknown).\n");
return;
}
mount->export = export;
mount->mount_id = mount_id++;
xenbus_read_mount_request(mount, frontend);
- printf("Frontend found at: %s (gref=%d, evtchn=%d)\n",
+ FS_DEBUG("Frontend found at: %s (gref=%d, evtchn=%d)\n",
mount->frontend, mount->grefs[0], mount->remote_evtchn);
xenbus_write_backend_node(mount);
mount->evth = -1;
mount->nr_entries = mount->ring.nr_ents;
for (i = 0; i < MAX_FDS; i++)
mount->fds[i] = -1;
- xenbus_write_backend_ready(mount);
- pthread_create(&handling_thread, NULL, &handle_mount, mount);
+ LIST_INSERT_HEAD(&mount_requests_head, mount, entries);
+ xenbus_watch_frontend_state(mount);
+ xenbus_write_backend_state(mount, STATE_READY);
+
+ allocate_request_array(mount);
}
static void await_connections(void)
{
- int fd, ret, dom_id, export_id;
+ int fd, max_fd, ret, dom_id, export_id;
fd_set fds;
char **watch_paths;
unsigned int len;
char d;
+ struct fs_mount *pointer;
+
+ LIST_INIT (&mount_requests_head);
assert(xsh != NULL);
fd = xenbus_get_watch_fd();
do {
FD_ZERO(&fds);
FD_SET(fd, &fds);
- ret = select(fd+1, &fds, NULL, NULL, NULL);
- assert(ret == 1);
- watch_paths = xs_read_watch(xsh, &len);
- assert(len == 2);
- assert(strcmp(watch_paths[1], "conn-watch") == 0);
- dom_id = -1;
- export_id = -1;
- d = 0;
- printf("Path changed %s\n", watch_paths[0]);
- sscanf(watch_paths[0], WATCH_NODE"/%d/%d/fronten%c",
- &dom_id, &export_id, &d);
- if((dom_id >= 0) && (export_id >= 0) && d == 'd') {
- char *frontend = xs_read(xsh, XBT_NULL, watch_paths[0], NULL);
- if (frontend) {
- handle_connection(dom_id, export_id, frontend);
- xs_rm(xsh, XBT_NULL, watch_paths[0]);
- }
- }
-next_select:
- printf("Awaiting next connection.\n");
- /* TODO - we need to figure out what to free */
- free(watch_paths);
+ FD_SET(pipefds[0], &fds);
+ max_fd = fd > pipefds[0] ? fd : pipefds[0];
+ LIST_FOREACH(pointer, &mount_requests_head, entries) {
+ int tfd = xc_evtchn_fd(pointer->evth);
+ FD_SET(tfd, &fds);
+ if (tfd > max_fd) max_fd = tfd;
+ }
+ ret = select(max_fd+1, &fds, NULL, NULL, NULL);
+ if (ret < 0) {
+ if (errno == EINTR) continue;
+ /* try to recover */
+ else if (errno == EBADF) {
+ struct timeval timeout;
+ memset(&timeout, 0x00, sizeof(timeout));
+ FD_ZERO(&fds);
+ FD_SET(fd, &fds);
+ FD_SET(pipefds[0], &fds);
+ max_fd = fd > pipefds[0] ? fd : pipefds[0];
+ ret = select(max_fd + 1, &fds, NULL, NULL, &timeout);
+ if (ret < 0)
+ err(1, "select: unrecoverable error occurred: %d\n", errno);
+
+ /* trying to find the bogus fd among the open event channels */
+ LIST_FOREACH(pointer, &mount_requests_head, entries) {
+ int tfd = xc_evtchn_fd(pointer->evth);
+ memset(&timeout, 0x00, sizeof(timeout));
+ FD_ZERO(&fds);
+ FD_SET(tfd, &fds);
+ ret = select(tfd + 1, &fds, NULL, NULL, &timeout);
+ if (ret < 0) {
+ FS_DEBUG("fd %d is bogus, closing the related connection\n", tfd);
+ pointer->evth = fd;
+ terminate_mount_request(pointer);
+ continue;
+ }
+ }
+ continue;
+ } else
+ err(1, "select: unrecoverable error occurred: %d\n", errno);
+ }
+ if (FD_ISSET(fd, &fds)) {
+ watch_paths = xs_read_watch(xsh, &len);
+ if (!strcmp(watch_paths[XS_WATCH_TOKEN], "conn-watch")) {
+ dom_id = -1;
+ export_id = -1;
+ d = 0;
+ FS_DEBUG("Path changed %s\n", watch_paths[0]);
+ sscanf(watch_paths[XS_WATCH_PATH], WATCH_NODE"/%d/%d/fronten%c",
+ &dom_id, &export_id, &d);
+ if((dom_id >= 0) && (export_id >= 0) && d == 'd') {
+ char *frontend = xs_read(xsh, XBT_NULL, watch_paths[XS_WATCH_PATH], NULL);
+ if (frontend) {
+ handle_connection(dom_id, export_id, frontend);
+ xs_rm(xsh, XBT_NULL, watch_paths[XS_WATCH_PATH]);
+ }
+ }
+ } else if (!strcmp(watch_paths[XS_WATCH_TOKEN], "frontend-state")) {
+ LIST_FOREACH(pointer, &mount_requests_head, entries) {
+ if (!strncmp(pointer->frontend, watch_paths[XS_WATCH_PATH], strlen(pointer->frontend))) {
+ char *state = xenbus_read_frontend_state(pointer);
+ if (!state || strcmp(state, STATE_READY)) {
+ xenbus_unwatch_frontend_state(pointer);
+ terminate_mount_request(pointer);
+ }
+ free(state);
+ break;
+ }
+ }
+ } else {
+ FS_DEBUG("xenstore watch event unrecognized\n");
+ }
+ FS_DEBUG("Awaiting next connection.\n");
+ /* TODO - we need to figure out what to free */
+ free(watch_paths);
+ }
+ if (FD_ISSET(pipefds[0], &fds)) {
+ struct fs_request *request;
+ if (read_exact(pipefds[0], &request, sizeof(struct fs_request *)) < 0)
+ err(1, "read request failed\n");
+ handle_aio_event(request);
+ }
+ LIST_FOREACH(pointer, &mount_requests_head, entries) {
+ if (FD_ISSET(xc_evtchn_fd(pointer->evth), &fds)) {
+ evtchn_port_t port;
+ port = xc_evtchn_pending(pointer->evth);
+ if (port != -1) {
+ handle_mount(pointer);
+ xc_evtchn_unmask(pointer->evth, port);
+ }
+ }
+ }
} while (1);
}
return curr_export;
}
+static void aio_signal_handler(int signo, siginfo_t *info, void *context)
+{
+ struct fs_request *request = (struct fs_request*) info->si_value.sival_ptr;
+ int saved_errno = errno;
+ if (write_exact(pipefds[1], &request, sizeof(struct fs_request *)) < 0)
+        err(1, "write request failed\n");
+ errno = saved_errno;
+}
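+
+/*
+ * Completion path implemented by the handler above and the select() loop in
+ * await_connections(): when an AIO request finishes, the kernel delivers
+ * SIGUSR2 with the fs_request pointer in si_value; aio_signal_handler()
+ * forwards that pointer down the socketpair, and the main loop picks it up
+ * on pipefds[0] and calls handle_aio_event() on the request.
+ */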
int main(void)
{
struct fs_export *export;
+ struct sigaction act;
+ sigset_t enable;
+
+ sigemptyset(&enable);
+ sigaddset(&enable, SIGUSR2);
+ pthread_sigmask(SIG_UNBLOCK, &enable, NULL);
+
+ sigfillset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO; /* do not restart syscalls to interrupt select(); use sa_sigaction */
+ act.sa_sigaction = aio_signal_handler;
+ sigaction(SIGUSR2, &act, NULL);
/* Open the connection to XenStore first */
xsh = xs_domain_open();
export = create_export("default", "/exports");
xenbus_register_export(export);
+ if (socketpair(PF_UNIX,SOCK_STREAM, 0, pipefds) == -1)
+ err(1, "failed to create pipe\n");
+
await_connections();
/* Close the connection to XenStore when we are finished with everything */
xs_daemon_close(xsh);
#include <xen/event_channel.h>
#include <xen/io/ring.h>
#include <xen/io/fsif.h>
+#include "sys-queue.h"
#define ROOT_NODE "backend/vfs"
#define EXPORTS_SUBNODE "exports"
struct fs_request
{
+ struct fs_mount *mount;
+ int id;
int active;
void *page; /* Pointer to mapped grant */
int count;
struct fs_request *requests;
unsigned short *freelist;
int fds[MAX_FDS];
+ LIST_ENTRY(fs_mount) entries;
};
int xenbus_get_watch_fd(void);
void xenbus_read_mount_request(struct fs_mount *mount, char *frontend);
void xenbus_write_backend_node(struct fs_mount *mount);
-void xenbus_write_backend_ready(struct fs_mount *mount);
+void xenbus_write_backend_state(struct fs_mount *mount, const char *state);
+int xenbus_frontend_state_changed(struct fs_mount *mount, const char *oldstate);
+void xenbus_watch_frontend_state(struct fs_mount *mount);
+void xenbus_unwatch_frontend_state(struct fs_mount *mount);
+char* xenbus_read_frontend_state(struct fs_mount *mount);
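+
+/*
+ * State handshake used by the helpers above: the backend node is written as
+ * Initialised when it is first created, set to Ready once the ring is mapped,
+ * and on disconnect moves through Closing to Closed (see
+ * terminate_mount_request()) before the mount structure is freed.
+ */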
/* File operations, implemented in fs-ops.c */
struct fs_op
--- /dev/null
+#ifndef __FS_DEBUG__
+#define __FS_DEBUG__
+
+// #define DEBUG 1
+
+#ifdef DEBUG
+#define FS_DEBUG(fmt, ...) do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
+#else
+#define FS_DEBUG(fmt, ...) do { } while (0)
+#endif
+
+#endif /*__FS_DEBUG__*/
#include <sys/mount.h>
#include <unistd.h>
#include "fs-backend.h"
+#include "fs-debug.h"
/* For debugging only */
#include <sys/time.h>
#define BUFFER_SIZE 1024
-
static unsigned short get_request(struct fs_mount *mount, struct fsif_request *req)
{
unsigned short id = get_id_from_freelist(mount->freelist);
- printf("Private Request id: %d\n", id);
+ FS_DEBUG("Private Request id: %d\n", id);
memcpy(&mount->requests[id].req_shadow, req, sizeof(struct fsif_request));
mount->requests[id].active = 1;
{
char *file_name, full_path[BUFFER_SIZE];
int fd;
- struct timeval tv1, tv2;
RING_IDX rsp_idx;
fsif_response_t *rsp;
uint16_t req_id;
- printf("Dispatching file open operation (gref=%d).\n", req->u.fopen.gref);
+ FS_DEBUG("Dispatching file open operation (gref=%d).\n", req->u.fopen.gref);
/* Read the request, and open file */
file_name = xc_gnttab_map_grant_ref(mount->gnth,
mount->dom_id,
PROT_READ);
req_id = req->id;
- printf("File open issued for %s\n", file_name);
+ FS_DEBUG("File open issued for %s\n", file_name);
assert(BUFFER_SIZE >
strlen(file_name) + strlen(mount->export->export_path) + 1);
snprintf(full_path, sizeof(full_path), "%s/%s",
mount->export->export_path, file_name);
assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
- printf("Issuing open for %s\n", full_path);
+ FS_DEBUG("Issuing open for %s\n", full_path);
fd = get_fd(mount);
if (fd >= 0) {
int real_fd = open(full_path, O_RDWR);
else
{
mount->fds[fd] = real_fd;
- printf("Got FD: %d for real %d\n", fd, real_fd);
+ FS_DEBUG("Got FD: %d for real %d\n", fd, real_fd);
}
}
/* We can advance the request consumer index, from here on, the request
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)fd;
fsif_response_t *rsp;
uint16_t req_id;
- printf("Dispatching file close operation (fd=%d).\n", req->u.fclose.fd);
+ FS_DEBUG("Dispatching file close operation (fd=%d).\n", req->u.fclose.fd);
req_id = req->id;
if (req->u.fclose.fd < MAX_FDS) {
mount->fds[req->u.fclose.fd] = -1;
} else
ret = -1;
- printf("Got ret: %d\n", ret);
+ FS_DEBUG("Got ret: %d\n", ret);
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overrinden by a response) */
mount->ring.req_cons++;
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)ret;
static void dispatch_file_read(struct fs_mount *mount, struct fsif_request *req)
{
void *buf;
- int fd, i, count;
+ int fd, count;
uint16_t req_id;
unsigned short priv_id;
struct fs_request *priv_req;
PROT_WRITE);
req_id = req->id;
- printf("File read issued for FD=%d (len=%"PRIu64", offest=%"PRIu64")\n",
+    FS_DEBUG("File read issued for FD=%d (len=%"PRIu64", offset=%"PRIu64")\n",
req->u.fread.fd, req->u.fread.len, req->u.fread.offset);
if (req->u.fread.fd < MAX_FDS)
fd = -1;
priv_id = get_request(mount, req);
- printf("Private id is: %d\n", priv_id);
+ FS_DEBUG("Private id is: %d\n", priv_id);
priv_req = &mount->requests[priv_id];
priv_req->page = buf;
priv_req->count = count;
+ priv_req->id = priv_id;
/* Dispatch AIO read request */
bzero(&priv_req->aiocb, sizeof(struct aiocb));
priv_req->aiocb.aio_nbytes = req->u.fread.len;
priv_req->aiocb.aio_offset = req->u.fread.offset;
priv_req->aiocb.aio_buf = buf;
+ priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+ priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
+ priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
assert(aio_read(&priv_req->aiocb) >= 0);
-out:
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overrinden by a response) */
mount->ring.req_cons++;
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
req_id = priv_req->req_shadow.id;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)aio_return(&priv_req->aiocb);
static void dispatch_file_write(struct fs_mount *mount, struct fsif_request *req)
{
void *buf;
- int fd, count, i;
+ int fd, count;
uint16_t req_id;
unsigned short priv_id;
struct fs_request *priv_req;
PROT_READ);
req_id = req->id;
- printf("File write issued for FD=%d (len=%"PRIu64", offest=%"PRIu64")\n",
+    FS_DEBUG("File write issued for FD=%d (len=%"PRIu64", offset=%"PRIu64")\n",
req->u.fwrite.fd, req->u.fwrite.len, req->u.fwrite.offset);
if (req->u.fwrite.fd < MAX_FDS)
fd = -1;
priv_id = get_request(mount, req);
- printf("Private id is: %d\n", priv_id);
+ FS_DEBUG("Private id is: %d\n", priv_id);
priv_req = &mount->requests[priv_id];
priv_req->page = buf;
priv_req->count = count;
+ priv_req->id = priv_id;
/* Dispatch AIO write request */
bzero(&priv_req->aiocb, sizeof(struct aiocb));
priv_req->aiocb.aio_nbytes = req->u.fwrite.len;
priv_req->aiocb.aio_offset = req->u.fwrite.offset;
priv_req->aiocb.aio_buf = buf;
+ priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+ priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
+ priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
assert(aio_write(&priv_req->aiocb) >= 0);
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
req_id = priv_req->req_shadow.id;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)aio_return(&priv_req->aiocb);
static void dispatch_stat(struct fs_mount *mount, struct fsif_request *req)
{
- struct fsif_stat_response *buf;
struct stat stat;
int fd, ret;
uint16_t req_id;
else
fd = -1;
- printf("File stat issued for FD=%d\n", req->u.fstat.fd);
+ FS_DEBUG("File stat issued for FD=%d\n", req->u.fstat.fd);
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overrinden by a response) */
/* Stat, and create the response */
ret = fstat(fd, &stat);
- printf("Mode=%o, uid=%d, a_time=%ld\n",
+ FS_DEBUG("Mode=%o, uid=%d, a_time=%ld\n",
stat.st_mode, stat.st_uid, (long)stat.st_atime);
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->fstat.stat_ret = (uint32_t)ret;
req_id = req->id;
length = req->u.ftruncate.length;
- printf("File truncate issued for FD=%d, length=%"PRId64"\n", req->u.ftruncate.fd, length);
+ FS_DEBUG("File truncate issued for FD=%d, length=%"PRId64"\n", req->u.ftruncate.fd, length);
if (req->u.ftruncate.fd < MAX_FDS)
fd = mount->fds[req->u.ftruncate.fd];
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)ret;
fsif_response_t *rsp;
uint16_t req_id;
- printf("Dispatching remove operation (gref=%d).\n", req->u.fremove.gref);
+ FS_DEBUG("Dispatching remove operation (gref=%d).\n", req->u.fremove.gref);
/* Read the request, and open file */
file_name = xc_gnttab_map_grant_ref(mount->gnth,
mount->dom_id,
PROT_READ);
req_id = req->id;
- printf("File remove issued for %s\n", file_name);
+ FS_DEBUG("File remove issued for %s\n", file_name);
assert(BUFFER_SIZE >
strlen(file_name) + strlen(mount->export->export_path) + 1);
snprintf(full_path, sizeof(full_path), "%s/%s",
mount->export->export_path, file_name);
assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
- printf("Issuing remove for %s\n", full_path);
+ FS_DEBUG("Issuing remove for %s\n", full_path);
ret = remove(full_path);
- printf("Got ret: %d\n", ret);
+ FS_DEBUG("Got ret: %d\n", ret);
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overrinden by a response) */
mount->ring.req_cons++;
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)ret;
fsif_response_t *rsp;
uint16_t req_id;
- printf("Dispatching rename operation (gref=%d).\n", req->u.fremove.gref);
+ FS_DEBUG("Dispatching rename operation (gref=%d).\n", req->u.fremove.gref);
/* Read the request, and open file */
buf = xc_gnttab_map_grant_ref(mount->gnth,
mount->dom_id,
req_id = req->id;
old_file_name = buf + req->u.frename.old_name_offset;
new_file_name = buf + req->u.frename.new_name_offset;
- printf("File rename issued for %s -> %s (buf=%s)\n",
+ FS_DEBUG("File rename issued for %s -> %s (buf=%s)\n",
old_file_name, new_file_name, buf);
assert(BUFFER_SIZE >
strlen(old_file_name) + strlen(mount->export->export_path) + 1);
snprintf(new_full_path, sizeof(new_full_path), "%s/%s",
mount->export->export_path, new_file_name);
assert(xc_gnttab_munmap(mount->gnth, buf, 1) == 0);
- printf("Issuing rename for %s -> %s\n", old_full_path, new_full_path);
+ FS_DEBUG("Issuing rename for %s -> %s\n", old_full_path, new_full_path);
ret = rename(old_full_path, new_full_path);
- printf("Got ret: %d\n", ret);
+ FS_DEBUG("Got ret: %d\n", ret);
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overrinden by a response) */
mount->ring.req_cons++;
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)ret;
fsif_response_t *rsp;
uint16_t req_id;
- printf("Dispatching file create operation (gref=%d).\n", req->u.fcreate.gref);
+ FS_DEBUG("Dispatching file create operation (gref=%d).\n", req->u.fcreate.gref);
/* Read the request, and create file/directory */
mode = req->u.fcreate.mode;
directory = req->u.fcreate.directory;
PROT_READ);
req_id = req->id;
- printf("File create issued for %s\n", file_name);
+ FS_DEBUG("File create issued for %s\n", file_name);
assert(BUFFER_SIZE >
strlen(file_name) + strlen(mount->export->export_path) + 1);
snprintf(full_path, sizeof(full_path), "%s/%s",
if(directory)
{
- printf("Issuing create for directory: %s\n", full_path);
+ FS_DEBUG("Issuing create for directory: %s\n", full_path);
ret = mkdir(full_path, mode);
}
else
{
- printf("Issuing create for file: %s\n", full_path);
+ FS_DEBUG("Issuing create for file: %s\n", full_path);
ret = get_fd(mount);
if (ret >= 0) {
int real_fd = creat(full_path, mode);
else
{
mount->fds[ret] = real_fd;
- printf("Got FD: %d for real %d\n", ret, real_fd);
+ FS_DEBUG("Got FD: %d for real %d\n", ret, real_fd);
}
}
}
- printf("Got ret %d (errno=%d)\n", ret, errno);
+ FS_DEBUG("Got ret %d (errno=%d)\n", ret, errno);
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)ret;
DIR *dir;
struct dirent *dirent = NULL;
- printf("Dispatching list operation (gref=%d).\n", req->u.flist.gref);
+ FS_DEBUG("Dispatching list operation (gref=%d).\n", req->u.flist.gref);
/* Read the request, and list directory */
offset = req->u.flist.offset;
buf = file_name = xc_gnttab_map_grant_ref(mount->gnth,
PROT_READ | PROT_WRITE);
req_id = req->id;
- printf("Dir list issued for %s\n", file_name);
+ FS_DEBUG("Dir list issued for %s\n", file_name);
assert(BUFFER_SIZE >
strlen(file_name) + strlen(mount->export->export_path) + 1);
snprintf(full_path, sizeof(full_path), "%s/%s",
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = ret_val;
uint16_t req_id;
int32_t mode;
- printf("Dispatching file chmod operation (fd=%d, mode=%o).\n",
+ FS_DEBUG("Dispatching file chmod operation (fd=%d, mode=%o).\n",
req->u.fchmod.fd, req->u.fchmod.mode);
req_id = req->id;
if (req->u.fchmod.fd < MAX_FDS)
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)ret;
struct statvfs stat;
int64_t ret;
- printf("Dispatching fs space operation (gref=%d).\n", req->u.fspace.gref);
+ FS_DEBUG("Dispatching fs space operation (gref=%d).\n", req->u.fspace.gref);
/* Read the request, and open file */
file_name = xc_gnttab_map_grant_ref(mount->gnth,
mount->dom_id,
PROT_READ);
req_id = req->id;
- printf("Fs space issued for %s\n", file_name);
+ FS_DEBUG("Fs space issued for %s\n", file_name);
assert(BUFFER_SIZE >
strlen(file_name) + strlen(mount->export->export_path) + 1);
snprintf(full_path, sizeof(full_path), "%s/%s",
mount->export->export_path, file_name);
assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
- printf("Issuing fs space for %s\n", full_path);
+ FS_DEBUG("Issuing fs space for %s\n", full_path);
ret = statvfs(full_path, &stat);
if(ret >= 0)
ret = stat.f_bsize * stat.f_bfree;
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)ret;
else
fd = -1;
- printf("File sync issued for FD=%d\n", req->u.fsync.fd);
+ FS_DEBUG("File sync issued for FD=%d\n", req->u.fsync.fd);
priv_id = get_request(mount, req);
- printf("Private id is: %d\n", priv_id);
+ FS_DEBUG("Private id is: %d\n", priv_id);
priv_req = &mount->requests[priv_id];
+ priv_req->id = priv_id;
/* Dispatch AIO read request */
bzero(&priv_req->aiocb, sizeof(struct aiocb));
priv_req->aiocb.aio_fildes = fd;
+ priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
+ priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
+ priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
assert(aio_fsync(O_SYNC, &priv_req->aiocb) >= 0);
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
req_id = priv_req->req_shadow.id;
- printf("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
+ FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
rsp = RING_GET_RESPONSE(&mount->ring, rsp_idx);
rsp->id = req_id;
rsp->ret_val = (uint64_t)aio_return(&priv_req->aiocb);
#include <stdarg.h>
#include <string.h>
#include <assert.h>
+#include <sys/select.h>
#include <xenctrl.h>
#include <xs.h>
#include <xen/io/fsif.h>
#include "fs-backend.h"
+#include "fs-debug.h"
static bool xenbus_printf(struct xs_handle *xsh,
snprintf(fullpath, sizeof(fullpath), "%s/%s", node, path);
vsnprintf(val, sizeof(val), fmt, args);
va_end(args);
- printf("xenbus_printf (%s) <= %s.\n", fullpath, val);
+ FS_DEBUG("xenbus_printf (%s) <= %s.\n", fullpath, val);
return xs_write(xsh, xbt, fullpath, val, strlen(val));
}
assert(xsh != NULL);
if(xsh == NULL)
{
- printf("Could not open connection to xenbus deamon.\n");
+        FS_DEBUG("Could not open connection to xenbus daemon.\n");
goto error_exit;
}
- printf("Connection to the xenbus deamon opened successfully.\n");
+    FS_DEBUG("Connection to the xenbus daemon opened successfully.\n");
/* Start transaction */
xst = xs_transaction_start(xsh);
if(xst == 0)
{
- printf("Could not start a transaction.\n");
+ FS_DEBUG("Could not start a transaction.\n");
goto error_exit;
}
- printf("XS transaction is %d\n", xst);
+ FS_DEBUG("XS transaction is %d\n", xst);
/* Create node string */
snprintf(node, sizeof(node), "%s/%d", EXPORTS_NODE, export->export_id);
if(!xenbus_printf(xsh, xst, node, "name", "%s", export->name))
{
- printf("Could not write the export node.\n");
+ FS_DEBUG("Could not write the export node.\n");
goto error_exit;
}
perms.perms = XS_PERM_READ;
if(!xs_set_permissions(xsh, xst, EXPORTS_NODE, &perms, 1))
{
- printf("Could not set permissions on the export node.\n");
+ FS_DEBUG("Could not set permissions on the export node.\n");
goto error_exit;
}
assert(xsh != NULL);
self_id = get_self_id();
- printf("Our own dom_id=%d\n", self_id);
+ FS_DEBUG("Our own dom_id=%d\n", self_id);
snprintf(node, sizeof(node), "%s/backend", mount->frontend);
snprintf(backend_node, sizeof(backend_node), "/local/domain/%d/"ROOT_NODE"/%d",
self_id, mount->mount_id);
xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED));
}
-void xenbus_write_backend_ready(struct fs_mount *mount)
+void xenbus_write_backend_state(struct fs_mount *mount, const char *state)
{
char node[1024];
int self_id;
assert(xsh != NULL);
self_id = get_self_id();
snprintf(node, sizeof(node), ROOT_NODE"/%d/state", mount->mount_id);
- xs_write(xsh, XBT_NULL, node, STATE_READY, strlen(STATE_READY));
+ xs_write(xsh, XBT_NULL, node, state, strlen(state));
+}
+
+void xenbus_watch_frontend_state(struct fs_mount *mount)
+{
+ int res;
+ char statepath[1024];
+
+ assert(xsh != NULL);
+ snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
+ res = xs_watch(xsh, statepath, "frontend-state");
+ assert(res);
+}
+
+void xenbus_unwatch_frontend_state(struct fs_mount *mount)
+{
+ int res;
+ char statepath[1024];
+
+ assert(xsh != NULL);
+ snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
+ res = xs_unwatch(xsh, statepath, "frontend-state");
+ assert(res);
+}
+
+int xenbus_frontend_state_changed(struct fs_mount *mount, const char *oldstate)
+{
+ unsigned int len;
+ char statepath[1024];
+ char *state = NULL;
+
+ assert(xsh != NULL);
+ snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
+ state = xs_read(xsh, XBT_NULL, statepath, &len);
+ if (state && len > 0) {
+ if (strcmp(state, oldstate)) {
+ free(state);
+ return 1;
+ } else {
+ free(state);
+ return 0;
+ }
+ } else
+ return 1;
+}
+
+char* xenbus_read_frontend_state(struct fs_mount *mount)
+{
+ unsigned int len;
+ char statepath[1024];
+
+ assert(xsh != NULL);
+ snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
+ return xs_read(xsh, XBT_NULL, statepath, &len);
}
--- /dev/null
+/* $NetBSD: queue.h,v 1.45.14.1 2007/07/18 20:13:24 liamjfoy Exp $ */
+
+/*
+ * Qemu version: Copy from netbsd, removed debug code, removed some of
+ * the implementations. Left in lists, tail queues and circular queues.
+ */
+
+/*
+ * Copyright (c) 1991, 1993
+ * The Regents of the University of California. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * @(#)queue.h 8.5 (Berkeley) 8/20/94
+ */
+
+#ifndef _SYS_QUEUE_H_
+#define _SYS_QUEUE_H_
+
+/*
+ * This file defines three types of data structures:
+ * lists, tail queues, and circular queues.
+ *
+ * A list is headed by a single forward pointer (or an array of forward
+ * pointers for a hash table header). The elements are doubly linked
+ * so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before
+ * or after an existing element or at the head of the list. A list
+ * may only be traversed in the forward direction.
+ *
+ * A tail queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or
+ * after an existing element, at the head of the list, or at the end of
+ * the list. A tail queue may be traversed in either direction.
+ *
+ * A circle queue is headed by a pair of pointers, one to the head of the
+ * list and the other to the tail of the list. The elements are doubly
+ * linked so that an arbitrary element can be removed without a need to
+ * traverse the list. New elements can be added to the list before or after
+ * an existing element, at the head of the list, or at the end of the list.
+ * A circle queue may be traversed in either direction, but has a more
+ * complex end of list detection.
+ *
+ * For details on the use of these macros, see the queue(3) manual page.
+ */
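+
+/*
+ * Minimal usage sketch (illustrative only, not part of the original header):
+ * a list of hypothetical 'struct foo' records built with the macros below.
+ *
+ *     struct foo { int value; LIST_ENTRY(foo) entries; };
+ *     LIST_HEAD(foo_head, foo) head = LIST_HEAD_INITIALIZER(head);
+ *
+ *     struct foo *f = malloc(sizeof(*f));
+ *     f->value = 42;
+ *     LIST_INSERT_HEAD(&head, f, entries);
+ *     LIST_FOREACH(f, &head, entries)
+ *         printf("%d\n", f->value);
+ */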
+
+/*
+ * List definitions.
+ */
+#define LIST_HEAD(name, type) \
+struct name { \
+ struct type *lh_first; /* first element */ \
+}
+
+#define LIST_HEAD_INITIALIZER(head) \
+ { NULL }
+
+#define LIST_ENTRY(type) \
+struct { \
+ struct type *le_next; /* next element */ \
+ struct type **le_prev; /* address of previous next element */ \
+}
+
+/*
+ * List functions.
+ */
+#define LIST_INIT(head) do { \
+ (head)->lh_first = NULL; \
+} while (/*CONSTCOND*/0)
+
+#define LIST_INSERT_AFTER(listelm, elm, field) do { \
+ if (((elm)->field.le_next = (listelm)->field.le_next) != NULL) \
+ (listelm)->field.le_next->field.le_prev = \
+ &(elm)->field.le_next; \
+ (listelm)->field.le_next = (elm); \
+ (elm)->field.le_prev = &(listelm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define LIST_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.le_prev = (listelm)->field.le_prev; \
+ (elm)->field.le_next = (listelm); \
+ *(listelm)->field.le_prev = (elm); \
+ (listelm)->field.le_prev = &(elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define LIST_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.le_next = (head)->lh_first) != NULL) \
+ (head)->lh_first->field.le_prev = &(elm)->field.le_next;\
+ (head)->lh_first = (elm); \
+ (elm)->field.le_prev = &(head)->lh_first; \
+} while (/*CONSTCOND*/0)
+
+#define LIST_REMOVE(elm, field) do { \
+ if ((elm)->field.le_next != NULL) \
+ (elm)->field.le_next->field.le_prev = \
+ (elm)->field.le_prev; \
+ *(elm)->field.le_prev = (elm)->field.le_next; \
+} while (/*CONSTCOND*/0)
+
+#define LIST_FOREACH(var, head, field) \
+ for ((var) = ((head)->lh_first); \
+ (var); \
+ (var) = ((var)->field.le_next))
+
+/*
+ * List access methods.
+ */
+#define LIST_EMPTY(head) ((head)->lh_first == NULL)
+#define LIST_FIRST(head) ((head)->lh_first)
+#define LIST_NEXT(elm, field) ((elm)->field.le_next)
+
+
+/*
+ * Tail queue definitions.
+ */
+#define _TAILQ_HEAD(name, type, qual) \
+struct name { \
+ qual type *tqh_first; /* first element */ \
+ qual type *qual *tqh_last; /* addr of last next element */ \
+}
+#define TAILQ_HEAD(name, type) _TAILQ_HEAD(name, struct type,)
+
+#define TAILQ_HEAD_INITIALIZER(head) \
+ { NULL, &(head).tqh_first }
+
+#define _TAILQ_ENTRY(type, qual) \
+struct { \
+ qual type *tqe_next; /* next element */ \
+ qual type *qual *tqe_prev; /* address of previous next element */\
+}
+#define TAILQ_ENTRY(type) _TAILQ_ENTRY(struct type,)
+
+/*
+ * Tail queue functions.
+ */
+#define TAILQ_INIT(head) do { \
+ (head)->tqh_first = NULL; \
+ (head)->tqh_last = &(head)->tqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define TAILQ_INSERT_HEAD(head, elm, field) do { \
+ if (((elm)->field.tqe_next = (head)->tqh_first) != NULL) \
+ (head)->tqh_first->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (head)->tqh_first = (elm); \
+ (elm)->field.tqe_prev = &(head)->tqh_first; \
+} while (/*CONSTCOND*/0)
+
+#define TAILQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.tqe_next = NULL; \
+ (elm)->field.tqe_prev = (head)->tqh_last; \
+ *(head)->tqh_last = (elm); \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define TAILQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ if (((elm)->field.tqe_next = (listelm)->field.tqe_next) != NULL)\
+ (elm)->field.tqe_next->field.tqe_prev = \
+ &(elm)->field.tqe_next; \
+ else \
+ (head)->tqh_last = &(elm)->field.tqe_next; \
+ (listelm)->field.tqe_next = (elm); \
+ (elm)->field.tqe_prev = &(listelm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define TAILQ_INSERT_BEFORE(listelm, elm, field) do { \
+ (elm)->field.tqe_prev = (listelm)->field.tqe_prev; \
+ (elm)->field.tqe_next = (listelm); \
+ *(listelm)->field.tqe_prev = (elm); \
+ (listelm)->field.tqe_prev = &(elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define TAILQ_REMOVE(head, elm, field) do { \
+ if (((elm)->field.tqe_next) != NULL) \
+ (elm)->field.tqe_next->field.tqe_prev = \
+ (elm)->field.tqe_prev; \
+ else \
+ (head)->tqh_last = (elm)->field.tqe_prev; \
+ *(elm)->field.tqe_prev = (elm)->field.tqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define TAILQ_FOREACH(var, head, field) \
+ for ((var) = ((head)->tqh_first); \
+ (var); \
+ (var) = ((var)->field.tqe_next))
+
+#define TAILQ_FOREACH_REVERSE(var, head, headname, field) \
+ for ((var) = (*(((struct headname *)((head)->tqh_last))->tqh_last)); \
+ (var); \
+ (var) = (*(((struct headname *)((var)->field.tqe_prev))->tqh_last)))
+
+/*
+ * Tail queue access methods.
+ */
+#define TAILQ_EMPTY(head) ((head)->tqh_first == NULL)
+#define TAILQ_FIRST(head) ((head)->tqh_first)
+#define TAILQ_NEXT(elm, field) ((elm)->field.tqe_next)
+
+#define TAILQ_LAST(head, headname) \
+ (*(((struct headname *)((head)->tqh_last))->tqh_last))
+#define TAILQ_PREV(elm, headname, field) \
+ (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
+
+
+/*
+ * Circular queue definitions.
+ */
+#define CIRCLEQ_HEAD(name, type) \
+struct name { \
+ struct type *cqh_first; /* first element */ \
+ struct type *cqh_last; /* last element */ \
+}
+
+#define CIRCLEQ_HEAD_INITIALIZER(head) \
+ { (void *)&head, (void *)&head }
+
+#define CIRCLEQ_ENTRY(type) \
+struct { \
+ struct type *cqe_next; /* next element */ \
+ struct type *cqe_prev; /* previous element */ \
+}
+
+/*
+ * Circular queue functions.
+ */
+#define CIRCLEQ_INIT(head) do { \
+ (head)->cqh_first = (void *)(head); \
+ (head)->cqh_last = (void *)(head); \
+} while (/*CONSTCOND*/0)
+
+#define CIRCLEQ_INSERT_AFTER(head, listelm, elm, field) do { \
+ (elm)->field.cqe_next = (listelm)->field.cqe_next; \
+ (elm)->field.cqe_prev = (listelm); \
+ if ((listelm)->field.cqe_next == (void *)(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (listelm)->field.cqe_next->field.cqe_prev = (elm); \
+ (listelm)->field.cqe_next = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define CIRCLEQ_INSERT_BEFORE(head, listelm, elm, field) do { \
+ (elm)->field.cqe_next = (listelm); \
+ (elm)->field.cqe_prev = (listelm)->field.cqe_prev; \
+ if ((listelm)->field.cqe_prev == (void *)(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (listelm)->field.cqe_prev->field.cqe_next = (elm); \
+ (listelm)->field.cqe_prev = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define CIRCLEQ_INSERT_HEAD(head, elm, field) do { \
+ (elm)->field.cqe_next = (head)->cqh_first; \
+ (elm)->field.cqe_prev = (void *)(head); \
+ if ((head)->cqh_last == (void *)(head)) \
+ (head)->cqh_last = (elm); \
+ else \
+ (head)->cqh_first->field.cqe_prev = (elm); \
+ (head)->cqh_first = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define CIRCLEQ_INSERT_TAIL(head, elm, field) do { \
+ (elm)->field.cqe_next = (void *)(head); \
+ (elm)->field.cqe_prev = (head)->cqh_last; \
+ if ((head)->cqh_first == (void *)(head)) \
+ (head)->cqh_first = (elm); \
+ else \
+ (head)->cqh_last->field.cqe_next = (elm); \
+ (head)->cqh_last = (elm); \
+} while (/*CONSTCOND*/0)
+
+#define CIRCLEQ_REMOVE(head, elm, field) do { \
+ if ((elm)->field.cqe_next == (void *)(head)) \
+ (head)->cqh_last = (elm)->field.cqe_prev; \
+ else \
+ (elm)->field.cqe_next->field.cqe_prev = \
+ (elm)->field.cqe_prev; \
+ if ((elm)->field.cqe_prev == (void *)(head)) \
+ (head)->cqh_first = (elm)->field.cqe_next; \
+ else \
+ (elm)->field.cqe_prev->field.cqe_next = \
+ (elm)->field.cqe_next; \
+} while (/*CONSTCOND*/0)
+
+#define CIRCLEQ_FOREACH(var, head, field) \
+ for ((var) = ((head)->cqh_first); \
+ (var) != (const void *)(head); \
+ (var) = ((var)->field.cqe_next))
+
+#define CIRCLEQ_FOREACH_REVERSE(var, head, field) \
+ for ((var) = ((head)->cqh_last); \
+ (var) != (const void *)(head); \
+ (var) = ((var)->field.cqe_prev))
+
+/*
+ * Circular queue access methods.
+ */
+#define CIRCLEQ_EMPTY(head) ((head)->cqh_first == (void *)(head))
+#define CIRCLEQ_FIRST(head) ((head)->cqh_first)
+#define CIRCLEQ_LAST(head) ((head)->cqh_last)
+#define CIRCLEQ_NEXT(elm, field) ((elm)->field.cqe_next)
+#define CIRCLEQ_PREV(elm, field) ((elm)->field.cqe_prev)
+
+#define CIRCLEQ_LOOP_NEXT(head, elm, field) \
+ (((elm)->field.cqe_next == (void *)(head)) \
+ ? ((head)->cqh_first) \
+ : (elm->field.cqe_next))
+#define CIRCLEQ_LOOP_PREV(head, elm, field) \
+ (((elm)->field.cqe_prev == (void *)(head)) \
+ ? ((head)->cqh_last) \
+ : (elm->field.cqe_prev))
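+
+/*
+ * Illustrative usage sketch (the type "struct item", the field name "link"
+ * and the variables below are hypothetical, not defined by this header):
+ *
+ *	struct item { int value; CIRCLEQ_ENTRY(item) link; };
+ *	CIRCLEQ_HEAD(itemhead, item) head;
+ *	struct item *it;
+ *
+ *	CIRCLEQ_INIT(&head);
+ *	CIRCLEQ_INSERT_TAIL(&head, it, link);	(with it pointing at an initialised item)
+ *	CIRCLEQ_FOREACH(it, &head, link)
+ *		printf("%d\n", it->value);
+ */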
+
+#endif /* !_SYS_QUEUE_H_ */
--- /dev/null
+XEN_ROOT = ../../../
+include $(XEN_ROOT)/tools/Rules.mk
+
+# Init scripts.
+XEND_INITD = init.d/xend
+XENDOMAINS_INITD = init.d/xendomains
+XENDOMAINS_SYSCONFIG = init.d/sysconfig.xendomains
+
+# Xen configuration dir and configs to go there.
+XEN_CONFIG_DIR = /etc/xen
+
+# Xen script dir and scripts to go there.
+XEN_SCRIPT_DIR = /etc/xen/scripts
+XEN_SCRIPTS = network-bridge vif-bridge
+XEN_SCRIPTS += network-route vif-route
+XEN_SCRIPTS += network-nat vif-nat
+XEN_SCRIPTS += block
+XEN_SCRIPTS += block-enbd block-nbd
+XEN_SCRIPTS += vtpm vtpm-delete
+XEN_SCRIPTS += xen-hotplug-cleanup
+XEN_SCRIPTS += external-device-migrate
+XEN_SCRIPTS += vscsi
+XEN_SCRIPT_DATA = xen-script-common.sh locking.sh logging.sh
+XEN_SCRIPT_DATA += xen-hotplug-common.sh xen-network-common.sh vif-common.sh
+XEN_SCRIPT_DATA += block-common.sh vtpm-common.sh vtpm-hotplug-common.sh
+XEN_SCRIPT_DATA += vtpm-migration.sh vtpm-impl
+
+XEN_HOTPLUG_DIR = /etc/hotplug
+XEN_HOTPLUG_SCRIPTS = xen-backend.agent
+
+UDEV_RULES_DIR = /etc/udev
+UDEV_RULES = xen-backend.rules xend.rules
+
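+# Install udev rules when a sufficiently new udev (>= 059) is present, the
+# old hotplug agent otherwise, and both when DESTDIR lies inside DISTDIR.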
+DI = $(if $(DISTDIR),$(shell readlink -f $(DISTDIR)),)
+DE = $(if $(DESTDIR),$(shell readlink -f $(DESTDIR)),)
+ifeq ($(findstring $(DI),$(DE)),$(DI))
+HOTPLUGS=install-hotplug install-udev
+else
+ifeq ($(shell [ -x /usr/bin/udevinfo ] && [ `/usr/bin/udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/'` -ge 059 ] && echo 1),1)
+HOTPLUGS=install-udev
+else
+HOTPLUGS=install-hotplug
+endif
+endif
+
+.PHONY: all
+all:
+
+.PHONY: build
+build:
+
+.PHONY: install
+install: all install-initd install-scripts $(HOTPLUGS)
+
+.PHONY: install-initd
+install-initd:
+ [ -d $(DESTDIR)/etc/init.d ] || $(INSTALL_DIR) $(DESTDIR)/etc/init.d
+ [ -d $(DESTDIR)/etc/sysconfig ] || $(INSTALL_DIR) $(DESTDIR)/etc/sysconfig
+ $(INSTALL_PROG) $(XEND_INITD) $(DESTDIR)/etc/init.d
+ $(INSTALL_PROG) $(XENDOMAINS_INITD) $(DESTDIR)/etc/init.d
+ $(INSTALL_PROG) $(XENDOMAINS_SYSCONFIG) $(DESTDIR)/etc/sysconfig/xendomains
+
+.PHONY: install-scripts
+install-scripts:
+ [ -d $(DESTDIR)$(XEN_SCRIPT_DIR) ] || \
+ $(INSTALL_DIR) $(DESTDIR)$(XEN_SCRIPT_DIR)
+ set -e; for i in $(XEN_SCRIPTS); \
+ do \
+ $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
+ done
+ set -e; for i in $(XEN_SCRIPT_DATA); \
+ do \
+ $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
+ done
+
+.PHONY: install-hotplug
+install-hotplug:
+ [ -d $(DESTDIR)$(XEN_HOTPLUG_DIR) ] || \
+ $(INSTALL_DIR) $(DESTDIR)$(XEN_HOTPLUG_DIR)
+ set -e; for i in $(XEN_HOTPLUG_SCRIPTS); \
+ do \
+ $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_HOTPLUG_DIR); \
+ done
+
+.PHONY: install-udev
+install-udev:
+ [ -d $(DESTDIR)$(UDEV_RULES_DIR) ] || \
+ $(INSTALL_DIR) $(DESTDIR)$(UDEV_RULES_DIR)/rules.d
+ set -e; for i in $(UDEV_RULES); \
+ do \
+ $(INSTALL_DATA) $$i $(DESTDIR)$(UDEV_RULES_DIR); \
+ ln -sf ../$$i $(DESTDIR)$(UDEV_RULES_DIR)/rules.d; \
+ done
+
+.PHONY: clean
+clean:
--- /dev/null
+#!/bin/bash
+
+dir=$(dirname "$0")
+. "$dir/block-common.sh"
+
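+# expand_dev: prefix bare device names with /dev (e.g., illustratively,
+# "sda1" becomes "/dev/sda1"); absolute paths are returned unchanged.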
+expand_dev() {
+ local dev
+ case $1 in
+ /*)
+ dev=$1
+ ;;
+ *)
+ dev=/dev/$1
+ ;;
+ esac
+ echo -n $dev
+}
+
+
+##
+# check_sharing device mode
+#
+# Check whether the device requested is already in use. To use the device in
+# read-only mode, it may be in use in read-only mode, but may not be in use in
+# read-write anywhere at all. To use the device in read-write mode, it must
+# not be in use anywhere at all.
+#
+# Prints one of
+#
+# 'local': the device may not be used because it is mounted in the current
+#          (i.e. the privileged) domain in a way incompatible with the
+#          requested mode;
+# 'guest': the device may not be used because it is already mounted by a guest
+# in a way incompatible with the requested mode; or
+# 'ok': the device may be used.
+#
+check_sharing()
+{
+ local dev="$1"
+ local mode="$2"
+
+ local devmm=$(device_major_minor "$dev")
+ local file
+
+ if [ "$mode" = 'w' ]
+ then
+ toskip="^$"
+ else
+ toskip="^[^ ]* [^ ]* [^ ]* ro[, ]"
+ fi
+
+ for file in $(cat /proc/mounts | grep -v "$toskip" | cut -f 1 -d ' ')
+ do
+ if [ -e "$file" ]
+ then
+ local d=$(device_major_minor "$file")
+
+ if [ "$d" = "$devmm" ]
+ then
+ echo 'local'
+ return
+ fi
+ fi
+ done
+
+ local base_path="$XENBUS_BASE_PATH/$XENBUS_TYPE"
+ for dom in $(xenstore-list "$base_path")
+ do
+ for dev in $(xenstore-list "$base_path/$dom")
+ do
+ d=$(xenstore_read_default "$base_path/$dom/$dev/physical-device" "")
+
+ if [ "$d" = "$devmm" ]
+ then
+ if [ "$mode" = 'w' ]
+ then
+ if ! same_vm $dom
+ then
+ echo 'guest'
+ return
+ fi
+ else
+ local m=$(xenstore_read "$base_path/$dom/$dev/mode")
+ m=$(canonicalise_mode "$m")
+
+ if [ "$m" = 'w' ]
+ then
+ if ! same_vm $dom
+ then
+ echo 'guest'
+ return
+ fi
+ fi
+ fi
+ fi
+ done
+ done
+
+ echo 'ok'
+}
+
+
+##
+# check_device_sharing dev mode
+#
+# Perform the sharing check for the given physical device and mode.
+#
+check_device_sharing()
+{
+ local dev="$1"
+ local mode=$(canonicalise_mode "$2")
+ local result
+
+ if [ "x$mode" = 'x!' ]
+ then
+ return 0
+ fi
+
+ result=$(check_sharing "$dev" "$mode")
+
+ if [ "$result" != 'ok' ]
+ then
+ do_ebusy "Device $dev is mounted " "$mode" "$result"
+ fi
+}
+
+
+##
+# check_file_sharing file dev mode
+#
+# Perform the sharing check for the given file mounted through the given
+# loopback interface, in the given mode.
+#
+check_file_sharing()
+{
+ local file="$1"
+ local dev="$2"
+ local mode="$3"
+
+ result=$(check_sharing "$dev" "$mode")
+
+ if [ "$result" != 'ok' ]
+ then
+ do_ebusy "File $file is loopback-mounted through $dev,
+which is mounted " "$mode" "$result"
+ fi
+}
+
+
+##
+# do_ebusy prefix mode result
+#
+# Helper function for check_device_sharing and check_file_sharing, calling ebusy
+# with an error message constructed from the given prefix, mode, and result
+# from a call to check_sharing.
+#
+do_ebusy()
+{
+ local prefix="$1"
+ local mode="$2"
+ local result="$3"
+
+ if [ "$result" = 'guest' ]
+ then
+ dom='a guest '
+ when='now'
+ else
+ dom='the privileged '
+ when='by a guest'
+ fi
+
+ if [ "$mode" = 'w' ]
+ then
+ m1=''
+ m2=''
+ else
+ m1='read-write '
+ m2='read-only '
+ fi
+
+ release_lock "block"
+ ebusy \
+"${prefix}${m1}in ${dom}domain,
+and so cannot be mounted ${m2}${when}."
+}
+
+
+t=$(xenstore_read_default "$XENBUS_PATH/type" 'MISSING')
+
+case "$command" in
+ add)
+ phys=$(xenstore_read_default "$XENBUS_PATH/physical-device" 'MISSING')
+ if [ "$phys" != 'MISSING' ]
+ then
+ # Depending upon the hotplug configuration, it is possible for this
+ # script to be called twice, so just bail.
+ exit 0
+ fi
+
+ if [ -n "$t" ]
+ then
+ p=$(xenstore_read "$XENBUS_PATH/params")
+ mode=$(xenstore_read "$XENBUS_PATH/mode")
+ fi
+
+ case $t in
+ phy)
+ dev=$(expand_dev $p)
+ FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
+ FRONTEND_UUID=$(xenstore_read_default \
+ "/local/domain/$FRONTEND_ID/vm" 'unknown')
+
+ if [ -L "$dev" ]
+ then
+ dev=$(readlink -f "$dev") || fatal "$dev link does not exist."
+ fi
+ test -e "$dev" || fatal "$dev does not exist."
+ test -b "$dev" || fatal "$dev is not a block device."
+
+ claim_lock "block"
+ check_device_sharing "$dev" "$mode"
+ write_dev "$dev"
+ release_lock "block"
+ exit 0
+ ;;
+
+ file)
+ # Canonicalise the file, for sharing check comparison, and the mode
+ # for ease of use here.
+ file=$(readlink -f "$p") || fatal "$p does not exist."
+ test -f "$file" || fatal "$file does not exist."
+ mode=$(canonicalise_mode "$mode")
+
+ claim_lock "block"
+
+ if [ "$mode" = 'w' ] && ! stat "$file" -c %A | grep -q w
+ then
+ release_lock "block"
+ ebusy \
+"File $file is read-only, and so I will not
+mount it read-write in a guest domain."
+ fi
+
+ loopdev=''
+ for dev in /dev/loop*
+ do
+ if [ ! -b "$dev" ]
+ then
+ continue
+ fi
+
+ f=$(losetup "$dev" 2>/dev/null) || f=''
+
+ if [ "$f" ]
+ then
+ # $dev is in use. Check sharing.
+ if [ "x$mode" = 'x!' ]
+ then
+ continue
+ fi
+
+ f=$(echo "$f" | sed -e 's/.*(\(.*\)).*/\1/g')
+
+ # $f is the filename, as read from losetup, but the loopback
+ # driver truncates filenames at 64 characters, so we need to go
+ # trawling through the store if it's longer than that. Truncation
+ # is indicated by an asterisk at the end of the filename.
+ if expr index "$f" '*' >/dev/null
+ then
+ found=""
+ for dom in $(xenstore-list "$XENBUS_BASE_PATH")
+ do
+ for domdev in $(xenstore-list "$XENBUS_BASE_PATH/$dom")
+ do
+ d=$(xenstore_read_default \
+ "$XENBUS_BASE_PATH/$dom/$domdev/node" "")
+ if [ "$d" = "$dev" ]
+ then
+ f=$(xenstore_read "$XENBUS_BASE_PATH/$dom/$domdev/params")
+ found=1
+ break 2
+ fi
+ done
+ done
+
+ if [ ! "$found" ]
+ then
+ # This loopback device is in use by someone else, so skip it.
+ log debug "Loopback sharing check skips device $dev."
+ continue
+ fi
+ fi
+
+ # Canonicalise the filename for the comparison.
+
+            # I have seen this readlink fail because the filename given by
+ # losetup is only the basename. This cannot happen when the loop
+ # device is set up through this script, because file is
+ # canonicalised above, but it may happen when loop devices are set
+ # up some other way. This readlink may also conceivably fail if
+ # the file backing this loop device has been removed.
+
+ # For maximum safety, in the case that $f does not resolve, we
+ # assume that $file and $f are in the same directory.
+
+ # If you create a loopback filesystem, remove it and continue to
+ # run on it, and then create another file with the same name, then
+ # this check will block that -- don't do that.
+
+ # If you create loop devices through some other mechanism, use
+ # relative filenames, and then use the same filename through this
+ # script, then this check will block that -- don't do that either.
+
+ f=$(readlink -f "$f" || echo $(dirname "$file")/$(basename "$f"))
+
+
+ if [ "$f" = "$file" ]
+ then
+ check_file_sharing "$file" "$dev" "$mode"
+ fi
+ else
+ # $dev is not in use, so we'll remember it for use later; we want
+ # to finish the sharing check first.
+
+ if [ "$loopdev" = '' ]
+ then
+ loopdev="$dev"
+ fi
+ fi
+ done
+
+ if [ "$loopdev" = '' ]
+ then
+ release_lock "block"
+ fatal 'Failed to find an unused loop device'
+ fi
+
+ if LANG=C losetup -h 2>&1 | grep read-only >/dev/null
+ then
+ roflag="-$mode"; roflag="${roflag#-w}"; roflag="${roflag#-!}"
+ else
+ roflag=''
+ fi
+ do_or_die losetup $roflag "$loopdev" "$file"
+ xenstore_write "$XENBUS_PATH/node" "$loopdev"
+ write_dev "$loopdev"
+ release_lock "block"
+ exit 0
+ ;;
+
+ "")
+ claim_lock "block"
+ success
+ release_lock "block"
+ ;;
+ esac
+ ;;
+
+ remove)
+ case $t in
+ phy)
+ exit 0
+ ;;
+
+ file)
+ node=$(xenstore_read "$XENBUS_PATH/node")
+ losetup -d "$node"
+ exit 0
+ ;;
+
+ "")
+ exit 0
+ ;;
+ esac
+ ;;
+
+esac
+
+# If we've reached here, $t is neither phy nor file, so fire a helper script.
+[ -x /etc/xen/scripts/block-"$t" ] && \
+ /etc/xen/scripts/block-"$t" "$command" $node
--- /dev/null
+#
+# Copyright (c) 2005 XenSource Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+
+dir=$(dirname "$0")
+. "$dir/xen-hotplug-common.sh"
+
+findCommand "$@"
+
+if [ "$command" != "add" ] &&
+ [ "$command" != "remove" ]
+then
+ log err "Invalid command: $command"
+ exit 1
+fi
+
+
+XENBUS_PATH="${XENBUS_PATH:?}"
+
+
+ebusy()
+{
+ xenstore_write "$XENBUS_PATH/hotplug-error" "$*" \
+ "$XENBUS_PATH/hotplug-status" busy
+ log err "$@"
+ exit 1
+}
+
+
+##
+# Print the given device's major and minor numbers, written in hex and
+# separated by a colon.
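+# For example (illustrative), /dev/sda1 would typically print "8:1".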
+device_major_minor()
+{
+ stat -L -c %t:%T "$1"
+}
+
+
+##
+# Write physical-device = MM,mm to the store, where MM and mm are the major
+# and minor numbers of device respectively.
+#
+# @param device The device from which major and minor numbers are read, which
+# will be written into the store.
+#
+write_dev() {
+ local mm
+
+ mm=$(device_major_minor "$1")
+
+  if [ -z "$mm" ]
+ then
+ fatal "Backend device does not exist"
+ fi
+
+ xenstore_write "$XENBUS_PATH/physical-device" "$mm"
+
+ success
+}
+
+
+##
+# canonicalise_mode mode
+#
+# Takes the given mode, which may be r, w, ro, rw, w!, or rw!, or variations
+# thereof, and canonicalises it to one of
+#
+# 'r': perform checks for a new read-only mount;
+# 'w': perform checks for a read-write mount; or
+# '!': perform no checks at all.
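+#
+# For example (illustrative): "r" and "ro" canonicalise to 'r', "w" and "rw"
+# to 'w', and "w!" or "rw!" to '!'.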
+#
+canonicalise_mode()
+{
+ local mode="$1"
+
+ if ! expr index "$mode" 'w' >/dev/null
+ then
+ echo 'r'
+ elif ! expr index "$mode" '!' >/dev/null
+ then
+ echo 'w'
+ else
+ echo '!'
+ fi
+}
+
+
+same_vm()
+{
+ local otherdom="$1"
+ # Note that othervm can be MISSING here, because Xend will be racing with
+ # the hotplug scripts -- the entries in /local/domain can be removed by
+ # Xend before the hotplug scripts have removed the entry in
+ # /local/domain/0/backend/. In this case, we want to pretend that the
+ # VM is the same as FRONTEND_UUID, because that way the 'sharing' will be
+ # allowed.
+ local othervm=$(xenstore_read_default "/local/domain/$otherdom/vm" \
+ "$FRONTEND_UUID")
+
+ [ "$FRONTEND_UUID" = "$othervm" ]
+}
+
--- /dev/null
+#!/bin/bash
+
+# Usage: block-enbd [bind server ctl_port |unbind node]
+#
+# The node argument to unbind is the name of the device node we are to
+# unbind.
+#
+# This assumes you're running a correctly configured server at the other end!
+
+dir=$(dirname "$0")
+. "$dir/block-common.sh"
+
+case "$command" in
+ add)
+ for dev in /dev/nd*; do
+ if nbd-client $2:$3 $dev; then
+ write_dev $dev
+ exit 0
+ fi
+ done
+ exit 1
+ ;;
+ remove)
+ nbd-client -d $2
+ exit 0
+ ;;
+esac
--- /dev/null
+#!/bin/bash
+
+# Usage: block-nbd [bind server ctl_port |unbind node]
+#
+# The node argument to unbind is the name of the device node we are to
+# unbind.
+#
+# This assumes you're running a correctly configured server at the other end!
+
+dir=$(dirname "$0")
+. "$dir/block-common.sh"
+
+case "$command" in
+ add)
+ for dev in /dev/nbd*; do
+ if nbd-client $2 $3 $dev; then
+ write_dev $dev
+ exit 0
+ fi
+ done
+ exit 1
+ ;;
+ remove)
+ nbd-client -d $2
+ exit 0
+ ;;
+esac
--- /dev/null
+#!/bin/bash
+
+# Copyright (c) 2005 IBM Corporation
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+set -x
+
+# This script is called by XenD for the migration of external devices.
+# It does not handle the migration of those devices itself, but
+# passes the requests on to further applications.
+# It handles the low-level command line parsing and some of the
+# synchronization.
+
+dir=$(dirname "$0")
+. "$dir/logging.sh"
+
+
+function ext_dev_migrate_usage() {
+cat <<EOF
+Pass the following command line parameters to the script:
+
+-step <n> : n-th migration step
+-host <host> : the destination host
+-domname <domain name> : name of the domain that is migrating
+-type <device type> : the type of device that is migrating
+-subtype <dev. subtype>: the subtype of the device
+-recover : indicates recovery request; an error
+ occurred during migration
+-help : display this help screen
+EOF
+}
+
+# Parse the command line parameters. The following parameters must be
+# passed as the first ones in the sequence:
+# -step [required]
+# -host [required]
+# -domname [required]
+# -type [required]
+# -subtype [optional]
+# -recover [optional]
+# The remaining ones will be passed to the called function.
+function evaluate_params()
+{
+ local step host domname typ recover filename func stype
+ stype=""
+ while [ $# -ge 1 ]; do
+ case "$1" in
+ -step) step=$2; shift; shift;;
+ -host) host=$2; shift; shift;;
+ -domname) domname=$2; shift; shift;;
+ -type) typ=$2; shift; shift;;
+ -subtype) stype=$2; shift; shift;;
+ -recover) recover=1; shift;;
+ -help) ext_dev_migrate_usage; exit 0;;
+ *) break;;
+ esac
+ done
+
+ if [ "$step" = "" -o \
+ "$host" = "" -o \
+ "$typ" = "" -o \
+ "$domname" = "" ]; then
+ echo "Error: Parameter(s) missing (-step/-host/-type/-domname)" 1>&2
+ echo "" 1>&2
+ echo "$0 -help for usage." 1>&2
+ exit 1
+ fi
+
+ filename="$dir/$typ$stype-migration.sh"
+ if [ ! -r $filename ]; then
+ echo "Error: Could not find script '$filename'"
+ return
+ fi
+ . "$filename"
+
+ if [ "$recover" = "1" ]; then
+ func="$typ"_recover
+ eval $func $host $domname $step $*
+ else
+ func="$typ"_migration_step
+ eval $func $host $domname $step $*
+ fi
+}
+
+evaluate_params "$@"
--- /dev/null
+## Path: System/xen
+## Description: xen domain start/stop on boot
+## Type: string
+## Default:
+#
+# The xendomains script can send SysRq requests to domains on shutdown.
+# If you don't want to MIGRATE, SAVE, or SHUTDOWN, this offers a way
+# to do a quick and dirty shutdown ("s e i u o") or at least to sync the disks
+# of the domains ("s").
+#
+XENDOMAINS_SYSRQ=""
+
+## Type: integer
+## Default: 100000
+#
+# If XENDOMAINS_SYSRQ is set, this variable determines how long to wait
+# (in microseconds) after each SysRq, so the domain has a chance to react.
+# If you want a quick'n'dirty shutdown via SysRq, you may want to set
+# it to a relatively high value (1200000).
+#
+XENDOMAINS_USLEEP=100000
+
+## Type: integer
+## Default: 5000000
+#
+# When creating a guest domain, it is sensible to allow a little time for it
+# to get started before creating another domain or proceeding through the
+# boot process. Without this, the booting guests will thrash the disk as they
+# start up. This timeout (in microseconds) specifies the delay after guest
+# domain creation.
+#
+XENDOMAINS_CREATE_USLEEP=5000000
+
+## Type: string
+## Default: ""
+#
+# Set this to a non-empty string if you want to migrate virtual machines
+# on shutdown. The string will be passed to the xm migrate DOMID command
+# as is: It should contain the target IP address of the physical machine
+# to migrate to and optionally parameters like --live. Leave empty if
+# you don't want to try virtual machine relocation on shutdown.
+# If migration succeeds, neither SAVE nor SHUTDOWN will be executed for
+# that domain.
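+#
+# Example (illustrative; adjust the host and options to your setup):
+#   XENDOMAINS_MIGRATE="192.0.2.10 --live"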
+#
+XENDOMAINS_MIGRATE=""
+
+## Type: string
+## Default: /var/lib/xen/save
+#
+# Directory to save running domains to when the system (dom0) is
+# shut down. Will also be used to restore domains from if XENDOMAINS_RESTORE
+# is set (see below). Leave empty to disable domain saving on shutdown
+# (e.g. because you would rather shut domains down).
+# If domain saving does succeed, SHUTDOWN will not be executed.
+#
+XENDOMAINS_SAVE=/var/lib/xen/save
+
+## Type: string
+## Default: "--halt --wait"
+#
+# If neither MIGRATE nor SAVE were enabled or if they failed, you can
+# try to shut down a domain by sending it a shutdown request. To do this,
+# set this to "--halt --wait". Omit the "--wait" flag to avoid waiting
+# for the domain to be really down. Leave empty to skip domain shutdown.
+#
+XENDOMAINS_SHUTDOWN="--halt --wait"
+
+## Type: string
+## Default: "--all --halt --wait"
+#
+# After we have gone over all virtual machines (or only the automatically
+# started ones, see XENDOMAINS_AUTO_ONLY below) in a loop and sent SysRq,
+# migrated, saved and/or shut them down according to the settings above, we
+# might want to shut down the virtual machines that are still running
+# for some reason or another. To do this, set this variable to
+# "--all --halt --wait"; it will be passed to xm shutdown.
+# Leave it empty to do nothing special here.
+# (Note: This will hit all virtual machines, even if XENDOMAINS_AUTO_ONLY
+# is set.)
+#
+XENDOMAINS_SHUTDOWN_ALL="--all --halt --wait"
+
+## Type: boolean
+## Default: true
+#
+# This variable determines whether saved domains from XENDOMAINS_SAVE
+# will be restored on system startup.
+#
+XENDOMAINS_RESTORE=true
+
+## Type: string
+## Default: /etc/xen/auto
+#
+# This variable sets the directory where the configurations of domains
+# that should be started automatically on system startup are stored.
+# Leave empty if you don't want to start domains automatically
+# (or just don't place any xen domain config files in that dir).
+# Note that the script tries to be clever if both RESTORE and AUTO are
+# set: It will first restore saved domains and then only start domains
+# in AUTO which are not running yet.
+# Note that the name matching is somewhat fuzzy.
+#
+XENDOMAINS_AUTO=/etc/xen/auto
+
+## Type: boolean
+## Default: false
+#
+# If this variable is set to "true", only the domains started via config
+# files in XENDOMAINS_AUTO will be treated according to XENDOMAINS_SYSRQ,
+# XENDOMAINS_MIGRATE, XENDOMAINS_SAVE, XENDOMAINS_SHUTDOWN; otherwise
+# all running domains will be.
+# Note that the name matching is somewhat fuzzy.
+#
+XENDOMAINS_AUTO_ONLY=false
+
+## Type: integer
+## Default: 300
+#
+# On xendomains stop, a number of xm commands (xm migrate, save, shutdown,
+# shutdown --all) may be executed. In the worst case, these commands may
+# stall forever, which will prevent a successful shutdown of the machine.
+# If this variable is non-zero, the script will set up a watchdog timer
+# for each of these xm commands and time it out after the number of seconds
+# specified by this variable.
+# Note that SHUTDOWN_ALL will not be called if no virtual machines or only
+# zombies are still running, so you don't need to enable this timeout just
+# for the zombie case.
+# The setting should be large enough to make sure that migrate/save/shutdown
+# can succeed. If you do live migrations, keep in mind that live migration
+# of a 1GB machine over Gigabit ethernet may actually take something like
+# 100s (assuming that live migration uses 10% of the network bandwidth).
+# Depending on the virtual machine, a shutdown may also require a significant
+# amount of time. So it is best to set this variable to a large value and hope the
+# watchdog never fires.
+#
+XENDOMAINS_STOP_MAXWAIT=300
+
--- /dev/null
+#!/bin/bash
+#
+# xend Script to start and stop the Xen control daemon.
+#
+# Author: Keir Fraser <keir.fraser@cl.cam.ac.uk>
+#
+# chkconfig: 2345 98 01
+# description: Starts and stops the Xen control daemon.
+### BEGIN INIT INFO
+# Provides: xend
+# Required-Start: $syslog $remote_fs
+# Should-Start:
+# Required-Stop: $syslog $remote_fs
+# Should-Stop:
+# Default-Start: 3 4 5
+# Default-Stop: 0 1 2 6
+# Default-Enabled: yes
+# Short-Description: Start/stop xend
+# Description: Starts and stops the Xen control daemon.
+### END INIT INFO
+
+if ! grep -q "control_d" /proc/xen/capabilities ; then
+ exit 0
+fi
+
+# Wait for Xend to be up
+function await_daemons_up
+{
+ i=1
+ rets=10
+ xend status
+ while [ $? -ne 0 -a $i -lt $rets ]; do
+ sleep 1
+ echo -n .
+ i=$(($i + 1))
+ xend status
+ done
+}
+
+case "$1" in
+ start)
+ touch /var/lock/subsys/xend
+ xend start
+ await_daemons_up
+ ;;
+ stop)
+ xend stop
+ rm -f /var/lock/subsys/xend
+ ;;
+ status)
+ xend status
+ ;;
+ reload)
+ xend reload
+ ;;
+ restart|force-reload)
+ xend restart
+ await_daemons_up
+ ;;
+ *)
+ # do not advertise unreasonable commands that there is no reason
+        # to use with this service
+ echo $"Usage: $0 {start|stop|status|restart|reload|force-reload}"
+ exit 1
+esac
+
+exit $?
+
--- /dev/null
+#!/bin/bash
+#
+# /etc/init.d/xendomains
+# Start / stop domains automatically when domain 0 boots / shuts down.
+#
+# chkconfig: 345 99 00
+# description: Start / stop Xen domains.
+#
+# This script offers fairly basic functionality.  It should work on Red Hat
+# as well as on LSB-compliant SuSE releases and on Debian with the LSB package
+# installed. (LSB is the Linux Standard Base)
+#
+# Based on the example in the "Designing High Quality Integrated Linux
+# Applications HOWTO" by Avi Alkalay
+# <http://www.tldp.org/HOWTO/HighQuality-Apps-HOWTO/>
+#
+### BEGIN INIT INFO
+# Provides: xendomains
+# Required-Start: $syslog $remote_fs xend
+# Should-Start:
+# Required-Stop: $syslog $remote_fs xend
+# Should-Stop:
+# Default-Start: 3 4 5
+# Default-Stop: 0 1 2 6
+# Default-Enabled: yes
+# Short-Description: Start/stop secondary xen domains
+# Description: Start / stop domains automatically when domain 0
+# boots / shuts down.
+### END INIT INFO
+
+# The correct exit code would probably be 5, but it's enough
+# if xend complains when we're not running as the privileged domain
+if ! [ -e /proc/xen/privcmd ]; then
+ exit 0
+fi
+
+LOCKFILE=/var/lock/subsys/xendomains
+XENDOM_CONFIG=/etc/sysconfig/xendomains
+
+test -r $XENDOM_CONFIG || { echo "$XENDOM_CONFIG does not exist";
+ if [ "$1" = "stop" ]; then exit 0;
+ else exit 6; fi; }
+
+. $XENDOM_CONFIG
+
+# Use the SUSE rc_ init script functions;
+# emulate them on LSB, RH and other systems
+if test -e /etc/rc.status; then
+ # SUSE rc script library
+ . /etc/rc.status
+else
+ _cmd=$1
+ declare -a _SMSG
+ if test "${_cmd}" = "status"; then
+ _SMSG=(running dead dead unused unknown)
+ _RC_UNUSED=3
+ else
+ _SMSG=(done failed failed missed failed skipped unused failed failed)
+ _RC_UNUSED=6
+ fi
+ if test -e /etc/init.d/functions; then
+ # REDHAT
+ . /etc/init.d/functions
+ echo_rc()
+ {
+ #echo -n " [${_SMSG[${_RC_RV}]}] "
+ if test ${_RC_RV} = 0; then
+ success " [${_SMSG[${_RC_RV}]}] "
+ else
+ failure " [${_SMSG[${_RC_RV}]}] "
+ fi
+ }
+ elif test -e /lib/lsb/init-functions; then
+ # LSB
+ . /lib/lsb/init-functions
+ if alias log_success_msg >/dev/null 2>/dev/null; then
+ echo_rc()
+ {
+ echo " [${_SMSG[${_RC_RV}]}] "
+ }
+ else
+ echo_rc()
+ {
+ if test ${_RC_RV} = 0; then
+ log_success_msg " [${_SMSG[${_RC_RV}]}] "
+ else
+ log_failure_msg " [${_SMSG[${_RC_RV}]}] "
+ fi
+ }
+ fi
+ else
+ # emulate it
+ echo_rc()
+ {
+ echo " [${_SMSG[${_RC_RV}]}] "
+ }
+ fi
+ rc_reset() { _RC_RV=0; }
+ rc_failed()
+ {
+ if test -z "$1"; then
+ _RC_RV=1;
+ elif test "$1" != "0"; then
+ _RC_RV=$1;
+ fi
+ return ${_RC_RV}
+ }
+ rc_check()
+ {
+    rc_failed $?
+ }
+ rc_status()
+ {
+ rc_failed $?
+ if test "$1" = "-r"; then _RC_RV=0; shift; fi
+ if test "$1" = "-s"; then rc_failed 5; echo_rc; rc_failed 3; shift; fi
+ if test "$1" = "-u"; then rc_failed ${_RC_UNUSED}; echo_rc; rc_failed 3; shift; fi
+ if test "$1" = "-v"; then echo_rc; shift; fi
+ if test "$1" = "-r"; then _RC_RV=0; shift; fi
+ return ${_RC_RV}
+ }
+ rc_exit() { exit ${_RC_RV}; }
+ rc_active()
+ {
+ if test -z "$RUNLEVEL"; then read RUNLEVEL REST < <(/sbin/runlevel); fi
+ if test -e /etc/init.d/S[0-9][0-9]${1}; then return 0; fi
+ return 1
+ }
+fi
+
+if ! which usleep >&/dev/null
+then
+ usleep()
+ {
+ if [ -n "$1" ]
+ then
+ sleep $(( $1 / 1000000 ))
+ fi
+ }
+fi
+
+# Reset status of this service
+rc_reset
+
+##
+# Returns 0 (success) if the given parameter names a directory, and that
+# directory is not empty.
+#
+contains_something()
+{
+ if [ -d "$1" ] && [ `/bin/ls $1 | wc -l` -gt 0 ]
+ then
+ return 0
+ else
+ return 1
+ fi
+}
+
+# read name from xen config file
+rdname()
+{
+ NM=$(xm create --quiet --dryrun --defconfig "$1" |
+ sed -n 's/^.*(name \(.*\))$/\1/p')
+}
+
+rdnames()
+{
+ NAMES=
+ if ! contains_something "$XENDOMAINS_AUTO"
+ then
+ return
+ fi
+ for dom in $XENDOMAINS_AUTO/*; do
+ rdname $dom
+ if test -z $NAMES; then
+ NAMES=$NM;
+ else
+ NAMES="$NAMES|$NM"
+ fi
+ done
+}
+
+parseln()
+{
+ if [[ "$1" =~ "\(domain" ]]; then
+ name=;id=
+ else if [[ "$1" =~ "\(name" ]]; then
+ name=$(echo $1 | sed -e 's/^.*(name \(.*\))$/\1/')
+ else if [[ "$1" =~ "\(domid" ]]; then
+ id=$(echo $1 | sed -e 's/^.*(domid \(.*\))$/\1/')
+ fi; fi; fi
+
+ [ -n "$name" -a -n "$id" ] && return 0 || return 1
+}
+
+is_running()
+{
+ rdname $1
+ RC=1
+ name=;id=
+ while read LN; do
+ parseln "$LN" || continue
+ if test $id = 0; then continue; fi
+ case $name in
+ ($NM)
+ RC=0
+ ;;
+ esac
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
+ return $RC
+}
+
+start()
+{
+ if [ -f $LOCKFILE ]; then
+ echo -e "xendomains already running (lockfile exists)"
+ return;
+ fi
+
+ saved_domains=" "
+ if [ "$XENDOMAINS_RESTORE" = "true" ] &&
+ contains_something "$XENDOMAINS_SAVE"
+ then
+ mkdir -p $(dirname "$LOCKFILE")
+ touch $LOCKFILE
+ echo -n "Restoring Xen domains:"
+ saved_domains=`ls $XENDOMAINS_SAVE`
+ for dom in $XENDOMAINS_SAVE/*; do
+ if [ -f $dom ] ; then
+ HEADER=`head -c 16 $dom | head -n 1 2> /dev/null`
+ if [ $HEADER = "LinuxGuestRecord" ]; then
+ echo -n " ${dom##*/}"
+ XMR=`xm restore $dom 2>&1 1>/dev/null`
+ #xm restore $dom
+ if [ $? -ne 0 ]; then
+ echo -e "\nAn error occurred while restoring domain ${dom##*/}:\n$XMR"
+ rc_failed $?
+ echo -e '!'
+ else
+ # mv $dom ${dom%/*}/.${dom##*/}
+ rm $dom
+ fi
+ fi
+ fi
+ done
+ echo -e
+ fi
+
+ if contains_something "$XENDOMAINS_AUTO"
+ then
+ touch $LOCKFILE
+ echo -n "Starting auto Xen domains:"
+ # We expect config scripts for auto starting domains to be in
+ # XENDOMAINS_AUTO - they could just be symlinks to files elsewhere
+
+ # Create all domains with config files in XENDOMAINS_AUTO.
+    # TODO: We should record which domain name belongs to which config file,
+    # so we have the option to selectively shut down / migrate later
+ # If a domain statefile from $XENDOMAINS_SAVE matches a domain name
+ # in $XENDOMAINS_AUTO, do not try to start that domain; if it didn't
+ # restore correctly it requires administrative attention.
+ for dom in $XENDOMAINS_AUTO/*; do
+ echo -n " ${dom##*/}"
+ shortdom=$(echo $dom | sed -n 's/^.*\/\(.*\)$/\1/p')
+ echo $saved_domains | grep -w $shortdom > /dev/null
+ if [ $? -eq 0 ] || is_running $dom; then
+ echo -n "(skip)"
+ else
+ XMC=`xm create --quiet --defconfig $dom`
+ if [ $? -ne 0 ]; then
+ echo -e "\nAn error occurred while creating domain ${dom##*/}: $XMC\n"
+ rc_failed $?
+ echo -e '!'
+ else
+ usleep $XENDOMAINS_CREATE_USLEEP
+ fi
+ fi
+ done
+ fi
+}
+
+all_zombies()
+{
+ name=;id=
+ while read LN; do
+ parseln "$LN" || continue
+ if test $id = 0; then continue; fi
+ if test "$state" != "-b---d" -a "$state" != "-----d"; then
+ return 1;
+ fi
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
+ return 0
+}
+
+# Wait for max $XENDOMAINS_STOP_MAXWAIT for xm $1 to finish;
+# if it has not exited by that time kill it, so the init script will
+# succeed within a finite amount of time; if $2 is nonnull, it will
+# kill the command as well as soon as no domains (except for zombies)
+# are left (used for shutdown --all). The third parameter, if any, suppresses
+# the output of progress dots (to avoid formatting issues).
+watchdog_xm()
+{
+ if test -z "$XENDOMAINS_STOP_MAXWAIT" -o "$XENDOMAINS_STOP_MAXWAIT" = "0"; then
+ exit
+ fi
+
+ usleep 20000
+ for no in `seq 0 $XENDOMAINS_STOP_MAXWAIT`; do
+ # exit if xm save/migrate/shutdown is finished
+ PSAX=`ps axlw | grep "xm $1" | grep -v grep`
+ if test -z "$PSAX"; then exit; fi
+ if ! test -n "$3"; then echo -n '.'; fi
+ sleep 1
+    # go to kill immediately if there are only zombies left
+ if all_zombies && test -n "$2"; then break; fi
+ done
+ sleep 1
+ read PSF PSUID PSPID PSPPID < <(echo "$PSAX")
+ # kill xm $1
+ kill $PSPID >/dev/null 2>&1
+
+ echo -e .
+}
+
+stop()
+{
+ exec 3>&2 2> /dev/null
+
+ # Collect list of domains to shut down
+ if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
+ rdnames
+ fi
+ echo -n "Shutting down Xen domains:"
+ name=;id=
+ while read LN; do
+ parseln "$LN" || continue
+ if test $id = 0; then continue; fi
+ echo -n " $name"
+ if test "$XENDOMAINS_AUTO_ONLY" = "true"; then
+ eval "
+ case \"\$name\" in
+ ($NAMES)
+ # nothing
+ ;;
+ (*)
+ echo -e '(skip)'
+ continue
+ ;;
+ esac
+ "
+ fi
+    # XENDOMAINS_SYSRQ should be something like just "s"
+ # or "s e i u" or even "s e s i u o"
+ # for the latter, you should set XENDOMAINS_USLEEP to 1200000 or so
+ if test -n "$XENDOMAINS_SYSRQ"; then
+ for sysrq in $XENDOMAINS_SYSRQ; do
+ echo -n "(SR-$sysrq)"
+ XMR=`xm sysrq $id $sysrq 2>&1 1>/dev/null`
+ if test $? -ne 0; then
+ echo -e "\nAn error occurred while doing sysrq on domain:\n$XMR\n"
+ rc_failed $?
+ echo -n '!'
+ fi
+ # usleep just ignores empty arg
+ usleep $XENDOMAINS_USLEEP
+ done
+ fi
+ if test "$state" = "-b---d" -o "$state" = "-----d"; then
+ echo -n "(zomb)"
+ continue
+ fi
+ if test -n "$XENDOMAINS_MIGRATE"; then
+ echo -n "(migr)"
+ watchdog_xm migrate &
+ WDOG_PID=$!
+ XMR=`xm migrate $id $XENDOMAINS_MIGRATE 2>&1 1>/dev/null`
+ if test $? -ne 0; then
+ echo -e "\nAn error occurred while migrating domain:\n$XMR\n"
+ rc_failed $?
+ echo -e '!'
+
+ kill $WDOG_PID >/dev/null 2>&1
+ else
+ kill $WDOG_PID >/dev/null 2>&1
+
+ echo -e .
+ usleep 1000
+ continue
+ fi
+ fi
+ if test -n "$XENDOMAINS_SAVE"; then
+ echo -n "(save)"
+ watchdog_xm save &
+ WDOG_PID=$!
+ mkdir -p "$XENDOMAINS_SAVE"
+ XMR=`xm save $id $XENDOMAINS_SAVE/$name 2>&1 1>/dev/null`
+ if test $? -ne 0; then
+ echo -e "\nAn error occurred while saving domain:\n$XMR\n"
+ rc_failed $?
+ echo -e '!'
+ kill $WDOG_PID >/dev/null 2>&1
+ else
+ kill $WDOG_PID >/dev/null 2>&1
+ echo -e .
+ usleep 1000
+ continue
+ fi
+ fi
+ if test -n "$XENDOMAINS_SHUTDOWN"; then
+ # XENDOMAINS_SHUTDOWN should be "--halt --wait"
+ echo -n "(shut)"
+ watchdog_xm shutdown &
+ WDOG_PID=$!
+ XMR=`xm shutdown $id $XENDOMAINS_SHUTDOWN 2>&1 1>/dev/null`
+ if test $? -ne 0; then
+ echo -e "\nAn error occurred while shutting down domain:\n$XMR\n"
+ rc_failed $?
+ echo -e '!'
+ fi
+ kill $WDOG_PID >/dev/null 2>&1
+ fi
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
+
+  # NB. this shuts down ALL Xen domains (politely), not just the ones in
+  # AUTODIR/*.
+  # This is because it's easier to do ;-) but arguably, if this script is run
+  # on system shutdown, then it's also the right thing to do.
+ if ! all_zombies && test -n "$XENDOMAINS_SHUTDOWN_ALL"; then
+ # XENDOMAINS_SHUTDOWN_ALL should be "--all --halt --wait"
+ echo -n " SHUTDOWN_ALL "
+ watchdog_xm shutdown 1 false &
+ WDOG_PID=$!
+ XMR=`xm shutdown $XENDOMAINS_SHUTDOWN_ALL 2>&1 1>/dev/null`
+ if test $? -ne 0; then
+ echo -e "\nAn error occurred while shutting down all domains: $XMR\n"
+ rc_failed $?
+ echo -e '!'
+ fi
+ kill $WDOG_PID >/dev/null 2>&1
+ fi
+
+ # Unconditionally delete lock file
+ rm -f $LOCKFILE
+
+ exec 2>&3
+}
+
+check_domain_up()
+{
+ name=;id=
+ while read LN; do
+ parseln "$LN" || continue
+ if test $id = 0; then continue; fi
+ case $name in
+ ($1)
+ return 0
+ ;;
+ esac
+ done < <(xm list -l | grep '(\(domain\|domid\|name\)')
+ return 1
+}
+
+check_all_auto_domains_up()
+{
+ if ! contains_something "$XENDOMAINS_AUTO"
+ then
+ return 0
+ fi
+ missing=
+ for nm in $XENDOMAINS_AUTO/*; do
+ rdname $nm
+ found=0
+ if check_domain_up "$NM"; then
+ echo -n " $name"
+ else
+ missing="$missing $NM"
+ fi
+ done
+ if test -n "$missing"; then
+ echo -n " MISS AUTO:$missing"
+ return 1
+ fi
+ return 0
+}
+
+check_all_saved_domains_up()
+{
+ if ! contains_something "$XENDOMAINS_SAVE"
+ then
+ return 0
+ fi
+ missing=`/bin/ls $XENDOMAINS_SAVE`
+ echo -n " MISS SAVED: " $missing
+ return 1
+}
+
+# This does NOT necessarily restart all running domains: instead it
+# stops all running domains and then boots all the domains specified in
+# AUTODIR. If other domains have been started manually then they will
+# not get restarted.
+# Commented out to avoid confusion!
+
+restart()
+{
+ stop
+ start
+}
+
+reload()
+{
+ restart
+}
+
+
+case "$1" in
+ start)
+ start
+ rc_status
+ if test -f $LOCKFILE; then rc_status -v; fi
+ ;;
+
+ stop)
+ stop
+ rc_status -v
+ ;;
+
+ restart)
+ restart
+ ;;
+ reload)
+ reload
+ ;;
+
+ status)
+ echo -n "Checking for xendomains:"
+ if test ! -f $LOCKFILE; then
+ rc_failed 3
+ else
+ check_all_auto_domains_up
+ rc_status
+ check_all_saved_domains_up
+ rc_status
+ fi
+ rc_status -v
+ ;;
+
+ *)
+ echo "Usage: $0 {start|stop|restart|reload|status}"
+ rc_failed 3
+ rc_status -v
+ ;;
+esac
+
+rc_exit
--- /dev/null
+#
+# Copyright (c) 2005 XenSource Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+#
+# Serialisation
+#
+
+LOCK_SLEEPTIME=1
+LOCK_SPINNING_RETRIES=5
+LOCK_RETRIES=100
+LOCK_BASEDIR=/var/run/xen-hotplug
+
+
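+# Typical usage in the hotplug scripts (illustrative):
+#   claim_lock "block"
+#   ... critical section ...
+#   release_lock "block"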
+claim_lock()
+{
+ local lockdir="$LOCK_BASEDIR/$1"
+ mkdir -p "$LOCK_BASEDIR"
+ _claim_lock "$lockdir"
+}
+
+
+release_lock()
+{
+ _release_lock "$LOCK_BASEDIR/$1"
+}
+
+
+_claim_lock()
+{
+ local lockdir="$1"
+ local owner=$(_lock_owner "$lockdir")
+ local retries=0
+
+ while [ $retries -lt $LOCK_RETRIES ]
+ do
+ mkdir "$lockdir" 2>/dev/null && trap "release_lock $1; sigerr" ERR &&
+ _update_lock_info "$lockdir" && return
+
+ local new_owner=$(_lock_owner "$lockdir")
+ if [ "$new_owner" != "$owner" ]
+ then
+ owner="$new_owner"
+ retries=0
+ fi
+
+ if [ $retries -gt $LOCK_SPINNING_RETRIES ]
+ then
+ sleep $LOCK_SLEEPTIME
+ else
+ sleep 0
+ fi
+ retries=$(($retries + 1))
+ done
+ _steal_lock "$lockdir"
+}
+
+
+_release_lock()
+{
+ trap sigerr ERR
+ rm -rf "$1" 2>/dev/null || true
+}
+
+
+_steal_lock()
+{
+ local lockdir="$1"
+ local owner=$(cat "$lockdir/owner" 2>/dev/null || echo "unknown")
+ log err "Forced to steal lock on $lockdir from $owner!"
+ _release_lock "$lockdir"
+ _claim_lock "$lockdir"
+}
+
+
+_lock_owner()
+{
+ cat "$1/owner" 2>/dev/null || echo "unknown"
+}
+
+
+_update_lock_info()
+{
+ echo "$$: $0" >"$1/owner"
+}
--- /dev/null
+#
+# Copyright (c) 2005 XenSource Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
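+# Send a message to syslog at the given priority, falling back to stderr if
+# logger fails.  Usage, as elsewhere in these scripts: log err "some message".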
+log() {
+ local level="$1"
+ shift
+ logger -p "daemon.$level" -- "$0:" "$@" || echo "$0 $@" >&2
+}
--- /dev/null
+#!/bin/bash
+#============================================================================
+# Default Xen network start/stop script.
+# Xend calls a network script when it starts.
+# The script name to use is defined in /etc/xen/xend-config.sxp
+# in the network-script field.
+#
+# This script creates a bridge (default ${netdev}), adds a device
+# (defaults to the device on the default gateway route) to it, copies
+# the IP addresses from the device to the bridge and adjusts the routes
+# accordingly.
+#
+# If all goes well, this should ensure that networking stays up.
+# However, some configurations are upset by this, especially
+# NFS roots. If the bridged setup does not meet your needs,
+# configure a different script, for example using routing instead.
+#
+# Usage:
+#
+# network-bridge (start|stop|status) {VAR=VAL}*
+#
+# Vars:
+#
+# bridge The bridge to use (default ${netdev}).
+# netdev The interface to add to the bridge (default gateway device).
+# antispoof Whether to use iptables to prevent spoofing (default no).
+#
+# Internal Vars:
+# pdev="p${netdev}"
+# tdev=tmpbridge
+#
+# start:
+# Creates the bridge as tdev
+# Copies the IP and MAC addresses from pdev to bridge
+# Renames netdev to be pdev
+# Renames tdev to bridge
+# Enslaves pdev to bridge
+#
+# stop:
+# Removes pdev from the bridge
+# Transfers addresses, routes from bridge to pdev
+# Renames bridge to tdev
+# Renames pdev to netdev
+# Deletes tdev
+#
+# status:
+# Print addresses, interfaces, routes
+#
+#============================================================================
+
+
+dir=$(dirname "$0")
+. "$dir/xen-script-common.sh"
+. "$dir/xen-network-common.sh"
+
+findCommand "$@"
+evalVariables "$@"
+
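+# is_network_root: succeed (return 0) if the root filesystem is network-backed
+# (NFS, or mounted with _netdev) over $netdev, in which case moving that
+# interface onto a bridge would cut off the root filesystem.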
+is_network_root () {
+ local rootfs=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $3; }}' /etc/mtab)
+ local rootopts=$(awk '{ if ($1 !~ /^[ \t]*#/ && $2 == "/") { print $4; }}' /etc/mtab)
+
+ [[ "$rootfs" =~ "^nfs" ]] || [[ "$rootopts" =~ "_netdev" ]] && has_nfsroot=1 || has_nfsroot=0
+ if [ $has_nfsroot -eq 1 ]; then
+ local bparms=$(cat /proc/cmdline)
+ for p in $bparms; do
+ local ipaddr=$(echo $p | awk /nfsroot=/'{ print substr($1,9,index($1,":")-9) }')
+ if [ "$ipaddr" != "" ]; then
+ local nfsdev=$(ip route get $ipaddr | awk /$ipaddr/'{ print $3 }')
+ [[ "$nfsdev" == "$netdev" ]] && return 0 || return 1
+ fi
+ done
+ fi
+ return 1
+}
+
+find_alt_device () {
+ local interf=$1
+ local prefix=${interf%[[:digit:]]}
+ local ifs=$(ip link show | grep " $prefix" |\
+ gawk '{ printf ("%s",substr($2,1,length($2)-1)) }' |\
+ sed s/$interf//)
+ echo "$ifs"
+}
+
+netdev=${netdev:-$(ip route list 0.0.0.0/0 | \
+ sed 's/.*dev \([a-z]\+[0-9]\+\).*$/\1/')}
+if is_network_root ; then
+ altdevs=$(find_alt_device $netdev)
+ for netdev in $altdevs; do break; done
+ if [ -z "$netdev" ]; then
+ [ -x /usr/bin/logger ] && /usr/bin/logger "network-bridge: bridging not supported on network root; not starting"
+ exit
+ fi
+fi
+netdev=${netdev:-eth0}
+bridge=${bridge:-${netdev}}
+antispoof=${antispoof:-no}
+
+pdev="p${netdev}"
+tdev=tmpbridge
+
+get_ip_info() {
+ addr_pfx=`ip addr show dev $1 | egrep '^ *inet' | sed -e 's/ *inet //' -e 's/ .*//'`
+ gateway=`ip route show dev $1 | fgrep default | sed 's/default via //'`
+}
+
+do_ifup() {
+ if ! ifup $1 ; then
+ if [ -n "$addr_pfx" ] ; then
+ # use the info from get_ip_info()
+ ip addr flush $1
+ ip addr add ${addr_pfx} dev $1
+ ip link set dev $1 up
+ [ -n "$gateway" ] && ip route add default via ${gateway}
+ fi
+ fi
+}
+
+# Usage: transfer_addrs src dst
+# Copy all IP addresses (including aliases) from device $src to device $dst.
+transfer_addrs () {
+ local src=$1
+ local dst=$2
+ # Don't bother if $dst already has IP addresses.
+ if ip addr show dev ${dst} | egrep -q '^ *inet ' ; then
+ return
+ fi
+ # Address lines start with 'inet' and have the device in them.
+ # Replace 'inet' with 'ip addr add' and change the device name $src
+ # to 'dev $src'.
+ ip addr show dev ${src} | egrep '^ *inet ' | sed -e "
+s/inet/ip addr add/
+s@\([0-9]\+\.[0-9]\+\.[0-9]\+\.[0-9]\+/[0-9]\+\)@\1@
+s/${src}/dev ${dst} label ${dst}/
+s/secondary//
+" | sh -e
+ # Remove automatic routes on destination device
+ ip route list | sed -ne "
+/dev ${dst}\( \|$\)/ {
+ s/^/ip route del /
+ p
+}" | sh -e
+}
+
+# Usage: transfer_routes src dst
+# Get all IP routes to device $src, delete them, and
+# add the same routes to device $dst.
+# The original routes have to be deleted, otherwise adding them
+# for $dst fails (duplicate routes).
+transfer_routes () {
+ local src=$1
+ local dst=$2
+ # List all routes and grep the ones with $src in.
+ # Stick 'ip route del' on the front to delete.
+ # Change $src to $dst and use 'ip route add' to add.
+ ip route list | sed -ne "
+/dev ${src}\( \|$\)/ {
+ h
+ s/^/ip route del /
+ P
+ g
+ s/${src}/${dst}/
+ s/^/ip route add /
+ P
+ d
+}" | sh -e
+}
+
+
+##
+# link_exists interface
+#
+# Returns 0 if the interface named exists (whether up or down), 1 otherwise.
+#
+link_exists()
+{
+ if ip link show "$1" >/dev/null 2>/dev/null
+ then
+ return 0
+ else
+ return 1
+ fi
+}
+
+# Set the default forwarding policy for $dev to drop.
+# Allow forwarding to the bridge.
+antispoofing () {
+ iptables -P FORWARD DROP
+ iptables -F FORWARD
+ iptables -A FORWARD -m physdev --physdev-in ${pdev} -j ACCEPT
+}
+
+# Usage: show_status dev bridge
+# Print ifconfig and routes.
+show_status () {
+ local dev=$1
+ local bridge=$2
+
+ echo '============================================================'
+ ip addr show ${dev}
+ ip addr show ${bridge}
+ echo ' '
+ brctl show ${bridge}
+ echo ' '
+ ip route list
+ echo ' '
+ route -n
+ echo '============================================================'
+}
+
+op_start () {
+ if [ "${bridge}" = "null" ] ; then
+ return
+ fi
+
+ if link_exists "$pdev"; then
+ # The device is already up.
+ return
+ fi
+
+ create_bridge ${tdev}
+
+ preiftransfer ${netdev}
+ transfer_addrs ${netdev} ${tdev}
+ if ! ifdown ${netdev}; then
+ # If ifdown fails, remember the IP details.
+ get_ip_info ${netdev}
+ ip link set ${netdev} down
+ ip addr flush ${netdev}
+ fi
+ ip link set ${netdev} name ${pdev}
+ ip link set ${tdev} name ${bridge}
+
+ setup_bridge_port ${pdev}
+
+ add_to_bridge2 ${bridge} ${pdev}
+ do_ifup ${bridge}
+
+ if [ ${antispoof} = 'yes' ] ; then
+ antispoofing
+ fi
+}
+
+op_stop () {
+ if [ "${bridge}" = "null" ]; then
+ return
+ fi
+ if ! link_exists "$bridge"; then
+ return
+ fi
+
+ transfer_addrs ${bridge} ${pdev}
+ if ! ifdown ${bridge}; then
+ get_ip_info ${bridge}
+ fi
+ ip link set ${pdev} down
+ ip addr flush ${bridge}
+
+ brctl delif ${bridge} ${pdev}
+ ip link set ${bridge} down
+
+ ip link set ${bridge} name ${tdev}
+ ip link set ${pdev} name ${netdev}
+ do_ifup ${netdev}
+
+ brctl delbr ${tdev}
+}
+
+# adds $dev to $bridge but waits for $dev to be in running state first
+add_to_bridge2() {
+ local bridge=$1
+ local dev=$2
+ local maxtries=10
+
+ echo -n "Waiting for ${dev} to negotiate link."
+ ip link set ${dev} up
+ for i in `seq ${maxtries}` ; do
+ if ifconfig ${dev} | grep -q RUNNING ; then
+ break
+ else
+ echo -n '.'
+ sleep 1
+ fi
+ done
+
+    if [ ${i} -eq ${maxtries} ] ; then echo -n "(link isn't in running state)" ; fi
+ echo
+
+ add_to_bridge ${bridge} ${dev}
+}
+
+case "$command" in
+ start)
+ op_start
+ ;;
+
+ stop)
+ op_stop
+ ;;
+
+ status)
+ show_status ${netdev} ${bridge}
+ ;;
+
+ *)
+ echo "Unknown command: $command" >&2
+ echo 'Valid commands are: start, stop, status' >&2
+ exit 1
+esac
--- /dev/null
+#!/bin/bash -x
+#============================================================================
+# Default Xen network start/stop script when using NAT.
+# Xend calls a network script when it starts.
+# The script name to use is defined in /etc/xen/xend-config.sxp
+# in the network-script field.
+#
+# Usage:
+#
+# network-nat (start|stop|status) {VAR=VAL}*
+#
+# Vars:
+#
+# netdev The gateway interface (default eth0).
+# antispoof Whether to use iptables to prevent spoofing (default no).
+# dhcp Whether to alter the local DHCP configuration (default no).
+#
+#============================================================================
+
+dir=$(dirname "$0")
+. "$dir/xen-script-common.sh"
+. "$dir/xen-network-common.sh"
+
+findCommand "$@"
+evalVariables "$@"
+
+netdev=${netdev:-eth0}
+# antispoofing not yet implemented
+antispoof=${antispoof:-no}
+
+# turn on dhcp feature by default if dhcpd is installed
+if [ -f /etc/dhcpd.conf ]
+then
+ dhcp=${dhcp:-yes}
+else
+ dhcp=${dhcp:-no}
+fi
+
+
+if [ "$dhcp" != 'no' ]
+then
+ dhcpd_conf_file=$(find_dhcpd_conf_file)
+ dhcpd_init_file=$(find_dhcpd_init_file)
+ if [ -z "$dhcpd_conf_file" ] || [ -z "$dhcpd_init_file" ]
+ then
+ echo 'Failed to find dhcpd configuration or init file.' >&2
+ exit 1
+ fi
+fi
+
+
+function dhcp_start()
+{
+ if ! grep -q "subnet 10.0.0.0" "$dhcpd_conf_file"
+ then
+ echo >>"$dhcpd_conf_file" "subnet 10.0.0.0 netmask 255.255.0.0 {}"
+ fi
+
+ "$dhcpd_init_file" restart
+}
+
+
+function dhcp_stop()
+{
+ local tmpfile=$(mktemp)
+ grep -v "subnet 10.0.0.0" "$dhcpd_conf_file" >"$tmpfile"
+ if diff "$tmpfile" "$dhcpd_conf_file" >&/dev/null
+ then
+ rm "$tmpfile"
+ else
+ mv "$tmpfile" "$dhcpd_conf_file"
+ fi
+
+ "$dhcpd_init_file" restart
+}
+
+
+op_start() {
+ echo 1 >/proc/sys/net/ipv4/ip_forward
+ iptables -t nat -A POSTROUTING -o ${netdev} -j MASQUERADE
+ [ "$dhcp" != 'no' ] && dhcp_start
+}
+
+
+op_stop() {
+ [ "$dhcp" != 'no' ] && dhcp_stop
+ iptables -t nat -D POSTROUTING -o ${netdev} -j MASQUERADE
+}
+
+
+show_status() {
+ echo '============================================================'
+ ifconfig
+ echo ' '
+ ip route list
+ echo ' '
+ route -n
+ echo '============================================================'
+
+}
+
+case "$command" in
+ start)
+ op_start
+ ;;
+
+ stop)
+ op_stop
+ ;;
+
+ status)
+ show_status
+ ;;
+
+ *)
+ echo "Unknown command: $command" >&2
+ echo 'Valid commands are: start, stop, status' >&2
+ exit 1
+esac
--- /dev/null
+#!/bin/bash
+#============================================================================
+# Default Xen network start/stop script.
+# Xend calls a network script when it starts.
+# The script name to use is defined in /etc/xen/xend-config.sxp
+# in the network-script field.
+#
+# Usage:
+#
+# network-route (start|stop|status) {VAR=VAL}*
+#
+# Vars:
+#
+# netdev The gateway interface (default eth0).
+# antispoof Whether to use iptables to prevent spoofing (default yes).
+#
+#============================================================================
+
+dir=$(dirname "$0")
+. "$dir/xen-script-common.sh"
+
+evalVariables "$@"
+
+netdev=${netdev:-eth${vifnum}}
+
+echo 1 >/proc/sys/net/ipv4/ip_forward
+echo 1 >/proc/sys/net/ipv4/conf/${netdev}/proxy_arp
--- /dev/null
+#!/bin/bash
+#============================================================================
+# /etc/xen/vif-bridge
+#
+# Script for configuring a vif in bridged mode.
+# The hotplugging system will call this script if it is specified either in
+# the device configuration given to Xend, or the default Xend configuration
+# in /etc/xen/xend-config.sxp. If the script is specified in neither of those
+# places, then this script is the default.
+#
+# Usage:
+# vif-bridge (add|remove|online|offline)
+#
+# Environment vars:
+# vif vif interface name (required).
+# XENBUS_PATH path to this device's details in the XenStore (required).
+#
+# Read from the store:
+# bridge bridge to add the vif to (optional). Defaults to searching for the
+# bridge itself.
+# ip list of IP networks for the vif, space-separated (optional).
+#
+# up:
+# Enslaves the vif interface to the bridge and adds iptables rules
+# for its ip addresses (if any).
+#
+# down:
+# Removes the vif interface from the bridge and removes the iptables
+# rules for its ip addresses (if any).
+#============================================================================
+
+dir=$(dirname "$0")
+. "$dir/vif-common.sh"
+
+bridge=${bridge:-}
+bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge")
+
+if [ -z "$bridge" ]
+then
+ bridge=$(brctl show | cut -d "
+" -f 2 | cut -f 1)
+
+ if [ -z "$bridge" ]
+ then
+ fatal "Could not find bridge, and none was specified"
+ fi
+else
+ #
+ # Old style bridge setup with netloop, used to have a bridge name
+ # of xenbrX, enslaving pethX and vif0.X, and then configuring
+ # eth0.
+ #
+ # New style bridge setup does not use netloop, so the bridge name
+ # is ethX and the physical device is enslaved pethX
+ #
+ # So if...
+ #
+ # - User asks for xenbrX
+ # - AND xenbrX doesn't exist
+ # - AND there is a ethX device which is a bridge
+ #
+ # ..then we translate xenbrX to ethX
+ #
+ # This lets old config files work without modification
+ #
+ if [ ! -e "/sys/class/net/$bridge" ] && [ -z "${bridge##xenbr*}" ]
+ then
+ if [ -e "/sys/class/net/eth${bridge#xenbr}/bridge" ]
+ then
+ bridge="eth${bridge#xenbr}"
+ fi
+ fi
+fi
+
+RET=0
+ip link show $bridge 1>/dev/null 2>&1 || RET=1
+if [ "$RET" -eq 1 ]
+then
+ fatal "Could not find bridge device $bridge"
+fi
+
+case "$command" in
+ online)
+ setup_bridge_port "$vif"
+ add_to_bridge "$bridge" "$vif"
+ ;;
+
+ offline)
+ do_without_error brctl delif "$bridge" "$vif"
+ do_without_error ifconfig "$vif" down
+ ;;
+esac
+
+handle_iptable
+
+log debug "Successful vif-bridge $command for $vif, bridge $bridge."
+if [ "$command" == "online" ]
+then
+ success
+fi
--- /dev/null
+#
+# Copyright (c) 2005 XenSource Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+
+dir=$(dirname "$0")
+. "$dir/xen-hotplug-common.sh"
+. "$dir/xen-network-common.sh"
+
+findCommand "$@"
+
+if [ "$command" != "online" ] &&
+ [ "$command" != "offline" ] &&
+ [ "$command" != "add" ] &&
+ [ "$command" != "remove" ]
+then
+ log err "Invalid command: $command"
+ exit 1
+fi
+
+case "$command" in
+ add | remove)
+ exit 0
+ ;;
+esac
+
+
+# Parameters may be read from the environment, the command line arguments, and
+# the store, with overriding in that order. The environment is given by the
+# driver, the command line is given by the Xend global configuration, and
+# store details are given by the per-domain or per-device configuration.
+
+evalVariables "$@"
+
+ip=${ip:-}
+ip=$(xenstore_read_default "$XENBUS_PATH/ip" "$ip")
+
+# Check presence of compulsory args.
+XENBUS_PATH="${XENBUS_PATH:?}"
+vif="${vif:?}"
+
+
+vifname=$(xenstore_read_default "$XENBUS_PATH/vifname" "")
+if [ "$vifname" ]
+then
+ if [ "$command" == "online" ] && ! ip link show "$vifname" >&/dev/null
+ then
+ do_or_die ip link set "$vif" name "$vifname"
+ fi
+ vif="$vifname"
+fi
+
+
+frob_iptable()
+{
+ if [ "$command" == "online" ]
+ then
+ local c="-A"
+ else
+ local c="-D"
+ fi
+
+ iptables "$c" FORWARD -m physdev --physdev-in "$vif" "$@" -j ACCEPT \
+ 2>/dev/null ||
+ [ "$c" == "-D" ] ||
+ log err \
+ "iptables $c FORWARD -m physdev --physdev-in $vif $@ -j ACCEPT failed.
+If you are using iptables, this may affect networking for guest domains."
+}
+
+
+##
+# Add or remove the appropriate entries in the iptables. With antispoofing
+# turned on, we have to explicitly allow packets to the interface, regardless
+# of the ip setting. If ip is set, then we additionally restrict the packets
+# to those coming from the specified networks, though we allow DHCP requests
+# as well.
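+#
+# For example (illustrative), a vif "vif1.0" with ip="192.0.2.0/24" ends up
+# with rules equivalent to:
+#   iptables -A FORWARD -m physdev --physdev-in vif1.0 -s 192.0.2.0/24 -j ACCEPT
+#   iptables -A FORWARD -m physdev --physdev-in vif1.0 -p udp --sport 68 --dport 67 -j ACCEPT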
+#
+handle_iptable()
+{
+ # Check for a working iptables installation. Checking for the iptables
+ # binary is not sufficient, because the user may not have the appropriate
+ # modules installed. If iptables is not working, then there's no need to do
+ # anything with it, so we can just return.
+ if ! iptables -L -n >&/dev/null
+ then
+ return
+ fi
+
+ if [ "$ip" != "" ]
+ then
+ local addr
+ for addr in $ip
+ do
+ frob_iptable -s "$addr"
+ done
+
+ # Always allow the domain to talk to a DHCP server.
+ frob_iptable -p udp --sport 68 --dport 67
+ else
+ # No IP addresses have been specified, so allow anything.
+ frob_iptable
+ fi
+}
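+# Illustrative effect (hypothetical values: vif=vif1.0, ip="10.0.0.2"): when
+# the interface comes online, frob_iptable adds roughly
+#   iptables -A FORWARD -m physdev --physdev-in vif1.0 -s 10.0.0.2 -j ACCEPT
+#   iptables -A FORWARD -m physdev --physdev-in vif1.0 -p udp --sport 68 --dport 67 -j ACCEPT
+# and on offline it issues the same rules with -D to delete them.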
+
+
+##
+# ip_of interface
+#
+# Print the IP address currently in use at the given interface, or nothing if
+# the interface is not up.
+#
+ip_of()
+{
+ ip addr show "$1" | awk "/^.*inet.*$1\$/{print \$2}" | sed -n '1 s,/.*,,p'
+}
+
+
+##
+# dom0_ip
+#
+# Print the IP address of the interface in dom0 through which we are routing.
+# This is the IP address on the interface specified as "netdev" as a parameter
+# to these scripts, or eth0 by default. This function will call fatal if no
+# such interface could be found.
+#
+dom0_ip()
+{
+ local nd=${netdev:-eth0}
+ local result=$(ip_of "$nd")
+ if [ -z "$result" ]
+ then
+    fatal \
+"$nd is not up.  Bring it up or specify another interface with " \
+"netdev=<if> as a parameter to $0."
+ fi
+ echo "$result"
+}
--- /dev/null
+#!/bin/bash
+#============================================================================
+# /etc/xen/vif-nat
+#
+# Script for configuring a vif in routed-nat mode.
+# The hotplugging system will call this script if it is specified either in
+# the device configuration given to Xend, or the default Xend configuration
+# in /etc/xen/xend-config.sxp. If the script is specified in neither of those
+# places, then vif-bridge is the default.
+#
+# Usage:
+# vif-nat (add|remove|online|offline)
+#
+# Environment vars:
+# vif vif interface name (required).
+# XENBUS_PATH path to this device's details in the XenStore (required).
+#
+# Parameters:
+# dhcp Whether to alter the local DHCP configuration to include this
+# new host (default no).
+#
+# Read from the store:
+# ip list of IP networks for the vif, space-separated (default given in
+# this script).
+#============================================================================
+
+
+dir=$(dirname "$0")
+. "$dir/vif-common.sh"
+
+# turn on dhcp feature by default if dhcpd is installed
+if [ -f /etc/dhcpd.conf ]
+then
+ dhcp=${dhcp:-yes}
+else
+ dhcp=${dhcp:-no}
+fi
+
+if [ "$dhcp" != 'no' ]
+then
+ dhcpd_conf_file=$(find_dhcpd_conf_file)
+ dhcpd_init_file=$(find_dhcpd_init_file)
+ dhcpd_arg_file=$(find_dhcpd_arg_file)
+ if [ -z "$dhcpd_conf_file" ] || [ -z "$dhcpd_init_file" ] || [ -z "$dhcpd_arg_file" ]
+ then
+    echo 'Failed to find the dhcpd configuration, init, or args file.' >&2
+ exit 1
+ fi
+fi
+
+
+domid=$(xenstore_read "$XENBUS_PATH/frontend-id")
+vifid=$(xenstore_read "$XENBUS_PATH/handle")
+vifid=$(( $vifid + 1 ))
+
+
+ip_from_dom()
+{
+ local domid1=$(( $domid / 256 ))
+ local domid2=$(( $domid % 256 ))
+
+ echo "10.$domid1.$domid2.$vifid/16"
+}
+
+
+routing_ip()
+{
+ echo $(echo $1 | awk -F. '{print $1"."$2"."$3"."$4 + 127}')
+}
+
+
+dotted_quad()
+{
+ echo\
+ $(( ($1 & 0xFF000000) >> 24))\
+.$(( ($1 & 0x00FF0000) >> 16))\
+.$(( ($1 & 0x0000FF00) >> 8 ))\
+.$(( $1 & 0x000000FF ))
+}
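+# Worked example (illustrative): for domid=5 and vifid=1, ip_from_dom yields
+# "10.0.5.1/16"; routing_ip then adds 127 to the final octet (awk reads "1/16"
+# as 1), giving a router address of 10.0.5.128, and vif_ip below becomes
+# 10.0.5.1.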
+
+
+if [ "$ip" = "" ]
+then
+ ip=$(ip_from_dom)
+fi
+
+router_ip=$(routing_ip "$ip")
+
+# Split the given IP/bits pair.
+vif_ip=`echo ${ip} | awk -F/ '{print $1}'`
+
+hostname=$(xenstore_read "$XENBUS_PATH/domain" | tr -- '_.:/+' '-----')
+if [ "$vifid" != "1" ]
+then
+ hostname="$hostname-$vifid"
+fi
+
+dhcparg_remove_entry()
+{
+ local tmpfile=$(mktemp)
+ sed -e "s/$vif //" "$dhcpd_arg_file" >"$tmpfile"
+ if diff "$tmpfile" "$dhcpd_arg_file" >/dev/null
+ then
+ rm "$tmpfile"
+ else
+ mv "$tmpfile" "$dhcpd_arg_file"
+ fi
+}
+
+dhcparg_add_entry()
+{
+ dhcparg_remove_entry
+ local tmpfile=$(mktemp)
+ # handle Red Hat, SUSE, and Debian styles, with or without quotes
+ sed -e 's/^DHCPDARGS="*\([^"]*\)"*/DHCPDARGS="\1'"$vif "'"/' \
+ "$dhcpd_arg_file" >"$tmpfile" && mv "$tmpfile" "$dhcpd_arg_file"
+ sed -e 's/^DHCPD_INTERFACE="*\([^"]*\)"*/DHCPD_INTERFACE="\1'"$vif "'"/' \
+ "$dhcpd_arg_file" >"$tmpfile" && mv "$tmpfile" "$dhcpd_arg_file"
+ sed -e 's/^INTERFACES="*\([^"]*\)"*/INTERFACES="\1'"$vif "'"/' \
+ "$dhcpd_arg_file" >"$tmpfile" && mv "$tmpfile" "$dhcpd_arg_file"
+ rm -f "$tmpfile"
+}
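+# Illustrative rewrite (hypothetical vif name vif5.0): a Red Hat-style line
+#   DHCPDARGS=""
+# becomes
+#   DHCPDARGS="vif5.0 "
+# SUSE (DHCPD_INTERFACE) and Debian (INTERFACES) entries are rewritten the same
+# way, and dhcparg_remove_entry strips the "vif5.0 " token again later.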
+
+dhcp_remove_entry()
+{
+ local tmpfile=$(mktemp)
+ grep -v "host $hostname" "$dhcpd_conf_file" >"$tmpfile"
+ if diff "$tmpfile" "$dhcpd_conf_file" >/dev/null
+ then
+ rm "$tmpfile"
+ else
+ mv "$tmpfile" "$dhcpd_conf_file"
+ fi
+ dhcparg_remove_entry
+}
+
+
+dhcp_up()
+{
+ claim_lock "vif-nat-dhcp"
+ dhcp_remove_entry
+ mac=$(xenstore_read "$XENBUS_PATH/mac")
+ echo >>"$dhcpd_conf_file" \
+"host $hostname { hardware ethernet $mac; fixed-address $vif_ip; option routers $router_ip; option host-name \"$hostname\"; }"
+ dhcparg_add_entry
+ release_lock "vif-nat-dhcp"
+ "$dhcpd_init_file" restart || true
+}
+
+
+dhcp_down()
+{
+ claim_lock "vif-nat-dhcp"
+ dhcp_remove_entry
+ release_lock "vif-nat-dhcp"
+ "$dhcpd_init_file" restart || true # We need to ignore failure because
+ # ISC dhcpd 3 borks if there is nothing
+ # for it to do, which is the case if
+ # the outgoing interface is not
+ # configured to offer leases and there
+ # are no vifs.
+}
+
+
+case "$command" in
+ online)
+ if ip route | grep -q "dev $vif"
+ then
+ log debug "$vif already up"
+ exit 0
+ fi
+
+ do_or_die ip link set "$vif" up arp on
+ do_or_die ip addr add "$router_ip" dev "$vif"
+ do_or_die ip route add "$vif_ip" dev "$vif" src "$router_ip"
+ echo 1 >/proc/sys/net/ipv4/conf/${vif}/proxy_arp
+ [ "$dhcp" != 'no' ] && dhcp_up
+ ;;
+ offline)
+ [ "$dhcp" != 'no' ] && dhcp_down
+ do_without_error ifconfig "$vif" down
+ ;;
+esac
+
+
+handle_iptable
+
+log debug "Successful vif-nat $command for $vif."
+if [ "$command" = "online" ]
+then
+ success
+fi
--- /dev/null
+#!/bin/bash
+#============================================================================
+# /etc/xen/vif-route
+#
+# Script for configuring a vif in routed mode.
+# The hotplugging system will call this script if it is specified either in
+# the device configuration given to Xend, or the default Xend configuration
+# in /etc/xen/xend-config.sxp. If the script is specified in neither of those
+# places, then vif-bridge is the default.
+#
+# Usage:
+# vif-route (add|remove|online|offline)
+#
+# Environment vars:
+# vif vif interface name (required).
+# XENBUS_PATH path to this device's details in the XenStore (required).
+#
+# Read from the store:
+# ip list of IP networks for the vif, space-separated (default given in
+# this script).
+#============================================================================
+
+dir=$(dirname "$0")
+. "$dir/vif-common.sh"
+
+main_ip=$(dom0_ip)
+
+case "$command" in
+ online)
+ ifconfig ${vif} ${main_ip} netmask 255.255.255.255 up
+ echo 1 >/proc/sys/net/ipv4/conf/${vif}/proxy_arp
+ ipcmd='add'
+ cmdprefix=''
+ ;;
+ offline)
+ do_without_error ifdown ${vif}
+ ipcmd='del'
+ cmdprefix='do_without_error'
+ ;;
+esac
+
+if [ "${ip}" ] ; then
+ # If we've been given a list of IP addresses, then add routes from dom0 to
+ # the guest using those addresses.
+ for addr in ${ip} ; do
+ ${cmdprefix} ip route ${ipcmd} ${addr} dev ${vif} src ${main_ip}
+ done
+fi
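+# Illustrative effect (hypothetical values: ip="10.1.1.2", vif=vif2.0, dom0
+# address 192.0.2.1): online runs
+#   ip route add 10.1.1.2 dev vif2.0 src 192.0.2.1
+# and offline runs the matching "ip route del" wrapped in do_without_error.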
+
+handle_iptable
+
+log debug "Successful vif-route $command for $vif."
+if [ "$command" = "online" ]
+then
+ success
+fi
--- /dev/null
+#!/bin/sh
+#
+# Copyright (c) 2007, FUJITSU Limited
+# Based on the block scripts code.
+#
+
+dir=$(dirname "$0")
+. "$dir/xen-hotplug-common.sh"
+
+findCommand "$@"
+
+case "$command" in
+ add)
+ success
+ ;;
+ remove)
+ # TODO
+ exit 0
+ ;;
+esac
+
+exit 0
--- /dev/null
+#!/bin/bash
+
+dir=$(dirname "$0")
+. "$dir/vtpm-hotplug-common.sh"
+
+vtpm_fatal_error=0
+
+case "$command" in
+ add)
+ vtpm_create_instance
+ ;;
+ remove)
+ vtpm_remove_instance
+ ;;
+esac
+
+if [ $vtpm_fatal_error -eq 0 ]; then
+ log debug "Successful vTPM operation '$command'."
+ success
+else
+ fatal "Error while executing vTPM operation '$command'."
+fi
--- /dev/null
+#
+# Copyright (c) 2005 IBM Corporation
+# Copyright (c) 2005 XenSource Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+dir=$(dirname "$0")
+. "$dir/logging.sh"
+. "$dir/locking.sh"
+
+VTPMDB="/var/vtpm/vtpm.db"
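+# The database is a plain-text file: lines beginning with '#' are comments and
+# every other line maps a VM (by name or UUID) to its vTPM instance number.
+# Illustrative contents (hypothetical values):
+#   #Database for VM to vTPM association
+#   example-domain 1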
+
+#In the vtpm-impl file some commands should be defined:
+# vtpm_create, vtpm_setup, vtpm_start, etc. (see below)
+if [ -r "$dir/vtpm-impl.alt" ]; then
+ . "$dir/vtpm-impl.alt"
+elif [ -r "$dir/vtpm-impl" ]; then
+ . "$dir/vtpm-impl"
+else
+ function vtpm_create () {
+ true
+ }
+ function vtpm_setup() {
+ true
+ }
+ function vtpm_start() {
+ true
+ }
+ function vtpm_suspend() {
+ true
+ }
+ function vtpm_resume() {
+ true
+ }
+ function vtpm_delete() {
+ true
+ }
+ function vtpm_migrate() {
+    echo "Error: vTPM migration across machines not implemented."
+ }
+ function vtpm_migrate_local() {
+ echo "Error: local vTPM migration not supported"
+ }
+ function vtpm_migrate_recover() {
+ true
+ }
+fi
+
+
+#Find the instance number for the vtpm given the name of the domain
+# Parameters
+# - vmname : the name of the vm
+# Return value
+#  Echoes '0' if the instance number could not be found, otherwise
+#  echoes the instance number on stdout
+function vtpmdb_find_instance () {
+ local vmname ret instance
+ vmname=$1
+ ret=0
+
+ instance=$(cat $VTPMDB | \
+ awk -vvmname=$vmname \
+ '{ \
+ if ( 1 != index($1,"#")) { \
+ if ( $1 == vmname ) { \
+ print $2; \
+ exit; \
+ } \
+ } \
+ }')
+ if [ "$instance" != "" ]; then
+ ret=$instance
+ fi
+ echo "$ret"
+}
+
+
+# Check whether a particular instance number is still available
+# returns "0" if it is not available, "1" otherwise.
+function vtpmdb_is_free_instancenum () {
+ local instance instances avail i
+ instance=$1
+ avail=1
+ #Allowed instance number range: 1-255
+ if [ $instance -eq 0 -o $instance -gt 255 ]; then
+ avail=0
+ else
+ instances=$(cat $VTPMDB | \
+ gawk \
+ '{ \
+ if (1 != index($1,"#")) { \
+ printf("%s ",$2); \
+ } \
+ }')
+ for i in $instances; do
+ if [ $i -eq $instance ]; then
+ avail=0
+ break
+ fi
+ done
+ fi
+ echo "$avail"
+}
+
+
+# Get an available instance number given the database
+# Returns an unused instance number
+function vtpmdb_get_free_instancenum () {
+ local ctr instances don found
+ instances=$(cat $VTPMDB | \
+ gawk \
+ '{ \
+ if (1 != index($1,"#")) { \
+ printf("%s ",$2); \
+ } \
+ }')
+ ctr=1
+ don=0
+ while [ $don -eq 0 ]; do
+ found=0
+ for i in $instances; do
+ if [ $i -eq $ctr ]; then
+ found=1;
+ break;
+ fi
+ done
+
+ if [ $found -eq 0 ]; then
+ don=1
+ break
+ fi
+ let ctr=ctr+1
+ done
+ echo "$ctr"
+}
+
+
+# Add a domain name and instance number to the DB file
+function vtpmdb_add_instance () {
+ local res vmname inst
+ vmname=$1
+ inst=$2
+
+ if [ ! -f $VTPMDB ]; then
+ echo "#Database for VM to vTPM association" > $VTPMDB
+ echo "#1st column: domain name" >> $VTPMDB
+ echo "#2nd column: TPM instance number" >> $VTPMDB
+ fi
+ res=$(vtpmdb_validate_entry $vmname $inst)
+ if [ $res -eq 0 ]; then
+ echo "$vmname $inst" >> $VTPMDB
+ fi
+}
+
+
+#Validate an entry against the database: echoes '0' if the (name, instance)
+#pair is not present, '1' if exactly this pair exists, and '2' if either the
+#name or the instance is already used by a different entry.
+function vtpmdb_validate_entry () {
+ local res rc vmname inst
+ rc=0
+ vmname=$1
+ inst=$2
+
+ res=$(cat $VTPMDB | \
+ gawk -vvmname=$vmname \
+ -vinst=$inst \
+ '{ \
+ if ( 1 == index($1,"#")) {\
+ } else \
+ if ( $1 == vmname && \
+ $2 == inst) { \
+ printf("1"); \
+ exit; \
+ } else \
+ if ( $1 == vmname || \
+ $2 == inst) { \
+ printf("2"); \
+ exit; \
+ } \
+ }')
+
+ if [ "$res" == "1" ]; then
+ rc=1
+ elif [ "$res" == "2" ]; then
+ rc=2
+ fi
+ echo "$rc"
+}
+
+
+#Remove an entry from the vTPM database given its domain name
+#and instance number
+function vtpmdb_remove_entry () {
+ local vmname instance VTPMDB_TMP
+ vmname=$1
+ instance=$2
+ VTPMDB_TMP="$VTPMDB".tmp
+
+ $(cat $VTPMDB | \
+ gawk -vvmname=$vmname \
+ '{ \
+ if ( $1 != vmname ) { \
+ print $0; \
+ } \
+     }' > $VTPMDB_TMP)
+ if [ -e $VTPMDB_TMP ]; then
+ mv -f $VTPMDB_TMP $VTPMDB
+ vtpm_delete $instance
+ else
+ log err "Error creating temporary file '$VTPMDB_TMP'."
+ fi
+}
+
+
+# Find the reason for the creation of this device:
+# Returns 'resume' or 'create'
+function vtpm_get_create_reason () {
+ local resume
+ resume=$(xenstore_read $XENBUS_PATH/resume)
+ if [ "$resume" == "True" ]; then
+ echo "resume"
+ else
+ echo "create"
+ fi
+}
+
+
+#Create a vTPM instance
+# If no entry in the TPM database is found, the instance is
+# created and an entry added to the database.
+function vtpm_create_instance () {
+ local res instance domname reason uuid
+ uuid=$(xenstore_read "$XENBUS_PATH"/uuid)
+ reason=$(vtpm_get_create_reason)
+
+ claim_lock vtpmdb
+
+ instance="0"
+
+ if [ "$uuid" != "" ]; then
+ instance=$(vtpmdb_find_instance $uuid)
+ fi
+ if [ "$instance" == "0" ]; then
+ domname=$(xenstore_read "$XENBUS_PATH"/domain)
+ instance=$(vtpmdb_find_instance $domname)
+ fi
+
+ if [ "$instance" == "0" -a "$reason" != "create" ]; then
+ release_lock vtpmdb
+ return
+ fi
+
+ if [ "$instance" == "0" ]; then
+ #Try to give the preferred instance to the domain
+ instance=$(xenstore_read "$XENBUS_PATH"/pref_instance)
+ if [ "$instance" != "" ]; then
+ res=$(vtpmdb_is_free_instancenum $instance)
+ if [ $res -eq 0 ]; then
+ instance=$(vtpmdb_get_free_instancenum)
+ fi
+ else
+ instance=$(vtpmdb_get_free_instancenum)
+ fi
+
+ vtpm_create $instance
+
+ if [ $vtpm_fatal_error -eq 0 ]; then
+ if [ "$uuid" != "" ]; then
+ vtpmdb_add_instance $uuid $instance
+ else
+ vtpmdb_add_instance $domname $instance
+ fi
+ fi
+ else
+ if [ "$reason" == "resume" ]; then
+ vtpm_resume $instance
+ else
+ vtpm_start $instance
+ fi
+ fi
+
+ release_lock vtpmdb
+
+ xenstore_write $XENBUS_PATH/instance $instance
+}
+
+
+#Remove an instance when a VM is terminating or suspending.
+#Since it is assumed that the VM will appear again, the
+#entry is kept in the VTPMDB file.
+function vtpm_remove_instance () {
+ local instance reason domname uuid
+ #Stop script execution quietly if path does not exist (anymore)
+ xenstore-exists "$XENBUS_PATH"/domain
+ uuid=$(xenstore_read "$XENBUS_PATH"/uuid)
+
+ claim_lock vtpmdb
+
+ instance="0"
+
+ if [ "$uuid" != "" ]; then
+ instance=$(vtpmdb_find_instance $uuid)
+ fi
+
+ if [ "$instance" == "0" ]; then
+ domname=$(xenstore_read "$XENBUS_PATH"/domain)
+ instance=$(vtpmdb_find_instance $domname)
+ fi
+
+ if [ "$instance" != "0" ]; then
+ vtpm_suspend $instance
+ fi
+
+ release_lock vtpmdb
+}
+
+
+#Remove an entry in the VTPMDB file given the domain's name
+#1st parameter: The name of the domain
+function vtpm_delete_instance () {
+ local instance
+
+ claim_lock vtpmdb
+
+ instance=$(vtpmdb_find_instance $1)
+ if [ "$instance" != "0" ]; then
+ vtpmdb_remove_entry $1 $instance
+ fi
+
+ release_lock vtpmdb
+}
+
+# Determine whether the given address is local to this machine
+# Return values:
+# "-1" : the given machine name is invalid
+# "0" : this is not an address of this machine
+# "1" : this is an address local to this machine
+function vtpm_isLocalAddress() {
+ local addr res
+ addr=$(ping $1 -c 1 | \
+ gawk '{ print substr($3,2,length($3)-2); exit }')
+ if [ "$addr" == "" ]; then
+ echo "-1"
+ return
+ fi
+ res=$(ifconfig | grep "inet addr" | \
+ gawk -vaddr=$addr \
+ '{ \
+ if ( addr == substr($2, 6)) {\
+ print "1"; \
+ } \
+ }' \
+ )
+ if [ "$res" == "" ]; then
+ echo "0"
+ return
+ fi
+ echo "1"
+}
+
+# Perform a migration step. This function differentiates between migration
+# to the local host or to a remote machine.
+# Parameters:
+# 1st: destination host to migrate to
+# 2nd: name of the domain to migrate
+# 3rd: the migration step to perform
+function vtpm_migration_step() {
+ local res=$(vtpm_isLocalAddress $1)
+ if [ "$res" == "0" ]; then
+ vtpm_migrate $1 $2 $3
+ else
+ vtpm_migrate_local
+ fi
+}
+
+# Recover from migration due to an error. This function differentiates
+# between migration to the local host or to a remote machine.
+# Parameters:
+# 1st: destination host the migration was going to
+# 2nd: name of the domain that was to be migrated
+# 3rd: the last successful migration step that was done
+function vtpm_recover() {
+ local res
+ res=$(vtpm_isLocalAddress $1)
+ if [ "$res" == "0" ]; then
+ vtpm_migrate_recover $1 $2 $3
+ fi
+}
+
+
+#Determine the domain id given a domain's name.
+#1st parameter: name of the domain
+#return value: domain id or -1 if domain id could not be determined
+function vtpm_domid_from_name () {
+ local id name ids
+ ids=$(xenstore-list /local/domain)
+ for id in $ids; do
+ name=$(xenstore-read /local/domain/$id/name)
+ if [ "$name" == "$1" ]; then
+ echo "$id"
+ return
+ fi
+ done
+ echo "-1"
+}
+
+#Determine the vTPM's UUID using the domain ID.
+#1st parm: domain ID
+function vtpm_uuid_by_domid() {
+ echo $(xenstore-read /local/domain/0/backend/vtpm/$1/0/uuid)
+}
+
+
+# Determine the vTPM's UUID by the name of the VM
+function vtpm_uuid_from_vmname() {
+ local domid=$(vtpm_domid_from_name $1)
+ if [ "$domid" != "-1" ]; then
+ echo $(vtpm_uuid_by_domid $domid)
+ return
+ fi
+ echo ""
+}
+
+#Add a virtual TPM instance number and its associated domain name
+#to the VTPMDB file and activate usage of this virtual TPM instance
+#by writing the instance number into the xenstore
+#1st parm: name of virtual machine
+#2nd parm: instance of associated virtual TPM
+function vtpm_add_and_activate() {
+ local domid=$(vtpm_domid_from_name $1)
+ local vtpm_uuid=$(vtpm_uuid_from_vmname $1)
+ if [ "$vtpm_uuid" != "" -a "$domid" != "-1" ]; then
+ vtpmdb_add_instance $vtpm_uuid $2
+ xenstore-write backend/vtpm/$domid/0/instance $2
+ fi
+}
--- /dev/null
+#!/bin/bash
+
+# This script must be called in one of the following ways:
+# vtpm-delete <vtpm uuid>
+# or
+# vtpm-delete --vmname <vm name>
+
+dir=$(dirname "$0")
+. "$dir/vtpm-common.sh"
+
+if [ "$1" == "--vmname" ]; then
+ vtpm_uuid=$(vtpm_uuid_from_vmname $2)
+ if [ "$vtpm_uuid" != "" ];then
+ vtpm_delete_instance $vtpm_uuid
+ fi
+else
+ vtpm_delete_instance $1
+fi
--- /dev/null
+#
+# Copyright (c) 2005 IBM Corporation
+# Copyright (c) 2005 XenSource Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+dir=$(dirname "$0")
+. "$dir/xen-hotplug-common.sh"
+
+findCommand "$@"
+if [ "$command" != "online" ] &&
+ [ "$command" != "offline" ] &&
+ [ "$command" != "add" ] &&
+ [ "$command" != "remove" ]
+then
+ log err "Invalid command: $command"
+ exit 1
+fi
+
+
+XENBUS_PATH="${XENBUS_PATH:?}"
+
+. "$dir/vtpm-common.sh"
--- /dev/null
+#!/bin/bash
+# ===================================================================
+#
+# Copyright (c) 2005, Intel Corp.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials provided
+# with the distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
+# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
+# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
+# OF THE POSSIBILITY OF SUCH DAMAGE.
+# ===================================================================
+
+# | SRC | TAG | CMD SIZE | ORD |mtype|strt
+TPM_CMD_OPEN=\\x00\\x00\\x00\\x00\\x01\\xc1\\x00\\x00\\x00\\x11\\x01\\x00\\x00\\x01\\x01\\x01
+TPM_CMD_RESM=\\x00\\x00\\x00\\x00\\x01\\xc1\\x00\\x00\\x00\\x11\\x01\\x00\\x00\\x01\\x01\\x02
+TPM_CMD_CLOS=\\x00\\x00\\x00\\x00\\x01\\xc1\\x00\\x00\\x00\\x0e\\x01\\x00\\x00\\x02
+TPM_CMD_DELE=\\x00\\x00\\x00\\x00\\x01\\xc1\\x00\\x00\\x00\\x0e\\x01\\x00\\x00\\x03
+
+TPM_TYPE_PVM=\\x01
+TPM_TYPE_HVM=\\x02
+
+TPM_SUCCESS=00000000
+
+TX_VTPM_MANAGER=/var/vtpm/fifos/from_console.fifo
+RX_VTPM_MANAGER=/var/vtpm/fifos/to_console.fifo
+
+VTPM_MIG=/usr/bin/vtpm_migrator
+
+# -------------------- Helpers for binary streams -----------
+
+function str_to_hex32() {
+ printf "%0.8x" $1
+}
+
+function hex32_to_bin() {
+ local inst=$(str_to_hex32 $1);
+
+ local n1=`echo $inst | sed 's/\(..\)....../\\\\x\1/'`
+ local n2=`echo $inst | sed 's/..\(..\)..../\\\\x\1/'`
+ local n3=`echo $inst | sed 's/....\(..\)../\\\\x\1/'`
+ local n4=`echo $inst | sed 's/......\(..\)/\\\\x\1/'`
+
+ echo "$n1$n2$n3$n4"
+}
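+# Illustrative example: hex32_to_bin 5 formats the instance as "00000005" and
+# yields the escape string "\x00\x00\x00\x05", which the printf in
+# vtpm_manager_cmd below turns into four raw big-endian bytes.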
+
+function vtpm_manager_cmd() {
+ local cmd=$1;
+ local inst=$2;
+ local inst_bin=$(hex32_to_bin $inst);
+
+ claim_lock vtpm_mgr
+
+ #send cmd to vtpm_manager
+ printf "$cmd$inst_bin" > $TX_VTPM_MANAGER
+
+ #recv response
+ set +e
+ local resp_hex=`dd skip=10 bs=1 count=4 if=$RX_VTPM_MANAGER 2> /dev/null | xxd -ps`
+ set -e
+
+ release_lock vtpm_mgr
+
+ #return whether the command was successful
+    if [ "$resp_hex" != "$TPM_SUCCESS" ]; then
+ vtpm_fatal_error=1
+ false
+ else
+ true
+ fi
+}
+
+# Helper to get vm type to pass to vtpm_manager open/resume
+function vtpm_get_type() {
+ local inst=$(xenstore_read $XENBUS_PATH/frontend-id)
+ local vm=$(xenstore_read /local/domain/$inst/vm)
+ if [ "$vm" != "" ]; then
+ local ostype=$(xenstore-read $vm/image/ostype)
+ if [ "$ostype" == "hvm" ]; then
+ echo $TPM_TYPE_HVM;
+ else
+ echo $TPM_TYPE_PVM;
+ fi
+ fi
+}
+
+# ------------------ Command handlers -----------------
+
+# Create new vtpm instance & set it up for use
+function vtpm_create () {
+ # Creation is handled implicitly by the manager on first setup
+ # so just set it up for use
+    # Run in this shell (not a subshell) so vtpm_fatal_error propagates.
+    vtpm_start $1
+}
+
+# Setup vtpm instance for use.
+function vtpm_start() {
+ local vmtype=$(vtpm_get_type);
+    vtpm_manager_cmd $TPM_CMD_OPEN$vmtype $1
+}
+
+function vtpm_resume() {
+ local vmtype=$(vtpm_get_type);
+    vtpm_manager_cmd $TPM_CMD_RESM$vmtype $1
+}
+
+# Reset the vtpm AKA clear PCRs
+function vtpm_reset() {
+    # not used by the current implementation
+ true
+}
+
+# Shutdown the vtpm while the vm is down
+# This could be a suspend or a shutdown; we cannot distinguish, so
+# save the state and decide on startup whether to keep it
+function vtpm_suspend() {
+    vtpm_manager_cmd $TPM_CMD_CLOS $1
+}
+
+
+function vtpm_delete() {
+ local inst=$1
+ if $(vtpm_manager_cmd $TPM_CMD_DELE $inst); then
+ rm -f /var/vtpm/vtpm_dm_$1.data
+ true
+ else
+ vtpm_fatal_error=1
+ false
+ fi
+}
+
+# Perform a migration step. This function differentiates between migration
+# to the local host or to a remote machine.
+# Parameters:
+# 1st: destination host to migrate to
+# 2nd: name of the domain to migrate
+# 3rd: the migration step to perform
+function vtpm_migrate() {
+ local instance res
+
+ instance=$(vtpmdb_find_instance $2)
+ if [ "$instance" == "" ]; then
+    log err "vTPM migration failed: unable to translate the domain name to an instance number."
+ echo "Error: VTPM Migration failed while looking up instance number"
+ fi
+
+ case "$3" in
+ 0)
+      # Indicate that migration is supported
+ echo "0"
+ ;;
+
+ 1)
+ # Get Public Key from Destination
+ # Call vtpm_manager's migration part 1
+ claim_lock vtpm_mgr
+ $VTPM_MIG $1 $2 $instance $3
+ release_lock vtpm_mgr
+ ;;
+
+ 2)
+ # Call manager's migration step 2 and send result to destination
+ # If successful remove from db
+ claim_lock vtpm_mgr
+ $VTPM_MIG $1 $2 $instance $3
+ release_lock vtpm_mgr
+ ;;
+
+ 3)
+      if ps x | grep "$VTPM_MIG $1" | grep -v grep >/dev/null; then
+ log err "VTPM Migration failed to complete."
+ echo "Error: VTPM Migration failed to complete."
+ fi
+ ;;
+ esac
+
+}
+
+
+function vtpm_migrate_recover() {
+ echo "Error: Recovery not supported yet"
+}
+
+function vtpm_migrate_local() {
+ echo "Error: local vTPM migration not supported"
+}
--- /dev/null
+#
+# Copyright (c) 2005 IBM Corporation
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+dir=$(dirname "$0")
+. "$dir/vtpm-common.sh"
--- /dev/null
+#! /bin/bash
+
+PATH=/etc/xen/scripts:$PATH
+
+. /etc/xen/scripts/locking.sh
+
+claim_lock xenbus_hotplug_global
+
+case "$XENBUS_TYPE" in
+ tap)
+ /etc/xen/scripts/blktap "$ACTION"
+ ;;
+ vbd)
+ /etc/xen/scripts/block "$ACTION"
+ ;;
+ vtpm)
+ /etc/xen/scripts/vtpm "$ACTION"
+ ;;
+ vif)
+ [ -n "$script" ] && $script "$ACTION"
+ ;;
+ vscsi)
+ /etc/xen/scripts/vscsi "$ACTION"
+ ;;
+esac
+
+case "$ACTION" in
+ add)
+ ;;
+ remove)
+ /etc/xen/scripts/xen-hotplug-cleanup
+ ;;
+ online)
+ ;;
+ offline)
+ ;;
+esac
+
+release_lock xenbus_hotplug_global
--- /dev/null
+SUBSYSTEM=="xen-backend", KERNEL=="vbd*", RUN+="/etc/xen/scripts/block $env{ACTION}"
+SUBSYSTEM=="xen-backend", KERNEL=="vtpm*", RUN+="/etc/xen/scripts/vtpm $env{ACTION}"
+SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="online", RUN+="$env{script} online"
+SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="offline", RUN+="$env{script} offline"
+SUBSYSTEM=="xen-backend", KERNEL=="vscsi*", RUN+="/etc/xen/scripts/vscsi $env{ACTION}"
+SUBSYSTEM=="xen-backend", ACTION=="remove", RUN+="/etc/xen/scripts/xen-hotplug-cleanup"
+KERNEL=="evtchn", NAME="xen/%k"
+KERNEL=="blktap[0-9]*", NAME="xen/%k"
--- /dev/null
+#! /bin/bash
+
+dir=$(dirname "$0")
+. "$dir/xen-hotplug-common.sh"
+
+# Claim the lock protecting /etc/xen/scripts/block. This stops a race whereby
+# paths in the store would disappear underneath that script as it attempted to
+# read from the store checking for device sharing.
+# Any other scripts that do similar things will have to have their lock
+# claimed too.
+# This is pretty horrible, but there's not really a nicer way of solving this.
+claim_lock "block"
+
+# split backend/DEVCLASS/VMID/DEVID on slashes
+path_array=( ${XENBUS_PATH//\// } )
+# get /vm/UUID path
+vm=$(xenstore_read_default "/local/domain/${path_array[2]}/vm" "")
+# construct /vm/UUID/device/DEVCLASS/DEVID
+if [ "$vm" != "" ]; then
+ vm_dev="$vm/device/${path_array[1]}/${path_array[3]}"
+else
+ vm_dev=
+fi
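+# Illustrative example (hypothetical values): with XENBUS_PATH=backend/vbd/7/51712
+# the array is (backend vbd 7 51712); if /local/domain/7/vm reads /vm/<uuid>,
+# vm_dev becomes /vm/<uuid>/device/vbd/51712 and is removed below.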
+
+# remove device frontend store entries
+xenstore-rm -t \
+ $(xenstore-read "$XENBUS_PATH/frontend" 2>/dev/null) 2>/dev/null || true
+
+# remove device backend store entries
+xenstore-rm -t "$XENBUS_PATH" 2>/dev/null || true
+xenstore-rm -t "error/$XENBUS_PATH" 2>/dev/null || true
+
+# remove device path from /vm/UUID
+[ "$vm_dev" != "" ] && xenstore-rm -t "$vm_dev" 2>/dev/null || true
+
+release_lock "block"
--- /dev/null
+#
+# Copyright (c) 2005 XenSource Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+
+dir=$(dirname "$0")
+. "$dir/logging.sh"
+. "$dir/xen-script-common.sh"
+. "$dir/locking.sh"
+
+exec 2>>/var/log/xen/xen-hotplug.log
+
+export PATH="/sbin:/bin:/usr/bin:/usr/sbin:$PATH"
+export LANG="POSIX"
+unset $(set | grep ^LC_ | cut -d= -f1)
+
+fatal() {
+ xenstore_write "$XENBUS_PATH/hotplug-error" "$*" \
+ "$XENBUS_PATH/hotplug-status" error
+ log err "$@"
+ exit 1
+}
+
+success() {
+ # Tell DevController that backend is "connected"
+ xenstore_write "$XENBUS_PATH/hotplug-status" connected
+}
+
+do_or_die() {
+ "$@" || fatal "$@ failed"
+}
+
+do_without_error() {
+ "$@" 2>/dev/null || log debug "$@ failed"
+}
+
+sigerr() {
+ fatal "$0 failed; error detected."
+}
+
+trap sigerr ERR
+
+
+##
+# xenstore_read <path>+
+#
+# Read each of the given paths, returning each result on a separate line, or
+# exit this script if any of the paths is missing.
+#
+xenstore_read() {
+ local v=$(xenstore-read "$@" || true)
+ [ "$v" != "" ] || fatal "xenstore-read $@ failed."
+ echo "$v"
+}
+
+
+##
+# xenstore_read_default <path> <default>
+#
+# Read the given path, returning the value there or the given default if the
+# path is not present.
+#
+xenstore_read_default() {
+ xenstore-read "$1" 2>/dev/null || echo "$2"
+}
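+# Example use (as in the vif scripts): read an optional per-device key, falling
+# back to the empty string when it is absent:
+#   vifname=$(xenstore_read_default "$XENBUS_PATH/vifname" "")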
+
+
+##
+# xenstore_write (<path> <value>)+
+#
+# Write each of the key/value pairs to the store, and exit this script if any
+# such writing fails.
+#
+xenstore_write() {
+ log debug "Writing $@ to xenstore."
+ xenstore-write "$@" || fatal "Writing $@ to xenstore failed."
+}
+
+
+log debug "$@" "XENBUS_PATH=$XENBUS_PATH"
--- /dev/null
+#
+# Copyright (c) 2005 XenSource Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+
+# Gentoo doesn't have ifup/ifdown, so we define appropriate alternatives.
+
+# Other platforms just use ifup / ifdown directly.
+
+##
+# preiftransfer
+#
+# @param $1 The current name for the physical device, which is also the name
+# that the virtual device will take once the physical device has
+# been renamed.
+
+if ! which ifup >/dev/null 2>/dev/null
+then
+ preiftransfer()
+ {
+ true
+ }
+ ifup()
+ {
+ false
+ }
+ ifdown()
+ {
+ false
+ }
+else
+ preiftransfer()
+ {
+ true
+ }
+fi
+
+
+first_file()
+{
+ t="$1"
+ shift
+ for file in $@
+ do
+ if [ "$t" "$file" ]
+ then
+ echo "$file"
+ return
+ fi
+ done
+}
+
+find_dhcpd_conf_file()
+{
+ first_file -f /etc/dhcp3/dhcpd.conf /etc/dhcpd.conf
+}
+
+
+find_dhcpd_init_file()
+{
+ first_file -x /etc/init.d/{dhcp3-server,dhcp,dhcpd}
+}
+
+find_dhcpd_arg_file()
+{
+ first_file -f /etc/sysconfig/dhcpd /etc/defaults/dhcp /etc/default/dhcp3-server
+}
+
+# configure interfaces which act as pure bridge ports:
+setup_bridge_port() {
+ local dev="$1"
+
+ # take interface down ...
+ ip link set ${dev} down
+
+ # ... and configure it
+ ip addr flush ${dev}
+}
+
+# Usage: create_bridge bridge
+create_bridge () {
+ local bridge=$1
+
+ # Don't create the bridge if it already exists.
+ if [ ! -e "/sys/class/net/${bridge}/bridge" ]; then
+ brctl addbr ${bridge}
+ brctl stp ${bridge} off
+ brctl setfd ${bridge} 0
+ fi
+}
+
+# Usage: add_to_bridge bridge dev
+add_to_bridge () {
+ local bridge=$1
+ local dev=$2
+
+    # Don't add $dev to $bridge if it's already on the bridge.
+ if [ -e "/sys/class/net/${bridge}/brif/${dev}" ]; then
+ ip link set ${dev} up || true
+ return
+ fi
+ brctl addif ${bridge} ${dev}
+ ip link set ${dev} up
+}
+
--- /dev/null
+#
+# Copyright (c) 2005 XenSource Ltd.
+#
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#
+
+
+set -e
+
+
+evalVariables()
+{
+ for arg in "$@"
+ do
+ if expr 'index' "$arg" '=' '>' '1' >/dev/null
+ then
+ eval "$arg"
+ fi
+ done
+}
+
+
+findCommand()
+{
+ for arg in "$@"
+ do
+ if ! expr 'index' "$arg" '=' >/dev/null
+ then
+ command="$arg"
+ return
+ fi
+ done
+}
--- /dev/null
+SUBSYSTEM=="pci", RUN+="socket:/org/xen/xend/udev_event"
+#SUBSYSTEM=="scsi", RUN+="socket:/org/xen/xend/udev_event"
+#SUBSYSTEM=="net", KERNEL!="vif[0-9]*.[0-9]*|tap[0-9]*.[0-9]*", RUN+="socket:/org/xen/xend/udev_event"
--- /dev/null
+XEN_ROOT = ../../
+include $(XEN_ROOT)/tools/Rules.mk
+
+SUBDIRS-y := common
+SUBDIRS-$(CONFIG_NetBSD) += NetBSD
+SUBDIRS-$(CONFIG_Linux) += Linux
+
+.PHONY: all clean install
+all clean install: %: subdirs-%
--- /dev/null
+XEN_ROOT = ../../../
+include $(XEN_ROOT)/tools/Rules.mk
+
+# Xen configuration dir and configs to go there.
+XEN_CONFIG_DIR = /etc/xen
+
+# Xen script dir and scripts to go there.
+XEN_SCRIPT_DIR = $(XEN_CONFIG_DIR)/scripts
+XEN_SCRIPTS =
+XEN_SCRIPTS += block-nbsd
+XEN_SCRIPTS += qemu-ifup-nbsd
+XEN_SCRIPTS += vif-bridge-nbsd
+XEN_SCRIPTS += vif-ip-nbsd
+
+XEN_SCRIPT_DATA =
+
+.PHONY: all
+all:
+
+.PHONY: build
+build:
+
+.PHONY: install
+install: all install-scripts
+
+.PHONY: install-scripts
+install-scripts:
+ $(INSTALL_DIR) $(DESTDIR)$(XEN_SCRIPT_DIR)
+ set -e; for i in $(XEN_SCRIPTS); \
+ do \
+ $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
+ done
+ set -e; for i in $(XEN_SCRIPT_DATA); \
+ do \
+ $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
+ done
+
+.PHONY: clean
+clean:
--- /dev/null
+#!/bin/sh -e
+
+# $NetBSD: block-nbsd,v 1.1.1.1 2008/08/07 20:26:57 cegger Exp $
+# Called by xenbackendd
+# Usage: block xsdir_backend_path state
+
+PATH=/bin:/usr/bin:/sbin:/usr/sbin
+export PATH
+
+error() {
+ echo "$@" >&2
+ xenstore_write $xpath/hotplug-status error
+ exit 1
+}
+
+
+xpath=$1
+xstatus=$2
+xtype=$(xenstore-read "$xpath/type")
+xparams=$(xenstore-read "$xpath/params")
+
+case $xstatus in
+6)
+ # device removed
+ case $xtype in
+ file)
+ vnd=$(xenstore-read "$xpath/vnd" || echo none)
+ if [ $vnd != none ]; then
+ vnconfig -u $vnd
+ fi
+ ;;
+ phy)
+ ;;
+ *)
+ echo "unknown type $xtype" >&2
+ ;;
+ esac
+ xenstore-rm $xpath
+ exit 0
+ ;;
+2)
+ case $xtype in
+ file)
+ # Store the list of available vnd(4) devices in
+ #``available_disks'', and mark them as ``free''.
+ list=`ls -1 /dev/vnd[0-9]*d | sed "s,/dev/vnd,,;s,d,," | sort -n`
+ for i in $list; do
+ disk="vnd$i"
+ available_disks="$available_disks $disk"
+ eval $disk=free
+ done
+ # Mark the used vnd(4) devices as ``used''.
+ for disk in `sysctl hw.disknames`; do
+ case $disk in
+ vnd[0-9]*) eval $disk=used ;;
+ esac
+ done
+ # Configure the first free vnd(4) device.
+ for disk in $available_disks; do
+ eval status=\$$disk
+ if [ "$status" = "free" ] && \
+ vnconfig /dev/${disk}d $xparams >/dev/null; then
+ device=/dev/${disk}d
+ echo vnconfig /dev/${disk}d $xparams
+ break
+ fi
+ done
+ if [ x$device = x ] ; then
+ error "no available vnd device"
+ fi
+ echo xenstore-write $xpath/vnd $device
+ xenstore-write $xpath/vnd $device
+ ;;
+ phy)
+ device=$xparams
+ ;;
+ esac
+ physical_device=$(stat -f '%r' "$device")
+ echo xenstore-write $xpath/physical-device $physical_device
+ xenstore-write $xpath/physical-device $physical_device
+ echo xenstore-write $xpath/hotplug-status connected
+ xenstore-write $xpath/hotplug-status connected
+ exit 0
+ ;;
+*)
+ exit 0
+ ;;
+esac
--- /dev/null
+#!/bin/sh
+ifconfig $1 up
+exec /sbin/brconfig $2 add $1
--- /dev/null
+#!/bin/sh -e
+
+# $NetBSD: vif-bridge-nbsd,v 1.1.1.1 2008/08/07 20:26:57 cegger Exp $
+# Called by xenbackendd
+# Usage: vif-bridge xsdir_backend_path state
+
+PATH=/bin:/usr/bin:/sbin:/usr/sbin
+export PATH
+
+xpath=$1
+xstatus=$2
+
+case $xstatus in
+6)
+ # device removed
+ xenstore-rm $xpath
+ exit 0
+ ;;
+2)
+ xbridge=$(xenstore-read "$xpath/bridge")
+ xfid=$(xenstore-read "$xpath/frontend-id")
+ xhandle=$(xenstore-read "$xpath/handle")
+ iface=xvif$xfid.$xhandle
+ echo ifconfig $iface up
+ ifconfig $iface up
+ brconfig $xbridge add $iface
+ echo brconfig $xbridge add $iface
+ xenstore-write $xpath/hotplug-status connected
+ echo xenstore-write $xpath/hotplug-status connected
+ exit 0
+ ;;
+*)
+ exit 0
+ ;;
+esac
--- /dev/null
+#!/bin/sh -e
+
+# $NetBSD: vif-ip-nbsd,v 1.1.1.1 2008/08/07 20:26:57 cegger Exp $
+# Called by xenbackendd
+# Usage: vif-ip xsdir_backend_path state
+
+PATH=/bin:/usr/bin:/sbin:/usr/sbin
+export PATH
+
+xpath=$1
+xstatus=$2
+
+case $xstatus in
+6)
+ # device removed
+ xenstore-rm $xpath
+ exit 0
+ ;;
+2)
+ xip=$(xenstore-read "$xpath/ip")
+ xfid=$(xenstore-read "$xpath/frontend-id")
+ xhandle=$(xenstore-read "$xpath/handle")
+ iface=xvif$xfid.$xhandle
+ echo ifconfig $iface $xip up
+ ifconfig $iface $xip up
+ xenstore-write $xpath/hotplug-status connected
+ echo xenstore-write $xpath/hotplug-status connected
+ exit 0
+ ;;
+*)
+ exit 0
+ ;;
+esac
--- /dev/null
+XEN_ROOT = ../../../
+include $(XEN_ROOT)/tools/Rules.mk
+
+# OS-independent hotplug scripts go in this directory
+
+# Xen configuration dir and configs to go there.
+XEN_CONFIG_DIR = /etc/xen
+
+# Xen script dir and scripts to go there.
+XEN_SCRIPT_DIR = /etc/xen/scripts
+XEN_SCRIPTS =
+XEN_SCRIPT_DATA =
+
+.PHONY: all
+all:
+
+.PHONY: build
+build:
+
+.PHONY: install
+install: all install-scripts
+
+.PHONY: install-scripts
+install-scripts:
+ [ -d $(DESTDIR)$(XEN_SCRIPT_DIR) ] || \
+ $(INSTALL_DIR) $(DESTDIR)$(XEN_SCRIPT_DIR)
+ set -e; for i in $(XEN_SCRIPTS); \
+ do \
+ $(INSTALL_PROG) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
+ done
+ set -e; for i in $(XEN_SCRIPT_DATA); \
+ do \
+ $(INSTALL_DATA) $$i $(DESTDIR)$(XEN_SCRIPT_DIR); \
+ done
+
+.PHONY: clean
+clean:
xen/.dir:
@rm -rf xen
- mkdir xen
+ mkdir -p xen/libelf
ln -sf ../$(XEN_ROOT)/xen/include/public/COPYING xen
ln -sf $(addprefix ../,$(wildcard $(XEN_ROOT)/xen/include/public/*.h)) xen
ln -sf $(addprefix ../$(XEN_ROOT)/xen/include/public/,arch-ia64 arch-x86 hvm io xsm) xen
ln -sf ../xen-sys/$(XEN_OS) xen/sys
+ ln -sf $(addprefix ../../$(XEN_ROOT)/xen/include/xen/,libelf.h elfstructs.h) xen/libelf/
ln -s ../xen-foreign xen/foreign
touch $@
structs | x86_32 x86_64 ia64
-start_info | 1104 1152 1152
+start_info | 1112 1168 1168
trap_info | 8 16 -
pt_fpreg | - - 16
cpu_user_regs | 68 200 -
libdir=$(prefix)/lib
ARCH := $(shell uname -m | sed -e s/i.86/i386/)
-CFLAGS := -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC
+CFLAGS = -nostdlib -nostartfiles -Wall -I. -g -fomit-frame-pointer -O2 -fPIC
SO_CFLAGS=-shared $(CFLAGS)
L_CFLAGS=$(CFLAGS)
LINK_FLAGS=
include $(XEN_ROOT)/tools/Rules.mk
-DEPS = .*.d
-
-CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror -Wp,-MD,.$(@F).d
+CFLAGS += -I$(XEN_ROOT)/tools/libfsimage/common/ -Werror
LDFLAGS += -L../common/
PIC_OBJS := $(patsubst %.c,%.opic,$(LIB_SRCS-y))
MAJOR = 1.0
MINOR = 0
-CFLAGS += -Werror -Wp,-MD,.$(@F).d
-DEPS = .*.d
-
LDFLAGS-$(CONFIG_SunOS) = -Wl,-M -Wl,mapfile-SunOS
LDFLAGS-$(CONFIG_Linux) = -Wl,mapfile-GNU
LDFLAGS = $(LDFLAGS-y)
return (-1);
if (uber->ub_magic == UBERBLOCK_MAGIC &&
- uber->ub_version >= SPA_VERSION_1 &&
- uber->ub_version <= SPA_VERSION)
+ uber->ub_version > 0 && uber->ub_version <= SPA_VERSION)
return (0);
return (-1);
/*
* On-disk version number.
*/
-#define SPA_VERSION_1 1ULL
-#define SPA_VERSION_2 2ULL
-#define SPA_VERSION_3 3ULL
-#define SPA_VERSION_4 4ULL
-#define SPA_VERSION_5 5ULL
-#define SPA_VERSION_6 6ULL
-#define SPA_VERSION_7 7ULL
-#define SPA_VERSION_8 8ULL
-#define SPA_VERSION_9 9ULL
-#define SPA_VERSION_10 10ULL
-#define SPA_VERSION SPA_VERSION_10
+#define SPA_VERSION 14ULL
/*
* The following are configuration names used in the nvlist describing a pool's
XEN_ROOT = ../..
include $(XEN_ROOT)/tools/Rules.mk
-MAJOR = 3.2
+MAJOR = 3.4
MINOR = 0
CTRL_SRCS-y :=
CTRL_SRCS-$(CONFIG_MiniOS) += xc_minios.c
GUEST_SRCS-y :=
-GUEST_SRCS-y += xg_private.c
+GUEST_SRCS-y += xg_private.c xc_suspend.c
GUEST_SRCS-$(CONFIG_MIGRATE) += xc_domain_restore.c xc_domain_save.c
GUEST_SRCS-$(CONFIG_HVM) += xc_hvm_build.c
# libraries.
#CFLAGS += -DVALGRIND -O0 -ggdb3
-# Get gcc to generate the dependencies for us.
-CFLAGS += -Wp,-MD,.$(@F).d
LDFLAGS += -L.
-DEPS = .*.d
CTRL_LIB_OBJS := $(patsubst %.c,%.o,$(CTRL_SRCS-y))
CTRL_PIC_OBJS := $(patsubst %.c,%.opic,$(CTRL_SRCS-y))
return -1;
}
+/* xc_hvm_build_target_mem:
+ * Create a domain for a pre-ballooned virtualized Linux, using
+ * files/filenames. If target < memsize, domain is created with
+ * memsize pages marked populate-on-demand, and with a PoD cache size
+ * of target. If target == memsize, pages are populated normally.
+ */
+int xc_hvm_build_target_mem(int xc_handle,
+ uint32_t domid,
+ int memsize,
+ int target,
+ const char *image_name)
+{
+ /* XXX:PoD isn't supported yet */
+ return xc_hvm_build(xc_handle, domid, target, image_name);
+}
+
/*
* From asm/pgtable.h
*/
fprintf(stderr, "ip=%016lx, b0=%016lx\n", ctxt->regs.ip, ctxt->regs.b[0]);
/* Initialize and set registers. */
- ctxt->flags = VGCF_EXTRA_REGS | VGCF_SET_CR_IRR | VGCF_online;
+ ctxt->flags = VGCF_EXTRA_REGS | VGCF_SET_CR_IRR | VGCF_online |
+ VGCF_SET_AR_ITC;
if (xc_vcpu_setcontext(xc_handle, dom, vcpu, ctxt_any) != 0) {
ERROR("Couldn't set vcpu context");
return -1;
/* number of pages to write at a time */
#define DUMP_INCREMENT (4 * 1024)
-/* Don't yet support cross-address-size core dump */
-#define guest_width (sizeof (unsigned long))
-
/* string table */
struct xc_core_strtab {
char *strings;
ehdr->e_ident[EI_ABIVERSION] = EV_CURRENT;
ehdr->e_type = ET_CORE;
- ehdr->e_machine = ELF_ARCH_MACHINE;
+ /* e_machine will be filled in later */
ehdr->e_version = EV_CURRENT;
ehdr->e_entry = 0;
ehdr->e_phoff = 0;
}
static int
-elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle)
+elfnote_dump_xen_version(void *args, dumpcore_rtn_t dump_rtn, int xc_handle,
+ unsigned int guest_width)
{
int sts;
struct elfnote elfnote;
elfnote.descsz = sizeof(xen_version);
elfnote.type = XEN_ELFNOTE_DUMPCORE_XEN_VERSION;
elfnote_fill_xen_version(xc_handle, &xen_version);
+ if (guest_width < sizeof(unsigned long))
+ {
+ // 32 bit elf file format differs in pagesize's alignment
+ char *p = (char *)&xen_version.pagesize;
+ memmove(p - 4, p, sizeof(xen_version.pagesize));
+ }
sts = dump_rtn(args, (char*)&elfnote, sizeof(elfnote));
if ( sts != 0 )
return sts;
return dump_rtn(args, (char*)&format_version, sizeof(format_version));
}
+static int
+get_guest_width(int xc_handle,
+ uint32_t domid,
+ unsigned int *guest_width)
+{
+ DECLARE_DOMCTL;
+
+ memset(&domctl, 0, sizeof(domctl));
+ domctl.domain = domid;
+ domctl.cmd = XEN_DOMCTL_get_address_size;
+
+ if ( do_domctl(xc_handle, &domctl) != 0 )
+ return 1;
+
+ *guest_width = domctl.u.address_size.size / 8;
+ return 0;
+}
+
int
xc_domain_dumpcore_via_callback(int xc_handle,
uint32_t domid,
dumpcore_rtn_t dump_rtn)
{
xc_dominfo_t info;
- shared_info_t *live_shinfo = NULL;
+ shared_info_any_t *live_shinfo = NULL;
+ unsigned int guest_width;
int nr_vcpus = 0;
char *dump_mem, *dump_mem_start = NULL;
uint16_t strtab_idx;
struct xc_core_section_headers *sheaders = NULL;
Elf64_Shdr *shdr;
+
+ if ( get_guest_width(xc_handle, domid, &guest_width) != 0 )
+ {
+ PERROR("Could not get address size for domain");
+ return sts;
+ }
xc_core_arch_context_init(&arch_ctxt);
if ( (dump_mem_start = malloc(DUMP_INCREMENT*PAGE_SIZE)) == NULL )
if ( sts != 0 )
goto out;
+ /*
+ * Note: this is the *current* number of pages and may change under
+ * a live dump-core. We'll just take this value, and if more pages
+ * exist, we'll skip them. If there's less, then we'll just not use
+ * all the array...
+ *
+ * We don't want to use the total potential size of the memory map
+ * since that is usually much higher than info.nr_pages.
+ */
nr_pages = info.nr_pages;
+
if ( !auto_translated_physmap )
{
/* obtain p2m table */
goto out;
}
- sts = xc_core_arch_map_p2m(xc_handle, &info, live_shinfo,
+ sts = xc_core_arch_map_p2m(xc_handle, guest_width, &info, live_shinfo,
&p2m, &p2m_size);
if ( sts != 0 )
goto out;
/* write out elf header */
ehdr.e_shnum = sheaders->num;
ehdr.e_shstrndx = strtab_idx;
+ ehdr.e_machine = ELF_ARCH_MACHINE;
sts = dump_rtn(args, (char*)&ehdr, sizeof(ehdr));
if ( sts != 0 )
goto out;
goto out;
/* elf note section: xen version */
- sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle);
+ sts = elfnote_dump_xen_version(args, dump_rtn, xc_handle, guest_width);
if ( sts != 0 )
goto out;
if ( !auto_translated_physmap )
{
- gmfn = p2m[i];
- if ( gmfn == INVALID_P2M_ENTRY )
- continue;
+ if ( guest_width >= sizeof(unsigned long) )
+ {
+ if ( guest_width == sizeof(unsigned long) )
+ gmfn = p2m[i];
+ else
+ gmfn = ((uint64_t *)p2m)[i];
+ if ( gmfn == INVALID_P2M_ENTRY )
+ continue;
+ }
+ else
+ {
+ gmfn = ((uint32_t *)p2m)[i];
+ if ( gmfn == (uint32_t)INVALID_P2M_ENTRY )
+ continue;
+ }
p2m_array[j].pfn = i;
p2m_array[j].gmfn = gmfn;
/* When live dump-mode (-L option) is specified,
* guest domain may reduce memory. pad with zero pages.
*/
- IPRINTF("j (%ld) != nr_pages (%ld)", j , nr_pages);
+ IPRINTF("j (%ld) != nr_pages (%ld)", j, nr_pages);
memset(dump_mem_start, 0, PAGE_SIZE);
for (; j < nr_pages; j++) {
sts = dump_rtn(args, dump_mem_start, PAGE_SIZE);
struct dump_args da;
int sts;
- if ( (da.fd = open(corename, O_CREAT|O_RDWR, S_IWUSR|S_IRUSR)) < 0 )
+ if ( (da.fd = open(corename, O_CREAT|O_RDWR|O_TRUNC, S_IWUSR|S_IRUSR)) < 0 )
{
PERROR("Could not open corefile %s", corename);
return -errno;
#include "xen/version.h"
#include "xg_private.h"
-#include "xen/elfstructs.h"
+#include "xen/libelf/elfstructs.h"
/* section names */
#define XEN_DUMPCORE_SEC_NOTE ".note.Xen"
struct xc_core_arch_context;
int xc_core_arch_memory_map_get(int xc_handle,
struct xc_core_arch_context *arch_ctxt,
- xc_dominfo_t *info, shared_info_t *live_shinfo,
+ xc_dominfo_t *info, shared_info_any_t *live_shinfo,
xc_core_memory_map_t **mapp,
unsigned int *nr_entries);
-int xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
- shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
- unsigned long *pfnp);
+int xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width,
+ xc_dominfo_t *info, shared_info_any_t *live_shinfo,
+ xen_pfn_t **live_p2m, unsigned long *pfnp);
#if defined (__i386__) || defined (__x86_64__)
/* see setup_guest() @ xc_linux_build.c */
static int
memory_map_get_old_domu(int xc_handle, xc_dominfo_t *info,
- shared_info_t *live_shinfo,
+ shared_info_any_t *live_shinfo,
xc_core_memory_map_t **mapp, unsigned int *nr_entries)
{
xc_core_memory_map_t *map = NULL;
/* see setup_guest() @ xc_ia64_hvm_build.c */
static int
memory_map_get_old_hvm(int xc_handle, xc_dominfo_t *info,
- shared_info_t *live_shinfo,
+ shared_info_any_t *live_shinfo,
xc_core_memory_map_t **mapp, unsigned int *nr_entries)
{
const xc_core_memory_map_t gfw_map[] = {
static int
memory_map_get_old(int xc_handle, xc_dominfo_t *info,
- shared_info_t *live_shinfo,
+ shared_info_any_t *live_shinfo,
xc_core_memory_map_t **mapp, unsigned int *nr_entries)
{
if ( info->hvm )
int
xc_core_arch_memory_map_get(int xc_handle,
struct xc_core_arch_context *arch_ctxt,
- xc_dominfo_t *info, shared_info_t *live_shinfo,
+ xc_dominfo_t *info,
+ shared_info_any_t *live_shinfo,
xc_core_memory_map_t **mapp,
unsigned int *nr_entries)
{
}
/* copy before use in case someone updating them */
- if (xc_ia64_copy_memmap(xc_handle, info->domid, live_shinfo, &memmap_info,
- NULL)) {
+ if (xc_ia64_copy_memmap(xc_handle, info->domid, &live_shinfo->s,
+ &memmap_info, NULL)) {
goto old;
}
}
int
-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
- shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
+xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
+ shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m,
unsigned long *pfnp)
{
/*
#include "xg_private.h"
#include "xc_core.h"
+#include "xc_e820.h"
+
+#define GET_FIELD(_p, _f) ((guest_width==8) ? ((_p)->x64._f) : ((_p)->x32._f))
+
+#ifndef MAX
+#define MAX(_a, _b) ((_a) >= (_b) ? (_a) : (_b))
+#endif
+
+int
+xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
+ unsigned long pfn)
+{
+ if ((pfn >= 0xa0 && pfn < 0xc0) /* VGA hole */
+ || (pfn >= (HVM_BELOW_4G_MMIO_START >> PAGE_SHIFT)
+ && pfn < (1ULL<<32) >> PAGE_SHIFT)) /* MMIO */
+ return 0;
+ return 1;
+}
-/* Don't yet support cross-address-size core dump */
-#define guest_width (sizeof (unsigned long))
static int nr_gpfns(int xc_handle, domid_t domid)
{
int
xc_core_arch_memory_map_get(int xc_handle, struct xc_core_arch_context *unused,
- xc_dominfo_t *info, shared_info_t *live_shinfo,
+ xc_dominfo_t *info, shared_info_any_t *live_shinfo,
xc_core_memory_map_t **mapp,
unsigned int *nr_entries)
{
}
int
-xc_core_arch_map_p2m(int xc_handle, xc_dominfo_t *info,
- shared_info_t *live_shinfo, xen_pfn_t **live_p2m,
+xc_core_arch_map_p2m(int xc_handle, unsigned int guest_width, xc_dominfo_t *info,
+ shared_info_any_t *live_shinfo, xen_pfn_t **live_p2m,
unsigned long *pfnp)
{
/* Double and single indirect references to the live P2M table */
xen_pfn_t *live_p2m_frame_list_list = NULL;
xen_pfn_t *live_p2m_frame_list = NULL;
+ /* Copies of the above. */
+ xen_pfn_t *p2m_frame_list_list = NULL;
+ xen_pfn_t *p2m_frame_list = NULL;
+
uint32_t dom = info->domid;
unsigned long p2m_size = nr_gpfns(xc_handle, info->domid);
int ret = -1;
int err;
+ int i;
if ( p2m_size < info->nr_pages )
{
live_p2m_frame_list_list =
xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
- live_shinfo->arch.pfn_to_mfn_frame_list_list);
+ GET_FIELD(live_shinfo, arch.pfn_to_mfn_frame_list_list));
if ( !live_p2m_frame_list_list )
{
goto out;
}
+ /* Get a local copy of the live_P2M_frame_list_list */
+ if ( !(p2m_frame_list_list = malloc(PAGE_SIZE)) )
+ {
+ ERROR("Couldn't allocate p2m_frame_list_list array");
+ goto out;
+ }
+ memcpy(p2m_frame_list_list, live_p2m_frame_list_list, PAGE_SIZE);
+
+ /* Canonicalize guest's unsigned long vs ours */
+ if ( guest_width > sizeof(unsigned long) )
+ for ( i = 0; i < PAGE_SIZE/sizeof(unsigned long); i++ )
+ if ( i < PAGE_SIZE/guest_width )
+ p2m_frame_list_list[i] = ((uint64_t *)p2m_frame_list_list)[i];
+ else
+ p2m_frame_list_list[i] = 0;
+ else if ( guest_width < sizeof(unsigned long) )
+ for ( i = PAGE_SIZE/sizeof(unsigned long) - 1; i >= 0; i-- )
+ p2m_frame_list_list[i] = ((uint32_t *)p2m_frame_list_list)[i];
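+    /*
+     * The conversion above is done in place: when the guest's word size is
+     * smaller than dom0's, the 32-bit entries are widened to unsigned long
+     * from the top of the page downwards, so each 4-byte source entry is read
+     * before the wider slot overlapping it is overwritten.  The narrowing
+     * case can run upwards because source offsets never lag behind
+     * destination offsets.
+     */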
+
live_p2m_frame_list =
xc_map_foreign_pages(xc_handle, dom, PROT_READ,
- live_p2m_frame_list_list,
+ p2m_frame_list_list,
P2M_FLL_ENTRIES);
if ( !live_p2m_frame_list )
goto out;
}
+ /* Get a local copy of the live_P2M_frame_list */
+ if ( !(p2m_frame_list = malloc(P2M_TOOLS_FL_SIZE)) )
+ {
+ ERROR("Couldn't allocate p2m_frame_list array");
+ goto out;
+ }
+ memset(p2m_frame_list, 0, P2M_TOOLS_FL_SIZE);
+ memcpy(p2m_frame_list, live_p2m_frame_list, P2M_GUEST_FL_SIZE);
+
+ /* Canonicalize guest's unsigned long vs ours */
+ if ( guest_width > sizeof(unsigned long) )
+ for ( i = 0; i < P2M_FL_ENTRIES; i++ )
+ p2m_frame_list[i] = ((uint64_t *)p2m_frame_list)[i];
+ else if ( guest_width < sizeof(unsigned long) )
+ for ( i = P2M_FL_ENTRIES - 1; i >= 0; i-- )
+ p2m_frame_list[i] = ((uint32_t *)p2m_frame_list)[i];
+
*live_p2m = xc_map_foreign_pages(xc_handle, dom, PROT_READ,
- live_p2m_frame_list,
+ p2m_frame_list,
P2M_FL_ENTRIES);
if ( !*live_p2m )
if ( live_p2m_frame_list )
munmap(live_p2m_frame_list, P2M_FLL_ENTRIES * PAGE_SIZE);
+ if ( p2m_frame_list_list )
+ free(p2m_frame_list_list);
+
+ if ( p2m_frame_list )
+ free(p2m_frame_list);
+
errno = err;
return ret;
}
#ifndef XC_CORE_X86_H
#define XC_CORE_X86_H
-#if defined(__i386__) || defined(__x86_64__)
#define ELF_ARCH_DATA ELFDATA2LSB
-#if defined (__i386__)
-# define ELF_ARCH_MACHINE EM_386
-#else
-# define ELF_ARCH_MACHINE EM_X86_64
-#endif
-#endif /* __i386__ or __x86_64__ */
-
+#define ELF_ARCH_MACHINE (guest_width == 8 ? EM_X86_64 : EM_386)
struct xc_core_arch_context {
/* nothing */
#define xc_core_arch_context_get(arch_ctxt, ctxt, xc_handle, domid) \
(0)
#define xc_core_arch_context_dump(arch_ctxt, args, dump_rtn) (0)
-#define xc_core_arch_gpfn_may_present(arch_ctxt, i) (1)
+int
+xc_core_arch_gpfn_may_present(struct xc_core_arch_context *arch_ctxt,
+ unsigned long pfn);
static inline int
xc_core_arch_context_get_shdr(struct xc_core_arch_context *arch_ctxt,
struct xc_core_section_headers *sheaders,
#define X86_FEATURE_SSE4_1 (4*32+19) /* Streaming SIMD Extensions 4.1 */
#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running under some hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
bitmaskof(X86_FEATURE_SSE4_2) |
bitmaskof(X86_FEATURE_POPCNT));
+ regs[2] |= bitmaskof(X86_FEATURE_HYPERVISOR);
+
regs[3] &= (bitmaskof(X86_FEATURE_FPU) |
bitmaskof(X86_FEATURE_VME) |
bitmaskof(X86_FEATURE_DE) |
clear_bit(X86_FEATURE_XTPR, regs[2]);
clear_bit(X86_FEATURE_PDCM, regs[2]);
clear_bit(X86_FEATURE_DCA, regs[2]);
+ clear_bit(X86_FEATURE_XSAVE, regs[2]);
+ set_bit(X86_FEATURE_HYPERVISOR, regs[2]);
break;
case 0x80000001:
if ( !guest_64bit )
-#include <xen/libelf.h>
+#include <xen/libelf/libelf.h>
#define INVALID_P2M_ENTRY ((xen_pfn_t)-1)
return -1;
}
rc = inflate(&zStream, Z_FINISH);
+ inflateEnd(&zStream);
if ( rc != Z_STREAM_END )
{
xc_dom_panic(XC_INTERNAL_ERROR,
xc_dom_printf("%s: called\n", __FUNCTION__);
memset(start_info, 0, sizeof(*start_info));
- snprintf(start_info->magic, sizeof(start_info->magic), dom->guest_type);
+ strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
+ start_info->magic[sizeof(start_info->magic) - 1] = '\0';
start_info->nr_pages = dom->total_pages;
start_info->shared_info = shinfo << PAGE_SHIFT_X86;
start_info->pt_base = dom->pgtables_seg.vstart;
xc_dom_printf("%s: called\n", __FUNCTION__);
memset(start_info, 0, sizeof(*start_info));
- snprintf(start_info->magic, sizeof(start_info->magic), dom->guest_type);
+ strncpy(start_info->magic, dom->guest_type, sizeof(start_info->magic));
+ start_info->magic[sizeof(start_info->magic) - 1] = '\0';
start_info->nr_pages = dom->total_pages;
start_info->shared_info = shinfo << PAGE_SHIFT_X86;
start_info->pt_base = dom->pgtables_seg.vstart;
int arch_setup_meminit(struct xc_dom_image *dom)
{
int rc;
- xen_pfn_t pfn;
+ xen_pfn_t pfn, allocsz, i;
rc = x86_compat(dom->guest_xc, dom->guest_domid, dom->guest_type);
if ( rc )
dom->p2m_host[pfn] = pfn;
/* allocate guest memory */
- rc = xc_domain_memory_populate_physmap(dom->guest_xc, dom->guest_domid,
- dom->total_pages, 0, 0,
- dom->p2m_host);
+ for ( i = rc = allocsz = 0; (i < dom->total_pages) && !rc; i += allocsz )
+ {
+ allocsz = dom->total_pages - i;
+ if ( allocsz > 1024*1024 )
+ allocsz = 1024*1024;
+ rc = xc_domain_memory_populate_physmap(
+ dom->guest_xc, dom->guest_domid, allocsz, 0, 0, &dom->p2m_host[i]);
+ }
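+    /*
+     * Note: each loop iteration populates at most 1024*1024 order-0
+     * extents, i.e. 4 GiB per XENMEM_populate_physmap hypercall
+     * (1M pages * 4 KiB), so even very large guests are built with a
+     * bounded per-call batch size.
+     */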
+
return rc;
}
return (ret < 0 ? -1 : domctl.u.hvmcontext.size);
}
+/* Get just one element of the HVM guest context.
+ * size must be >= HVM_SAVE_LENGTH(type) */
+int xc_domain_hvm_getcontext_partial(int xc_handle,
+ uint32_t domid,
+ uint16_t typecode,
+ uint16_t instance,
+ void *ctxt_buf,
+ uint32_t size)
+{
+ int ret;
+ DECLARE_DOMCTL;
+
+ if ( !ctxt_buf )
+ return -EINVAL;
+
+ domctl.cmd = XEN_DOMCTL_gethvmcontext_partial;
+ domctl.domain = (domid_t) domid;
+ domctl.u.hvmcontext_partial.type = typecode;
+ domctl.u.hvmcontext_partial.instance = instance;
+ set_xen_guest_handle(domctl.u.hvmcontext_partial.buffer, ctxt_buf);
+
+ if ( (ret = lock_pages(ctxt_buf, size)) != 0 )
+ return ret;
+
+ ret = do_domctl(xc_handle, &domctl);
+
+ if ( ctxt_buf )
+ unlock_pages(ctxt_buf, size);
+
+ return ret ? -1 : 0;
+}
+
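+/*
+ * Usage sketch (not part of this change): fetch only VCPU 0's register
+ * block of an HVM guest via the new partial-context call.  "xch" and
+ * "domid" are assumed to be an open handle and an existing HVM domain.
+ *
+ *     struct hvm_hw_cpu cpu;
+ *     if ( xc_domain_hvm_getcontext_partial(xch, domid, HVM_SAVE_CODE(CPU),
+ *                                           0, &cpu, sizeof(cpu)) == 0 )
+ *         printf("vcpu0: rip=%#llx cr3=%#llx\n",
+ *                (unsigned long long)cpu.rip, (unsigned long long)cpu.cr3);
+ */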
/* set info to hvm guest for restore */
int xc_domain_hvm_setcontext(int xc_handle,
uint32_t domid,
return err;
}
-int xc_domain_memory_translate_gpfn_list(int xc_handle,
- uint32_t domid,
- unsigned long nr_gpfns,
- xen_pfn_t *gpfn_list,
- xen_pfn_t *mfn_list)
+static int xc_domain_memory_pod_target(int xc_handle,
+ int op,
+ uint32_t domid,
+ uint64_t target_pages,
+ uint64_t *tot_pages,
+ uint64_t *pod_cache_pages,
+ uint64_t *pod_entries)
{
int err;
- struct xen_translate_gpfn_list translate_gpfn_list = {
- .domid = domid,
- .nr_gpfns = nr_gpfns,
+
+ struct xen_pod_target pod_target = {
+ .domid = domid,
+ .target_pages = target_pages
};
- set_xen_guest_handle(translate_gpfn_list.gpfn_list, gpfn_list);
- set_xen_guest_handle(translate_gpfn_list.mfn_list, mfn_list);
- err = xc_memory_op(xc_handle, XENMEM_translate_gpfn_list, &translate_gpfn_list);
+ err = xc_memory_op(xc_handle, op, &pod_target);
- if ( err != 0 )
+ if ( err < 0 )
{
- DPRINTF("Failed translation for dom %d (%ld PFNs)\n",
- domid, nr_gpfns);
+ DPRINTF("Failed %s_memory_target dom %d\n",
+ (op==XENMEM_set_pod_target)?"set":"get",
+ domid);
errno = -err;
err = -1;
}
+ else
+ err = 0;
+
+ if ( tot_pages )
+ *tot_pages = pod_target.tot_pages;
+ if ( pod_cache_pages )
+ *pod_cache_pages = pod_target.pod_cache_pages;
+ if ( pod_entries )
+ *pod_entries = pod_target.pod_entries;
return err;
}
+
+
+int xc_domain_memory_set_pod_target(int xc_handle,
+ uint32_t domid,
+ uint64_t target_pages,
+ uint64_t *tot_pages,
+ uint64_t *pod_cache_pages,
+ uint64_t *pod_entries)
+{
+ return xc_domain_memory_pod_target(xc_handle,
+ XENMEM_set_pod_target,
+ domid,
+ target_pages,
+ tot_pages,
+ pod_cache_pages,
+ pod_entries);
+}
+
+int xc_domain_memory_get_pod_target(int xc_handle,
+ uint32_t domid,
+ uint64_t *tot_pages,
+ uint64_t *pod_cache_pages,
+ uint64_t *pod_entries)
+{
+ return xc_domain_memory_pod_target(xc_handle,
+ XENMEM_get_pod_target,
+ domid,
+ -1,
+ tot_pages,
+ pod_cache_pages,
+ pod_entries);
+}
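+
+/*
+ * Usage sketch (not part of this change): set a PoD domain's target to
+ * 256 MiB worth of pages and read the resulting totals back.  "xch" and
+ * "domid" are assumed to be valid.
+ *
+ *     uint64_t tot, cache, entries;
+ *     xc_domain_memory_set_pod_target(xch, domid, 256UL << (20 - PAGE_SHIFT),
+ *                                     &tot, &cache, &entries);
+ *     xc_domain_memory_get_pod_target(xch, domid, &tot, &cache, &entries);
+ */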
int xc_domain_max_vcpus(int xc_handle, uint32_t domid, unsigned int max)
{
uint32_t domid,
uint32_t gvec,
uint32_t pirq,
- uint32_t gflags)
+ uint32_t gflags,
+ uint64_t gtable)
{
int rc;
xen_domctl_bind_pt_irq_t *bind;
domctl.cmd = XEN_DOMCTL_bind_pt_irq;
domctl.domain = (domid_t)domid;
+ bind = &(domctl.u.bind_pt_irq);
+ bind->hvm_domid = domid;
+ bind->irq_type = PT_IRQ_TYPE_MSI;
+ bind->machine_irq = pirq;
+ bind->u.msi.gvec = gvec;
+ bind->u.msi.gflags = gflags;
+ bind->u.msi.gtable = gtable;
+
+ rc = do_domctl(xc_handle, &domctl);
+ return rc;
+}
+
+int xc_domain_unbind_msi_irq(
+ int xc_handle,
+ uint32_t domid,
+ uint32_t gvec,
+ uint32_t pirq,
+ uint32_t gflags)
+{
+ int rc;
+ xen_domctl_bind_pt_irq_t *bind;
+
+ DECLARE_DOMCTL;
+
+ domctl.cmd = XEN_DOMCTL_unbind_pt_irq;
+ domctl.domain = (domid_t)domid;
+
bind = &(domctl.u.bind_pt_irq);
bind->hvm_domid = domid;
bind->irq_type = PT_IRQ_TYPE_MSI;
bind->hvm_domid = domid;
bind->irq_type = irq_type;
bind->machine_irq = machine_irq;
- if ( irq_type == PT_IRQ_TYPE_PCI )
+ if ( irq_type == PT_IRQ_TYPE_PCI ||
+ irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
{
bind->u.pci.bus = bus;
bind->u.pci.device = device;
}
+int xc_domain_debug_control(int xc, uint32_t domid, uint32_t sop, uint32_t vcpu)
+{
+ DECLARE_DOMCTL;
+
+ memset(&domctl, 0, sizeof(domctl));
+ domctl.domain = (domid_t)domid;
+ domctl.cmd = XEN_DOMCTL_debug_op;
+ domctl.u.debug_op.op = sop;
+ domctl.u.debug_op.vcpu = vcpu;
+
+ return do_domctl(xc, &domctl);
+}
+
+
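+/*
+ * Usage sketch (not part of this change): toggle hardware single-stepping
+ * (MTF) for VCPU 0 of an HVM guest, as the gdbserver code further down
+ * does.  "xch" and "domid" are assumed to be valid.
+ *
+ *     xc_domain_debug_control(xch, domid,
+ *                             XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON, 0);
+ *     ... let the guest execute one instruction ...
+ *     xc_domain_debug_control(xch, domid,
+ *                             XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF, 0);
+ */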
/*
* Local variables:
* mode: C
continue;
}
+ if ( j == -4 )
+ {
+ uint64_t vm86_tss;
+
+ /* Skip padding 4 bytes then read the vm86 TSS location. */
+ if ( read_exact(io_fd, &vm86_tss, sizeof(uint32_t)) ||
+ read_exact(io_fd, &vm86_tss, sizeof(uint64_t)) )
+ {
+ ERROR("error read the address of the vm86 TSS");
+ goto out;
+ }
+
+ xc_set_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS, vm86_tss);
+ continue;
+ }
+
if ( j == 0 )
break; /* our work here is done */
return success ? p2m : NULL;
}
-
-
int xc_domain_save(int xc_handle, int io_fd, uint32_t dom, uint32_t max_iters,
uint32_t max_factor, uint32_t flags, int (*suspend)(void),
int hvm, void *(*init_qemu_maps)(int, unsigned),
if ( hvm )
{
struct {
- int minusthree;
+ int id;
uint32_t pad;
- uint64_t ident_pt;
- } chunk = { -3, 0 };
+ uint64_t data;
+ } chunk = { 0, };
+ chunk.id = -3;
xc_get_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
- (unsigned long *)&chunk.ident_pt);
+ (unsigned long *)&chunk.data);
- if ( (chunk.ident_pt != 0) &&
+ if ( (chunk.data != 0) &&
write_exact(io_fd, &chunk, sizeof(chunk)) )
{
PERROR("Error when writing the ident_pt for EPT guest");
goto out;
}
+
+ chunk.id = -4;
+ xc_get_hvm_param(xc_handle, dom, HVM_PARAM_VM86_TSS,
+ (unsigned long *)&chunk.data);
+
+ if ( (chunk.data != 0) &&
+ write_exact(io_fd, &chunk, sizeof(chunk)) )
+ {
+ PERROR("Error when writing the vm86 TSS for guest");
+ goto out;
+ }
}
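+    /*
+     * Wire format of the two optional HVM chunks emitted above: a 4-byte
+     * chunk id (-3 for HVM_PARAM_IDENT_PT, -4 for HVM_PARAM_VM86_TSS),
+     * 4 bytes of padding, then the 64-bit parameter value.  The restore
+     * side added earlier skips the padding and reads the 64-bit value for
+     * the -4 chunk accordingly.
+     */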
/* Zero terminate */
-#include <xen/elfstructs.h>
+#include <xen/libelf/elfstructs.h>
#include <xen/foreign/x86_64.h>
#include <xen/hvm/hvm_info_table.h>
#include <xen/hvm/params.h>
-#include "xc_e820.h"
+#include <xen/hvm/e820.h>
-#include <xen/libelf.h>
+#include <xen/libelf/libelf.h>
#define SUPERPAGE_PFN_SHIFT 9
#define SUPERPAGE_NR_PFNS (1UL << SUPERPAGE_PFN_SHIFT)
-#define SCRATCH_PFN 0xFFFFF
-
-#define SPECIALPAGE_GUARD 0
-#define SPECIALPAGE_BUFIOREQ 1
-#define SPECIALPAGE_XENSTORE 2
-#define SPECIALPAGE_IOREQ 3
-#define SPECIALPAGE_IDENT_PT 4
+#define SPECIALPAGE_BUFIOREQ 0
+#define SPECIALPAGE_XENSTORE 1
+#define SPECIALPAGE_IOREQ 2
+#define SPECIALPAGE_IDENT_PT 3
+#define SPECIALPAGE_SHINFO 4
#define NR_SPECIAL_PAGES 5
+#define special_pfn(x) (0xff000u - NR_SPECIAL_PAGES + (x))
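+/*
+ * With NR_SPECIAL_PAGES == 5, special_pfn() yields: BUFIOREQ 0xfeffb,
+ * XENSTORE 0xfeffc, IOREQ 0xfeffd, IDENT_PT 0xfeffe, SHINFO 0xfefff,
+ * i.e. the special pages sit immediately below pfn 0xff000, near the top
+ * of the 32-bit physical address space and clear of ordinary guest RAM.
+ */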
-static void build_e820map(void *e820_page, unsigned long long mem_size)
+static void build_hvm_info(void *hvm_info_page, uint64_t mem_size)
{
- struct e820entry *e820entry =
- (struct e820entry *)(((unsigned char *)e820_page) + HVM_E820_OFFSET);
- unsigned long long extra_mem_size = 0;
- unsigned char nr_map = 0;
+ struct hvm_info_table *hvm_info = (struct hvm_info_table *)
+ (((unsigned char *)hvm_info_page) + HVM_INFO_OFFSET);
+ uint64_t lowmem_end = mem_size, highmem_end = 0;
+ uint8_t sum;
+ int i;
- /*
- * Physical address space from HVM_BELOW_4G_RAM_END to 4G is reserved
- * for PCI devices MMIO. So if HVM has more than HVM_BELOW_4G_RAM_END
- * RAM, memory beyond HVM_BELOW_4G_RAM_END will go to 4G above.
- */
- if ( mem_size > HVM_BELOW_4G_RAM_END )
+ if ( lowmem_end > HVM_BELOW_4G_RAM_END )
{
- extra_mem_size = mem_size - HVM_BELOW_4G_RAM_END;
- mem_size = HVM_BELOW_4G_RAM_END;
+ highmem_end = lowmem_end + (1ull<<32) - HVM_BELOW_4G_RAM_END;
+ lowmem_end = HVM_BELOW_4G_RAM_END;
}
- /* 0x0-0x9FC00: Ordinary RAM. */
- e820entry[nr_map].addr = 0x0;
- e820entry[nr_map].size = 0x9FC00;
- e820entry[nr_map].type = E820_RAM;
- nr_map++;
+ memset(hvm_info_page, 0, PAGE_SIZE);
- /* 0x9FC00-0xA0000: Extended BIOS Data Area (EBDA). */
- e820entry[nr_map].addr = 0x9FC00;
- e820entry[nr_map].size = 0x400;
- e820entry[nr_map].type = E820_RESERVED;
- nr_map++;
+ /* Fill in the header. */
+ strncpy(hvm_info->signature, "HVM INFO", 8);
+ hvm_info->length = sizeof(struct hvm_info_table);
- /*
- * Following regions are standard regions of the PC memory map.
- * They are not covered by e820 regions. OSes will not use as RAM.
- * 0xA0000-0xC0000: VGA memory-mapped I/O. Not covered by E820.
- * 0xC0000-0xE0000: 16-bit devices, expansion ROMs (inc. vgabios).
- * TODO: hvmloader should free pages which turn out to be unused.
- */
+ /* Sensible defaults: these can be overridden by the caller. */
+ hvm_info->acpi_enabled = 1;
+ hvm_info->apic_mode = 1;
+ hvm_info->nr_vcpus = 1;
- /*
- * 0xE0000-0x0F0000: PC-specific area. We place ACPI tables here.
- * We *cannot* mark as E820_ACPI, for two reasons:
- * 1. ACPI spec. says that E820_ACPI regions below
- * 16MB must clip INT15h 0x88 and 0xe801 queries.
- * Our rombios doesn't do this.
- * 2. The OS is allowed to reclaim ACPI memory after
- * parsing the tables. But our FACS is in this
- * region and it must not be reclaimed (it contains
- * the ACPI global lock!).
- * 0xF0000-0x100000: System BIOS.
- * TODO: hvmloader should free pages which turn out to be unused.
- */
- e820entry[nr_map].addr = 0xE0000;
- e820entry[nr_map].size = 0x20000;
- e820entry[nr_map].type = E820_RESERVED;
- nr_map++;
-
- /* Low RAM goes here. Reserve space for special pages. */
- e820entry[nr_map].addr = 0x100000;
- e820entry[nr_map].size = (mem_size - 0x100000 -
- PAGE_SIZE * NR_SPECIAL_PAGES);
- e820entry[nr_map].type = E820_RAM;
- nr_map++;
-
- /* Explicitly reserve space for special pages (excluding guard page). */
- e820entry[nr_map].addr = mem_size - PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
- e820entry[nr_map].size = PAGE_SIZE * (NR_SPECIAL_PAGES - 1);
- e820entry[nr_map].type = E820_RESERVED;
- nr_map++;
-
- if ( extra_mem_size )
- {
- e820entry[nr_map].addr = (1ULL << 32);
- e820entry[nr_map].size = extra_mem_size;
- e820entry[nr_map].type = E820_RAM;
- nr_map++;
- }
+ /* Memory parameters. */
+ hvm_info->low_mem_pgend = lowmem_end >> PAGE_SHIFT;
+ hvm_info->high_mem_pgend = highmem_end >> PAGE_SHIFT;
+ hvm_info->reserved_mem_pgstart = special_pfn(0);
- *(((unsigned char *)e820_page) + HVM_E820_NR_OFFSET) = nr_map;
+ /* Finish with the checksum. */
+ for ( i = 0, sum = 0; i < hvm_info->length; i++ )
+ sum += ((uint8_t *)hvm_info)[i];
+ hvm_info->checksum = -sum;
}
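+/*
+ * The checksum is chosen so that the byte-wise sum over hvm_info->length
+ * bytes (including the checksum field itself) is 0 modulo 256.  A consumer
+ * such as hvmloader can therefore validate the table with a sketch like:
+ *
+ *     uint8_t sum = 0;
+ *     for ( i = 0; i < hvm_info->length; i++ )
+ *         sum += ((uint8_t *)hvm_info)[i];
+ *     table_ok = (sum == 0);
+ */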
static int loadelfimage(
}
static int setup_guest(int xc_handle,
- uint32_t dom, int memsize,
+ uint32_t dom, int memsize, int target,
char *image, unsigned long image_size)
{
xen_pfn_t *page_array = NULL;
unsigned long i, nr_pages = (unsigned long)memsize << (20 - PAGE_SHIFT);
- unsigned long special_page_nr, entry_eip, cur_pages;
+ unsigned long target_pages = (unsigned long)target << (20 - PAGE_SHIFT);
+ unsigned long pod_pages = 0;
+ unsigned long entry_eip, cur_pages;
struct xen_add_to_physmap xatp;
struct shared_info *shared_info;
- void *e820_page;
+ void *hvm_info_page;
uint32_t *ident_pt;
struct elf_binary elf;
uint64_t v_start, v_end;
int rc;
xen_capabilities_info_t caps;
+ int pod_mode = 0;
+
/* An HVM guest must be initialised with at least 2MB memory. */
- if ( memsize < 2 )
+ if ( memsize < 2 || target < 2 )
goto error_out;
+ if ( memsize > target )
+ pod_mode = 1;
+
if ( elf_init(&elf, image, image_size) != 0 )
goto error_out;
elf_parse_binary(&elf);
.extent_order = SUPERPAGE_PFN_SHIFT,
.domid = dom
};
+
+ if ( pod_mode )
+ sp_req.mem_flags = XENMEMF_populate_on_demand;
+
set_xen_guest_handle(sp_req.extent_start, sp_extents);
for ( i = 0; i < sp_req.nr_extents; i++ )
sp_extents[i] = page_array[cur_pages+(i<<SUPERPAGE_PFN_SHIFT)];
if ( done > 0 )
{
done <<= SUPERPAGE_PFN_SHIFT;
+ if ( pod_mode && target_pages > cur_pages )
+ {
+ int d = target_pages - cur_pages;
+ pod_pages += ( done < d ) ? done : d;
+ }
cur_pages += done;
count -= done;
}
rc = xc_domain_memory_populate_physmap(
xc_handle, dom, count, 0, 0, &page_array[cur_pages]);
cur_pages += count;
+ if ( pod_mode )
+ pod_pages -= count;
}
}
+ if ( pod_mode )
+ rc = xc_domain_memory_set_pod_target(xc_handle,
+ dom,
+ pod_pages,
+ NULL, NULL, NULL);
+
if ( rc != 0 )
{
PERROR("Could not allocate memory for HVM guest.\n");
if ( loadelfimage(&elf, xc_handle, dom, page_array) != 0 )
goto error_out;
- if ( (e820_page = xc_map_foreign_range(
+ if ( (hvm_info_page = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
- HVM_E820_PAGE >> PAGE_SHIFT)) == NULL )
+ HVM_INFO_PFN)) == NULL )
goto error_out;
- memset(e820_page, 0, PAGE_SIZE);
- build_e820map(e820_page, v_end);
- munmap(e820_page, PAGE_SIZE);
+ build_hvm_info(hvm_info_page, v_end);
+ munmap(hvm_info_page, PAGE_SIZE);
/* Map and initialise shared_info page. */
xatp.domid = dom;
xatp.space = XENMAPSPACE_shared_info;
xatp.idx = 0;
- xatp.gpfn = SCRATCH_PFN;
+ xatp.gpfn = special_pfn(SPECIALPAGE_SHINFO);
if ( (xc_memory_op(xc_handle, XENMEM_add_to_physmap, &xatp) != 0) ||
((shared_info = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
- SCRATCH_PFN)) == NULL) )
+ special_pfn(SPECIALPAGE_SHINFO))) == NULL) )
goto error_out;
memset(shared_info, 0, PAGE_SIZE);
/* NB. evtchn_upcall_mask is unused: leave as zero. */
sizeof(shared_info->evtchn_mask));
munmap(shared_info, PAGE_SIZE);
- special_page_nr = (((v_end > HVM_BELOW_4G_RAM_END)
- ? (HVM_BELOW_4G_RAM_END >> PAGE_SHIFT)
- : (v_end >> PAGE_SHIFT))
- - NR_SPECIAL_PAGES);
-
- /* Paranoia: clean special pages. */
+ /* Allocate and clear special pages. */
for ( i = 0; i < NR_SPECIAL_PAGES; i++ )
- if ( xc_clear_domain_page(xc_handle, dom, special_page_nr + i) )
- goto error_out;
-
- /* Free the guard page that separates low RAM from special pages. */
- rc = xc_domain_memory_decrease_reservation(
- xc_handle, dom, 1, 0, &page_array[special_page_nr]);
- if ( rc != 0 )
{
- PERROR("Could not deallocate guard page for HVM guest.\n");
- goto error_out;
+ xen_pfn_t pfn = special_pfn(i);
+ if ( i == SPECIALPAGE_SHINFO )
+ continue;
+ rc = xc_domain_memory_populate_physmap(xc_handle, dom, 1, 0, 0, &pfn);
+ if ( rc != 0 )
+ {
+ PERROR("Could not allocate %d'th special page.\n", i);
+ goto error_out;
+ }
+ if ( xc_clear_domain_page(xc_handle, dom, special_pfn(i)) )
+ goto error_out;
}
xc_set_hvm_param(xc_handle, dom, HVM_PARAM_STORE_PFN,
- special_page_nr + SPECIALPAGE_XENSTORE);
+ special_pfn(SPECIALPAGE_XENSTORE));
xc_set_hvm_param(xc_handle, dom, HVM_PARAM_BUFIOREQ_PFN,
- special_page_nr + SPECIALPAGE_BUFIOREQ);
+ special_pfn(SPECIALPAGE_BUFIOREQ));
xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IOREQ_PFN,
- special_page_nr + SPECIALPAGE_IOREQ);
+ special_pfn(SPECIALPAGE_IOREQ));
/*
* Identity-map page table is required for running with CR0.PG=0 when
*/
if ( (ident_pt = xc_map_foreign_range(
xc_handle, dom, PAGE_SIZE, PROT_READ | PROT_WRITE,
- special_page_nr + SPECIALPAGE_IDENT_PT)) == NULL )
+ special_pfn(SPECIALPAGE_IDENT_PT))) == NULL )
goto error_out;
for ( i = 0; i < PAGE_SIZE / sizeof(*ident_pt); i++ )
ident_pt[i] = ((i << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
munmap(ident_pt, PAGE_SIZE);
xc_set_hvm_param(xc_handle, dom, HVM_PARAM_IDENT_PT,
- (special_page_nr + SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
+ special_pfn(SPECIALPAGE_IDENT_PT) << PAGE_SHIFT);
/* Insert JMP <rel32> instruction at address 0x0 to reach entry point. */
entry_eip = elf_uval(&elf, elf.ehdr, e_entry);
static int xc_hvm_build_internal(int xc_handle,
uint32_t domid,
int memsize,
+ int target,
char *image,
unsigned long image_size)
{
return -1;
}
- return setup_guest(xc_handle, domid, memsize, image, image_size);
+ return setup_guest(xc_handle, domid, memsize, target, image, image_size);
}
static inline int is_loadable_phdr(Elf32_Phdr *phdr)
((image = xc_read_image(image_name, &image_size)) == NULL) )
return -1;
- sts = xc_hvm_build_internal(xc_handle, domid, memsize, image, image_size);
+ sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize, image, image_size);
+
+ free(image);
+
+ return sts;
+}
+
+/* xc_hvm_build_target_mem:
+ * Create a domain for a pre-ballooned virtualized Linux, using
+ * files/filenames. If target < memsize, the domain is created with
+ * memsize MB of p2m space marked populate-on-demand, backed by a PoD
+ * cache of target MB. If target == memsize, pages are populated normally.
+ */
+int xc_hvm_build_target_mem(int xc_handle,
+ uint32_t domid,
+ int memsize,
+ int target,
+ const char *image_name)
+{
+ char *image;
+ int sts;
+ unsigned long image_size;
+
+ if ( (image_name == NULL) ||
+ ((image = xc_read_image(image_name, &image_size)) == NULL) )
+ return -1;
+
+ sts = xc_hvm_build_internal(xc_handle, domid, memsize, target, image, image_size);
free(image);
return -1;
}
- sts = xc_hvm_build_internal(xc_handle, domid, memsize,
+ sts = xc_hvm_build_internal(xc_handle, domid, memsize, memsize,
img, img_len);
/* xc_inflate_buffer may return the original buffer pointer (for
* Function to translate virtual to physical addresses.
*/
#include "xc_private.h"
+#include <xen/hvm/save.h>
-#if defined(__i386__)
+#define CR0_PG 0x80000000
+#define CR4_PAE 0x20
+#define PTE_PSE 0x80
+#define EFER_LMA 0x400
-#define L1_PAGETABLE_SHIFT_PAE 12
-#define L2_PAGETABLE_SHIFT_PAE 21
-#define L3_PAGETABLE_SHIFT_PAE 30
-
-#define L1_PAGETABLE_SHIFT 12
-#define L2_PAGETABLE_SHIFT 22
-
-#define L0_PAGETABLE_MASK_PAE 0x00000ffffffff000ULL
-#define L1_PAGETABLE_MASK_PAE 0x1ffULL
-#define L2_PAGETABLE_MASK_PAE 0x1ffULL
-#define L3_PAGETABLE_MASK_PAE 0x3ULL
-
-#define L0_PAGETABLE_MASK 0xfffff000ULL
-#define L1_PAGETABLE_MASK 0x3ffULL
-#define L2_PAGETABLE_MASK 0x3ffULL
-
-#elif defined(__x86_64__)
-
-#define L1_PAGETABLE_SHIFT_PAE 12
-#define L2_PAGETABLE_SHIFT_PAE 21
-#define L3_PAGETABLE_SHIFT_PAE 30
-#define L4_PAGETABLE_SHIFT_PAE 39
-
-#define L1_PAGETABLE_SHIFT L1_PAGETABLE_SHIFT_PAE
-#define L2_PAGETABLE_SHIFT L2_PAGETABLE_SHIFT_PAE
-
-#define L0_PAGETABLE_MASK_PAE 0x000ffffffffff000ULL
-#define L1_PAGETABLE_MASK_PAE 0x1ffULL
-#define L2_PAGETABLE_MASK_PAE 0x1ffULL
-#define L3_PAGETABLE_MASK_PAE 0x1ffULL
-#define L4_PAGETABLE_MASK_PAE 0x1ffULL
-
-#define L0_PAGETABLE_MASK L0_PAGETABLE_MASK_PAE
-#define L1_PAGETABLE_MASK L1_PAGETABLE_MASK_PAE
-#define L2_PAGETABLE_MASK L2_PAGETABLE_MASK_PAE
-
-#endif
unsigned long xc_translate_foreign_address(int xc_handle, uint32_t dom,
- int vcpu, unsigned long long virt )
+ int vcpu, unsigned long long virt)
{
- vcpu_guest_context_any_t ctx;
- unsigned long long cr3;
- void *pd, *pt, *pdppage = NULL, *pdp, *pml = NULL;
- unsigned long long pde, pte, pdpe, pmle;
- unsigned long mfn = 0;
-#if defined (__i386__)
- static int pt_levels = 0;
-
- if (pt_levels == 0) {
- xen_capabilities_info_t xen_caps = "";
-
- if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
- goto out;
- if (strstr(xen_caps, "xen-3.0-x86_64"))
+ xc_dominfo_t dominfo;
+ uint64_t paddr, mask, pte = 0;
+ int size, level, pt_levels = 2;
+ void *map;
+
+ if (xc_domain_getinfo(xc_handle, dom, 1, &dominfo) != 1
+ || dominfo.domid != dom)
+ return 0;
+
+ /* What kind of paging are we dealing with? */
+ if (dominfo.hvm) {
+ struct hvm_hw_cpu ctx;
+ if (xc_domain_hvm_getcontext_partial(xc_handle, dom,
+ HVM_SAVE_CODE(CPU), vcpu,
+ &ctx, sizeof ctx) != 0)
+ return 0;
+ if (!(ctx.cr0 & CR0_PG))
+ return virt >> PAGE_SHIFT;
+ pt_levels = (ctx.msr_efer&EFER_LMA) ? 4 : (ctx.cr4&CR4_PAE) ? 3 : 2;
+ paddr = ctx.cr3 & ((pt_levels == 3) ? ~0x1full : ~0xfffull);
+ } else {
+ DECLARE_DOMCTL;
+ vcpu_guest_context_any_t ctx;
+ if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0)
+ return 0;
+ domctl.domain = dom;
+ domctl.cmd = XEN_DOMCTL_get_address_size;
+ if ( do_domctl(xc_handle, &domctl) != 0 )
+ return 0;
+ if (domctl.u.address_size.size == 64) {
pt_levels = 4;
- else if (strstr(xen_caps, "xen-3.0-x86_32p"))
+ paddr = ctx.x64.ctrlreg[3] & ~0xfffull;
+ } else {
pt_levels = 3;
- else if (strstr(xen_caps, "xen-3.0-x86_32"))
- pt_levels = 2;
- else
- goto out;
- }
-#elif defined (__x86_64__)
-#define pt_levels 4
-#endif
-
- if (xc_vcpu_getcontext(xc_handle, dom, vcpu, &ctx) != 0) {
- DPRINTF("failed to retreive vcpu context\n");
- goto out;
- }
- cr3 = ((unsigned long long)xen_cr3_to_pfn(ctx.c.ctrlreg[3])) << PAGE_SHIFT;
-
- /* Page Map Level 4 */
-
-#if defined(__i386__)
- pmle = cr3;
-#elif defined(__x86_64__)
- pml = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, cr3 >> PAGE_SHIFT);
- if (pml == NULL) {
- DPRINTF("failed to map PML4\n");
- goto out;
- }
- pmle = *(unsigned long long *)(pml + 8 * ((virt >> L4_PAGETABLE_SHIFT_PAE) & L4_PAGETABLE_MASK_PAE));
- if((pmle & 1) == 0) {
- DPRINTF("page entry not present in PML4\n");
- goto out_unmap_pml;
- }
-#endif
-
- /* Page Directory Pointer Table */
-
- if (pt_levels >= 3) {
- pdppage = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, pmle >> PAGE_SHIFT);
- if (pdppage == NULL) {
- DPRINTF("failed to map PDP\n");
- goto out_unmap_pml;
- }
- if (pt_levels >= 4)
- pdp = pdppage;
- else
- /* PDP is only 32 bit aligned with 3 level pts */
- pdp = pdppage + (pmle & ~(XC_PAGE_MASK | 0x1f));
-
- pdpe = *(unsigned long long *)(pdp + 8 * ((virt >> L3_PAGETABLE_SHIFT_PAE) & L3_PAGETABLE_MASK_PAE));
-
- if((pdpe & 1) == 0) {
- DPRINTF("page entry not present in PDP\n");
- goto out_unmap_pdp;
+ paddr = (((uint64_t) xen_cr3_to_pfn(ctx.x32.ctrlreg[3]))
+ << PAGE_SHIFT);
}
- } else {
- pdpe = pmle;
}
- /* Page Directory */
-
- pd = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ, pdpe >> PAGE_SHIFT);
- if (pd == NULL) {
- DPRINTF("failed to map PD\n");
- goto out_unmap_pdp;
- }
-
- if (pt_levels >= 3)
- pde = *(unsigned long long *)(pd + 8 * ((virt >> L2_PAGETABLE_SHIFT_PAE) & L2_PAGETABLE_MASK_PAE));
- else
- pde = *(unsigned long *)(pd + 4 * ((virt >> L2_PAGETABLE_SHIFT) & L2_PAGETABLE_MASK));
-
- if ((pde & 1) == 0) {
- DPRINTF("page entry not present in PD\n");
- goto out_unmap_pd;
+ if (pt_levels == 4) {
+ virt &= 0x0000ffffffffffffull;
+ mask = 0x0000ff8000000000ull;
+ } else if (pt_levels == 3) {
+ virt &= 0x00000000ffffffffull;
+ mask = 0x0000007fc0000000ull;
+ } else {
+ virt &= 0x00000000ffffffffull;
+ mask = 0x00000000ffc00000ull;
}
-
- /* Page Table */
-
- if (pde & 0x00000080) { /* 4M page (or 2M in PAE mode) */
- DPRINTF("Cannot currently cope with 2/4M pages\n");
- exit(-1);
- } else { /* 4k page */
- pt = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
- pde >> PAGE_SHIFT);
-
- if (pt == NULL) {
- DPRINTF("failed to map PT\n");
- goto out_unmap_pd;
+ size = (pt_levels == 2 ? 4 : 8);
+
+ /* Walk the pagetables */
+ for (level = pt_levels; level > 0; level--) {
+ paddr += ((virt & mask) >> (xc_ffs64(mask) - 1)) * size;
+ map = xc_map_foreign_range(xc_handle, dom, PAGE_SIZE, PROT_READ,
+ paddr >> PAGE_SHIFT);
+ if (!map)
+ return 0;
+ memcpy(&pte, map + (paddr & (PAGE_SIZE - 1)), size);
+ munmap(map, PAGE_SIZE);
+ if (!(pte & 1))
+ return 0;
+ paddr = pte & 0x000ffffffffff000ull;
+ if (level == 2 && (pte & PTE_PSE)) {
+ mask = ((mask ^ ~-mask) >> 1); /* All bits below first set bit */
+ return ((paddr & ~mask) | (virt & mask)) >> PAGE_SHIFT;
}
-
- if (pt_levels >= 3)
- pte = *(unsigned long long *)(pt + 8 * ((virt >> L1_PAGETABLE_SHIFT_PAE) & L1_PAGETABLE_MASK_PAE));
- else
- pte = *(unsigned long *)(pt + 4 * ((virt >> L1_PAGETABLE_SHIFT) & L1_PAGETABLE_MASK));
-
- if ((pte & 1) == 0) {
- DPRINTF("page entry not present in PT\n");
- goto out_unmap_pt;
- }
-
- if (pt_levels >= 3)
- mfn = (pte & L0_PAGETABLE_MASK_PAE) >> PAGE_SHIFT;
- else
- mfn = (pte & L0_PAGETABLE_MASK) >> PAGE_SHIFT;
+ mask >>= (pt_levels == 2 ? 10 : 9);
}
-
- out_unmap_pt:
- munmap(pt, PAGE_SIZE);
- out_unmap_pd:
- munmap(pd, PAGE_SIZE);
- out_unmap_pdp:
- munmap(pdppage, PAGE_SIZE);
- out_unmap_pml:
- munmap(pml, PAGE_SIZE);
- out:
- return mfn;
+ return paddr >> PAGE_SHIFT;
}
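+/*
+ * Index selection in the generic walk above (worked example, 4-level
+ * long mode): the initial mask 0x0000ff8000000000 covers virtual address
+ * bits 47..39, so ((virt & mask) >> (xc_ffs64(mask) - 1)) is the 9-bit
+ * top-level index, and multiplying by the 8-byte entry size gives the
+ * byte offset within the table.  After each level the mask is shifted
+ * right by 9 bits (10 in the 2-level non-PAE case), selecting bits
+ * 38..30, 29..21 and finally 20..12.
+ */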
/*
*
*/
+#include <errno.h>
+#include <stdbool.h>
#include "xc_private.h"
+/*
+ * Get PM statistic info
+ */
int xc_pm_get_max_px(int xc_handle, int cpuid, int *max_px)
{
DECLARE_SYSCTL;
return xc_sysctl(xc_handle, &sysctl);
}
+
+
+/*
+ * 1. Get PM parameter
+ * 2. Provide user PM control
+ */
+int xc_get_cpufreq_para(int xc_handle, int cpuid,
+ struct xc_get_cpufreq_para *user_para)
+{
+ DECLARE_SYSCTL;
+ int ret = 0;
+ struct xen_get_cpufreq_para *sys_para = &sysctl.u.pm_op.get_para;
+ bool has_num = user_para->cpu_num &&
+ user_para->freq_num &&
+ user_para->gov_num;
+
+ if ( (xc_handle < 0) || !user_para )
+ return -EINVAL;
+
+ if ( has_num )
+ {
+ if ( (!user_para->affected_cpus) ||
+ (!user_para->scaling_available_frequencies) ||
+ (!user_para->scaling_available_governors) )
+ return -EINVAL;
+
+ if ( (ret = lock_pages(user_para->affected_cpus,
+ user_para->cpu_num * sizeof(uint32_t))) )
+ goto unlock_1;
+ if ( (ret = lock_pages(user_para->scaling_available_frequencies,
+ user_para->freq_num * sizeof(uint32_t))) )
+ goto unlock_2;
+ if ( (ret = lock_pages(user_para->scaling_available_governors,
+ user_para->gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+ goto unlock_3;
+
+ set_xen_guest_handle(sys_para->affected_cpus,
+ user_para->affected_cpus);
+ set_xen_guest_handle(sys_para->scaling_available_frequencies,
+ user_para->scaling_available_frequencies);
+ set_xen_guest_handle(sys_para->scaling_available_governors,
+ user_para->scaling_available_governors);
+ }
+
+ sysctl.cmd = XEN_SYSCTL_pm_op;
+ sysctl.u.pm_op.cmd = GET_CPUFREQ_PARA;
+ sysctl.u.pm_op.cpuid = cpuid;
+ sys_para->cpu_num = user_para->cpu_num;
+ sys_para->freq_num = user_para->freq_num;
+ sys_para->gov_num = user_para->gov_num;
+
+ ret = xc_sysctl(xc_handle, &sysctl);
+ if ( ret )
+ {
+ if ( errno == EAGAIN )
+ {
+ user_para->cpu_num = sys_para->cpu_num;
+ user_para->freq_num = sys_para->freq_num;
+ user_para->gov_num = sys_para->gov_num;
+ ret = -errno;
+ }
+
+ if ( has_num )
+ goto unlock_4;
+ goto unlock_1;
+ }
+ else
+ {
+ user_para->cpuinfo_cur_freq = sys_para->cpuinfo_cur_freq;
+ user_para->cpuinfo_max_freq = sys_para->cpuinfo_max_freq;
+ user_para->cpuinfo_min_freq = sys_para->cpuinfo_min_freq;
+ user_para->scaling_cur_freq = sys_para->scaling_cur_freq;
+ user_para->scaling_max_freq = sys_para->scaling_max_freq;
+ user_para->scaling_min_freq = sys_para->scaling_min_freq;
+
+ memcpy(user_para->scaling_driver,
+ sys_para->scaling_driver, CPUFREQ_NAME_LEN);
+ memcpy(user_para->scaling_governor,
+ sys_para->scaling_governor, CPUFREQ_NAME_LEN);
+
+ /* copy to user_para regardless of which cpufreq governor is in use */
+ XC_BUILD_BUG_ON(sizeof(((struct xc_get_cpufreq_para *)0)->u) !=
+ sizeof(((struct xen_get_cpufreq_para *)0)->u));
+
+ memcpy(&user_para->u, &sys_para->u, sizeof(sys_para->u));
+ }
+
+unlock_4:
+ unlock_pages(user_para->scaling_available_governors,
+ user_para->gov_num * CPUFREQ_NAME_LEN * sizeof(char));
+unlock_3:
+ unlock_pages(user_para->scaling_available_frequencies,
+ user_para->freq_num * sizeof(uint32_t));
+unlock_2:
+ unlock_pages(user_para->affected_cpus,
+ user_para->cpu_num * sizeof(uint32_t));
+unlock_1:
+ return ret;
+}
+
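+/*
+ * Typical calling pattern (sketch only, error handling elided): start with
+ * the counts set to zero; on -EAGAIN the required cpu_num/freq_num/gov_num
+ * have been filled in, so allocate the arrays and retry.
+ *
+ *     struct xc_get_cpufreq_para p;
+ *     memset(&p, 0, sizeof(p));
+ *     if ( xc_get_cpufreq_para(xch, cpuid, &p) == -EAGAIN )
+ *     {
+ *         p.affected_cpus = malloc(p.cpu_num * sizeof(uint32_t));
+ *         p.scaling_available_frequencies = malloc(p.freq_num * sizeof(uint32_t));
+ *         p.scaling_available_governors = malloc(p.gov_num * CPUFREQ_NAME_LEN);
+ *         (void)xc_get_cpufreq_para(xch, cpuid, &p);
+ *     }
+ */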
+int xc_set_cpufreq_gov(int xc_handle, int cpuid, char *govname)
+{
+ DECLARE_SYSCTL;
+ char *scaling_governor = sysctl.u.pm_op.set_gov.scaling_governor;
+
+ if ( (xc_handle < 0) || (!govname) )
+ return -EINVAL;
+
+ sysctl.cmd = XEN_SYSCTL_pm_op;
+ sysctl.u.pm_op.cmd = SET_CPUFREQ_GOV;
+ sysctl.u.pm_op.cpuid = cpuid;
+ strncpy(scaling_governor, govname, CPUFREQ_NAME_LEN);
+ scaling_governor[CPUFREQ_NAME_LEN - 1] = '\0';
+
+ return xc_sysctl(xc_handle, &sysctl);
+}
+
+int xc_set_cpufreq_para(int xc_handle, int cpuid,
+ int ctrl_type, int ctrl_value)
+{
+ DECLARE_SYSCTL;
+
+ if ( xc_handle < 0 )
+ return -EINVAL;
+
+ sysctl.cmd = XEN_SYSCTL_pm_op;
+ sysctl.u.pm_op.cmd = SET_CPUFREQ_PARA;
+ sysctl.u.pm_op.cpuid = cpuid;
+ sysctl.u.pm_op.set_para.ctrl_type = ctrl_type;
+ sysctl.u.pm_op.set_para.ctrl_value = ctrl_value;
+
+ return xc_sysctl(xc_handle, &sysctl);
+}
+
+int xc_get_cpufreq_avgfreq(int xc_handle, int cpuid, int *avg_freq)
+{
+ int ret = 0;
+ DECLARE_SYSCTL;
+
+ if ( (xc_handle < 0) || (!avg_freq) )
+ return -EINVAL;
+
+ sysctl.cmd = XEN_SYSCTL_pm_op;
+ sysctl.u.pm_op.cmd = GET_CPUFREQ_AVGFREQ;
+ sysctl.u.pm_op.cpuid = cpuid;
+ ret = xc_sysctl(xc_handle, &sysctl);
+
+ *avg_freq = sysctl.u.pm_op.get_avgfreq;
+
+ return ret;
+}
+
+int xc_get_cputopo(int xc_handle, struct xc_get_cputopo *info)
+{
+ int rc;
+ DECLARE_SYSCTL;
+
+ sysctl.cmd = XEN_SYSCTL_pm_op;
+ sysctl.u.pm_op.cmd = XEN_SYSCTL_pm_op_get_cputopo;
+ sysctl.u.pm_op.cpuid = 0;
+ set_xen_guest_handle( sysctl.u.pm_op.get_topo.cpu_to_core,
+ info->cpu_to_core );
+ set_xen_guest_handle( sysctl.u.pm_op.get_topo.cpu_to_socket,
+ info->cpu_to_socket );
+ sysctl.u.pm_op.get_topo.max_cpus = info->max_cpus;
+
+ rc = do_sysctl(xc_handle, &sysctl);
+ info->nr_cpus = sysctl.u.pm_op.get_topo.nr_cpus;
+
+ return rc;
+}
+
+/* value: 0 - disable sched_smt_power_savings
+ 1 - enable sched_smt_power_savings
+ */
+int xc_set_sched_opt_smt(int xc_handle, uint32_t value)
+{
+ int rc;
+ DECLARE_SYSCTL;
+
+ sysctl.cmd = XEN_SYSCTL_pm_op;
+ sysctl.u.pm_op.cmd = XEN_SYSCTL_pm_op_set_sched_opt_smt;
+ sysctl.u.pm_op.cpuid = 0;
+ sysctl.u.pm_op.set_sched_opt_smt = value;
+ rc = do_sysctl(xc_handle, &sysctl);
+
+ return rc;
+}
+
goto out1;
}
break;
- case XENMEM_remove_from_physmap:
- if ( lock_pages(arg, sizeof(struct xen_remove_from_physmap)) )
+ case XENMEM_current_reservation:
+ case XENMEM_maximum_reservation:
+ case XENMEM_maximum_gpfn:
+ if ( lock_pages(arg, sizeof(domid_t)) )
{
PERROR("Could not lock");
goto out1;
}
break;
- case XENMEM_current_reservation:
- case XENMEM_maximum_reservation:
- case XENMEM_maximum_gpfn:
- if ( lock_pages(arg, sizeof(domid_t)) )
+ case XENMEM_set_pod_target:
+ case XENMEM_get_pod_target:
+ if ( lock_pages(arg, sizeof(struct xen_pod_target)) )
{
PERROR("Could not lock");
goto out1;
case XENMEM_add_to_physmap:
unlock_pages(arg, sizeof(struct xen_add_to_physmap));
break;
- case XENMEM_remove_from_physmap:
- unlock_pages(arg, sizeof(struct xen_remove_from_physmap));
- break;
case XENMEM_current_reservation:
case XENMEM_maximum_reservation:
case XENMEM_maximum_gpfn:
unlock_pages(arg, sizeof(domid_t));
break;
+ case XENMEM_set_pod_target:
+ case XENMEM_get_pod_target:
+ unlock_pages(arg, sizeof(struct xen_pod_target));
+ break;
}
out1:
return 0;
}
+int xc_ffs8(uint8_t x)
+{
+ int i;
+ for ( i = 0; i < 8; i++ )
+ if ( x & (1u << i) )
+ return i+1;
+ return 0;
+}
+
+int xc_ffs16(uint16_t x)
+{
+ uint8_t h = x>>8, l = x;
+ return l ? xc_ffs8(l) : h ? xc_ffs8(h) + 8 : 0;
+}
+
+int xc_ffs32(uint32_t x)
+{
+ uint16_t h = x>>16, l = x;
+ return l ? xc_ffs16(l) : h ? xc_ffs16(h) + 16 : 0;
+}
+
+int xc_ffs64(uint64_t x)
+{
+ uint32_t h = x>>32, l = x;
+ return l ? xc_ffs32(l) : h ? xc_ffs32(h) + 32 : 0;
+}
+
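+/*
+ * These follow the ffs()/ffsll() convention: the result is the 1-based
+ * position of the least significant set bit, or 0 when no bit is set,
+ * e.g. xc_ffs64(0) == 0, xc_ffs64(1) == 1, xc_ffs64(6) == 2 and
+ * xc_ffs64(1ULL << 63) == 64.
+ */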
/*
* Local variables:
* mode: C
#define INFO 1
#define PROGRESS 0
+/* Force a compilation error if condition is true */
+#define XC_BUILD_BUG_ON(p) ((void)sizeof(struct { int:-!!(p); }))
+
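+/*
+ * When the condition is true the anonymous struct contains a bitfield of
+ * negative width, which no compiler accepts; when false it degenerates to
+ * a harmless zero-width bitfield.  For example:
+ *
+ *     XC_BUILD_BUG_ON(sizeof(uint64_t) != 8);   <- compiles
+ *     XC_BUILD_BUG_ON(sizeof(uint64_t) != 4);   <- fails to compile
+ */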
/*
** Define max dirty page cache to permit during save/restore -- need to balance
** keeping cache usage down with CPU impact of invalidating too often.
int read_exact(int fd, void *data, size_t size);
int write_exact(int fd, const void *data, size_t size);
+int xc_ffs8(uint8_t x);
+int xc_ffs16(uint16_t x);
+int xc_ffs32(uint32_t x);
+int xc_ffs64(uint64_t x);
+
#endif /* __XC_PRIVATE_H__ */
static uint64_t regs_valid;
static vcpu_guest_context_any_t ctxt[MAX_VIRT_CPUS];
-extern int ffsll(long long int);
-#define FOREACH_CPU(cpumap, i) for ( cpumap = online_cpumap; (i = ffsll(cpumap)); cpumap &= ~(1 << (index - 1)) )
+#define FOREACH_CPU(cpumap, i) for ( cpumap = online_cpumap; (i = xc_ffs64(cpumap)); cpumap &= ~(1 << (index - 1)) )
static int
fetch_regs(int xc_handle, int cpu, int *online)
uint64_t changed_cpumap = cpumap ^ online_cpumap;
int index;
- while ( (index = ffsll(changed_cpumap)) ) {
+ while ( (index = xc_ffs64(changed_cpumap)) ) {
if ( cpumap & (1 << (index - 1)) )
{
if (handlers.td_create) handlers.td_create(index - 1);
}
-/* --------------------- */
-/* XXX application state */
-static long nr_pages = 0;
-static uint64_t *page_array = NULL;
-
-static uint64_t to_ma(int cpu, uint64_t maddr)
-{
- return maddr;
-}
static void *
-map_domain_va_32(
- int xc_handle,
- int cpu,
- void *guest_va,
- int perm)
-{
- unsigned long l2e, l1e, l1p, p, va = (unsigned long)guest_va;
- uint32_t *l2, *l1;
- static void *v[MAX_VIRT_CPUS];
-
- l2 = xc_map_foreign_range(
- xc_handle, current_domid, PAGE_SIZE, PROT_READ,
- xen_cr3_to_pfn(ctxt[cpu].c.ctrlreg[3]));
- if ( l2 == NULL )
- return NULL;
-
- l2e = l2[l2_table_offset_i386(va)];
- munmap(l2, PAGE_SIZE);
- if ( !(l2e & _PAGE_PRESENT) )
- return NULL;
- l1p = to_ma(cpu, l2e);
- l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l1p >> PAGE_SHIFT);
- if ( l1 == NULL )
- return NULL;
-
- l1e = l1[l1_table_offset_i386(va)];
- munmap(l1, PAGE_SIZE);
- if ( !(l1e & _PAGE_PRESENT) )
- return NULL;
- p = to_ma(cpu, l1e);
- if ( v[cpu] != NULL )
- munmap(v[cpu], PAGE_SIZE);
- v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
- if ( v[cpu] == NULL )
- return NULL;
-
- return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
-}
-
-
-static void *
-map_domain_va_pae(
+map_domain_va(
int xc_handle,
int cpu,
void *guest_va,
int perm)
{
- uint64_t l3e, l2e, l1e, l2p, l1p, p;
unsigned long va = (unsigned long)guest_va;
- uint64_t *l3, *l2, *l1;
- static void *v[MAX_VIRT_CPUS];
+ unsigned long mfn;
+ void *map;
- l3 = xc_map_foreign_range(
- xc_handle, current_domid, PAGE_SIZE, PROT_READ,
- xen_cr3_to_pfn(ctxt[cpu].c.ctrlreg[3]));
- if ( l3 == NULL )
+ /* cross page boundary */
+ if ( (va & ~PAGE_MASK) + sizeof(long) > PAGE_SIZE )
return NULL;
- l3e = l3[l3_table_offset_pae(va)];
- munmap(l3, PAGE_SIZE);
- if ( !(l3e & _PAGE_PRESENT) )
- return NULL;
- l2p = to_ma(cpu, l3e);
- l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT);
- if ( l2 == NULL )
- return NULL;
-
- l2e = l2[l2_table_offset_pae(va)];
- munmap(l2, PAGE_SIZE);
- if ( !(l2e & _PAGE_PRESENT) )
- return NULL;
- l1p = to_ma(cpu, l2e);
- l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l1p >> PAGE_SHIFT);
- if ( l1 == NULL )
+ mfn = xc_translate_foreign_address(xc_handle, current_domid, cpu, va);
+ if ( mfn == 0 )
return NULL;
- l1e = l1[l1_table_offset_pae(va)];
- munmap(l1, PAGE_SIZE);
- if ( !(l1e & _PAGE_PRESENT) )
- return NULL;
- p = to_ma(cpu, l1e);
- if ( v[cpu] != NULL )
- munmap(v[cpu], PAGE_SIZE);
- v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
- if ( v[cpu] == NULL )
+ map = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE,
+ perm, mfn);
+ if (map == NULL)
return NULL;
- return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
+ return map + (va & ~PAGE_MASK);
}
-#ifdef __x86_64__
-static void *
-map_domain_va_64(
- int xc_handle,
- int cpu,
- void *guest_va,
- int perm)
-{
- unsigned long l4e, l3e, l2e, l1e, l3p, l2p, l1p, p, va = (unsigned long)guest_va;
- uint64_t *l4, *l3, *l2, *l1;
- static void *v[MAX_VIRT_CPUS];
-
- if ((ctxt[cpu].c.ctrlreg[4] & 0x20) == 0 ) /* legacy ia32 mode */
- return map_domain_va_32(xc_handle, cpu, guest_va, perm);
-
- l4 = xc_map_foreign_range(
- xc_handle, current_domid, PAGE_SIZE, PROT_READ,
- xen_cr3_to_pfn(ctxt[cpu].c.ctrlreg[3]));
- if ( l4 == NULL )
- return NULL;
-
- l4e = l4[l4_table_offset(va)];
- munmap(l4, PAGE_SIZE);
- if ( !(l4e & _PAGE_PRESENT) )
- return NULL;
- l3p = to_ma(cpu, l4e);
- l3 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l3p >> PAGE_SHIFT);
- if ( l3 == NULL )
- return NULL;
-
- l3e = l3[l3_table_offset(va)];
- munmap(l3, PAGE_SIZE);
- if ( !(l3e & _PAGE_PRESENT) )
- return NULL;
- l2p = to_ma(cpu, l3e);
- l2 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l2p >> PAGE_SHIFT);
- if ( l2 == NULL )
- return NULL;
-
- l2e = l2[l2_table_offset(va)];
- munmap(l2, PAGE_SIZE);
- if ( !(l2e & _PAGE_PRESENT) )
- return NULL;
- l1p = to_ma(cpu, l2e);
- if (l2e & 0x80) { /* 2M pages */
- p = to_ma(cpu, l1p + (l1_table_offset(va) << PAGE_SHIFT));
- } else { /* 4K pages */
- l1 = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, PROT_READ, l1p >> PAGE_SHIFT);
- if ( l1 == NULL )
- return NULL;
-
- l1e = l1[l1_table_offset(va)];
- munmap(l1, PAGE_SIZE);
- if ( !(l1e & _PAGE_PRESENT) )
- return NULL;
- p = to_ma(cpu, l1e);
- }
- if ( v[cpu] != NULL )
- munmap(v[cpu], PAGE_SIZE);
- v[cpu] = xc_map_foreign_range(xc_handle, current_domid, PAGE_SIZE, perm, p >> PAGE_SHIFT);
- if ( v[cpu] == NULL )
- return NULL;
-
- return (void *)((unsigned long)v[cpu] | (va & (PAGE_SIZE - 1)));
-}
-#endif
-
-static void *
-map_domain_va(
- int xc_handle,
- int cpu,
- void *guest_va,
- int perm)
+static void
+unmap_domain_va(void *guest_va)
{
- unsigned long va = (unsigned long) guest_va;
- long npgs = xc_get_tot_pages(xc_handle, current_domid);
- static enum { MODE_UNKNOWN, MODE_64, MODE_32, MODE_PAE } mode;
-
- if ( mode == MODE_UNKNOWN )
- {
- xen_capabilities_info_t caps;
- (void)xc_version(xc_handle, XENVER_capabilities, caps);
- if ( strstr(caps, "-x86_64") )
- mode = MODE_64;
- else if ( strstr(caps, "-x86_32p") )
- mode = MODE_PAE;
- else if ( strstr(caps, "-x86_32") )
- mode = MODE_32;
- }
-
- if ( nr_pages != npgs )
- {
- if ( nr_pages > 0 )
- free(page_array);
- nr_pages = npgs;
- if ( (page_array = malloc(nr_pages * sizeof(*page_array))) == NULL )
- {
- IPRINTF("Could not allocate memory\n");
- return NULL;
- }
- if ( xc_get_pfn_list(xc_handle, current_domid,
- page_array, nr_pages) != nr_pages )
- {
- IPRINTF("Could not get the page frame list\n");
- return NULL;
- }
- }
-
- if (fetch_regs(xc_handle, cpu, NULL))
- return NULL;
-
- if (!paging_enabled(&ctxt[cpu])) {
- static void * v;
- uint64_t page;
-
- if ( v != NULL )
- munmap(v, PAGE_SIZE);
-
- page = to_ma(cpu, va);
-
- v = xc_map_foreign_range( xc_handle, current_domid, PAGE_SIZE,
- perm, page >> PAGE_SHIFT);
-
- if ( v == NULL )
- return NULL;
-
- return (void *)(((unsigned long)v) | (va & BSD_PAGE_MASK));
- }
-#ifdef __x86_64__
- if ( mode == MODE_64 )
- return map_domain_va_64(xc_handle, cpu, guest_va, perm);
-#endif
- if ( mode == MODE_PAE )
- return map_domain_va_pae(xc_handle, cpu, guest_va, perm);
- /* else ( mode == MODE_32 ) */
- return map_domain_va_32(xc_handle, cpu, guest_va, perm);
+ munmap((void *)((unsigned long)guest_va & PAGE_MASK), PAGE_SIZE);
}
int control_c_pressed_flag = 0;
if ( guest_va == NULL )
goto out_error;
retval = *guest_va;
+ if (!current_isfile)
+ unmap_domain_va(guest_va);
break;
case PTRACE_POKETEXT:
xc_handle, cpu, addr, PROT_READ|PROT_WRITE);
if ( guest_va == NULL )
goto out_error;
- *guest_va = (unsigned long)data;
+ *guest_va = edata;
+ if (!current_isfile)
+ unmap_domain_va(guest_va);
break;
case PTRACE_GETREGS:
/* XXX we can still have problems if the user switches threads
* during single-stepping - but that just seems retarded
*/
- ctxt[cpu].c.user_regs.eflags |= PSL_T;
- if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu,
- &ctxt[cpu])))
- goto out_error_domctl;
+ /* Try to enable the Monitor Trap Flag for HVM guests, and fall
+ * back to setting TF if MTF is not supported.
+ */
+ if ( !current_is_hvm ||
+ xc_domain_debug_control(xc_handle,
+ current_domid,
+ XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON,
+ cpu) )
+ {
+ ctxt[cpu].c.user_regs.eflags |= PSL_T;
+ if ((retval = xc_vcpu_setcontext(xc_handle, current_domid, cpu,
+ &ctxt[cpu])))
+ goto out_error_domctl;
+ }
/* FALLTHROUGH */
case PTRACE_CONT:
{
FOREACH_CPU(cpumap, index) {
cpu = index - 1;
- if (fetch_regs(xc_handle, cpu, NULL))
- goto out_error;
- /* Clear trace flag */
- if ( ctxt[cpu].c.user_regs.eflags & PSL_T )
+ if ( !current_is_hvm ||
+ xc_domain_debug_control(xc_handle,
+ current_domid,
+ XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF,
+ cpu) )
{
- ctxt[cpu].c.user_regs.eflags &= ~PSL_T;
- if ((retval = xc_vcpu_setcontext(xc_handle, current_domid,
- cpu, &ctxt[cpu])))
- goto out_error_domctl;
+ if (fetch_regs(xc_handle, cpu, NULL))
+ goto out_error;
+ /* Clear trace flag */
+ if ( ctxt[cpu].c.user_regs.eflags & PSL_T )
+ {
+ ctxt[cpu].c.user_regs.eflags &= ~PSL_T;
+ if ((retval = xc_vcpu_setcontext(xc_handle, current_domid,
+ cpu, &ctxt[cpu])))
+ goto out_error_domctl;
+ }
}
}
}
IPRINTF("Could not allocate m2p array\n");
return -1;
}
- bzero(m2p_array_compat, sizeof(unsigned long)* 1 << 20);
+ memset(m2p_array_compat, 0, sizeof(unsigned long)* 1 << 20);
for (i = 0; i < nr_pages_compat; i++)
m2p_array_compat[p2m_array_compat[i]] = i;
XEN_ELFNOTE_DUMPCORE_XEN_VERSION,
(void**)&xen_version) < 0)
goto out;
- if (xen_version->xen_version.pagesize != PAGE_SIZE)
+ /* shifted case covers a 32-bit fully-virtualized (HVM) guest core file created on a 64-bit dom0 */
+ if (xen_version->xen_version.pagesize != PAGE_SIZE &&
+ (xen_version->xen_version.pagesize >> 32) != PAGE_SIZE)
goto out;
/* .note.Xen: format_version */
if (rc)
goto ioctl_failed;
+ return addr;
+
ioctl_failed:
rc = munmap(addr, size);
if (rc == -1)
--- /dev/null
+/*
+ * This file is subject to the terms and conditions of the GNU General
+ * Public License. See the file "COPYING" in the main directory of
+ * this archive for more details.
+ */
+
+#include "xc_private.h"
+#include "xenguest.h"
+
+#define SUSPEND_LOCK_FILE "/var/lib/xen/suspend_evtchn_lock.d"
+static int lock_suspend_event(void)
+{
+ int fd, rc;
+ mode_t mask;
+ char buf[128];
+
+ mask = umask(022);
+ fd = open(SUSPEND_LOCK_FILE, O_CREAT | O_EXCL | O_RDWR, 0666);
+ if (fd < 0)
+ {
+ ERROR("Can't create lock file for suspend event channel\n");
+ return -EINVAL;
+ }
+ umask(mask);
+ snprintf(buf, sizeof(buf), "%10ld", (long)getpid());
+
+ rc = write_exact(fd, buf, strlen(buf));
+ close(fd);
+
+ return rc;
+}
+
+static int unlock_suspend_event(void)
+{
+ int fd, pid, n;
+ char buf[128];
+
+ fd = open(SUSPEND_LOCK_FILE, O_RDWR);
+
+ if (fd < 0)
+ return -EINVAL;
+
+ n = read(fd, buf, 127);
+
+ close(fd);
+
+ if (n > 0)
+ {
+ sscanf(buf, "%d", &pid);
+ /* We are the owner, so we can simply delete the file */
+ if (pid == getpid())
+ {
+ unlink(SUSPEND_LOCK_FILE);
+ return 0;
+ }
+ }
+
+ return -EPERM;
+}
+
+int xc_await_suspend(int xce, int suspend_evtchn)
+{
+ int rc;
+
+ do {
+ rc = xc_evtchn_pending(xce);
+ if (rc < 0) {
+ ERROR("error polling suspend notification channel: %d", rc);
+ return -1;
+ }
+ } while (rc != suspend_evtchn);
+
+ /* Unmasking is harmless even for a one-off suspend. */
+ if (xc_evtchn_unmask(xce, suspend_evtchn) < 0)
+ ERROR("failed to unmask suspend notification channel: %d", rc);
+
+ return 0;
+}
+
+int xc_suspend_evtchn_release(int xce, int suspend_evtchn)
+{
+ if (suspend_evtchn >= 0)
+ xc_evtchn_unbind(xce, suspend_evtchn);
+
+ return unlock_suspend_event();
+}
+
+int xc_suspend_evtchn_init(int xc, int xce, int domid, int port)
+{
+ int rc, suspend_evtchn = -1;
+
+ if (lock_suspend_event())
+ return -EINVAL;
+
+ suspend_evtchn = xc_evtchn_bind_interdomain(xce, domid, port);
+ if (suspend_evtchn < 0) {
+ ERROR("failed to bind suspend event channel: %d", suspend_evtchn);
+ goto cleanup;
+ }
+
+ rc = xc_domain_subscribe_for_suspend(xc, domid, port);
+ if (rc < 0) {
+ ERROR("failed to subscribe to domain: %d", rc);
+ goto cleanup;
+ }
+
+ /* event channel is pending immediately after binding */
+ xc_await_suspend(xce, suspend_evtchn);
+
+ return suspend_evtchn;
+
+cleanup:
+ if (suspend_evtchn > 0)
+ xc_suspend_evtchn_release(xce, suspend_evtchn);
+
+ return -1;
+}
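+
+/*
+ * Intended lifecycle (sketch only; "xc"/"xce" are an open interface and
+ * event-channel handle, "port" the guest's advertised suspend channel):
+ *
+ *     int chn = xc_suspend_evtchn_init(xc, xce, domid, port);
+ *     ...
+ *     xc_evtchn_notify(xce, chn);        ask the guest to suspend itself
+ *     xc_await_suspend(xce, chn);        wait for the guest's acknowledgement
+ *     ...
+ *     xc_suspend_evtchn_release(xce, chn);
+ */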
paused:1, blocked:1, running:1,
hvm:1, debugged:1;
unsigned int shutdown_reason; /* only meaningful if shutdown==1 */
- unsigned long nr_pages;
+ unsigned long nr_pages; /* current number, not maximum */
unsigned long shared_info_frame;
uint64_t cpu_time;
unsigned long max_memkb;
uint8_t *ctxt_buf,
uint32_t size);
+
+/**
+ * This function returns one element of the context of a hvm domain
+ * @parm xc_handle a handle to an open hypervisor interface
+ * @parm domid the domain to get information from
+ * @parm typecode which type of element is required
+ * @parm instance which instance of the type
+ * @parm ctxt_buf a pointer to a structure to store the execution context of
+ * the hvm domain
+ * @parm size the size of ctxt_buf (must be >= HVM_SAVE_LENGTH(typecode))
+ * @return 0 on success, -1 on failure
+ */
+int xc_domain_hvm_getcontext_partial(int xc_handle,
+ uint32_t domid,
+ uint16_t typecode,
+ uint16_t instance,
+ void *ctxt_buf,
+ uint32_t size);
+
/**
* This function will set the context for hvm domain
*
unsigned int mem_flags,
xen_pfn_t *extent_start);
-int xc_domain_memory_translate_gpfn_list(int xc_handle,
- uint32_t domid,
- unsigned long nr_gpfns,
- xen_pfn_t *gpfn_list,
- xen_pfn_t *mfn_list);
+int xc_domain_memory_set_pod_target(int xc_handle,
+ uint32_t domid,
+ uint64_t target_pages,
+ uint64_t *tot_pages,
+ uint64_t *pod_cache_pages,
+ uint64_t *pod_entries);
+
+int xc_domain_memory_get_pod_target(int xc_handle,
+ uint32_t domid,
+ uint64_t *tot_pages,
+ uint64_t *pod_cache_pages,
+ uint64_t *pod_entries);
int xc_domain_ioport_permission(int xc_handle,
uint32_t domid,
/**
* Translates a virtual address in the context of a given domain and
- * vcpu returning the machine page frame number of the associated
- * page.
+ * vcpu returning the GFN containing the address (that is, an MFN for
+ * PV guests, a PFN for HVM guests). Returns 0 for failure.
*
* @parm xc_handle a handle on an open hypervisor interface
* @parm dom the domain to perform the translation in
uint32_t domid,
uint32_t gvec,
uint32_t pirq,
- uint32_t gflags);
+ uint32_t gflags,
+ uint64_t gtable);
+
+int xc_domain_unbind_msi_irq(int xc_handle,
+ uint32_t domid,
+ uint32_t gvec,
+ uint32_t pirq,
+ uint32_t gflags);
int xc_domain_bind_pt_irq(int xc_handle,
uint32_t domid,
uint32_t domid,
uint32_t target);
+/* Control the domain for debug */
+int xc_domain_debug_control(int xc_handle,
+ uint32_t domid,
+ uint32_t sop,
+ uint32_t vcpu);
+
#if defined(__i386__) || defined(__x86_64__)
int xc_cpuid_check(int xc,
const unsigned int *input,
int xc_cpu_online(int xc_handle, int cpu);
int xc_cpu_offline(int xc_handle, int cpu);
+
+/*
+ * cpufreq para name of this structure named
+ * same as sysfs file name of native linux
+ */
+typedef xen_userspace_t xc_userspace_t;
+typedef xen_ondemand_t xc_ondemand_t;
+
+struct xc_get_cpufreq_para {
+ /* IN/OUT variable */
+ uint32_t cpu_num;
+ uint32_t freq_num;
+ uint32_t gov_num;
+
+ /* for all governors */
+ /* OUT variable */
+ uint32_t *affected_cpus;
+ uint32_t *scaling_available_frequencies;
+ char *scaling_available_governors;
+ char scaling_driver[CPUFREQ_NAME_LEN];
+
+ uint32_t cpuinfo_cur_freq;
+ uint32_t cpuinfo_max_freq;
+ uint32_t cpuinfo_min_freq;
+ uint32_t scaling_cur_freq;
+
+ char scaling_governor[CPUFREQ_NAME_LEN];
+ uint32_t scaling_max_freq;
+ uint32_t scaling_min_freq;
+
+ /* for specific governor */
+ union {
+ xc_userspace_t userspace;
+ xc_ondemand_t ondemand;
+ } u;
+};
+
+int xc_get_cpufreq_para(int xc_handle, int cpuid,
+ struct xc_get_cpufreq_para *user_para);
+int xc_set_cpufreq_gov(int xc_handle, int cpuid, char *govname);
+int xc_set_cpufreq_para(int xc_handle, int cpuid,
+ int ctrl_type, int ctrl_value);
+int xc_get_cpufreq_avgfreq(int xc_handle, int cpuid, int *avg_freq);
+
+struct xc_get_cputopo {
+ /* IN: maximum addressable entry in
+ * the caller-provided cpu_to_core/socket.
+ */
+ uint32_t max_cpus;
+ uint32_t *cpu_to_core;
+ uint32_t *cpu_to_socket;
+
+ /* OUT: number of cpus returned
+ * If OUT is greater than IN then the cpu_to_core/socket is truncated!
+ */
+ uint32_t nr_cpus;
+};
+
+int xc_get_cputopo(int xc_handle, struct xc_get_cputopo *info);
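+/* Usage sketch (not part of this change): "xch" and "max" are assumed;
+ * the caller owns the two arrays.
+ *
+ *     struct xc_get_cputopo topo = { .max_cpus = max };
+ *     topo.cpu_to_core   = malloc(max * sizeof(uint32_t));
+ *     topo.cpu_to_socket = malloc(max * sizeof(uint32_t));
+ *     if ( !xc_get_cputopo(xch, &topo) )
+ *         printf("%u cpus reported\n", topo.nr_cpus);
+ */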
+
+int xc_set_sched_opt_smt(int xc_handle, uint32_t value);
+
#endif /* XENCTRL_H */
int memsize,
const char *image_name);
+int xc_hvm_build_target_mem(int xc_handle,
+ uint32_t domid,
+ int memsize,
+ int target,
+ const char *image_name);
+
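+/* Usage sketch (not part of this change): build a 1024 MiB HVM guest
+ * pre-ballooned down to 512 MiB, i.e. 1024 MiB of p2m space backed by a
+ * 512 MiB PoD cache.  The firmware path below is purely illustrative.
+ *
+ *     xc_hvm_build_target_mem(xch, domid, 1024, 512,
+ *                             "/usr/lib/xen/boot/hvmloader");
+ */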
int xc_hvm_build_mem(int xc_handle,
uint32_t domid,
int memsize,
const char *image_buffer,
unsigned long image_size);
+int xc_suspend_evtchn_release(int xce, int suspend_evtchn);
+
+int xc_suspend_evtchn_init(int xc, int xce, int domid, int port);
+
+int xc_await_suspend(int xce, int suspend_evtchn);
+
#endif /* XENGUEST_H */
#include <stdlib.h>
#include <unistd.h>
#include <zlib.h>
-#include <strings.h>
#include <malloc.h>
#include "xg_private.h"
(256 * ((unsigned char)in_buf[in_size-2] +
(256 * (unsigned char)in_buf[in_size-1])))));
- bzero(&zStream, sizeof(zStream));
+ memset(&zStream, 0, sizeof(zStream));
out_buf = malloc(out_len + 16); /* Leave a little extra space */
if ( out_buf == NULL )
{
/* Inflate in one pass/call */
sts = inflate(&zStream, Z_FINISH);
+ inflateEnd(&zStream);
if ( sts != Z_STREAM_END )
{
ERROR("inflate failed, sts %d\n", sts);
static xmlNode *
add_param_struct(xmlNode *);
static xmlNode *
+add_param_array(xmlNode *);
+static xmlNode *
add_struct_array(xmlNode *, const char *);
static xmlNode *
add_nested_struct(xmlNode *, const char *);
const struct abstract_type *member_type = v->type->child;
arbitrary_set *set_val = v->u.struct_val;
abstract_value v;
- xmlNode *data_node = add_param_struct(params_node);
+ xmlNode *data_node = add_param_array(params_node);
for (size_t i = 0; i < set_val->size; i++)
{
}
+static xmlNode *
+add_param_array(xmlNode *params_node)
+{
+ xmlNode *param_node = add_container(params_node, "param");
+ xmlNode *value_node = add_container(param_node, "value");
+
+ return xmlNewChild(value_node, NULL, BAD_CAST "array", NULL);
+}
+
+
static void
add_struct_member(xmlNode *struct_node, const char *name, const char *type,
const char *value)
.PHONY: clean
clean:
- $(RM) *.o $(TARGETS) *~
+ $(RM) *.o $(TARGETS) *~ $(DEPS)
set -e; for d in $(SUBDIRS); do $(MAKE) -C $$d clean; done
%.o: %.c $(HDRS) Makefile
xenperf xenpm: %: %.o Makefile
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) $(LDFLAGS_libxenctrl)
+
+-include $(DEPS)
{
uint32_t eax, ebx, ecx, edx;
char signature[13];
+ uint32_t base;
- cpuid(0x40000000, &eax, &ebx, &ecx, &edx);
- *(uint32_t *)(signature + 0) = ebx;
- *(uint32_t *)(signature + 4) = ecx;
- *(uint32_t *)(signature + 8) = edx;
- signature[12] = '\0';
+ for ( base = 0x40000000; base < 0x40001000; base += 0x100 )
+ {
+ cpuid(base, &eax, &ebx, &ecx, &edx);
- if ( strcmp("XenVMMXenVMM", signature) || (eax < 0x40000002) )
- return 0;
+ *(uint32_t *)(signature + 0) = ebx;
+ *(uint32_t *)(signature + 4) = ecx;
+ *(uint32_t *)(signature + 8) = edx;
+ signature[12] = '\0';
+
+ if ( !strcmp("XenVMMXenVMM", signature) && (eax >= (base + 2)) )
+ goto found;
+ }
+
+ return 0;
- cpuid(0x40000001, &eax, &ebx, &ecx, &edx);
+ found:
+ cpuid(base + 1, &eax, &ebx, &ecx, &edx);
printf("Running in %s context on Xen v%d.%d.\n",
pv_context ? "PV" : "HVM", (uint16_t)(eax >> 16), (uint16_t)eax);
return 1;
#include <stdio.h>
#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
#include <getopt.h>
#include <errno.h>
+#include <signal.h>
#include <xenctrl.h>
#include <inttypes.h>
+#include <sys/time.h>
-int main(int argc, char **argv)
+#define ARRAY_SIZE(a) (sizeof (a) / sizeof ((a)[0]))
+
+static int xc_fd;
+static int max_cpu_nr;
+
+/* help message */
+void show_help(void)
{
- int xc_fd;
- int i, j, ret = 0;
- int cinfo = 0, pinfo = 0;
- int ch;
- xc_physinfo_t physinfo = { 0 };
+ fprintf(stderr,
+ "xen power management control tool\n\n"
+ "usage: xenpm <command> [args]\n\n"
+ "xenpm command list:\n\n"
+ " get-cpuidle-states [cpuid] list cpu idle info of CPU <cpuid> or all\n"
+ " get-cpufreq-states [cpuid] list cpu freq info of CPU <cpuid> or all\n"
+ " get-cpufreq-para [cpuid] list cpu freq parameter of CPU <cpuid> or all\n"
+ " set-scaling-maxfreq [cpuid] <HZ> set max cpu frequency <HZ> on CPU <cpuid>\n"
+ " or all CPUs\n"
+ " set-scaling-minfreq [cpuid] <HZ> set min cpu frequency <HZ> on CPU <cpuid>\n"
+ " or all CPUs\n"
+ " set-scaling-speed [cpuid] <num> set scaling speed on CPU <cpuid> or all\n"
+ " it is used in userspace governor.\n"
+ " set-scaling-governor [cpuid] <gov> set scaling governor on CPU <cpuid> or all\n"
+ " as userspace/performance/powersave/ondemand\n"
+ " set-sampling-rate [cpuid] <num> set sampling rate on CPU <cpuid> or all\n"
+ " it is used in ondemand governor.\n"
+ " set-up-threshold [cpuid] <num> set up threshold on CPU <cpuid> or all\n"
+ " it is used in ondemand governor.\n"
+ " get-cpu-topology get thread/core/socket topology info\n"
+ " set-sched-smt enable|disable enable/disable scheduler smt power saving\n"
+ " start [seconds] start collect Cx/Px statistics,\n"
+ " output after CTRL-C or SIGINT or several seconds.\n"
+ );
+}
+/* wrapper function */
+void help_func(int argc, char *argv[])
+{
+ show_help();
+}
+
+static void print_cxstat(int cpuid, struct xc_cx_stat *cxstat)
+{
+ int i;
- while ( (ch = getopt(argc, argv, "cp")) != -1 )
+ printf("cpu id : %d\n", cpuid);
+ printf("total C-states : %d\n", cxstat->nr);
+ printf("idle time(ms) : %"PRIu64"\n",
+ cxstat->idle_time/1000000UL);
+ for ( i = 0; i < cxstat->nr; i++ )
{
- switch ( ch )
- {
- case 'c':
- cinfo = 1;
- break;
- case 'p':
- pinfo = 1;
- break;
- default:
- fprintf(stderr, "%s [-p] [-c]\n", argv[0]);
- return -1;
- }
+ printf("C%d : transition [%020"PRIu64"]\n",
+ i, cxstat->triggers[i]);
+ printf(" residency [%020"PRIu64" ms]\n",
+ cxstat->residencies[i]/1000000UL);
}
+ printf("\n");
+}
+
+/* show cpu idle information on CPU cpuid */
+static int get_cxstat_by_cpuid(int xc_fd, int cpuid, struct xc_cx_stat *cxstat)
+{
+ int ret = 0;
+ int max_cx_num = 0;
+
+ ret = xc_pm_get_max_cx(xc_fd, cpuid, &max_cx_num);
+ if ( ret )
+ return errno;
- if ( !cinfo && !pinfo )
+ if ( !cxstat )
+ return -EINVAL;
+
+ cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
+ if ( !cxstat->triggers )
+ return -ENOMEM;
+ cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
+ if ( !cxstat->residencies )
{
- cinfo = 1;
- pinfo = 1;
+ free(cxstat->triggers);
+ return -ENOMEM;
}
- xc_fd = xc_interface_open();
- if ( xc_fd < 0 )
+ ret = xc_pm_get_cxstat(xc_fd, cpuid, cxstat);
+ if( ret )
{
- fprintf(stderr, "failed to get the handler\n");
- return xc_fd;
+ int temp = errno;
+ free(cxstat->triggers);
+ free(cxstat->residencies);
+ cxstat->triggers = NULL;
+ cxstat->residencies = NULL;
+ return temp;
}
- ret = xc_physinfo(xc_fd, &physinfo);
+ return 0;
+}
+
+static int show_cxstat_by_cpuid(int xc_fd, int cpuid)
+{
+ int ret = 0;
+ struct xc_cx_stat cxstatinfo;
+
+ ret = get_cxstat_by_cpuid(xc_fd, cpuid, &cxstatinfo);
if ( ret )
+ return ret;
+
+ print_cxstat(cpuid, &cxstatinfo);
+
+ free(cxstatinfo.triggers);
+ free(cxstatinfo.residencies);
+ return 0;
+}
+
+void cxstat_func(int argc, char *argv[])
+{
+ int cpuid = -1;
+
+ if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
+ cpuid = -1;
+
+ if ( cpuid >= max_cpu_nr )
+ cpuid = -1;
+
+ if ( cpuid < 0 )
{
- fprintf(stderr, "failed to get the processor information\n");
- xc_interface_close(xc_fd);
+ /* show cxstates on all cpus */
+ int i;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( show_cxstat_by_cpuid(xc_fd, i) == -ENODEV )
+ break;
+ }
+ else
+ show_cxstat_by_cpuid(xc_fd, cpuid);
+}
+
+static void print_pxstat(int cpuid, struct xc_px_stat *pxstat)
+{
+ int i;
+
+ printf("cpu id : %d\n", cpuid);
+ printf("total P-states : %d\n", pxstat->total);
+ printf("usable P-states : %d\n", pxstat->usable);
+ printf("current frequency : %"PRIu64" MHz\n",
+ pxstat->pt[pxstat->cur].freq);
+ for ( i = 0; i < pxstat->total; i++ )
+ {
+ if ( pxstat->cur == i )
+ printf("*P%d", i);
+ else
+ printf("P%d ", i);
+ printf(" : freq [%04"PRIu64" MHz]\n",
+ pxstat->pt[i].freq);
+ printf(" transition [%020"PRIu64"]\n",
+ pxstat->pt[i].count);
+ printf(" residency [%020"PRIu64" ms]\n",
+ pxstat->pt[i].residency/1000000UL);
+ }
+ printf("\n");
+}
+
+/* show cpu frequency information on CPU cpuid */
+static int get_pxstat_by_cpuid(int xc_fd, int cpuid, struct xc_px_stat *pxstat)
+{
+ int ret = 0;
+ int max_px_num = 0;
+
+ ret = xc_pm_get_max_px(xc_fd, cpuid, &max_px_num);
+ if ( ret )
+ return errno;
+
+ if ( !pxstat)
+ return -EINVAL;
+
+ pxstat->trans_pt = malloc(max_px_num * max_px_num *
+ sizeof(uint64_t));
+ if ( !pxstat->trans_pt )
+ return -ENOMEM;
+ pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
+ if ( !pxstat->pt )
+ {
+ free(pxstat->trans_pt);
+ return -ENOMEM;
+ }
+
+ ret = xc_pm_get_pxstat(xc_fd, cpuid, pxstat);
+ if( ret )
+ {
+ int temp = errno;
+ free(pxstat->trans_pt);
+ free(pxstat->pt);
+ pxstat->trans_pt = NULL;
+ pxstat->pt = NULL;
+ return temp;
+ }
+
+ return 0;
+}
+
+/* show cpu actual average freq information on CPU cpuid */
+static int get_avgfreq_by_cpuid(int xc_fd, int cpuid, int *avgfreq)
+{
+ int ret = 0;
+
+ ret = xc_get_cpufreq_avgfreq(xc_fd, cpuid, avgfreq);
+ if ( ret )
+ {
+ return errno;
+ }
+
+ return 0;
+}
+
+static int show_pxstat_by_cpuid(int xc_fd, int cpuid)
+{
+ int ret = 0;
+ struct xc_px_stat pxstatinfo;
+
+ ret = get_pxstat_by_cpuid(xc_fd, cpuid, &pxstatinfo);
+ if ( ret )
return ret;
+
+ print_pxstat(cpuid, &pxstatinfo);
+
+ free(pxstatinfo.trans_pt);
+ free(pxstatinfo.pt);
+ return 0;
+}
+
+void pxstat_func(int argc, char *argv[])
+{
+ int cpuid = -1;
+
+ if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
+ cpuid = -1;
+
+ if ( cpuid >= max_cpu_nr )
+ cpuid = -1;
+
+ if ( cpuid < 0 )
+ {
+ /* show pxstates on all cpus */
+ int i;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( show_pxstat_by_cpuid(xc_fd, i) == -ENODEV )
+ break;
}
+ else
+ show_pxstat_by_cpuid(xc_fd, cpuid);
+}
+
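+/*
+ * State shared between start_gather_func() and signal_int_handler():
+ * start/end snapshots of the C-/P-state statistics, the measured
+ * average frequencies and the per-CPU residency sums.
+ */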
+static uint64_t usec_start, usec_end;
+static struct xc_cx_stat *cxstat, *cxstat_start, *cxstat_end;
+static struct xc_px_stat *pxstat, *pxstat_start, *pxstat_end;
+static int *avgfreq;
+static uint64_t *sum, *sum_cx, *sum_px;
+
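+/*
+ * SIGINT/SIGALRM handler for the 'start' command: take the final
+ * C-/P-state snapshots, print the per-CPU residency deltas accumulated
+ * since start_gather_func(), free all buffers and exit.
+ */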
+static void signal_int_handler(int signo)
+{
+ int i, j;
+ struct timeval tv;
+ int cx_cap = 0, px_cap = 0;
- /* print out the C state information */
- if ( cinfo )
+ if ( gettimeofday(&tv, NULL) == -1 )
{
- int max_cx_num = 0;
- struct xc_cx_stat cxstatinfo, *cxstat = &cxstatinfo;
+ fprintf(stderr, "failed to get timeofday\n");
+ return ;
+ }
+ usec_end = tv.tv_sec * 1000000UL + tv.tv_usec;
- for ( i = 0; i < physinfo.nr_cpus; i++ )
- {
- ret = xc_pm_get_max_cx(xc_fd, i, &max_cx_num);
- if ( ret )
- {
- if ( errno == ENODEV )
- {
- fprintf(stderr, "Xen cpuidle is not enabled!\n");
- break;
- }
- else
- {
- fprintf(stderr, "[CPU%d] failed to get max C-state\n", i);
- continue;
- }
- }
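+ /* Passing a NULL stat buffer merely probes whether cpuidle data is
+ * available; only an -ENODEV result matters here. */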
+ if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV )
+ {
+ cx_cap = 1;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( !get_cxstat_by_cpuid(xc_fd, i, &cxstat_end[i]) )
+ for ( j = 0; j < cxstat_end[i].nr; j++ )
+ sum_cx[i] += cxstat_end[i].residencies[j] -
+ cxstat_start[i].residencies[j];
+ }
+
+ if ( get_pxstat_by_cpuid(xc_fd, 0, NULL) != -ENODEV )
+ {
+ px_cap = 1;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( !get_pxstat_by_cpuid(xc_fd, i , &pxstat_end[i]) )
+ for ( j = 0; j < pxstat_end[i].total; j++ )
+ sum_px[i] += pxstat_end[i].pt[j].residency -
+ pxstat_start[i].pt[j].residency;
+ }
- cxstat->triggers = malloc(max_cx_num * sizeof(uint64_t));
- if ( !cxstat->triggers )
+ for ( i = 0; i < max_cpu_nr; i++ )
+ get_avgfreq_by_cpuid(xc_fd, i, &avgfreq[i]);
+
+ printf("Elapsed time (ms): %"PRIu64"\n", (usec_end - usec_start) / 1000UL);
+ for ( i = 0; i < max_cpu_nr; i++ )
+ {
+ uint64_t res, triggers;
+ double avg_res;
+
+ printf("\nCPU%d:\tResidency(ms)\t\tAvg Res(ms)\n",i);
+ if ( cx_cap && sum_cx[i] > 0 )
+ {
+ for ( j = 0; j < cxstat_end[i].nr; j++ )
{
- fprintf(stderr, "failed to malloc for C-states triggers\n");
- break;
+ res = cxstat_end[i].residencies[j] -
+ cxstat_start[i].residencies[j];
+ triggers = cxstat_end[i].triggers[j] -
+ cxstat_start[i].triggers[j];
+ avg_res = (triggers==0) ? 0: (double)res/triggers/1000000.0;
+ printf(" C%d\t%"PRIu64"\t(%5.2f%%)\t%.2f\n", j, res/1000000UL,
+ 100 * res / (double)sum_cx[i], avg_res );
}
- cxstat->residencies = malloc(max_cx_num * sizeof(uint64_t));
- if ( !cxstat->residencies )
+ printf("\n");
+ }
+ if ( px_cap && sum_px[i]>0 )
+ {
+ for ( j = 0; j < pxstat_end[i].total; j++ )
{
- fprintf(stderr, "failed to malloc for C-states residencies\n");
- free(cxstat->triggers);
- break;
+ res = pxstat_end[i].pt[j].residency -
+ pxstat_start[i].pt[j].residency;
+ printf(" P%d\t%"PRIu64"\t(%5.2f%%)\n", j,
+ res / 1000000UL, 100UL * res / (double)sum_px[i]);
}
+ }
+ printf(" Avg freq\t%d\tKHz\n", avgfreq[i]);
+ }
- ret = xc_pm_get_cxstat(xc_fd, i, cxstat);
- if( ret )
- {
- fprintf(stderr, "[CPU%d] failed to get C-states statistics "
- "information\n", i);
- free(cxstat->triggers);
- free(cxstat->residencies);
- continue;
- }
+ /* some clean up and then exits */
+ for ( i = 0; i < 2 * max_cpu_nr; i++ )
+ {
+ free(cxstat[i].triggers);
+ free(cxstat[i].residencies);
+ free(pxstat[i].trans_pt);
+ free(pxstat[i].pt);
+ }
+ free(cxstat);
+ free(pxstat);
+ free(sum);
+ free(avgfreq);
+ xc_interface_close(xc_fd);
+ exit(0);
+}
- printf("cpu id : %d\n", i);
- printf("total C-states : %d\n", cxstat->nr);
- printf("idle time(ms) : %"PRIu64"\n",
- cxstat->idle_time/1000000UL);
- for ( j = 0; j < cxstat->nr; j++ )
- {
- printf("C%d : transition [%020"PRIu64"]\n",
- j, cxstat->triggers[j]);
- printf(" residency [%020"PRIu64" ms]\n",
- cxstat->residencies[j]*1000000UL/3579/1000000UL);
- }
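+/*
+ * 'start' command: record the initial C-/P-state snapshots for every
+ * CPU, then sleep until SIGINT (or SIGALRM if a timeout was given);
+ * the results are reported by signal_int_handler().
+ */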
+void start_gather_func(int argc, char *argv[])
+{
+ int i;
+ struct timeval tv;
+ int timeout = 0;
- free(cxstat->triggers);
- free(cxstat->residencies);
+ if ( argc == 1 )
+ {
+ sscanf(argv[0], "%d", &timeout);
+ if ( timeout <= 0 )
+ fprintf(stderr, "failed to set timeout seconds, falling back...\n");
+ else
+ printf("Timeout set to %d seconds\n", timeout);
+ }
- printf("\n");
+ if ( gettimeofday(&tv, NULL) == -1 )
+ {
+ fprintf(stderr, "failed to get timeofday\n");
+ return ;
+ }
+ usec_start = tv.tv_sec * 1000000UL + tv.tv_usec;
+
+ sum = malloc(sizeof(uint64_t) * 2 * max_cpu_nr);
+ if ( sum == NULL )
+ return ;
+ cxstat = malloc(sizeof(struct xc_cx_stat) * 2 * max_cpu_nr);
+ if ( cxstat == NULL )
+ {
+ free(sum);
+ return ;
+ }
+ pxstat = malloc(sizeof(struct xc_px_stat) * 2 * max_cpu_nr);
+ if ( pxstat == NULL )
+ {
+ free(sum);
+ free(cxstat);
+ return ;
+ }
+ avgfreq = malloc(sizeof(int) * max_cpu_nr);
+ if ( avgfreq == NULL )
+ {
+ free(sum);
+ free(cxstat);
+ free(pxstat);
+ return ;
+ }
+ memset(sum, 0, sizeof(uint64_t) * 2 * max_cpu_nr);
+ memset(cxstat, 0, sizeof(struct xc_cx_stat) * 2 * max_cpu_nr);
+ memset(pxstat, 0, sizeof(struct xc_px_stat) * 2 * max_cpu_nr);
+ memset(avgfreq, 0, sizeof(int) * max_cpu_nr);
+ sum_cx = sum;
+ sum_px = sum + max_cpu_nr;
+ cxstat_start = cxstat;
+ cxstat_end = cxstat + max_cpu_nr;
+ pxstat_start = pxstat;
+ pxstat_end = pxstat + max_cpu_nr;
+
+ if ( get_cxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV &&
+ get_pxstat_by_cpuid(xc_fd, 0, NULL) == -ENODEV )
+ {
+ fprintf(stderr, "Xen cpu idle and frequency is disabled!\n");
+ return ;
+ }
+
+ for ( i = 0; i < max_cpu_nr; i++ )
+ {
+ get_cxstat_by_cpuid(xc_fd, i, &cxstat_start[i]);
+ get_pxstat_by_cpuid(xc_fd, i, &pxstat_start[i]);
+ get_avgfreq_by_cpuid(xc_fd, i, &avgfreq[i]);
+ }
+
+ if (signal(SIGINT, signal_int_handler) == SIG_ERR)
+ {
+ fprintf(stderr, "failed to set signal int handler\n");
+ free(sum);
+ free(pxstat);
+ free(cxstat);
+ free(avgfreq);
+ return ;
+ }
+
+ if ( timeout > 0 )
+ {
+ if ( signal(SIGALRM, signal_int_handler) == SIG_ERR )
+ {
+ fprintf(stderr, "failed to set signal alarm handler\n");
+ free(sum);
+ free(pxstat);
+ free(cxstat);
+ free(avgfreq);
+ return ;
}
+ alarm(timeout);
}
- /* print out P state information */
- if ( pinfo )
+ printf("Start sampling, waiting for CTRL-C or SIGINT or SIGALARM signal ...\n");
+
+ pause();
+}
+
+/* print out parameters about cpu frequency */
+static void print_cpufreq_para(int cpuid, struct xc_get_cpufreq_para *p_cpufreq)
+{
+ int i;
+
+ printf("cpu id : %d\n", cpuid);
+
+ printf("affected_cpus :");
+ for ( i = 0; i < p_cpufreq->cpu_num; i++ )
+ if ( i == cpuid )
+ printf(" *%d", p_cpufreq->affected_cpus[i]);
+ else
+ printf(" %d", p_cpufreq->affected_cpus[i]);
+ printf("\n");
+
+ printf("cpuinfo frequency : max [%u] min [%u] cur [%u]\n",
+ p_cpufreq->cpuinfo_max_freq,
+ p_cpufreq->cpuinfo_min_freq,
+ p_cpufreq->cpuinfo_cur_freq);
+
+ printf("scaling_driver : %s\n", p_cpufreq->scaling_driver);
+
+ printf("scaling_avail_gov : %s\n",
+ p_cpufreq->scaling_available_governors);
+
+ printf("current_governor : %s\n", p_cpufreq->scaling_governor);
+ if ( !strncmp(p_cpufreq->scaling_governor,
+ "userspace", CPUFREQ_NAME_LEN) )
+ {
+ printf(" userspace specific :\n");
+ printf(" scaling_setspeed : %u\n",
+ p_cpufreq->u.userspace.scaling_setspeed);
+ }
+ else if ( !strncmp(p_cpufreq->scaling_governor,
+ "ondemand", CPUFREQ_NAME_LEN) )
+ {
+ printf(" ondemand specific :\n");
+ printf(" sampling_rate : max [%u] min [%u] cur [%u]\n",
+ p_cpufreq->u.ondemand.sampling_rate_max,
+ p_cpufreq->u.ondemand.sampling_rate_min,
+ p_cpufreq->u.ondemand.sampling_rate);
+ printf(" up_threshold : %u\n",
+ p_cpufreq->u.ondemand.up_threshold);
+ }
+
+ printf("scaling_avail_freq :");
+ for ( i = 0; i < p_cpufreq->freq_num; i++ )
+ if ( p_cpufreq->scaling_available_frequencies[i] ==
+ p_cpufreq->scaling_cur_freq )
+ printf(" *%d", p_cpufreq->scaling_available_frequencies[i]);
+ else
+ printf(" %d", p_cpufreq->scaling_available_frequencies[i]);
+ printf("\n");
+
+ printf("scaling frequency : max [%u] min [%u] cur [%u]\n",
+ p_cpufreq->scaling_max_freq,
+ p_cpufreq->scaling_min_freq,
+ p_cpufreq->scaling_cur_freq);
+ printf("\n");
+}
+
+/* show cpu frequency parameters information on CPU cpuid */
+static int show_cpufreq_para_by_cpuid(int xc_fd, int cpuid)
+{
+ int ret = 0;
+ struct xc_get_cpufreq_para cpufreq_para, *p_cpufreq = &cpufreq_para;
+
+ p_cpufreq->cpu_num = 0;
+ p_cpufreq->freq_num = 0;
+ p_cpufreq->gov_num = 0;
+ p_cpufreq->affected_cpus = NULL;
+ p_cpufreq->scaling_available_frequencies = NULL;
+ p_cpufreq->scaling_available_governors = NULL;
+
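+ /*
+ * xc_get_cpufreq_para() fails with errno == EAGAIN when the buffers are
+ * too small, after updating cpu_num/freq_num/gov_num to the required
+ * sizes; reallocate and retry until the sizes match.
+ */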
+ do
{
- int max_px_num = 0;
- struct xc_px_stat pxstatinfo, *pxstat = &pxstatinfo;
+ free(p_cpufreq->affected_cpus);
+ free(p_cpufreq->scaling_available_frequencies);
+ free(p_cpufreq->scaling_available_governors);
+
+ p_cpufreq->affected_cpus = NULL;
+ p_cpufreq->scaling_available_frequencies = NULL;
+ p_cpufreq->scaling_available_governors = NULL;
- for ( i = 0; i < physinfo.nr_cpus; i++ )
+ if (!(p_cpufreq->affected_cpus =
+ malloc(p_cpufreq->cpu_num * sizeof(uint32_t))))
{
- ret = xc_pm_get_max_px(xc_fd, i, &max_px_num);
- if ( ret )
- {
- if ( errno == ENODEV )
- {
- printf("Xen cpufreq is not enabled!\n");
- break;
- }
- else
- {
- fprintf(stderr, "[CPU%d] failed to get max P-state\n", i);
- continue;
- }
- }
+ fprintf(stderr,
+ "[CPU%d] failed to malloc for affected_cpus\n",
+ cpuid);
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (!(p_cpufreq->scaling_available_frequencies =
+ malloc(p_cpufreq->freq_num * sizeof(uint32_t))))
+ {
+ fprintf(stderr,
+ "[CPU%d] failed to malloc for scaling_available_frequencies\n",
+ cpuid);
+ ret = -ENOMEM;
+ goto out;
+ }
+ if (!(p_cpufreq->scaling_available_governors =
+ malloc(p_cpufreq->gov_num * CPUFREQ_NAME_LEN * sizeof(char))))
+ {
+ fprintf(stderr,
+ "[CPU%d] failed to malloc for scaling_available_governors\n",
+ cpuid);
+ ret = -ENOMEM;
+ goto out;
+ }
- pxstat->trans_pt = malloc(max_px_num * max_px_num *
- sizeof(uint64_t));
- if ( !pxstat->trans_pt )
- {
- fprintf(stderr, "failed to malloc for P-states "
- "transition table\n");
- break;
- }
- pxstat->pt = malloc(max_px_num * sizeof(struct xc_px_val));
- if ( !pxstat->pt )
- {
- fprintf(stderr, "failed to malloc for P-states table\n");
- free(pxstat->pt);
+ ret = xc_get_cpufreq_para(xc_fd, cpuid, p_cpufreq);
+ } while ( ret && errno == EAGAIN );
+
+ if ( ret == 0 )
+ print_cpufreq_para(cpuid, p_cpufreq);
+ else if ( errno == ENODEV )
+ {
+ ret = -ENODEV;
+ fprintf(stderr, "Xen cpufreq is not enabled!\n");
+ }
+ else
+ fprintf(stderr,
+ "[CPU%d] failed to get cpufreq parameter\n",
+ cpuid);
+
+out:
+ free(p_cpufreq->scaling_available_governors);
+ free(p_cpufreq->scaling_available_frequencies);
+ free(p_cpufreq->affected_cpus);
+
+ return ret;
+}
+
+void cpufreq_para_func(int argc, char *argv[])
+{
+ int cpuid = -1;
+
+ if ( argc > 0 && sscanf(argv[0], "%d", &cpuid) != 1 )
+ cpuid = -1;
+
+ if ( cpuid >= max_cpu_nr )
+ cpuid = -1;
+
+ if ( cpuid < 0 )
+ {
+ /* show cpu frequency information on all cpus */
+ int i;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( show_cpufreq_para_by_cpuid(xc_fd, i) == -ENODEV )
break;
- }
+ }
+ else
+ show_cpufreq_para_by_cpuid(xc_fd, cpuid);
+}
- ret = xc_pm_get_pxstat(xc_fd, i, pxstat);
- if( ret )
- {
- fprintf(stderr, "[CPU%d] failed to get P-states "
- "statistics information\n", i);
- free(pxstat->trans_pt);
- free(pxstat->pt);
- continue;
- }
+void scaling_max_freq_func(int argc, char *argv[])
+{
+ int cpuid = -1, freq = -1;
+
+ if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 ||
+ sscanf(argv[0], "%d", &cpuid) != 1)) ||
+ (argc == 1 && sscanf(argv[0], "%d", &freq) != 1 ) ||
+ argc == 0 )
+ {
+ fprintf(stderr, "failed to set scaling max freq\n");
+ return ;
+ }
+
+ if ( cpuid < 0 )
+ {
+ int i;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MAX_FREQ, freq) )
+ fprintf(stderr, "[CPU%d] failed to set scaling max freq\n", i);
+ }
+ else
+ {
+ if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MAX_FREQ, freq) )
+ fprintf(stderr, "failed to set scaling max freq\n");
+ }
+}
+
+void scaling_min_freq_func(int argc, char *argv[])
+{
+ int cpuid = -1, freq = -1;
+
+ if ( (argc >= 2 && (sscanf(argv[1], "%d", &freq) != 1 ||
+ sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+ (argc == 1 && sscanf(argv[0], "%d", &freq) != 1 ) ||
+ argc == 0 )
+ {
+ fprintf(stderr, "failed to set scaling min freq\n");
+ return ;
+ }
+
+ if ( cpuid < 0 )
+ {
+ int i;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( xc_set_cpufreq_para(xc_fd, i, SCALING_MIN_FREQ, freq) )
+ fprintf(stderr, "[CPU%d] failed to set scaling min freq\n", i);
+ }
+ else
+ {
+ if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_MIN_FREQ, freq) )
+ fprintf(stderr, "failed to set scaling min freq\n");
+ }
+}
+
+void scaling_speed_func(int argc, char *argv[])
+{
+ int cpuid = -1, speed = -1;
+
+ if ( (argc >= 2 && (sscanf(argv[1], "%d", &speed) != 1 ||
+ sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+ (argc == 1 && sscanf(argv[0], "%d", &speed) != 1 ) ||
+ argc == 0 )
+ {
+ fprintf(stderr, "failed to set scaling speed\n");
+ return ;
+ }
+
+ if ( cpuid < 0 )
+ {
+ int i;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( xc_set_cpufreq_para(xc_fd, i, SCALING_SETSPEED, speed) )
+ fprintf(stderr, "[CPU%d] failed to set scaling speed\n", i);
+ }
+ else
+ {
+ if ( xc_set_cpufreq_para(xc_fd, cpuid, SCALING_SETSPEED, speed) )
+ fprintf(stderr, "failed to set scaling speed\n");
+ }
+}
+
+void scaling_sampling_rate_func(int argc, char *argv[])
+{
+ int cpuid = -1, rate = -1;
+
+ if ( (argc >= 2 && (sscanf(argv[1], "%d", &rate) != 1 ||
+ sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+ (argc == 1 && sscanf(argv[0], "%d", &rate) != 1 ) ||
+ argc == 0 )
+ {
+ fprintf(stderr, "failed to set scaling sampling rate\n");
+ return ;
+ }
+
+ if ( cpuid < 0 )
+ {
+ int i;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( xc_set_cpufreq_para(xc_fd, i, SAMPLING_RATE, rate) )
+ fprintf(stderr,
+ "[CPU%d] failed to set scaling sampling rate\n", i);
+ }
+ else
+ {
+ if ( xc_set_cpufreq_para(xc_fd, cpuid, SAMPLING_RATE, rate) )
+ fprintf(stderr, "failed to set scaling sampling rate\n");
+ }
+}
- printf("cpu id : %d\n", i);
- printf("total P-states : %d\n", pxstat->total);
- printf("usable P-states : %d\n", pxstat->usable);
- printf("current frequency : %"PRIu64" MHz\n",
- pxstat->pt[pxstat->cur].freq);
- for ( j = 0; j < pxstat->total; j++ )
+void scaling_up_threshold_func(int argc, char *argv[])
+{
+ int cpuid = -1, threshold = -1;
+
+ if ( (argc >= 2 && (sscanf(argv[1], "%d", &threshold) != 1 ||
+ sscanf(argv[0], "%d", &cpuid) != 1) ) ||
+ (argc == 1 && sscanf(argv[0], "%d", &threshold) != 1 ) ||
+ argc == 0 )
+ {
+ fprintf(stderr, "failed to set up scaling threshold\n");
+ return ;
+ }
+
+ if ( cpuid < 0 )
+ {
+ int i;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( xc_set_cpufreq_para(xc_fd, i, UP_THRESHOLD, threshold) )
+ fprintf(stderr,
+ "[CPU%d] failed to set up scaling threshold\n", i);
+ }
+ else
+ {
+ if ( xc_set_cpufreq_para(xc_fd, cpuid, UP_THRESHOLD, threshold) )
+ fprintf(stderr, "failed to set up scaling threshold\n");
+ }
+}
+
+void scaling_governor_func(int argc, char *argv[])
+{
+ int cpuid = -1;
+ char *name = NULL;
+
+ if ( argc >= 2 )
+ {
+ name = strdup(argv[1]);
+ if ( name == NULL )
+ goto out;
+ if ( sscanf(argv[0], "%d", &cpuid) != 1 )
+ {
+ free(name);
+ goto out;
+ }
+ }
+ else if ( argc > 0 )
+ {
+ name = strdup(argv[0]);
+ if ( name == NULL )
+ goto out;
+ }
+ else
+ goto out;
+
+ if ( cpuid < 0 )
+ {
+ int i;
+ for ( i = 0; i < max_cpu_nr; i++ )
+ if ( xc_set_cpufreq_gov(xc_fd, i, name) )
+ fprintf(stderr, "[CPU%d] failed to set governor name\n", i);
+ }
+ else
+ {
+ if ( xc_set_cpufreq_gov(xc_fd, cpuid, name) )
+ fprintf(stderr, "failed to set governor name\n");
+ }
+
+ free(name);
+ return ;
+out:
+ fprintf(stderr, "failed to set governor name\n");
+}
+
+#define MAX_NR_CPU 512
+
+void cpu_topology_func(int argc, char *argv[])
+{
+ uint32_t cpu_to_core[MAX_NR_CPU];
+ uint32_t cpu_to_socket[MAX_NR_CPU];
+ struct xc_get_cputopo info;
+ int i, ret;
+
+ info.cpu_to_core = cpu_to_core;
+ info.cpu_to_socket = cpu_to_socket;
+ info.max_cpus = MAX_NR_CPU;
+ ret = xc_get_cputopo(xc_fd, &info);
+ if (!ret)
+ {
+ printf("CPU\tcore\tsocket\n");
+ for (i=0; i<info.nr_cpus; i++)
+ {
+ if ( info.cpu_to_core[i] != INVALID_TOPOLOGY_ID &&
+ info.cpu_to_socket[i] != INVALID_TOPOLOGY_ID )
{
- if ( pxstat->cur == j )
- printf("*P%d", j);
- else
- printf("P%d ", j);
- printf(" : freq [%04"PRIu64" MHz]\n",
- pxstat->pt[j].freq);
- printf(" transition [%020"PRIu64"]\n",
- pxstat->pt[j].count);
- printf(" residency [%020"PRIu64" ms]\n",
- pxstat->pt[j].residency/1000000UL);
+ printf("CPU%d\t %d\t %d\n", i, info.cpu_to_core[i],
+ info.cpu_to_socket[i]);
}
+ }
+ }
+ else
+ {
+ printf("Can not get Xen CPU topology!\n");
+ }
- free(pxstat->trans_pt);
- free(pxstat->pt);
+ return ;
+}
- printf("\n");
- }
+void set_sched_smt_func(int argc, char *argv[])
+{
+ int value, rc;
+
+ if (argc != 1){
+ show_help();
+ exit(-1);
+ }
+
+ if ( !strncmp(argv[0], "disable", sizeof("disable")) )
+ {
+ value = 0;
+ }
+ else if ( !strncmp(argv[0], "enable", sizeof("enable")) )
+ {
+ value = 1;
+ }
+ else
+ {
+ show_help();
+ exit(-1);
+ }
+
+ rc = xc_set_sched_opt_smt(xc_fd, value);
+ printf("%s sched_smt_power_savings %s\n", argv[0],
+ rc? "failed":"successeed" );
+
+ return;
+}
+
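+/*
+ * Table of xenpm sub-commands; main() matches argv[1] against these
+ * names by prefix and dispatches to the corresponding handler.
+ */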
+struct {
+ const char *name;
+ void (*function)(int argc, char *argv[]);
+} main_options[] = {
+ { "help", help_func },
+ { "get-cpuidle-states", cxstat_func },
+ { "get-cpufreq-states", pxstat_func },
+ { "start", start_gather_func },
+ { "get-cpufreq-para", cpufreq_para_func },
+ { "set-scaling-maxfreq", scaling_max_freq_func },
+ { "set-scaling-minfreq", scaling_min_freq_func },
+ { "set-scaling-governor", scaling_governor_func },
+ { "set-scaling-speed", scaling_speed_func },
+ { "set-sampling-rate", scaling_sampling_rate_func },
+ { "set-up-threshold", scaling_up_threshold_func },
+ { "get-cpu-topology", cpu_topology_func},
+ { "set-sched-smt", set_sched_smt_func},
+};
+
+int main(int argc, char *argv[])
+{
+ int i, ret = 0;
+ xc_physinfo_t physinfo = { 0 };
+ int nr_matches = 0;
+ int matches_main_options[ARRAY_SIZE(main_options)];
+
+ if ( argc < 2 )
+ {
+ show_help();
+ return 0;
+ }
+
+ xc_fd = xc_interface_open();
+ if ( xc_fd < 0 )
+ {
+ fprintf(stderr, "failed to get the handler\n");
+ return 0;
}
+ ret = xc_physinfo(xc_fd, &physinfo);
+ if ( ret )
+ {
+ fprintf(stderr, "failed to get the processor information\n");
+ xc_interface_close(xc_fd);
+ return 0;
+ }
+ max_cpu_nr = physinfo.nr_cpus;
+
+ /* calculate how many options match with user's input */
+ for ( i = 0; i < ARRAY_SIZE(main_options); i++ )
+ if ( !strncmp(main_options[i].name, argv[1], strlen(argv[1])) )
+ matches_main_options[nr_matches++] = i;
+
+ if ( nr_matches > 1 )
+ {
+ fprintf(stderr, "Ambigious options: ");
+ for ( i = 0; i < nr_matches; i++ )
+ fprintf(stderr, " %s", main_options[matches_main_options[i]].name);
+ fprintf(stderr, "\n");
+ }
+ else if ( nr_matches == 1 )
+ /* dispatch to the corresponding function handler */
+ main_options[matches_main_options[0]].function(argc - 2, argv + 2);
+ else
+ show_help();
+
xc_interface_close(xc_fd);
- return ret;
+ return 0;
}
.PHONY: clean
clean:
- rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out
+ rm -rf build tmp *.pyc *.pyo *.o *.a *~ a.out $(DEPS)
+
+-include $(DEPS)
# Timed out waiting for a keypress
if mytime != -1:
mytime += 1
- if mytime >= int(timeout):
+ # curses.timeout() does not work properly on Solaris
+ # So we may come here even after a key has been pressed.
+ # Check both timeout and mytime to avoid exiting
+ # when we shouldn't.
+ if timeout != -1 and mytime >= int(timeout):
self.isdone = True
break
else:
return None
-def run_grub(file, entry, fs):
+def run_grub(file, entry, fs, arg):
global g
global sel
print "No kernel image selected!"
sys.exit(1)
- img = g.cf.images[sel]
+ try:
+ img = g.cf.images[sel]
+ except:
+ log.debug("PyGrub: Default selection is not valid, using first boot configuration...")
+ img = g.cf.images[0]
grubcfg = { "kernel": None, "ramdisk": None, "args": None }
if img.initrd:
grubcfg["ramdisk"] = img.initrd[1]
if img.args:
- grubcfg["args"] = img.args
+ grubcfg["args"] = img.args + " " + arg
return grubcfg
return cfg
+def sniff_netware(fs, cfg):
+ if not fs.file_exists("/nwserver/xnloader.sys"):
+ return cfg
+
+ if not cfg["kernel"]:
+ cfg["kernel"] = "/nwserver/xnloader.sys"
+
+ return cfg
+
if __name__ == "__main__":
sel = None
isconfig = False
# what was passed in
- incfg = { "kernel": None, "ramdisk": None, "args": None }
+ incfg = { "kernel": None, "ramdisk": None, "args": "" }
# what grub or sniffing chose
chosencfg = { "kernel": None, "ramdisk": None, "args": None }
# what to boot
# debug
if isconfig:
- chosencfg = run_grub(file, entry)
+ chosencfg = run_grub(file, entry, fs, incfg["args"])
print " kernel: %s" % chosencfg["kernel"]
if img.initrd:
print " initrd: %s" % chosencfg["ramdisk"]
chosencfg = sniff_solaris(fs, incfg)
if not chosencfg["kernel"]:
- chosencfg = run_grub(file, entry, fs)
+ chosencfg = sniff_netware(fs, incfg)
+
+ if not chosencfg["kernel"]:
+ chosencfg = run_grub(file, entry, fs, incfg["args"])
data = fs.open_file(chosencfg["kernel"]).read()
(tfd, bootcfg["kernel"]) = tempfile.mkstemp(prefix="boot_kernel.",
.PHONY: clean
clean:
rm -rf build *.pyc *.pyo *.o *.a *~ $(CATALOGS) xen/util/auxbin.pyc
+ rm -f $(DEPS)
+
+-include $(DEPS)
libraries = libraries,
sources = [ "xen/lowlevel/scf/scf.c" ])
+process = Extension("process",
+ extra_compile_args = extra_compile_args,
+ include_dirs = include_dirs + [ "xen/lowlevel/process" ],
+ library_dirs = library_dirs,
+ libraries = libraries + [ "contract" ],
+ sources = [ "xen/lowlevel/process/process.c" ])
+
acm = Extension("acm",
extra_compile_args = extra_compile_args,
include_dirs = include_dirs + [ "xen/lowlevel/acm" ],
modules = [ xc, xs, ptsname, acm, flask ]
if os.uname()[0] == 'SunOS':
modules.append(scf)
+ modules.append(process)
setup(name = 'xen',
version = '3.0',
xc_handle = xc_interface_open();
if (xc_handle < 0) {
errno = xc_handle;
+ free(buf);
return PyErr_SetFromErrno(xc_error_obj);
}
--- /dev/null
+/*
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#include <Python.h>
+
+#include <libcontract.h>
+#include <sys/contract/process.h>
+#include <fcntl.h>
+#include <stdio.h>
+
+/*
+ * On Solaris, xend runs under a contract as an smf(5) service. As a
+ * result, when spawning long-running children such as a domain's
+ * qemu-dm instantiation, we have to make sure it's in a separate
+ * contract. Before we fork, we must activate a separate process
+ * contract template to place the child processes in a new contract.
+ */
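+/*
+ * The usual libcontract sequence is therefore: activate() a template,
+ * fork() so the child starts in a fresh contract, then clear() the
+ * template and abandon_latest() in the parent so it does not keep
+ * ownership of the child's contract.
+ */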
+
+static PyObject *
+pyprocess_activate(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "name", NULL };
+ char *name = NULL;
+ int flags;
+ int cfd;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|s", kwlist, &name))
+ return (NULL);
+
+ cfd = open64("/system/contract/process/template", O_RDWR);
+
+ if (cfd == -1)
+ goto err;
+
+ if ((flags = fcntl(cfd, F_GETFD, 0)) == -1)
+ goto err;
+
+ if (fcntl(cfd, F_SETFD, flags | FD_CLOEXEC) == -1)
+ goto err;
+
+ if (name != NULL)
+ ct_pr_tmpl_set_svc_aux(cfd, name);
+
+ if (ct_tmpl_activate(cfd))
+ goto err;
+
+ return (PyInt_FromLong((long)cfd));
+
+err:
+ if (cfd != -1)
+ close(cfd);
+ PyErr_SetFromErrno(PyExc_OSError);
+ return (NULL);
+}
+
+static PyObject *
+pyprocess_clear(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { "contract", NULL };
+ int cfd;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "i", kwlist, &cfd))
+ return (NULL);
+
+ if (ct_tmpl_clear(cfd) != 0) {
+ PyErr_SetFromErrno(PyExc_OSError);
+ return (NULL);
+ }
+
+ close(cfd);
+
+ Py_INCREF(Py_None);
+ return (Py_None);
+}
+
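+/*
+ * Read the id of the most recently created contract from
+ * /system/contract/process/latest, then open that contract's ctl node
+ * and abandon it.
+ */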
+static PyObject *
+pyprocess_abandon_latest(PyObject *o, PyObject *args, PyObject *kwargs)
+{
+ static char *kwlist[] = { NULL };
+ static char path[PATH_MAX];
+ ct_stathdl_t st;
+ ctid_t latest;
+ int cfd;
+
+ if (!PyArg_ParseTupleAndKeywords(args, kwargs, "", kwlist))
+ return (NULL);
+
+ cfd = open64("/system/contract/process/latest", O_RDONLY);
+ if (cfd == -1)
+ goto err;
+
+ ct_status_read(cfd, CTD_COMMON, &st);
+ latest = ct_status_get_id(st);
+ ct_status_free(st);
+ close(cfd);
+
+ snprintf(path, PATH_MAX, "/system/contract/process/%ld/ctl",
+ (long)latest);
+
+ if ((cfd = open64(path, O_WRONLY)) < 0)
+ goto err;
+ if (ct_ctl_abandon(cfd))
+ goto err;
+ close(cfd);
+
+ Py_INCREF(Py_None);
+ return (Py_None);
+err:
+ PyErr_SetFromErrno(PyExc_OSError);
+ return (NULL);
+}
+
+PyDoc_STRVAR(pyprocess_activate__doc__,
+ "activate(name)\n"
+ "\n"
+ "Activate a new process contract template. If name is given,\n"
+ "it is used as the template's auxiliary value.\n"
+ "Returns the new contract template.\n");
+
+PyDoc_STRVAR(pyprocess_clear__doc__,
+ "clear(contract)\n"
+ "\n"
+ "Clear and close the given contract template.\n");
+
+PyDoc_STRVAR(pyprocess_abandon_latest__doc__,
+ "abandon_latest()\n"
+ "\n"
+ "Abandon the latest contract created by this thread.\n");
+
+static struct PyMethodDef pyprocess_module_methods[] = {
+ { "activate", (PyCFunction) pyprocess_activate,
+ METH_VARARGS|METH_KEYWORDS, pyprocess_activate__doc__ },
+ { "clear", (PyCFunction) pyprocess_clear,
+ METH_VARARGS|METH_KEYWORDS, pyprocess_clear__doc__ },
+ { "abandon_latest", (PyCFunction) pyprocess_abandon_latest,
+ METH_VARARGS|METH_KEYWORDS, pyprocess_abandon_latest__doc__ },
+ { NULL, NULL, 0, NULL }
+};
+
+PyMODINIT_FUNC
+initprocess(void)
+{
+ Py_InitModule("process", pyprocess_module_methods);
+}
if ( rc < 0 )
{
- free(sdev_array);
- return pyxc_error_to_exception();
+ free(sdev_array);
+ return pyxc_error_to_exception();
}
if ( !num_sdevs )
{
- free(sdev_array);
- return Py_BuildValue("s", "");
+ free(sdev_array);
+ return Py_BuildValue("s", "");
}
group_str = calloc(num_sdevs, sizeof(dev_str));
if (group_str == NULL)
+ {
+ free(sdev_array);
return PyErr_NoMemory();
+ }
for ( i = 0; i < num_sdevs; i++ )
{
int i;
#endif
char *image;
- int memsize, vcpus = 1, acpi = 0, apic = 1;
+ int memsize, target=-1, vcpus = 1, acpi = 0, apic = 1;
static char *kwd_list[] = { "domid",
- "memsize", "image", "vcpus", "acpi",
+ "memsize", "image", "target", "vcpus", "acpi",
"apic", NULL };
- if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iii", kwd_list,
- &dom, &memsize,
- &image, &vcpus, &acpi, &apic) )
+ if ( !PyArg_ParseTupleAndKeywords(args, kwds, "iis|iiii", kwd_list,
+ &dom, &memsize, &image, &target, &vcpus,
+ &acpi, &apic) )
return NULL;
- if ( xc_hvm_build(self->xc_handle, dom, memsize, image) != 0 )
+ if ( target == -1 )
+ target = memsize;
+
+ if ( xc_hvm_build_target_mem(self->xc_handle, dom, memsize,
+ target, image) != 0 )
return pyxc_error_to_exception();
#if !defined(__ia64__)
- /* Set up the HVM info table. */
+ /* Fix up the HVM info table. */
va_map = xc_map_foreign_range(self->xc_handle, dom, XC_PAGE_SIZE,
PROT_READ | PROT_WRITE,
HVM_INFO_PFN);
if ( va_map == NULL )
return PyErr_SetFromErrno(xc_error_obj);
va_hvm = (struct hvm_info_table *)(va_map + HVM_INFO_OFFSET);
- memset(va_hvm, 0, sizeof(*va_hvm));
- strncpy(va_hvm->signature, "HVM INFO", 8);
- va_hvm->length = sizeof(struct hvm_info_table);
va_hvm->acpi_enabled = acpi;
va_hvm->apic_mode = apic;
va_hvm->nr_vcpus = vcpus;
for ( i = 0, sum = 0; i < va_hvm->length; i++ )
sum += ((uint8_t *)va_hvm)[i];
- va_hvm->checksum = -sum;
+ va_hvm->checksum -= sum;
munmap(va_map, XC_PAGE_SIZE);
#endif
return zero;
}
+static PyObject *pyxc_domain_set_target_mem(XcObject *self, PyObject *args)
+{
+ uint32_t dom;
+ unsigned int mem_kb, mem_pages;
+
+ if (!PyArg_ParseTuple(args, "ii", &dom, &mem_kb))
+ return NULL;
+
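+ /* xc_domain_memory_set_pod_target() expects the target in pages, so
+ convert the KiB argument to 4 KiB pages. */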
+ mem_pages = mem_kb / 4;
+
+ if (xc_domain_memory_set_pod_target(self->xc_handle, dom, mem_pages,
+ NULL, NULL, NULL) != 0)
+ return pyxc_error_to_exception();
+
+ Py_INCREF(zero);
+ return zero;
+}
+
static PyObject *pyxc_domain_set_memmap_limit(XcObject *self, PyObject *args)
{
uint32_t dom;
" maxmem_kb [int]: .\n"
"Returns: [int] 0 on success; -1 on error.\n" },
+ { "domain_set_target_mem",
+ (PyCFunction)pyxc_domain_set_target_mem,
+ METH_VARARGS, "\n"
+ "Set a domain's memory target\n"
+ " dom [int]: Identifier of domain.\n"
+ " mem_kb [int]: .\n"
+ "Returns: [int] 0 on success; -1 on error.\n" },
+
{ "domain_set_memmap_limit",
(PyCFunction)pyxc_domain_set_memmap_limit,
METH_VARARGS, "\n"
xs_set_error(EINVAL);
goto exit;
}
+
xsperms_n = PyList_Size(perms);
- xsperms = calloc(xsperms_n, sizeof(struct xs_permissions));
+ /* NB. alloc +1 so we can change the owner if necessary. */
+ xsperms = calloc(xsperms_n + 1, sizeof(struct xs_permissions));
if (!xsperms) {
xs_set_error(ENOMEM);
goto exit;
}
+
tuple0 = PyTuple_New(0);
if (!tuple0)
goto exit;
+
for (i = 0; i < xsperms_n; i++) {
/* Read/write perms. Set these. */
int p_read = 0, p_write = 0;
if (p_write)
xsperms[i].perms |= XS_PERM_WRITE;
}
+
+ /*
+ * Is the caller trying to restrict access to the first specified
+ * domain? If so then it cannot be owner, so we force dom0 as owner.
+ */
+ if (xsperms_n && xsperms[0].perms && xsperms[0].id) {
+ memmove(&xsperms[1], &xsperms[0], xsperms_n * sizeof(*xsperms));
+ xsperms[0].id = xsperms[0].perms = 0;
+ xsperms_n++;
+ }
+
Py_BEGIN_ALLOW_THREADS
result = xs_set_permissions(xh, th, path, xsperms, xsperms_n);
Py_END_ALLOW_THREADS
major = scsi_major[((ord(n[7:8]) - ord('a') + 1) * 26 + (ord(n[8:9]) - ord('a'))) / 16 ]
minor = (((ord(n[7:8]) - ord('a') + 1 ) * 26 + (ord(n[8:9]) - ord('a'))) % 16) * 16 + int(n[9:] or 0)
devnum = major * 256 + minor
- elif re.match( '/dev/hd[a-t]([1-9]|[1-5][0-9]|6[0-3])?', n):
+ elif re.match( '/dev/hd[a-t]([1-9]|[1-5][0-9]|6[0-3])?$', n):
ide_majors = [ 3, 22, 33, 34, 56, 57, 88, 89, 90, 91 ]
major = ide_majors[(ord(n[7:8]) - ord('a')) / 2]
minor = ((ord(n[7:8]) - ord('a')) % 2) * 64 + int(n[8:] or 0)
from xen.xend.XendClient import server
from xen.xend.XendError import XendError
from xen.xend.xenstore.xstransact import xstransact
-from xen.xend.server import DevController
+from xen.xend.server import DevConstants
import xen.xend.XendProtocol
def stateString(state):
- return state and DevController.xenbusState[int(state)] or '<None>'
+ return state and DevConstants.xenbusState[int(state)] or '<None>'
def main(argv = None):
f = fcntl.fcntl(file, fcntl.F_GETFD)
if bool: f |= fcntl.FD_CLOEXEC
else: f &= ~fcntl.FD_CLOEXEC
- fcntl.fcntl(file, fcntl.F_SETFD)
+ fcntl.fcntl(file, fcntl.F_SETFD, f)
def waitstatus_description(st):
if os.WIFEXITED(st):
import types
import struct
import time
+import threading
from xen.util import utils
PROC_PCI_PATH = '/proc/bus/pci/devices'
PCI_CAP_ID_PM = 0x01
PCI_PM_CTRL = 4
-PCI_PM_CTRL_NO_SOFT_RESET = 0x0004
+PCI_PM_CTRL_NO_SOFT_RESET = 0x0008
PCI_PM_CTRL_STATE_MASK = 0x0003
PCI_D3hot = 3
+PCI_D0hot = 0
VENDOR_INTEL = 0x8086
PCI_CAP_ID_VENDOR_SPECIFIC_CAP = 0x09
# Global variable to store information from lspci
lspci_info = None
+lspci_info_lock = threading.RLock()
#Calculate PAGE_SHIFT: number of bits to shift an address to get the page number
PAGE_SIZE = resource.getpagesize()
return pci_devs
-def create_lspci_info():
+def _create_lspci_info():
+ """Execute 'lspci' command and parse the result.
+ If the command does not exist, lspci_info will be kept blank ({}).
+
+ Must be called with lspci_info_lock held.
+ """
global lspci_info
+
lspci_info = {}
- # Execute 'lspci' command and parse the result.
- # If the command does not exist, lspci_info will be kept blank ({}).
for paragraph in os.popen(LSPCI_CMD + ' -vmm').read().split('\n\n'):
device_name = None
device_info = {}
if device_name is not None:
lspci_info[device_name] = device_info
+def create_lspci_info():
+ global lspci_info_lock
+ lspci_info_lock.acquire()
+ try:
+ _create_lspci_info()
+ finally:
+ lspci_info_lock.release()
+
def save_pci_conf_space(devs_string):
pci_list = []
cfg_list = []
return dev_list
def transform_list(target, src):
- ''' src: its element is pci string (Format: xxxx:xx:xx:x).
+ ''' src: its element is pci string (Format: xxxx:xx:xx.x).
target: its element is pci string, or a list of pci string.
If all the elements in src are in target, we remove them from target
coassigned_pci_list = dev.find_all_the_multi_functions()
need_transform = True
elif dev.dev_type == DEV_TYPE_PCI and not dev.pci_af_flr:
- coassigned_pci_list = dev.find_coassigned_devices(True)
+ coassigned_pci_list = dev.find_coassigned_pci_devices(True)
del coassigned_pci_list[0]
need_transform = True
list = list + [dev.name]
return list
- def find_coassigned_devices(self, ignore_bridge = True):
+ def find_coassigned_pci_devices(self, ignore_bridge = True):
''' Here'self' is a PCI device, we need find the uppermost PCI/PCI-X
bridge, and all devices behind it must be co-assigned to the same
guest.
os.lseek(fd, PCI_CB_BRIDGE_CONTROL, 0)
br_cntl |= PCI_BRIDGE_CTL_BUS_RESET
os.write(fd, struct.pack('H', br_cntl))
- time.sleep(0.200)
+ time.sleep(0.100)
# De-assert Secondary Bus Reset
os.lseek(fd, PCI_CB_BRIDGE_CONTROL, 0)
br_cntl &= ~PCI_BRIDGE_CTL_BUS_RESET
os.write(fd, struct.pack('H', br_cntl))
- time.sleep(0.200)
+ time.sleep(0.100)
os.close(fd)
# Restore the config spaces
if pos == 0:
return False
+ # No_Soft_Reset - When set to 1, this bit indicates that
+ # devices transitioning from D3hot to D0 because of
+ # PowerState commands do not perform an internal reset, so the
+ # D3hot -> D0 transition below would not actually reset the device.
+ pm_ctl = self.pci_conf_read32(pos + PCI_PM_CTRL)
+ if pm_ctl & PCI_PM_CTRL_NO_SOFT_RESET:
+ return False
+
(pci_list, cfg_list) = save_pci_conf_space([self.name])
- # Enter D3hot without soft reset
- pm_ctl = self.pci_conf_read32(pos + PCI_PM_CTRL)
- pm_ctl |= PCI_PM_CTRL_NO_SOFT_RESET
+ # Enter D3hot
pm_ctl &= ~PCI_PM_CTRL_STATE_MASK
pm_ctl |= PCI_D3hot
self.pci_conf_write32(pos + PCI_PM_CTRL, pm_ctl)
time.sleep(0.010)
# From D3hot to D0
- self.pci_conf_write32(pos + PCI_PM_CTRL, 0)
+ pm_ctl &= ~PCI_PM_CTRL_STATE_MASK
+ pm_ctl |= PCI_D0hot
+ self.pci_conf_write32(pos + PCI_PM_CTRL, pm_ctl)
time.sleep(0.010)
restore_pci_conf_space((pci_list, cfg_list))
(pci_list, cfg_list) = save_pci_conf_space([self.name])
self.pci_conf_write8(pos + PCI_USB_FLRCTRL, 1)
- time.sleep(0.010)
+ time.sleep(0.100)
restore_pci_conf_space((pci_list, cfg_list))
funcs = re.findall(p, pci_names)
return funcs
+ def find_coassigned_devices(self):
+ if self.dev_type == DEV_TYPE_PCIe_ENDPOINT and not self.pcie_flr:
+ return self.find_all_the_multi_functions()
+ elif self.dev_type == DEV_TYPE_PCI and not self.pci_af_flr:
+ coassigned_pci_list = self.find_coassigned_pci_devices(True)
+ del coassigned_pci_list[0]
+ return coassigned_pci_list
+ else:
+ return [self.name]
+
def find_cap_offset(self, cap):
path = find_sysfs_mnt()+SYSFS_PCI_DEVS_PATH+'/'+ \
self.name+SYSFS_PCI_DEV_CONFIG_PATH
self.dev_type = DEV_TYPE_PCI_BRIDGE
else:
creg = self.pci_conf_read16(pos + PCI_EXP_FLAGS)
- if ((creg & PCI_EXP_TYPE_PCI_BRIDGE) >> 4) == \
+ if ((creg & PCI_EXP_FLAGS_TYPE) >> 4) == \
PCI_EXP_TYPE_PCI_BRIDGE:
self.dev_type = DEV_TYPE_PCI_BRIDGE
else:
pos = self.find_cap_offset(PCI_CAP_ID_EXP)
self.pci_conf_write32(pos + PCI_EXP_DEVCTL, PCI_EXP_DEVCTL_FLR)
# We must sleep at least 100ms for the completion of FLR
- time.sleep(0.200)
+ time.sleep(0.100)
restore_pci_conf_space((pci_list, cfg_list))
else:
if self.bus == 0:
# We use Advanced Capability to do FLR.
pos = self.find_cap_offset(PCI_CAP_ID_AF)
self.pci_conf_write8(pos + PCI_AF_CTL, PCI_AF_CTL_FLR)
- time.sleep(0.200)
+ time.sleep(0.100)
restore_pci_conf_space((pci_list, cfg_list))
else:
if self.bus == 0:
self.do_FLR_for_integrated_device()
else:
- devs = self.find_coassigned_devices(False)
+ devs = self.find_coassigned_pci_devices(False)
# Remove the element 0 which is a bridge
target_bus = devs[0]
del devs[0]
Since we cannot obtain these data from sysfs, use 'lspci' command.
"""
global lspci_info
+ global lspci_info_lock
- if lspci_info is None:
- create_lspci_info()
-
+ lspci_info_lock.acquire()
try:
- device_info = lspci_info[self.name]
- self.revision = int(device_info['Rev'], 16)
- self.vendorname = device_info['Vendor']
- self.devicename = device_info['Device']
- self.classname = device_info['Class']
- self.subvendorname = device_info['SVendor']
- self.subdevicename = device_info['SDevice']
- except KeyError:
- pass
+ if lspci_info is None:
+ _create_lspci_info()
- return True
+ try:
+ device_info = lspci_info[self.name]
+ self.revision = int(device_info['Rev'], 16)
+ self.vendorname = device_info['Vendor']
+ self.devicename = device_info['Device']
+ self.classname = device_info['Class']
+ self.subvendorname = device_info['SVendor']
+ self.subdevicename = device_info['SDevice']
+ except KeyError:
+ pass
+
+ return True
+ finally:
+ lspci_info_lock.release()
def __str__(self):
str = "PCI Device %s\n" % (self.name)
--- /dev/null
+""" Reader-writer lock implementation based on a condition variable """
+
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2008 International Business Machines Corp.
+# Author: Stefan Berger <stefanb@us.ibm.com>
+#============================================================================
+
+from threading import Condition
+
+class RWLock:
+
+ RWLOCK_STATE_WRITER = -1
+ RWLOCK_STATE_UNUSED = 0
+
+ def __init__(self):
+ self.__condition = Condition()
+ self.__state = RWLock.RWLOCK_STATE_UNUSED
+ self.__blocked_writers = 0
+
+ def acquire_reader(self):
+ self.__condition.acquire()
+ while True:
+ if self.__state == RWLock.RWLOCK_STATE_WRITER:
+ self.__condition.wait()
+ else:
+ break
+ self.__state += 1
+ self.__condition.release()
+
+ def acquire_writer(self):
+ self.__condition.acquire()
+ self.__acquire_writer(RWLock.RWLOCK_STATE_UNUSED)
+ self.__condition.release()
+
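+ # Wait until the lock state equals 'wait_for_state' (RWLOCK_STATE_UNUSED,
+ # i.e. no readers and no writer), then mark the lock as writer-held.
+ # Must be called with self.__condition acquired.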
+ def __acquire_writer(self, wait_for_state):
+ while True:
+ if self.__state == wait_for_state:
+ self.__state = RWLock.RWLOCK_STATE_WRITER
+ break
+ else:
+ self.__blocked_writers += 1
+ self.__condition.wait()
+ self.__blocked_writers -= 1
+
+ def release(self):
+ self.__condition.acquire()
+ if self.__state == RWLock.RWLOCK_STATE_WRITER:
+ self.__state = RWLock.RWLOCK_STATE_UNUSED
+ elif self.__state == RWLock.RWLOCK_STATE_UNUSED:
+ assert False, 'Lock not in use.'
+ else:
+ self.__state -= 1
+ self.__condition.notifyAll()
+ self.__condition.release()
+
+
+if __name__ == '__main__':
+ from threading import Thread
+ from time import sleep
+
+ rwlock = RWLock()
+
+ class Base(Thread):
+ def __init__(self, name, timeout):
+ self.name = name
+ self.timeout = timeout
+ Thread.__init__(self)
+
+ class Reader(Base):
+ def __init__(self, name = 'Reader', timeout = 10):
+ Base.__init__(self, name, timeout)
+
+ def run(self):
+ print '%s begin' % self.name
+ rwlock.acquire_reader()
+ print '%s acquired' % self.name
+ sleep(self.timeout)
+ rwlock.release()
+ print '%s end' % self.name
+
+ class ReaderTwice(Base):
+ def __init__(self, name = 'Reader', timeout = 10):
+ Base.__init__(self, name, timeout)
+
+ def run(self):
+ print '%s begin' % self.name
+ rwlock.acquire_reader()
+ print '%s acquired once' % self.name
+ sleep(self.timeout)
+ rwlock.acquire_reader()
+ print '%s acquired twice' % self.name
+ sleep(self.timeout)
+ rwlock.release()
+ rwlock.release()
+ print '%s end' % self.name
+
+ class Writer(Base):
+ def __init__(self, name = 'Writer', timeout = 10):
+ Base.__init__(self, name, timeout)
+
+ def run(self):
+ print '%s begin' % self.name
+ rwlock.acquire_writer()
+ print '%s acquired' % self.name
+ sleep(self.timeout)
+ rwlock.release()
+ print '%s end' % self.name
+
+ def run_test(threadlist, msg):
+ print msg
+ for t in threadlist:
+ t.start()
+ sleep(1)
+ for t in threadlist:
+ t.join()
+ print 'Done\n\n'
+
+ threads = []
+ threads.append( Reader('R1', 4) )
+ threads.append( Reader('R2', 4) )
+ threads.append( Writer('W1', 4) )
+ threads.append( Reader('R3', 4) )
+ run_test(threads,
+ 'Test: readers may bypass blocked writers')
SYSFS_SCSI_DEV_REVISION_PATH = '/rev'
SYSFS_SCSI_DEV_SCSILEVEL_PATH = '/scsi_level'
-def _vscsi_hctl_block(name, scsi_devices):
- """ block-device name is convert into hctl. (e.g., '/dev/sda',
- '0:0:0:0')"""
+def _vscsi_get_devname_by(name, scsi_devices):
+ """A device name is gotten by the HCTL.
+ (e.g., '0:0:0:0' to '/dev/sda')
+ """
+
try:
search = re.compile(r'' + name + '$', re.DOTALL)
except Exception, e:
raise VmError("vscsi: invalid expression. " + str(e))
- chk = 0
- for hctl, block, sg, scsi_id in scsi_devices:
+
+ for hctl, devname, sg, scsi_id in scsi_devices:
if search.match(hctl):
- chk = 1
- break
+ return (hctl, devname)
- if chk:
- return (hctl, block)
- else:
- return (None, None)
+ return (None, None)
-def _vscsi_block_scsiid_to_hctl(phyname, scsi_devices):
- """ block-device name is convert into hctl. (e.g., '/dev/sda',
- '0:0:0:0')"""
+def _vscsi_get_hctl_by(phyname, scsi_devices):
+ """An HCTL is gotten by the device name or the scsi_id.
+ (e.g., '/dev/sda' to '0:0:0:0')
+ """
if re.match('/dev/sd[a-z]+([1-9]|1[0-5])?$', phyname):
# sd driver
# scsi_id -gu
name = phyname
- chk = 0
- for hctl, block, sg, scsi_id in scsi_devices:
- if block == name:
- chk = 1
- break
- elif sg == name:
- chk = 1
- break
- elif scsi_id == name:
- chk = 1
- break
-
- if chk:
- return (hctl, block)
- else:
- return (None, None)
+ for hctl, devname, sg, scsi_id in scsi_devices:
+ if name in [devname, sg, scsi_id]:
+ return (hctl, devname)
+ return (None, None)
+
+
+def _vscsi_get_scsiid(sg):
+ scsi_id = os.popen('/sbin/scsi_id -gu -s /class/scsi_generic/' + sg).read().split()
+ if len(scsi_id):
+ return scsi_id[0]
+ return None
-def vscsi_get_scsidevices():
- """ get all scsi devices"""
+
+def _vscsi_get_scsidevices_by_lsscsi(option = ""):
+ """ get all scsi devices information by lsscsi """
devices = []
- sysfs_mnt = utils.find_sysfs_mount()
+
+ for scsiinfo in os.popen('{ lsscsi -g %s; } 2>/dev/null' % option).readlines():
+ s = scsiinfo.split()
+ hctl = s[0][1:-1]
+ try:
+ devname = s[-2].split('/dev/')[1]
+ except IndexError:
+ devname = None
+ try:
+ sg = s[-1].split('/dev/')[1]
+ scsi_id = _vscsi_get_scsiid(sg)
+ except IndexError:
+ sg = None
+ scsi_id = None
+ devices.append([hctl, devname, sg, scsi_id])
+
+ return devices
+
+
+def _vscsi_get_scsidevices_by_sysfs():
+ """ get all scsi devices information by sysfs """
+
+ devices = []
+ try:
+ sysfs_mnt = utils.find_sysfs_mount()
+ except:
+ return devices
for dirpath, dirnames, files in os.walk(sysfs_mnt + SYSFS_SCSI_PATH):
for hctl in dirnames:
paths = os.path.join(dirpath, hctl)
- block = "-"
+ devname = None
+ sg = None
+ scsi_id = None
for f in os.listdir(paths):
- if re.match('^block', f):
- os.chdir(os.path.join(paths, f))
- block = os.path.basename(os.getcwd())
- elif re.match('^tape', f):
- os.chdir(os.path.join(paths, f))
- block = os.path.basename(os.getcwd())
- elif re.match('^scsi_changer', f):
- os.chdir(os.path.join(paths, f))
- block = os.path.basename(os.getcwd())
- elif re.match('^onstream_tape', f):
- os.chdir(os.path.join(paths, f))
- block = os.path.basename(os.getcwd())
+ realpath = os.path.realpath(os.path.join(paths, f))
+ if re.match('^block', f) or \
+ re.match('^tape', f) or \
+ re.match('^scsi_changer', f) or \
+ re.match('^onstream_tape', f):
+ devname = os.path.basename(realpath)
if re.match('^scsi_generic', f):
- os.chdir(os.path.join(paths, f))
- sg = os.path.basename(os.getcwd())
- lines = os.popen('/sbin/scsi_id -gu -s /class/scsi_generic/' + sg).read().split()
- if len(lines) == 0:
- scsi_id = '-'
- else:
- scsi_id = lines[0]
-
- devices.append([hctl, block, sg, scsi_id])
+ sg = os.path.basename(realpath)
+ scsi_id = _vscsi_get_scsiid(sg)
+ devices.append([hctl, devname, sg, scsi_id])
return devices
-def vscsi_search_hctl_and_block(device):
-
- scsi_devices = vscsi_get_scsidevices()
-
- tmp = device.split(':')
- if len(tmp) == 4:
- (hctl, block) = _vscsi_hctl_block(device, scsi_devices)
+def vscsi_get_scsidevices():
+ """ get all scsi devices information """
+
+ devices = _vscsi_get_scsidevices_by_lsscsi("")
+ if devices:
+ return devices
+ return _vscsi_get_scsidevices_by_sysfs()
+
+
+def vscsi_get_hctl_and_devname_by(target, scsi_devices = None):
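+ """Return (HCTL, devname) for the given target, which may be an HCTL
+ ('X:X:X:X'), a device name, an sg name or a scsi_id.
+ """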
+ if scsi_devices is None:
+ if len(target.split(':')) == 4:
+ scsi_devices = _vscsi_get_scsidevices_by_lsscsi(target)
+ elif target.startswith('/dev/'):
+ scsi_devices = _vscsi_get_scsidevices_by_lsscsi("| grep %s" % target)
+ else:
+ scsi_devices = _vscsi_get_scsidevices_by_lsscsi("")
+ if not scsi_devices:
+ scsi_devices = _vscsi_get_scsidevices_by_sysfs()
+
+ if len(target.split(':')) == 4:
+ return _vscsi_get_devname_by(target, scsi_devices)
else:
- (hctl, block) = _vscsi_block_scsiid_to_hctl(device, scsi_devices)
-
- return (hctl, block)
+ return _vscsi_get_hctl_by(target, scsi_devices)
def get_scsi_vendor(pHCTL):
'sg_name': scsi_info[2],
'scsi_id': None
}
- if scsi_info[1] != '-':
+ if scsi_info[1] is not None:
scsi_dev['dev_name'] = scsi_info[1]
- if scsi_info[3] != '-':
+ if scsi_info[3] is not None:
scsi_dev['scsi_id'] = scsi_info[3]
scsi_dev['vendor_name'] = \
get_scsi_scsilevel(scsi_dev['physical_HCTL'])
try:
- lsscsi_info = os.popen('lsscsi ' + scsi_dev['physical_HCTL']).read().split()
+ lsscsi_info = os.popen('lsscsi %s 2>/dev/null' % scsi_dev['physical_HCTL']).read().split()
scsi_dev['type'] = lsscsi_info[1]
except:
scsi_dev['type'] = None
from xen.xend import sxp
from xen.xend import PrettyPrint
from xen.xend.Args import ArgError
-from xen.xend.XendError import XendError
+from xen.xend.XendError import XendError, XendInvalidDomain
#from xen.xend.XendLogging import log
import resource
val = self.get(x)
except XendError, ex:
return self.noChild(str(ex))
+ except XendInvalidDomain, ex:
+ return self.noChild(str(ex))
if val is None:
return self.noChild('Not found: ' + str(x))
else:
return True
log.warn("Rejected connection from %s (%s).", addrport[0], fqdn)
return False
+
+
+class SocketDgramListener:
+ """A connectionless server socket, running listen in a thread.
+ """
+
+ def __init__(self, protocol_class):
+ self.protocol = protocol_class()
+ self.sock = self.createSocket()
+ threading.Thread(target=self.main).start()
+
+
+ def close(self):
+ try:
+ self.sock.close()
+ except:
+ pass
+
+
+ def createSocket(self):
+ raise NotImplementedError()
+
+
+ def main(self):
+ try:
+ while True:
+ try:
+ data = self.sock.recv(BUFFER_SIZE)
+ self.protocol.dataReceived(data)
+ except socket.error, ex:
+ if ex.args[0] not in (EWOULDBLOCK, EAGAIN, EINTR):
+ break
+ finally:
+ try:
+ self.close()
+ except:
+ pass
import connection
-def bind(path):
- """Create a Unix socket, and bind it to the given path. The socket is
-created such that only the current user may access it."""
+def bind(path, type = socket.SOCK_STREAM):
+ """Create a Unix socket, and bind it to the given path.
+ The socket is created such that only the current user may access it."""
- parent = os.path.dirname(path)
- mkdir.parents(parent, stat.S_IRWXU, True)
- if os.path.exists(path):
- os.unlink(path)
+ if path[0] == '\0': # Abstract namespace is used for the path
+ pass
+ else:
+ parent = os.path.dirname(path)
+ mkdir.parents(parent, stat.S_IRWXU, True)
+ if os.path.exists(path):
+ os.unlink(path)
- sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+ sock = socket.socket(socket.AF_UNIX, type)
sock.bind(path)
return sock
def createSocket(self):
- return bind(self.path)
+ return bind(self.path, socket.SOCK_STREAM)
def acceptConnection(self, sock, _):
connection.SocketServerConnection(sock, self.protocol_class)
+
+
+class UnixDgramListener(connection.SocketDgramListener):
+ def __init__(self, path, protocol_class):
+ self.path = path
+ connection.SocketDgramListener.__init__(self, protocol_class)
+
+
+ def createSocket(self):
+ return bind(self.path, socket.SOCK_DGRAM)
+
import XendDomain, XendDomainInfo, XendNode, XendDmesg
import XendLogging, XendTaskManager, XendAPIStore
+from xen.xend import uuid as genuuid
from XendAPIVersion import *
from XendAuthSessions import instance as auth_manager
from XendError import *
lambda *args, **kwargs: \
_check_ref(lambda r: \
XendAPIStore.get(r, class_name) is not None,
- 'PIF', func, *args, **kwargs)
+ class_name, func, *args, **kwargs)
# -----------------------------
# Bridge to Legacy XM API calls
dom = xendom.get_vm_by_uuid(vbd_struct['VM'])
vdi = xennode.get_vdi_by_uuid(vbd_struct['VDI'])
if not vdi:
- return xen_api_error(['HANDLE_INVALID', 'VDI', vdi_ref])
+ return xen_api_error(['HANDLE_INVALID', 'VDI', vbd_struct['VDI']])
# new VBD via VDI/SR
vdi_image = vdi.get_location()
tpmif.destroy_vtpmstate(dom.getName())
return xen_api_success_void()
else:
- return xen_api_error(['HANDLE_INVALID', 'VM', vtpm_struct['VM']])
+ return xen_api_error(['HANDLE_INVALID', 'VTPM', vtpm_ref])
# class methods
def VTPM_create(self, session, vtpm_struct):
return xen_api_success_void()
def event_unregister(self, session, unreg_classes):
- event_unregister(session, reg_classes)
+ event_unregister(session, unreg_classes)
return xen_api_success_void()
def event_next(self, session):
return xen_api_error(['DEBUG_FAIL', session])
def debug_create(self, session):
- debug_uuid = uuid.createString()
+ debug_uuid = genuuid.createString()
self._debug[debug_uuid] = None
return xen_api_success(debug_uuid)
by type, to ensure safety
"""
+import threading
+
__classes = {}
+__classes_lock = threading.RLock()
def register(uuid, type, inst):
- __classes[(uuid, type)] = inst
- return inst
+ __classes_lock.acquire()
+ try:
+ __classes[(uuid, type)] = inst
+ return inst
+ finally:
+ __classes_lock.release()
def deregister(uuid, type):
- old = get(uuid, type)
- del __classes[(uuid, type)]
- return old
+ __classes_lock.acquire()
+ try:
+ old = get(uuid, type)
+ if old is not None:
+ del __classes[(uuid, type)]
+ return old
+ finally:
+ __classes_lock.release()
def get(uuid, type):
"""
Get the instances by uuid and type
"""
- return __classes.get((uuid, type), None)
+ __classes_lock.acquire()
+ try:
+ return __classes.get((uuid, type), None)
+ finally:
+ __classes_lock.release()
def get_all(all_type):
"""
Get all instances by type
"""
- return [inst
- for ((uuid, t), inst) in __classes.items()
- if t == all_type]
+ __classes_lock.acquire()
+ try:
+ return [inst
+ for ((uuid, t), inst) in __classes.items()
+ if t == all_type]
+ finally:
+ __classes_lock.release()
def get_all_uuid(all_type):
"""
Get all uuids by type
"""
- return [uuid
- for (uuid, t) in __classes.keys()
- if t == all_type]
+ __classes_lock.acquire()
+ try:
+ return [uuid
+ for (uuid, t) in __classes.keys()
+ if t == all_type]
+ finally:
+ __classes_lock.release()
# listening on the bootloader's fifo for the results.
(m1, s1) = pty.openpty()
- tty.setraw(m1);
- fcntl.fcntl(m1, fcntl.F_SETFL, os.O_NDELAY);
- os.close(s1)
+
+ # On Solaris, the pty master side will get cranky if we try
+ # to write to it while there is no slave. To work around this,
+ # keep the slave descriptor open until we're done. Set it
+ # to raw terminal parameters, otherwise it will echo back
+ # characters, which will confuse the I/O loop below.
+ # Furthermore, a raw master pty device has no terminal
+ # semantics on Solaris, so don't try to set any attributes
+ # for it.
+ if os.uname()[0] != 'SunOS' and os.uname()[0] != 'NetBSD':
+ tty.setraw(m1)
+ os.close(s1)
+ else:
+ tty.setraw(s1)
+
+ fcntl.fcntl(m1, fcntl.F_SETFL, os.O_NDELAY)
+
slavename = ptsname.ptsname(m1)
dom.storeDom("console/tty", slavename)
# record that this domain is bootloading
dom.bootloader_pid = child
- tty.setraw(m2);
+ # On Solaris, the master pty side does not have terminal semantics,
+ # so don't try to set any attributes, as it will fail.
+ if os.uname()[0] != 'SunOS':
+ tty.setraw(m2);
+
fcntl.fcntl(m2, fcntl.F_SETFL, os.O_NDELAY);
while True:
try:
if e.errno == errno.EINTR:
continue
break
+
+ fcntl.fcntl(r, fcntl.F_SETFL, os.O_NDELAY);
+
ret = ""
inbuf=""; outbuf="";
+ # filedescriptors:
+ # r - input from the bootloader (bootstring output)
+ # m1 - input/output from/to xenconsole
+ # m2 - input/output from/to pty that controls the bootloader
+ # The filedescriptors are NDELAY, so it's ok to try to read
+ # bigger chunks than may be available, to keep e.g. curses
+ # screen redraws in the bootloader efficient. m1 is the side that
+ # gets xenconsole input, which will be keystrokes, so a small number
+ # is sufficient. m2 is pygrub output, which will be curses screen
+ # updates, so a larger number (1024) is appropriate there.
+ #
+ # For writeable descriptors, only include them in the set for select
+ # if there is actual data to write, otherwise this would loop too fast,
+ # eating up CPU time.
+
while True:
- sel = select.select([r, m1, m2], [m1, m2], [])
+ wsel = []
+ if len(outbuf) != 0:
+ wsel = wsel + [m1]
+ if len(inbuf) != 0:
+ wsel = wsel + [m2]
+ sel = select.select([r, m1, m2], wsel, [])
try:
if m1 in sel[0]:
- s = os.read(m1, 1)
+ s = os.read(m1, 16)
inbuf += s
- if m2 in sel[1] and len(inbuf) != 0:
- os.write(m2, inbuf[0])
- inbuf = inbuf[1:]
+ if m2 in sel[1]:
+ n = os.write(m2, inbuf)
+ inbuf = inbuf[n:]
except OSError, e:
if e.errno == errno.EIO:
pass
try:
if m2 in sel[0]:
- s = os.read(m2, 1)
+ s = os.read(m2, 1024)
outbuf += s
- if m1 in sel[1] and len(outbuf) != 0:
- os.write(m1, outbuf[0])
- outbuf = outbuf[1:]
+ if m1 in sel[1]:
+ n = os.write(m1, outbuf)
+ outbuf = outbuf[n:]
except OSError, e:
if e.errno == errno.EIO:
pass
if r in sel[0]:
- s = os.read(r, 1)
+ s = os.read(r, 128)
ret = ret + s
if len(s) == 0:
break
os.close(r)
os.close(m2)
os.close(m1)
+ if os.uname()[0] == 'SunOS' or os.uname()[0] == 'NetBSD':
+ os.close(s1)
os.unlink(fifo)
# Re-acquire the lock to cover the changes we're about to make
def save(fd, dominfo, network, live, dst, checkpoint=False, node=-1):
+ try:
+ if not os.path.isdir("/var/lib/xen"):
+ os.makedirs("/var/lib/xen")
+ except Exception, exn:
+ log.exception("Can't create directory '/var/lib/xen'")
+ raise XendError("Can't create directory '/var/lib/xen'")
+
write_exact(fd, SIGNATURE, "could not write guest state file: signature")
sxprep = dominfo.sxpr()
if line == "suspend":
log.debug("Suspending %d ...", dominfo.getDomid())
dominfo.shutdown('suspend')
- dominfo.waitForShutdown()
+ dominfo.waitForSuspend()
if line in ('suspend', 'suspended'):
dominfo.migrateDevices(network, dst, DEV_MIGRATE_STEP2,
domain_name)
def restore(xd, fd, dominfo = None, paused = False, relocating = False):
+ try:
+ if not os.path.isdir("/var/lib/xen"):
+ os.makedirs("/var/lib/xen")
+ except Exception, exn:
+ log.exception("Can't create directory '/var/lib/xen'")
+ raise XendError("Can't create directory '/var/lib/xen'")
+
signature = read_exact(fd, len(SIGNATURE),
"not a valid guest state file: signature read")
if signature != SIGNATURE:
# set memory limit
xc.domain_setmaxmem(dominfo.getDomid(), maxmem)
- balloon.free(memory + shadow)
+ balloon.free(memory + shadow, dominfo)
shadow_cur = xc.shadow_mem_control(dominfo.getDomid(), shadow / 1024)
dominfo.info['shadow_memory'] = shadow_cur
'opengl': int,
'soundhw': str,
'stdvga': int,
+ 'videoram': int,
'usb': int,
'usbdevice': str,
'hpet': int,
'vncdisplay': int,
'vnclisten': str,
'timer_mode': int,
+ 'vpt_align': int,
'viridian': int,
'vncpasswd': str,
'vncunused': int,
'guest_os_type': str,
'hap': int,
'xen_extended_power_mgmt': int,
+ 'pci_msitranslate': int,
+ 'pci_power_mgmt': int,
+ 'xen_platform_pci': int,
}
# Xen API console 'other_config' keys.
XENAPI_CONSOLE_OTHER_CFG = ['vncunused', 'vncdisplay', 'vnclisten',
- 'vncpasswd', 'type', 'display', 'xauthority',
+ 'vncpasswd', 'sdl', 'vnc', 'display', 'xauthority',
'keymap', 'opengl']
# List of XendConfig configuration keys that have no direct equivalent
'cpuid_check' : dict,
'machine_address_size': int,
'suppress_spurious_page_faults': bool0,
+ 's3_integrity' : int,
}
# List of legacy configuration keys that have no equivalent in the
'on_xend_start': str,
'online_vcpus': int,
'rtc/timeoffset': str,
+ 'bootloader': str,
+ 'bootloader_args': str,
}
# Values that should be stored in xenstore's /vm/<uuid> that is used
'on_reboot',
'on_xend_start',
'on_xend_stop',
+ 'bootloader',
+ 'bootloader_args',
]
##
def _vcpus_sanity_check(self):
if 'VCPUs_max' in self and 'vcpu_avail' not in self:
self['vcpu_avail'] = (1 << self['VCPUs_max']) - 1
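+        # Keep the reported live VCPU count in sync with the stored
+        # online_vcpus value, when one is present.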
+ if 'online_vcpus' in self:
+ self['VCPUs_live'] = self['online_vcpus']
def _uuid_sanity_check(self):
"""Make sure UUID is in proper string format with hyphens."""
self['platform']['rtc_timeoffset'] = 0
if 'hpet' not in self['platform']:
self['platform']['hpet'] = 0
+ if 'xen_platform_pci' not in self['platform']:
+ self['platform']['xen_platform_pci'] = 1
+ if 'vpt_align' not in self['platform']:
+ self['platform']['vpt_align'] = 1
if 'loader' not in self['platform']:
# Old configs may have hvmloader set as PV_kernel param
if self.has_key('PV_kernel') and self['PV_kernel'] != '':
# add vfb device if it isn't there already
if not self.has_rfb():
dev_config = ['vfb']
- dev_config.append(['type', 'vnc'])
+ dev_config.append(['vnc', '1'])
# copy VNC related params from platform config to vfb dev conf
for key in ['vncpasswd', 'vncunused', 'vncdisplay',
'vnclisten']:
sxpr.append([name, s])
for xenapi, legacy in XENAPI_CFG_TO_LEGACY_CFG.items():
- if legacy in ('cpus'): # skip this
- continue
if self.has_key(xenapi) and self[xenapi] not in (None, []):
if type(self[xenapi]) == bool:
# convert booleans to ints before making an sxp item
return None
return devid
- def device_duplicate_check(self, dev_type, dev_info, defined_config):
+ def device_duplicate_check(self, dev_type, dev_info, defined_config, config):
defined_devices_sxpr = self.all_devices_sxpr(target = defined_config)
if dev_type == 'vbd' or dev_type == 'tap':
if blkdev_file == o_blkdev_file:
raise XendConfigError('The file "%s" is already used' %
blkdev_file)
+ if dev_uname == o_dev_uname:
+ raise XendConfigError('The uname "%s" is already defined' %
+ dev_uname)
o_blkdev_name = sxp.child_value(o_dev_info, 'dev')
o_devid = self._blkdev_name_to_number(o_blkdev_name)
if o_devid != None and devid == o_devid:
+ name_array = blkdev_name.split(':', 2)
+ if len(name_array) == 2 and name_array[1] == 'cdrom':
+ #
+ # Since the device is a cdrom, we are most likely
+ # inserting, changing, or removing a cd. We can
+ # update the old device instead of creating a new
+ # one.
+ #
+ if o_dev_uname != None and dev_uname == None:
+ #
+ # We are removing a cd. We can simply update
+ # the uname on the existing device.
+ #
+ merge_sxp = sxp.from_string("('vbd' ('uname' ''))")
+ else:
+ merge_sxp = config
+
+ dev_uuid = sxp.child_value(o_dev_info, 'uuid')
+ if dev_uuid != None and \
+ self.device_update(dev_uuid, cfg_sxp = merge_sxp):
+ return dev_uuid
+
raise XendConfigError('The device "%s" is already defined' %
blkdev_name)
if dev_mac.lower() == sxp.child_value(o_dev_info, 'mac').lower():
raise XendConfigError('The mac "%s" is already defined' %
dev_mac)
+ return None
def device_add(self, dev_type, cfg_sxp = None, cfg_xenapi = None,
target = None):
'PPCI': ppci_uuid,
'hotplug_slot': pci_dev.get('vslot', 0)
}
+
+ dpci_opts = pci_dev.get('opts')
+ if dpci_opts and len(dpci_opts) > 0:
+ dpci_record['options'] = dpci_opts
+
XendDPCI(dpci_uuid, dpci_record)
target['devices'][pci_devs_uuid] = (dev_type,
uuid.createString())
vscsi_dict = self.vscsi_convert_sxp_to_dict(config)
vscsi_devs = vscsi_dict['devs']
+ vscsi_mode = vscsi_dict['feature-host']
+ vscsi_be = vscsi_dict.get('backend', None)
# create XenAPI DSCSI objects.
for vscsi_dev in vscsi_devs:
}
XendDSCSI(dscsi_uuid, dscsi_record)
- target['devices'][vscsi_devs_uuid] = \
- (dev_type, {'devs': vscsi_devs, 'uuid': vscsi_devs_uuid} )
- log.debug("XendConfig: reading device: %s" % vscsi_devs)
+ vscsi_info = {
+ 'devs': vscsi_devs,
+ 'feature-host': vscsi_mode,
+ 'uuid': vscsi_devs_uuid
+ }
+ if vscsi_be is not None:
+ vscsi_info['backend'] = vscsi_be
+ target['devices'][vscsi_devs_uuid] = (dev_type, vscsi_info)
+ log.debug("XendConfig: reading device: %s,%s" % \
+ (vscsi_devs, vscsi_mode))
return vscsi_devs_uuid
for opt_val in config[1:]:
pass
if dev_type == 'vbd':
- dev_info['bootable'] = 0
if dev_info.get('dev', '').startswith('ioemu:'):
dev_info['driver'] = 'ioemu'
else:
if not dev_info.get('mac'):
dev_info['mac'] = randomMAC()
- self.device_duplicate_check(dev_type, dev_info, target)
+ ret_uuid = self.device_duplicate_check(dev_type, dev_info, target, config)
+ if ret_uuid != None:
+ return ret_uuid
if dev_type == 'vif':
if dev_info.get('policy') and dev_info.get('label'):
if param not in target:
target[param] = []
if dev_uuid not in target[param]:
- if dev_type == 'vbd':
+ if dev_type == 'vbd' and 'bootable' not in dev_info:
# Compat hack -- mark first disk bootable
dev_info['bootable'] = int(not target[param])
target[param].append(dev_uuid)
if 'vbd_refs' not in target:
target['vbd_refs'] = []
if dev_uuid not in target['vbd_refs']:
- # Compat hack -- mark first disk bootable
- dev_info['bootable'] = int(not target['vbd_refs'])
+ if 'bootable' not in dev_info:
+ # Compat hack -- mark first disk bootable
+ dev_info['bootable'] = int(not target['vbd_refs'])
target['vbd_refs'].append(dev_uuid)
elif dev_type == 'vfb':
# collapse other config into devinfo for things
# such as vncpasswd, vncunused, etc.
dev_info.update(console_other_config)
- dev_info['type'] = console_other_config.get('type', 'vnc')
+ dev_info['vnc'] = console_other_config.get('vnc', '0')
+ dev_info['sdl'] = console_other_config.get('sdl', '0')
target['devices'][dev_uuid] = ('vfb', dev_info)
target['console_refs'].append(dev_uuid)
try:
opt, val = opt_val
pci_dev_info[opt] = val
- except TypeError:
+ except (TypeError, ValueError):
pass
# append uuid for each pci device.
dpci_uuid = pci_dev_info.get('uuid', uuid.createString())
#
# [device,
# [vscsi,
+ # [feature-host, 0],
+ # [backend, 0],
# [dev,
# [devid, 0], [p-devname, sdb], [p-dev, 1:0:0:1],
- # [v-dev, 0:0:0:0], [state, Initialising]
+ # [v-dev, 0:0:0:0], [state, 1]
# ],
# [dev,
# [devid, 0], [p-devname, sdc], [p-dev, 1:0:0:2],
- # [v-dev, 0:0:0:1], [satet, Initialising]
+  #            [v-dev, 0:0:0:1], [state, 1]
# ]
# ],
# [vscsi,
+ # [feature-host, 1],
+ # [backend, 0],
# [dev,
# [devid, 1], [p-devname, sdg], [p-dev, 2:0:0:0],
- # [v-dev, 1:0:0:0], [state, Initialising]
+ # [v-dev, 1:0:0:0], [state, 1]
# ],
# [dev,
# [devid, 1], [p-devname, sdh], [p-dev, 2:0:0:1],
- # [v-dev, 1:0:0:1], [satet, Initialising]
+  #            [v-dev, 1:0:0:1], [state, 1]
# ]
# ]
# ]
#
# [device,
# [vscsi,
+ # [feature-host, 0],
+ # [backend, 0],
# [dev,
# [devid, 0], [p-devname, sdd], [p-dev, 1:0:0:3],
- # [v-dev, 0:0:0:2], [state, Initialising]
+ # [v-dev, 0:0:0:2], [state, 1]
# ]
# ]
# ]
#
- # state 'Initialising' indicates that the device is being attached,
- # while state 'Closing' indicates that the device is being detached.
+ # state xenbusState['Initialising'] indicates that the device is
+ # being attached, while state xenbusState['Closing'] indicates
+ # that the device is being detached.
#
# The Dict looks like this:
#
# { devs: [ {devid: 0, p-devname: sdd, p-dev: 1:0:0:3,
- # v-dev: 0:0:0:2, state: Initialising} ] }
+ # v-dev: 0:0:0:2, state: 1} ],
+ # feature-host: 1 , backend: 0 }
dev_config = {}
vscsi_devs.append(vscsi_dev_info)
dev_config['devs'] = vscsi_devs
+ vscsi_mode = sxp.children(dev_sxp, 'feature-host')[0]
+ dev_config['feature-host'] = vscsi_mode[1]
+ try:
+ vscsi_be = sxp.children(dev_sxp, 'backend')[0]
+ dev_config['backend'] = vscsi_be[1]
+ except IndexError:
+ pass
+
return dev_config
def console_add(self, protocol, location, other_config = {}):
'PPCI': ppci_uuid,
'hotplug_slot': pci_dev.get('vslot', 0)
}
+
+ dpci_opts = pci_dev.get('opts')
+ if dpci_opts and len(dpci_opts) > 0:
+ dpci_record['options'] = dpci_opts
+
XendDPCI(dpci_uuid, dpci_record)
self['devices'][dev_uuid] = (dev_type,
if dev_type == 'vscsi': # Special case for vscsi
vscsi_dict = self.vscsi_convert_sxp_to_dict(config)
vscsi_devs = vscsi_dict['devs']
+ vscsi_mode = vscsi_dict['feature-host']
+ vscsi_be = vscsi_dict.get('backend', None)
# destroy existing XenAPI DSCSI objects
+ vscsi_devid = int(dev_info['devs'][0]['devid'])
for dscsi_uuid in XendDSCSI.get_by_VM(self['uuid']):
- XendAPIStore.deregister(dscsi_uuid, "DSCSI")
+ dscsi_inst = XendAPIStore.get(dscsi_uuid, 'DSCSI')
+ if vscsi_devid == dscsi_inst.get_virtual_host():
+ XendAPIStore.deregister(dscsi_uuid, "DSCSI")
# create XenAPI DSCSI objects.
for vscsi_dev in vscsi_devs:
}
XendDSCSI(dscsi_uuid, dscsi_record)
- self['devices'][dev_uuid] = \
- (dev_type, {'devs': vscsi_devs, 'uuid': dev_uuid} )
+ vscsi_info = {
+ 'devs': vscsi_devs,
+ 'feature-host': vscsi_mode,
+ 'uuid': dev_uuid
+ }
+ if vscsi_be is not None:
+ vscsi_info['backend'] = vscsi_be
+ self['devices'][dev_uuid] = (dev_type, vscsi_info)
return True
for opt_val in config[1:]:
def all_devices_sxpr(self, target = None):
"""Returns the SXPR for all devices in the current configuration."""
sxprs = []
- pci_devs = []
if target == None:
target = self
if dev_type == 'pci':
sxpr = ['pci', ['uuid', dev_info['uuid']]]
elif dev_type == 'vscsi':
- sxpr = ['vscsi', ['uuid', dev_info['uuid']]]
+ sxpr = ['vscsi', ['uuid', dev_info['uuid']],
+ ['feature-host', dev_info['feature-host']]]
+ if dev_info.has_key('backend'):
+ sxpr.append(['backend', dev_info['backend']])
for pci_dev_info in dev_info['devs']:
pci_dev_sxpr = ['dev']
for opt, val in pci_dev_info.items():
HVM_PARAM_TIMER_MODE = 10
HVM_PARAM_HPET_ENABLED = 11
HVM_PARAM_ACPI_S_STATE = 14
+HVM_PARAM_VPT_ALIGN = 16
restart_modes = [
"restart",
ZOMBIE_PREFIX = 'Zombie-'
"""Minimum time between domain restarts in seconds."""
-MINIMUM_RESTART_TIME = 20
+MINIMUM_RESTART_TIME = 60
RESTART_IN_PROGRESS = 'xend/restart_in_progress'
DUMPCORE_IN_PROGRESS = 'xend/dumpcore_in_progress'
XS_VMROOT = "/vm/"
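+# A PCI bus has 32 device slots (0-31); NR_PCI_DEV is used below as the
+# marker value for an automatically assigned hotplug slot.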
+NR_PCI_DEV = 32
+AUTO_PHP_SLOT = NR_PCI_DEV
+AUTO_PHP_SLOT_STR = "%02x" % NR_PCI_DEV
'virtual_name',
'VM',
'PPCI',
- 'hotplug_slot']
+ 'hotplug_slot',
+ 'options']
return XendBase.getAttrRO() + attrRO
def getAttrRW(self):
self.VM = record['VM']
self.PPCI = record['PPCI']
self.hotplug_slot = record['hotplug_slot']
+ if 'options' in record.keys():
+ self.options = record['options']
def destroy(self):
xendom = XendDomain.instance()
def get_hotplug_slot(self):
return self.hotplug_slot
+ def get_options(self):
+ return self.options
from xen.xend.xenstore.xstransact import xstransact
from xen.xend.xenstore.xswatch import xswatch
-from xen.util import mkdir
+from xen.util import mkdir, rwlock
from xen.xend import uuid
xc = xen.lowlevel.xc.xc()
self.managed_domains = {}
self.domains_lock = threading.RLock()
+ self.policy_lock = rwlock.RWLock()
+
# xen api instance vars
# TODO: nothing uses this at the moment
self._allow_new_domains = True
log.exception("Unable to recreate domain")
try:
xc.domain_pause(domid)
- do_FLR(domid)
+ XendDomainInfo.do_FLR(domid)
xc.domain_destroy(domid)
except:
log.exception("Hard destruction of domain failed: %d" %
"""
try:
- return XendCheckpoint.restore(self, fd, paused=paused, relocating=relocating)
- except XendError, e:
- log.exception("Restore failed")
- raise
- except:
- # I don't really want to log this exception here, but the error
- # handling in the relocation-socket handling code (relocate.py) is
- # poor, so we need to log this for debugging.
- log.exception("Restore failed")
- raise XendError("Restore failed")
+ self.policy_lock.acquire_reader()
+
+ try:
+ return XendCheckpoint.restore(self, fd, paused=paused, relocating=relocating)
+ except XendError, e:
+ log.exception("Restore failed")
+ raise
+ except:
+ # I don't really want to log this exception here, but the error
+ # handling in the relocation-socket handling code (relocate.py) is
+ # poor, so we need to log this for debugging.
+ log.exception("Restore failed")
+ raise XendError("Restore failed")
+ finally:
+ self.policy_lock.release()
def domain_unpause(self, domid):
"""Unpause domain execution.
log.exception("domain_pause")
raise XendError(str(ex))
- def domain_dump(self, domid, filename, live, crash):
+ def domain_dump(self, domid, filename=None, live=False, crash=False, reset=False):
"""Dump domain core."""
dominfo = self.domain_lookup_nr(domid)
POWER_STATE_NAMES[DOM_STATE_PAUSED],
POWER_STATE_NAMES[dominfo._stateGet()])
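+        # Unless a live dump was requested, pause a running domain for the
+        # duration of the core dump and unpause it afterwards (unless it is
+        # being destroyed or reset).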
+ dopause = (not live and dominfo._stateGet() == DOM_STATE_RUNNING)
+ if dopause:
+ dominfo.pause()
+
try:
- log.info("Domain core dump requested for domain %s (%d) "
- "live=%d crash=%d.",
- dominfo.getName(), dominfo.getDomid(), live, crash)
- return dominfo.dumpCore(filename)
- except Exception, ex:
- raise XendError(str(ex))
+ try:
+ log.info("Domain core dump requested for domain %s (%d) "
+ "live=%d crash=%d reset=%d.",
+ dominfo.getName(), dominfo.getDomid(), live, crash, reset)
+ dominfo.dumpCore(filename)
+ if crash:
+ self.domain_destroy(domid)
+ elif reset:
+ self.domain_reset(domid)
+ except Exception, ex:
+ raise XendError(str(ex))
+ finally:
+ if dopause and not crash and not reset:
+ dominfo.unpause()
def domain_destroy(self, domid):
"""Terminate domain immediately.
else:
try:
xc.domain_pause(int(domid))
- do_FLR(int(domid))
+ XendDomainInfo.do_FLR(int(domid))
val = xc.domain_destroy(int(domid))
except ValueError:
raise XendInvalidDomain(domid)
from xen.xend.xenstore.xswatch import xswatch
from xen.xend.XendConstants import *
from xen.xend.XendAPIConstants import *
+from xen.xend.server.DevConstants import xenbusState
from xen.xend.XendVMMetrics import XendVMMetrics
log.trace("domain_getinfo(%d) failed, ignoring: %s", dom, str(err))
return None
-def do_FLR(domid):
- from xen.xend.server.pciif import parse_pci_name, PciDevice
+def get_assigned_pci_devices(domid):
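+    # Return the list of PCI device strings currently assigned to the given
+    # domain, as recorded under the dom0 pciback backend path in xenstore.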
+ dev_str_list = []
path = '/local/domain/0/backend/pci/%u/0/' % domid
num_devs = xstransact.Read(path + 'num_devs');
if num_devs is None or num_devs == "":
- return;
-
- num_devs = int(xstransact.Read(path + 'num_devs'));
-
- dev_str_list = []
+ return dev_str_list
+ num_devs = int(num_devs);
for i in range(num_devs):
dev_str = xstransact.Read(path + 'dev-%i' % i)
dev_str_list = dev_str_list + [dev_str]
+ return dev_str_list
+
+def do_FLR(domid):
+ from xen.xend.server.pciif import parse_pci_name, PciDevice
+ dev_str_list = get_assigned_pci_devices(domid)
for dev_str in dev_str_list:
(dom, b, d, f) = parse_pci_name(dev_str)
if state in (DOM_STATE_SUSPENDED, DOM_STATE_HALTED):
try:
self._constructDomain()
+
+ try:
+ self._setCPUAffinity()
+ except:
+                # Usually this means a CPU in the saved affinity mask does not
+                # exist on this host; ignore the error so the domain can still
+                # be restored.
+ log.warn("Cannot restore CPU affinity")
+
self._storeVmDetails()
self._createChannels()
self._createDevices()
# HVM domain shuts itself down only if it has PV drivers
if self.info.is_hvm():
hvm_pvdrv = xc.hvm_get_param(self.domid, HVM_PARAM_CALLBACK_IRQ)
- if not hvm_pvdrv:
+ hvm_s_state = xc.hvm_get_param(self.domid, HVM_PARAM_ACPI_S_STATE)
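+            # If the guest is in an ACPI sleep state (S-state != 0) it will
+            # not act on a PV shutdown request, so shut it down from outside
+            # as well.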
+ if not hvm_pvdrv or hvm_s_state != 0:
code = REVERSE_DOMAIN_SHUTDOWN_REASONS[reason]
log.info("HVM save:remote shutdown dom %d!", self.domid)
xc.domain_shutdown(self.domid, code)
" already been assigned to other domain, or maybe"
" it doesn't exist." % (bus, dev, func))
- bdf_str = "%s:%s:%s.%s@%s" % (new_dev['domain'],
+        # Here we duplicate some checks (in some cases a device must not be
+        # hot-plugged into an HVM guest) that are also done in
+        # pci_device_configure()'s self.device_create(dev_sxp) or
+        # dev_control.reconfigureDevice(devid, dev_config).
+        # These checks must be made before sending the 'pci-ins' command to
+        # ioemu.
+
+ # Test whether the device is owned by pciback. For instance, we can't
+ # hotplug a device being used by Dom0 itself to an HVM guest.
+ from xen.xend.server.pciif import PciDevice, parse_pci_name
+ domain = int(new_dev['domain'],16)
+ bus = int(new_dev['bus'],16)
+ dev = int(new_dev['slot'],16)
+ func = int(new_dev['func'],16)
+ try:
+ pci_device = PciDevice(domain, bus, dev, func)
+ except Exception, e:
+ raise VmError("pci: failed to locate device and "+
+                    "parse its resources - "+str(e))
+ if pci_device.driver!='pciback':
+ raise VmError(("pci: PCI Backend does not own device "+ \
+ "%s\n"+ \
+ "See the pciback.hide kernel "+ \
+ "command-line parameter or\n"+ \
+ "bind your slot/device to the PCI backend using sysfs" \
+ )%(pci_device.name))
+
+ # Check non-page-aligned MMIO BAR.
+ if pci_device.has_non_page_aligned_bar and arch.type != "ia64":
+ raise VmError("pci: %s: non-page-aligned MMIO BAR found." % \
+ pci_device.name)
+
+ # Check the co-assignment.
+ # To pci-attach a device D to domN, we should ensure each of D's
+ # co-assignment devices hasn't been assigned, or has been assigned to
+ # domN.
+ coassignment_list = pci_device.find_coassigned_devices()
+ assigned_pci_device_str_list = get_assigned_pci_devices(self.domid)
+ for pci_str in coassignment_list:
+ (domain, bus, dev, func) = parse_pci_name(pci_str)
+ dev_str = '0x%x,0x%x,0x%x,0x%x' % (domain, bus, dev, func)
+ if xc.test_assign_device(self.domid, dev_str) == 0:
+ continue
+ if not pci_str in assigned_pci_device_str_list:
+                raise VmError(('pci: failed to pci-attach %s to dom%d'
+                    ' because one of its co-assigned devices %s has been'
+                    ' assigned to another domain.'
+                    ) % (pci_device.name, self.domid, pci_str))
+
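+        # Render any per-device options as ',key=val,...' so they can be
+        # appended to the domain:bus:slot.func string passed to ioemu.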
+ opts = ''
+ if 'opts' in new_dev and len(new_dev['opts']) > 0:
+ config_opts = new_dev['opts']
+ config_opts = map(lambda (x, y): x+'='+y, config_opts)
+ opts = ',' + reduce(lambda x, y: x+','+y, config_opts)
+
+ bdf_str = "%s:%s:%s.%s%s@%s" % (new_dev['domain'],
new_dev['bus'],
new_dev['slot'],
new_dev['func'],
+ opts,
new_dev['vslt'])
self.image.signalDeviceModel('pci-ins', 'pci-inserted', bdf_str)
if dev_type == 'pci':
for dev in dev_config_dict['devs']:
XendAPIStore.deregister(dev['uuid'], 'DPCI')
- if dev_type == 'vscsi':
+ elif dev_type == 'vscsi':
for dev in dev_config_dict['devs']:
XendAPIStore.deregister(dev['uuid'], 'DSCSI')
elif dev_type == 'tap':
existing_dev_uuid = sxp.child_value(existing_dev_info, 'uuid')
existing_pci_conf = self.info['devices'][existing_dev_uuid][1]
existing_pci_devs = existing_pci_conf['devs']
- vslt = '0x0'
+ vslt = AUTO_PHP_SLOT_STR
for x in existing_pci_devs:
if ( int(x['domain'], 16) == int(dev['domain'], 16) and
int(x['bus'], 16) == int(dev['bus'], 16) and
int(x['func'], 16) == int(dev['func'], 16) ):
vslt = x['vslt']
break
- if vslt == '0x0':
+ if vslt == AUTO_PHP_SLOT_STR:
raise VmError("Device %04x:%02x:%02x.%01x is not connected"
% (int(dev['domain'],16), int(dev['bus'],16),
int(dev['slot'],16), int(dev['func'],16)))
"""Configure an existing vscsi device.
quoted pci funciton
"""
+ def _is_vscsi_defined(dev_info, p_devs = None, v_devs = None):
+ if not dev_info:
+ return False
+ for dev in sxp.children(dev_info, 'dev'):
+ if p_devs is not None:
+ if sxp.child_value(dev, 'p-dev') in p_devs:
+ return True
+ if v_devs is not None:
+ if sxp.child_value(dev, 'v-dev') in v_devs:
+ return True
+ return False
+
+ def _vscsi_be(be):
+ be_xdi = xen.xend.XendDomain.instance().domain_lookup_nr(be)
+ if be_xdi is not None:
+ be_domid = be_xdi.getDomid()
+ if be_domid is not None:
+ return str(be_domid)
+ return str(be)
+
dev_class = sxp.name(dev_sxp)
if dev_class != 'vscsi':
return False
dev_config = self.info.vscsi_convert_sxp_to_dict(dev_sxp)
- dev = dev_config['devs'][0]
- req_devid = int(dev['devid'])
- existing_dev_info = self._getDeviceInfo_vscsi(req_devid, dev['v-dev'])
- state = dev['state']
+ devs = dev_config['devs']
+ v_devs = [d['v-dev'] for d in devs]
+ state = devs[0]['state']
+ req_devid = int(devs[0]['devid'])
+ cur_dev_sxp = self._getDeviceInfo_vscsi(req_devid)
- if state == 'Initialising':
+ if state == xenbusState['Initialising']:
# new create
# If request devid does not exist, create and exit.
- if existing_dev_info is None:
+ p_devs = [d['p-dev'] for d in devs]
+ for dev_type, dev_info in self.info.all_devices_sxpr():
+ if dev_type != 'vscsi':
+ continue
+ if _is_vscsi_defined(dev_info, p_devs = p_devs):
+ raise XendError('The physical device "%s" is already defined' % \
+ p_devs[0])
+ if cur_dev_sxp is None:
self.device_create(dev_sxp)
return True
- elif existing_dev_info == "exists":
- raise XendError("The virtual device %s is already defined" % dev['v-dev'])
- elif state == 'Closing':
- if existing_dev_info is None:
+ if _is_vscsi_defined(cur_dev_sxp, v_devs = v_devs):
+ raise XendError('The virtual device "%s" is already defined' % \
+ v_devs[0])
+
+ if int(dev_config['feature-host']) != \
+ int(sxp.child_value(cur_dev_sxp, 'feature-host')):
+                raise XendError('The physical device "%s" cannot be defined '
+                                'because the feature-host mode is different' % devs[0]['p-dev'])
+
+ new_be = dev_config.get('backend', None)
+ if new_be is not None:
+ cur_be = sxp.child_value(cur_dev_sxp, 'backend', None)
+ if cur_be is None:
+ cur_be = xen.xend.XendDomain.DOM0_ID
+ new_be_dom = _vscsi_be(new_be)
+ cur_be_dom = _vscsi_be(cur_be)
+ if new_be_dom != cur_be_dom:
+                    raise XendError('The physical device "%s" cannot be defined '
+                                    'because the backend is different' % devs[0]['p-dev'])
+
+ elif state == xenbusState['Closing']:
+ if not _is_vscsi_defined(cur_dev_sxp, v_devs = v_devs):
raise XendError("Cannot detach vscsi device does not exist")
- # use DevController.reconfigureDevice to change device config
- dev_control = self.getDeviceController(dev_class)
- dev_uuid = dev_control.reconfigureDevice(req_devid, dev_config)
- dev_control.waitForDevice_reconfigure(req_devid)
- num_devs = dev_control.cleanupDevice(req_devid)
+ if self.domid is not None:
+ # use DevController.reconfigureDevice to change device config
+ dev_control = self.getDeviceController(dev_class)
+ dev_uuid = dev_control.reconfigureDevice(req_devid, dev_config)
+ dev_control.waitForDevice_reconfigure(req_devid)
+ num_devs = dev_control.cleanupDevice(req_devid)
+
+ # update XendConfig with new device info
+ if dev_uuid:
+ new_dev_sxp = dev_control.configuration(req_devid)
+ self.info.device_update(dev_uuid, new_dev_sxp)
+
+ # If there is no device left, destroy vscsi and remove config.
+ if num_devs == 0:
+ self.destroyDevice('vscsi', req_devid)
+ del self.info['devices'][dev_uuid]
- # update XendConfig with new device info
- if dev_uuid:
- new_dev_sxp = dev_control.configuration(req_devid)
+ else:
+ new_dev_sxp = ['vscsi']
+ cur_mode = sxp.children(cur_dev_sxp, 'feature-host')[0]
+ new_dev_sxp.append(cur_mode)
+ try:
+ cur_be = sxp.children(cur_dev_sxp, 'backend')[0]
+ new_dev_sxp.append(cur_be)
+ except IndexError:
+ pass
+
+ for cur_dev in sxp.children(cur_dev_sxp, 'dev'):
+ if state == xenbusState['Closing']:
+ if int(cur_mode[1]) == 1:
+ continue
+ if sxp.child_value(cur_dev, 'v-dev') in v_devs:
+ continue
+ new_dev_sxp.append(cur_dev)
+
+ if state == xenbusState['Initialising']:
+ for new_dev in sxp.children(dev_sxp, 'dev'):
+ new_dev_sxp.append(new_dev)
+
+ dev_uuid = sxp.child_value(cur_dev_sxp, 'uuid')
self.info.device_update(dev_uuid, new_dev_sxp)
- # If there is no device left, destroy vscsi and remove config.
- if num_devs == 0:
- self.destroyDevice('vscsi', req_devid)
- del self.info['devices'][dev_uuid]
+ # If there is only 'vscsi' in new_dev_sxp, remove the config.
+ if len(sxp.children(new_dev_sxp, 'dev')) == 0:
+ del self.info['devices'][dev_uuid]
+
+ xen.xend.XendDomain.instance().managed_config_save(self)
return True
if vslot == 0:
raise VmError("Device @ vslot 0x%x do not support hotplug." % (vslot))
+ # Check the co-assignment.
+ # To pci-detach a device D from domN, we should ensure: for each DD in the
+ # list of D's co-assignment devices, DD is not assigned (to domN).
+ #
+ from xen.xend.server.pciif import PciDevice
+ domain = int(x['domain'],16)
+ bus = int(x['bus'],16)
+ dev = int(x['slot'],16)
+ func = int(x['func'],16)
+ try:
+ pci_device = PciDevice(domain, bus, dev, func)
+ except Exception, e:
+ raise VmError("pci: failed to locate device and "+
+                    "parse its resources - "+str(e))
+ coassignment_list = pci_device.find_coassigned_devices()
+ coassignment_list.remove(pci_device.name)
+ assigned_pci_device_str_list = get_assigned_pci_devices(self.domid)
+ for pci_str in coassignment_list:
+ if pci_str in assigned_pci_device_str_list:
+                raise VmError(('pci: failed to pci-detach %s from dom%d'
+                    ' because one of its co-assigned devices %s is still'
+                    ' assigned to the domain.'
+                    ) % (pci_device.name, self.domid, pci_str))
+
+
bdf_str = "%s:%s:%s.%s" % (x['domain'], x['bus'], x['slot'], x['func'])
log.info("hvm_destroyPCIDevice:%s:%s!", x, bdf_str)
sxprs = []
dev_num = 0
for dev_type, dev_info in self.info.all_devices_sxpr():
- if dev_type == deviceClass:
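+            # A query for 'vbd' devices must also include blktap ('tap')
+            # devices, which appear to the guest as block devices too.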
+ if (deviceClass == 'vbd' and dev_type not in ['vbd', 'tap']) or \
+ (deviceClass != 'vbd' and dev_type != deviceClass):
+ continue
+
+ if deviceClass == 'vscsi':
+ vscsi_devs = ['devs', []]
+ for vscsi_dev in sxp.children(dev_info, 'dev'):
+ vscsi_dev.append(['frontstate', None])
+ vscsi_devs[1].append(vscsi_dev)
+ dev_num = int(sxp.child_value(vscsi_dev, 'devid'))
+ vscsi_mode = sxp.children(dev_info, 'feature-host')[0]
+ sxprs.append([dev_num, [vscsi_devs, vscsi_mode]])
+ elif deviceClass == 'vbd':
+ dev = sxp.child_value(dev_info, 'dev')
+ if 'ioemu:' in dev:
+ (_, dev) = dev.split(':', 1)
+ try:
+ (dev_name, _) = dev.split(':', 1) # Remove ":disk" or ":cdrom"
+ except ValueError:
+ dev_name = dev
+ dev_num = self.getDeviceController('vbd').convertToDeviceNumber(dev_name)
+ sxprs.append([dev_num, dev_info])
+ else:
sxprs.append([dev_num, dev_info])
dev_num += 1
return sxprs
return dev_info
return None
- def _getDeviceInfo_vscsi(self, devid, vdev):
+ def _getDeviceInfo_vscsi(self, devid):
devid = int(devid)
for dev_type, dev_info in self.info.all_devices_sxpr():
if dev_type != 'vscsi':
continue
- existing_dev_uuid = sxp.child_value(dev_info, 'uuid')
- existing_conf = self.info['devices'][existing_dev_uuid][1]
- existing_dev = existing_conf['devs'][0]
- existing_devid = int(existing_dev['devid'])
- existing_vdev = existing_dev['v-dev']
-
- if vdev == existing_vdev:
- return "exists"
-
- if devid == existing_devid:
+ devs = sxp.children(dev_info, 'dev')
+ if devid == int(sxp.child_value(devs[0], 'devid')):
return dev_info
-
return None
def setMemoryTarget(self, target):
self.info['name_label'], str(self.domid), target)
MiB = 1024 * 1024
+ memory_cur = self.get_memory_dynamic_max() / MiB
if self.domid == 0:
dom0_min_mem = xoptions.get_dom0_min_mem()
- memory_cur = self.get_memory_dynamic_max() / MiB
if target < memory_cur and dom0_min_mem > target:
raise XendError("memory_dynamic_max too small")
self._safe_set_memory('memory_dynamic_max', target * MiB)
if self.domid >= 0:
+ if target > memory_cur:
+ balloon.free((target - memory_cur) * 1024, self)
self.storeVm("memory", target)
self.storeDom("memory/target", target << 10)
+ xc.domain_set_target_mem(self.domid,
+ (target * 1024))
xen.xend.XendDomain.instance().managed_config_save(self)
def setMemoryMaximum(self, limit):
for dev_uuid, (dev_type, dev_info) in self.info['devices'].items():
if dev_type == 'vfb':
old_location = dev_info.get('location')
- listen_host = dev_info.get('vnclisten', 'localhost')
+ listen_host = dev_info.get('vnclisten', \
+ XendOptions.instance().get_vnclisten_address())
new_location = '%s:%s' % (listen_host, str(vnc_port))
if old_location == new_location:
break
t.mkdir()
t.set_permissions({'dom' : self.domid, 'read' : True})
t.write('vm', self.vmpath)
- for i in [ 'device', 'control', 'error', 'memory' ]:
+ # NB. Solaris guests use guest/ and hvmpv/ xenstore directories
+ for i in [ 'device', 'control', 'error', 'memory', 'guest', 'hvmpv' ]:
t.mkdir(i)
t.set_permissions(i, {'dom' : self.domid})
return self.info['VCPUs_max']
def setVCpuCount(self, vcpus):
- if vcpus <= 0:
- raise XendError('Invalid VCPUs')
+ def vcpus_valid(n):
+            if n <= 0:
+                raise XendError('VCPU count must be greater than zero')
+            if self.domid >= 0 and n > self.info['VCPUs_max']:
+ raise XendError('Cannot set vcpus greater than max vcpus on running domain')
+ vcpus_valid(vcpus)
self.info['vcpu_avail'] = (1 << vcpus) - 1
if self.domid >= 0:
self.storeVm('vcpu_avail', self.info['vcpu_avail'])
- # update dom differently depending on whether we are adjusting
- # vcpu number up or down, otherwise _vcpuDomDetails does not
- # disable the vcpus
- if self.info['VCPUs_max'] > vcpus:
- # decreasing
- self._writeDom(self._vcpuDomDetails())
- self.info['VCPUs_live'] = vcpus
- else:
- # same or increasing
- self.info['VCPUs_live'] = vcpus
- self._writeDom(self._vcpuDomDetails())
+ self._writeDom(self._vcpuDomDetails())
+ self.info['VCPUs_live'] = vcpus
else:
if self.info['VCPUs_max'] > vcpus:
# decreasing
for c in range(self.info['VCPUs_max'], vcpus):
self.info['cpus'].append(list())
self.info['VCPUs_max'] = vcpus
- xen.xend.XendDomain.instance().managed_config_save(self)
+ xen.xend.XendDomain.instance().managed_config_save(self)
log.info("Set VCPU count on domain %s to %d", self.info['name_label'],
vcpus)
@raise: XendError if core dumping failed.
"""
- try:
- if not corefile:
- this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime())
- corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
- self.info['name_label'], self.domid)
+ if not corefile:
+ this_time = time.strftime("%Y-%m%d-%H%M.%S", time.localtime())
+ corefile = "/var/xen/dump/%s-%s.%s.core" % (this_time,
+ self.info['name_label'], self.domid)
- if os.path.isdir(corefile):
- raise XendError("Cannot dump core in a directory: %s" %
- corefile)
-
- self._writeVm(DUMPCORE_IN_PROGRESS, 'True')
- xc.domain_dumpcore(self.domid, corefile)
- self._removeVm(DUMPCORE_IN_PROGRESS)
- except RuntimeError, ex:
- corefile_incomp = corefile+'-incomplete'
- os.rename(corefile, corefile_incomp)
+ if os.path.isdir(corefile):
+ raise XendError("Cannot dump core in a directory: %s" %
+ corefile)
+
+ try:
+ try:
+ self._writeVm(DUMPCORE_IN_PROGRESS, 'True')
+ xc.domain_dumpcore(self.domid, corefile)
+ except RuntimeError, ex:
+ corefile_incomp = corefile+'-incomplete'
+ try:
+ os.rename(corefile, corefile_incomp)
+ except:
+ pass
+
+ log.error("core dump failed: id = %s name = %s: %s",
+ self.domid, self.info['name_label'], str(ex))
+ raise XendError("Failed to dump core: %s" % str(ex))
+ finally:
self._removeVm(DUMPCORE_IN_PROGRESS)
- log.exception("XendDomainInfo.dumpCore failed: id = %s name = %s",
- self.domid, self.info['name_label'])
- raise XendError("Failed to dump core: %s" % str(ex))
#
# Device creation/deletion functions
for devclass in XendDevices.valid_devices():
for dev in t.list(devclass):
try:
+ true_devclass = devclass
+ if devclass == 'vbd':
+ # In the case of "vbd", the true device class
+ # may possibly be "tap". Just in case, verify
+ # device class.
+ devid = dev.split('/')[-1]
+ true_devclass = self.getBlockDeviceClass(devid)
log.debug("Removing %s", dev);
- self.destroyDevice(devclass, dev, False);
+ self.destroyDevice(true_devclass, dev, False);
except:
# Log and swallow any exceptions in removal --
# there's nothing more we can do.
log.exception("Device release failed: %s; %s; %s",
- self.info['name_label'], devclass, dev)
+ self.info['name_label'],
+ true_devclass, dev)
finally:
t.abort()
# overhead is greater for some types of domain than others. For
# example, an x86 HVM domain will have a default shadow-pagetable
# allocation of 1MB. We free up 2MB here to be on the safe side.
- balloon.free(2*1024) # 2MB should be plenty
+ balloon.free(2*1024, self) # 2MB should be plenty
ssidref = 0
if security.on() == xsconstants.XS_POLICY_USE:
if security.has_authorization(ssidref) == False:
raise VmError("VM is not authorized to run.")
+ s3_integrity = 0
+ if self.info.has_key('s3_integrity'):
+ s3_integrity = self.info['s3_integrity']
+ flags = (int(hvm) << 0) | (int(hap) << 1) | (int(s3_integrity) << 2)
+
try:
self.domid = xc.domain_create(
domid = 0,
ssidref = ssidref,
handle = uuid.fromString(self.info['uuid']),
- flags = (int(hvm) << 0) | (int(hap) << 1),
+ flags = flags,
target = self.info.target())
except Exception, e:
# may get here if due to ACM the operation is not permitted
xc.hvm_set_param(self.domid, HVM_PARAM_HPET_ENABLED,
long(hpet))
+ # Optionally enable periodic vpt aligning
+ vpt_align = self.info["platform"].get("vpt_align")
+ if hvm and vpt_align is not None:
+ xc.hvm_set_param(self.domid, HVM_PARAM_VPT_ALIGN,
+ long(vpt_align))
+
# Set maximum number of vcpus in domain
xc.domain_max_vcpus(self.domid, int(self.info['VCPUs_max']))
# Test whether the devices can be assigned with VT-d
- pci_str = str(self.info["platform"].get("pci"))
+ pci = self.info["platform"].get("pci")
+ pci_str = ''
+ if pci and len(pci) > 0:
+ pci = map(lambda x: x[0:4], pci) # strip options
+ pci_str = str(pci)
if hvm and pci_str:
bdf = xc.test_assign_device(self.domid, pci_str)
if bdf != 0:
raise XendError(str(exn))
+ def _setCPUAffinity(self):
+ """ Repin domain vcpus if a restricted cpus list is provided
+ """
+
+ def has_cpus():
+ if self.info['cpus'] is not None:
+ for c in self.info['cpus']:
+ if c:
+ return True
+ return False
+
+ if has_cpus():
+ for v in range(0, self.info['VCPUs_max']):
+ if self.info['cpus'][v]:
+ xc.vcpu_setaffinity(self.domid, v, self.info['cpus'][v])
+ else:
+ def find_relaxed_node(node_list):
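+                # Choose the least loaded node from node_list: a node's load
+                # counts how many vcpus of other domains may run on its cpus,
+                # normalised by the node's cpu count.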
+ import sys
+ nr_nodes = info['nr_nodes']
+ if node_list is None:
+ node_list = range(0, nr_nodes)
+ nodeload = [0]
+ nodeload = nodeload * nr_nodes
+ from xen.xend import XendDomain
+ doms = XendDomain.instance().list('all')
+ for dom in filter (lambda d: d.domid != self.domid, doms):
+ cpuinfo = dom.getVCPUInfo()
+ for vcpu in sxp.children(cpuinfo, 'vcpu'):
+ if sxp.child_value(vcpu, 'online') == 0: continue
+ cpumap = list(sxp.child_value(vcpu,'cpumap'))
+ for i in range(0, nr_nodes):
+ node_cpumask = info['node_to_cpu'][i]
+ for j in node_cpumask:
+ if j in cpumap:
+ nodeload[i] += 1
+ break
+ for i in range(0, nr_nodes):
+ if len(info['node_to_cpu'][i]) > 0 and i in node_list:
+ nodeload[i] = int(nodeload[i] * 16 / len(info['node_to_cpu'][i]))
+ else:
+ nodeload[i] = sys.maxint
+ index = nodeload.index( min(nodeload) )
+ return index
+
+ info = xc.physinfo()
+ if info['nr_nodes'] > 1:
+ node_memory_list = info['node_to_memory']
+ needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024
+ candidate_node_list = []
+ for i in range(0, info['nr_nodes']):
+ if node_memory_list[i] >= needmem and len(info['node_to_cpu'][i]) > 0:
+ candidate_node_list.append(i)
+ index = find_relaxed_node(candidate_node_list)
+ cpumask = info['node_to_cpu'][index]
+ for v in range(0, self.info['VCPUs_max']):
+ xc.vcpu_setaffinity(self.domid, v, cpumask)
+
+
def _initDomain(self):
log.debug('XendDomainInfo.initDomain: %s %s',
self.domid,
# repin domain vcpus if a restricted cpus list is provided
# this is done prior to memory allocation to aide in memory
# distribution for NUMA systems.
- def has_cpus():
- if self.info['cpus'] is not None:
- for c in self.info['cpus']:
- if c:
- return True
- return False
-
- if has_cpus():
- for v in range(0, self.info['VCPUs_max']):
- if self.info['cpus'][v]:
- xc.vcpu_setaffinity(self.domid, v, self.info['cpus'][v])
- else:
- def find_relaxed_node(node_list):
- import sys
- nr_nodes = info['nr_nodes']
- if node_list is None:
- node_list = range(0, nr_nodes)
- nodeload = [0]
- nodeload = nodeload * nr_nodes
- from xen.xend import XendDomain
- doms = XendDomain.instance().list('all')
- for dom in filter (lambda d: d.domid != self.domid, doms):
- cpuinfo = dom.getVCPUInfo()
- for vcpu in sxp.children(cpuinfo, 'vcpu'):
- if sxp.child_value(vcpu, 'online') == 0: continue
- cpumap = list(sxp.child_value(vcpu,'cpumap'))
- for i in range(0, nr_nodes):
- node_cpumask = info['node_to_cpu'][i]
- for j in node_cpumask:
- if j in cpumap:
- nodeload[i] += 1
- break
- for i in range(0, nr_nodes):
- if len(info['node_to_cpu'][i]) > 0 and i in node_list:
- nodeload[i] = int(nodeload[i] * 16 / len(info['node_to_cpu'][i]))
- else:
- nodeload[i] = sys.maxint
- index = nodeload.index( min(nodeload) )
- return index
-
- info = xc.physinfo()
- if info['nr_nodes'] > 1:
- node_memory_list = info['node_to_memory']
- needmem = self.image.getRequiredAvailableMemory(self.info['memory_dynamic_max']) / 1024
- candidate_node_list = []
- for i in range(0, info['nr_nodes']):
- if node_memory_list[i] >= needmem and len(info['node_to_cpu'][i]) > 0:
- candidate_node_list.append(i)
- index = find_relaxed_node(candidate_node_list)
- cpumask = info['node_to_cpu'][index]
- for v in range(0, self.info['VCPUs_max']):
- xc.vcpu_setaffinity(self.domid, v, cpumask)
+ self._setCPUAffinity()
# Use architecture- and image-specific calculations to determine
# the various headrooms necessary, given the raw configured
vtd_mem = ((vtd_mem + 1023) / 1024) * 1024
# Make sure there's enough RAM available for the domain
- balloon.free(memory + shadow + vtd_mem)
+ balloon.free(memory + shadow + vtd_mem, self)
# Set up the shadow memory
shadow_cur = xc.shadow_mem_control(self.domid, shadow / 1024)
finally:
self.state_updated.release()
+ def waitForSuspend(self):
+ """Wait for the guest to respond to a suspend request by
+ shutting down. If the guest hasn't re-written control/shutdown
+ after a certain amount of time, it's obviously not listening and
+ won't suspend, so we give up. HVM guests with no PV drivers
+        should already be shut down.
+ """
+ state = "suspend"
+ nr_tries = 60
+
+ self.state_updated.acquire()
+ try:
+ while self._stateGet() in (DOM_STATE_RUNNING,DOM_STATE_PAUSED):
+ self.state_updated.wait(1.0)
+ if state == "suspend":
+ if nr_tries == 0:
+ msg = ('Timeout waiting for domain %s to suspend'
+ % self.domid)
+ self._writeDom('control/shutdown', '')
+ raise XendError(msg)
+ state = self.readDom('control/shutdown')
+ nr_tries -= 1
+ finally:
+ self.state_updated.release()
+
#
# TODO: recategorise - called from XendCheckpoint
#
time.sleep(2)
for paths in plist:
if paths.find('backend') != -1:
- from xen.xend.server import DevController
# Modify online status /before/ updating state (latter is watched by
# drivers, so this ordering avoids a race).
xstransact.Write(paths, 'online', "0")
- xstransact.Write(paths, 'state', str(DevController.xenbusState['Closing']))
+ xstransact.Write(paths, 'state', str(xenbusState['Closing']))
# force
xstransact.Remove(paths)
# The domain might already have some shadow memory
overhead_kb -= xc.shadow_mem_control(self.domid) * 1024
if overhead_kb > 0:
- balloon.free(overhead_kb)
+ balloon.free(overhead_kb, self)
def _unwatchVm(self):
"""Remove the watch on the VM path, if any. Idempotent. Nothrow
while True:
test = 0
diff = time.time() - start
- for i in self.getDeviceController('vbd').deviceIDs():
+ vbds = self.getDeviceController('vbd').deviceIDs()
+ taps = self.getDeviceController('tap').deviceIDs()
+ for i in vbds + taps:
test = 1
log.info("Dev %s still active, looping...", i)
time.sleep(0.1)
if not xspol:
xspol = poladmin.get_policy_by_name(policy)
- if state in [ DOM_STATE_RUNNING, DOM_STATE_PAUSED ]:
- #if domain is running or paused try to relabel in hypervisor
- if not xspol:
- return (-xsconstants.XSERR_POLICY_NOT_LOADED, "", "", 0)
-
- if typ != xspol.get_type_name() or \
- policy != xspol.get_name():
- return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+ try:
+ xen.xend.XendDomain.instance().policy_lock.acquire_writer()
- if typ == xsconstants.ACM_POLICY_ID:
- new_ssidref = xspol.vmlabel_to_ssidref(label)
- if new_ssidref == xsconstants.INVALID_SSIDREF:
- return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+ if state in [ DOM_STATE_RUNNING, DOM_STATE_PAUSED ]:
+ #if domain is running or paused try to relabel in hypervisor
+ if not xspol:
+ return (-xsconstants.XSERR_POLICY_NOT_LOADED, "", "", 0)
- # Check that all used resources are accessible under the
- # new label
- if not is_policy_update and \
- not security.resources_compatible_with_vmlabel(xspol,
- self, label):
+ if typ != xspol.get_type_name() or \
+ policy != xspol.get_name():
return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
- #Check label against expected one. Can only do this
- # if the policy hasn't changed underneath in the meantime
- if xspol_old == None:
- old_label = self.get_security_label()
- if old_label != old_seclab:
- log.info("old_label != old_seclab: %s != %s" %
- (old_label, old_seclab))
+ if typ == xsconstants.ACM_POLICY_ID:
+ new_ssidref = xspol.vmlabel_to_ssidref(label)
+ if new_ssidref == xsconstants.INVALID_SSIDREF:
return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
- # relabel domain in the hypervisor
- rc, errors = security.relabel_domains([[domid, new_ssidref]])
- log.info("rc from relabeling in HV: %d" % rc)
- else:
- return (-xsconstants.XSERR_POLICY_TYPE_UNSUPPORTED, "", "", 0)
+ # Check that all used resources are accessible under the
+ # new label
+ if not is_policy_update and \
+ not security.resources_compatible_with_vmlabel(xspol,
+ self, label):
+ return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
- if rc == 0:
- # HALTED, RUNNING or PAUSED
- if domid == 0:
- if xspol:
- self.info['security_label'] = seclab
- ssidref = poladmin.set_domain0_bootlabel(xspol, label)
+ #Check label against expected one. Can only do this
+ # if the policy hasn't changed underneath in the meantime
+ if xspol_old == None:
+ old_label = self.get_security_label()
+ if old_label != old_seclab:
+ log.info("old_label != old_seclab: %s != %s" %
+ (old_label, old_seclab))
+ return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+
+ # relabel domain in the hypervisor
+ rc, errors = security.relabel_domains([[domid, new_ssidref]])
+ log.info("rc from relabeling in HV: %d" % rc)
else:
- return (-xsconstants.XSERR_POLICY_NOT_LOADED, "", "", 0)
- else:
- if self.info.has_key('security_label'):
- old_label = self.info['security_label']
- # Check label against expected one, unless wildcard
- if old_label != old_seclab:
- return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
+ return (-xsconstants.XSERR_POLICY_TYPE_UNSUPPORTED, "", "", 0)
+
+ if rc == 0:
+ # HALTED, RUNNING or PAUSED
+ if domid == 0:
+ if xspol:
+ self.info['security_label'] = seclab
+ ssidref = poladmin.set_domain0_bootlabel(xspol, label)
+ else:
+ return (-xsconstants.XSERR_POLICY_NOT_LOADED, "", "", 0)
+ else:
+ if self.info.has_key('security_label'):
+ old_label = self.info['security_label']
+ # Check label against expected one, unless wildcard
+ if old_label != old_seclab:
+ return (-xsconstants.XSERR_BAD_LABEL, "", "", 0)
- self.info['security_label'] = seclab
+ self.info['security_label'] = seclab
- try:
- xen.xend.XendDomain.instance().managed_config_save(self)
- except:
- pass
- return (rc, errors, old_label, new_ssidref)
+ try:
+ xen.xend.XendDomain.instance().managed_config_save(self)
+ except:
+ pass
+ return (rc, errors, old_label, new_ssidref)
+ finally:
+ xen.xend.XendDomain.instance().policy_lock.release()
def get_on_shutdown(self):
after_shutdown = self.info.get('actions_after_shutdown')
dpci_uuid = uuid.createString()
+ dpci_opts = []
+ opts_dict = xenapi_pci.get('options')
+ for k in opts_dict.keys():
+ dpci_opts.append([k, opts_dict[k]])
+
# Convert xenapi to sxp
ppci = XendAPIStore.get(xenapi_pci.get('PPCI'), 'PPCI')
['slot', '0x%02x' % ppci.get_slot()],
['func', '0x%1x' % ppci.get_func()],
['vslt', '0x%02x' % xenapi_pci.get('hotplug_slot')],
+ ['opts', dpci_opts],
['uuid', dpci_uuid]
],
['state', 'Initialising']
['p-devname', pscsi.get_dev_name()],
['p-dev', pscsi.get_physical_HCTL()],
['v-dev', xenapi_dscsi.get('virtual_HCTL')],
- ['state', 'Initialising'],
+ ['state', xenbusState['Initialising']],
['uuid', dscsi_uuid]
- ]
+ ],
+ ['feature-host', 0]
]
if self._stateGet() != XEN_API_VM_POWER_STATE_RUNNING:
- cur_vscsi_sxp = self._getDeviceInfo_vscsi(devid, None)
+ cur_vscsi_sxp = self._getDeviceInfo_vscsi(devid)
if cur_vscsi_sxp is None:
dev_uuid = self.info.device_add('vscsi', cfg_sxp = target_vscsi_sxp)
raise XendError('Failed to create device')
else:
- new_vscsi_sxp = ['vscsi']
+ new_vscsi_sxp = ['vscsi', ['feature-host', 0]]
for existing_dev in sxp.children(cur_vscsi_sxp, 'dev'):
new_vscsi_sxp.append(existing_dev)
new_vscsi_sxp.append(sxp.child0(target_vscsi_sxp, 'dev'))
dscsi = XendAPIStore.get(dev_uuid, 'DSCSI')
devid = dscsi.get_virtual_host()
vHCTL = dscsi.get_virtual_HCTL()
- cur_vscsi_sxp = self._getDeviceInfo_vscsi(devid, None)
+ cur_vscsi_sxp = self._getDeviceInfo_vscsi(devid)
dev_uuid = sxp.child_value(cur_vscsi_sxp, 'uuid')
target_dev = None
- new_vscsi_sxp = ['vscsi']
+ new_vscsi_sxp = ['vscsi', ['feature-host', 0]]
for dev in sxp.children(cur_vscsi_sxp, 'dev'):
if vHCTL == sxp.child_value(dev, 'v-dev'):
target_dev = dev
if target_dev is None:
raise XendError('Failed to destroy device')
- target_dev.append(['state', 'Closing'])
- target_vscsi_sxp = ['vscsi', target_dev]
+ target_dev.append(['state', xenbusState['Closing']])
+ target_vscsi_sxp = ['vscsi', target_dev, ['feature-host', 0]]
if self._stateGet() != XEN_API_VM_POWER_STATE_RUNNING:
import os
import socket
+import time
import xen.lowlevel.xc
from xen.util import Brctl
self.srs = {}
+ self._init_networks()
+ self._init_PIFs()
+
+ self._init_SRs()
+ self._init_PBDs()
+
+ self._init_PPCIs()
+
+ self._init_PSCSIs()
+
+
+ def _init_networks(self):
# Initialise networks
# First configure ones off disk
saved_networks = self.state_store.load_state('network')
# Next discover any existing bridges and check
# they are not already configured
- bridges = Brctl.get_state().keys()
+
+ # 'tmpbridge' is a temporary bridge created by network-bridge script.
+ # Wait a couple of seconds for it to be renamed.
+ for i in xrange(20):
+ bridges = Brctl.get_state().keys()
+ if 'tmpbridge' in bridges:
+ time.sleep(0.1)
+ else:
+ break
+
configured_bridges = [XendAPIStore.get(
network_uuid, "network")
.get_name_label()
for bridge in bridges
if bridge not in configured_bridges]
for unconfigured_bridge in unconfigured_bridges:
- XendNetwork.create_phy(unconfigured_bridge)
+ if unconfigured_bridge != 'tmpbridge':
+ XendNetwork.create_phy(unconfigured_bridge)
+ def _init_PIFs(self):
# Initialise PIFs
# First configure ones off disk
saved_pifs = self.state_store.load_state('pif')
log.debug("Cannot find network for bridge %s "
"when configuring PIF %s",
(bridge_name, name))
-
+
+ def _init_SRs(self):
# initialise storage
saved_srs = self.state_store.load_state('sr')
if saved_srs:
qcow_sr_uuid = uuid.createString()
self.srs[qcow_sr_uuid] = XendQCoWStorageRepo(qcow_sr_uuid)
+ def _init_PBDs(self):
saved_pbds = self.state_store.load_state('pbd')
if saved_pbds:
for pbd_uuid, pbd_cfg in saved_pbds.items():
except CreateUnspecifiedAttributeError:
log.warn("Error recreating PBD %s", pbd_uuid)
-
- # Initialise PPCIs
+ def _init_PPCIs(self):
saved_ppcis = self.state_store.load_state('ppci')
saved_ppci_table = {}
if saved_ppcis:
ppci_uuid = saved_ppci_table.get(pci_dev.name, uuid.createString())
XendPPCI(ppci_uuid, ppci_record)
-
+ def _init_PSCSIs(self):
# Initialise PSCSIs
saved_pscsis = self.state_store.load_state('pscsi')
saved_pscsi_table = {}
XendPSCSI(pscsi_uuid, pscsi_record)
+ def add_network(self, interface):
+ # TODO
+ log.debug("add_network(): Not implemented.")
+
+
+ def remove_network(self, interface):
+ # TODO
+ log.debug("remove_network(): Not implemented.")
+
+
+ def add_PPCI(self, pci_name):
+ # Update lspci info
+ PciUtil.create_lspci_info()
+
+ # Initialise the PPCI
+ saved_ppcis = self.state_store.load_state('ppci')
+ saved_ppci_table = {}
+ if saved_ppcis:
+ for ppci_uuid, ppci_record in saved_ppcis.items():
+ try:
+ saved_ppci_table[ppci_record['name']] = ppci_uuid
+ except KeyError:
+ pass
+
+ (domain, bus, slot, func) = PciUtil.parse_pci_name(pci_name)
+ pci_dev = PciUtil.PciDevice(domain, bus, slot, func)
+ ppci_record = {
+ 'domain': pci_dev.domain,
+ 'bus': pci_dev.bus,
+ 'slot': pci_dev.slot,
+ 'func': pci_dev.func,
+ 'vendor_id': pci_dev.vendor,
+ 'vendor_name': pci_dev.vendorname,
+ 'device_id': pci_dev.device,
+ 'device_name': pci_dev.devicename,
+ 'revision_id': pci_dev.revision,
+ 'class_code': pci_dev.classcode,
+ 'class_name': pci_dev.classname,
+ 'subsystem_vendor_id': pci_dev.subvendor,
+ 'subsystem_vendor_name': pci_dev.subvendorname,
+ 'subsystem_id': pci_dev.subdevice,
+ 'subsystem_name': pci_dev.subdevicename,
+ 'driver': pci_dev.driver
+ }
+ # If saved uuid exists, use it. Otherwise create one.
+ ppci_uuid = saved_ppci_table.get(pci_dev.name, uuid.createString())
+ XendPPCI(ppci_uuid, ppci_record)
+
+
+ def remove_PPCI(self, pci_name):
+ # Update lspci info
+ PciUtil.create_lspci_info()
+
+ # Remove the PPCI
+ (domain, bus, slot, func) = PciUtil.parse_pci_name(pci_name)
+ ppci_ref = XendPPCI.get_by_sbdf(domain, bus, slot, func)
+ XendAPIStore.get(ppci_ref, "PPCI").destroy()
+
+
+ def add_PSCSI(self):
+ # TODO
+        log.debug("add_PSCSI(): Not implemented.")
+
+
+ def remove_PSCSI(self):
+ # TODO
+        log.debug("remove_PSCSI(): Not implemented.")
+
+
## def network_destroy(self, net_uuid):
## del self.networks[net_uuid]
## self.save_networks()
"""Default for the flag indicating whether xend should run a ssl relocation server."""
xend_relocation_ssl_server_default = 'no'
+ """Default for the flag indicating whether xend should run a udev event server."""
+ xend_udev_event_server_default = 'no'
+
"""Default interface address the xend relocation server listens at. """
xend_relocation_address_default = ''
def get_xend_relocation_server_ssl_cert_file(self):
return self.get_config_string("xend-relocation-server-ssl-cert-file")
+ def get_xend_udev_event_server(self):
+ return self.get_config_bool("xend-udev-event-server",
+ self.xend_udev_event_server_default)
+
def get_xend_port(self):
"""Get the port xend listens at for its HTTP interface.
"""
from xen.xend.XendBase import XendAPIStore
from xen.xend import uuid as genuuid
+from xen.util.pci import parse_hex
+
class XendPPCI(XendBase):
"""Representation of a physical PCI device."""
def get_by_sbdf(self, domain, bus, slot, func):
for ppci in XendAPIStore.get_all("PPCI"):
- if ppci.get_domain() == int(domain, 16) and \
- ppci.get_bus() == int(bus, 16) and \
- ppci.get_slot() == int(slot, 16) and \
- ppci.get_func() == int(func, 16):
+ if ppci.get_domain() == parse_hex(domain) and \
+ ppci.get_bus() == parse_hex(bus) and \
+ ppci.get_slot() == parse_hex(slot) and \
+ ppci.get_func() == parse_hex(func):
return ppci.get_uuid()
return None
raise VmError('Failed to query target memory allocation of dom0.')
return kb
-def free(need_mem):
+def free(need_mem, dominfo):
"""Balloon out memory from the privileged domain so that there is the
specified required amount (in KiB) free.
"""
if need_mem >= max_free_mem:
retries = rlimit
+    # If this is a NUMA machine and the newly created HVM domain has all
+    # of its vcpus in the same node, wait until the pages on the scrub
+    # list have been freed, but give up after roughly 20 seconds.
+ if physinfo['nr_nodes'] > 1 and retries == 0:
+ oldnode = -1
+ waitscrub = 1
+ vcpus = dominfo.info['cpus'][0]
+ for vcpu in vcpus:
+ nodenum = 0
+ for node in physinfo['node_to_cpu']:
+ for cpu in node:
+ if vcpu == cpu:
+ if oldnode == -1:
+ oldnode = nodenum
+ elif oldnode != nodenum:
+ waitscrub = 0
+ nodenum = nodenum + 1
+
+ if waitscrub == 1 and scrub_mem > 0:
+ log.debug("wait for scrub %s", scrub_mem)
+ while scrub_mem > 0 and retries < rlimit:
+ time.sleep(sleep_time)
+ physinfo = xc.physinfo()
+ free_mem = physinfo['free_memory']
+ scrub_mem = physinfo['scrub_memory']
+ retries += 1
+ sleep_time += SLEEP_TIME_GROWTH
+ log.debug("scrub for %d times", retries)
+
+ retries = 0
+ sleep_time = SLEEP_TIME_GROWTH
+
while retries < rlimit:
physinfo = xc.physinfo()
free_mem = physinfo['free_memory']
import errno
import glob
import traceback
+import platform
import xen.lowlevel.xc
from xen.xend.XendConstants import *
from xen.xend import XendOptions
from xen.util import oshelp
from xen.util import utils
+from xen.xend import osdep
xc = xen.lowlevel.xc.xc()
if self.device_model is None:
return
- # If we use a device model, the pipes for communication between
- # blktapctrl and ioemu must be present before the devices are
- # created (blktapctrl must access them for new block devices)
+ if platform.system() != 'SunOS':
+ # If we use a device model, the pipes for communication between
+ # blktapctrl and ioemu must be present before the devices are
+ # created (blktapctrl must access them for new block devices)
- # mkdir throws an exception if the path already exists
- try:
- os.mkdir('/var/run/tap', 0755)
- except:
- pass
+ try:
+ os.makedirs('/var/run/tap', 0755)
+ except:
+ pass
- try:
- os.mkfifo('/var/run/tap/qemu-read-%d' % domid, 0600)
- os.mkfifo('/var/run/tap/qemu-write-%d' % domid, 0600)
- except OSError, e:
- log.warn('Could not create blktap pipes for domain %d' % domid)
- log.exception(e)
- pass
+ try:
+ os.mkfifo('/var/run/tap/qemu-read-%d' % domid, 0600)
+ os.mkfifo('/var/run/tap/qemu-write-%d' % domid, 0600)
+ except OSError, e:
+ log.warn('Could not create blktap pipes for domain %d' % domid)
+ log.exception(e)
+ pass
# Return a list of cmd line args to the device models based on the
ret.append('-nographic')
return ret
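+        # Tell qemu-dm how much video RAM to emulate (in MB, default 4).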
+ vram = str(vmConfig['platform'].get('videoram',4))
+ ret.append('-videoram')
+ ret.append(vram)
+
vnc_config = {}
has_vnc = int(vmConfig['platform'].get('vnc', 0)) != 0
has_sdl = int(vmConfig['platform'].get('sdl', 0)) != 0
if dev_type == 'vfb':
if 'keymap' in dev_info:
keymap = dev_info.get('keymap',{})
- vfb_type = dev_info.get('type', {})
- if vfb_type == 'sdl':
+ if int(dev_info.get('vnc', 0)) != 0 :
+ has_vnc = True
+ if int(dev_info.get('sdl', 0)) != 0 :
+ has_sdl = True
+ if has_sdl:
self.display = dev_info.get('display', {})
self.xauthority = dev_info.get('xauthority', {})
opengl = int(dev_info.get('opengl', opengl))
- has_sdl = True
- else:
+ if has_vnc:
vnc_config = dev_info.get('other_config', {})
- has_vnc = True
break
if keymap:
if int(vnc_config.get('vncunused', 1)) != 0:
ret.append('-vncunused')
- elif has_sdl:
- # SDL is default in QEMU.
+ if has_sdl:
+ ret.append('-sdl')
if int(vmConfig['platform'].get('opengl', opengl)) != 1 :
ret.append('-disable-opengl')
- else:
+
+ if not has_sdl and not has_vnc :
ret.append('-nographic')
if int(vmConfig['platform'].get('monitor', 0)) != 0:
env['DISPLAY'] = self.display
if self.xauthority:
env['XAUTHORITY'] = self.xauthority
- if self.vncconsole:
- args = args + ([ "-vncviewer" ])
unique_id = "%i-%i" % (self.vm.getDomid(), time.time())
sentinel_path = sentinel_path_prefix + unique_id
sentinel_path_fifo = sentinel_path + '.fifo'
logfd = os.open(self.logfile, logfile_mode)
sys.stderr.flush()
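+        # osdep pre/post-fork hooks; on Solaris these are used to run the
+        # device model in its own process contract, and on other platforms
+        # they are expected to be no-ops.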
+ contract = osdep.prefork("%s:%d" %
+ (self.vm.getName(), self.vm.getDomid()))
pid = os.fork()
if pid == 0: #child
try:
+ osdep.postfork(contract)
os.dup2(null, 0)
os.dup2(logfd, 1)
os.dup2(logfd, 2)
except:
os._exit(127)
else:
+ osdep.postfork(contract, abandon=True)
self.pid = pid
os.close(null)
os.close(logfd)
def _dmfailed(self, message):
log.warning("domain %s: %s", self.vm.getName(), message)
- # ideally we would like to forcibly crash the domain with
- # something like
- # xc.domain_shutdown(self.vm.getDomid(), DOMAIN_CRASH)
- # but this can easily lead to very rapid restart loops against
- # which we currently have no protection
+ xc.domain_shutdown(self.vm.getDomid(), DOMAIN_CRASH)
def recreate(self):
if self.device_model is None:
os.kill(self.pid, signal.SIGHUP)
except OSError, exn:
log.exception(exn)
- try:
- # Try to reap the child every 100ms for 10s. Then SIGKILL it.
- for i in xrange(100):
+ # Try to reap the child every 100ms for 10s. Then SIGKILL it.
+ for i in xrange(100):
+ try:
(p, rv) = os.waitpid(self.pid, os.WNOHANG)
if p == self.pid:
break
- time.sleep(0.1)
- else:
- log.warning("DeviceModel %d took more than 10s "
- "to terminate: sending SIGKILL" % self.pid)
+ except OSError:
+ # This is expected if Xend has been restarted within
+ # the life of this domain. In this case, we can kill
+ # the process, but we can't wait for it because it's
+                # not our child. We continue the loop, and once it finishes
+                # we make really sure the process is going away (SIGKILL).
+ pass
+ time.sleep(0.1)
+ else:
+ log.warning("DeviceModel %d took more than 10s "
+ "to terminate: sending SIGKILL" % self.pid)
+ try:
os.kill(self.pid, signal.SIGKILL)
os.waitpid(self.pid, 0)
- except OSError, exn:
- # This is expected if Xend has been restarted within the
- # life of this domain. In this case, we can kill the process,
- # but we can't wait for it because it's not our child.
- # We just make really sure it's going away (SIGKILL) first.
- os.kill(self.pid, signal.SIGKILL)
+ except OSError:
+ # This happens if the process doesn't exist.
+ pass
state = xstransact.Remove("/local/domain/0/device-model/%i"
% self.vm.getDomid())
finally:
def configure(self, vmConfig):
ImageHandler.configure(self, vmConfig)
+ self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
+ self.is_stubdom = (self.kernel.find('stubdom') >= 0)
def buildDomain(self):
store_evtchn = self.vm.getStorePort()
flags = self.flags,
vhpt = self.vhpt)
+ def getRequiredAvailableMemory(self, mem_kb):
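+        # A stub domain runs the device model itself, so reserve the
+        # configured video RAM on top of the requested amount.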
+ if self.is_stubdom :
+ mem_kb += self.vramsize
+ return mem_kb
+
+ def getRequiredInitialReservation(self):
+ return self.vm.getMemoryTarget()
+
+ def getRequiredMaximumReservation(self):
+ return self.vm.getMemoryMaximum()
+
def parseDeviceModelArgs(self, vmConfig):
ret = ImageHandler.parseDeviceModelArgs(self, vmConfig)
# Equivalent to old xenconsoled behaviour. Should make
if 'hvm' not in info['xen_caps']:
raise HVMRequired()
+ xen_platform_pci = int(vmConfig['platform'].get('xen_platform_pci',1))
rtc_timeoffset = vmConfig['platform'].get('rtc_timeoffset')
+ if not self.display :
+ self.display = ''
self.vm.storeVm(("image/dmargs", " ".join(self.dmargs)),
("image/device-model", self.device_model),
("image/display", self.display))
self.vm.permissionsVm("image/dmargs", { 'dom': self.vm.getDomid(), 'read': True } )
+
+ if xen_platform_pci == 0:
+ disable_pf = 1
+            log.info("No need to create platform device [domid: %d]", self.vm.getDomid())
+        else:
+            disable_pf = 0
+            log.info("Need to create platform device [domid: %d]", self.vm.getDomid())
+
+ xstransact.Store("/local/domain/0/device-model/%i"%self.vm.getDomid(),
+ ('disable_pf', disable_pf))
self.vm.storeVm(("rtc/timeoffset", rtc_timeoffset))
self.vm.permissionsVm("rtc/timeoffset", { 'dom': self.vm.getDomid(), 'read': True } )
self.apic = int(vmConfig['platform'].get('apic', 0))
self.acpi = int(vmConfig['platform'].get('acpi', 0))
self.guest_os_type = vmConfig['platform'].get('guest_os_type')
-
+
# Return a list of cmd line args to the device models based on the
# xm config file
def buildDomain(self):
store_evtchn = self.vm.getStorePort()
+ memmax_mb = self.getRequiredMaximumReservation() / 1024
mem_mb = self.getRequiredInitialReservation() / 1024
log.debug("domid = %d", self.vm.getDomid())
log.debug("image = %s", self.loader)
log.debug("store_evtchn = %d", store_evtchn)
- log.debug("memsize = %d", mem_mb)
+ log.debug("memsize = %d", memmax_mb)
+ log.debug("target = %d", mem_mb)
log.debug("vcpus = %d", self.vm.getVCpuCount())
log.debug("acpi = %d", self.acpi)
log.debug("apic = %d", self.apic)
rc = xc.hvm_build(domid = self.vm.getDomid(),
image = self.loader,
- memsize = mem_mb,
+ memsize = memmax_mb,
+ target = mem_mb,
vcpus = self.vm.getVCpuCount(),
acpi = self.acpi,
apic = self.apic)
def configure(self, vmConfig):
HVMImageHandler.configure(self, vmConfig)
self.vhpt = int(vmConfig['platform'].get('vhpt', 0))
+ self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
def buildDomain(self):
xc.nvram_init(self.vm.getName(), self.vm.getDomid())
# buffer io page, buffer pio page and memmap info page
extra_pages = 1024 + 5
mem_kb += extra_pages * page_kb
- # Add 8 MiB overhead for QEMU's video RAM.
- return mem_kb + 8192
+ mem_kb += self.vramsize
+ return mem_kb
def getRequiredInitialReservation(self):
return self.vm.getMemoryTarget()
def configure(self, vmConfig):
HVMImageHandler.configure(self, vmConfig)
self.pae = int(vmConfig['platform'].get('pae', 0))
+ self.vramsize = int(vmConfig['platform'].get('videoram',4)) * 1024
def buildDomain(self):
xc.hvm_set_param(self.vm.getDomid(), HVM_PARAM_PAE_ENABLED, self.pae)
return rc
def getRequiredAvailableMemory(self, mem_kb):
- # Add 8 MiB overhead for QEMU's video RAM.
- return mem_kb + 8192
+ return mem_kb + self.vramsize
def getRequiredInitialReservation(self):
return self.vm.getMemoryTarget()
# Use is subject to license terms.
import os
+import commands
_scripts_dir = {
"Linux": "/etc/xen/scripts",
"SunOS": "vif-vnic"
}
-def _linux_balloon_stat(label):
+PROC_XEN_BALLOON = '/proc/xen/balloon'
+SYSFS_XEN_MEMORY = '/sys/devices/system/xen_memory/xen_memory0'
+
+def _linux_balloon_stat_proc(label):
"""Returns the value for the named label, or None if an error occurs."""
xend2linux_labels = { 'current' : 'Current allocation',
'high-balloon' : 'High-mem balloon',
'limit' : 'Xen hard limit' }
- PROC_XEN_BALLOON = '/proc/xen/balloon'
f = file(PROC_XEN_BALLOON, 'r')
try:
for line in f:
finally:
f.close()
+def _linux_balloon_stat_sysfs(label):
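+    """Returns the value for the named label from sysfs, or None if the
+       value cannot be parsed."""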
+ sysfiles = { 'target' : 'target_kb',
+ 'current' : 'info/current_kb',
+ 'low-balloon' : 'info/low_kb',
+ 'high-balloon' : 'info/high_kb',
+ 'limit' : 'info/hard_limit_kb' }
+
+ name = os.path.join(SYSFS_XEN_MEMORY, sysfiles[label])
+ f = file(name, 'r')
+
+ val = f.read().strip()
+ if val.isdigit():
+ return int(val)
+ return None
+
+def _linux_balloon_stat(label):
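+    # Prefer the legacy /proc/xen/balloon interface when it exists; pvops
+    # kernels expose the balloon counters through sysfs instead.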
+ if os.access(PROC_XEN_BALLOON, os.F_OK):
+ return _linux_balloon_stat_proc(label)
+ elif os.access(SYSFS_XEN_MEMORY, os.F_OK):
+ return _linux_balloon_stat_sysfs(label)
+
+ return None
+
def _solaris_balloon_stat(label):
"""Returns the value for the named label, or None if an error occurs."""
finally:
f.close()
+def _solaris_get_cpuinfo():
+ cpuinfo = {}
+
+    # call kstat to extract specific cpu_info output
+ cmd = "/usr/bin/kstat -p -c misc -m cpu_info"
+ kstatoutput = commands.getoutput (cmd)
+
+ # walk each line
+ for kstatline in kstatoutput.split('\n'):
+
+        # each line has the form
+        # module:instance:name:statistic value
+ (module, cpunum, combo, namevalue) = kstatline.split (":")
+
+ # check to see if this cpunum is already a key. If not,
+ # initialize an empty hash table
+ if not cpuinfo.has_key (int(cpunum)):
+ cpuinfo[int(cpunum)] = {}
+
+ # split the namevalue output on whitespace
+ data = namevalue.split()
+
+ # the key will be data[0]
+ key = data[0]
+
+ # check the length of the data list. If it's larger than
+ # 2, join the rest of the list together with a space.
+ # Otherwise, value is just data[1]
+ if len (data) > 2:
+ value = ' '.join (data[1:])
+ else:
+ value = data[1]
+
+ # add this key/value pair to the cpuhash
+ cpuinfo[int(cpunum)][key] = value
+
+ # Translate Solaris tokens into what Xend expects
+ for key in cpuinfo.keys():
+ cpuinfo[key]["flags"] = ""
+ cpuinfo[key]["model name"] = cpuinfo[key]["brand"]
+ cpuinfo[key]["cpu MHz"] = cpuinfo[key]["clock_MHz"]
+
+ # return the hash table
+ return cpuinfo
+
_get_cpuinfo = {
+ "SunOS": _solaris_get_cpuinfo
+}
+
+def _default_prefork(name):
+ pass
+
+def _default_postfork(ct, abandon=False):
+ pass
+
+# call this for long-running processes that should survive a xend
+# restart
+def _solaris_prefork(name):
+ from xen.lowlevel import process
+ return process.activate(name)
+
+def _solaris_postfork(ct, abandon=False):
+ from xen.lowlevel import process
+ process.clear(ct)
+ if abandon:
+ process.abandon_latest()
+
+_get_prefork = {
+ "SunOS": _solaris_prefork
+}
+
+_get_postfork = {
+ "SunOS": _solaris_postfork
}
def _get(var, default=None):
vif_script = _get(_vif_script, "vif-bridge")
lookup_balloon_stat = _get(_balloon_stat, _linux_balloon_stat)
get_cpuinfo = _get(_get_cpuinfo, _linux_get_cpuinfo)
+prefork = _get(_get_prefork, _default_prefork)
+postfork = _get(_get_postfork, _default_postfork)
'qcow',
'qcow2',
- 'ioemu'
+ 'ioemu',
+ 'tapdisk',
]
class BlktapController(BlkifController):
--- /dev/null
+#============================================================================
+# This library is free software; you can redistribute it and/or
+# modify it under the terms of version 2.1 of the GNU Lesser General Public
+# License as published by the Free Software Foundation.
+#
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+#
+# You should have received a copy of the GNU Lesser General Public
+# License along with this library; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+#============================================================================
+# Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
+# Copyright (C) 2005 XenSource Ltd
+#============================================================================
+
+DEVICE_CREATE_TIMEOUT = 100
+DEVICE_DESTROY_TIMEOUT = 100
+HOTPLUG_STATUS_NODE = "hotplug-status"
+HOTPLUG_ERROR_NODE = "hotplug-error"
+HOTPLUG_STATUS_ERROR = "error"
+HOTPLUG_STATUS_BUSY = "busy"
+
+Connected = 1
+Error = 2
+Missing = 3
+Timeout = 4
+Busy = 5
+Disconnected = 6
+
+xenbusState = {
+ 'Unknown' : 0,
+ 'Initialising' : 1,
+ 'InitWait' : 2,
+ 'Initialised' : 3,
+ 'Connected' : 4,
+ 'Closing' : 5,
+ 'Closed' : 6,
+ 'Reconfiguring' : 7,
+ 'Reconfigured' : 8,
+ }
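+# Add the reverse mapping as well, so states can be looked up either by
+# name (xenbusState['Connected'] == 4) or by number (xenbusState[4]).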
+xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys())))
+
from xen.xend.XendError import VmError
from xen.xend.XendLogging import log
import xen.xend.XendConfig
+from xen.xend.server.DevConstants import *
from xen.xend.xenstore.xstransact import xstransact, complete
from xen.xend.xenstore.xswatch import xswatch
import os
-DEVICE_CREATE_TIMEOUT = 100
-DEVICE_DESTROY_TIMEOUT = 100
-HOTPLUG_STATUS_NODE = "hotplug-status"
-HOTPLUG_ERROR_NODE = "hotplug-error"
-HOTPLUG_STATUS_ERROR = "error"
-HOTPLUG_STATUS_BUSY = "busy"
-
-Connected = 1
-Error = 2
-Missing = 3
-Timeout = 4
-Busy = 5
-Disconnected = 6
-
-xenbusState = {
- 'Unknown' : 0,
- 'Initialising' : 1,
- 'InitWait' : 2,
- 'Initialised' : 3,
- 'Connected' : 4,
- 'Closing' : 5,
- 'Closed' : 6,
- 'Reconfiguring': 7,
- 'Reconfigured' : 8,
- }
-
xoptions = XendOptions.instance()
-xenbusState.update(dict(zip(xenbusState.values(), xenbusState.keys())))
-
class DevController:
"""Abstract base class for a device controller. Device controllers create
xstransact.Remove(backpath)
xstransact.Remove(frontpath)
- # xstransact.Remove(self.devicePath()) ?? Below is the same ?
- self.vm._removeVm("device/%s/%d" % (self.deviceClass, dev))
+ # xstransact.Remove(self.devicePath()) ?? Below is the same ?
+ self.vm._removeVm("device/%s/%d" % (self.deviceClass, dev))
def configurations(self, transaction = None):
return map(lambda x: self.configuration(x, transaction), self.deviceIDs(transaction))
xswatch(statusPath, hotplugStatusCallback, ev, result)
ev.wait(DEVICE_CREATE_TIMEOUT)
err = xstransact.Read(statusPath, HOTPLUG_ERROR_NODE)
- if result['status'] != 'Connected':
+ if result['status'] != Connected:
return (result['status'], err)
backpath = self.readVm(devid, "backend")
from xen.util import mkdir
import relocate
+import udevevent
import SrvServer
from params import *
del xc
relocate.listenRelocation()
+ udevevent.listenUdevEvent()
servers = SrvServer.create()
servers.start(status)
del servers
[['dom', 'int'],
['file', 'str'],
['live', 'int'],
- ['crash', 'int']])
+ ['crash', 'int'],
+ ['reset', 'int']])
return fn(req.args, {'dom': self.dom.domid})
def op_migrate(self, op, req):
import re
import string
+import os
from xen.util import blkif
import xen.util.xsm.xsm as security
"""
DevController.__init__(self, vm)
+ def _isValidProtocol(self, protocol):
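+        # The built-in protocols are always accepted; anything else is valid
+        # only if a matching /etc/xen/scripts/block-<protocol> script exists
+        # and is executable.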
+ if protocol in ('phy', 'file', 'tap'):
+ return True
+
+ return os.access('/etc/xen/scripts/block-%s' % protocol, os.X_OK)
+
+
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""
uname = config.get('uname', '')
else:
try:
(typ, params) = string.split(uname, ':', 1)
- if typ not in ('phy', 'file', 'tap'):
- raise VmError(
- 'Block device must have "phy", "file" or "tap" '
- 'specified to type')
+ if not self._isValidProtocol(typ):
+ raise VmError('Block device type "%s" is invalid.' % typ)
except ValueError:
raise VmError(
'Block device must have physical details specified')
if uuid:
back['uuid'] = uuid
+ bootable = config.get('bootable', None)
+ if bootable != None:
+ back['bootable'] = str(bootable)
+
if security.on() == xsconstants.XS_POLICY_USE:
self.do_access_control(config, uname)
config = DevController.getDeviceConfiguration(self, devid, transaction)
if transaction is None:
devinfo = self.readBackend(devid, 'dev', 'type', 'params', 'mode',
- 'uuid')
+ 'uuid', 'bootable')
else:
devinfo = self.readBackendTxn(transaction, devid,
- 'dev', 'type', 'params', 'mode', 'uuid')
- dev, typ, params, mode, uuid = devinfo
+ 'dev', 'type', 'params', 'mode', 'uuid',
+ 'bootable')
+ dev, typ, params, mode, uuid, bootable = devinfo
if dev:
if transaction is None:
config['mode'] = mode
if uuid:
config['uuid'] = uuid
+ if bootable != None:
+ config['bootable'] = int(bootable)
proto = self.readFrontend(devid, 'protocol')
if proto:
class IOPortsController(DevController):
+ valid_cfg = ['to', 'from', 'uuid']
+
def __init__(self, vm):
DevController.__init__(self, vm)
+ def getDeviceConfiguration(self, devid, transaction = None):
+ result = DevController.getDeviceConfiguration(self, devid, transaction)
+ if transaction is None:
+ devinfo = self.readBackend(devid, *self.valid_cfg)
+ else:
+ devinfo = self.readBackendTxn(transaction, devid, *self.valid_cfg)
+ config = dict(zip(self.valid_cfg, devinfo))
+ config = dict([(key, val) for key, val in config.items()
+ if val != None])
+ return config
+
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""
'ioports: Failed to configure legacy i/o range: %s - %s' %
(io_from, io_to))
- return (None, {}, {})
+ back = dict([(k, config[k]) for k in self.valid_cfg if k in config])
+ return (self.allocateDeviceID(), back, {})
+
+ def waitForDevice(self, devid):
+ # don't wait for hotplug
+ return
def __init__(self, vm):
DevController.__init__(self, vm)
+ valid_cfg = ['irq', 'uuid']
+
+ def getDeviceConfiguration(self, devid, transaction = None):
+ result = DevController.getDeviceConfiguration(self, devid, transaction)
+ if transaction is None:
+ devinfo = self.readBackend(devid, *self.valid_cfg)
+ else:
+ devinfo = self.readBackendTxn(transaction, devid, *self.valid_cfg)
+ config = dict(zip(self.valid_cfg, devinfo))
+ config = dict([(key, val) for key, val in config.items()
+ if val != None])
+ return config
def getDeviceDetails(self, config):
"""@see DevController.getDeviceDetails"""
if rc < 0:
raise VmError(
'irq: Failed to map irq %x' % (pirq))
- return (None, {}, {})
+ back = dict([(k, config[k]) for k in self.valid_cfg if k in config])
+ return (self.allocateDeviceID(), back, {})
+
+ def waitForDevice(self, devid):
+ # don't wait for hotplug
+ return
import random
import re
-from xen.xend import XendOptions
+from xen.xend import XendOptions, sxp
from xen.xend.server.DevController import DevController
from xen.xend.XendError import VmError
from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
result[x] = y
return result
+
+ # match a VIF ID from xenstore, or a MAC address stored in the domain config
+ def convertToDeviceNumber(self, devid):
+ try:
+ return int(devid)
+ except ValueError:
+ if type(devid) is not str:
+ raise VmError("devid %s is wrong type" % str(devid))
+ try:
+ dev = devid.split('/')[-1]
+ return (int(dev))
+ except ValueError:
+ devs = [d for d in self.vm.info.all_devices_sxpr()
+ if d[0] == 'vif']
+ for nr in range(len(devs)):
+ dev_type, dev_info = devs[nr]
+ if (sxp.child_value(dev_info, 'mac').lower() ==
+ devid.lower()):
+ return nr
+ raise VmError("unknown devid %s" % str(devid))
from xen.xend import arch
from xen.xend.XendError import VmError
from xen.xend.XendLogging import log
+from xen.xend.XendConstants import *
-from xen.xend.server.DevController import DevController, xenbusState
+from xen.xend.server.DevController import DevController
+from xen.xend.server.DevConstants import xenbusState
import xen.lowlevel.xc
import re
from xen.xend.server.pciquirk import *
+from xen.xend.xenstore.xstransact import xstransact
+from xen.xend.xenstore.xswatch import xswatch
xc = xen.lowlevel.xc.xc()
class PciController(DevController):
def __init__(self, vm):
+ self.aerStateWatch = None
DevController.__init__(self, vm)
bus = parse_hex(pci_config.get('bus', 0))
slot = parse_hex(pci_config.get('slot', 0))
func = parse_hex(pci_config.get('func', 0))
+ vslot = parse_hex(pci_config.get('vslot', 0))
+
+ opts = pci_config.get('opts', '')
+ if len(opts) > 0:
+ opts = map(lambda (x, y): x+'='+y, opts)
+ opts = reduce(lambda x, y: x+','+y, opts)
+ back['opts-%i' % pcidevid] = opts
vslt = pci_config.get('vslt')
if vslt is not None:
back['num_devs']=str(pcidevid)
back['uuid'] = config.get('uuid','')
+ if 'pci_msitranslate' in self.vm.info['platform']:
+ back['msitranslate']=str(self.vm.info['platform']['pci_msitranslate'])
+ if 'pci_power_mgmt' in self.vm.info['platform']:
+ back['power_mgmt']=str(self.vm.info['platform']['pci_power_mgmt'])
+
return (0, back, {})
dev = back['dev-%i' % i]
state = states[i]
uuid = back['uuid-%i' %i]
+ opts = ''
+ if 'opts-%i' % i in back:
+ opts = back['opts-%i' % i]
except:
raise XendError('Error reading config')
self.writeBackend(devid, 'state-%i' % (num_olddevs + i),
str(xenbusState['Initialising']))
self.writeBackend(devid, 'uuid-%i' % (num_olddevs + i), uuid)
+ if len(opts) > 0:
+ self.writeBackend(devid, 'opts-%i' % (num_olddevs + i), opts)
self.writeBackend(devid, 'num_devs', str(num_olddevs + i + 1))
# Update vslots
try:
dev_dict['vslt'] = slot_list[i]
except IndexError:
- dev_dict['vslt'] = '0x0'
+ dev_dict['vslt'] = AUTO_PHP_SLOT_STR
pci_devs.append(dev_dict)
if rc<0:
raise VmError(('pci: failed to configure I/O memory on device '+
'%s - errno=%d')%(dev.name,rc))
- rc = xc.physdev_map_pirq(domid = fe_domid,
- index = dev.irq,
- pirq = dev.irq)
- if rc < 0:
- raise VmError(('pci: failed to map irq on device '+
- '%s - errno=%d')%(dev.name,rc))
if dev.msix:
for (start, size) in dev.msix_iomem:
if rc<0:
raise VmError(('pci: failed to remove msi-x iomem'))
+ rc = xc.physdev_map_pirq(domid = fe_domid,
+ index = dev.irq,
+ pirq = dev.irq)
+ if rc < 0:
+ raise VmError(('pci: failed to map irq on device '+
+ '%s - errno=%d')%(dev.name,rc))
if dev.irq>0:
log.debug('pci: enabling irq %d'%dev.irq)
rc = xc.domain_irq_permission(domid = fe_domid, pirq = dev.irq,
else:
# All devices behind the uppermost PCI/PCI-X bridge must be\
# co-assigned to the same guest.
- devs_str = dev.find_coassigned_devices(True)
+ devs_str = dev.find_coassigned_pci_devices(True)
# Remove the element 0 which is a bridge
del devs_str[0]
for (domain, bus, slot, func) in pci_dev_list:
self.setupOneDevice(domain, bus, slot, func)
-
+ wPath = '/local/domain/0/backend/pci/%u/0/aerState' % (self.getDomid())
+ self.aerStatePath = xswatch(wPath, self._handleAerStateWatch)
+ log.debug('pci: register aer watch %s', wPath)
return
+ def _handleAerStateWatch(self, _):
+ log.debug('XendDomainInfo.handleAerStateWatch')
+ if self.getDomid() == 0:
+ raise XendError('Domain 0 cannot be shutdown')
+ readPath = '/local/domain/0/backend/pci/%u/0/aerState' % (self.getDomid())
+ action = xstransact.Read(readPath)
+ if action and action=='aerfail':
+ log.debug('shutdown domain because of aer handle error')
+ self.vm.shutdown('poweroff')
+ return True
+
+
def cleanupOneDevice(self, domain, bus, slot, func):
""" Detach I/O resources for device from frontend domain
"""
self.removeBackend(devid, 'vdev-%i' % i)
self.removeBackend(devid, 'state-%i' % i)
self.removeBackend(devid, 'uuid-%i' % i)
+ tmpopts = self.readBackend(devid, 'opts-%i' % i)
+ if tmpopts is not None:
+ self.removeBackend(devid, 'opts-%i' % i)
else:
if new_num_devs != i:
tmpdev = self.readBackend(devid, 'dev-%i' % i)
tmpuuid = self.readBackend(devid, 'uuid-%i' % i)
self.writeBackend(devid, 'uuid-%i' % new_num_devs, tmpuuid)
self.removeBackend(devid, 'uuid-%i' % i)
+ tmpopts = self.readBackend(devid, 'opts-%i' % i)
+ if tmpopts is not None:
+ self.removeBackend(devid, 'opts-%i' % i)
new_num_devs = new_num_devs + 1
self.writeBackend(devid, 'num_devs', str(new_num_devs))
return new_num_devs
+ def destroyDevice(self, devid, force):
+ DevController.destroyDevice(self, devid, True)
+ log.debug('pci: unregister aer watch')
+        self.unwatchAerState()
+
+ def unwatchAerState(self):
+ """Remove the watch on the domain's aerState node, if any."""
+ try:
+ try:
+ if self.aerStateWatch:
+ self.aerStateWatch.unwatch()
+ finally:
+ self.aerStateWatch = None
+ except:
+ log.exception("Unwatching aerState failed.")
+
def waitForBackend(self,devid):
return (0, "ok - no hotplug")
log.info("Config file does not exist: %s" % PERMISSIVE_CONFIG_FILE)
self.pci_perm_dev_config = ['xend-pci-perm-devs']
- devices = child_at(child(pci_perm_dev_config, 'unconstrained_dev_ids'),0)
+ devices = child_at(child(self.pci_perm_dev_config,
+ 'unconstrained_dev_ids'),0)
if self.__matchPCIdev( devices ):
log.debug("Permissive mode enabled for PCI device [%s]" %
self.devid)
if self.transport:
self.send_reply(["ready", name])
p2cread, p2cwrite = os.pipe()
+ from xen.util import oshelp
+ oshelp.fcntl_setfd_cloexec(p2cwrite, True)
threading.Thread(target=connection.SSLSocketServerConnection.recv2fd,
args=(self.transport.sock, p2cwrite)).start()
try:
--- /dev/null
+import socket
+
+from xen.web import protocol, unix
+
+from xen.xend.XendLogging import log
+from xen.xend import XendNode
+from xen.xend import XendOptions
+
+UDEV_EVENT_PATH = '\0/org/xen/xend/udev_event'
+
+class UdevEventProtocol(protocol.Protocol):
+
+ def __init__(self):
+ protocol.Protocol.__init__(self)
+
+ def dataReceived(self, data):
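+        # udev events arrive as a NUL-separated list of KEY=VALUE strings;
+        # parse them into a dict and dispatch on SUBSYSTEM/ACTION.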
+ udev_event = {}
+ for entry in data.split('\0'):
+ try:
+ opt, val = entry.split("=")
+ udev_event[opt] = val
+ except (TypeError, ValueError):
+ pass
+ if udev_event.get('ACTION', None) is None:
+ log.warn("Invalid udev event received")
+ return
+
+ log.debug("udev event received: %s", udev_event)
+
+ self._process_event(udev_event)
+
+ def _process_event(self, udev_event):
+ try:
+ if (udev_event.get('SUBSYSTEM', None) == 'pci'):
+ pci_name = udev_event.get('PCI_SLOT_NAME', None)
+ if (udev_event['ACTION'] == 'add'):
+ log.info("Adding pci device %s", pci_name)
+ XendNode.instance().add_PPCI(pci_name)
+ elif (udev_event['ACTION'] == 'remove'):
+ log.info("Removing pci device %s", pci_name)
+ XendNode.instance().remove_PPCI(pci_name)
+
+ elif (udev_event.get('SUBSYSTEMS', None) == 'scsi'):
+ if (udev_event['ACTION'] == 'add'):
+ log.info("Adding scsi device")
+ XendNode.instance().add_PSCSI()
+ elif (udev_event['ACTION'] == 'remove'):
+                log.info("Removing scsi device")
+ XendNode.instance().remove_PSCSI()
+
+ elif (udev_event.get('SUBSYSTEM', None) == 'net'):
+ interface = udev_event.get('INTERFACE', None)
+ if (udev_event['ACTION'] == 'add'):
+ log.info("Adding net device %s", interface)
+ XendNode.instance().add_network(interface)
+ elif (udev_event['ACTION'] == 'remove'):
+ log.info("Removing net device %s", interface)
+ XendNode.instance().remove_network(interface)
+
+ except Exception, e:
+            log.warn("error while processing udev event: %s" % str(e))
+
+
+def listenUdevEvent():
+ xoptions = XendOptions.instance()
+ if xoptions.get_xend_udev_event_server():
+ unix.UnixDgramListener(UDEV_EVENT_PATH, UdevEventProtocol)
+
from xen.xend.XendError import VmError
from xen.xend.XendLogging import log
-from xen.xend.server.DevController import DevController, xenbusState
+from xen.xend.server.DevController import DevController
+from xen.xend.server.DevConstants import xenbusState
from xen.xend.xenstore.xstransact import xstransact
class VSCSIController(DevController):
vscsi_config.append(['devs', devs])
state = self.readFrontend(devid, 'state')
vscsi_config.append(['state', state])
+ hostmode = self.readBackend(devid, 'feature-host')
+ vscsi_config.append(['feature-host', hostmode])
backid = self.readFrontend(devid, 'backend-id')
vscsi_config.append(['backend-id', backid])
backpath = self.readFrontend(devid, 'backend')
back[devpath + '/p-devname'] = pdevname
vdev = vscsi_config.get('v-dev', '')
back[devpath + '/v-dev'] = vdev
- state = vscsi_config.get('state', '')
- back[devpath + '/state'] = str(xenbusState[state])
+ state = vscsi_config.get('state', xenbusState['Unknown'])
+ back[devpath + '/state'] = str(state)
devid = vscsi_config.get('devid', '')
back[devpath + '/devid'] = str(devid)
+ host_mode = config.get('feature-host','')
+ back['feature-host'] = str(host_mode)
back['uuid'] = config.get('uuid','')
devid = int(devid)
return (devid, back, {})
vscsi_devs.append(dev_dict)
config['devs'] = vscsi_devs
+ config['feature-host'] = self.readBackend(devid, 'feature-host')
config['uuid'] = self.readBackend(devid, 'uuid')
return config
(devid, back, front) = self.getDeviceDetails(config)
devid = int(devid)
vscsi_config = config['devs'][0]
- state = vscsi_config.get('state', '')
+ state = vscsi_config.get('state', xenbusState['Unknown'])
driver_state = self.readBackend(devid, 'state')
+
if str(xenbusState['Connected']) != driver_state:
raise VmError("Driver status is not connected")
uuid = self.readBackend(devid, 'uuid')
- if state == 'Initialising':
+ if state == xenbusState['Initialising']:
back['uuid'] = uuid
self.writeBackend(devid, back)
- elif state == 'Closing':
+ elif state == xenbusState['Closing']:
found = False
devs = self.readBackendList(devid, "vscsi-devs")
+ hostmode = int(self.readBackend(devid, 'feature-host'))
vscsipath = "vscsi-devs/"
vdev = vscsi_config.get('v-dev', '')
for dev in devs:
devpath = vscsipath + dev
old_vdev = self.readBackend(devid, devpath + '/v-dev')
- if vdev == old_vdev:
+
+ if hostmode == 1:
+                # In host mode, every v-dev belonging to this devid is deleted.
+ found = True
+ self.writeBackend(devid, devpath + '/state', \
+ str(xenbusState['Closing']))
+ elif vdev == old_vdev:
found = True
self.writeBackend(devid, devpath + '/state', \
str(xenbusState['Closing']))
else:
raise XendError("Error configuring device invalid "
- "state '%s'" % state)
+ "state '%s'" % xenbusState[state])
self.writeBackend(devid, 'state', str(xenbusState['Reconfiguring']))
return self.readBackend(devid, 'uuid')
return 0
# sanity check on the data from the file
+    # requiring 'memory', 'name', and either 'kernel' or 'bootloader'
count = 0
- required = ['kernel', 'memory', 'name']
+ required = ['kernel', 'bootloader', 'memory', 'name']
for (k, v) in locs.items():
if k in required:
count += 1
- if count != 3:
+ if count < len(required) - 1:
print "Invalid configuration file."
return 0
else:
other_config*)>
<!ATTLIST vm is_a_template CDATA #REQUIRED
auto_power_on CDATA #REQUIRED
+ s3_integrity CDATA #REQUIRED
vcpus_max CDATA #REQUIRED
vcpus_at_startup CDATA #REQUIRED
actions_after_shutdown %NORMAL_EXIT; #REQUIRED
<!ELEMENT vtpm (name*)>
<!ATTLIST vtpm backend CDATA #REQUIRED>
-<!ELEMENT pci EMPTY>
+<!ELEMENT pci (pci_opt*)>
<!ATTLIST pci domain CDATA #REQUIRED
bus CDATA #REQUIRED
slot CDATA #REQUIRED
func CDATA #REQUIRED
+ opts_str CDATA #IMPLIED
vslt CDATA #IMPLIED>
<!ELEMENT vscsi EMPTY>
<!ATTLIST vcpu_param key CDATA #REQUIRED
value CDATA #REQUIRED>
+<!ELEMENT pci_opt EMPTY>
+<!ATTLIST pci_opt key CDATA #REQUIRED
+ value CDATA #REQUIRED>
+
<!ELEMENT other_config EMPTY>
<!ATTLIST other_config key CDATA #REQUIRED
value CDATA #REQUIRED>
-#============================================================================
+#============================================================================
# This library is free software; you can redistribute it and/or
# modify it under the terms of version 2.1 of the GNU Lesser General Public
# License as published by the Free Software Foundation.
from xen.xend import osdep
import xen.xend.XendClient
from xen.xend.XendBootloader import bootloader
+from xen.xend.XendConstants import *
+from xen.xend.server.DevConstants import xenbusState
from xen.util import blkif
from xen.util import vscsi_util
import xen.util.xsm.xsm as security
use="""Timer mode (0=delay virtual time when ticks are missed;
1=virtual time is always wallclock time.""")
+gopts.var('vpt_align', val='VPT_ALIGN',
+ fn=set_int, default=1,
+ use="Enable aligning all periodic vpt to reduce timer interrupts.")
+
gopts.var('viridian', val='VIRIDIAN',
fn=set_int, default=0,
use="""Expose Viridian interface to x86 HVM guest?
backend driver domain to use for the disk.
The option may be repeated to add more than one disk.""")
-gopts.var('pci', val='BUS:DEV.FUNC',
+gopts.var('pci', val='BUS:DEV.FUNC[@VSLOT][,msitranslate=0|1][,power_mgmt=0|1]',
fn=append_value, default=[],
use="""Add a PCI device to a domain, using given params (in hex).
- For example 'pci=c0:02.1'.
- The option may be repeated to add more than one pci device.""")
+ For example 'pci=c0:02.1'.
+ If VSLOT is supplied the device will be inserted into that
+ virtual slot in the guest, else a free slot is selected.
+          If msitranslate is set, MSI-INTx translation is enabled if possible.
+          A guest that doesn't support MSI will get IO-APIC type IRQs
+          translated from physical MSI (HVM only, default 1).
+          If power_mgmt is set, the guest OS will be able to program the
+          power states D0-D3hot of the device (HVM only, default 0).
+          The option may be repeated to add more than one pci device.""")
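+# A hypothetical example of the extended syntax (the BDF, slot and option
+# values are illustrative only):
+#   pci = [ '07:00.0@6,msitranslate=1,power_mgmt=0' ]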
gopts.var('vscsi', val='PDEV,VDEV[,DOM]',
fn=append_value, default=[],
For example 'irq=7'.
This option may be repeated to add more than one IRQ.""")
-gopts.var('vfb', val="type={vnc,sdl},vncunused=1,vncdisplay=N,vnclisten=ADDR,display=DISPLAY,xauthority=XAUTHORITY,vncpasswd=PASSWORD,opengl=1,keymap=FILE",
+gopts.var('vfb', val="vnc=1,sdl=1,vncunused=1,vncdisplay=N,vnclisten=ADDR,display=DISPLAY,xauthority=XAUTHORITY,vncpasswd=PASSWORD,opengl=1,keymap=FILE",
fn=append_value, default=[],
use="""Make the domain a framebuffer backend.
- The backend type should be either sdl or vnc.
- For type=vnc, connect an external vncviewer. The server will listen
+ Both sdl=1 and vnc=1 can be enabled at the same time.
+ For vnc=1, connect an external vncviewer. The server will listen
on ADDR (default 127.0.0.1) on port N+5900. N defaults to the
domain id. If vncunused=1, the server will try to find an arbitrary
unused port above 5900. vncpasswd overrides the XenD configured
default password.
- For type=sdl, a viewer will be started automatically using the
+ For sdl=1, a viewer will be started automatically using the
given DISPLAY and XAUTHORITY, which default to the current user's
ones. OpenGL will be used by default unless opengl is set to 0.
          keymap overrides the XenD configured default layout file.""")
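+# A hypothetical vfb line using the new syntax (address and options are
+# illustrative only):
+#   vfb = [ 'vnc=1,sdl=1,vnclisten=0.0.0.0,vncunused=1' ]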
use="""Try to find an unused port for the VNC server.
Only valid when vnc=1.""")
-gopts.var('videoram', val='',
- fn=set_value, default=None,
- use="""Maximum amount of videoram PV guest can allocate
+gopts.var('videoram', val='MEMORY',
+ fn=set_int, default=4,
+ use="""Maximum amount of videoram a guest can allocate
for frame buffer.""")
gopts.var('sdl', val='',
use="""Hap status (0=hap is disabled;
1=hap is enabled.""")
+gopts.var('s3_integrity', val='TBOOT_MEMORY_PROTECT',
+ fn=set_int, default=1,
+ use="""Should domain memory integrity be verified during S3?
+          (0=protection is disabled; 1=protection is enabled).""")
+
gopts.var('cpuid', val="IN[,SIN]:eax=EAX,ebx=EBX,ecx=ECX,edx=EDX",
fn=append_value, default=[],
use="""Cpuid description.""")
fn=set_bool, default=None,
use="""Do not inject spurious page faults into this guest""")
+gopts.var('pci_msitranslate', val='TRANSLATE',
+ fn=set_int, default=1,
+ use="""Global PCI MSI-INTx translation flag (0=disable;
+          1=enable).""")
+
+gopts.var('pci_power_mgmt', val='POWERMGMT',
+ fn=set_int, default=0,
+ use="""Global PCI Power Management flag (0=disable;1=enable).""")
+
+gopts.var('xen_platform_pci', val='0|1',
+ fn=set_int, default=1,
+ use="Is xen_platform_pci used?")
+
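+# A hypothetical HVM config fragment using the new platform flags (the values
+# shown are the defaults):
+#   pci_msitranslate = 1
+#   pci_power_mgmt   = 0
+#   xen_platform_pci = 1
+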
def err(msg):
"""Print an error to stderr and exit.
"""
if vals.root:
cmdline_root = strip('root=', vals.root)
config_image.append(['root', cmdline_root])
+ if vals.videoram:
+ config_image.append(['videoram', vals.videoram])
if vals.extra:
config_image.append(['args', vals.extra])
"""Create the config for pci devices.
"""
config_pci = []
- for (domain, bus, slot, func) in vals.pci:
- config_pci.append(['dev', ['domain', domain], ['bus', bus], \
- ['slot', slot], ['func', func]])
+ for (domain, bus, slot, func, vslot, opts) in vals.pci:
+ config_pci_opts = []
+ d = comma_sep_kv_to_dict(opts)
+
+ def f(k):
+ if k not in ['msitranslate', 'power_mgmt']:
+ err('Invalid pci option: ' + k)
+
+ config_pci_opts.append([k, d[k]])
+
+ config_pci_bdf = ['dev', ['domain', domain], ['bus', bus], \
+ ['slot', slot], ['func', func], ['vslot', vslot]]
+ map(f, d.keys())
+ if len(config_pci_opts)>0:
+ config_pci_bdf.append(['opts', config_pci_opts])
+
+ config_pci.append(config_pci_bdf)
if len(config_pci)>0:
config_pci.insert(0, 'pci')
config_devs.append(['device', config_pci])
-def vscsi_convert_sxp_to_dict(dev_sxp):
- dev_dict = {}
- for opt_val in dev_sxp[1:]:
- try:
- opt, val = opt_val
- dev_dict[opt] = val
- except TypeError:
- pass
- return dev_dict
-
-def vscsi_lookup_devid(devlist, req_devid):
- if len(devlist) == 0:
- return 0
- else:
- for devid, backend in devlist:
- if devid == req_devid:
- return 1
- return 0
-
def configure_vscsis(config_devs, vals):
"""Create the config for vscsis (virtual scsi devices).
"""
- devidlist = []
- config_scsi = []
+
+ def get_devid(hctl):
+ return int(hctl.split(':')[0])
+
if len(vals.vscsi) == 0:
return 0
+ config_scsi = {}
+ pHCTL_list = []
+ vHCTL_list = []
+
scsi_devices = vscsi_util.vscsi_get_scsidevices()
for (p_dev, v_dev, backend) in vals.vscsi:
- tmp = p_dev.split(':')
- if len(tmp) == 4:
- (p_hctl, block) = vscsi_util._vscsi_hctl_block(p_dev, scsi_devices)
- else:
- (p_hctl, block) = vscsi_util._vscsi_block_scsiid_to_hctl(p_dev, scsi_devices)
+ (p_hctl, devname) = \
+ vscsi_util.vscsi_get_hctl_and_devname_by(p_dev, scsi_devices)
if p_hctl == None:
- raise ValueError("Cannot find device \"%s\"" % p_dev)
-
- for config in config_scsi:
- dev = vscsi_convert_sxp_to_dict(config)
- if dev['v-dev'] == v_dev:
- raise ValueError('The virtual device "%s" is already defined' % v_dev)
-
- v_hctl = v_dev.split(':')
- devid = int(v_hctl[0])
- config_scsi.append(['dev', \
- ['state', 'Initialising'], \
- ['devid', devid], \
- ['p-dev', p_hctl], \
- ['p-devname', block], \
- ['v-dev', v_dev] ])
-
- if vscsi_lookup_devid(devidlist, devid) == 0:
- devidlist.append([devid, backend])
-
- for devid, backend in devidlist:
- tmp = []
- for config in config_scsi:
- dev = vscsi_convert_sxp_to_dict(config)
- if dev['devid'] == devid:
- tmp.append(config)
-
- tmp.insert(0, 'vscsi')
- if backend:
- tmp.append(['backend', backend])
- config_devs.append(['device', tmp])
+ raise ValueError('Cannot find device "%s"' % p_dev)
+
+ feature_host = 0
+ if v_dev == 'host':
+ if serverType == SERVER_XEN_API:
+ # TODO
+                raise ValueError("SCSI device assignment by HBA is not implemented")
+ feature_host = 1
+ scsi_info = []
+ devid = get_devid(p_hctl)
+ for (pHCTL, devname, _, _) in scsi_devices:
+ if get_devid(pHCTL) == devid:
+ scsi_info.append([devid, pHCTL, devname, pHCTL])
+ else:
+ scsi_info = [[get_devid(v_dev), p_hctl, devname, v_dev]]
+
+ devid_key = scsi_info[0][0]
+ try:
+ config = config_scsi[devid_key]
+ except KeyError:
+ config = {'feature-host': feature_host, 'backend': backend, 'devs': []}
+
+ devs = config['devs']
+ for (devid, pHCTL, devname, vHCTL) in scsi_info:
+ if pHCTL in pHCTL_list:
+ raise ValueError('The physical device "%s" is already defined' % pHCTL)
+ if vHCTL in vHCTL_list:
+ raise ValueError('The virtual device "%s" is already defined' % vHCTL)
+ pHCTL_list.append(pHCTL)
+ vHCTL_list.append(vHCTL)
+ devs.append(['dev', \
+ ['state', xenbusState['Initialising']], \
+ ['devid', devid], \
+ ['p-dev', pHCTL], \
+ ['p-devname', devname], \
+ ['v-dev', vHCTL] ])
+
+ if config['feature-host'] != feature_host:
+            raise ValueError('The physical device "%s" cannot be defined '
+                             'because the mode is different' % scsi_info[0][1])
+        if config['backend'] != backend:
+            raise ValueError('The physical device "%s" cannot be defined '
+                             'because the backend is different' % scsi_info[0][1])
+
+ config['devs'] = devs
+ config_scsi[devid_key] = config
+
+ for config in config_scsi.values():
+ device = ['vscsi', ['feature-host', config['feature-host']]]
+ for dev in config['devs']:
+ device.append(dev)
+ if config['backend']:
+ device.append(['backend', config['backend']])
+ config_devs.append(['device', device])
def configure_ioports(config_devs, vals):
"""Create the config for legacy i/o ranges.
for f in vals.vfb:
d = comma_sep_kv_to_dict(f)
config = ['vfb']
- if not d.has_key("type"):
- d['type'] = 'sdl'
+    # handle the legacy 'type=vnc|sdl' syntax
+ if d.has_key("type"):
+ d[d['type']] = '1'
+ del d['type']
for (k,v) in d.iteritems():
if not k in [ 'vnclisten', 'vncunused', 'vncdisplay', 'display',
- 'videoram', 'xauthority', 'type', 'vncpasswd',
+ 'videoram', 'xauthority', 'sdl', 'vnc', 'vncpasswd',
'opengl', 'keymap' ]:
err("configuration option %s unknown to vfbs" % k)
config.append([k,v])
elif num > 1:
err("VM config error: Multiple access_control definitions!")
+def configure_mem_prot(config_image, vals):
+ """Create the config for S3 memory integrity verification under tboot.
+ """
+ config_image.append(['s3_integrity', vals.s3_integrity])
def configure_vtpm(config_devs, vals):
"""Create the config for virtual TPM interfaces.
'sdl', 'display', 'xauthority', 'rtc_timeoffset', 'monitor',
'acpi', 'apic', 'usb', 'usbdevice', 'keymap', 'pci', 'hpet',
'guest_os_type', 'hap', 'opengl', 'cpuid', 'cpuid_check',
- 'viridian', 'xen_extended_power_mgmt' ]
+ 'viridian', 'xen_extended_power_mgmt', 'pci_msitranslate',
+ 'vpt_align', 'pci_power_mgmt', 'xen_platform_pci' ]
for a in args:
if a in vals.__dict__ and vals.__dict__[a] is not None:
else:
config.append(['bootloader_args', '-q'])
config.append(['image', config_image])
+    configure_mem_prot(config, vals)
config_devs = []
configure_disks(config_devs, vals)
pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \
r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \
r"(?P<slot>[0-9a-fA-F]{1,2})[.,]" + \
- r"(?P<func>[0-7])$", pci_dev_str)
+ r"(?P<func>[0-7])" + \
+ r"(@(?P<vslot>[0-9a-fA-F]))?" + \
+ r"(,(?P<opts>.*))?$", \
+ pci_dev_str)
if pci_match!=None:
- pci_dev_info = pci_match.groupdict('0')
+ pci_dev_info = pci_match.groupdict('')
+ if pci_dev_info['domain']=='':
+ pci_dev_info['domain']='0'
+ if pci_dev_info['vslot']=='':
+ pci_dev_info['vslot']="%02x" % AUTO_PHP_SLOT
try:
pci.append( ('0x'+pci_dev_info['domain'], \
'0x'+pci_dev_info['bus'], \
'0x'+pci_dev_info['slot'], \
- '0x'+pci_dev_info['func']))
+ '0x'+pci_dev_info['func'], \
+ '0x'+pci_dev_info['vslot'], \
+ pci_dev_info['opts']))
except IndexError:
err('Error in PCI slot syntax "%s"'%(pci_dev_str))
vals.pci = pci
n = len(d)
if n == 2:
tmp = d[1].split(':')
- if len(tmp) != 4:
+ if d[1] != 'host' and len(tmp) != 4:
err('vscsi syntax error "%s"' % d[1])
else:
d.append(None)
ioports.append(hexd)
vals.ioports = ioports
+def preprocess_irq(vals):
+ if not vals.irq: return
+ irq = []
+ for v in vals.irq:
+ d = repr(v)
+ irq.append(d)
+ vals.irq = irq
+
def preprocess_vtpm(vals):
if not vals.vtpm: return
vtpms = []
preprocess_vscsi(vals)
preprocess_ioports(vals)
preprocess_ip(vals)
+ preprocess_irq(vals)
preprocess_nfs(vals)
preprocess_vtpm(vals)
preprocess_access_control(vals)
except:
server.xend.domain.destroy(dom)
err("Failed to unpause domain %s" % dom)
- opts.info("Started domain %s" % (dom))
- return int(sxp.child_value(dominfo, 'domid'))
+ domid = int(sxp.child_value(dominfo, 'domid'))
+ opts.info("Started domain %s (id=%d)" % (dom, domid))
+ return domid
def get_xauthority():
elif not opts.is_xml:
dom = make_domain(opts, config)
- if opts.vals.vncviewer:
+ if opts.vals.vncconsole:
domid = domain_name_to_domid(sxp.child_value(config, 'name', -1))
vncviewer_autopass = getattr(opts.vals,'vncviewer-autopass', False)
console.runVncViewer(domid, vncviewer_autopass, True)
from xen.xend import sxp
from xen.xend import XendClient
from xen.xend.XendConstants import *
+from xen.xend.server.DevConstants import xenbusState
from xen.xm.opts import OptionError, Opts, wrap, set_true
from xen.xm import console
import XenAPI
import xen.lowlevel.xc
-xc = xen.lowlevel.xc.xc()
+try:
+ xc = xen.lowlevel.xc.xc()
+except Exception, ex:
+    print >>sys.stderr, "Is the Xen kernel running?"
+ sys.exit(1)
import inspect
from xen.xend import XendOptions
'vnet-delete' : ('<VnetId>', 'Delete a Vnet.'),
'vnet-list' : ('[-l|--long]', 'List Vnets.'),
'vtpm-list' : ('<Domain> [--long]', 'List virtual TPM devices.'),
- 'pci-attach' : ('<Domain> <domain:bus:slot.func> [virtual slot]',
+ 'pci-attach' : ('[-o|--options=<opt>] <Domain> <domain:bus:slot.func> [virtual slot]',
'Insert a new pass-through pci device.'),
'pci-detach' : ('<Domain> <domain:bus:slot.func>',
'Remove a domain\'s pass-through pci device.'),
(options, params) = getopt.gnu_getopt(args, 'c', ['checkpoint'])
except getopt.GetoptError, opterr:
err(opterr)
- sys.exit(1)
+ usage('save')
checkpoint = False
for (k, v) in options:
else:
filename = None
- if not live:
- ds = server.xend.domain.pause(dom, True)
-
- try:
- print "Dumping core of domain: %s ..." % str(dom)
- server.xend.domain.dump(dom, filename, live, crash)
-
- if crash:
- print "Destroying domain: %s ..." % str(dom)
- server.xend.domain.destroy(dom)
- elif reset:
- print "Resetting domain: %s ..." % str(dom)
- server.xend.domain.reset(dom)
- finally:
- if not live and not crash and not reset and ds == DOM_STATE_RUNNING:
- server.xend.domain.unpause(dom)
+ print "Dumping core of domain: %s ..." % str(dom)
+ server.xend.domain.dump(dom, filename, live, crash, reset)
def xm_rename(args):
arg_check(args, "rename", 2)
'mac' : get_info('mac', str, '??'),
#block-device specific
'ring-ref' : get_info('ring-ref', int, -1),
+ #vscsi specific
+ 'feature-host' : get_info('feature-host', int, -1),
}
def arg_check_for_resource_list(args, name):
print d.name,
print
+def vscsi_sort(devs):
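+    # Bucket-sort each device list by the virtual HCTL fields, least
+    # significant first (LUN, then target, then channel), so the final
+    # output is ordered by channel, then target, then LUN.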
+ def sort_hctl(ds, l):
+ s = []
+ for d1 in ds:
+ for d2 in d1:
+ v_dev = sxp.child_value(d2, 'v-dev')
+ n = int(v_dev.split(':')[l])
+ try:
+ j = s[n]
+ except IndexError:
+ j = []
+ s.extend([ [] for _ in range(len(s), n+1) ])
+ j.append(d2)
+ s[n] = j
+ return s
+
+ for i in range(len(devs)):
+ ds1 = [ devs[i][1][0][1] ]
+ ds1 = sort_hctl(ds1, 3)
+ ds1 = sort_hctl(ds1, 2)
+ ds1 = sort_hctl(ds1, 1)
+ ds2 = []
+ for d in ds1:
+ ds2.extend(d)
+ devs[i][1][0][1] = ds2
+ return devs
+
def vscsi_convert_sxp_to_dict(dev_sxp):
dev_dict = {}
for opt_val in dev_sxp[1:]:
else:
devs = server.xend.domain.getDeviceSxprs(dom, 'vscsi')
+ # Sort devs by virtual HCTL.
+ devs = vscsi_sort(devs)
+
if use_long:
map(PrettyPrint.prettyprint, devs)
else:
hdr = 0
for x in devs:
if hdr == 0:
- print "%-3s %-3s %-5s %-10s %-5s %-10s %-4s" \
- % ('Idx', 'BE', 'state', 'phy-hctl', 'phy', 'vir-hctl', 'devstate')
+ print "%-3s %-3s %-5s %-4s %-10s %-5s %-10s %-4s" \
+ % ('Idx', 'BE', 'state', 'host', 'phy-hctl', 'phy', 'vir-hctl', 'devstate')
hdr = 1
ni = parse_dev_info(x[1])
ni['idx'] = int(x[0])
for dev in x[1][0][1]:
mi = vscsi_convert_sxp_to_dict(dev)
- print "%(idx)-3d %(backend-id)-3d %(state)-5d " % ni,
+ print "%(idx)-3d %(backend-id)-3d %(state)-5d %(feature-host)-4d " % ni,
print "%(p-dev)-10s %(p-devname)-5s %(v-dev)-10s %(frontstate)-4s" % mi
def parse_block_configuration(args):
vif.append(vif_param)
server.xend.domain.device_create(dom, vif)
-def parse_pci_configuration(args, state):
+def parse_pci_configuration(args, state, opts = ''):
dom = args[0]
pci_dev_str = args[1]
if len(args) == 3:
vslt = args[2]
else:
- vslt = '0x0' #chose a free virtual PCI slot
+ vslt = AUTO_PHP_SLOT_STR
pci=['pci']
pci_match = re.match(r"((?P<domain>[0-9a-fA-F]{1,4})[:,])?" + \
r"(?P<bus>[0-9a-fA-F]{1,2})[:,]" + \
if pci_match == None:
raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt))
pci_dev_info = pci_match.groupdict('0')
+
try:
- pci.append(['dev', ['domain', '0x'+ pci_dev_info['domain']], \
+ pci_bdf =['dev', ['domain', '0x'+ pci_dev_info['domain']], \
['bus', '0x'+ pci_dev_info['bus']],
['slot', '0x'+ pci_dev_info['slot']],
['func', '0x'+ pci_dev_info['func']],
- ['vslt', '0x%x' % int(vslt, 16)]])
+ ['vslt', '0x%x' % int(vslt, 16)]]
+ if len(opts) > 0:
+ pci_bdf.append(['opts', opts])
+ pci.append(pci_bdf)
+
except:
raise OptionError("Invalid argument: %s %s" % (pci_dev_str,vslt))
pci.append(['state', state])
return (dom, pci)
def xm_pci_attach(args):
- arg_check(args, 'pci-attach', 2, 3)
- (dom, pci) = parse_pci_configuration(args, 'Initialising')
+ config_pci_opts = []
+ (options, params) = getopt.gnu_getopt(args, 'o:', ['options='])
+ for (k, v) in options:
+ if k in ('-o', '--options'):
+ if len(v.split('=')) != 2:
+ err("Invalid pci attach option: %s" % v)
+ usage('pci-attach')
+ config_pci_opts.append(v.split('='))
+
+ n = len([i for i in params if i != '--'])
+ if n < 2 or n > 3:
+ err("Invalid argument for 'xm pci-attach'")
+ usage('pci-attach')
+
+ (dom, pci) = parse_pci_configuration(params, 'Initialising',
+ config_pci_opts)
if serverType == SERVER_XEN_API:
dpci_record = {
"VM": get_single_vm(dom),
"PPCI": target_ref,
- "hotplug_slot": vslt
+ "hotplug_slot": vslt,
+ "options": dict(config_pci_opts)
}
server.xenapi.DPCI.create(dpci_record)
server.xend.domain.device_configure(dom, pci)
def parse_scsi_configuration(p_scsi, v_hctl, state):
- v = v_hctl.split(':')
- if len(v) != 4:
- raise OptionError("Invalid argument: %s" % v_hctl)
+ def get_devid(hctl):
+ return int(hctl.split(':')[0])
+
+ host_mode = 0
+ scsi_devices = None
if p_scsi is not None:
- (p_hctl, block) = vscsi_util.vscsi_search_hctl_and_block(p_scsi)
- if p_hctl == None:
+ # xm scsi-attach
+ if v_hctl == "host":
+ if serverType == SERVER_XEN_API:
+ # TODO
+                raise OptionError("SCSI device assignment by HBA is not implemented")
+ host_mode = 1
+ scsi_devices = vscsi_util.vscsi_get_scsidevices()
+ elif len(v_hctl.split(':')) != 4:
+ raise OptionError("Invalid argument: %s" % v_hctl)
+ (p_hctl, devname) = \
+ vscsi_util.vscsi_get_hctl_and_devname_by(p_scsi, scsi_devices)
+ if p_hctl is None:
raise OptionError("Cannot find device '%s'" % p_scsi)
+ if host_mode:
+ scsi_info = []
+ devid = get_devid(p_hctl)
+ for pHCTL, devname, _, _ in scsi_devices:
+ if get_devid(pHCTL) == devid:
+ scsi_info.append([devid, pHCTL, devname, pHCTL])
+ else:
+ scsi_info = [[get_devid(v_hctl), p_hctl, devname, v_hctl]]
else:
- p_hctl = ''
- block = ''
-
- scsi = ['vscsi']
- scsi.append(['dev', \
- ['state', state], \
- ['devid', int(v[0])], \
- ['p-dev', p_hctl], \
- ['p-devname', block], \
- ['v-dev', v_hctl] \
- ])
-
+ # xm scsi-detach
+ if len(v_hctl.split(':')) != 4:
+ raise OptionError("Invalid argument: %s" % v_hctl)
+ scsi_info = [[get_devid(v_hctl), None, None, v_hctl]]
+
+ scsi = ['vscsi', ['feature-host', host_mode]]
+ for devid, pHCTL, devname, vHCTL in scsi_info:
+ scsi.append(['dev', \
+ ['state', state], \
+ ['devid', devid], \
+ ['p-dev', pHCTL], \
+ ['p-devname', devname], \
+ ['v-dev', vHCTL] \
+ ])
return scsi
def xm_scsi_attach(args):
dom = args[0]
p_scsi = args[1]
v_hctl = args[2]
- scsi = parse_scsi_configuration(p_scsi, v_hctl, 'Initialising')
+ scsi = parse_scsi_configuration(p_scsi, v_hctl, xenbusState['Initialising'])
if serverType == SERVER_XEN_API:
arg_check(args, 'scsi-detach', 2)
dom = args[0]
v_hctl = args[1]
- scsi = parse_scsi_configuration(None, v_hctl, 'Closing')
+ scsi = parse_scsi_configuration(None, v_hctl, xenbusState['Closing'])
if serverType == SERVER_XEN_API:
vm.attributes["is_a_template"].value == 'true',
"auto_power_on":
vm.attributes["auto_power_on"].value == 'true',
+ "s3_integrity":
+ vm.attributes["s3_integrity"].value,
"memory_static_max":
get_child_node_attribute(vm, "memory", "static_max"),
"memory_static_min":
"PPCI":
target_ref,
"hotplug_slot":
- int(pci.attributes["func"].value, 16)
+ int(pci.attributes["func"].value, 16),
+ "options":
+ get_child_nodes_as_dict(pci,
+ "pci_opt", "key", "value")
}
return server.xenapi.DPCI.create(dpci_record)
= str(get_child_by_name(config, "vcpus", 1))
vm.attributes["vcpus_at_startup"] \
= str(get_child_by_name(config, "vcpus", 1))
+ vm.attributes["s3_integrity"] \
+ = str(get_child_by_name(config, "s3_integrity", 0))
sec_data = get_child_by_name(config, "security")
if sec_data:
= get_child_by_name(dev_sxp, "func", "0")
pci.attributes["vslt"] \
= get_child_by_name(dev_sxp, "vslt", "0")
+ for opt in get_child_by_name(dev_sxp, "opts", ""):
+ if len(opt) > 0:
+ pci_opt = document.createElement("pci_opt")
+ pci_opt.attributes["key"] = opt[0]
+ pci_opt.attributes["value"] = opt[1]
+ pci.appendChild(pci_opt)
pcis.append(pci)
'usbdevice',
'hpet',
'timer_mode',
+ 'vpt_align',
'viridian',
'vhpt',
'guest_os_type',
'hap',
+ 'pci_msitranslate',
+ 'pci_power_mgmt',
+ 'xen_platform_pci',
]
platform_configs = []
override XEN_TARGET_ARCH = x86_32
XEN_ROOT = ../..
-CFLAGS :=
+CFLAGS =
include $(XEN_ROOT)/tools/Rules.mk
# Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS += $(call cc-option,$(CC),-nopie,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
CFLAGS += -fno-builtin -msoft-float
all: compile
gc.tar.gz:
- wget http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/$@
+ #wget http://www.hpl.hp.com/personal/Hans_Boehm/gc/gc_source/$@
+ wget $(XEN_EXTFILES_URL)/$@
.PHONY: gc
gc: gc.tar.gz
LIB_OBJS := $(LIB_SRCS:.c=.o)
PIC_OBJS := $(LIB_SRCS:.c=.opic)
-CFLAGS += -Werror -fno-strict-aliasing $(call cc-option,$(CC),-fgnu89-inline,)
+$(call cc-option-add,CFLAGS,CC,-fgnu89-inline)
+CFLAGS += -Werror -fno-strict-aliasing
CFLAGS += -O3
#CFLAGS += -g
-# Get gcc to generate the dependencies for us.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEPS = .*.d
-
MAJOR := 3.0
MINOR := 0
LIB := libxutil.so
$(MAKE) -C $(TPM_EMULATOR_DIR); \
fi \
else \
- echo "*** Unable to build VTPMs. libgmp could not be found."; \
+ echo "=== Unable to build VTPMs. libgmp could not be found."; \
fi
# General compiler flags
CFLAGS = -Werror -g3 -I.
-# For generating dependencies
-CFLAGS += -Wp,-MD,.$(@F).d
-
-DEP_FILES = .*.d
-
# Generic project files
HDRS = $(wildcard *.h)
SRCS = $(wildcard *.c)
$(OBJS): $(SRCS)
--include $(DEP_FILES)
+-include $(DEPS)
BUILD_EMULATOR = y
# General compiler flags
CFLAGS = -Werror -g3 -I.
-# For generating dependencies
-CFLAGS += -Wp,-MD,.$(@F).d
-
-DEP_FILES = .*.d
-
# Generic project files
HDRS = $(wildcard *.h)
SRCS = $(wildcard *.c)
$(OBJS): $(SRCS)
--include $(DEP_FILES)
+-include $(DEPS)
# Make sure these are just rules
.PHONY : all build install clean
CFLAGS += -Werror
CFLAGS += $(CFLAGS_libxenctrl) $(CFLAGS_libxenguest) $(CFLAGS_libxenstore)
-# Make gcc generate dependencies.
-CFLAGS += -Wp,-MD,.$(@F).d
-PROG_DEP = .*.d
-
PROGRAMS = xc_restore xc_save readnotes lsevtchn
LDLIBS = $(LDFLAGS_libxenctrl) $(LDFLAGS_libxenguest) $(LDFLAGS_libxenstore)
.PHONY: clean
clean:
$(RM) *.o $(PROGRAMS)
- $(RM) $(PROG_DEP)
+ $(RM) $(DEPS)
--include $(PROG_DEP)
+-include $(DEPS)
#include <xg_private.h>
#include <xc_dom.h> /* gunzip bits */
-#include <xen/libelf.h>
+#include <xen/libelf/libelf.h>
static void print_string_note(const char *prefix, struct elf_binary *elf,
const elf_note *note)
#include <xenguest.h>
static struct suspendinfo {
+ int xc_fd; /* libxc handle */
int xce; /* event channel handle */
int suspend_evtchn;
+ int domid;
+ unsigned int flags;
} si;
/**
!strncmp(ans, "done\n", 5));
}
-static int suspend_evtchn_release(void)
-{
- if (si.suspend_evtchn >= 0) {
- xc_evtchn_unbind(si.xce, si.suspend_evtchn);
- si.suspend_evtchn = -1;
- }
- if (si.xce >= 0) {
- xc_evtchn_close(si.xce);
- si.xce = -1;
- }
-
- return 0;
-}
-
-static int await_suspend(void)
-{
- int rc;
-
- do {
- rc = xc_evtchn_pending(si.xce);
- if (rc < 0) {
- warnx("error polling suspend notification channel: %d", rc);
- return -1;
- }
- } while (rc != si.suspend_evtchn);
-
- /* harmless for one-off suspend */
- if (xc_evtchn_unmask(si.xce, si.suspend_evtchn) < 0)
- warnx("failed to unmask suspend notification channel: %d", rc);
-
- return 0;
-}
-
-static int suspend_evtchn_init(int xc, int domid)
-{
- struct xs_handle *xs;
- char path[128];
- char *portstr;
- unsigned int plen;
- int port;
- int rc;
-
- si.xce = -1;
- si.suspend_evtchn = -1;
-
- xs = xs_daemon_open();
- if (!xs) {
- warnx("failed to get xenstore handle");
- return -1;
- }
- sprintf(path, "/local/domain/%d/device/suspend/event-channel", domid);
- portstr = xs_read(xs, XBT_NULL, path, &plen);
- xs_daemon_close(xs);
-
- if (!portstr || !plen) {
- warnx("could not read suspend event channel");
- return -1;
- }
-
- port = atoi(portstr);
- free(portstr);
-
- si.xce = xc_evtchn_open();
- if (si.xce < 0) {
- warnx("failed to open event channel handle");
- goto cleanup;
- }
-
- si.suspend_evtchn = xc_evtchn_bind_interdomain(si.xce, domid, port);
- if (si.suspend_evtchn < 0) {
- warnx("failed to bind suspend event channel: %d", si.suspend_evtchn);
- goto cleanup;
- }
-
- rc = xc_domain_subscribe_for_suspend(xc, domid, port);
- if (rc < 0) {
- warnx("failed to subscribe to domain: %d", rc);
- goto cleanup;
- }
-
- /* event channel is pending immediately after binding */
- await_suspend();
-
- return 0;
-
- cleanup:
- suspend_evtchn_release();
-
- return -1;
-}
-
/**
* Issue a suspend request to a dedicated event channel in the guest, and
* receive the acknowledgement from the subscribe event channel. */
return 0;
}
- if (await_suspend() < 0) {
+ if (xc_await_suspend(si.xce, si.suspend_evtchn) < 0) {
warnx("suspend failed");
return 0;
}
static int suspend(void)
{
- if (si.suspend_evtchn >= 0)
+ unsigned long sx_state = 0;
+
+ /* Cannot notify guest to shut itself down if it's in ACPI sleep state. */
+ if (si.flags & XCFLAGS_HVM)
+ xc_get_hvm_param(si.xc_fd, si.domid,
+ HVM_PARAM_ACPI_S_STATE, &sx_state);
+
+ if ((sx_state == 0) && (si.suspend_evtchn >= 0))
return evtchn_suspend();
return compat_suspend();
return seg;
}
-
int
main(int argc, char **argv)
{
- unsigned int domid, maxit, max_f, flags;
- int xc_fd, io_fd, ret;
+ unsigned int maxit, max_f;
+ int io_fd, ret, port;
if (argc != 6)
errx(1, "usage: %s iofd domid maxit maxf flags", argv[0]);
- xc_fd = xc_interface_open();
- if (xc_fd < 0)
+ si.xc_fd = xc_interface_open();
+ if (si.xc_fd < 0)
errx(1, "failed to open control interface");
io_fd = atoi(argv[1]);
- domid = atoi(argv[2]);
+ si.domid = atoi(argv[2]);
maxit = atoi(argv[3]);
max_f = atoi(argv[4]);
- flags = atoi(argv[5]);
+ si.flags = atoi(argv[5]);
+
+ si.suspend_evtchn = si.xce = -1;
+
+ si.xce = xc_evtchn_open();
+ if (si.xce < 0)
+ warnx("failed to open event channel handle");
+
+ if (si.xce > 0)
+ {
+ port = xs_suspend_evtchn_port(si.domid);
- if (suspend_evtchn_init(xc_fd, domid) < 0)
- warnx("suspend event channel initialization failed, using slow path");
+ if (port < 0)
+        warnx("failed to get the suspend evtchn port");
+ else
+ {
+ si.suspend_evtchn =
+ xc_suspend_evtchn_init(si.xc_fd, si.xce, si.domid, port);
- ret = xc_domain_save(xc_fd, io_fd, domid, maxit, max_f, flags,
- &suspend, !!(flags & XCFLAGS_HVM),
+ if (si.suspend_evtchn < 0)
+                warnx("suspend event channel initialization failed, "
+                      "using slow path");
+ }
+ }
+ ret = xc_domain_save(si.xc_fd, io_fd, si.domid, maxit, max_f, si.flags,
+ &suspend, !!(si.flags & XCFLAGS_HVM),
&init_qemu_maps, &qemu_flip_buffer);
- suspend_evtchn_release();
+ if (si.suspend_evtchn > 0)
+ xc_suspend_evtchn_release(si.xce, si.suspend_evtchn);
+
+ if (si.xce > 0)
+ xc_evtchn_close(si.xce);
- xc_interface_close(xc_fd);
+ xc_interface_close(si.xc_fd);
return ret;
}
.PHONY: clean
clean:
- rm -f $(BIN)
+ rm -f $(BIN) $(DEPS)
%: %.c Makefile
$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
xentrace_%: %.c Makefile
$(CC) $(CFLAGS) $< $(LDFLAGS) -o $@
+
+-include $(DEPS)
--- /dev/null
+XEN_ROOT=../..
+include $(XEN_ROOT)/tools/Rules.mk
+
+CFLAGS += -Werror
+CFLAGS += $(CFLAGS_libxenstore)
+LDFLAGS += $(LDFLAGS_libxenstore)
+
+BIN = xenpmd
+
+.PHONY: all
+all: $(BIN)
+
+.PHONY: install
+install: all
+ $(INSTALL_DIR) $(DESTDIR)$(SBINDIR)
+ $(INSTALL_PROG) $(BIN) $(DESTDIR)$(SBINDIR)
+
+.PHONY: clean
+clean:
+ $(RM) -f $(BIN) $(DEPS)
+
+-include $(DEPS)
--- /dev/null
+/*
+ * xenpmd.c
+ *
+ * xen power management daemon - Facilitates power management
+ * functionality within xen guests.
+ *
+ * Copyright (c) 2008 Kamala Narasimhan
+ * Copyright (c) 2008 Citrix Systems, Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+/* Xen extended power management support provides HVM guest power management
+ * features beyond S3, S4, S5. For example, it helps expose system level
+ * battery status and battery meter information and in future will be extended
+ * to include more power management support. This extended power management
+ * support is enabled by setting xen_extended_power_mgmt to 1 or 2 in the HVM
+ * config file. When set to 2, non-pass-through mode is enabled, which heavily
+ * relies on this power management daemon to glean battery information from
+ * dom0 and store it in xenstore, which is then queried and used by qemu and
+ * passed to the guest when the appropriate battery ports are read/written.
+ */
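+
+/* Illustrative sketch (not part of the daemon itself): a consumer such as
+ * qemu could read the packed battery info back out of xenstore roughly as
+ * follows, using the /pm/bif key written by write_battery_info_to_xenstore()
+ * below:
+ *
+ *   struct xs_handle *h = xs_daemon_open();
+ *   unsigned int len;
+ *   char *bif = h ? xs_read(h, XBT_NULL, "/pm/bif", &len) : NULL;
+ *   // the first two hex chars give the payload length, followed by nine
+ *   // LSB-first dwords and four length-prefixed strings
+ *   free(bif);
+ *   if (h)
+ *       xs_daemon_close(h);
+ */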
+
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <stdlib.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <sys/stat.h>
+#include <xs.h>
+
+/* #define RUN_STANDALONE */
+#define RUN_IN_SIMULATE_MODE
+
+enum BATTERY_INFO_TYPE {
+ BIF,
+ BST
+};
+
+enum BATTERY_PRESENT {
+ NO,
+ YES
+};
+
+enum BATTERY_TECHNOLOGY {
+ NON_RECHARGEABLE,
+ RECHARGEABLE
+};
+
+struct battery_info {
+ enum BATTERY_PRESENT present;
+ unsigned long design_capacity;
+ unsigned long last_full_capacity;
+ enum BATTERY_TECHNOLOGY battery_technology;
+ unsigned long design_voltage;
+ unsigned long design_capacity_warning;
+ unsigned long design_capacity_low;
+ unsigned long capacity_granularity_1;
+ unsigned long capacity_granularity_2;
+ char model_number[32];
+ char serial_number[32];
+ char battery_type[32];
+ char oem_info[32];
+};
+
+struct battery_status {
+ enum BATTERY_PRESENT present;
+ unsigned long state;
+ unsigned long present_rate;
+ unsigned long remaining_capacity;
+ unsigned long present_voltage;
+};
+
+static struct xs_handle *xs;
+
+#ifdef RUN_IN_SIMULATE_MODE
+ #define BATTERY_DIR_PATH "/tmp/battery"
+ #define BATTERY_INFO_FILE_PATH "/tmp/battery/%s/info"
+ #define BATTERY_STATE_FILE_PATH "/tmp/battery/%s/state"
+#else
+ #define BATTERY_DIR_PATH "/proc/acpi/battery"
+ #define BATTERY_INFO_FILE_PATH "/proc/acpi/battery/%s/info"
+ #define BATTERY_STATE_FILE_PATH "/proc/acpi/battery/%s/state"
+#endif
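+
+/* In simulate mode the daemon reads /tmp/battery/<name>/info and
+ * /tmp/battery/<name>/state files laid out like their /proc/acpi/battery
+ * counterparts. */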
+
+FILE *get_next_battery_file(DIR *battery_dir,
+ enum BATTERY_INFO_TYPE battery_info_type)
+{
+ FILE *file = 0;
+ struct dirent *dir_entries;
+ char file_name[32];
+
+ do
+ {
+ dir_entries = readdir(battery_dir);
+ if ( !dir_entries )
+ return 0;
+ if ( strlen(dir_entries->d_name) < 4 )
+ continue;
+ if ( battery_info_type == BIF )
+ snprintf(file_name, 32, BATTERY_INFO_FILE_PATH,
+ dir_entries->d_name);
+ else
+ snprintf(file_name, 32, BATTERY_STATE_FILE_PATH,
+ dir_entries->d_name);
+ file = fopen(file_name, "r");
+ } while ( !file );
+
+ return file;
+}
+
+void set_attribute_battery_info(char *attrib_name,
+ char *attrib_value,
+ struct battery_info *info)
+{
+ if ( strstr(attrib_name, "present") )
+ {
+ if ( strstr(attrib_value, "yes") )
+ info->present = YES;
+ return;
+ }
+
+ if ( strstr(attrib_name, "design capacity warning") )
+ {
+ info->design_capacity_warning = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "design capacity low") )
+ {
+ info->design_capacity_low = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "design capacity") )
+ {
+ info->design_capacity = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "last full capacity") )
+ {
+ info->last_full_capacity = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "design voltage") )
+ {
+ info->design_voltage = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "capacity granularity 1") )
+ {
+ info->capacity_granularity_1 = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "capacity granularity 2") )
+ {
+ info->capacity_granularity_2 = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "battery technology") )
+ {
+ if ( strncmp(attrib_value, "rechargeable",
+ strlen("rechargeable")) == 0 )
+ info->battery_technology = RECHARGEABLE;
+ else
+ info->battery_technology = NON_RECHARGEABLE;
+ return;
+ }
+
+ if ( strstr(attrib_name, "model number") )
+ {
+ strncpy(info->model_number, attrib_value, 32);
+ return;
+ }
+
+ if ( strstr(attrib_name, "serial number") )
+ {
+ strncpy(info->serial_number, attrib_value, 32);
+ return;
+ }
+
+ if ( strstr(attrib_name, "battery type") )
+ {
+ strncpy(info->battery_type, attrib_value, 32);
+ return;
+ }
+
+ if ( strstr(attrib_name, "OEM info") )
+ {
+ strncpy(info->oem_info, attrib_value, 32);
+ return;
+ }
+
+ return;
+}
+
+void set_attribute_battery_status(char *attrib_name,
+ char *attrib_value,
+ struct battery_status *status)
+{
+ if ( strstr(attrib_name, "charging state") )
+ {
+ /* Check this, below is half baked */
+ if ( strstr(attrib_value, "charged") )
+ status->state = 0;
+ else
+ status->state = 1;
+ return;
+ }
+
+ if ( strstr(attrib_name, "present rate") )
+ {
+ status->present_rate = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "remaining capacity") )
+ {
+ status->remaining_capacity = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "present voltage") )
+ {
+ status->present_voltage = strtoull(attrib_value, NULL, 10);
+ return;
+ }
+
+ if ( strstr(attrib_name, "present") )
+ {
+ if ( strstr(attrib_value, "yes") )
+ status->present = YES;
+ return;
+ }
+}
+
+void parse_battery_info_or_status(char *line_info,
+ enum BATTERY_INFO_TYPE type,
+ void *info_or_status)
+{
+ char attrib_name[128];
+ char attrib_value[64];
+ char *delimiter;
+ unsigned long length;
+
+ length = strlen(line_info);
+ delimiter = (char *) strchr( line_info, ':');
+ if ( (!delimiter) || (delimiter == line_info) ||
+ (delimiter == line_info + length) )
+ return;
+
+ strncpy(attrib_name, line_info, delimiter-line_info);
+ while ( *(delimiter+1) == ' ' )
+ {
+ delimiter++;
+ if ( delimiter+1 == line_info + length)
+ return;
+ }
+ strncpy(attrib_value, delimiter+1,
+ (unsigned long)line_info + length -(unsigned long)delimiter);
+
+ if ( type == BIF )
+ set_attribute_battery_info(attrib_name, attrib_value,
+ (struct battery_info *)info_or_status);
+ else
+ set_attribute_battery_status(attrib_name, attrib_value,
+ (struct battery_status *)info_or_status);
+
+ return;
+}
+
+int get_next_battery_info_or_status(DIR *battery_dir,
+ enum BATTERY_INFO_TYPE type,
+ void *info_or_status)
+{
+ FILE *file;
+ char line_info[256];
+
+ if ( !info_or_status )
+ return 0;
+
+ if (type == BIF)
+ memset(info_or_status, 0, sizeof(struct battery_info));
+ else
+ memset(info_or_status, 0, sizeof(struct battery_status));
+
+ file = get_next_battery_file(battery_dir, type);
+ if ( !file )
+ return 0;
+
+ while ( fgets(line_info, sizeof(line_info), file) != NULL )
+ parse_battery_info_or_status(line_info, type, info_or_status);
+
+ fclose(file);
+ return 1;
+}
+
+#ifdef RUN_STANDALONE
+void print_battery_info(struct battery_info *info)
+{
+ printf("present: %d\n", info->present);
+ printf("design capacity: %d\n", info->design_capacity);
+ printf("last full capacity: %d\n", info->last_full_capacity);
+ printf("battery technology: %d\n", info->battery_technology);
+ printf("design voltage: %d\n", info->design_voltage);
+ printf("design capacity warning:%d\n", info->design_capacity_warning);
+ printf("design capacity low: %d\n", info->design_capacity_low);
+ printf("capacity granularity 1: %d\n", info->capacity_granularity_1);
+ printf("capacity granularity 2: %d\n", info->capacity_granularity_2);
+ printf("model number: %s\n", info->model_number);
+ printf("serial number: %s\n", info->serial_number);
+ printf("battery type: %s\n", info->battery_type);
+ printf("OEM info: %s\n", info->oem_info);
+}
+#endif /*RUN_STANDALONE*/
+
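+/* Pack an unsigned long as eight hex characters, least significant byte
+ * first: e.g. 0x12345678 becomes "78563412". */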
+void write_ulong_lsb_first(char *temp_val, unsigned long val)
+{
+ snprintf(temp_val, 9, "%02x%02x%02x%02x", (unsigned int)val & 0xff,
+ (unsigned int)(val & 0xff00) >> 8, (unsigned int)(val & 0xff0000) >> 16,
+ (unsigned int)(val & 0xff000000) >> 24);
+}
+
+void write_battery_info_to_xenstore(struct battery_info *info)
+{
+ char val[1024], string_info[256];
+
+ xs_mkdir(xs, XBT_NULL, "/pm");
+
+ memset(val, 0, 1024);
+ memset(string_info, 0, 256);
+ /* write 9 dwords (so 9*4) + length of 4 strings + 4 null terminators */
+ snprintf(val, 3, "%02x",
+ (unsigned int)(9*4 +
+ strlen(info->model_number) +
+ strlen(info->serial_number) +
+ strlen(info->battery_type) +
+ strlen(info->oem_info) + 4));
+ write_ulong_lsb_first(val+2, info->present);
+ write_ulong_lsb_first(val+10, info->design_capacity);
+ write_ulong_lsb_first(val+18, info->last_full_capacity);
+ write_ulong_lsb_first(val+26, info->battery_technology);
+ write_ulong_lsb_first(val+34, info->design_voltage);
+ write_ulong_lsb_first(val+42, info->design_capacity_warning);
+ write_ulong_lsb_first(val+50, info->design_capacity_low);
+ write_ulong_lsb_first(val+58, info->capacity_granularity_1);
+ write_ulong_lsb_first(val+66, info->capacity_granularity_2);
+
+ snprintf(string_info, 256, "%02x%s%02x%s%02x%s%02x%s",
+ (unsigned int)strlen(info->model_number), info->model_number,
+ (unsigned int)strlen(info->serial_number), info->serial_number,
+ (unsigned int)strlen(info->battery_type), info->battery_type,
+ (unsigned int)strlen(info->oem_info), info->oem_info);
+ strncat(val+73, string_info, 1024-73-1);
+ xs_write(xs, XBT_NULL, "/pm/bif",
+ val, 73+8+strlen(info->model_number)+strlen(info->serial_number)+
+ strlen(info->battery_type)+strlen(info->oem_info)+1);
+}
+
+int write_one_time_battery_info(void)
+{
+ DIR *dir;
+ int ret = 0;
+ struct battery_info info;
+
+ dir = opendir(BATTERY_DIR_PATH);
+ if ( !dir )
+ return 0;
+
+ while ( get_next_battery_info_or_status(dir, BIF, (void *)&info) )
+ {
+#ifdef RUN_STANDALONE
+ print_battery_info(&info);
+#endif
+ if ( info.present == YES )
+ {
+ write_battery_info_to_xenstore(&info);
+ ret = 1;
+ break; /* rethink this... */
+ }
+ }
+
+ closedir(dir);
+ return ret;
+}
+
+#ifdef RUN_STANDALONE
+void print_battery_status(struct battery_status *status)
+{
+ printf("present: %d\n", status->present);
+ printf("Battery state %d\n", status->state);
+ printf("Battery present rate %d\n", status->present_rate);
+ printf("Battery remining capacity %d\n", status->remaining_capacity);
+ printf("Battery present voltage %d\n", status->present_voltage);
+}
+#endif /*RUN_STANDALONE*/
+
+void write_battery_status_to_xenstore(struct battery_status *status)
+{
+ char val[35];
+
+ xs_mkdir(xs, XBT_NULL, "/pm");
+
+ memset(val, 0, 35);
+ snprintf(val, 3, "%02x", 16);
+ write_ulong_lsb_first(val+2, status->state);
+ write_ulong_lsb_first(val+10, status->present_rate);
+ write_ulong_lsb_first(val+18, status->remaining_capacity);
+ write_ulong_lsb_first(val+26, status->present_voltage);
+
+ xs_write(xs, XBT_NULL, "/pm/bst", val, 35);
+}
+
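+/* Loop forever: re-read the battery state and rewrite /pm/bst, then block
+ * on a xenstore watch of /pm/events until the next refresh is requested. */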
+int wait_for_and_update_battery_status_request(void)
+{
+ DIR *dir;
+ int ret = 0;
+ unsigned int count;
+ struct battery_status status;
+
+ while ( true )
+ {
+ /* KN:@TODO - It is rather inefficient to not cache the file handle.
+ * Switch to caching file handle.
+ */
+ dir = opendir(BATTERY_DIR_PATH);
+ if ( !dir )
+ return 0;
+
+ while ( get_next_battery_info_or_status(dir, BST, (void *)&status) )
+ {
+#ifdef RUN_STANDALONE
+ print_battery_status(&status);
+#endif
+ if ( status.present == YES )
+ {
+ write_battery_status_to_xenstore(&status);
+ ret = 1;
+ /* rethink this; though I have never seen one, there might be
+ * systems out there with more than one battery device
+ * present
+ */
+ break;
+ }
+ }
+ closedir(dir);
+ xs_watch(xs, "/pm/events", "refreshbatterystatus");
+ xs_read_watch(xs, &count);
+ }
+
+ return ret;
+}
+
+/* Borrowed daemonize from xenstored - Initially written by Stevens. */
+static void daemonize(void)
+{
+ pid_t pid;
+
+ if ( (pid = fork()) < 0 )
+ exit(1);
+
+ if ( pid != 0 )
+ exit(0);
+
+ setsid();
+
+ if ( (pid = fork()) < 0 )
+ exit(1);
+
+ if ( pid != 0 )
+ exit(0);
+
+ if ( chdir("/") == -1 )
+ exit(1);
+
+ umask(0);
+}
+
+int main(int argc, char *argv[])
+{
+#ifndef RUN_STANDALONE
+ daemonize();
+#endif
+ xs = (struct xs_handle *)xs_daemon_open();
+ if ( xs == NULL )
+ return -1;
+
+ if ( write_one_time_battery_info() == 0 )
+ {
+ xs_daemon_close(xs);
+ return -1;
+ }
+
+ wait_for_and_update_battery_status_request();
+ xs_daemon_close(xs);
+ return 0;
+}
+
.PHONY: clean
clean:
rm -f $(LIB) $(SHLIB) $(SHLIB_LINKS) $(OBJECTS-y) \
- $(BINDINGS) $(BINDINGSRC)
+ $(BINDINGS) $(BINDINGSRC) $(DEPS)
+
+-include $(DEPS)
struct dirent *dp;
struct priv_data *priv = get_priv_data(node->handle);
- char *sys_prefix = "statistics/";
-
- /* 23 = "statistics/" + "xxxx_xx_req" */
- char ooreq[23], rdreq[23], wrreq[23];
- char *stat_prefix = NULL;
-
if (priv == NULL) {
perror("Allocation error");
return 0;
if (ret != 3)
continue;
-
- if (strcmp(buf,"vbd") == 0){
- stat_prefix = "";
+ if (strcmp(buf,"vbd") == 0)
vbd.back_type = 1;
- } else if (strcmp(buf,"tap") == 0){
- stat_prefix = "tap_";
+ else if (strcmp(buf,"tap") == 0)
vbd.back_type = 2;
- } else {
+ else
continue;
- }
domain = xenstat_node_domain(node, domid);
if (domain == NULL) {
continue;
}
- snprintf(ooreq, sizeof(ooreq), "%s%soo_req", sys_prefix, stat_prefix);
- if((read_attributes_vbd(dp->d_name, ooreq, buf, 256)<=0)
+ if((read_attributes_vbd(dp->d_name, "statistics/oo_req", buf, 256)<=0)
|| ((ret = sscanf(buf, "%llu", &vbd.oo_reqs)) != 1))
{
continue;
}
- snprintf(rdreq, sizeof(rdreq),"%s%srd_req", sys_prefix, stat_prefix);
- if((read_attributes_vbd(dp->d_name, rdreq, buf, 256)<=0)
+ if((read_attributes_vbd(dp->d_name, "statistics/rd_req", buf, 256)<=0)
|| ((ret = sscanf(buf, "%llu", &vbd.rd_reqs)) != 1))
{
continue;
}
- snprintf(wrreq, sizeof(wrreq),"%s%swr_req", sys_prefix, stat_prefix);
- if((read_attributes_vbd(dp->d_name, wrreq, buf, 256)<=0)
+ if((read_attributes_vbd(dp->d_name, "statistics/wr_req", buf, 256)<=0)
|| ((ret = sscanf(buf, "%llu", &vbd.wr_reqs)) != 1))
{
continue;
.PHONY: clean
clean:
- rm -f xentop xentop.o
+ rm -f xentop xentop.o $(DEPS)
+
+-include $(DEPS)
{
if(cwin != NULL && !isendwin())
endwin();
- fprintf(stderr, str);
+ fprintf(stderr, "%s", str);
exit(1);
}
CFLAGS += -I.
CFLAGS += $(CFLAGS_libxenctrl)
-# Make gcc generate dependencies.
-CFLAGS += -Wp,-MD,.$(@F).d
-DEP = .*.d
-
CLIENTS := xenstore-exists xenstore-list xenstore-read xenstore-rm xenstore-chmod
CLIENTS += xenstore-write xenstore-ls
rm -f xenstored xs_random xs_stress xs_crashme
rm -f xs_tdb_dump xenstore-control
rm -f xenstore $(CLIENTS)
- $(RM) $(DEP)
+ $(RM) $(DEPS)
.PHONY: TAGS
TAGS:
$(INSTALL_DIR) $(DESTDIR)$(INCLUDEDIR)
$(INSTALL_PROG) xenstored $(DESTDIR)$(SBINDIR)
$(INSTALL_PROG) xenstore-control $(DESTDIR)$(BINDIR)
- $(INSTALL_PROG) xenstore $(DESTDIR)/usr/bin
+ $(INSTALL_PROG) xenstore $(DESTDIR)$(BINDIR)
set -e ; for c in $(CLIENTS) ; do \
- ln -f $(DESTDIR)/usr/bin/xenstore $(DESTDIR)/usr/bin/$${c} ; \
+ ln -f $(DESTDIR)$(BINDIR)/xenstore $(DESTDIR)$(BINDIR)/$${c} ; \
done
$(INSTALL_DIR) $(DESTDIR)$(LIBDIR)
$(INSTALL_PROG) libxenstore.so.$(MAJOR).$(MINOR) $(DESTDIR)$(LIBDIR)
$(INSTALL_DATA) xs.h $(DESTDIR)$(INCLUDEDIR)
$(INSTALL_DATA) xs_lib.h $(DESTDIR)$(INCLUDEDIR)
--include $(DEP)
+-include $(DEPS)
# never delete any intermediate files.
.SECONDARY:
handle_event();
next = list_entry(connections.next, typeof(*conn), list);
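+ /* Hold a reference on the next connection before handling the current
+ * one, so that freeing conn below cannot tear down the entry we are
+ * about to visit. */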
+ if (&next->list != &connections)
+ talloc_increase_ref_count(next);
while (&next->list != &connections) {
conn = next;
next = list_entry(conn->list.next,
typeof(*conn), list);
+ if (&next->list != &connections)
+ talloc_increase_ref_count(next);
if (conn->domain) {
- talloc_increase_ref_count(conn);
if (domain_can_read(conn))
handle_input(conn);
if (talloc_free(conn) == 0)
if (talloc_free(conn) == 0)
continue;
} else {
- talloc_increase_ref_count(conn);
if (FD_ISSET(conn->fd, &inset))
handle_input(conn);
if (talloc_free(conn) == 0)
return rc;
}
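+/* Return the suspend event-channel port advertised by a guest at
+ * /local/domain/<domid>/device/suspend/event-channel, or -1 on failure. */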
+int xs_suspend_evtchn_port(int domid)
+{
+ char path[128];
+ char *portstr;
+ int port;
+ unsigned int plen;
+ struct xs_handle *xs;
+
+ xs = xs_daemon_open();
+ if (!xs)
+ return -1;
+
+ sprintf(path, "/local/domain/%d/device/suspend/event-channel", domid);
+ portstr = xs_read(xs, XBT_NULL, path, &plen);
+ xs_daemon_close(xs);
+
+ if (!portstr || !plen)
+ return -1;
+
+ port = atoi(portstr);
+ free(portstr);
+
+ return port;
+}
+
/* Only useful for DEBUG versions */
char *xs_debug_command(struct xs_handle *h, const char *cmd,
void *data, unsigned int len)
char *xs_debug_command(struct xs_handle *h, const char *cmd,
void *data, unsigned int len);
+int xs_suspend_evtchn_port(int domid);
#endif /* _XS_H */
/*
.PHONY: clean
clean:
- $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN)
+ $(RM) *.a *.so *.o *.rpm $(BIN) $(LIBBIN) $(DEPS)
%: %.c $(HDRS) Makefile
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
xentrace_%: %.c $(HDRS) Makefile
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)
+
+-include $(DEPS)
+
0x0040f10f CPU%(cpu)d %(tsc)d (+%(reltsc)8d) shadow_emulate_resync_only [ gfn = 0x%(1)16x ]
0x00801001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) cpu_freq_change [ %(1)dMHz -> %(2)dMHz ]
-0x00802001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) cpu_idle_entry [ C0 -> C%(1)d ]
-0x00802002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) cpu_idle_exit [ C%(1)d -> C0 ]
+0x00802001 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) cpu_idle_entry [ C0 -> C%(1)d, acpi_pm_tick = %(2)d ]
+0x00802002 CPU%(cpu)d %(tsc)d (+%(reltsc)8d) cpu_idle_exit [ C%(1)d -> C0, acpi_pm_tick = %(2)d ]
#include <getopt.h>
#include "xenctrl.h"
+#include <xen/foreign/x86_32.h>
+#include <xen/foreign/x86_64.h>
+#include <xen/hvm/save.h>
int xc_handle = 0;
int domid = 0;
int stack_trace = 0;
int disp_all = 0;
-#if defined (__i386__)
-#if defined (__OpenBSD__)
-#define FMT_SIZE_T "%08lx"
-#define INSTR_POINTER(regs) (unsigned long)(regs->eip)
-#else
-#define FMT_SIZE_T "%08x"
-#define INSTR_POINTER(regs) (regs->eip)
-#endif
-#define STACK_POINTER(regs) (regs->esp)
-#define FRAME_POINTER(regs) (regs->ebp)
-#define STACK_ROWS 4
-#define STACK_COLS 8
-#elif defined (__x86_64__)
-#define FMT_SIZE_T "%016lx"
-#define STACK_POINTER(regs) (regs->rsp)
-#define FRAME_POINTER(regs) (regs->rbp)
-#define INSTR_POINTER(regs) (regs->rip)
-#define STACK_ROWS 4
-#define STACK_COLS 4
+#if defined (__i386__) || defined (__x86_64__)
+typedef unsigned long long guest_word_t;
+#define FMT_32B_WORD "%08llx"
+#define FMT_64B_WORD "%016llx"
+/* Word-length of the guest's own data structures */
+int guest_word_size = sizeof (unsigned long);
+/* Word-length of the context record we get from xen */
+int ctxt_word_size = sizeof (unsigned long);
#elif defined (__ia64__)
/* On ia64, we can't translate virtual address to physical address. */
#define NO_TRANSLATION
+typedef size_t guest_word_t;
/* Which registers should be displayed. */
int disp_cr_regs;
#endif
struct symbol {
- size_t address;
+ guest_word_t address;
char type;
char *name;
struct symbol *next;
} *symbol_table = NULL;
-size_t kernel_stext, kernel_etext, kernel_sinittext, kernel_einittext, kernel_hypercallpage;
+guest_word_t kernel_stext, kernel_etext, kernel_sinittext, kernel_einittext, kernel_hypercallpage;
-static int is_kernel_text(size_t addr)
+static int is_kernel_text(guest_word_t addr)
{
-#if defined (__i386__)
+#if defined (__i386__) || defined (__x86_64__)
if (symbol_table == NULL)
- return (addr > 0xc000000);
-#elif defined (__x86_64__)
- if (symbol_table == NULL)
- return (addr > 0xffffffff80000000UL);
+ return (addr > ((guest_word_size == 4) ? 0xc000000 : 0xffffffff80000000ULL));
#elif defined (__ia64__)
if (symbol_table == NULL)
return (addr > 0xa000000000000000UL);
prev = symbol;
}
-static struct symbol *lookup_symbol(size_t address)
+static struct symbol *lookup_symbol(guest_word_t address)
{
struct symbol *s = symbol_table;
return NULL;
}
-static void print_symbol(size_t addr)
+static void print_symbol(guest_word_t addr)
{
struct symbol *s;
printf("\n");
}
-static void print_special(unsigned long *regs, const char *name, unsigned int mask)
+static void print_special(void *regs, const char *name, unsigned int mask, int width)
{
unsigned int i;
printf("\n");
for (i = 0; mask; mask >>= 1, ++i)
- if (mask & 1)
- printf("%s%u: " FMT_SIZE_T "\n", name, i, (size_t)regs[i]);
+ if (mask & 1) {
+ if (width == 4)
+ printf("%s%u: %08"PRIx32"\n", name, i, ((uint32_t *) regs)[i]);
+ else
+ printf("%s%u: %08"PRIx64"\n", name, i, ((uint64_t *) regs)[i]);
+ }
}
-#endif
-#ifdef __i386__
-static void print_ctx(vcpu_guest_context_t *ctx1)
+static void print_ctx_32(vcpu_guest_context_x86_32_t *ctx)
{
- struct cpu_user_regs *regs = &ctx1->user_regs;
+ struct cpu_user_regs_x86_32 *regs = &ctx->user_regs;
printf("cs:eip: %04x:%08x ", regs->cs, regs->eip);
print_symbol(regs->eip);
printf(" gs: %04x\n", regs->gs);
if (disp_all) {
- print_special(ctx1->ctrlreg, "cr", 0x1d);
- print_special(ctx1->debugreg, "dr", 0xcf);
+ print_special(ctx->ctrlreg, "cr", 0x1d, 4);
+ print_special(ctx->debugreg, "dr", 0xcf, 4);
+ }
+}
+
+static void print_ctx_32on64(vcpu_guest_context_x86_64_t *ctx)
+{
+ struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
+
+ printf("cs:eip: %04x:%08x ", regs->cs, (uint32_t)regs->eip);
+ print_symbol((uint32_t)regs->eip);
+ print_flags((uint32_t)regs->eflags);
+ printf("ss:esp: %04x:%08x\n", regs->ss, (uint32_t)regs->esp);
+
+ printf("eax: %08x\t", (uint32_t)regs->eax);
+ printf("ebx: %08x\t", (uint32_t)regs->ebx);
+ printf("ecx: %08x\t", (uint32_t)regs->ecx);
+ printf("edx: %08x\n", (uint32_t)regs->edx);
+
+ printf("esi: %08x\t", (uint32_t)regs->esi);
+ printf("edi: %08x\t", (uint32_t)regs->edi);
+ printf("ebp: %08x\n", (uint32_t)regs->ebp);
+
+ printf(" ds: %04x\t", regs->ds);
+ printf(" es: %04x\t", regs->es);
+ printf(" fs: %04x\t", regs->fs);
+ printf(" gs: %04x\n", regs->gs);
+
+ if (disp_all) {
+ print_special(ctx->ctrlreg, "cr", 0x1d, 4);
+ print_special(ctx->debugreg, "dr", 0xcf, 4);
}
}
-#elif defined(__x86_64__)
-static void print_ctx(vcpu_guest_context_t *ctx1)
+
+static void print_ctx_64(vcpu_guest_context_x86_64_t *ctx)
{
- struct cpu_user_regs *regs = &ctx1->user_regs;
+ struct cpu_user_regs_x86_64 *regs = &ctx->user_regs;
- printf("rip: %016lx ", regs->rip);
+ printf("rip: %016"PRIx64" ", regs->rip);
print_symbol(regs->rip);
print_flags(regs->rflags);
- printf("rsp: %016lx\n", regs->rsp);
+ printf("rsp: %016"PRIx64"\n", regs->rsp);
- printf("rax: %016lx\t", regs->rax);
- printf("rcx: %016lx\t", regs->rcx);
- printf("rdx: %016lx\n", regs->rdx);
+ printf("rax: %016"PRIx64"\t", regs->rax);
+ printf("rcx: %016"PRIx64"\t", regs->rcx);
+ printf("rdx: %016"PRIx64"\n", regs->rdx);
- printf("rbx: %016lx\t", regs->rbx);
- printf("rsi: %016lx\t", regs->rsi);
- printf("rdi: %016lx\n", regs->rdi);
+ printf("rbx: %016"PRIx64"\t", regs->rbx);
+ printf("rsi: %016"PRIx64"\t", regs->rsi);
+ printf("rdi: %016"PRIx64"\n", regs->rdi);
- printf("rbp: %016lx\t", regs->rbp);
- printf(" r8: %016lx\t", regs->r8);
- printf(" r9: %016lx\n", regs->r9);
+ printf("rbp: %016"PRIx64"\t", regs->rbp);
+ printf(" r8: %016"PRIx64"\t", regs->r8);
+ printf(" r9: %016"PRIx64"\n", regs->r9);
- printf("r10: %016lx\t", regs->r10);
- printf("r11: %016lx\t", regs->r11);
- printf("r12: %016lx\n", regs->r12);
+ printf("r10: %016"PRIx64"\t", regs->r10);
+ printf("r11: %016"PRIx64"\t", regs->r11);
+ printf("r12: %016"PRIx64"\n", regs->r12);
- printf("r13: %016lx\t", regs->r13);
- printf("r14: %016lx\t", regs->r14);
- printf("r15: %016lx\n", regs->r15);
+ printf("r13: %016"PRIx64"\t", regs->r13);
+ printf("r14: %016"PRIx64"\t", regs->r14);
+ printf("r15: %016"PRIx64"\n", regs->r15);
printf(" cs: %04x\t", regs->cs);
printf(" ss: %04x\t", regs->ss);
printf(" ds: %04x\t", regs->ds);
printf(" es: %04x\n", regs->es);
- printf(" fs: %04x @ %016lx\n", regs->fs, ctx1->fs_base);
- printf(" gs: %04x @ %016lx/%016lx\n", regs->gs,
- ctx1->gs_base_kernel, ctx1->gs_base_user);
+ printf(" fs: %04x @ %016"PRIx64"\n", regs->fs, ctx->fs_base);
+ printf(" gs: %04x @ %016"PRIx64"/%016"PRIx64"\n", regs->gs,
+ ctx->gs_base_kernel, ctx->gs_base_user);
if (disp_all) {
- print_special(ctx1->ctrlreg, "cr", 0x1d);
- print_special(ctx1->debugreg, "dr", 0xcf);
+ print_special(ctx->ctrlreg, "cr", 0x1d, 8);
+ print_special(ctx->debugreg, "dr", 0xcf, 8);
}
}
+
+static void print_ctx(vcpu_guest_context_any_t *ctx)
+{
+ if (ctxt_word_size == 4)
+ print_ctx_32(&ctx->x32);
+ else if (guest_word_size == 4)
+ print_ctx_32on64(&ctx->x64);
+ else
+ print_ctx_64(&ctx->x64);
+}
+
+static guest_word_t instr_pointer(vcpu_guest_context_any_t *ctx)
+{
+ if (ctxt_word_size == 4)
+ return ctx->x32.user_regs.eip;
+ else
+ return ctx->x64.user_regs.rip;
+}
+
+static guest_word_t stack_pointer(vcpu_guest_context_any_t *ctx)
+{
+ if (ctxt_word_size == 4)
+ return ctx->x32.user_regs.esp;
+ else
+ return ctx->x64.user_regs.rsp;
+}
+
+static guest_word_t frame_pointer(vcpu_guest_context_any_t *ctx)
+{
+ if (ctxt_word_size == 4)
+ return ctx->x32.user_regs.ebp;
+ else
+ return ctx->x64.user_regs.rbp;
+}
+
#elif defined(__ia64__)
#define PTE_ED_SHIFT 52
tr->itir >> ITIR_KEY_SHIFT & ITIR_KEY_MASK);
}
-void print_ctx(vcpu_guest_context_t *ctx)
+void print_ctx(vcpu_guest_context_any_t *ctx)
{
- struct vcpu_guest_context_regs *regs = &ctx->regs;
- struct vcpu_tr_regs *tr = &ctx->regs.tr;
+ struct vcpu_guest_context_regs *regs = &ctx->c.regs;
+ struct vcpu_tr_regs *tr = &ctx->c.regs.tr;
int i;
unsigned int rbs_size, cfm_sof;
printf(" cmcv: %016lx\n", regs->cr.cmcv);
printf(" lrr0: %016lx ", regs->cr.lrr0);
printf(" lrr1: %016lx ", regs->cr.lrr1);
- printf(" ev_cb:%016lx\n", ctx->event_callback_ip);
+ printf(" ev_cb:%016lx\n", ctx->c.event_callback_ip);
}
if (disp_ar_regs) {
#endif
#ifndef NO_TRANSLATION
-static void *map_page(vcpu_guest_context_t *ctx, int vcpu, size_t virt)
+static void *map_page(vcpu_guest_context_any_t *ctx, int vcpu, guest_word_t virt)
{
static unsigned long previous_mfn = 0;
static void *mapped = NULL;
return (void *)(mapped + offset);
}
-static void print_stack(vcpu_guest_context_t *ctx, int vcpu)
+static guest_word_t read_stack_word(guest_word_t *src, int width)
{
- struct cpu_user_regs *regs = &ctx->user_regs;
- size_t stack = STACK_POINTER(regs);
- size_t stack_limit = (STACK_POINTER(regs) & XC_PAGE_MASK) + XC_PAGE_SIZE;
- size_t frame;
- size_t instr;
- size_t *p;
+ guest_word_t word = 0;
+ /* Little-endian only */
+ memcpy(&word, src, width);
+ return word;
+}
+
+static void print_stack_word(guest_word_t word, int width)
+{
+ if (width == 4)
+ printf(FMT_32B_WORD, word);
+ else
+ printf(FMT_64B_WORD, word);
+}
+
+static void print_stack(vcpu_guest_context_any_t *ctx, int vcpu, int width)
+{
+ guest_word_t stack = stack_pointer(ctx);
+ guest_word_t stack_limit;
+ guest_word_t frame;
+ guest_word_t instr;
+ guest_word_t word;
+ guest_word_t *p;
int i;
+ stack_limit = ((stack_pointer(ctx) + XC_PAGE_SIZE)
+ & ~((guest_word_t) XC_PAGE_SIZE - 1));
printf("\n");
printf("Stack:\n");
- for (i=1; i<STACK_ROWS+1 && stack < stack_limit; i++) {
- while(stack < stack_limit && stack < STACK_POINTER(regs) + i*STACK_COLS*sizeof(stack)) {
+ for (i=1; i<5 && stack < stack_limit; i++) {
+ while(stack < stack_limit && stack < stack_pointer(ctx) + i*32) {
p = map_page(ctx, vcpu, stack);
- printf(" " FMT_SIZE_T, *p);
- stack += sizeof(stack);
+ word = read_stack_word(p, width);
+ printf(" ");
+ print_stack_word(word, width);
+ stack += width;
}
printf("\n");
}
printf("\n");
printf("Code:\n");
- instr = INSTR_POINTER(regs) - 21;
+ instr = instr_pointer(ctx) - 21;
for(i=0; i<32; i++) {
unsigned char *c = map_page(ctx, vcpu, instr+i);
- if (instr+i == INSTR_POINTER(regs))
+ if (instr+i == instr_pointer(ctx))
printf("<%02x> ", *c);
else
printf("%02x ", *c);
printf("Stack Trace:\n");
else
printf("Call Trace:\n");
- printf("%c [<" FMT_SIZE_T ">] ",
- stack_trace ? '*' : ' ', INSTR_POINTER(regs));
+ printf("%c [<", stack_trace ? '*' : ' ');
+ print_stack_word(instr_pointer(ctx), width);
+ printf(">] ");
- print_symbol(INSTR_POINTER(regs));
+ print_symbol(instr_pointer(ctx));
printf(" <--\n");
if (frame_ptrs) {
- stack = STACK_POINTER(regs);
- frame = FRAME_POINTER(regs);
+ stack = stack_pointer(ctx);
+ frame = frame_pointer(ctx);
while(frame && stack < stack_limit) {
if (stack_trace) {
while (stack < frame) {
p = map_page(ctx, vcpu, stack);
- printf("| " FMT_SIZE_T " ", *p);
- printf("\n");
- stack += sizeof(*p);
+ printf("| ");
+ print_stack_word(read_stack_word(p, width), width);
+ printf(" \n");
+ stack += width;
}
} else {
stack = frame;
}
p = map_page(ctx, vcpu, stack);
- frame = *p;
- if (stack_trace)
- printf("|-- " FMT_SIZE_T "\n", *p);
- stack += sizeof(*p);
+ frame = read_stack_word(p, width);
+ if (stack_trace) {
+ printf("|-- ");
+ print_stack_word(read_stack_word(p, width), width);
+ printf("\n");
+ }
+ stack += width;
if (frame) {
p = map_page(ctx, vcpu, stack);
- printf("%c [<" FMT_SIZE_T ">] ", stack_trace ? '|' : ' ', *p);
- print_symbol(*p);
+ word = read_stack_word(p, width);
+ printf("%c [<", stack_trace ? '|' : ' ');
+ print_stack_word(word, width);
+ printf(">] ");
+ print_symbol(word);
printf("\n");
- stack += sizeof(*p);
+ stack += width;
}
}
} else {
- stack = STACK_POINTER(regs);
+ stack = stack_pointer(ctx);
while(stack < stack_limit) {
p = map_page(ctx, vcpu, stack);
- if (is_kernel_text(*p)) {
- printf(" [<" FMT_SIZE_T ">] ", *p);
- print_symbol(*p);
+ word = read_stack_word(p, width);
+ if (is_kernel_text(word)) {
+ printf(" [<");
+ print_stack_word(word, width);
+ printf(">] ");
+ print_symbol(word);
printf("\n");
} else if (stack_trace) {
- printf(" " FMT_SIZE_T "\n", *p);
+ printf(" ");
+ print_stack_word(word, width);
+ printf("\n");
}
- stack += sizeof(*p);
+ stack += width;
}
}
}
exit(-1);
}
- print_ctx(&ctx.c);
+#if defined(__i386__) || defined(__x86_64__)
+ {
+ if (dominfo.hvm) {
+ struct hvm_hw_cpu cpuctx;
+ xen_capabilities_info_t xen_caps = "";
+ if (xc_domain_hvm_getcontext_partial(
+ xc_handle, domid, HVM_SAVE_CODE(CPU),
+ vcpu, &cpuctx, sizeof cpuctx) != 0) {
+ perror("xc_domain_hvm_getcontext_partial");
+ exit(-1);
+ }
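+ /* EFER.LMA (bit 10) set means the guest is running in long mode. */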
+ guest_word_size = (cpuctx.msr_efer & 0x400) ? 8 : 4;
+ /* HVM guest context records are always host-sized */
+ if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) {
+ perror("xc_version");
+ exit(-1);
+ }
+ ctxt_word_size = (strstr(xen_caps, "xen-3.0-x86_64")) ? 8 : 4;
+ } else {
+ struct xen_domctl domctl;
+ memset(&domctl, 0, sizeof domctl);
+ domctl.domain = domid;
+ domctl.cmd = XEN_DOMCTL_get_address_size;
+ if (xc_domctl(xc_handle, &domctl) == 0)
+ ctxt_word_size = guest_word_size = domctl.u.address_size.size / 8;
+ }
+ }
+#endif
+
+ print_ctx(&ctx);
#ifndef NO_TRANSLATION
- if (is_kernel_text(INSTR_POINTER((&ctx.c.user_regs))))
- print_stack(&ctx.c, vcpu);
+ if (is_kernel_text(instr_pointer(&ctx)))
+ print_stack(&ctx, vcpu, guest_word_size);
#endif
if (!dominfo.paused) {
int main(int argc, char **argv)
{
int ch;
- static const char *sopts = "fs:h"
+ static const char *sopts = "fs:ha"
#ifdef __ia64__
- "ar:"
+ "r:"
#endif
;
static const struct option lopts[] = {
interrupted = 0
-defs = read_defs(arg[0])
+try:
+ defs = read_defs(arg[0])
+except IOError, exn:
+ print exn
+ sys.exit(1)
# structure of trace record (as output by xentrace):
# HDR(I) {TSC(Q)} D1(I) D2(I) D3(I) D4(I) D5(I) D6(I) D7(I)
while val[0] != 'network-script':
val = pin.get_val()
- if val[1] == "network-bridge":
+ # split network command into script name and its parameters
+ sub_val = val[1].split()
+ if sub_val[0] == "network-bridge":
netenv = "bridge"
- elif val[1] == "network-route":
+ elif sub_val[0] == "network-route":
netenv = "route"
- elif val[1] == "network-nat":
+ elif sub_val[0] == "network-nat":
netenv = "nat"
else:
raise NetworkError("Failed to get network env from xend config")
obj-m += balloon/
obj-m += blkfront/
obj-m += netfront/
+obj-m += scsifront/
EXTRA_CFLAGS += -I$(M)/platform-pci
-xen-balloon-objs =
-xen-balloon-objs += balloon.o
-xen-balloon-objs += sysfs.o
+xen-balloon-y := balloon.o sysfs.o
+xen-balloon-$(CONFIG_XEN_SCRUB_PAGES) += scrub.o
--- /dev/null
+#ifndef _LINUX_SCATTERLIST_H
+#define _LINUX_SCATTERLIST_H
+
+#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12)
+#error "This version of Linux should not need compat linux/scatterlist.h"
+#endif
+
+#include <asm/scatterlist.h>
+
+#endif /* _LINUX_SCATTERLIST_H */
* RHEL_VERSION
*/
#if !defined(RHEL_VERSION) || (RHEL_VERSION == 4 && RHEL_UPDATE < 5)
+#if !defined(RHEL_MAJOR) || (RHEL_MAJOR == 4 && RHEL_MINOR < 5)
typedef irqreturn_t (*irq_handler_t)(int, void *, struct pt_regs *);
#endif
#endif
+#endif
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)
#define setup_xen_features xen_setup_features
ln -sf ${XL}/include/asm-x86/mach-xen/asm/synch_bitops*.h include/asm
ln -sf ${XL}/include/asm-x86/mach-xen/asm/maddr*.h include/asm
ln -sf ${XL}/include/asm-x86/mach-xen/asm/gnttab_dma.h include/asm
+ ln -sf ${XL}/arch/x86/lib/scrub.c balloon
else
if [ $uname = x86_64 ]; then
mkdir -p include/asm-i386
EXTRA_CFLAGS += $(_XEN_CPPFLAGS)
EXTRA_AFLAGS += $(_XEN_CPPFLAGS)
+CPPFLAGS := -I$(M)/include $(CPPFLAGS)
atomic_t nr_spinning;
};
-/*
- * Use a rwlock to protect the hypercall page from being executed in AP context
- * while the BSP is re-initializing it after restore.
- */
-static DEFINE_RWLOCK(suspend_lock);
-
#ifdef CONFIG_SMP
/*
atomic_inc(&info->nr_spinning);
mb();
- while (info->do_spin) {
+ while (info->do_spin)
cpu_relax();
- read_lock(&suspend_lock);
- HYPERVISOR_yield();
- read_unlock(&suspend_lock);
- }
mb();
atomic_dec(&info->nr_spinning);
}
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
#define initiate_ap_suspend(i) smp_call_function(ap_suspend, i, 0, 0)
+#else
+#define initiate_ap_suspend(i) smp_call_function(ap_suspend, i, 0)
+#endif
#else /* !defined(CONFIG_SMP) */
suspend_cancelled = HYPERVISOR_suspend(0);
if (!suspend_cancelled) {
- write_lock(&suspend_lock);
platform_pci_resume();
- write_unlock(&suspend_lock);
gnttab_resume();
irq_resume();
}
void ctrl_alt_del(void)
{
+#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27)
kill_proc(1, SIGINT, 1); /* interrupt init */
+#else
+ kill_cad_pid(SIGINT, 1);
+#endif
}
#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8)
--- /dev/null
+include $(M)/overrides.mk
+
+obj-m += xen-scsi.o
+
+xen-scsi-objs := scsifront.o xenbus.o
+
--- /dev/null
+ifneq ($(KERNELRELEASE),)
+include $(src)/Kbuild
+endif
$(MAKE) -f $(BASEDIR)/Rules.mk -C common clean
$(MAKE) -f $(BASEDIR)/Rules.mk -C drivers clean
$(MAKE) -f $(BASEDIR)/Rules.mk -C xsm clean
+ $(MAKE) -f $(BASEDIR)/Rules.mk -C crypto clean
$(MAKE) -f $(BASEDIR)/Rules.mk -C arch/$(TARGET_ARCH) clean
rm -f include/asm *.o $(TARGET)* *~ core
rm -f include/asm-*/asm-offsets.h
ifeq ($(perfc_arrays),y)
perfc := y
endif
-ifeq ($(frame_pointer),y)
-CFLAGS := $(shell echo $(CFLAGS) | sed -e 's/-f[^ ]*omit-frame-pointer//g')
-endif
# Set ARCH/SUBARCH appropriately.
override TARGET_SUBARCH := $(XEN_TARGET_ARCH)
TARGET := $(BASEDIR)/xen
-HDRS := $(wildcard *.h)
-HDRS += $(wildcard $(BASEDIR)/include/xen/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/xen/hvm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/public/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/public/*/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/compat/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-$(TARGET_ARCH)/$(TARGET_SUBARCH)/*.h)
-
include $(BASEDIR)/arch/$(TARGET_ARCH)/Rules.mk
-# Do not depend on auto-generated header files.
-AHDRS := $(filter-out %/include/xen/compile.h,$(HDRS))
-HDRS := $(filter-out %/asm-offsets.h,$(AHDRS))
-
# Note that link order matters!
ALL_OBJS-y += $(BASEDIR)/common/built_in.o
ALL_OBJS-y += $(BASEDIR)/drivers/built_in.o
ALL_OBJS-y += $(BASEDIR)/xsm/built_in.o
ALL_OBJS-y += $(BASEDIR)/arch/$(TARGET_ARCH)/built_in.o
+ALL_OBJS-$(x86) += $(BASEDIR)/crypto/built_in.o
CFLAGS-y += -g -D__XEN__
CFLAGS-$(XSM_ENABLE) += -DXSM_ENABLE
ifneq ($(max_phys_cpus),)
CFLAGS-y += -DMAX_PHYS_CPUS=$(max_phys_cpus)
endif
+ifneq ($(max_phys_irqs),)
+CFLAGS-y += -DMAX_PHYS_IRQS=$(max_phys_irqs)
+endif
AFLAGS-y += -D__ASSEMBLY__
ALL_OBJS := $(ALL_OBJS-y)
-CFLAGS := $(strip $(CFLAGS) $(CFLAGS-y))
+# Get gcc to generate the dependencies for us.
+CFLAGS-y += -MMD -MF .$(@F).d
+DEPS = .*.d
+
+CFLAGS += $(CFLAGS-y)
# Most CFLAGS are safe for assembly files:
# -std=gnu{89,99} gets confused by #-prefixed end-of-line comments
-AFLAGS := $(strip $(AFLAGS) $(AFLAGS-y))
-AFLAGS += $(patsubst -std=gnu%,,$(CFLAGS))
+AFLAGS += $(AFLAGS-y) $(filter-out -std=gnu%,$(CFLAGS))
# LDFLAGS are only passed directly to $(LD)
-LDFLAGS := $(strip $(LDFLAGS) $(LDFLAGS_DIRECT))
+LDFLAGS += $(LDFLAGS_DIRECT)
include Makefile
.PHONY: clean
clean:: $(addprefix _clean_, $(subdir-all))
- rm -f *.o *~ core
+ rm -f *.o *~ core $(DEPS)
_clean_%/: FORCE
$(MAKE) -f $(BASEDIR)/Rules.mk -C $* clean
-%.o: %.c $(HDRS) Makefile
+%.o: %.c Makefile
$(CC) $(CFLAGS) -c $< -o $@
-%.o: %.S $(AHDRS) Makefile
+%.o: %.S Makefile
$(CC) $(AFLAGS) -c $< -o $@
-%.i: %.c $(HDRS) Makefile
+%.i: %.c Makefile
$(CPP) $(CFLAGS) $< -o $@
# -std=gnu{89,99} gets confused by # as an end-of-line comment marker
-%.s: %.S $(AHDRS) Makefile
+%.s: %.S Makefile
$(CPP) $(AFLAGS) $< -o $@
+
+-include $(DEPS)
# Headers do not depend on auto-generated header, but object files do.
$(ALL_OBJS): $(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h
-asm-offsets.s: asm-offsets.c $(HDRS) \
+asm-offsets.s: asm-offsets.c \
$(BASEDIR)/include/asm-ia64/.offsets.h.stamp
$(CC) $(CFLAGS) -DGENERATE_ASM_OFFSETS -DIA64_TASK_SIZE=0 -S -o $@ $<
-asm-xsi-offsets.s: asm-xsi-offsets.c $(HDRS)
+asm-xsi-offsets.s: asm-xsi-offsets.c
$(CC) $(CFLAGS) -S -o $@ $<
$(BASEDIR)/include/asm-ia64/asm-xsi-offsets.h: asm-xsi-offsets.s
touch $@
# I'm sure a Makefile wizard would know a better way to do this
-xen.lds.s: xen/xen.lds.S $(HDRS)
+xen.lds.s: xen/xen.lds.S
$(CC) -E $(CPPFLAGS) -P -DXEN $(AFLAGS) \
-o xen.lds.s xen/xen.lds.S
xen_ia64_tlbflush_clock ?= y
xen_ia64_disable_optvfault ?= n
+# If they are enabled, struct page_info is shrunk on the assumption that
+# every mfn can be addressed with 32 bits. However, with the 50-bit ia64
+# architected physical address and a 16KB page size, an mfn cannot always
+# be addressed with 32 bits, so they are disabled by default.
+xen_ia64_shrink_page_list ?= n
+xen_ia64_pickle_domain ?= n
+
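+# For example (illustrative), both options can be enabled for a build with:
+#   make xen_ia64_shrink_page_list=y xen_ia64_pickle_domain=y
+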
# Used only by linux/Makefile.
AFLAGS_KERNEL += -mconstant-gp -nostdinc $(CPPFLAGS)
ifeq ($(xen_ia64_disable_optvfault),y)
CFLAGS += -DCONFIG_XEN_IA64_DISABLE_OPTVFAULT
endif
+ifeq ($(xen_ia64_shrink_page_list),y)
+CFLAGS += -DCONFIG_IA64_SHRINK_PAGE_LIST
+endif
+ifeq ($(xen_ia64_pickle_domain),y)
+CFLAGS += -DCONFIG_IA64_PICKLE_DOMAIN
+endif
-LDFLAGS := -g
-
-# Additionnal IA64 include dirs.
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/asm/sn/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-null/linux/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/asm/sn/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux-xen/linux/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm-generic/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/asm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/linux/byteorder/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-ia64/hvm/*.h)
-
-HDRS := $(filter-out %/include/asm-ia64/asm-xsi-offsets.h,$(HDRS))
+LDFLAGS = -g
unsigned long acpi_wakeup_address = 0;
#ifdef CONFIG_IA64_GENERIC
-#ifndef XEN
static unsigned long __init acpi_find_rsdp(void)
-#else
-unsigned long __init acpi_find_rsdp(void)
-#endif
{
unsigned long rsdp_phys = 0;
if (acpi_table_parse(ACPI_SIG_FADT, acpi_parse_fadt))
printk(KERN_ERR PREFIX "Can't find FADT\n");
+#ifdef XEN
+ acpi_dmar_init();
+#endif
+
#ifdef CONFIG_SMP
if (available_cpus == 0) {
printk(KERN_INFO "ACPI: Found 0 CPUS; assuming 1\n");
/*
* Switch into virtual mode:
*/
+#ifdef XEN
+ movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
+ |IA64_PSR_DI|IA64_PSR_AC)
+#else
movl r16=(IA64_PSR_IT|IA64_PSR_IC|IA64_PSR_DT|IA64_PSR_RT|IA64_PSR_DFH|IA64_PSR_BN \
|IA64_PSR_DI)
+#endif
;;
mov cr.ipsr=r16
movl r17=1f
#include <asm/ptrace.h>
#include <asm/system.h>
+#ifdef XEN
+static inline int iosapic_irq_to_vector (int irq)
+{
+ return irq;
+}
+
+#undef irq_to_vector
+#define irq_to_vector(irq) iosapic_irq_to_vector(irq)
+#define AUTO_ASSIGN AUTO_ASSIGN_IRQ
+#endif
#undef DEBUG_INTERRUPT_ROUTING
};
#endif
-#ifdef XEN
-extern void setup_vector (unsigned int vec, struct irqaction *action);
-#endif
-
void
register_percpu_irq (ia64_vector vec, struct irqaction *action)
{
irq_desc_t *desc;
+#ifndef XEN
unsigned int irq;
for (irq = 0; irq < NR_IRQS; ++irq)
desc->status |= IRQ_PER_CPU;
desc->handler = &irq_type_ia64_lsapic;
if (action)
-#ifdef XEN
- setup_vector(irq, action);
-#else
setup_irq(irq, action);
-#endif
}
+#else
+ desc = irq_descp(vec);
+ desc->status |= IRQ_PER_CPU;
+ desc->handler = &irq_type_ia64_lsapic;
+ if (action)
+ setup_vector(vec, action);
+#endif
}
#ifdef XEN
-int request_irq(unsigned int irq,
+int request_irq_vector(unsigned int vector,
void (*handler)(int, void *, struct cpu_user_regs *),
unsigned long irqflags, const char * devname, void *dev_id)
{
struct irqaction * action;
- int retval=0;
+ int retval;
/*
* Sanity-check: shared interrupts must pass in a real dev-ID,
* otherwise we'll have trouble later trying to figure out
* which interrupt is which (messes up the interrupt freeing logic etc).
* */
- if (irq >= NR_IRQS)
+ if (vector >= NR_VECTORS)
return -EINVAL;
if (!handler)
return -EINVAL;
action->handler = handler;
action->name = devname;
action->dev_id = dev_id;
- setup_vector(irq, action);
+
+ retval = setup_vector(vector, action);
if (retval)
xfree(action);
/* In mca_asm.S */
extern void ia64_monarch_init_handler (void);
extern void ia64_slave_init_handler (void);
-#ifdef XEN
-extern void setup_vector (unsigned int vec, struct irqaction *action);
-#define setup_irq(irq, action) setup_vector(irq, action)
-#endif
static ia64_mc_info_t ia64_mc_info;
#define IA64_LOG_COUNT(it) ia64_state_log[it].isl_count
#ifdef XEN
+sal_queue_entry_t sal_entry[NR_CPUS][IA64_MAX_LOG_TYPES];
struct list_head *sal_queue, sal_log_queues[IA64_MAX_LOG_TYPES];
sal_log_record_header_t *sal_record;
DEFINE_SPINLOCK(sal_queue_lock);
if (total_len) {
int queue_type;
+ int cpuid = smp_processor_id();
spin_lock_irqsave(&sal_queue_lock, flags);
else
queue_type = sal_info_type;
- e = xmalloc(sal_queue_entry_t);
- BUG_ON(e == NULL);
- e->cpuid = smp_processor_id();
+ /* Skip if sal_entry is already listed in sal_queue */
+ list_for_each_entry(e, &sal_queue[queue_type], list) {
+ if (e == &sal_entry[cpuid][queue_type])
+ goto found;
+ }
+ e = &sal_entry[cpuid][queue_type];
+ memset(e, 0, sizeof(sal_queue_entry_t));
+ e->cpuid = cpuid;
e->sal_info_type = sal_info_type;
e->vector = IA64_CMC_VECTOR;
e->virq = virq;
e->length = total_len;
list_add_tail(&e->list, &sal_queue[queue_type]);
+
+ found:
spin_unlock_irqrestore(&sal_queue_lock, flags);
IA64_LOG_INDEX_INC(sal_info_type);
{
irq_desc_t *desc;
+#ifndef XEN
unsigned int irq;
+#endif
if (cpe_vector >= 0) {
/* If platform supports CPEI, enable the irq. */
cpe_poll_enabled = 0;
+#ifndef XEN
for (irq = 0; irq < NR_IRQS; ++irq)
if (irq_to_vector(irq) == cpe_vector) {
desc = irq_descp(irq);
desc->status |= IRQ_PER_CPU;
- setup_irq(irq, &mca_cpe_irqaction);
+ setup_vector(irq, &mca_cpe_irqaction);
}
+#else
+ desc = irq_descp(cpe_vector);
+ desc->status |= IRQ_PER_CPU;
+ setup_vector(cpe_vector, &mca_cpe_irqaction);
+#endif
ia64_mca_register_cpev(cpe_vector);
IA64_MCA_DEBUG("%s: CPEI/P setup and enabled.\n", __FUNCTION__);
} else {
unsigned long end = start + size;
if (__pa(end) < end_in_pa) {
- init_xenheap_pages(__pa(xen_heap_start), __pa(start));
+ init_boot_pages(__pa(xen_heap_start), __pa(start));
xen_heap_start = (void*)end;
percpu_area = (void*)virt_to_xenva(start);
printk("allocate percpu area 0x%lx@0x%lx 0x%p\n",
extern void cpu_halt (void);
+#ifdef XEN
+/* work around for spinlock irq check. */
+void
+lock_ipi_calllock(unsigned long *flags)
+{
+ spin_lock_irqsave(&call_lock, *flags);
+}
+
+void
+unlock_ipi_calllock(unsigned long flags)
+{
+ spin_unlock_irqrestore(&call_lock, flags);
+}
+#else
void
lock_ipi_calllock(void)
{
{
spin_unlock_irq(&call_lock);
}
+#endif
static void
stop_this_cpu (void)
static void __devinit
smp_callin (void)
{
+#ifdef XEN
+ /* work around for spinlock irq assert. */
+ unsigned long flags;
+#endif
int cpuid, phys_id;
extern void ia64_init_itm(void);
fix_b0_for_bsp();
+#ifdef XEN
+ lock_ipi_calllock(&flags);
+#else
lock_ipi_calllock();
+#endif
cpu_set(cpuid, cpu_online_map);
+#ifdef XEN
+ unlock_ipi_calllock(flags);
+#else
unlock_ipi_calllock();
+#endif
per_cpu(cpu_state, cpuid) = CPU_ONLINE;
smp_setup_percpu_timer();
}
#endif
+#ifdef XEN
+void sn_irq_fixup(struct sn_pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
+#else
void sn_irq_fixup(struct pci_dev *pci_dev, struct sn_irq_info *sn_irq_info)
+#endif
{
nasid_t nasid = sn_irq_info->irq_nasid;
int slice = sn_irq_info->irq_slice;
register_intr_pda(sn_irq_info);
}
+#ifdef XEN
+void sn_irq_unfixup(struct sn_pci_dev *pci_dev)
+#else
void sn_irq_unfixup(struct pci_dev *pci_dev)
+#endif
{
#ifndef XEN
struct sn_irq_info *sn_irq_info;
pte = *lookup_noalloc_domain_pte(v->domain, paddr);
if (!pte_present(pte) || !pte_mem(pte))
return -EINVAL;
- mfn = (pte_val(pte) & _PFN_MASK) >> PAGE_SHIFT;
+ mfn = pte_pfn(pte);
ASSERT(mfn_valid(mfn));
page = mfn_to_page(mfn);
static void viosapic_deliver(struct viosapic *viosapic, int irq)
{
- uint16_t dest = viosapic->redirtbl[irq].dest_id;
- uint8_t delivery_mode = viosapic->redirtbl[irq].delivery_mode;
- uint8_t vector = viosapic->redirtbl[irq].vector;
+ uint16_t dest = viosapic->redirtbl[irq].fields.dest_id;
+ uint8_t delivery_mode = viosapic->redirtbl[irq].fields.delivery_mode;
+ uint8_t vector = viosapic->redirtbl[irq].fields.vector;
ASSERT(spin_is_locked(&viosapic->lock));
ASSERT(spin_is_locked(&viosapic->lock));
for ( i = 0; i < VIOSAPIC_NUM_PINS; i++ )
- if ( viosapic->redirtbl[i].vector == vector )
+ if ( viosapic->redirtbl[i].fields.vector == vector )
return i;
return -1;
while ( (irq = iosapic_get_highest_irq(viosapic)) != -1 )
{
- if ( viosapic->redirtbl[irq].trig_mode == SAPIC_LEVEL )
+ if ( viosapic->redirtbl[irq].fields.trig_mode == SAPIC_LEVEL )
viosapic->isr |= (1UL << irq);
viosapic_deliver(viosapic, irq);
if ( !test_and_clear_bit(redir_num, &viosapic->isr) )
{
spin_unlock(&viosapic->lock);
- if ( viosapic->redirtbl[redir_num].trig_mode == SAPIC_LEVEL )
+ if ( viosapic->redirtbl[redir_num].fields.trig_mode == SAPIC_LEVEL )
gdprintk(XENLOG_WARNING, "redir %d not set for %d EOI\n",
redir_num, vector);
return;
}
+ if ( iommu_enabled )
+ {
+ spin_unlock(&viosapic->lock);
+ hvm_dpci_eoi(current->domain, redir_num, &viosapic->redirtbl[redir_num]);
+ spin_lock(&viosapic->lock);
+ }
+
service_iosapic(viosapic);
spin_unlock(&viosapic->lock);
}
for ( i = 0; i < VIOSAPIC_NUM_PINS; i++ )
{
- viosapic->redirtbl[i].mask = 0x1;
+ viosapic->redirtbl[i].fields.mask = 0x1;
}
spin_lock_init(&viosapic->lock);
}
if ( (irq < 0) || (irq >= VIOSAPIC_NUM_PINS) )
goto out;
- if ( viosapic->redirtbl[irq].mask )
+ if ( viosapic->redirtbl[irq].fields.mask )
goto out;
bit = 1UL << irq;
- if ( viosapic->redirtbl[irq].trig_mode == SAPIC_LEVEL )
+ if ( viosapic->redirtbl[irq].fields.trig_mode == SAPIC_LEVEL )
{
if ( level )
viosapic->irr |= bit;
spin_unlock(&viosapic->lock);
}
-#define hvm_pci_intx_gsi(dev, intx) \
- (((((dev) << 2) + ((dev) >> 3) + (intx)) & 31) + 16)
-
-
void viosapic_set_pci_irq(struct domain *d, int device, int intx, int level)
{
int irq;
*/
if (ps != _PAGE_SIZE_16M)
thash_purge_entries(vcpu, va, ps);
- gpfn = (pte & _PAGE_PPN_MASK)>> PAGE_SHIFT;
+ gpfn = pte_pfn(__pte(pte));
vcpu_get_rr(vcpu, va, &rid);
rid &= RR_RID_MASK;
p_dtr = (thash_data_t *)&vcpu->arch.dtrs[slot];
do {
cpu = v->processor;
if (cpu != current->processor) {
- spin_unlock_wait(&per_cpu(schedule_data, cpu).schedule_lock);
+ spin_barrier(&per_cpu(schedule_data, cpu).schedule_lock);
/* Flush VHPT on remote processors. */
smp_call_function_single(cpu, &ptc_ga_remote_func,
&args, 0, 1);
#include <asm/shadow.h>
#include <asm/sioemu.h>
#include <public/arch-ia64/sioemu.h>
+#include <xen/hvm/irq.h>
/* reset all PSR field to 0, except up,mfl,mfh,pk,dt,rt,mc,it */
#define INITIAL_PSR_VALUE_AT_INTERRUPTION 0x0000001808028034
0x7f00
};
-
+void vmx_lazy_load_fpu(struct vcpu *vcpu)
+{
+ if (FP_PSR(vcpu) & IA64_PSR_DFH) {
+ FP_PSR(vcpu) = IA64_PSR_MFH;
+ if (__ia64_per_cpu_var(fp_owner) != vcpu)
+ __ia64_load_fpu(vcpu->arch._thread.fph);
+ }
+}
void vmx_reflect_interruption(u64 ifa, u64 isr, u64 iim,
u64 vec, REGS *regs)
case 25: // IA64_DISABLED_FPREG_VECTOR
if (!(vpsr & IA64_PSR_IC))
goto nested_fault;
- if (FP_PSR(vcpu) & IA64_PSR_DFH) {
- FP_PSR(vcpu) = IA64_PSR_MFH;
- if (__ia64_per_cpu_var(fp_owner) != vcpu)
- __ia64_load_fpu(vcpu->arch._thread.fph);
- }
+ vmx_lazy_load_fpu(vcpu);
if (!(VCPU(vcpu, vpsr) & IA64_PSR_DFH)) {
regs->cr_ipsr &= ~IA64_PSR_DFH;
return;
if (!status) {
vcpu_increment_iip(vcpu);
return;
- } else if (IA64_RETRY == status)
- return;
+ }
break;
case 33: // IA64_FP_TRAP_VECTOR
status = handle_fpu_swa(0, regs, isr);
if (!status)
return;
- else if (IA64_RETRY == status) {
- vcpu_decrement_iip(vcpu);
- return;
- }
break;
case 29: // IA64_DEBUG_VECTOR
viosapic_set_irq(d, callback_irq, 0);
}
}
+ hvm_dirq_assist(v);
}
rmb();
pte = lookup_domain_mpa(v->domain, pa_clear_uc(vadr), NULL);
if (v->domain != dom0 && (pte & _PAGE_IO)) {
emulate_io_inst(v, pa_clear_uc(vadr), 4,
- (pte & _PFN_MASK) >> PAGE_SHIFT);
+ pte_pfn(__pte(pte)));
return IA64_FAULT;
}
physical_tlb_miss(v, vadr, type);
" pte=0x%lx\n", data->page_flags);
if (data->pl >= ((regs->cr_ipsr >> IA64_PSR_CPL0_BIT) & 3))
emulate_io_inst(v, gppa, data->ma,
- (pte & _PFN_MASK) >> PAGE_SHIFT);
+ pte_pfn(__pte(pte)));
else {
vcpu_set_isr(v, misr.val);
data_access_rights(v, vadr);
VM_BUFFER_ALIGN_UP((unsigned long)start);
unsigned long e_vm_buffer = s_vm_buffer + buffer_size;
if (__pa(e_vm_buffer) < end_in_pa) {
- init_xenheap_pages(__pa(start), __pa(s_vm_buffer));
+ init_boot_pages(__pa(start), __pa(s_vm_buffer));
start = (void*)e_vm_buffer;
vm_buffer = virt_to_xenva(s_vm_buffer);
printk("vm_buffer: 0x%lx\n", vm_buffer);
pte = *lookup_noalloc_domain_pte(d, gpfn << PAGE_SHIFT);
if (!pte_present(pte) || !pte_mem(pte))
return -EINVAL;
- mfn = (pte_val(pte) & _PFN_MASK) >> PAGE_SHIFT;
+ mfn = pte_pfn(pte);
ASSERT(mfn_valid(mfn));
page = mfn_to_page(mfn);
debugger_event(vec == IA64_EXTINT_VECTOR ?
XEN_IA64_DEBUG_ON_EXTINT : XEN_IA64_DEBUG_ON_EXCEPT);
}
+
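+/* Assert/deassert a PCI INTx line for an HVM guest: (device, intx) is
+ * mapped to its GSI, and the viosapic pin is raised on the first assertion
+ * and lowered when the last assertion goes away. */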
+void hvm_pci_intx_assert(
+ struct domain *d, unsigned int device, unsigned int intx)
+{
+ struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+ unsigned int gsi;
+
+ ASSERT((device <= 31) && (intx <= 3));
+
+ if ( __test_and_set_bit(device * 4 + intx, &hvm_irq->pci_intx.i) )
+ return;
+ gsi = hvm_pci_intx_gsi(device, intx);
+ if ( ++hvm_irq->gsi_assert_count[gsi] == 1 )
+ viosapic_set_irq(d, gsi, 1);
+}
+
+void hvm_pci_intx_deassert(
+ struct domain *d, unsigned int device, unsigned int intx)
+{
+ struct hvm_irq *hvm_irq = &d->arch.hvm_domain.irq;
+ unsigned int gsi;
+
+ ASSERT((device <= 31) && (intx <= 3));
+
+ if ( !__test_and_clear_bit(device * 4 + intx, &hvm_irq->pci_intx.i) )
+ return;
+
+ gsi = hvm_pci_intx_gsi(device, intx);
+
+ if (--hvm_irq->gsi_assert_count[gsi] == 0)
+ viosapic_set_irq(d, gsi, 0);
+}
+
+void hvm_isa_irq_assert(struct domain *d, unsigned int isa_irq)
+{
+ /* dummy */
+}
+
+void hvm_isa_irq_deassert(struct domain *d, unsigned int isa_irq)
+{
+ /* dummy */
+}
+
+int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
+{
+ /* dummy */
+ return -ENOSYS;
+}
+
+void msixtbl_pt_unregister(struct domain *d, int pirq)
+{
+ /* dummy */
+}
movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
;;
and r19=r19,r16 // clear ed, reserved bits, and PTE control bits
- extr.u r18=r16,XEN_VIRT_UC_BIT, 15 // extract UC bit
+ extr.u r18=r16,XEN_VIRT_UC_BIT, 1 // extract UC bit
;;
or r19=r17,r19 // insert PTE control bits into r19
mov r20=IA64_GRANULE_SHIFT<<2
// 0x1000 Entry 4 (size 64 bundles) Alt DTLB (7,46)
ENTRY(vmx_alt_dtlb_miss)
VMX_DBG_FAULT(4)
- mov r29=cr.ipsr
+ mov r29=cr.ipsr // frametable_miss needs ipsr saved in r29.
mov r31=pr
adds r22=IA64_VCPU_MMU_MODE_OFFSET, r21
;;
// Test for the address of virtual frame_table
shr r22=r16,56;;
cmp.eq p8,p0=((VIRT_FRAME_TABLE_ADDR>>56)&0xff)-0x100,r22
-(p8)br.cond.sptk frametable_miss ;;
+(p8)br.cond.sptk frametable_miss ;; //Make sure ipsr is saved in r29
#endif
movl r17=PAGE_KERNEL
mov r20=cr.isr
* which is required by vga acceleration since qemu maps shared
* vram buffer with WB.
*/
- if (phy_pte.ma != VA_MATTR_NATPAGE)
+ if (mfn_valid(pte_pfn(__pte(maddr))) && phy_pte.ma != VA_MATTR_NATPAGE)
phy_pte.ma = VA_MATTR_WB;
maddr = ((maddr & _PAGE_PPN_MASK) & PAGE_MASK) | (paddr & ~PAGE_MASK);
cch = vtlb_thash(hcb->pta, va, vrr.rrval, &tag);
do {
if (cch->etag == tag && cch->ps == ps)
- return cch;
+ goto found;
cch = cch->next;
} while(cch);
}
return NULL;
+found:
+ if (unlikely(!cch->ed && is_data == ISIDE_TLB)) {
+ /* This case is very rare, but it may lead to an incorrect setting
+ of the itlb's ed bit! Purge it from the hash vTLB and let the
+ guest OS determine the ed bit of the itlb entry. */
+ vtlb_purge(v, va, ps);
+ cch = NULL;
+ }
+ return cch;
}
cpufreq_statistic_update(cpu, data->acpi_data->state, state);
data->acpi_data->state = state;
+ policy->cur = data->freq_table[state].frequency;
return 0;
}
data->acpi_data = &processor_pminfo[cpu]->perf;
- /* capability check */
- if (data->acpi_data->state_count <= 1) {
- printk(KERN_WARNING "P-States\n");
- result = -ENODEV;
- goto err_unreg;
- }
-
- if ((data->acpi_data->control_register.space_id !=
- ACPI_ADR_SPACE_FIXED_HARDWARE) ||
- (data->acpi_data->status_register.space_id !=
- ACPI_ADR_SPACE_FIXED_HARDWARE)) {
- result = -ENODEV;
- goto err_unreg;
- }
-
data->freq_table = xmalloc_array(struct cpufreq_frequency_table,
(data->acpi_data->state_count + 1));
if (!data->freq_table) {
data->acpi_data->states[i].transition_latency * 1000;
}
}
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+
+ policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
policy->cur = acpi_cpufreq_get(policy->cpu);
printk(KERN_INFO "Current freq of CPU %u is %u\n", cpu, policy->cur);
}
static struct cpufreq_driver acpi_cpufreq_driver = {
+ .name = "acpi-cpufreq",
.verify = acpi_cpufreq_verify,
.target = acpi_cpufreq_target,
.get = acpi_cpufreq_get,
#include <xen/trace.h>
#include <xen/console.h>
#include <xen/guest_access.h>
+#include <xen/pci.h>
#include <asm/vmx.h>
#include <asm/dom_fw.h>
#include <asm/vhpt.h>
ret = 0;
else {
if (op->u.ioport_permission.allow_access)
- ret = ioports_permit_access(d, fp, lp);
+ ret = ioports_permit_access(d, fp, fp, lp);
else
ret = ioports_deny_access(d, fp, lp);
}
}
break;
+ case XEN_DOMCTL_get_device_group:
+ {
+ struct domain *d;
+ u32 max_sdevs;
+ u8 bus, devfn;
+ XEN_GUEST_HANDLE_64(uint32) sdevs;
+ int num_sdevs;
+
+ ret = -ENOSYS;
+ if ( !iommu_enabled )
+ break;
+
+ ret = -EINVAL;
+ if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
+ break;
+
+ bus = (op->u.get_device_group.machine_bdf >> 16) & 0xff;
+ devfn = (op->u.get_device_group.machine_bdf >> 8) & 0xff;
+ max_sdevs = op->u.get_device_group.max_sdevs;
+ sdevs = op->u.get_device_group.sdev_array;
+
+ num_sdevs = iommu_get_device_group(d, bus, devfn, sdevs, max_sdevs);
+ if ( num_sdevs < 0 )
+ {
+ dprintk(XENLOG_ERR, "iommu_get_device_group() failed!\n");
+ ret = -EFAULT;
+ op->u.get_device_group.num_sdevs = 0;
+ }
+ else
+ {
+ ret = 0;
+ op->u.get_device_group.num_sdevs = num_sdevs;
+ }
+ if ( copy_to_guest(u_domctl, op, 1) )
+ ret = -EFAULT;
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_test_assign_device:
+ {
+ u8 bus, devfn;
+
+ ret = -ENOSYS;
+ if ( !iommu_enabled )
+ break;
+
+ ret = -EINVAL;
+ bus = (op->u.assign_device.machine_bdf >> 16) & 0xff;
+ devfn = (op->u.assign_device.machine_bdf >> 8) & 0xff;
+
+ if ( device_assigned(bus, devfn) )
+ {
+ printk( "XEN_DOMCTL_test_assign_device: "
+ "%x:%x:%x already assigned, or non-existent\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ break;
+ }
+ ret = 0;
+ }
+ break;
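The bus/devfn unpacking in these domctl cases implies that machine_bdf carries
the bus number in bits 23..16 and devfn in bits 15..8. A minimal sketch of how
a caller could pack such a value (pack_machine_bdf() is a hypothetical helper,
not part of this patch; the masks mirror the PCI_SLOT()/PCI_FUNC() convention
used above):

    #include <stdint.h>

    /* Pack bus/slot/function into the machine_bdf layout assumed above. */
    static inline uint32_t pack_machine_bdf(uint8_t bus, uint8_t slot, uint8_t func)
    {
        uint8_t devfn = (uint8_t)(((slot & 0x1f) << 3) | (func & 0x07));
        return ((uint32_t)bus << 16) | ((uint32_t)devfn << 8);
    }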
+
+ case XEN_DOMCTL_assign_device:
+ {
+ struct domain *d;
+ u8 bus, devfn;
+
+ ret = -ENOSYS;
+ if ( !iommu_enabled )
+ break;
+
+ ret = -EINVAL;
+ if ( unlikely((d = get_domain_by_id(op->domain)) == NULL) )
+ {
+ gdprintk(XENLOG_ERR,
+ "XEN_DOMCTL_assign_device: get_domain_by_id() failed\n");
+ break;
+ }
+ bus = (op->u.assign_device.machine_bdf >> 16) & 0xff;
+ devfn = (op->u.assign_device.machine_bdf >> 8) & 0xff;
+
+ if ( !iommu_pv_enabled && !is_hvm_domain(d) )
+ {
+ ret = -ENOSYS;
+ put_domain(d); /* drop the reference taken by get_domain_by_id() */
+ break;
+ }
+
+ if ( device_assigned(bus, devfn) )
+ {
+ gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
+ "%x:%x:%x already assigned, or non-existent\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ put_domain(d);
+ break;
+ }
+
+ ret = assign_device(d, bus, devfn);
+ gdprintk(XENLOG_INFO, "XEN_DOMCTL_assign_device: bdf = %x:%x:%x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ put_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_deassign_device:
+ {
+ struct domain *d;
+ u8 bus, devfn;
+
+ ret = -ENOSYS;
+ if ( !iommu_enabled )
+ break;
+
+ ret = -EINVAL;
+ if ( unlikely((d = get_domain_by_id(op->domain)) == NULL) )
+ {
+ gdprintk(XENLOG_ERR,
+ "XEN_DOMCTL_deassign_device: get_domain_by_id() failed\n");
+ break;
+ }
+ bus = (op->u.assign_device.machine_bdf >> 16) & 0xff;
+ devfn = (op->u.assign_device.machine_bdf >> 8) & 0xff;
+
+ if ( !iommu_pv_enabled && !is_hvm_domain(d) )
+ {
+ ret = -ENOSYS;
+ put_domain(d); /* drop the reference taken by get_domain_by_id() */
+ break;
+ }
+
+ if ( !device_assigned(bus, devfn) )
+ {
+ put_domain(d);
+ break;
+ }
+
+ ret = 0;
+ deassign_device(d, bus, devfn);
+ gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ put_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_bind_pt_irq:
+ {
+ struct domain * d;
+ xen_domctl_bind_pt_irq_t * bind;
+
+ ret = -ESRCH;
+ if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
+ break;
+ bind = &(op->u.bind_pt_irq);
+ if ( iommu_enabled )
+ ret = pt_irq_create_bind_vtd(d, bind);
+ if ( ret < 0 )
+ gdprintk(XENLOG_ERR, "pt_irq_create_bind failed!\n");
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_unbind_pt_irq:
+ {
+ struct domain * d;
+ xen_domctl_bind_pt_irq_t * bind;
+
+ ret = -ESRCH;
+ if ( (d = rcu_lock_domain_by_id(op->domain)) == NULL )
+ break;
+ bind = &(op->u.bind_pt_irq);
+ if ( iommu_enabled )
+ ret = pt_irq_destroy_bind_vtd(d, bind);
+ if ( ret < 0 )
+ gdprintk(XENLOG_ERR, "pt_irq_destroy_bind failed!\n");
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_memory_mapping:
+ {
+ struct domain *d;
+ unsigned long gfn = op->u.memory_mapping.first_gfn;
+ unsigned long mfn = op->u.memory_mapping.first_mfn;
+ unsigned long nr_mfns = op->u.memory_mapping.nr_mfns;
+ int i;
+
+ ret = -EINVAL;
+ if ( (mfn + nr_mfns - 1) < mfn ) /* wrap? */
+ break;
+
+ ret = -ESRCH;
+ if ( unlikely((d = rcu_lock_domain_by_id(op->domain)) == NULL) )
+ break;
+
+ ret = 0;
+ if ( op->u.memory_mapping.add_mapping )
+ {
+ gdprintk(XENLOG_INFO,
+ "memory_map:add: gfn=%lx mfn=%lx nr_mfns=%lx\n",
+ gfn, mfn, nr_mfns);
+
+ ret = iomem_permit_access(d, mfn, mfn + nr_mfns - 1);
+ for ( i = 0; i < nr_mfns; i++ )
+ assign_domain_mmio_page(d, (gfn+i)<<PAGE_SHIFT,
+ (mfn+i)<<PAGE_SHIFT, PAGE_SIZE,
+ ASSIGN_writable | ASSIGN_nocache);
+ }
+ else
+ {
+ gdprintk(XENLOG_INFO,
+ "memory_map:remove: gfn=%lx mfn=%lx nr_mfns=%lx\n",
+ gfn, mfn, nr_mfns);
+
+ for ( i = 0; i < nr_mfns; i++ )
+ deassign_domain_mmio_page(d, (gfn+i)<<PAGE_SHIFT,
+ (mfn+i)<<PAGE_SHIFT, PAGE_SIZE);
+ ret = iomem_deny_access(d, mfn, mfn + nr_mfns - 1);
+ }
+
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_ioport_mapping:
+ {
+
+#define MAX_IOPORTS 0x10000
+ struct domain *d;
+ unsigned int fgp = op->u.ioport_mapping.first_gport;
+ unsigned int fmp = op->u.ioport_mapping.first_mport;
+ unsigned int np = op->u.ioport_mapping.nr_ports;
+
+ ret = -EINVAL;
+ if ( (np == 0) || (fgp > MAX_IOPORTS) || (fmp > MAX_IOPORTS) ||
+ ((fgp + np) > MAX_IOPORTS) || ((fmp + np) > MAX_IOPORTS) )
+ {
+ gdprintk(XENLOG_ERR,
+ "ioport_map:invalid:gport=%x mport=%x nr_ports=%x\n",
+ fgp, fmp, np);
+ break;
+ }
+
+ ret = -ESRCH;
+ if ( unlikely((d = rcu_lock_domain_by_id(op->domain)) == NULL) )
+ break;
+
+ if ( op->u.ioport_mapping.add_mapping )
+ {
+ gdprintk(XENLOG_INFO,
+ "ioport_map:add f_gport=%x f_mport=%x np=%x\n",
+ fgp, fmp, np);
+
+ ret = ioports_permit_access(d, fgp, fmp, fmp + np - 1);
+ }
+ else
+ {
+ gdprintk(XENLOG_INFO,
+ "ioport_map:remove f_gport=%x f_mport=%x np=%x\n",
+ fgp, fmp, np);
+
+ ret = ioports_deny_access(d, fgp, fgp + np - 1);
+ }
+ rcu_unlock_domain(d);
+ }
+ break;
+
case XEN_DOMCTL_sethvmcontext:
{
struct hvm_domain_context c;
}
break;
- case XEN_DOMCTL_assign_device:
- ret = -ENOSYS;
- break;
+ case XEN_DOMCTL_set_address_size:
+ {
+ struct domain *d = rcu_lock_domain_by_id(op->domain);
+
+ ret = -ESRCH;
+ if (d == NULL)
+ break;
+
+ ret = -EINVAL;
+ if (op->u.address_size.size == BITS_PER_LONG)
+ ret = 0;
+
+ rcu_unlock_domain(d);
+ }
+ break;
+
+ case XEN_DOMCTL_get_address_size:
+ {
+ struct domain *d = rcu_lock_domain_by_id(op->domain);
+
+ ret = -ESRCH;
+ if (d == NULL)
+ break;
+
+ ret = 0;
+ op->u.address_size.size = BITS_PER_LONG;
+ rcu_unlock_domain(d);
+
+ if (copy_to_guest(u_domctl, op, 1))
+ ret = -EFAULT;
+ }
+ break;
default:
printk("arch_do_domctl: unrecognized domctl: %d!!!\n",op->cmd);
fp = space_number << IO_SPACE_BITS;
lp = fp | 0xffff;
- return ioports_permit_access(d, fp, lp);
+ return ioports_permit_access(d, fp, fp, lp);
}
unsigned long
dprintk(XENLOG_INFO, "%s: INVALID_MFN ret: 0x%lx\n",
__func__, ret);
} else {
- ret = (ret & _PFN_MASK) >> PAGE_SHIFT;//XXX pte_pfn()
+ ret = pte_pfn(__pte(ret));
}
perfc_incr(dom0vp_phystomach);
break;
+#include <asm/dom_fw.h>
+
// moved from xenasm.S to be shared by xen and libxc
/*
* Assembly support routines for Xen/ia64
xen_ia64_pal_call_stub:
{
.mii
- addl r2=0x1000,r0 // Hypercall number (Value is patched).
+ addl r2=FW_HYPERCALL_PAL_CALL_ASM,r0 // Hypercall number (Value is patched).
mov r9=256
;;
cmp.gtu p7,p8=r9,r28 /* r32 <= 255? */
}
{
.mbb
- break 0x1000 // Hypercall vector (Value is patched).
+ break __IA64_XEN_HYPERCALL_DEFAULT // Hypercall vector (Value is patched).
(p7) br.cond.sptk.few rp
(p8) br.ret.sptk.few rp
}
ia64_fc(imva + 3);
}
+/* xen fpswa call stub. 14 bundles */
+extern const unsigned long xen_ia64_fpswa_call_stub[];
+extern const unsigned long xen_ia64_fpswa_call_stub_end[];
+extern const unsigned long xen_ia64_fpswa_call_stub_patch[];
+asm(
+ ".align 32\n"
+ ".proc xen_ia64_fpswa_call_stub;\n"
+ "xen_ia64_fpswa_call_stub:\n"
+ ".prologue\n"
+ "alloc r3 = ar.pfs, 8, 0, 0, 0\n"
+ ".body\n"
+ "mov r14 = in0\n"
+ "ld8 r15 = [in1], 8\n"
+ ";;\n"
+ "ld8 r16 = [in1]\n"
+ "ld8 r17 = [in2]\n"
+ "ld8 r18 = [in3]\n"
+ "ld8 r19 = [in4]\n"
+ "ld8 r20 = [in5]\n"
+ "ld8 r21 = [in6]\n"
+ "ld8 r22 = [in7], 8\n"
+ ";;\n"
+ "ld8 r23 = [in7], 8\n"
+ ";;\n"
+ "ld8 r24 = [in7], 8\n"
+ ";;\n"
+ "cmp.ne p6, p0 = r24, r0\n"
+ "ld8 r25 = [in7], 8\n"
+ ";;\n"
+ "(p6) tpa r24 = r24\n"
+ "cmp.ne p7, p0 = r25, r0\n"
+ "ld8 r26 = [in7], 8\n"
+ ";;\n"
+ "(p7)tpa r25 = r25\n"
+ "cmp.ne p8, p0 = r26, r0\n"
+ "ld8 r27 = [in7], 8\n"
+ ";;\n"
+ "(p8)tpa r26 = r26\n"
+ "cmp.ne p9, p0 = r27, r0\n"
+ ";;\n"
+ "tpa r27 = r27\n"
+ "xen_ia64_fpswa_call_stub_patch:"
+ "{\n"
+ "mov r2 = " FW_HYPERCALL_FPSWA_STR "\n"
+ "break " __IA64_XEN_HYPERCALL_DEFAULT_STR "\n"
+ "nop.i 0\n"
+ "}\n"
+ "st8 [in2] = r17\n"
+ "st8 [in3] = r18\n"
+ "st8 [in4] = r19\n"
+ "st8 [in5] = r20\n"
+ "st8 [in6] = r21\n"
+ "br.ret.sptk.many rp\n"
+ "xen_ia64_fpswa_call_stub_end:"
+ ".endp xen_ia64_fpswa_call_stub\n"
+);
+
+static void
+build_fpswa_hypercall_bundle(uint64_t *imva, uint64_t brkimm, uint64_t hypnum)
+{
+ INST64_A5 slot0;
+ INST64_I19 slot1;
+ INST64_I18 slot2;
+ IA64_BUNDLE bundle;
+
+ /* slot0: mov r2 = hypnum (low 20 bits) */
+ slot0.inst = 0;
+ slot0.qp = 0;
+ slot0.r1 = 2;
+ slot0.r3 = 0;
+ slot0.major = 0x9;
+
+ slot0.s = 0;
+ slot0.imm9d = hypnum >> 7;
+ slot0.imm5c = hypnum >> 16;
+ slot0.imm7b = hypnum;
+
+ /* slot1: break brkimm */
+ slot1.inst = 0;
+ slot1.qp = 0;
+ slot1.x6 = 0;
+ slot1.x3 = 0;
+ slot1.major = 0x0;
+ slot1.i = brkimm >> 20;
+ slot1.imm20 = brkimm;
+
+ /* slot2: nop.i */
+ slot2.inst = 0;
+ slot2.qp = 0;
+ slot2.imm20 = 0;
+ slot2.y = 0;
+ slot2.x6 = 1;
+ slot2.x3 = 0;
+ slot2.i = 0;
+ slot2.major = 0;
+
+ /* MII bundle */
+ bundle.i64[0] = 0;
+ bundle.i64[1] = 0;
+ bundle.template = 0x0; /* MII */
+ bundle.slot0 = slot0.inst;
+ bundle.slot1a = slot1.inst;
+ bundle.slot1b = slot1.inst >> 18;
+ bundle.slot2 = slot2.inst;
+
+ imva[0] = bundle.i64[0];
+ imva[1] = bundle.i64[1];
+ ia64_fc(imva);
+ ia64_fc(imva + 1);
+}
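For reference: an Itanium instruction bundle is 128 bits, a 5-bit template plus
three 41-bit slots, and slot 1 straddles the two 64-bit words; that is why
build_fpswa_hypercall_bundle() stores it in two pieces, bundle.slot1a and
bundle.slot1b.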
+
// builds a hypercall bundle at domain physical address
static void
dom_fpswa_hypercall_patch(uint64_t brkimm, unsigned long imva)
unsigned long *entry_imva, *patch_imva;
const unsigned long entry_paddr = FW_HYPERCALL_FPSWA_ENTRY_PADDR;
const unsigned long patch_paddr = FW_HYPERCALL_FPSWA_PATCH_PADDR;
+ const size_t stub_size =
+ (char*)xen_ia64_fpswa_call_stub_end -
+ (char*)xen_ia64_fpswa_call_stub;
+ size_t i;
entry_imva = (unsigned long *)(imva + entry_paddr -
FW_HYPERCALL_BASE_PADDR);
*entry_imva++ = patch_paddr;
*entry_imva = 0;
- build_hypercall_bundle(patch_imva, brkimm, FW_HYPERCALL_FPSWA, 1);
+ /* see dom_fw.h */
+ BUG_ON((char*)xen_ia64_fpswa_call_stub_end -
+ (char*)xen_ia64_fpswa_call_stub > 0xff - 16 + 1);
+
+ /* call stub */
+ memcpy(patch_imva, xen_ia64_fpswa_call_stub, stub_size);
+ for (i = 0; i < stub_size; i++)
+ ia64_fc(imva + i);
+ patch_imva +=
+ xen_ia64_fpswa_call_stub_patch - xen_ia64_fpswa_call_stub;
+ build_fpswa_hypercall_bundle(patch_imva, brkimm, FW_HYPERCALL_FPSWA);
}
// builds a hypercall bundle at domain physical address
#include <xen/event.h>
#include <xen/console.h>
#include <xen/version.h>
-#include <public/libelf.h>
+#include <xen/libelf.h>
#include <asm/pgalloc.h>
#include <asm/offsets.h> /* for IA64_THREAD_INFO_SIZE */
#include <asm/vcpu.h> /* for function declarations */
kill_timer(&v->arch.hlt_timer);
}
+struct domain *alloc_domain_struct(void)
+{
+#ifdef CONFIG_IA64_PICKLE_DOMAIN
+ struct domain *d;
+ /*
+ * We pack the MFN of the domain structure into a 32-bit field within
+ * the page_info structure. Hence the MEMF_bits() restriction.
+ */
+ d = alloc_xenheap_pages(get_order_from_bytes(sizeof(*d)),
+ MEMF_bits(32 + PAGE_SHIFT));
+ if ( d != NULL )
+ memset(d, 0, sizeof(*d));
+ return d;
+#else
+ return xmalloc(struct domain);
+#endif
+}
+
+void free_domain_struct(struct domain *d)
+{
+#ifdef CONFIG_IA64_PICKLE_DOMAIN
+ free_xenheap_pages(d, get_order_from_bytes(sizeof(*d)));
+#else
+ xfree(d);
+#endif
+}
+
struct vcpu *alloc_vcpu_struct(void)
{
struct page_info *page;
/* Create privregs page. */
order = get_order_from_shift(XMAPPEDREGS_SHIFT);
- v->arch.privregs = alloc_xenheap_pages(order);
+ v->arch.privregs = alloc_xenheap_pages(order, 0);
if (v->arch.privregs == NULL)
return -ENOMEM;
BUG_ON(v->arch.privregs == NULL);
// the following will eventually need to be negotiated dynamically
d->arch.shared_info_va = DEFAULT_SHAREDINFO_ADDR;
- d->arch.breakimm = 0x1000;
+ d->arch.breakimm = __IA64_XEN_HYPERCALL_DEFAULT;
for (i = 0; i < NR_CPUS; i++) {
d->arch.last_vcpu[i].vcpu_id = INVALID_VCPU_ID;
}
if (is_idle_domain(d))
return 0;
+ INIT_LIST_HEAD(&d->arch.pdev_list);
foreign_p2m_init(d);
#ifdef CONFIG_XEN_IA64_PERVCPU_VHPT
d->arch.has_pervcpu_vhpt = opt_pervcpu_vhpt;
#endif
if (tlb_track_create(d) < 0)
goto fail_nomem1;
- d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT));
+ d->shared_info = alloc_xenheap_pages(
+ get_order_from_shift(XSI_SHIFT), 0);
if (d->shared_info == NULL)
goto fail_nomem;
BUG_ON(d->shared_info == NULL);
memset(&d->arch.mm, 0, sizeof(d->arch.mm));
d->arch.relres = RELRES_not_started;
d->arch.mm_teardown_offset = 0;
- INIT_LIST_HEAD(&d->arch.relmem_list);
+ INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
if ((d->arch.mm.pgd = pgd_alloc(&d->arch.mm)) == NULL)
goto fail_nomem;
+ if (iommu_domain_init(d) != 0)
+ goto fail_iommu;
+
/*
* grant_table_create() can't fully initialize grant table for domain
* because it is called before arch_domain_create().
dprintk(XENLOG_DEBUG, "arch_domain_create: domain=%p\n", d);
return 0;
+fail_iommu:
+ iommu_domain_destroy(d);
fail_nomem:
tlb_track_destroy(d);
fail_nomem1:
free_xenheap_pages(d->shared_info,
get_order_from_shift(XSI_SHIFT));
+ if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) ) {
+ pci_release_devices(d);
+ iommu_domain_destroy(d);
+ }
+
tlb_track_destroy(d);
/* Clear vTLB for the next domain. */
*nats &= ~(1UL << reg);
}
+static unsigned long
+__vcpu_get_itc(struct vcpu *v)
+{
+ unsigned long itc_last;
+ unsigned long itc_offset;
+ unsigned long itc;
+
+ if (unlikely(v->arch.privregs == NULL))
+ return ia64_get_itc();
+
+ itc_last = v->arch.privregs->itc_last;
+ itc_offset = v->arch.privregs->itc_offset;
+ itc = ia64_get_itc();
+ itc += itc_offset;
+ if (itc_last >= itc)
+ itc = itc_last;
+ return itc;
+}
+
+static void
+__vcpu_set_itc(struct vcpu *v, u64 val)
+{
+ unsigned long itc;
+ unsigned long itc_offset;
+ unsigned long itc_last;
+
+ BUG_ON(v->arch.privregs == NULL);
+
+ if (v != current)
+ vcpu_pause(v);
+
+ itc = ia64_get_itc();
+ itc_offset = val - itc;
+ itc_last = val;
+
+ v->arch.privregs->itc_offset = itc_offset;
+ v->arch.privregs->itc_last = itc_last;
+
+ if (v != current)
+ vcpu_unpause(v);
+}
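In the two helpers above, the guest-visible ITC is the host ITC plus a per-vcpu
itc_offset; itc_last records the value most recently set via __vcpu_set_itc(),
and reads are clamped to it so the virtual counter never appears to run
backwards across a set.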
+
void arch_get_info_guest(struct vcpu *v, vcpu_guest_context_u c)
{
int i;
unw_get_ar(&info, UNW_AR_LC, &c.nat->regs.ar.lc);
unw_get_ar(&info, UNW_AR_EC, &c.nat->regs.ar.ec);
}
+
+ if (!is_hvm)
+ c.nat->regs.ar.itc = __vcpu_get_itc(v);
+
c.nat->regs.ar.csd = uregs->ar_csd;
c.nat->regs.ar.ssd = uregs->ar_ssd;
unw_set_ar(&info, UNW_AR_LC, c.nat->regs.ar.lc);
unw_set_ar(&info, UNW_AR_EC, c.nat->regs.ar.ec);
}
+
+ if (!is_hvm_domain(d) && (c.nat->flags & VGCF_SET_AR_ITC))
+ __vcpu_set_itc(v, c.nat->regs.ar.itc);
+
uregs->ar_csd = c.nat->regs.ar.csd;
uregs->ar_ssd = c.nat->regs.ar.ssd;
return rc;
}
-static int relinquish_memory(struct domain *d, struct list_head *list)
+static int relinquish_memory(struct domain *d, struct page_list_head *list)
{
- struct list_head *ent;
struct page_info *page;
#ifndef __ia64__
unsigned long x, y;
/* Use a recursive lock, as we may enter 'free_domheap_page'. */
spin_lock_recursive(&d->page_alloc_lock);
- ent = list->next;
- while ( ent != list )
+
+ while ( (page = page_list_remove_head(list)) )
{
- page = list_entry(ent, struct page_info, list);
/* Grab a reference to the page so it won't disappear from under us. */
if ( unlikely(!get_page(page, d)) )
{
/* Couldn't get a reference -- someone is freeing this page. */
- ent = ent->next;
- list_move_tail(&page->list, &d->arch.relmem_list);
+ page_list_add_tail(page, &d->arch.relmem_list);
continue;
}
#endif
/* Follow the list chain and /then/ potentially free the page. */
- ent = ent->next;
BUG_ON(get_gpfn_from_mfn(page_to_mfn(page)) != INVALID_M2P_ENTRY);
- list_move_tail(&page->list, &d->arch.relmem_list);
+ page_list_add_tail(page, &d->arch.relmem_list);
put_page(page);
if (hypercall_preempt_check()) {
}
}
- list_splice_init(&d->arch.relmem_list, list);
+ page_list_splice_init(&d->arch.relmem_list, list);
out:
spin_unlock_recursive(&d->page_alloc_lock);
/*fallthrough*/
case RELRES_mm_teardown:
+ if (d->arch.pirq_eoi_map != NULL) {
+ put_page(virt_to_page(d->arch.pirq_eoi_map));
+ d->arch.pirq_eoi_map = NULL;
+ }
+
/* Tear down shadow mode stuff. */
ret = mm_teardown(d);
if (ret != 0)
if (is_hvm_domain(d) && d->arch.sal_data)
xfree(d->arch.sal_data);
- /* Free page used by xen oprofile buffer */
- free_xenoprof_pages(d);
-
return 0;
}
unsigned long p2m_pages;
unsigned long spare_hv_pages;
unsigned long max_dom0_size;
+ unsigned long iommu_pg_table_pages = 0;
/* Estimate maximum memory we can safely allocate for dom0
* by subtracting the p2m table allocation and a chunk of memory
domheap_pages = avail_domheap_pages();
p2m_pages = domheap_pages / PTRS_PER_PTE;
spare_hv_pages = 8192 + (domheap_pages / 4096);
- max_dom0_size = (domheap_pages - (p2m_pages + spare_hv_pages))
- * PAGE_SIZE;
+
+ if (iommu_enabled)
+ iommu_pg_table_pages = domheap_pages * 4 / 512;
+ /* There are 512 ptes in one 4K vtd page. */
+
+ max_dom0_size = (domheap_pages - (p2m_pages + spare_hv_pages) -
+ iommu_pg_table_pages) * PAGE_SIZE;
printk("Maximum permitted dom0 size: %luMB\n",
max_dom0_size / (1024*1024));
BUG();
if (irqs_permit_access(d, 0, NR_IRQS-1))
BUG();
- if (ioports_permit_access(d, 0, 0xffff))
+ if (ioports_permit_access(d, 0, 0, 0xffff))
BUG();
}
unsigned long
handle_fpu_swa(int fp_fault, struct pt_regs *regs, unsigned long isr)
{
- struct vcpu *v = current;
IA64_BUNDLE bundle;
unsigned long fault_ip;
fpswa_ret_t ret;
+ unsigned long rc;
fault_ip = regs->cr_iip;
/*
fault_ip -= 16;
if (VMX_DOMAIN(current)) {
- if (IA64_RETRY == __vmx_get_domain_bundle(fault_ip, &bundle))
- return IA64_RETRY;
- } else
- bundle = __get_domain_bundle(fault_ip);
-
- if (!bundle.i64[0] && !bundle.i64[1]) {
- printk("%s: floating-point bundle at 0x%lx not mapped\n",
- __FUNCTION__, fault_ip);
- return -1;
+ rc = __vmx_get_domain_bundle(fault_ip, &bundle);
+ } else {
+ rc = 0;
+ if (vcpu_get_domain_bundle(current, regs, fault_ip,
+ &bundle) == 0)
+ rc = IA64_RETRY;
+ }
+ if (rc == IA64_RETRY) {
+ PSCBX(current, fpswa_ret) = (fpswa_ret_t){IA64_RETRY, 0, 0, 0};
+ gdprintk(XENLOG_DEBUG,
+ "%s(%s): floating-point bundle at 0x%lx not mapped\n",
+ __FUNCTION__, fp_fault ? "fault" : "trap", fault_ip);
+ return IA64_RETRY;
}
ret = fp_emulate(fp_fault, &bundle, &regs->cr_ipsr, &regs->ar_fpsr,
&isr, &regs->pr, &regs->cr_ifs, regs);
if (ret.status) {
- PSCBX(v, fpswa_ret) = ret;
- printk("%s(%s): fp_emulate() returned %ld\n",
- __FUNCTION__, fp_fault ? "fault" : "trap", ret.status);
+ PSCBX(current, fpswa_ret) = ret;
+ gdprintk(XENLOG_ERR, "%s(%s): fp_emulate() returned %ld\n",
+ __FUNCTION__, fp_fault ? "fault" : "trap",
+ ret.status);
}
return ret.status;
printk("Dirty-bit.\n");
break;
+ case 10:
+ /* __domain_get_bundle() may cause a fault. */
+ if (ia64_done_with_exception(regs))
+ return;
+ printk("Data Access-bit.\n");
+ break;
+
case 20:
printk("Page Not Found.\n");
break;
}
}
+void
+ia64_lazy_load_fpu(struct vcpu *v)
+{
+ if (PSCB(v, hpsr_dfh)) {
+ PSCB(v, hpsr_dfh) = 0;
+ PSCB(v, hpsr_mfh) = 1;
+ if (__ia64_per_cpu_var(fp_owner) != v)
+ __ia64_load_fpu(v->arch._thread.fph);
+ }
+}
+
void
ia64_handle_reflection(unsigned long ifa, struct pt_regs *regs,
unsigned long isr, unsigned long iim,
vector = IA64_GENEX_VECTOR;
break;
case 25:
- if (PSCB(v, hpsr_dfh)) {
- PSCB(v, hpsr_dfh) = 0;
- PSCB(v, hpsr_mfh) = 1;
- if (__ia64_per_cpu_var(fp_owner) != v)
- __ia64_load_fpu(v->arch._thread.fph);
- }
+ ia64_lazy_load_fpu(v);
if (!PSCB(v, vpsr_dfh)) {
regs->cr_ipsr &= ~IA64_PSR_DFH;
return;
vcpu_increment_iip(v);
return;
}
- // fetch code fail
- if (IA64_RETRY == status)
- return;
- printk("ia64_handle_reflection: handling FP fault\n");
vector = IA64_FP_FAULT_VECTOR;
break;
case 33:
status = handle_fpu_swa(0, regs, isr);
if (!status)
return;
- // fetch code fail
- if (IA64_RETRY == status)
- return;
- printk("ia64_handle_reflection: handling FP trap\n");
vector = IA64_FP_TRAP_VECTOR;
break;
case 34:
rec_name[arg->type], smp_processor_id(), arg->ret);
if (arg->corrected) {
sal_record->severity = sal_log_severity_corrected;
- IA64_SAL_DEBUG("%s: IA64_SAL_CLEAR_STATE_INFO(SAL_INFO_TYPE_MCA)"
+ IA64_SAL_DEBUG("%s: IA64_SAL_GET_STATE_INFO(SAL_INFO_TYPE_MCA)"
" force\n", __FUNCTION__);
}
if (arg->ret > 0) {
}
r9 = arg.ret;
status = arg.status;
- if (r9 == 0) {
- xfree(e);
- } else {
+ if (r9 != 0) {
/* Re-add the entry to sal_queue */
spin_lock_irqsave(&sal_queue_lock, flags);
list_add(&e->list, &sal_queue[in1]);
}
r9 = arg.ret;
status = arg.status;
- xfree(e);
+ if (r9 >= 0) {
+ IA64_SAL_DEBUG("SAL_CLEAR_STATE_INFO: more errors are available\n");
+ spin_lock_irqsave(&sal_queue_lock, flags);
+ list_add(&e->list, &sal_queue[in1]);
+ spin_unlock_irqrestore(&sal_queue_lock, flags);
+ }
}
break;
case SAL_MC_RENDEZ:
efi_desc_size = sizeof(efi_memory_desc_t);
for (p = efi_map_start; p < efi_map_end; p += efi_desc_size) {
+ struct page_info *efi_runtime_page = NULL;
+ struct page_info *fpswa_inf_page = NULL;
+ struct page_info *fw_table_page = NULL;
+
if (copy_from_user(&entry, p, sizeof(efi_memory_desc_t))) {
printk ("efi_emulate_set_virtual_address_map: copy_from_user() fault. addr=0x%p\n", p);
return EFI_UNSUPPORTED;
if (md->type != EFI_PAL_CODE)
continue;
+ /* Get the pages to prevent them from being freed
+ * while we touch them.
+ * These entries live in [FW_TABLES_BASE_PADDR, ...];
+ * see dom_fw.h for the layout.
+ */
+ efi_runtime_page = virt_to_page(efi_runtime);
+ fpswa_inf_page = virt_to_page(fpswa_inf);
+ fw_table_page = virt_to_page(
+ domain_mpa_to_imva(d, FW_TABLES_BASE_PADDR));
+ if (get_page(efi_runtime_page, d) == 0)
+ return EFI_INVALID_PARAMETER;
+ if (get_page(fpswa_inf_page, d) == 0) {
+ put_page(efi_runtime_page);
+ return EFI_INVALID_PARAMETER;
+ }
+ if (get_page(fw_table_page, d) == 0) {
+ put_page(fpswa_inf_page);
+ put_page(efi_runtime_page);
+ return EFI_INVALID_PARAMETER;
+ }
+
#define EFI_HYPERCALL_PATCH_TO_VIRT(tgt,call) \
do { \
vfn = (unsigned long *) domain_mpa_to_imva(d, tgt); \
*vfn++ = FW_HYPERCALL_FPSWA_PATCH_INDEX * 16UL + md->virt_addr;
*vfn = 0;
fpswa_inf->fpswa = (void *) (FW_HYPERCALL_FPSWA_ENTRY_INDEX * 16UL + md->virt_addr);
+
+ put_page(fw_table_page);
+ put_page(fpswa_inf_page);
+ put_page(efi_runtime_page);
break;
}
#include <asm/sal.h> /* FOR struct ia64_sal_retval */
#include <asm/fpswa.h> /* FOR struct fpswa_ret_t */
+#include <asm/vmx.h>
#include <asm/vmx_vcpu.h>
#include <asm/vcpu.h>
#include <asm/dom_fw.h>
#include <public/arch-ia64/debug_op.h>
#include <asm/sioemu.h>
#include <public/arch-ia64/sioemu.h>
+#include <xen/pci.h>
static IA64FAULT
xen_hypercall (struct pt_regs *regs)
return IA64_NO_FAULT;
}
+static long __do_pirq_guest_eoi(struct domain *d, int pirq)
+{
+ if ( pirq < 0 || pirq >= NR_IRQS )
+ return -EINVAL;
+ if ( d->arch.pirq_eoi_map )
+ evtchn_unmask(d->pirq_to_evtchn[pirq]);
+ return pirq_guest_eoi(d, pirq);
+}
+
long do_pirq_guest_eoi(int pirq)
{
- return pirq_guest_eoi(current->domain, pirq);
+ return __do_pirq_guest_eoi(current->domain, pirq);
}
-
static void
fw_hypercall_ipi (struct pt_regs *regs)
return;
}
+static int
+fpswa_get_domain_addr(struct vcpu *v, unsigned long gpaddr, size_t size,
+ void **virt, struct page_info **page, const char *name)
+{
+ int cross_page_boundary;
+
+ if (gpaddr == 0) {
+ *virt = 0;
+ return 0;
+ }
+
+ cross_page_boundary = (((gpaddr & ~PAGE_MASK) + size) > PAGE_SIZE);
+ if (unlikely(cross_page_boundary)) {
+ /* this case isn't implemented */
+ gdprintk(XENLOG_ERR,
+ "%s: fpswa hypercall is called with "
+ "page crossing argument %s 0x%lx\n",
+ __func__, name, gpaddr);
+ return -ENOSYS;
+ }
+
+again:
+ *virt = domain_mpa_to_imva(v->domain, gpaddr);
+ *page = virt_to_page(*virt);
+ if (get_page(*page, current->domain) == 0) {
+ if (page_get_owner(*page) != current->domain) {
+ *page = NULL;
+ return -EFAULT;
+ }
+ goto again;
+ }
+
+ return 0;
+}
+
+static fpswa_ret_t
+fw_hypercall_fpswa (struct vcpu *v, struct pt_regs *regs)
+{
+ fpswa_ret_t ret = {-1, 0, 0, 0};
+ unsigned long bundle[2] = { regs->r15, regs->r16};
+ fp_state_t fp_state;
+ struct page_info *lp_page = NULL;
+ struct page_info *lv_page = NULL;
+ struct page_info *hp_page = NULL;
+ struct page_info *hv_page = NULL;
+ XEN_EFI_RR_DECLARE(rr6, rr7);
+
+ if (unlikely(PSCBX(v, fpswa_ret).status != 0 &&
+ PSCBX(v, fpswa_ret).status != IA64_RETRY)) {
+ ret = PSCBX(v, fpswa_ret);
+ PSCBX(v, fpswa_ret) = (fpswa_ret_t){0, 0, 0, 0};
+ return ret;
+ }
+
+ if (!fpswa_interface)
+ goto error;
+
+ memset(&fp_state, 0, sizeof(fp_state));
+ fp_state.bitmask_low64 = regs->r22;
+ fp_state.bitmask_high64 = regs->r23;
+
+ /* bit6..bit11 */
+ if ((fp_state.bitmask_low64 & 0xfc0) != 0xfc0) {
+ /* other cases aren't supported yet */
+ gdprintk(XENLOG_ERR, "%s unsupported bitmask_low64 0x%lx\n",
+ __func__, fp_state.bitmask_low64);
+ goto error;
+ }
+ if (regs->r25 == 0)
+ /* fp_state.fp_state_low_volatile must be supplied */
+ goto error;
+
+ /* eager save/lazy restore fpu: f32...f127 */
+ if ((~fp_state.bitmask_low64 & ((1UL << 31) - 1)) != 0 ||
+ ~fp_state.bitmask_high64 != 0) {
+ if (VMX_DOMAIN(v))
+ vmx_lazy_load_fpu(v);
+ else
+ ia64_lazy_load_fpu(v);
+ }
+
+ if (fpswa_get_domain_addr(v, regs->r24,
+ sizeof(fp_state.fp_state_low_preserved),
+ (void*)&fp_state.fp_state_low_preserved,
+ &lp_page, "fp_state_low_preserved") < 0)
+ goto error;
+ if (fpswa_get_domain_addr(v, regs->r25,
+ sizeof(fp_state.fp_state_low_volatile),
+ (void*)&fp_state.fp_state_low_volatile,
+ &lv_page, "fp_state_low_volatile") < 0)
+ goto error;
+ if (fpswa_get_domain_addr(v, regs->r26,
+ sizeof(fp_state.fp_state_high_preserved),
+ (void*)&fp_state.fp_state_high_preserved,
+ &hp_page, "fp_state_low_preserved") < 0)
+ goto error;
+ if (fpswa_get_domain_addr(v, regs->r27,
+ sizeof(fp_state.fp_state_high_volatile),
+ (void*)&fp_state.fp_state_high_volatile,
+ &hv_page, "fp_state_high_volatile") < 0)
+ goto error;
+
+ XEN_EFI_RR_ENTER(rr6, rr7);
+ ret = (*fpswa_interface->fpswa)(regs->r14,
+ bundle,
+ &regs->r17, /* pipsr */
+ &regs->r18, /* pfsr */
+ &regs->r19, /* pisr */
+ &regs->r20, /* ppreds */
+ &regs->r21, /* pifs */
+ &fp_state);
+ XEN_EFI_RR_LEAVE(rr6, rr7);
+
+error:
+ if (lp_page != NULL)
+ put_page(lp_page);
+ if (lv_page != NULL)
+ put_page(lv_page);
+ if (hp_page != NULL)
+ put_page(hp_page);
+ if (hv_page != NULL)
+ put_page(hv_page);
+ return ret;
+}
+
static fpswa_ret_t
-fw_hypercall_fpswa (struct vcpu *v)
+fw_hypercall_fpswa_error(void)
{
- return PSCBX(v, fpswa_ret);
+ return (fpswa_ret_t) {-1, 0, 0, 0};
}
IA64FAULT
stop_timer(&v->arch.hlt_timer);
/* do_block() calls
* local_event_delivery_enable(),
- * but PALL CALL must be called with
+ * but PAL CALL must be called with
* psr.i = 0 and psr.i is unchanged.
* SDM vol.2 Part I 11.10.2
* PAL Calling Conventions.
case FW_HYPERCALL_SET_SHARED_INFO_VA:
regs->r8 = domain_set_shared_info_va (regs->r28);
break;
- case FW_HYPERCALL_FPSWA:
- fpswa_ret = fw_hypercall_fpswa (v);
+ case FW_HYPERCALL_FPSWA_BASE:
+ switch (regs->r2) {
+ case FW_HYPERCALL_FPSWA_BROKEN:
+ gdprintk(XENLOG_WARNING,
+ "Old fpswa hypercall was called (0x%lx).\n"
+ "Please update your domain builder. ip 0x%lx\n",
+ FW_HYPERCALL_FPSWA_BROKEN, regs->cr_iip);
+ fpswa_ret = fw_hypercall_fpswa_error();
+ break;
+ case FW_HYPERCALL_FPSWA:
+ fpswa_ret = fw_hypercall_fpswa(v, regs);
+ break;
+ default:
+ gdprintk(XENLOG_ERR, "unknown fpswa hypercall %lx\n",
+ regs->r2);
+ fpswa_ret = fw_hypercall_fpswa_error();
+ break;
+ }
regs->r8 = fpswa_ret.status;
regs->r9 = fpswa_ret.err0;
regs->r10 = fpswa_ret.err1;
iosapic_guest_write(
unsigned long physbase, unsigned int reg, u32 pval);
+
+/*
+ * XXX: We don't support MSI for PCI passthrough at present, so make the
+ * following two functions dummies for now. They shouldn't return -ENOSYS,
+ * because xend invokes them (the x86 versions are needed by x86 Xen);
+ * if they returned -ENOSYS, xend would refuse to create an IPF HVM
+ * guest with assigned devices, so they simply return 0 here.
+ */
+static int physdev_map_pirq(struct physdev_map_pirq *map)
+{
+ return 0;
+}
+
+static int physdev_unmap_pirq(struct physdev_unmap_pirq *unmap)
+{
+ return 0;
+}
+
+
long do_physdev_op(int cmd, XEN_GUEST_HANDLE(void) arg)
{
int irq;
ret = -EFAULT;
if ( copy_from_guest(&eoi, arg, 1) != 0 )
break;
- ret = pirq_guest_eoi(current->domain, eoi.irq);
+ ret = __do_pirq_guest_eoi(current->domain, eoi.irq);
+ break;
+ }
+
+ case PHYSDEVOP_pirq_eoi_gmfn: {
+ struct physdev_pirq_eoi_gmfn info;
+ unsigned long mfn;
+
+ BUILD_BUG_ON(NR_IRQS > (PAGE_SIZE * 8));
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&info, arg, 1) != 0 )
+ break;
+
+ ret = -EINVAL;
+ mfn = gmfn_to_mfn(current->domain, info.gmfn);
+ if ( !mfn_valid(mfn) || !get_page(mfn_to_page(mfn), current->domain) )
+ break;
+
+ if ( cmpxchg(&current->domain->arch.pirq_eoi_map_mfn, 0, mfn) != 0 )
+ {
+ put_page(mfn_to_page(mfn));
+ ret = -EBUSY;
+ break;
+ }
+
+ current->domain->arch.pirq_eoi_map = mfn_to_virt(mfn);
+ ret = 0;
break;
}
break;
irq_status_query.flags = 0;
/* Edge-triggered interrupts don't need an explicit unmask downcall. */
- if ( !strstr(irq_desc[irq_to_vector(irq)].handler->typename, "edge") )
+ if ( !strstr(irq_descp(irq)->handler->typename, "edge") )
irq_status_query.flags |= XENIRQSTAT_needs_eoi;
ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0;
break;
break;
}
- /*
- * XXX We don't support MSI for PCI passthrough, so just return success
- */
- case PHYSDEVOP_map_pirq:
- case PHYSDEVOP_unmap_pirq:
- ret = 0;
+ case PHYSDEVOP_map_pirq: {
+ struct physdev_map_pirq map;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&map, arg, 1) != 0 )
+ break;
+
+ ret = physdev_map_pirq(&map);
+
+ if ( copy_to_guest(arg, &map, 1) != 0 )
+ ret = -EFAULT;
break;
+ }
+
+ case PHYSDEVOP_unmap_pirq: {
+ struct physdev_unmap_pirq unmap;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&unmap, arg, 1) != 0 )
+ break;
+
+ ret = physdev_unmap_pirq(&unmap);
+ break;
+ }
+
+ case PHYSDEVOP_manage_pci_add: {
+ struct physdev_manage_pci manage_pci;
+ ret = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+ ret = -EFAULT;
+ if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
+ break;
+
+ ret = pci_add_device(manage_pci.bus, manage_pci.devfn);
+ break;
+ }
+
+ case PHYSDEVOP_manage_pci_remove: {
+ struct physdev_manage_pci manage_pci;
+ ret = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+ ret = -EFAULT;
+ if ( copy_from_guest(&manage_pci, arg, 1) != 0 )
+ break;
+
+ ret = pci_remove_device(manage_pci.bus, manage_pci.devfn);
+ break;
+ }
+
+ case PHYSDEVOP_manage_pci_add_ext: {
+ struct physdev_manage_pci_ext manage_pci_ext;
+ struct pci_dev_info pdev_info;
+
+ ret = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&manage_pci_ext, arg, 1) != 0 )
+ break;
+
+ pdev_info.is_extfn = manage_pci_ext.is_extfn;
+ pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
+ pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
+ pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
+ ret = pci_add_device_ext(manage_pci_ext.bus,
+ manage_pci_ext.devfn,
+ &pdev_info);
+ break;
+ }
- case PHYSDEVOP_manage_pci_add:
- case PHYSDEVOP_manage_pci_remove:
default:
ret = -ENOSYS;
+ printk("not implemented do_physdev_op: %d\n", cmd);
break;
}
/*
* Controller mappings for all interrupt sources:
*/
-irq_desc_t irq_desc[NR_IRQS] __cacheline_aligned = {
+irq_desc_t irq_desc[NR_IRQS] = {
[0 ... NR_IRQS-1] = {
.status = IRQ_DISABLED,
.handler = &no_irq_type,
* disabled.
*/
-int setup_vector(unsigned int irq, struct irqaction * new)
+int setup_vector(unsigned int vector, struct irqaction * new)
{
unsigned long flags;
struct irqaction *old, **p;
- irq_desc_t *desc = irq_descp(irq);
+ irq_desc_t *desc = irq_descp(vector);
/*
* The following block of code has to be executed atomically
desc->depth = 0;
desc->status &= ~(IRQ_DISABLED | IRQ_INPROGRESS | IRQ_GUEST);
- desc->handler->startup(irq);
- desc->handler->enable(irq);
+ desc->handler->startup(vector);
+ desc->handler->enable(vector);
spin_unlock_irqrestore(&desc->lock,flags);
return 0;
/* Vectors reserved by xen (and thus not sharable with domains). */
unsigned long ia64_xen_vector[BITS_TO_LONGS(NR_IRQS)];
-int setup_irq(unsigned int irq, struct irqaction * new)
+int setup_irq_vector(unsigned int vec, struct irqaction * new)
{
- unsigned int vec;
int res;
- /* Get vector for IRQ. */
- if (acpi_gsi_to_irq (irq, &vec) < 0)
+ if ( vec == IA64_INVALID_VECTOR )
return -ENOSYS;
/* Reserve the vector (and thus the irq). */
if (test_and_set_bit(vec, ia64_xen_vector))
return res;
}
-void free_irq(unsigned int irq)
+void release_irq_vector(unsigned int vec)
{
- unsigned int vec;
unsigned long flags;
irq_desc_t *desc;
- /* Get vector for IRQ. */
- if (acpi_gsi_to_irq(irq, &vec) < 0)
+ if ( vec == IA64_INVALID_VECTOR )
return;
desc = irq_descp(vec);
struct domain *guest[IRQ_MAX_GUESTS];
} irq_guest_action_t;
+static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
+{
+ if ( d->arch.pirq_eoi_map )
+ set_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
+{
+ if ( d->arch.pirq_eoi_map )
+ clear_bit(irq, d->arch.pirq_eoi_map);
+}
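These two helpers maintain the per-domain pirq_eoi_map registered through
PHYSDEVOP_pirq_eoi_gmfn: a set bit means the corresponding pirq still needs an
explicit EOI from the guest. A guest might use it roughly as sketched below
(guest-side sketch with the usual guest headers assumed; maybe_eoi_pirq() is a
hypothetical helper, not defined by this patch):

    /* Issue the EOI hypercall only when Xen has flagged this pirq. */
    static void maybe_eoi_pirq(unsigned long *pirq_eoi_map, int pirq)
    {
        if (test_bit(pirq, pirq_eoi_map)) {
            struct physdev_eoi eoi = { .irq = pirq };
            HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
        }
    }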
+
+static void _irq_guest_eoi(irq_desc_t *desc)
+{
+ irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+ unsigned int i, vector = desc - irq_desc;
+
+ if ( !(desc->status & IRQ_GUEST_EOI_PENDING) )
+ return;
+
+ for ( i = 0; i < action->nr_guests; ++i )
+ clear_pirq_eoi(action->guest[i], vector);
+
+ desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING);
+ desc->handler->enable(vector);
+}
+
+static struct timer irq_guest_eoi_timer[NR_IRQS];
+static void irq_guest_eoi_timer_fn(void *data)
+{
+ irq_desc_t *desc = data;
+ unsigned long flags;
+
+ spin_lock_irqsave(&desc->lock, flags);
+ _irq_guest_eoi(desc);
+ spin_unlock_irqrestore(&desc->lock, flags);
+}
+
void __do_IRQ_guest(int irq)
{
irq_desc_t *desc = &irq_desc[irq];
irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
struct domain *d;
- int i;
+ int i, already_pending = 0;
for ( i = 0; i < action->nr_guests; i++ )
{
if ( (action->ack_type != ACKTYPE_NONE) &&
!test_and_set_bit(irq, &d->pirq_mask) )
action->in_flight++;
- send_guest_pirq(d, irq);
- }
+ if ( hvm_do_IRQ_dpci(d, irq) )
+ {
+ if ( action->ack_type == ACKTYPE_NONE )
+ {
+ already_pending += !!(desc->status & IRQ_INPROGRESS);
+ desc->status |= IRQ_INPROGRESS; /* cleared during hvm eoi */
+ }
+ }
+ else if ( send_guest_pirq(d, irq) &&
+ (action->ack_type == ACKTYPE_NONE) )
+ {
+ already_pending++;
+ }
+ }
+
+ if ( already_pending == action->nr_guests )
+ {
+ stop_timer(&irq_guest_eoi_timer[irq]);
+ desc->handler->disable(irq);
+ desc->status |= IRQ_GUEST_EOI_PENDING;
+ for ( i = 0; i < already_pending; ++i )
+ {
+ d = action->guest[i];
+ set_pirq_eoi(d, irq);
+ /*
+ * Could check here whether the guest unmasked the event by now
+ * (or perhaps just re-issue the send_guest_pirq()), and if it
+ * can now accept the event,
+ * - clear all the pirq_eoi bits we already set,
+ * - re-enable the vector, and
+ * - skip the timer setup below.
+ */
+ }
+ init_timer(&irq_guest_eoi_timer[irq],
+ irq_guest_eoi_timer_fn, desc, smp_processor_id());
+ set_timer(&irq_guest_eoi_timer[irq], NOW() + MILLISECS(1));
+ }
}
-int pirq_acktype(int irq)
+static int pirq_acktype(int irq)
{
irq_desc_t *desc = &irq_desc[irq];
int pirq_guest_eoi(struct domain *d, int irq)
{
irq_desc_t *desc;
+ irq_guest_action_t *action;
if ( (irq < 0) || (irq >= NR_IRQS) )
return -EINVAL;
desc = &irq_desc[irq];
spin_lock_irq(&desc->lock);
- if ( test_and_clear_bit(irq, &d->pirq_mask) &&
- (--((irq_guest_action_t *)desc->action)->in_flight == 0) )
+ action = (irq_guest_action_t *)desc->action;
+
+ if ( action->ack_type == ACKTYPE_NONE )
{
- ASSERT(((irq_guest_action_t*)desc->action)->ack_type == ACKTYPE_UNMASK);
+ ASSERT(!test_bit(irq, d->pirq_mask));
+ stop_timer(&irq_guest_eoi_timer[irq]);
+ _irq_guest_eoi(desc);
+ }
+
+ if ( test_and_clear_bit(irq, &d->pirq_mask) && (--action->in_flight == 0) )
+ {
+ ASSERT(action->ack_type == ACKTYPE_UNMASK);
desc->handler->end(irq);
}
spin_unlock_irq(&desc->lock);
action->guest[action->nr_guests++] = v->domain;
+ if ( action->ack_type != ACKTYPE_NONE )
+ set_pirq_eoi(v->domain, irq);
+ else
+ clear_pirq_eoi(v->domain, irq);
+
out:
spin_unlock_irqrestore(&desc->lock, flags);
return rc;
late_alt_dtlb_miss:
mov r20=cr.isr
movl r17=PAGE_KERNEL
- mov r21=cr.ipsr
+ mov r29=cr.ipsr // frametable_miss is shared by paravirtual and HVM sides
+ // and it assumes ipsr is saved in r29. If you change
+ // register usage here, please check both sides!
movl r19=(((1 << IA64_MAX_PHYS_BITS) - 1) & ~0xfff)
;;
- extr.u r23=r21,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
+ extr.u r23=r29,IA64_PSR_CPL0_BIT,2 // extract psr.cpl
and r22=IA64_ISR_CODE_MASK,r20 // get the isr.code field
tbit.nz p6,p7=r20,IA64_ISR_SP_BIT // is speculation bit on?
extr.u r18=r16,XEN_VIRT_UC_BIT,1 // extract UC bit
br.cond.spnt page_fault
;;
alt_dtlb_miss_identity_map:
- dep r21=-1,r21,IA64_PSR_ED_BIT,1
+ dep r29=-1,r29,IA64_PSR_ED_BIT,1
or r19=r19,r17 // insert PTE control bits into r19
mov cr.itir=r20 // set itir with cleared key
;;
cmp.eq.or p8,p0=0x18,r22 // Region 6 is UC for EFI
;;
(p8) dep r19=-1,r19,4,1 // set bit 4 (uncached) if access to UC area
-(p6) mov cr.ipsr=r21
+(p6) mov cr.ipsr=r29
;;
(p7) itc.d r19 // insert the TLB entry
mov pr=r31,-1
rfi
END(frametable_miss)
-ENTRY(frametable_fault)
+ENTRY(frametable_fault) // ipsr saved in r29 before coming here!
ssm psr.dt // switch to using virtual data addressing
mov r18=cr.iip
movl r19=ia64_frametable_probe
;;
cmp.eq p6,p7=r18,r19 // is faulting address ia64_frametable_probe?
mov r8=0 // assumes that 'probe.r' uses r8
- dep r21=-1,r21,IA64_PSR_RI_BIT+1,1 // return to next instruction in
+ dep r29=-1,r29,IA64_PSR_RI_BIT+1,1 // return to next instruction in
// bundle 2
;;
-(p6) mov cr.ipsr=r21
+(p6) mov cr.ipsr=r29
mov r19=4 // FAULT(4)
(p7) br.spnt.few dispatch_to_fault_handler
;;
DBG_FAULT(10)
mov r16=cr.isr
mov r17=cr.ifa
+ mov r18=cr.ipsr
mov r31=pr
mov r19=10
+ ;;
mov r20=0x2800
- br.sptk.many fast_access_reflect
+ extr.u r18=r18,IA64_PSR_CPL0_BIT,2
+ ;;
+ cmp.ne p6,p0=r0,r18 /* cpl != 0? */
+(p6) br.sptk.many fast_access_reflect
+ /* __domain_get_bundle() may cause this fault. */
+ br.sptk.few dispatch_to_fault_handler
;;
END(daccess_bit)
static int machine_kexec_get_xenheap(xen_kexec_range_t *range)
{
range->start = (ia64_tpa(_end) + (ELF_PAGE_SIZE - 1)) & ELF_PAGE_MASK;
- range->size = (unsigned long)xenheap_phys_end -
- (unsigned long)range->start;
+ range->size =
+ (((unsigned long)range->start + KERNEL_TR_PAGE_SIZE) &
+ ~(KERNEL_TR_PAGE_SIZE - 1))
+ - (unsigned long)range->start;
return 0;
}
VMCOREINFO_SYMBOL(dom_io);
VMCOREINFO_SYMBOL(xen_pstart);
VMCOREINFO_SYMBOL(frametable_pg_dir);
- VMCOREINFO_SYMBOL_ALIAS(xen_heap_start, xen_pickle_offset);
}
/*
#include <asm/event.h>
#include <asm/debugger.h>
+
+#define MEM_LOG(_f, _a...) gdprintk(XENLOG_WARNING, _f "\n", ## _a)
+
static void domain_page_flush_and_put(struct domain* d, unsigned long mpaddr,
volatile pte_t* ptep, pte_t old_pte,
struct page_info* page);
page_set_owner(page, d);
wmb(); /* install valid domain ptr before updating refcnt. */
- ASSERT(page->count_info == 0);
+ ASSERT((page->count_info & ~PGC_xen_heap)== 0);
/* Only add to the allocation list if the domain isn't dying. */
if ( !d->is_dying )
page->count_info |= PGC_allocated | 1;
if ( unlikely(d->xenheap_pages++ == 0) )
get_knownalive_domain(d);
- list_add_tail(&page->list, &d->xenpage_list);
+ page_list_add_tail(page, &d->xenpage_list);
}
// grant_table_destroy() releases these pages.
old_pte = __pte(0);
new_pte = pfn_pte(physaddr >> PAGE_SHIFT, __pgprot(prot));
+ again_hvm_page_io:
ret_pte = ptep_cmpxchg_rel(&d->arch.mm, mpaddr, pte, old_pte, new_pte);
if (pte_val(ret_pte) == pte_val(old_pte)) {
smp_mb();
return 0;
}
+ /* In an HVM guest with VT-d enabled, a P2M entry may change
+ * from the _PAGE_IO type to a real MMIO page.
+ */
+ if (is_hvm_domain(d) && (pte_val(ret_pte) & _PAGE_IO) &&
+ !mfn_valid(physaddr >> PAGE_SHIFT)) {
+ old_pte = ret_pte;
+ goto again_hvm_page_io;
+ }
// dom0 tries to map real machine's I/O region, but failed.
// It is very likely that dom0 doesn't boot correctly because
struct page_info* page = mfn_to_page(physaddr >> PAGE_SHIFT);
BUG_ON((physaddr & _PAGE_PPN_MASK) != physaddr);
- BUG_ON(page->count_info != (PGC_allocated | 1));
+ BUG_ON((page->count_info & ~PGC_xen_heap) != (PGC_allocated | 1));
set_gpfn_from_mfn(physaddr >> PAGE_SHIFT, mpaddr >> PAGE_SHIFT);
// because __assign_domain_page() uses set_pte_rel() which has
// release semantics, smp_mb() isn't needed.
ASSIGN_writable | ASSIGN_pgc_allocated);
}
+static void
+ioports_get_mmio_addr(const struct io_space *space,
+ unsigned long fp, unsigned long lp,
+ unsigned long *mmio_start, unsigned long *mmio_end)
+{
+ if (space->sparse) {
+ *mmio_start = IO_SPACE_SPARSE_ENCODING(fp) & PAGE_MASK;
+ *mmio_end = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp));
+ } else {
+ *mmio_start = fp & PAGE_MASK;
+ *mmio_end = PAGE_ALIGN(lp);
+ }
+}
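ioports_get_mmio_addr() relies on the ia64 sparse I/O space encoding; on
Linux/ia64 that is conventionally (((port) >> 2) << 12) | ((port) & 0xfff), so
each group of four consecutive ports lands in its own 4 KiB page, which is why
the start/end values computed here are page granular (the authoritative
definition is the IO_SPACE_SPARSE_ENCODING macro in the io headers, not this
patch).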
+
+static unsigned long
+ioports_get_mmio_base(const struct io_space *space, struct domain *d)
+{
+ if (VMX_DOMAIN(d->vcpu[0]))
+ return LEGACY_IO_START;
+
+ if (space == &io_space[0] && d != dom0)
+ return IO_PORTS_PADDR;
+
+ return __pa(space->mmio_base);
+}
+
+/*
+ * Input:
+ * fgp: first guest port
+ * fmp: first machine port
+ * lmp: last machine port
+ */
int
-ioports_permit_access(struct domain *d, unsigned int fp, unsigned int lp)
+ioports_permit_access(struct domain *d, unsigned int fgp,
+ unsigned int fmp, unsigned int lmp)
{
struct io_space *space;
- unsigned long mmio_start, mmio_end, mach_start;
+ unsigned long mmio_start, mach_start, mach_end;
int ret;
- if (IO_SPACE_NR(fp) >= num_io_spaces) {
- dprintk(XENLOG_WARNING, "Unknown I/O Port range 0x%x - 0x%x\n", fp, lp);
+ if (IO_SPACE_NR(fmp) >= num_io_spaces) {
+ dprintk(XENLOG_WARNING, "Unknown I/O Port range 0x%x - 0x%x\n", fmp, lmp);
return -EFAULT;
}
* I/O port spaces and thus will number port spaces differently.
* This is ok, they don't make use of this interface.
*/
- ret = rangeset_add_range(d->arch.ioport_caps, fp, lp);
+ ret = rangeset_add_range(d->arch.ioport_caps, fmp, lmp);
if (ret != 0)
return ret;
- space = &io_space[IO_SPACE_NR(fp)];
+ space = &io_space[IO_SPACE_NR(fmp)];
/* Legacy I/O on dom0 is already setup */
if (d == dom0 && space == &io_space[0])
return 0;
- fp = IO_SPACE_PORT(fp);
- lp = IO_SPACE_PORT(lp);
+ fmp = IO_SPACE_PORT(fmp);
+ lmp = IO_SPACE_PORT(lmp);
- if (space->sparse) {
- mmio_start = IO_SPACE_SPARSE_ENCODING(fp) & PAGE_MASK;
- mmio_end = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp));
- } else {
- mmio_start = fp & PAGE_MASK;
- mmio_end = PAGE_ALIGN(lp);
- }
+ ioports_get_mmio_addr(space, fmp, lmp, &mach_start, &mach_end);
/*
* The "machine first port" is not necessarily identity mapped
* to the guest first port. At least for the legacy range.
*/
- mach_start = mmio_start | __pa(space->mmio_base);
+ mach_start = mach_start | __pa(space->mmio_base);
+ mach_end = mach_end | __pa(space->mmio_base);
- if (space == &io_space[0]) {
- mmio_start |= IO_PORTS_PADDR;
- mmio_end |= IO_PORTS_PADDR;
- } else {
- mmio_start |= __pa(space->mmio_base);
- mmio_end |= __pa(space->mmio_base);
- }
+ mmio_start = IO_SPACE_SPARSE_ENCODING(fgp) & PAGE_MASK;
+ mmio_start |= ioports_get_mmio_base(space, d);
- while (mmio_start <= mmio_end) {
+ while (mach_start < mach_end) {
(void)__assign_domain_page(d, mmio_start, mach_start, ASSIGN_nocache);
mmio_start += PAGE_SIZE;
mach_start += PAGE_SIZE;
fp_base = IO_SPACE_PORT(fp);
lp_base = IO_SPACE_PORT(lp);
- if (space->sparse) {
- mmio_start = IO_SPACE_SPARSE_ENCODING(fp_base) & PAGE_MASK;
- mmio_end = PAGE_ALIGN(IO_SPACE_SPARSE_ENCODING(lp_base));
- } else {
- mmio_start = fp_base & PAGE_MASK;
- mmio_end = PAGE_ALIGN(lp_base);
- }
+ ioports_get_mmio_addr(space, fp_base, lp_base, &mmio_start, &mmio_end);
- if (space == &io_space[0] && d != dom0)
- mmio_base = IO_PORTS_PADDR;
- else
- mmio_base = __pa(space->mmio_base);
+ mmio_base = ioports_get_mmio_base(space, d);
for (; mmio_start < mmio_end; mmio_start += PAGE_SIZE) {
unsigned int port, range;
int ret = get_page(page, d);
BUG_ON(ret == 0);
} else {
- u64 x, nx, y;
+ unsigned long x, nx, y;
- y = *((u64*)&page->count_info);
+ y = page->count_info;
do {
x = y;
nx = x + 1;
BUG_ON((x >> 32) != 0);
BUG_ON((nx & PGC_count_mask) != 2);
- y = cmpxchg((u64*)&page->count_info, x, nx);
+ y = cmpxchg(&page->count_info, x, nx);
} while (unlikely(y != x));
+ BUG_ON(page_get_owner(page) != NULL);
}
}
if (mfn == INVALID_MFN) {
// clear pte
old_pte = ptep_get_and_clear(mm, mpaddr, pte);
+ if (!pte_mem(old_pte))
+ return;
mfn = pte_pfn(old_pte);
} else {
unsigned long old_arflags;
again:
// memory_exchange() calls guest_physmap_remove_page() with
// a stolen page, i.e. page owner = NULL.
- BUG_ON(page_get_owner(mfn_to_page(mfn)) != d &&
+ BUG_ON(mfn_valid(mfn) &&
+ page_get_owner(mfn_to_page(mfn)) != d &&
page_get_owner(mfn_to_page(mfn)) != NULL);
old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
old_pte = pfn_pte(mfn, __pgprot(old_arflags));
BUG_ON(mfn != pte_pfn(ret_pte));
}
+ perfc_incr(zap_domain_page_one);
+ if (!mfn_valid(mfn))
+ return;
+
+ if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) ) {
+ int i, j;
+ /* VT-d page-table entries cover 4K, so drop every 4K
+ * sub-page of this (possibly larger) Xen page. */
+ j = 1 << (PAGE_SHIFT-PAGE_SHIFT_4K);
+ for (i = 0 ; i < j; i++)
+ iommu_unmap_page(d, (mpaddr>>PAGE_SHIFT)*j + i);
+ }
+
page = mfn_to_page(mfn);
BUG_ON((page->count_info & PGC_count_mask) == 0);
BUG_ON(clear_PGC_allocate && (page_get_owner(page) == NULL));
domain_put_page(d, mpaddr, pte, old_pte, clear_PGC_allocate);
- perfc_incr(zap_domain_page_one);
+}
+
+int
+deassign_domain_mmio_page(struct domain *d, unsigned long mpaddr,
+ unsigned long phys_addr, unsigned long size )
+{
+ unsigned long addr = mpaddr & PAGE_MASK;
+ unsigned long end = PAGE_ALIGN(mpaddr + size);
+
+ if (size == 0) {
+ gdprintk(XENLOG_INFO, "%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
+ __func__, d, mpaddr, size);
+ }
+ if (!efi_mmio(phys_addr, size)) {
+#ifndef NDEBUG
+ gdprintk(XENLOG_INFO, "%s: domain %p mpaddr 0x%lx size = 0x%lx\n",
+ __func__, d, mpaddr, size);
+#endif
+ return -EINVAL;
+ }
+
+ for (; addr < end; addr += PAGE_SIZE )
+ zap_domain_page_one(d, addr, 0, INVALID_MFN);
+ return 0;
}
unsigned long
#if 0 /* if big endian */
# error "implement big endian version of steal_page()"
#endif
- u32 _d, _nd;
- u64 x, nx, y;
+ unsigned long x, y;
if (page_get_owner(page) != d) {
gdprintk(XENLOG_INFO, "%s d 0x%p owner 0x%p\n",
}
spin_lock(&d->page_alloc_lock);
+ /* check again */
+ if (is_xen_heap_page(page) || page_get_owner(page) != d) {
+ goto fail;
+ }
/*
- * The tricky bit: atomically release ownership while there is just one
- * benign reference to the page (PGC_allocated). If that reference
- * disappears then the deallocation routine will safely spin.
+ * We require that there is just one reference (PGC_allocated). We
+ * temporarily drop it now so that we can safely swizzle the owner.
*/
- _d = pickle_domptr(d);
- y = *((u64*)&page->count_info);
+ y = page->count_info;
do {
x = y;
- nx = x & 0xffffffff;
- // page->count_info: untouched
- // page->u.inused._domain = 0;
- _nd = x >> 32;
if (unlikely(((x & (PGC_count_mask | PGC_allocated)) !=
- (1 | PGC_allocated))) ||
- unlikely(_nd != _d)) {
- struct domain* nd = unpickle_domptr(_nd);
+ (1 | PGC_allocated)))) {
+ struct domain* nd = page_get_owner(page);
if (nd == NULL) {
gdprintk(XENLOG_INFO, "gnttab_transfer: "
- "Bad page %p: ed=%p(%u) 0x%x, "
- "sd=%p 0x%x,"
+ "Bad page %p: ed=%p(%u), "
+ "sd=%p,"
" caf=%016lx, taf=%" PRtype_info
" memflags 0x%x\n",
(void *) page_to_mfn(page),
- d, d->domain_id, _d,
- nd, _nd,
+ d, d->domain_id,
+ nd,
x,
page->u.inuse.type_info,
memflags);
} else {
gdprintk(XENLOG_WARNING, "gnttab_transfer: "
- "Bad page %p: ed=%p(%u) 0x%x, "
- "sd=%p(%u) 0x%x,"
+ "Bad page %p: ed=%p(%u), "
+ "sd=%p(%u),"
" caf=%016lx, taf=%" PRtype_info
" memflags 0x%x\n",
(void *) page_to_mfn(page),
- d, d->domain_id, _d,
- nd, nd->domain_id, _nd,
+ d, d->domain_id,
+ nd, nd->domain_id,
x,
page->u.inuse.type_info,
memflags);
}
- spin_unlock(&d->page_alloc_lock);
- return -1;
+ goto fail;
}
- y = cmpxchg((u64*)&page->count_info, x, nx);
+ y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask);
} while (unlikely(y != x));
- /*
- * Unlink from 'd'. At least one reference remains (now anonymous), so
- * noone else is spinning to try to delete this page from 'd'.
- */
+ /* Swizzle the owner then reinstate the PGC_allocated reference. */
+ page_set_owner(page, NULL);
+ y = page->count_info;
+ do {
+ x = y;
+ BUG_ON((x & (PGC_count_mask | PGC_allocated)) != PGC_allocated);
+ y = cmpxchg(&page->count_info, x, x | 1);
+ } while (unlikely(y != x));
+
+ /* Unlink from original owner. */
if ( !(memflags & MEMF_no_refcount) )
d->tot_pages--;
- list_del(&page->list);
+ page_list_del(page, &d->page_list);
spin_unlock(&d->page_alloc_lock);
perfc_incr(steal_page);
return 0;
+
+ fail:
+ spin_unlock(&d->page_alloc_lock);
+ MEM_LOG("Bad page %p: ed=%p(%u), sd=%p, caf=%016lx, taf=%" PRtype_info,
+ (void *)page_to_mfn(page), d, d->domain_id,
+ page_get_owner(page), page->count_info, page->u.inuse.type_info);
+ return -1;
}
static void
smp_mb();
assign_domain_page_replace(d, gpfn << PAGE_SHIFT, mfn,
ASSIGN_writable | ASSIGN_pgc_allocated);
+ if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) ) {
+ int i, j;
+ /* Mirror the new P2M entry in the VT-d tables, one 4K
+ * sub-page at a time. */
+ j = 1 << (PAGE_SHIFT-PAGE_SHIFT_4K);
+ for (i = 0 ; i < j; i++)
+ iommu_map_page(d, gpfn*j + i, mfn*j + i);
+ }
}
int
for (i = 0; i < (1UL << page_order); i++) {
BUG_ON(!mfn_valid(mfn));
- BUG_ON(mfn_to_page(mfn)->count_info != (PGC_allocated | 1));
+ BUG_ON((mfn_to_page(mfn)->count_info & ~PGC_xen_heap) !=
+ (PGC_allocated | 1));
__guest_physmap_add_page(d, gpfn, mfn);
mfn++;
gpfn++;
//printk ("domain_cache_flush: %d %d pages\n", d->domain_id, nbr_page);
}
-#ifdef VERBOSE
-#define MEM_LOG(_f, _a...) \
- printk("DOM%u: (file=mm.c, line=%d) " _f "\n", \
- current->domain->domain_id , __LINE__ , ## _a )
-#else
-#define MEM_LOG(_f, _a...) ((void)0)
-#endif
-
-static void free_page_type(struct page_info *page, u32 type)
+static void free_page_type(struct page_info *page, unsigned long type)
{
}
-static int alloc_page_type(struct page_info *page, u32 type)
+static int alloc_page_type(struct page_info *page, unsigned long type)
{
return 1;
}
-static int opt_p2m_xenheap;
-boolean_param("p2m_xenheap", opt_p2m_xenheap);
-
void *pgtable_quicklist_alloc(void)
{
+ struct page_info *page;
void *p;
BUG_ON(dom_p2m == NULL);
- if (!opt_p2m_xenheap) {
- struct page_info *page = alloc_domheap_page(dom_p2m, 0);
- if (page == NULL)
- return NULL;
- p = page_to_virt(page);
- clear_page(p);
- return p;
- }
- p = alloc_xenheap_pages(0);
- if (p) {
- clear_page(p);
- /*
- * This page should be read only. At this moment, the third
- * argument doesn't make sense. It should be 1 when supported.
- */
- share_xen_page_with_guest(virt_to_page(p), dom_p2m, 0);
- }
+ page = alloc_domheap_page(dom_p2m, 0);
+ if (page == NULL)
+ return NULL;
+
+ p = page_to_virt(page);
+ clear_page(p);
return p;
}
BUG_ON(page->count_info != (1 | PGC_allocated));
put_page(page);
- if (opt_p2m_xenheap)
- free_xenheap_page(pgtable_entry);
}
void put_page_type(struct page_info *page)
}
-int get_page_type(struct page_info *page, u32 type)
+int get_page_type(struct page_info *page, unsigned long type)
{
u64 nx, x, y = page->u.inuse.type_info;
{
if ( ((x & PGT_type_mask) != PGT_l2_page_table) ||
(type != PGT_l1_page_table) )
- MEM_LOG("Bad type (saw %08lx != exp %08x) "
+ MEM_LOG("Bad type (saw %08lx != exp %08lx) "
"for mfn %016lx (pfn %016lx)",
x, type, page_to_mfn(page),
get_gpfn_from_mfn(page_to_mfn(page)));
/* Try to validate page type; drop the new reference on failure. */
if ( unlikely(!alloc_page_type(page, type)) )
{
- MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %08x"
- ": caf=%08x taf=%" PRtype_info,
+ MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %08lx"
+ ": caf=%016lx taf=%" PRtype_info,
page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
type, page->count_info, page->u.inuse.type_info);
/* Noone else can get a reference. We hold the only ref. */
return 1;
}
-int memory_is_conventional_ram(paddr_t p)
+int page_is_ram_type(unsigned long mfn, unsigned long type)
{
- return (efi_mem_type(p) == EFI_CONVENTIONAL_MEMORY);
+ u32 mem_type = efi_mem_type(pfn_to_paddr(mfn));
+
+ if (type & RAM_TYPE_CONVENTIONAL)
+ {
+ switch (mem_type)
+ {
+ case EFI_BOOT_SERVICES_CODE:
+ case EFI_BOOT_SERVICES_DATA:
+ case EFI_LOADER_CODE:
+ case EFI_LOADER_DATA:
+ case EFI_CONVENTIONAL_MEMORY:
+ return 1;
+ default:
+ break;
+ }
+ }
+ if (type & RAM_TYPE_RESERVED)
+ {
+ switch (mem_type)
+ {
+ case EFI_RUNTIME_SERVICES_CODE:
+ case EFI_RUNTIME_SERVICES_DATA:
+ case EFI_RESERVED_TYPE:
+ case EFI_MEMORY_MAPPED_IO:
+ case EFI_MEMORY_MAPPED_IO_PORT_SPACE:
+ case EFI_PAL_CODE:
+ return 1;
+ default:
+ break;
+ }
+ }
+ if (type & RAM_TYPE_ACPI)
+ {
+ switch (mem_type)
+ {
+ case EFI_ACPI_RECLAIM_MEMORY:
+ case EFI_ACPI_MEMORY_NVS:
+ return 1;
+ default:
+ break;
+ }
+ }
+ else if (type & RAM_TYPE_UNUSABLE)
+ {
+ return (mem_type == EFI_UNUSABLE_MEMORY);
+ }
+
+ return 0;
}
spin_unlock(&d->grant_table->lock);
break;
- case XENMAPSPACE_mfn:
- {
- if ( get_page_from_pagenr(xatp.idx, d) ) {
- struct xen_ia64_memmap_info memmap_info;
- efi_memory_desc_t md;
- int ret;
-
- mfn = xatp.idx;
- page = mfn_to_page(mfn);
-
- memmap_info.efi_memmap_size = sizeof(md);
- memmap_info.efi_memdesc_size = sizeof(md);
- memmap_info.efi_memdesc_version =
- EFI_MEMORY_DESCRIPTOR_VERSION;
-
- md.type = EFI_CONVENTIONAL_MEMORY;
- md.pad = 0;
- md.phys_addr = xatp.gpfn << PAGE_SHIFT;
- md.virt_addr = 0;
- md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
- md.attribute = EFI_MEMORY_WB;
-
- ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
- if (ret != 0) {
- put_page(page);
- rcu_unlock_domain(d);
- gdprintk(XENLOG_DEBUG,
- "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
- __func__, __LINE__,
- d->domain_id, xatp.gpfn, xatp.idx, ret);
- return ret;
- }
+ case XENMAPSPACE_gmfn: {
+ struct xen_ia64_memmap_info memmap_info;
+ efi_memory_desc_t md;
+ int ret;
+
+ xatp.idx = gmfn_to_mfn(d, xatp.idx);
+ if ( !get_page_from_pagenr(xatp.idx, d) )
+ break;
+
+ mfn = xatp.idx;
+ page = mfn_to_page(mfn);
+
+ memmap_info.efi_memmap_size = sizeof(md);
+ memmap_info.efi_memdesc_size = sizeof(md);
+ memmap_info.efi_memdesc_version =
+ EFI_MEMORY_DESCRIPTOR_VERSION;
+
+ md.type = EFI_CONVENTIONAL_MEMORY;
+ md.pad = 0;
+ md.phys_addr = xatp.gpfn << PAGE_SHIFT;
+ md.virt_addr = 0;
+ md.num_pages = 1UL << (PAGE_SHIFT - EFI_PAGE_SHIFT);
+ md.attribute = EFI_MEMORY_WB;
+
+ ret = __dom0vp_add_memdesc(d, &memmap_info, (char*)&md);
+ if (ret != 0) {
+ put_page(page);
+ rcu_unlock_domain(d);
+ gdprintk(XENLOG_DEBUG,
+ "%s:%d td %d gpfn 0x%lx mfn 0x%lx ret %d\n",
+ __func__, __LINE__,
+ d->domain_id, xatp.gpfn, xatp.idx, ret);
+ return ret;
}
break;
}
/* Map at new location. */
/* Here page->count_info = PGC_allocated | N where N >= 1*/
__guest_physmap_add_page(d, xatp.gpfn, mfn);
- page = NULL; /* prevent put_page() */
out:
domain_unlock(d);
break;
}
- case XENMEM_remove_from_physmap:
- {
- struct xen_remove_from_physmap xrfp;
- unsigned long mfn;
- struct domain *d;
-
- if ( copy_from_guest(&xrfp, arg, 1) )
- return -EFAULT;
-
- rc = rcu_lock_target_domain_by_id(xrfp.domid, &d);
- if ( rc != 0 )
- return rc;
-
- domain_lock(d);
-
- mfn = gmfn_to_mfn(d, xrfp.gpfn);
-
- if ( mfn_valid(mfn) )
- guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0);
-
- domain_unlock(d);
-
- rcu_unlock_domain(d);
-
- break;
- }
-
-
case XENMEM_machine_memory_map:
{
struct xen_memory_map memmap;
return 0;
}
+ case XENMEM_get_pod_target:
+ case XENMEM_set_pod_target: {
+ /* XXX: PoD populate on demand isn't supported yet. */
+ xen_pod_target_t target;
+ struct domain *d;
+
+ /* Support DOMID_SELF? */
+ if ( !IS_PRIV(current->domain) )
+ return -EINVAL;
+
+ if ( copy_from_guest(&target, arg, 1) )
+ return -EFAULT;
+
+ rc = rcu_lock_target_domain_by_id(target.domid, &d);
+ if ( rc != 0 )
+ return rc;
+
+ if ( op == XENMEM_set_pod_target )
+ {
+ /* If -ENOSYS is returned, the domain
+ builder aborts domain creation. */
+ /* rc = -ENOSYS; */
+ }
+
+ target.tot_pages = d->tot_pages;
+ target.pod_cache_pages = 0;
+ target.pod_entries = 0;
+
+ if ( copy_to_guest(arg, &target, 1) )
+ {
+ rc= -EFAULT;
+ goto pod_target_out_unlock;
+ }
+
+ pod_target_out_unlock:
+ rcu_unlock_domain(d);
+ return rc;
+ }
+
default:
return -ENOSYS;
}
__xencomm_mark_dirty(current->domain, addr, len);
}
-int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn)
+/* stubs for populate on demand */
+int
+guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
+ unsigned int order)
{
- /* STUB to compile */
+ gdprintk(XENLOG_WARNING, "populate on demand isn't supported yet\n");
return -ENOSYS;
}
-int iommu_unmap_page(struct domain *d, unsigned long gfn)
+int
+p2m_pod_decrease_reservation(struct domain *d, xen_pfn_t gpfn,
+ unsigned int order)
{
- /* STUB to compile */
- return -ENOSYS;
+ gdprintk(XENLOG_WARNING, "populate on demand isn't supported yet\n");
+ return 0;
}
/*
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/domain.h>
#include <xen/guest_access.h>
#include <xen/acpi.h>
#include <public/platform.h>
extern int set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf);
extern long set_cx_pminfo(uint32_t cpu, struct xen_processor_power *power);
-int xenpf_copy_px_states(struct processor_performance *pxpt,
- struct xen_processor_performance *dom0_px_info)
-{
- if (!pxpt || !dom0_px_info)
- return -EINVAL;
- return copy_from_guest(pxpt->states, dom0_px_info->states,
- dom0_px_info->state_count);
-}
-
long do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
{
long ret = 0;
switch ( op->u.set_pminfo.type )
{
case XEN_PM_PX:
+ if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+ {
+ ret = -ENOSYS;
+ break;
+ }
ret = set_px_pminfo(op->u.set_pminfo.id,
&op->u.set_pminfo.perf);
break;
static int implemented_rid_bits = 0;
static int mp_rid_shift;
+static DEFINE_SPINLOCK(ridblock_lock);
static struct domain *ridblock_owner[MAX_RID_BLOCKS] = { 0 };
void __init init_rid_allocator (void)
n_rid_blocks = 1UL << (ridbits - IA64_MIN_IMPL_RID_BITS);
// skip over block 0, reserved for "meta-physical mappings (and Xen)"
+ spin_lock(&ridblock_lock);
for (i = n_rid_blocks; i < MAX_RID_BLOCKS; i += n_rid_blocks) {
if (ridblock_owner[i] == NULL) {
for (j = i; j < i + n_rid_blocks; ++j) {
break;
}
}
-
- if (i >= MAX_RID_BLOCKS)
+
+ if (i >= MAX_RID_BLOCKS) {
+ spin_unlock(&ridblock_lock);
return 0;
-
+ }
+
// found an unused block:
// (i << min_rid_bits) <= rid < ((i + n) << min_rid_bits)
// mark this block as owned
for (j = i; j < i + n_rid_blocks; ++j)
ridblock_owner[j] = d;
-
+ spin_unlock(&ridblock_lock);
+
// setup domain struct
d->arch.rid_bits = ridbits;
d->arch.starting_rid = i << IA64_MIN_IMPL_RID_BITS;
if (d->arch.rid_bits == 0)
return 1;
-
+ spin_lock(&ridblock_lock);
for (i = rid_block_start; i < rid_block_end; ++i) {
ASSERT(ridblock_owner[i] == d);
ridblock_owner[i] = NULL;
}
+ spin_unlock(&ridblock_lock);
d->arch.rid_bits = 0;
d->arch.starting_rid = 0;
return -ENOMEM;
}
- list_add(&entry_page->list, &tlb_track->page_list);
+ page_list_add(entry_page, &tlb_track->page_list);
track_entries = (struct tlb_track_entry*)page_to_virt(entry_page);
allocated = PAGE_SIZE / sizeof(track_entries[0]);
tlb_track->num_entries += allocated;
tlb_track->limit = TLB_TRACK_LIMIT_ENTRIES;
tlb_track->num_entries = 0;
tlb_track->num_free = 0;
- INIT_LIST_HEAD(&tlb_track->page_list);
+ INIT_PAGE_LIST_HEAD(&tlb_track->page_list);
if (tlb_track_allocate_entries(tlb_track) < 0)
goto out;
spin_lock(&tlb_track->free_list_lock);
BUG_ON(tlb_track->num_free != tlb_track->num_entries);
- list_for_each_entry_safe(page, next, &tlb_track->page_list, list) {
- list_del(&page->list);
+ page_list_for_each_safe(page, next, &tlb_track->page_list) {
+ page_list_del(page, &tlb_track->page_list);
free_domheap_page(page);
}
// copy its value to the variable, tr, before use.
TR_ENTRY tr;
+ // fast path:
+ // try to access gip with guest virtual address directly.
+    // This may cause a tlb miss; see vcpu_translate(). Be careful!
+ swap_rr0 = (!region && PSCB(vcpu, metaphysical_mode));
+ if (swap_rr0) {
+ set_virtual_rr0();
+ }
+ *bundle = __get_domain_bundle(gip);
+ if (swap_rr0) {
+ set_metaphysical_rr0();
+ }
+
+ if (!bundle->i64[0] && !bundle->i64[1]) {
+ dprintk(XENLOG_INFO, "%s gip 0x%lx\n", __func__, gip);
+ } else {
+            // Okay, the mDTC lookup succeeded
+ return 1;
+ }
+ // mDTC failed, so try vTLB.
+
trp = vcpu_tr_lookup(vcpu, gip, rid, 0);
if (trp != NULL) {
tr = *trp;
tr = *trp;
goto found;
}
-#if 0
tr = PSCBX(vcpu, dtlb);
if (vcpu_match_tr_entry(&tr, gip, rid)) {
goto found;
}
-#endif
- // try to access gip with guest virtual address
- // This may cause tlb miss. see vcpu_translate(). Be careful!
- swap_rr0 = (!region && PSCB(vcpu, metaphysical_mode));
- if (swap_rr0) {
- set_virtual_rr0();
- }
- *bundle = __get_domain_bundle(gip);
- if (swap_rr0) {
- set_metaphysical_rr0();
- }
- if (bundle->i64[0] == 0 && bundle->i64[1] == 0) {
- dprintk(XENLOG_INFO, "%s gip 0x%lx\n", __func__, gip);
- return 0;
- }
- return 1;
+    // mDTC and vTLB both failed, so reflect the tlb miss into the guest.
+ return 0;
found:
gpip = ((tr.pte.ppn >> (tr.ps - 12)) << tr.ps) |
{
unsigned int mpt_order;
unsigned long mpt_table_size;
+ struct page_info *page;
unsigned long i;
if (!opt_contig_mem) {
mpt_table_size = max_page * sizeof(unsigned long);
mpt_order = get_order(mpt_table_size);
ASSERT(mpt_order <= MAX_ORDER);
- if ((mpt_table = alloc_xenheap_pages(mpt_order)) == NULL)
+ page = alloc_domheap_pages(NULL, mpt_order, 0);
+ if (page == NULL)
panic("Not enough memory to bootstrap Xen.\n");
+ mpt_table = page_to_virt(page);
printk("machine to physical table: 0x%lx mpt_table_size 0x%lx\n"
"mpt_order %u max_page 0x%lx\n",
(u64)mpt_table, mpt_table_size, mpt_order, max_page);
#include <asm/sn/simulator.h>
#include <asm/sal.h>
-unsigned long xenheap_phys_end, total_pages;
+unsigned long total_pages;
char saved_command_line[COMMAND_LINE_SIZE];
char __initdata dom0_command_line[COMMAND_LINE_SIZE];
static int __initdata opt_xencons_poll;
boolean_param("xencons_poll", opt_xencons_poll);
+#define XENHEAP_DEFAULT_SIZE KERNEL_TR_PAGE_SIZE
+#define XENHEAP_SIZE_MIN (16 * 1024 * 1024) /* 16MBytes */
unsigned long xenheap_size = XENHEAP_DEFAULT_SIZE;
unsigned long xen_pstart;
-void *xen_pickle_offset __read_mostly;
-
-static void __init parse_xenheap_megabytes(char *s)
-{
- unsigned long megabytes = simple_strtoll(s, NULL, 0);
-
-#define XENHEAP_MEGABYTES_MIN 16UL
- if (megabytes < XENHEAP_MEGABYTES_MIN)
- megabytes = XENHEAP_MEGABYTES_MIN;
-
-#define XENHEAP_MEGABYTES_MAX 4096UL /* need more? If so,
- __pickle()/__unpickle() must be
- revised. */
- if (megabytes > XENHEAP_MEGABYTES_MAX)
- megabytes = XENHEAP_MEGABYTES_MAX;
-
- xenheap_size = megabytes * 1024 * 1024;
-}
-custom_param("xenheap_megabytes", parse_xenheap_megabytes);
static int __init
xen_count_pages(u64 start, u64 end, void *arg)
unsigned long s = max(start, max(__pa(desc->xen_heap_start),
md->phys_addr));
unsigned long e = min(end, min(md_end, desc->xenheap_phys_end));
- init_xenheap_pages(s, e);
+ init_boot_pages(s, e);
}
}
static int __initdata dom0_vhpt_size_log2;
integer_param("dom0_vhpt_size_log2", dom0_vhpt_size_log2);
#endif
+unsigned long xen_fixed_mfn_start __read_mostly;
+unsigned long xen_fixed_mfn_end __read_mostly;
void __init start_kernel(void)
{
struct domain *idle_domain;
struct vcpu *dom0_vcpu0;
efi_memory_desc_t *kern_md, *last_md, *md;
+ unsigned long xenheap_phys_end;
void *xen_heap_start;
struct xen_heap_desc heap_desc;
#ifdef CONFIG_SMP
* for the actual xenheap.
*/
max_page = efi_get_max_addr() >> PAGE_SHIFT;
- while ((max_page >> 3) > xenheap_size - (XENHEAP_MEGABYTES_MIN << 20))
+ while ((max_page >> 3) > xenheap_size - XENHEAP_SIZE_MIN)
xenheap_size <<= 1;
- BUG_ON(xenheap_size > (XENHEAP_MEGABYTES_MAX << 20));
-
xenheap_phys_end = xen_pstart + xenheap_size;
printk("xen image pstart: 0x%lx, xenheap pend: 0x%lx\n",
xen_pstart, xenheap_phys_end);
printk("find_memory: efi_memmap_walk returns max_page=%lx\n",max_page);
efi_print();
- /*
- * later [__init_begin, __init_end) will be freed up as xen heap
- * so that struct domain might be allocated from the init area
- * which is < xen_heap_start. so we can't simply set
- * xen_pickle_offset = xen_heap_start.
- */
- xen_pickle_offset = ia64_imva(__init_begin);
-
xen_heap_start = memguard_init(ia64_imva(&_end));
printk("Before xen_heap_start: %p\n", xen_heap_start);
xen_heap_start = __va(init_boot_allocator(__pa(xen_heap_start)));
(xenheap_phys_end-__pa(xen_heap_start)) >> 20,
(xenheap_phys_end-__pa(xen_heap_start)) >> 10);
+ /* for is_xen_fixed_mfn() */
+ xen_fixed_mfn_start = virt_to_mfn(&_start);
+ xen_fixed_mfn_end = virt_to_mfn(xen_heap_start);
+
end_boot_allocator();
softirq_init();
}
}
+int xen_in_range(paddr_t start, paddr_t end)
+{
+ paddr_t xs = __pa(&_start);
+ paddr_t xe = __pa(&_end);
+
+ return (start < xe) && (end > xs);
+}
obj-y += apic.o
obj-y += bitops.o
obj-y += clear_page.o
+obj-y += copy_page.o
obj-y += compat.o
obj-y += delay.o
obj-y += dmi_scan.o
obj-y += numa.o
obj-y += pci.o
obj-y += physdev.o
-obj-y += rwlock.o
obj-y += setup.o
obj-y += shutdown.o
obj-y += smp.o
obj-y += crash.o
obj-y += tboot.o
obj-y += hpet.o
+obj-y += bzimage.o
obj-$(crash_debug) += gdbstub.o
$(@D)/.$(@F).1.o -o $@
rm -f $(@D)/.$(@F).[0-9]*
-asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c $(HDRS)
+asm-offsets.s: $(TARGET_SUBARCH)/asm-offsets.c
$(CC) $(CFLAGS) -S -o $@ $<
-xen.lds: $(TARGET_SUBARCH)/xen.lds.S $(HDRS)
+xen.lds: $(TARGET_SUBARCH)/xen.lds.S
$(CC) -P -E -Ui386 $(AFLAGS) -o $@ $<
boot/mkelf32: boot/mkelf32.c
.PHONY: clean
clean::
rm -f asm-offsets.s xen.lds boot/*.o boot/*~ boot/core boot/mkelf32
- rm -f $(BASEDIR)/.xen-syms.[0-9]*
+ rm -f $(BASEDIR)/.xen-syms.[0-9]* boot/.*.d
+ rm -f boot/reloc.S boot/reloc.lnk boot/reloc.bin
CFLAGS += -msoft-float
# Disable PIE/SSP if GCC supports them. They can break us.
-CFLAGS += $(call cc-option,$(CC),-nopie,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector,)
-CFLAGS += $(call cc-option,$(CC),-fno-stack-protector-all,)
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
ifeq ($(supervisor_mode_kernel),y)
CFLAGS += -DCONFIG_X86_SUPERVISOR_MODE_KERNEL=1
CFLAGS += -mno-red-zone -fpic -fno-reorder-blocks
CFLAGS += -fno-asynchronous-unwind-tables
# -fvisibility=hidden reduces -fpic cost, if it's available
-CFLAGS += $(call cc-option,$(CC),-fvisibility=hidden,)
-CFLAGS := $(subst -fvisibility=hidden,-DGCC_HAS_VISIBILITY_ATTRIBUTE,$(CFLAGS))
+ifneq ($(call cc-option,$(CC),-fvisibility=hidden,n),n)
+CFLAGS += -DGCC_HAS_VISIBILITY_ATTRIBUTE
+endif
x86_32 := n
x86_64 := y
endif
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/svm/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/hvm/vmx/*.h)
-HDRS += $(wildcard $(BASEDIR)/include/asm-x86/mach-*/*.h)
-
# Require GCC v3.4+ (to avoid issues with alignment constraints in Xen headers)
$(call cc-ver-check,CC,0x030400,"Xen requires at least gcc-3.4")
#endif /* CONFIG_X86_IO_APIC */
-static unsigned long __init
-acpi_scan_rsdp(unsigned long start, unsigned long length)
-{
- unsigned long offset = 0;
- unsigned long sig_len = sizeof("RSD PTR ") - 1;
-
- /*
- * Scan all 16-byte boundaries of the physical memory region for the
- * RSDP signature.
- */
- for (offset = 0; offset < length; offset += 16) {
- if (strncmp((char *)(start + offset), "RSD PTR ", sig_len))
- continue;
- return (start + offset);
- }
-
- return 0;
-}
-
static int __init acpi_parse_sbf(struct acpi_table_header *table)
{
struct acpi_table_boot *sb;
static void __init
acpi_fadt_parse_sleep_info(struct acpi_table_fadt *fadt)
{
- struct acpi_table_rsdp *rsdp;
- unsigned long rsdp_phys;
struct acpi_table_facs *facs = NULL;
uint64_t facs_pa;
- rsdp_phys = acpi_find_rsdp();
- if (!rsdp_phys || acpi_disabled)
- goto bad;
- rsdp = __va(rsdp_phys);
-
acpi_fadt_copy_address(pm1a_cnt, pm1a_control, pm1_control);
acpi_fadt_copy_address(pm1b_cnt, pm1b_control, pm1_control);
acpi_fadt_copy_address(pm1a_evt, pm1a_event, pm1_event);
return 0;
}
-unsigned long __init acpi_find_rsdp(void)
-{
- unsigned long rsdp_phys = 0;
-
-#if 0
- if (efi_enabled) {
- if (efi.acpi20 != EFI_INVALID_TABLE_ADDR)
- return efi.acpi20;
- else if (efi.acpi != EFI_INVALID_TABLE_ADDR)
- return efi.acpi;
- }
-#endif
- /*
- * Scan memory looking for the RSDP signature. First search EBDA (low
- * memory) paragraphs and then search upper memory (E0000-FFFFF).
- */
- rsdp_phys = acpi_scan_rsdp(0, 0x400);
- if (!rsdp_phys)
- rsdp_phys = acpi_scan_rsdp(0xE0000, 0x20000);
-
- return rsdp_phys;
-}
-
#ifdef CONFIG_X86_LOCAL_APIC
/*
* Parse LAPIC entries in MADT
count =
acpi_table_parse_madt(ACPI_MADT_INT_SRC_OVR, acpi_parse_int_src_ovr,
- NR_IRQ_VECTORS);
+ MAX_IRQ_SOURCES);
if (count < 0) {
printk(KERN_ERR PREFIX
"Error parsing interrupt source overrides entry\n");
count =
acpi_table_parse_madt(ACPI_MADT_NMI_SRC, acpi_parse_nmi_src,
- NR_IRQ_VECTORS);
+ MAX_IRQ_SOURCES);
if (count < 0) {
printk(KERN_ERR PREFIX "Error parsing NMI SRC entry\n");
/* TBD: Cleanup to allow fallback to MPS */
#define DEBUG_PM_CX
-#define US_TO_PM_TIMER_TICKS(t) ((t * (PM_TIMER_FREQUENCY/1000)) / 1000)
-#define PM_TIMER_TICKS_TO_US(t) ((t * 1000) / (PM_TIMER_FREQUENCY / 1000))
-#define C2_OVERHEAD 4 /* 1us (3.579 ticks per us) */
-#define C3_OVERHEAD 4 /* 1us (3.579 ticks per us) */
-
static void (*lapic_timer_off)(void);
static void (*lapic_timer_on)(void);
extern u32 pmtmr_ioport;
extern void (*pm_idle) (void);
+extern void (*dead_idle) (void);
static void (*pm_idle_save) (void) __read_mostly;
unsigned int max_cstate __read_mostly = ACPI_PROCESSOR_MAX_POWER - 1;
static void print_acpi_power(uint32_t cpu, struct acpi_processor_power *power)
{
- uint32_t i;
+ uint32_t i, idle_usage = 0;
+ uint64_t res, idle_res = 0;
printk("==cpu%d==\n", cpu);
printk("active state:\t\tC%d\n",
- power->last_state ? (int)(power->last_state - power->states) : -1);
+ power->last_state ? power->last_state->idx : -1);
printk("max_cstate:\t\tC%d\n", max_cstate);
printk("states:\n");
for ( i = 1; i < power->count; i++ )
{
- printk((power->last_state == &power->states[i]) ? " *" : " ");
+ res = acpi_pm_tick_to_ns(power->states[i].time);
+ idle_usage += power->states[i].usage;
+ idle_res += res;
+
+ printk((power->last_state && power->last_state->idx == i) ?
+ " *" : " ");
printk("C%d:\t", i);
printk("type[C%d] ", power->states[i].type);
printk("latency[%03d] ", power->states[i].latency);
printk("usage[%08d] ", power->states[i].usage);
- printk("duration[%"PRId64"]\n", power->states[i].time);
+ printk("duration[%"PRId64"]\n", res);
}
+ printk(" C0:\tusage[%08d] duration[%"PRId64"]\n",
+ idle_usage, NOW() - idle_res);
+
}
static void dump_cx(unsigned char key)
static void acpi_idle_do_entry(struct acpi_processor_cx *cx)
{
- if ( cx->space_id == ACPI_ADR_SPACE_FIXED_HARDWARE )
+ int unused;
+
+ switch ( cx->entry_method )
{
+ case ACPI_CSTATE_EM_FFH:
/* Call into architectural FFH based C-state */
acpi_processor_ffh_cstate_enter(cx);
- }
- else
- {
- int unused;
+ return;
+ case ACPI_CSTATE_EM_SYSIO:
/* IO port based C-state */
inb(cx->address);
/* Dummy wait op - must do something useless after P_LVL2 read
because chipsets cannot guarantee that STPCLK# signal
gets asserted in time to freeze execution properly. */
unused = inl(pmtmr_ioport);
- }
-}
-
-static inline void acpi_idle_update_bm_rld(struct acpi_processor_power *power,
- struct acpi_processor_cx *target)
-{
- if ( !power->flags.bm_check )
return;
-
- if ( power->flags.bm_rld_set && target->type != ACPI_STATE_C3 )
- {
- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 0);
- power->flags.bm_rld_set = 0;
- }
-
- if ( !power->flags.bm_rld_set && target->type == ACPI_STATE_C3 )
- {
- acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
- power->flags.bm_rld_set = 1;
+ case ACPI_CSTATE_EM_HALT:
+ acpi_safe_halt();
+ local_irq_disable();
+ return;
}
}
if ( power->flags.bm_check && acpi_idle_bm_check()
&& cx->type == ACPI_STATE_C3 )
cx = power->safe_state;
- if ( cx - &power->states[0] > max_cstate )
+ if ( cx->idx > max_cstate )
cx = &power->states[max_cstate];
}
if ( !cx )
{
if ( pm_idle_save )
- {
- printk(XENLOG_DEBUG "call pm_idle_save()\n");
pm_idle_save();
- }
else
- {
- printk(XENLOG_DEBUG "call acpi_safe_halt()\n");
acpi_safe_halt();
- }
return;
}
* ------
* Invoke the current Cx state to put the processor to sleep.
*/
- acpi_idle_update_bm_rld(power, cx);
-
switch ( cx->type )
{
case ACPI_STATE_C1:
- /* Trace cpu idle entry */
- TRACE_1D(TRC_PM_IDLE_ENTRY, 1);
-
- /*
- * Invoke C1.
- * Use the appropriate idle routine, the one that would
- * be used without acpi C-states.
- */
- if ( pm_idle_save )
- pm_idle_save();
- else
- acpi_safe_halt();
-
- /* Trace cpu idle exit */
- TRACE_1D(TRC_PM_IDLE_EXIT, 1);
-
- /*
- * TBD: Can't get time duration while in C1, as resumes
- * go to an ISR rather than here. Need to instrument
- * base interrupt handler.
- */
- sleep_ticks = 0xFFFFFFFF;
- break;
-
case ACPI_STATE_C2:
- if ( local_apic_timer_c2_ok )
+ if ( cx->type == ACPI_STATE_C1 || local_apic_timer_c2_ok )
{
- /* Trace cpu idle entry */
- TRACE_1D(TRC_PM_IDLE_ENTRY, 2);
/* Get start time (ticks) */
t1 = inl(pmtmr_ioport);
+ /* Trace cpu idle entry */
+ TRACE_2D(TRC_PM_IDLE_ENTRY, cx->idx, t1);
/* Invoke C2 */
acpi_idle_do_entry(cx);
/* Get end time (ticks) */
t2 = inl(pmtmr_ioport);
/* Trace cpu idle exit */
- TRACE_1D(TRC_PM_IDLE_EXIT, 2);
+ TRACE_2D(TRC_PM_IDLE_EXIT, cx->idx, t2);
/* Re-enable interrupts */
local_irq_enable();
ACPI_FLUSH_CPU_CACHE();
}
- /* Trace cpu idle entry */
- TRACE_1D(TRC_PM_IDLE_ENTRY, cx - &power->states[0]);
/*
* Before invoking C3, be aware that TSC/APIC timer may be
* stopped by H/W. Without carefully handling of TSC/APIC stop issues,
* deep C state can't work correctly.
*/
- /* preparing TSC stop */
- cstate_save_tsc();
/* preparing APIC stop */
lapic_timer_off();
/* Get start time (ticks) */
t1 = inl(pmtmr_ioport);
+ /* Trace cpu idle entry */
+ TRACE_2D(TRC_PM_IDLE_ENTRY, cx->idx, t1);
/* Invoke C3 */
acpi_idle_do_entry(cx);
/* Get end time (ticks) */
/* recovering TSC */
cstate_restore_tsc();
/* Trace cpu idle exit */
- TRACE_1D(TRC_PM_IDLE_EXIT, cx - &power->states[0]);
+ TRACE_2D(TRC_PM_IDLE_EXIT, cx->idx, t2);
if ( power->flags.bm_check && power->flags.bm_control )
{
cx->usage++;
if ( sleep_ticks > 0 )
{
- power->last_residency = PM_TIMER_TICKS_TO_US(sleep_ticks);
+ power->last_residency = acpi_pm_tick_to_ns(sleep_ticks) / 1000UL;
cx->time += sleep_ticks;
}
cpuidle_current_governor->reflect(power);
}
+static void acpi_dead_idle(void)
+{
+ struct acpi_processor_power *power;
+ struct acpi_processor_cx *cx;
+ int unused;
+
+ if ( (power = processor_powers[smp_processor_id()]) == NULL )
+ goto default_halt;
+
+ if ( (cx = &power->states[power->count-1]) == NULL )
+ goto default_halt;
+
+ for ( ; ; )
+ {
+ if ( !power->flags.bm_check && cx->type == ACPI_STATE_C3 )
+ ACPI_FLUSH_CPU_CACHE();
+
+ switch ( cx->entry_method )
+ {
+ case ACPI_CSTATE_EM_FFH:
+            /* Do not treat interrupts as break events */
+ mwait_idle_with_hints(cx->address, 0);
+ break;
+ case ACPI_CSTATE_EM_SYSIO:
+ inb(cx->address);
+ unused = inl(pmtmr_ioport);
+ break;
+ default:
+ goto default_halt;
+ }
+ }
+
+default_halt:
+ for ( ; ; )
+ halt();
+}
+
static int init_cx_pminfo(struct acpi_processor_power *acpi_power)
{
+ int i;
+
memset(acpi_power, 0, sizeof(*acpi_power));
+ for ( i = 0; i < ACPI_PROCESSOR_MAX_POWER; i++ )
+ acpi_power->states[i].idx = i;
+
acpi_power->states[ACPI_STATE_C1].type = ACPI_STATE_C1;
+ acpi_power->states[ACPI_STATE_C1].entry_method = ACPI_CSTATE_EM_HALT;
acpi_power->states[ACPI_STATE_C0].valid = 1;
acpi_power->states[ACPI_STATE_C1].valid = 1;
else if ( c->x86_vendor == X86_VENDOR_INTEL )
{
/*
- * Today all CPUs that support C3 share cache.
- * TBD: This needs to look at cache shared map, once
- * multi-core detection patch makes to the base.
+ * Today all MP CPUs that support C3 share cache.
+ * And caches should not be flushed by software while
+ * entering C3 type state.
*/
flags->bm_check = 1;
}
+
+ /*
+ * On all recent platforms, ARB_DISABLE is a nop.
+ * So, set bm_control to zero to indicate that ARB_DISABLE
+ * is not required while entering C3 type state on
+ * P4, Core and beyond CPUs
+ */
+ if ( c->x86_vendor == X86_VENDOR_INTEL &&
+ (c->x86 > 0x6 || (c->x86 == 6 && c->x86_model >= 14)) )
+ flags->bm_control = 0;
}
#define VENDOR_INTEL (1)
static int check_cx(struct acpi_processor_power *power, xen_processor_cx_t *cx)
{
- static int bm_check_flag;
+ static int bm_check_flag = -1;
+ static int bm_control_flag = -1;
switch ( cx->reg.space_id )
{
break;
case ACPI_ADR_SPACE_FIXED_HARDWARE:
- if ( cx->type > ACPI_STATE_C1 )
- {
- if ( cx->reg.bit_width != VENDOR_INTEL ||
- cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT )
- return -EINVAL;
+ if ( cx->reg.bit_width != VENDOR_INTEL ||
+ cx->reg.bit_offset != NATIVE_CSTATE_BEYOND_HALT )
+ return -EINVAL;
- /* assume all logical cpu has the same support for mwait */
- if ( acpi_processor_ffh_cstate_probe(cx) )
- return -EINVAL;
- }
+    /* assume all logical cpus have the same support for mwait */
+ if ( acpi_processor_ffh_cstate_probe(cx) )
+ return -EINVAL;
break;
default:
}
/* All the logic here assumes flags.bm_check is same across all CPUs */
- if ( !bm_check_flag )
+ if ( bm_check_flag == -1 )
{
/* Determine whether bm_check is needed based on CPU */
acpi_processor_power_init_bm_check(&(power->flags));
bm_check_flag = power->flags.bm_check;
+ bm_control_flag = power->flags.bm_control;
}
else
{
power->flags.bm_check = bm_check_flag;
+ power->flags.bm_control = bm_control_flag;
}
if ( power->flags.bm_check )
"C3 support without BM control\n"));
}
}
+ /*
+ * On older chipsets, BM_RLD needs to be set
+ * in order for Bus Master activity to wake the
+ * system from C3. Newer chipsets handle DMA
+ * during C3 automatically and BM_RLD is a NOP.
+ * In either case, the proper way to
+ * handle BM_RLD is to set it and leave it set.
+ */
+ acpi_set_register(ACPI_BITREG_BUS_MASTER_RLD, 1);
}
else
{
cx->valid = 1;
cx->type = xen_cx->type;
cx->address = xen_cx->reg.address;
- cx->space_id = xen_cx->reg.space_id;
+
+ switch ( xen_cx->reg.space_id )
+ {
+ case ACPI_ADR_SPACE_FIXED_HARDWARE:
+ if ( xen_cx->reg.bit_width == VENDOR_INTEL &&
+ xen_cx->reg.bit_offset == NATIVE_CSTATE_BEYOND_HALT )
+ cx->entry_method = ACPI_CSTATE_EM_FFH;
+ else
+ cx->entry_method = ACPI_CSTATE_EM_HALT;
+ break;
+ case ACPI_ADR_SPACE_SYSTEM_IO:
+ cx->entry_method = ACPI_CSTATE_EM_SYSIO;
+ break;
+ default:
+ cx->entry_method = ACPI_CSTATE_EM_NONE;
+ }
+
cx->latency = xen_cx->latency;
cx->power = xen_cx->power;
- cx->latency_ticks = US_TO_PM_TIMER_TICKS(cx->latency);
+ cx->latency_ticks = ns_to_acpi_pm_tick(cx->latency * 1000UL);
cx->target_residency = cx->latency * latency_factor;
if ( cx->type == ACPI_STATE_C1 || cx->type == ACPI_STATE_C2 )
acpi_power->safe_state = cx;
pm_idle_save = pm_idle;
pm_idle = acpi_processor_idle;
}
+
+ if ( cpu_id == 0 )
+ {
+ dead_idle = acpi_dead_idle;
+ }
return 0;
}
int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat)
{
const struct acpi_processor_power *power = processor_powers[cpuid];
- struct vcpu *v = idle_vcpu[cpuid];
- uint64_t usage;
+ uint64_t usage, res, idle_usage = 0, idle_res = 0;
int i;
if ( power == NULL )
return 0;
}
- stat->last = (power->last_state) ?
- (int)(power->last_state - &power->states[0]) : 0;
+ stat->last = power->last_state ? power->last_state->idx : 0;
stat->nr = power->count;
- stat->idle_time = v->runstate.time[RUNSTATE_running];
- if ( v->is_running )
- stat->idle_time += NOW() - v->runstate.state_entry_time;
+ stat->idle_time = get_cpu_idle_time(cpuid);
- for ( i = 0; i < power->count; i++ )
+ for ( i = power->count - 1; i >= 0; i-- )
{
- usage = power->states[i].usage;
- if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) )
+ if ( i != 0 )
+ {
+ usage = power->states[i].usage;
+ res = acpi_pm_tick_to_ns(power->states[i].time);
+ idle_usage += usage;
+ idle_res += res;
+ }
+ else
+ {
+ usage = idle_usage;
+ res = NOW() - idle_res;
+ }
+ if ( copy_to_guest_offset(stat->triggers, i, &usage, 1) ||
+ copy_to_guest_offset(stat->residencies, i, &res, 1) )
return -EFAULT;
}
- for ( i = 0; i < power->count; i++ )
- if ( copy_to_guest_offset(stat->residencies, i,
- &power->states[i].time, 1) )
- return -EFAULT;
return 0;
}
static struct cpufreq_driver acpi_cpufreq_driver;
+static unsigned int __read_mostly acpi_pstate_strict;
+integer_param("acpi_pstate_strict", acpi_pstate_strict);
+
static int check_est_cpu(unsigned int cpuid)
{
struct cpuinfo_x86 *cpu = &cpu_data[cpuid];
u32 val;
};
-static void do_drv_read(struct drv_cmd *cmd)
+static void do_drv_read(void *drvcmd)
{
+ struct drv_cmd *cmd;
u32 h;
+ cmd = (struct drv_cmd *)drvcmd;
+
switch (cmd->type) {
case SYSTEM_INTEL_MSR_CAPABLE:
rdmsr(cmd->addr.msr.reg, cmd->val, h);
{
cmd->val = 0;
- do_drv_read(cmd);
+ ASSERT(cpus_weight(cmd->mask) == 1);
+
+    /* avoid an IPI where possible, for performance */
+ if (likely(cpu_isset(smp_processor_id(), cmd->mask)))
+ do_drv_read((void *)cmd);
+ else
+ on_selected_cpus( cmd->mask, do_drv_read, (void *)cmd, 0, 1);
}
static void drv_write(struct drv_cmd *cmd)
static u32 get_cur_val(cpumask_t mask)
{
+ struct cpufreq_policy *policy;
struct processor_performance *perf;
struct drv_cmd cmd;
+ unsigned int cpu = smp_processor_id();
if (unlikely(cpus_empty(mask)))
return 0;
- switch (drv_data[first_cpu(mask)]->cpu_feature) {
+ if (!cpu_isset(cpu, mask))
+ cpu = first_cpu(mask);
+ policy = cpufreq_cpu_policy[cpu];
+
+ if (cpu >= NR_CPUS || !policy || !drv_data[policy->cpu])
+ return 0;
+
+ switch (drv_data[policy->cpu]->cpu_feature) {
case SYSTEM_INTEL_MSR_CAPABLE:
cmd.type = SYSTEM_INTEL_MSR_CAPABLE;
cmd.addr.msr.reg = MSR_IA32_PERF_STATUS;
break;
case SYSTEM_IO_CAPABLE:
cmd.type = SYSTEM_IO_CAPABLE;
- perf = drv_data[first_cpu(mask)]->acpi_data;
+ perf = drv_data[policy->cpu]->acpi_data;
cmd.addr.io.port = perf->control_register.address;
cmd.addr.io.bit_width = perf->control_register.bit_width;
break;
return 0;
}
- cmd.mask = mask;
+ cmd.mask = cpumask_of_cpu(cpu);
drv_read(&cmd);
return cmd.val;
}
+struct perf_pair {
+ union {
+ struct {
+ uint32_t lo;
+ uint32_t hi;
+ } split;
+ uint64_t whole;
+ } aperf, mperf;
+};
+static DEFINE_PER_CPU(struct perf_pair, gov_perf_pair);
+static DEFINE_PER_CPU(struct perf_pair, usr_perf_pair);
+
+static void read_measured_perf_ctrs(void *_readin)
+{
+ struct perf_pair *readin = _readin;
+
+ rdmsr(MSR_IA32_APERF, readin->aperf.split.lo, readin->aperf.split.hi);
+ rdmsr(MSR_IA32_MPERF, readin->mperf.split.lo, readin->mperf.split.hi);
+}
+
/*
* Return the measured active (C0) frequency on this CPU since last call
* to this function.
* Only IA32_APERF/IA32_MPERF ratio is architecturally defined and
* no meaning should be associated with absolute values of these MSRs.
*/
-static void __get_measured_perf(void *perf_percent)
+static unsigned int get_measured_perf(unsigned int cpu, unsigned int flag)
{
- unsigned int *ratio = perf_percent;
- union {
- struct {
- uint32_t lo;
- uint32_t hi;
- } split;
- uint64_t whole;
- } aperf_cur, mperf_cur;
+ struct cpufreq_policy *policy;
+ struct perf_pair readin, cur, *saved;
+ unsigned int perf_percent;
+ cpumask_t cpumask;
+ unsigned int retval;
+
+ if (!cpu_online(cpu))
+ return 0;
+
+ policy = cpufreq_cpu_policy[cpu];
+ if (!policy)
+ return 0;
+
+ switch (flag)
+ {
+ case GOV_GETAVG:
+ {
+ saved = &per_cpu(gov_perf_pair, cpu);
+ break;
+ }
+ case USR_GETAVG:
+ {
+ saved = &per_cpu(usr_perf_pair, cpu);
+ break;
+ }
+ default:
+ return 0;
+ }
+
+ if (cpu == smp_processor_id()) {
+ read_measured_perf_ctrs((void *)&readin);
+ } else {
+ cpumask = cpumask_of_cpu(cpu);
+ on_selected_cpus(cpumask, read_measured_perf_ctrs,
+ (void *)&readin, 0, 1);
+ }
- rdmsr(MSR_IA32_APERF, aperf_cur.split.lo, aperf_cur.split.hi);
- rdmsr(MSR_IA32_MPERF, mperf_cur.split.lo, mperf_cur.split.hi);
+ cur.aperf.whole = readin.aperf.whole - saved->aperf.whole;
+ cur.mperf.whole = readin.mperf.whole - saved->mperf.whole;
+ saved->aperf.whole = readin.aperf.whole;
+ saved->mperf.whole = readin.mperf.whole;
- wrmsr(MSR_IA32_APERF, 0,0);
- wrmsr(MSR_IA32_MPERF, 0,0);
+#ifdef __i386__
+ /*
+     * We don't want to do a 64-bit divide with a 32-bit kernel.
+     * Shift both counters down to get an approximate value instead.
+ */
+ if (unlikely(cur.aperf.split.hi || cur.mperf.split.hi)) {
+ int shift_count;
+ uint32_t h;
- if (unlikely(((unsigned long)(-1) / 100) < aperf_cur.whole)) {
+ h = max_t(uint32_t, cur.aperf.split.hi, cur.mperf.split.hi);
+ shift_count = fls(h);
+
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
+ }
+
+ if (((unsigned long)(-1) / 100) < cur.aperf.split.lo) {
int shift_count = 7;
- aperf_cur.whole >>= shift_count;
- mperf_cur.whole >>= shift_count;
+ cur.aperf.split.lo >>= shift_count;
+ cur.mperf.split.lo >>= shift_count;
}
- if (aperf_cur.whole && mperf_cur.whole)
- *ratio = (aperf_cur.whole * 100) / mperf_cur.whole;
+ if (cur.aperf.split.lo && cur.mperf.split.lo)
+ perf_percent = (cur.aperf.split.lo * 100) / cur.mperf.split.lo;
else
- *ratio = 0;
-}
+ perf_percent = 0;
-static unsigned int get_measured_perf(unsigned int cpu)
-{
- unsigned int retval, perf_percent;
- cpumask_t cpumask;
+#else
+ if (unlikely(((unsigned long)(-1) / 100) < cur.aperf.whole)) {
+ int shift_count = 7;
+ cur.aperf.whole >>= shift_count;
+ cur.mperf.whole >>= shift_count;
+ }
- if (!cpu_online(cpu))
- return 0;
+ if (cur.aperf.whole && cur.mperf.whole)
+ perf_percent = (cur.aperf.whole * 100) / cur.mperf.whole;
+ else
+ perf_percent = 0;
- cpumask = cpumask_of_cpu(cpu);
- on_selected_cpus(cpumask, __get_measured_perf, (void *)&perf_percent,0,1);
+#endif
+
+ retval = drv_data[policy->cpu]->max_freq * perf_percent / 100;
- retval = drv_data[cpu]->max_freq * perf_percent / 100;
return retval;
}
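For context (not part of the patch): the hunk above derives the "measured" frequency from the APERF/MPERF deltas accumulated since the previous call, keeping separate per-CPU baselines (gov_perf_pair / usr_perf_pair) for governor and user queries; the returned value is roughly max_freq * dAPERF / dMPERF. A minimal stand-alone sketch of that arithmetic; all names below are made up for illustration.

#include <stdint.h>
#include <stdio.h>

/* Effective frequency from counter deltas: max_freq * dAPERF / dMPERF. */
static unsigned int effective_khz(uint64_t daperf, uint64_t dmperf,
                                  unsigned int max_freq_khz)
{
    if (dmperf == 0)
        return 0;
    /* Plain 64-bit arithmetic here; the patch instead shifts both deltas
     * down on 32-bit builds to avoid a 64-bit divide. */
    return (unsigned int)((daperf * max_freq_khz) / dmperf);
}

int main(void)
{
    /* A 2.4 GHz part that ran at half speed during its C0 time. */
    printf("%u kHz\n", effective_khz(1200000, 2400000, 2400000));
    return 0;
}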
static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
{
- struct acpi_cpufreq_data *data = drv_data[cpu];
+ struct cpufreq_policy *policy;
+ struct acpi_cpufreq_data *data;
unsigned int freq;
+ policy = cpufreq_cpu_policy[cpu];
+ if (!policy)
+ return 0;
+
+ data = drv_data[policy->cpu];
if (unlikely(data == NULL ||
- data->acpi_data == NULL || data->freq_table == NULL)) {
+ data->acpi_data == NULL || data->freq_table == NULL))
return 0;
- }
freq = extract_freq(get_cur_val(cpumask_of_cpu(cpu)), data);
return freq;
next_perf_state = data->freq_table[next_state].index;
if (perf->state == next_perf_state) {
- if (unlikely(policy->resume)) {
- printk(KERN_INFO "Called after resume, resetting to P%d\n",
- next_perf_state);
+ if (unlikely(policy->resume))
policy->resume = 0;
- }
- else {
- printk(KERN_DEBUG "Already at target state (P%d)\n",
- next_perf_state);
+ else
return 0;
- }
}
switch (data->cpu_feature) {
drv_write(&cmd);
- if (!check_freqs(cmd.mask, freqs.new, data))
+ if (acpi_pstate_strict && !check_freqs(cmd.mask, freqs.new, data)) {
+        printk(KERN_WARNING "Failed to transition to new freq %d\n", freqs.new);
return -EAGAIN;
+ }
- for_each_cpu_mask(j, cmd.mask)
+ for_each_cpu_mask(j, online_policy_cpus)
cpufreq_statistic_update(j, perf->state, next_perf_state);
perf->state = next_perf_state;
perf = data->acpi_data;
policy->shared_type = perf->shared_type;
- /* capability check */
- if (perf->state_count <= 1) {
- printk("No P-States\n");
- result = -ENODEV;
- goto err_unreg;
- }
-
- if (perf->control_register.space_id != perf->status_register.space_id) {
- result = -ENODEV;
- goto err_unreg;
- }
-
switch (perf->control_register.space_id) {
case ACPI_ADR_SPACE_SYSTEM_IO:
printk("xen_pminfo: @acpi_cpufreq_cpu_init,"
policy->cpuinfo.transition_latency =
perf->states[i].transition_latency * 1000;
}
- policy->governor = CPUFREQ_DEFAULT_GOVERNOR;
+
+ policy->governor = cpufreq_opt_governor ? : CPUFREQ_DEFAULT_GOVERNOR;
data->max_freq = perf->states[0].core_frequency * 1000;
/* table init */
}
static struct cpufreq_driver acpi_cpufreq_driver = {
+ .name = "acpi-cpufreq",
.verify = acpi_cpufreq_verify,
.target = acpi_cpufreq_target,
.init = acpi_cpufreq_cpu_init,
return result;
}
+static int powernow_cpufreq_verify(struct cpufreq_policy *policy)
+{
+ struct powernow_cpufreq_data *data;
+
+ if (!policy || !(data = drv_data[policy->cpu]))
+ return -EINVAL;
+
+ return cpufreq_frequency_table_verify(policy, data->freq_table);
+}
+
static int powernow_cpufreq_cpu_init(struct cpufreq_policy *policy)
{
unsigned int i;
return result;
}
+static int powernow_cpufreq_cpu_exit(struct cpufreq_policy *policy)
+{
+ struct powernow_cpufreq_data *data = drv_data[policy->cpu];
+
+ if (data) {
+ drv_data[policy->cpu] = NULL;
+ xfree(data->freq_table);
+ xfree(data);
+ }
+
+ return 0;
+}
+
static struct cpufreq_driver powernow_cpufreq_driver = {
+ .verify = powernow_cpufreq_verify,
.target = powernow_cpufreq_target,
.init = powernow_cpufreq_cpu_init,
+ .exit = powernow_cpufreq_cpu_exit
};
int powernow_cpufreq_init(void)
data->expected_us = (u32) get_sleep_length_ns() / 1000;
/* find the deepest idle state that satisfies our constraints */
- for ( i = 1; i < power->count; i++ )
+ for ( i = 2; i < power->count; i++ )
{
struct acpi_processor_cx *s = &power->states[i];
unsigned int last_residency;
unsigned int measured_us;
- /*
- * Ugh, this idle state doesn't support residency measurements, so we
- * are basically lost in the dark. As a compromise, assume we slept
- * for one full standard timer tick. However, be aware that this
- * could potentially result in a suboptimal state transition.
- */
- if ( target->type == ACPI_STATE_C1 )
- last_residency = USEC_PER_SEC / HZ;
- else
- last_residency = power->last_residency;
-
+ last_residency = power->last_residency;
measured_us = last_residency + data->elapsed_us;
/* if wrapping, set to max uint (-1) */
#include <acpi/cpufreq/cpufreq.h>
+uint32_t system_reset_counter = 1;
+
static char opt_acpi_sleep[20];
string_param("acpi_sleep", opt_acpi_sleep);
static int device_power_down(void)
{
- iommu_suspend();
-
console_suspend();
time_suspend();
i8259A_suspend();
-
+
ioapic_suspend();
-
+
+ iommu_suspend();
+
lapic_suspend();
return 0;
static void device_power_up(void)
{
lapic_resume();
-
+
+ iommu_resume();
+
ioapic_resume();
i8259A_resume();
-
+
time_resume();
console_resume();
-
- iommu_resume();
}
static void freeze_domains(void)
{
struct domain *d;
+ struct vcpu *v;
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
- if ( d->domain_id != 0 )
+ {
+ switch ( d->domain_id )
+ {
+ case 0:
+ for_each_vcpu ( d, v )
+ if ( v != current )
+ vcpu_pause(v);
+ break;
+ default:
domain_pause(d);
+ break;
+ }
+ }
+ rcu_read_unlock(&domlist_read_lock);
}
static void thaw_domains(void)
{
struct domain *d;
+ struct vcpu *v;
+ rcu_read_lock(&domlist_read_lock);
for_each_domain ( d )
- if ( d->domain_id != 0 )
+ {
+ switch ( d->domain_id )
+ {
+ case 0:
+ for_each_vcpu ( d, v )
+ if ( v != current )
+ vcpu_unpause(v);
+ break;
+ default:
domain_unpause(d);
+ break;
+ }
+ }
+ rcu_read_unlock(&domlist_read_lock);
}
static void acpi_sleep_prepare(u32 state)
wakeup_vector_va = __acpi_map_table(
acpi_sinfo.wakeup_vector, sizeof(uint64_t));
+
+ /* TBoot will set resume vector itself (when it is safe to do so). */
+ if ( tboot_in_measured_env() )
+ return;
+
if ( acpi_sinfo.vector_width == 32 )
- {
- *(uint32_t *)wakeup_vector_va =
- tboot_in_measured_env() ?
- (uint32_t)g_tboot_shared->s3_tb_wakeup_entry :
- (uint32_t)bootsym_phys(wakeup_start);
- }
+ *(uint32_t *)wakeup_vector_va = bootsym_phys(wakeup_start);
else
- {
- *(uint64_t *)wakeup_vector_va =
- tboot_in_measured_env() ?
- (uint64_t)g_tboot_shared->s3_tb_wakeup_entry :
- (uint64_t)bootsym_phys(wakeup_start);
- }
+ *(uint64_t *)wakeup_vector_va = bootsym_phys(wakeup_start);
}
static void acpi_sleep_post(u32 state) {}
printk("Entering ACPI S%d state.\n", state);
local_irq_save(flags);
+ spin_debug_disable();
if ( (error = device_power_down()) )
{
{
case ACPI_STATE_S3:
do_suspend_lowlevel();
+ system_reset_counter++;
+ error = tboot_s3_resume();
break;
case ACPI_STATE_S5:
acpi_enter_sleep_state(ACPI_STATE_S5);
device_power_up();
- printk(XENLOG_INFO "Finishing wakeup from ACPI S%d state.", state);
+ printk(XENLOG_INFO "Finishing wakeup from ACPI S%d state.\n", state);
+
+ if ( (state == ACPI_STATE_S3) && error )
+ panic("Memory integrity was lost on resume (%d)\n", error);
done:
+ spin_debug_enable();
local_irq_restore(flags);
console_end_sync();
acpi_sleep_post(state);
enable_cpu:
cpufreq_add_cpu(0);
+ microcode_resume_cpu(0);
enable_nonboot_cpus();
thaw_domains();
spin_unlock(&pm_lock);
static void tboot_sleep(u8 sleep_state)
{
- uint32_t shutdown_type;
-
- g_tboot_shared->acpi_sinfo.pm1a_cnt =
- (uint16_t)acpi_sinfo.pm1a_cnt_blk.address;
- g_tboot_shared->acpi_sinfo.pm1b_cnt =
- (uint16_t)acpi_sinfo.pm1b_cnt_blk.address;
- g_tboot_shared->acpi_sinfo.pm1a_evt =
- (uint16_t)acpi_sinfo.pm1a_evt_blk.address;
- g_tboot_shared->acpi_sinfo.pm1b_evt =
- (uint16_t)acpi_sinfo.pm1b_evt_blk.address;
- g_tboot_shared->acpi_sinfo.pm1a_cnt_val = acpi_sinfo.pm1a_cnt_val;
- g_tboot_shared->acpi_sinfo.pm1b_cnt_val = acpi_sinfo.pm1b_cnt_val;
-
- switch ( sleep_state )
- {
- case ACPI_STATE_S3:
- shutdown_type = TB_SHUTDOWN_S3;
- g_tboot_shared->s3_k_wakeup_entry =
- (uint32_t)bootsym_phys(wakeup_start);
- break;
- case ACPI_STATE_S4:
- shutdown_type = TB_SHUTDOWN_S4;
- break;
- case ACPI_STATE_S5:
- shutdown_type = TB_SHUTDOWN_S5;
- break;
- default:
- return;
- }
-
- tboot_shutdown(shutdown_type);
+ uint32_t shutdown_type;
+
+#define TB_COPY_GAS(tbg, g) \
+ tbg.space_id = g.space_id; \
+ tbg.bit_width = g.bit_width; \
+ tbg.bit_offset = g.bit_offset; \
+ tbg.access_width = g.access_width; \
+ tbg.address = g.address;
+
+    /* sizes are not the same (due to packing), so copy each field */
+ TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1a_cnt_blk,
+ acpi_sinfo.pm1a_cnt_blk);
+ TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1b_cnt_blk,
+ acpi_sinfo.pm1b_cnt_blk);
+ TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1a_evt_blk,
+ acpi_sinfo.pm1a_evt_blk);
+ TB_COPY_GAS(g_tboot_shared->acpi_sinfo.pm1b_evt_blk,
+ acpi_sinfo.pm1b_evt_blk);
+ g_tboot_shared->acpi_sinfo.pm1a_cnt_val = acpi_sinfo.pm1a_cnt_val;
+ g_tboot_shared->acpi_sinfo.pm1b_cnt_val = acpi_sinfo.pm1b_cnt_val;
+ g_tboot_shared->acpi_sinfo.wakeup_vector = acpi_sinfo.wakeup_vector;
+ g_tboot_shared->acpi_sinfo.vector_width = acpi_sinfo.vector_width;
+ g_tboot_shared->acpi_sinfo.kernel_s3_resume_vector =
+ bootsym_phys(wakeup_start);
+
+ switch ( sleep_state )
+ {
+ case ACPI_STATE_S3:
+ shutdown_type = TB_SHUTDOWN_S3;
+ break;
+ case ACPI_STATE_S4:
+ shutdown_type = TB_SHUTDOWN_S4;
+ break;
+ case ACPI_STATE_S5:
+ shutdown_type = TB_SHUTDOWN_S5;
+ break;
+ default:
+ return;
+ }
+
+ tboot_shutdown(shutdown_type);
}
-
+
/* System is really put into sleep state by this stub */
acpi_status asmlinkage acpi_enter_sleep_state(u8 sleep_state)
{
void restore_rest_processor_state(void)
{
- int cpu = smp_processor_id();
- struct tss_struct *t = &init_tss[cpu];
struct vcpu *v = current;
- /* Rewriting the TSS desc is necessary to clear the Busy flag. */
- set_tss_desc(cpu, t);
- load_TR(cpu);
+ load_TR();
#if defined(CONFIG_X86_64)
/* Recover syscall MSRs */
wrmsr(MSR_SYSCALL_MASK, EF_VM|EF_RF|EF_NT|EF_DF|EF_IE|EF_TF, 0U);
#else /* !defined(CONFIG_X86_64) */
if ( supervisor_mode_kernel && cpu_has_sep )
- wrmsr(MSR_IA32_SYSENTER_ESP, &t->esp1, 0);
+ wrmsr(MSR_IA32_SYSENTER_ESP, &init_tss[smp_processor_id()].esp1, 0);
#endif
/* Maybe load the debug registers. */
/* Reload FPU state on next FPU use. */
stts();
+ if (cpu_has_pat)
+ wrmsrl(MSR_IA32_CR_PAT, host_pat);
+
mtrr_ap_init();
mcheck_init(&boot_cpu_data);
}
/*
* Knob to control our willingness to enable the local APIC.
*/
-int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
+static int enable_local_apic __initdata = 0; /* -1=force-disable, +1=force-enable */
/*
* Debug level
/* Performance Counters Interrupt */
set_intr_gate(PMU_APIC_VECTOR, pmu_apic_interrupt);
- /* thermal monitor LVT interrupt */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+ /* CMCI Correctable Machine Check Interrupt */
+ set_intr_gate(CMCI_APIC_VECTOR, cmci_interrupt);
+
+ /* thermal monitor LVT interrupt, for P4 and latest Intel CPU*/
+#ifdef CONFIG_X86_MCE_THERMAL
set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt);
#endif
}
}
/* lets not touch this if we didn't frob it */
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
if (maxlvt >= 5) {
v = apic_read(APIC_LVTTHMR);
apic_write_around(APIC_LVTTHMR, v | APIC_LVT_MASKED);
}
#endif
+
+ if (maxlvt >= 6) {
+ v = apic_read(APIC_CMCI);
+ apic_write_around(APIC_CMCI, v | APIC_LVT_MASKED);
+ }
/*
* Clean APIC state for other OSs:
*/
if (maxlvt >= 4)
apic_write_around(APIC_LVTPC, APIC_LVT_MASKED);
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
if (maxlvt >= 5)
apic_write_around(APIC_LVTTHMR, APIC_LVT_MASKED);
#endif
+ if (maxlvt >= 6)
+ apic_write_around(APIC_CMCI, APIC_LVT_MASKED);
+
v = GET_APIC_VERSION(apic_read(APIC_LVR));
if (APIC_INTEGRATED(v)) { /* !82489DX */
if (maxlvt > 3) /* Due to Pentium errata 3AP and 11AP. */
unsigned int apic_spiv;
unsigned int apic_lvtt;
unsigned int apic_lvtpc;
+ unsigned int apic_lvtcmci;
unsigned int apic_lvt0;
unsigned int apic_lvt1;
unsigned int apic_lvterr;
int lapic_suspend(void)
{
unsigned long flags;
-
+ int maxlvt = get_maxlvt();
if (!apic_pm_state.active)
return 0;
apic_pm_state.apic_spiv = apic_read(APIC_SPIV);
apic_pm_state.apic_lvtt = apic_read(APIC_LVTT);
apic_pm_state.apic_lvtpc = apic_read(APIC_LVTPC);
+
+ if (maxlvt >= 6) {
+ apic_pm_state.apic_lvtcmci = apic_read(APIC_CMCI);
+ }
+
apic_pm_state.apic_lvt0 = apic_read(APIC_LVT0);
apic_pm_state.apic_lvt1 = apic_read(APIC_LVT1);
apic_pm_state.apic_lvterr = apic_read(APIC_LVTERR);
{
unsigned int l, h;
unsigned long flags;
+ int maxlvt = get_maxlvt();
if (!apic_pm_state.active)
return 0;
apic_write(APIC_LVT0, apic_pm_state.apic_lvt0);
apic_write(APIC_LVT1, apic_pm_state.apic_lvt1);
apic_write(APIC_LVTTHMR, apic_pm_state.apic_thmr);
+
+ if (maxlvt >= 6) {
+ apic_write(APIC_CMCI, apic_pm_state.apic_lvtcmci);
+ }
+
apic_write(APIC_LVTPC, apic_pm_state.apic_lvtpc);
apic_write(APIC_LVTT, apic_pm_state.apic_lvtt);
apic_write(APIC_TDCR, apic_pm_state.apic_tdcr);
static void __init lapic_disable(char *str)
{
enable_local_apic = -1;
- clear_bit(X86_FEATURE_APIC, boot_cpu_data.x86_capability);
+ setup_clear_cpu_cap(X86_FEATURE_APIC);
}
custom_param("nolapic", lapic_disable);
obj-y += head.o
-head.o: head.S $(TARGET_SUBARCH).S trampoline.S mem.S video.S \
- cmdline.S edd.S wakeup.S
+head.o: reloc.S
+
+# NB. BOOT_TRAMPOLINE == 0x8c000
+%.S: %.c
+ RELOC=0x8c000 $(MAKE) -f build32.mk $@
--- /dev/null
+XEN_ROOT=../../../..
+override XEN_TARGET_ARCH=x86_32
+CFLAGS =
+include $(XEN_ROOT)/Config.mk
+
+# Disable PIE/SSP if GCC supports them. They can break us.
+$(call cc-option-add,CFLAGS,CC,-nopie)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector)
+$(call cc-option-add,CFLAGS,CC,-fno-stack-protector-all)
+
+CFLAGS += -Werror -fno-builtin -msoft-float
+
+# NB. awk invocation is a portable alternative to 'head -n -1'
+%.S: %.bin
+ (od -v -t x $< | awk 'NR > 1 {print s} {s=$$0}' | \
+ sed 's/ /,0x/g' | sed 's/^[0-9]*,/ .long /') >$@
+
+%.bin: %.lnk
+ $(OBJCOPY) -O binary $< $@
+
+%.lnk: %.o
+ $(LD) $(LDFLAGS_DIRECT) -N -Ttext 0x8c000 -o $@ $<
+
+%.o: %.c
+ $(CC) $(CFLAGS) -c $< -o $@
cmp $0x2BADB002,%eax
jne not_multiboot
- /* Save the Multiboot info structure for later use. */
- mov %ebx,sym_phys(multiboot_ptr)
+ /* Save the Multiboot info struct (after relocation) for later use. */
+ mov $sym_phys(cpu0_stack)+1024,%esp
+ push %ebx
+ call reloc
+ mov %eax,sym_phys(multiboot_ptr)
/* Initialize BSS (no nasty surprises!) */
mov $sym_phys(__bss_start),%edi
#include "cmdline.S"
+reloc:
+#include "reloc.S"
+
.align 16
.globl trampoline_start, trampoline_end
trampoline_start:
#define s16 int16_t
#define s32 int32_t
#define s64 int64_t
-#include "../../../include/public/elfstructs.h"
+#include "../../../include/xen/elfstructs.h"
#define DYNAMICALLY_FILLED 0
#define RAW_OFFSET 128
--- /dev/null
+/******************************************************************************
+ * reloc.c
+ *
+ * 32-bit flat memory-map routines for relocating Multiboot structures
+ * and modules. This is most easily done early with paging disabled.
+ *
+ * Copyright (c) 2009, Citrix Systems, Inc.
+ *
+ * Authors:
+ * Keir Fraser <keir.fraser@citrix.com>
+ */
+
+asm (
+ " .text \n"
+ " .globl _start \n"
+ "_start: \n"
+ " mov $_start,%edi \n"
+ " call 1f \n"
+ "1: pop %esi \n"
+ " sub $1b-_start,%esi \n"
+ " mov $__bss_start-_start,%ecx \n"
+ " rep movsb \n"
+ " xor %eax,%eax \n"
+ " mov $_end,%ecx \n"
+ " sub %edi,%ecx \n"
+ " rep stosb \n"
+ " mov $reloc,%eax \n"
+ " jmp *%eax \n"
+ );
+
+typedef unsigned int u32;
+#include "../../../include/xen/multiboot.h"
+
+extern char _start[];
+
+static void *memcpy(void *dest, const void *src, unsigned int n)
+{
+ char *s = (char *)src, *d = dest;
+ while ( n-- )
+ *d++ = *s++;
+ return dest;
+}
+
+static void *reloc_mbi_struct(void *old, unsigned int bytes)
+{
+ static void *alloc = &_start;
+ alloc = (void *)(((unsigned long)alloc - bytes) & ~15ul);
+ return memcpy(alloc, old, bytes);
+}
+
+static char *reloc_mbi_string(char *old)
+{
+ char *p;
+ for ( p = old; *p != '\0'; p++ )
+ continue;
+ return reloc_mbi_struct(old, p - old + 1);
+}
+
+multiboot_info_t *reloc(multiboot_info_t *mbi_old)
+{
+ multiboot_info_t *mbi = reloc_mbi_struct(mbi_old, sizeof(*mbi));
+ int i;
+
+ if ( mbi->flags & MBI_CMDLINE )
+ mbi->cmdline = (u32)reloc_mbi_string((char *)mbi->cmdline);
+
+ if ( mbi->flags & MBI_MODULES )
+ {
+ module_t *mods = reloc_mbi_struct(
+ (module_t *)mbi->mods_addr, mbi->mods_count * sizeof(module_t));
+ mbi->mods_addr = (u32)mods;
+ for ( i = 0; i < mbi->mods_count; i++ )
+ if ( mods[i].string )
+ mods[i].string = (u32)reloc_mbi_string((char *)mods[i].string);
+ }
+
+ if ( mbi->flags & MBI_MEMMAP )
+ mbi->mmap_addr = (u32)reloc_mbi_struct(
+ (memory_map_t *)mbi->mmap_addr, mbi->mmap_length);
+
+ /* Mask features we don't understand or don't relocate. */
+ mbi->flags &= (MBI_MEMLIMITS |
+ MBI_DRIVES |
+ MBI_CMDLINE |
+ MBI_MODULES |
+ MBI_MEMMAP);
+
+ return mbi;
+}
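For context (not part of the patch): reloc_mbi_struct() above is a downward bump allocator. The static alloc pointer starts at _start, and each call carves the next copy out of the memory just below the previous one, rounded down to a 16-byte boundary, so the relocated Multiboot structures end up packed immediately below the Xen image. A self-contained sketch of the same pattern, with an ordinary buffer standing in for the region below _start; the names here are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned char arena[4096];
/* Plays the role of _start: allocation proceeds downward from here. */
static unsigned char *alloc_top = arena + sizeof(arena);

static void *bump_down(const void *src, size_t bytes)
{
    /* Move down by the requested size, then align to 16 bytes. */
    alloc_top = (unsigned char *)((uintptr_t)(alloc_top - bytes) & ~(uintptr_t)15);
    return memcpy(alloc_top, src, bytes);
}

int main(void)
{
    const char *cmdline = "console=com1 loglvl=all";
    char *copy = bump_down(cmdline, strlen(cmdline) + 1);
    printf("%s (placed %zu bytes below the top)\n",
           copy, (size_t)(arena + sizeof(arena) - (unsigned char *)copy));
    return 0;
}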
movw $1, %ax
lmsw %ax # Turn on CR0.PE
- jmp 1f
-1: ljmpl $BOOT_CS32, $bootsym_phys(wakeup_32)
+ ljmpl $BOOT_CS32, $bootsym_phys(wakeup_32)
/* This code uses an extended set of video mode numbers. These include:
* Aliases for standard modes
--- /dev/null
+#include <xen/cache.h>
+#include <xen/errno.h>
+#include <xen/lib.h>
+#include <xen/mm.h>
+#include <xen/string.h>
+#include <xen/types.h>
+
+#define HEAPORDER 3
+
+static unsigned char *window;
+#define memptr long
+static memptr free_mem_ptr;
+static memptr free_mem_end_ptr;
+
+#define WSIZE 0x80000000
+
+static unsigned char *inbuf;
+static unsigned insize;
+
+/* Index of next byte to be processed in inbuf: */
+static unsigned inptr;
+
+/* Bytes in output buffer: */
+static unsigned outcnt;
+
+#define OF(args) args
+#define STATIC static
+
+#define memzero(s, n) memset((s), 0, (n))
+
+typedef unsigned char uch;
+typedef unsigned short ush;
+typedef unsigned long ulg;
+
+#define INIT __init
+
+#define get_byte() (inptr < insize ? inbuf[inptr++] : fill_inbuf())
+
+/* Diagnostic functions */
+#ifdef DEBUG
+# define Assert(cond, msg) do { if (!(cond)) error(msg); } while (0)
+# define Trace(x) do { fprintf x; } while (0)
+# define Tracev(x) do { if (verbose) fprintf x ; } while (0)
+# define Tracevv(x) do { if (verbose > 1) fprintf x ; } while (0)
+# define Tracec(c, x) do { if (verbose && (c)) fprintf x ; } while (0)
+# define Tracecv(c, x) do { if (verbose > 1 && (c)) fprintf x ; } while (0)
+#else
+# define Assert(cond, msg)
+# define Trace(x)
+# define Tracev(x)
+# define Tracevv(x)
+# define Tracec(c, x)
+# define Tracecv(c, x)
+#endif
+
+static long bytes_out;
+static void flush_window(void);
+
+static __init void error(char *x)
+{
+ panic("%s\n", x);
+}
+
+static __init int fill_inbuf(void)
+{
+ error("ran out of input data");
+ return 0;
+}
+
+
+#include "../../common/inflate.c"
+
+static __init void flush_window(void)
+{
+ /*
+     * The window is equal to the output buffer, so we only need to
+     * compute the CRC.
+ */
+ unsigned long c = crc;
+ unsigned n;
+ unsigned char *in, ch;
+
+ in = window;
+ for ( n = 0; n < outcnt; n++ )
+ {
+ ch = *in++;
+ c = crc_32_tab[((int)c ^ ch) & 0xff] ^ (c >> 8);
+ }
+ crc = c;
+
+ bytes_out += (unsigned long)outcnt;
+ outcnt = 0;
+}
+
+static __init int gzip_length(char *image, unsigned long image_len)
+{
+ return *(uint32_t *)&image[image_len - 4];
+}
+
+static __init int perform_gunzip(char *output, char **_image_start, unsigned long *image_len)
+{
+ char *image = *_image_start;
+ int rc;
+ unsigned char magic0 = (unsigned char)image[0];
+ unsigned char magic1 = (unsigned char)image[1];
+
+ if ( magic0 != 0x1f || ( (magic1 != 0x8b) && (magic1 != 0x9e) ) )
+ return 0;
+
+ window = (unsigned char *)output;
+
+ free_mem_ptr = (unsigned long)alloc_xenheap_pages(HEAPORDER, 0);
+ free_mem_end_ptr = free_mem_ptr + (PAGE_SIZE << HEAPORDER);
+
+ inbuf = (unsigned char *)image;
+ insize = *image_len;
+ inptr = 0;
+
+ makecrc();
+
+ if ( gunzip() < 0 )
+ {
+ rc = -EINVAL;
+ }
+ else
+ {
+ *_image_start = (char *)window;
+ *image_len = gzip_length(image, *image_len);
+ rc = 0;
+ }
+
+ free_xenheap_pages((void *)free_mem_ptr, HEAPORDER);
+
+ return rc;
+}
+
+struct setup_header {
+ uint8_t _pad0[0x1f1]; /* skip uninteresting stuff */
+ uint8_t setup_sects;
+ uint16_t root_flags;
+ uint32_t syssize;
+ uint16_t ram_size;
+ uint16_t vid_mode;
+ uint16_t root_dev;
+ uint16_t boot_flag;
+ uint16_t jump;
+ uint32_t header;
+#define HDR_MAGIC "HdrS"
+#define HDR_MAGIC_SZ 4
+ uint16_t version;
+#define VERSION(h,l) (((h)<<8) | (l))
+ uint32_t realmode_swtch;
+ uint16_t start_sys;
+ uint16_t kernel_version;
+ uint8_t type_of_loader;
+ uint8_t loadflags;
+ uint16_t setup_move_size;
+ uint32_t code32_start;
+ uint32_t ramdisk_image;
+ uint32_t ramdisk_size;
+ uint32_t bootsect_kludge;
+ uint16_t heap_end_ptr;
+ uint16_t _pad1;
+ uint32_t cmd_line_ptr;
+ uint32_t initrd_addr_max;
+ uint32_t kernel_alignment;
+ uint8_t relocatable_kernel;
+ uint8_t _pad2[3];
+ uint32_t cmdline_size;
+ uint32_t hardware_subarch;
+ uint64_t hardware_subarch_data;
+ uint32_t payload_offset;
+ uint32_t payload_length;
+ } __attribute__((packed));
+
+static __init int bzimage_check(struct setup_header *hdr, unsigned long len)
+{
+ if ( len < sizeof(struct setup_header) )
+ return 0;
+
+ if ( memcmp(&hdr->header, HDR_MAGIC, HDR_MAGIC_SZ) != 0 )
+ return 0;
+
+ if ( hdr->version < VERSION(2,8) ) {
+        printk("Cannot load bzImage v%d.%02d; at least v2.08 is required\n",
+ hdr->version >> 8, hdr->version & 0xff);
+ return -EINVAL;
+ }
+ return 1;
+}
+
+int __init bzimage_headroom(char *image_start, unsigned long image_length)
+{
+ struct setup_header *hdr = (struct setup_header *)image_start;
+ char *img;
+ int err, headroom;
+
+ err = bzimage_check(hdr, image_length);
+ if (err < 1)
+ return 0;
+
+ img = image_start + (hdr->setup_sects+1) * 512;
+ img += hdr->payload_offset;
+
+ headroom = gzip_length(img, hdr->payload_length);
+ headroom += headroom >> 12; /* Add 8 bytes for every 32K input block */
+ headroom += (32768 + 18); /* Add 32K + 18 bytes of extra headroom */
+ headroom = (headroom + 4095) & ~4095;
+
+ return headroom;
+}
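A worked example of the headroom estimate above (illustrative only, not part of the patch): gzip_length() reads the ISIZE field, i.e. the last four bytes of the gzip stream, which hold the uncompressed length, and bzimage_headroom() then pads that per the comments in the hunk (8 bytes per 32 KiB input block plus 32 KiB + 18 bytes of slack) and rounds up to a page. For a hypothetical 4 MiB uncompressed payload:

#include <stdio.h>

int main(void)
{
    unsigned long headroom = 4 * 1024 * 1024;   /* ISIZE from the gzip trailer */
    headroom += headroom >> 12;                 /* 8 bytes per 32 KiB block    */
    headroom += 32768 + 18;                     /* fixed worst-case slack      */
    headroom = (headroom + 4095) & ~4095UL;     /* round up to a page          */
    printf("%lu\n", headroom);                  /* prints 4231168              */
    return 0;
}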
+
+int __init bzimage_parse(char *image_base, char **image_start, unsigned long *image_len)
+{
+ struct setup_header *hdr = (struct setup_header *)(*image_start);
+ int err = bzimage_check(hdr, *image_len);
+
+ if (err < 1)
+ return err;
+
+ BUG_ON(!(image_base < *image_start));
+
+ *image_start += (hdr->setup_sects+1) * 512;
+ *image_start += hdr->payload_offset;
+ *image_len = hdr->payload_length;
+
+ if ( (err = perform_gunzip(image_base, image_start, image_len)) < 0 )
+ return err;
+
+ return 0;
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
--- /dev/null
+#include <xen/config.h>
+#include <asm/page.h>
+
+#ifdef __i386__
+#define src_reg %esi
+#define dst_reg %edi
+#define WORD_SIZE 4
+#define tmp1_reg %eax
+#define tmp2_reg %edx
+#define tmp3_reg %ebx
+#define tmp4_reg %ebp
+#else
+#define src_reg %rsi
+#define dst_reg %rdi
+#define WORD_SIZE 8
+#define tmp1_reg %r8
+#define tmp2_reg %r9
+#define tmp3_reg %r10
+#define tmp4_reg %r11
+#endif
+
+ENTRY(copy_page_sse2)
+#ifdef __i386__
+ push %ebx
+ push %ebp
+ push %esi
+ push %edi
+ mov 6*4(%esp), src_reg
+ mov 5*4(%esp), dst_reg
+#endif
+ mov $PAGE_SIZE/(4*WORD_SIZE)-3, %ecx
+
+ prefetchnta 2*4*WORD_SIZE(src_reg)
+ mov (src_reg), tmp1_reg
+ mov WORD_SIZE(src_reg), tmp2_reg
+ mov 2*WORD_SIZE(src_reg), tmp3_reg
+ mov 3*WORD_SIZE(src_reg), tmp4_reg
+
+0: prefetchnta 3*4*WORD_SIZE(src_reg)
+1: add $4*WORD_SIZE, src_reg
+ movnti tmp1_reg, (dst_reg)
+ mov (src_reg), tmp1_reg
+ dec %ecx
+ movnti tmp2_reg, WORD_SIZE(dst_reg)
+ mov WORD_SIZE(src_reg), tmp2_reg
+ movnti tmp3_reg, 2*WORD_SIZE(dst_reg)
+ mov 2*WORD_SIZE(src_reg), tmp3_reg
+ movnti tmp4_reg, 3*WORD_SIZE(dst_reg)
+ lea 4*WORD_SIZE(dst_reg), dst_reg
+ mov 3*WORD_SIZE(src_reg), tmp4_reg
+ jg 0b
+ jpe 1b
+
+ movnti tmp1_reg, (dst_reg)
+ movnti tmp2_reg, WORD_SIZE(dst_reg)
+ movnti tmp3_reg, 2*WORD_SIZE(dst_reg)
+ movnti tmp4_reg, 3*WORD_SIZE(dst_reg)
+
+#ifdef __i386__
+ pop %edi
+ pop %esi
+ pop %ebp
+ pop %ebx
+#endif
+ sfence
+ ret
if (cpuid_eax(0x80000000) >= 0x80000007) {
c->x86_power = cpuid_edx(0x80000007);
- if (c->x86_power & (1<<8))
+ if (c->x86_power & (1<<8)) {
set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+ set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+ }
}
#ifdef CONFIG_X86_HT
*/
u64 host_pat = 0x050100070406;
+static unsigned int __cpuinitdata cleared_caps[NCAPINTS];
+
+void __init setup_clear_cpu_cap(unsigned int cap)
+{
+ __clear_bit(cap, boot_cpu_data.x86_capability);
+ __set_bit(cap, cleared_caps);
+}
+
static void default_init(struct cpuinfo_x86 * c)
{
/* Not much we can do here... */
if (c->x86 >= 0x6)
c->x86_model += ((tfms >> 16) & 0xF) << 4;
c->x86_mask = tfms & 15;
+ cap0 &= ~cleared_caps[0];
if (cap0 & (1<<19))
c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8;
c->x86_capability[0] = cap0; /* Added for Xen bootstrap */
c->x86_vendor_id[0] = '\0'; /* Unset */
c->x86_model_id[0] = '\0'; /* Unset */
c->x86_max_cores = 1;
+ c->x86_num_siblings = 1;
c->x86_clflush_size = 0;
memset(&c->x86_capability, 0, sizeof c->x86_capability);
if (disable_pse)
clear_bit(X86_FEATURE_PSE, c->x86_capability);
+ for (i = 0 ; i < NCAPINTS ; ++i)
+ c->x86_capability[i] &= ~cleared_caps[i];
+
/* If the model name is still unset, do table lookup. */
if ( !c->x86_model_id[0] ) {
char *p;
if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY))
return;
- smp_num_siblings = (ebx & 0xff0000) >> 16;
+ c->x86_num_siblings = (ebx & 0xff0000) >> 16;
- if (smp_num_siblings == 1) {
+ if (c->x86_num_siblings == 1) {
printk(KERN_INFO "CPU: Hyper-Threading is disabled\n");
- } else if (smp_num_siblings > 1 ) {
+ } else if (c->x86_num_siblings > 1 ) {
- if (smp_num_siblings > NR_CPUS) {
- printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings);
- smp_num_siblings = 1;
+ if (c->x86_num_siblings > NR_CPUS) {
+ printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", c->x86_num_siblings);
+ c->x86_num_siblings = 1;
return;
}
- index_msb = get_count_order(smp_num_siblings);
+ index_msb = get_count_order(c->x86_num_siblings);
phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb);
printk(KERN_INFO "CPU: Physical Processor ID: %d\n",
phys_proc_id[cpu]);
- smp_num_siblings = smp_num_siblings / c->x86_max_cores;
+ c->x86_num_siblings = c->x86_num_siblings / c->x86_max_cores;
- index_msb = get_count_order(smp_num_siblings) ;
+ index_msb = get_count_order(c->x86_num_siblings) ;
core_bits = get_count_order(c->x86_max_cores);
{
int cpu = smp_processor_id();
struct tss_struct *t = &init_tss[cpu];
- char gdt_load[10];
+ struct desc_ptr gdt_desc = {
+ .base = (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY),
+ .limit = LAST_RESERVED_GDT_BYTE
+ };
if (cpu_test_and_set(cpu, cpu_initialized)) {
printk(KERN_WARNING "CPU#%d already initialized!\n", cpu);
/* Install correct page table. */
write_ptbase(current);
- *(unsigned short *)(&gdt_load[0]) = LAST_RESERVED_GDT_BYTE;
- *(unsigned long *)(&gdt_load[2]) = GDT_VIRT_START(current);
- asm volatile ( "lgdt %0" : "=m" (gdt_load) );
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
/* No nested task. */
asm volatile ("pushf ; andw $0xbfff,(%"__OP"sp) ; popf" );
BUG_ON((get_stack_bottom() & 15) != 0);
t->rsp0 = get_stack_bottom();
#endif
- set_tss_desc(cpu,t);
- load_TR(cpu);
+ load_TR();
asm volatile ( "lldt %%ax" : : "a" (0) );
/* Clear all 6 debug registers: */
if ((c->x86 == 0xf && c->x86_model >= 0x03) ||
(c->x86 == 0x6 && c->x86_model >= 0x0e))
set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+ if (cpuid_edx(0x80000007) & (1u<<8)) {
+ set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability);
+ set_bit(X86_FEATURE_NOSTOP_TSC, c->x86_capability);
+ }
start_vmx();
}
obj-y += k7.o
obj-y += amd_k8.o
obj-y += amd_f10.o
+obj-y += mctelem.o
obj-y += mce.o
+obj-y += mce_intel.o
obj-y += non-fatal.o
-obj-y += p4.o
obj-$(x86_32) += p5.o
-obj-$(x86_32) += p6.o
obj-$(x86_32) += winchip.o
#include "x86_mca.h"
-static int amd_f10_handler(struct mc_info *mi, uint16_t bank, uint64_t status)
+static enum mca_extinfo
+amd_f10_handler(struct mc_info *mi, uint16_t bank, uint64_t status)
{
struct mcinfo_extended mc_ext;
/* Family 0x10 introduced additional MSR that belong to the
* northbridge bank (4). */
- if (bank != 4)
- return 0;
+ if (mi == NULL || bank != 4)
+ return MCA_EXTINFO_IGNORED;
if (!(status & MCi_STATUS_VAL))
- return 0;
+ return MCA_EXTINFO_IGNORED;
if (!(status & MCi_STATUS_MISCV))
- return 0;
+ return MCA_EXTINFO_IGNORED;
memset(&mc_ext, 0, sizeof(mc_ext));
mc_ext.common.type = MC_TYPE_EXTENDED;
mc_ext.mc_msr[1].reg = MSR_F10_MC4_MISC2;
mc_ext.mc_msr[2].reg = MSR_F10_MC4_MISC3;
- rdmsrl(MSR_F10_MC4_MISC1, mc_ext.mc_msr[0].value);
- rdmsrl(MSR_F10_MC4_MISC2, mc_ext.mc_msr[1].value);
- rdmsrl(MSR_F10_MC4_MISC3, mc_ext.mc_msr[2].value);
+ mca_rdmsrl(MSR_F10_MC4_MISC1, mc_ext.mc_msr[0].value);
+ mca_rdmsrl(MSR_F10_MC4_MISC2, mc_ext.mc_msr[1].value);
+ mca_rdmsrl(MSR_F10_MC4_MISC3, mc_ext.mc_msr[2].value);
x86_mcinfo_add(mi, &mc_ext);
- return 1;
+ return MCA_EXTINFO_LOCAL;
}
extern void k8_machine_check(struct cpu_user_regs *regs, long error_code);
/* AMD Family10 machine check */
-void amd_f10_mcheck_init(struct cpuinfo_x86 *c)
+int amd_f10_mcheck_init(struct cpuinfo_x86 *c)
{
uint64_t value;
uint32_t i;
int cpu_nr;
- machine_check_vector = k8_machine_check;
- mc_callback_bank_extended = amd_f10_handler;
+ if (!cpu_has(c, X86_FEATURE_MCA))
+ return 0;
+
+ x86_mce_vector_register(k8_machine_check);
+ x86_mce_callback_register(amd_f10_handler);
cpu_nr = smp_processor_id();
- wmb();
rdmsrl(MSR_IA32_MCG_CAP, value);
if (value & MCG_CTL_P) /* Control register present ? */
for (i = 0; i < nr_mce_banks; i++) {
switch (i) {
case 4: /* Northbridge */
- /* Enable error reporting of all errors,
- * enable error checking and
- * disable sync flooding */
- wrmsrl(MSR_IA32_MC4_CTL, 0x02c3c008ffffffffULL);
+ /* Enable error reporting of all errors */
+ wrmsrl(MSR_IA32_MC4_CTL, 0xffffffffffffffffULL);
wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
-
- /* XXX: We should write the value 0x1087821UL into
- * to register F3x180 here, which sits in
- * the PCI extended configuration space.
- * Since this is not possible here, we can only hope,
- * Dom0 is doing that.
- */
break;
default:
set_in_cr4(X86_CR4_MCE);
printk("CPU%i: AMD Family10h machine check reporting enabled.\n", cpu_nr);
+ return 1;
}
#include <asm/msr.h>
#include "mce.h"
-#include "x86_mca.h"
/* Machine Check Handler for AMD K8 family series */
void k8_machine_check(struct cpu_user_regs *regs, long error_code)
{
- struct vcpu *vcpu = current;
- struct domain *curdom;
- struct mc_info *mc_data;
- struct mcinfo_global mc_global;
- struct mcinfo_bank mc_info;
- uint64_t status, addrv, miscv, uc;
- uint32_t i;
- unsigned int cpu_nr;
- uint32_t xen_impacted = 0;
-#define DOM_NORMAL 0
-#define DOM0_TRAP 1
-#define DOMU_TRAP 2
-#define DOMU_KILLED 4
- uint32_t dom_state = DOM_NORMAL;
-
- /* This handler runs as interrupt gate. So IPIs from the
- * polling service routine are defered until we finished.
- */
-
- /* Disable interrupts for the _vcpu_. It may not re-scheduled to
- * an other physical CPU or the impacted process in the guest
- * continues running with corrupted data, otherwise. */
- vcpu_schedule_lock_irq(vcpu);
-
- mc_data = x86_mcinfo_getptr();
- cpu_nr = smp_processor_id();
- curdom = vcpu->domain;
-
- memset(&mc_global, 0, sizeof(mc_global));
- mc_global.common.type = MC_TYPE_GLOBAL;
- mc_global.common.size = sizeof(mc_global);
-
- mc_global.mc_domid = curdom->domain_id; /* impacted domain */
- mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
- BUG_ON(cpu_nr != vcpu->processor);
- mc_global.mc_core_threadid = 0;
- mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
- It's not clear to me how to figure this out. */
- mc_global.mc_socketid = ???;
-#endif
- mc_global.mc_flags |= MC_FLAG_UNCORRECTABLE;
- rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
-
- /* Quick check, who is impacted */
- xen_impacted = is_idle_domain(curdom);
-
- /* Dom0 */
- x86_mcinfo_clear(mc_data);
- x86_mcinfo_add(mc_data, &mc_global);
-
- for (i = 0; i < nr_mce_banks; i++) {
- struct domain *d;
-
- rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);
-
- if (!(status & MCi_STATUS_VAL))
- continue;
-
- /* An error happened in this bank.
- * This is expected to be an uncorrectable error,
- * since correctable errors get polled.
- */
- uc = status & MCi_STATUS_UC;
-
- memset(&mc_info, 0, sizeof(mc_info));
- mc_info.common.type = MC_TYPE_BANK;
- mc_info.common.size = sizeof(mc_info);
- mc_info.mc_bank = i;
- mc_info.mc_status = status;
-
- addrv = 0;
- if (status & MCi_STATUS_ADDRV) {
- rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addrv);
-
- d = maddr_get_owner(addrv);
- if (d != NULL)
- mc_info.mc_domid = d->domain_id;
- }
-
- miscv = 0;
- if (status & MCi_STATUS_MISCV)
- rdmsrl(MSR_IA32_MC0_MISC + 4 * i, miscv);
-
- mc_info.mc_addr = addrv;
- mc_info.mc_misc = miscv;
-
- x86_mcinfo_add(mc_data, &mc_info); /* Dom0 */
-
- if (mc_callback_bank_extended)
- mc_callback_bank_extended(mc_data, i, status);
-
- /* clear status */
- wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
- }
-
- status = mc_global.mc_gstatus;
-
- /* clear MCIP or cpu enters shutdown state
- * in case another MCE occurs. */
- status &= ~MCG_STATUS_MCIP;
- wrmsrl(MSR_IA32_MCG_STATUS, status);
- wmb();
-
- /* For the details see the discussion "MCE/MCA concept" on xen-devel.
- * The thread started here:
- * http://lists.xensource.com/archives/html/xen-devel/2007-05/msg01015.html
- */
-
- /* MCG_STATUS_RIPV:
- * When this bit is not set, then the instruction pointer onto the stack
- * to resume at is not valid. If xen is interrupted, then we panic anyway
- * right below. Otherwise it is up to the guest to figure out if
- * guest kernel or guest userland is affected and should kill either
- * itself or the affected process.
- */
-
- /* MCG_STATUS_EIPV:
- * Evaluation of EIPV is the job of the guest.
- */
-
- if (xen_impacted) {
- /* Now we are going to panic anyway. Allow interrupts, so that
- * printk on serial console can work. */
- vcpu_schedule_unlock_irq(vcpu);
-
- /* Uh, that means, machine check exception
- * inside Xen occured. */
- printk("Machine check exception occured in Xen.\n");
-
- /* if MCG_STATUS_EIPV indicates, the IP on the stack is related
- * to the error then it makes sense to print a stack trace.
- * That can be useful for more detailed error analysis and/or
- * error case studies to figure out, if we can clear
- * xen_impacted and kill a DomU instead
- * (i.e. if a guest only control structure is affected, but then
- * we must ensure the bad pages are not re-used again).
- */
- if (status & MCG_STATUS_EIPV) {
- printk("MCE: Instruction Pointer is related to the error. "
- "Therefore, print the execution state.\n");
- show_execution_state(regs);
- }
- x86_mcinfo_dump(mc_data);
- panic("End of MCE. Use mcelog to decode above error codes.\n");
- }
-
- /* If Dom0 registered a machine check handler, which is only possible
- * with a PV MCA driver, then ... */
- if ( guest_has_trap_callback(dom0, 0, TRAP_machine_check) ) {
- dom_state = DOM0_TRAP;
-
- /* ... deliver machine check trap to Dom0. */
- send_guest_trap(dom0, 0, TRAP_machine_check);
-
- /* Xen may tell Dom0 now to notify the DomU.
- * But this will happen through a hypercall. */
- } else
- /* Dom0 did not register a machine check handler, but if DomU
- * did so, then... */
- if ( guest_has_trap_callback(curdom, vcpu->vcpu_id, TRAP_machine_check) ) {
- dom_state = DOMU_TRAP;
-
- /* ... deliver machine check trap to DomU */
- send_guest_trap(curdom, vcpu->vcpu_id, TRAP_machine_check);
- } else {
- /* hmm... noone feels responsible to handle the error.
- * So, do a quick check if a DomU is impacted or not.
- */
- if (curdom == dom0) {
- /* Dom0 is impacted. Since noone can't handle
- * this error, panic! */
- x86_mcinfo_dump(mc_data);
- panic("MCE occured in Dom0, which it can't handle\n");
-
- /* UNREACHED */
- } else {
- dom_state = DOMU_KILLED;
-
- /* Enable interrupts. This basically results in
- * calling sti on the *physical* cpu. But after
- * domain_crash() the vcpu pointer is invalid.
- * Therefore, we must unlock the irqs before killing
- * it. */
- vcpu_schedule_unlock_irq(vcpu);
-
- /* DomU is impacted. Kill it and continue. */
- domain_crash(curdom);
- }
- }
-
-
- switch (dom_state) {
- case DOM0_TRAP:
- case DOMU_TRAP:
- /* Enable interrupts. */
- vcpu_schedule_unlock_irq(vcpu);
-
- /* guest softirqs and event callbacks are scheduled
- * immediately after this handler exits. */
- break;
- case DOMU_KILLED:
- /* Nothing to do here. */
- break;
- default:
- BUG();
- }
+ mcheck_cmn_handler(regs, error_code, mca_allbanks);
}
-
/* AMD K8 machine check */
-void amd_k8_mcheck_init(struct cpuinfo_x86 *c)
+int amd_k8_mcheck_init(struct cpuinfo_x86 *c)
{
uint64_t value;
uint32_t i;
int cpu_nr;
- machine_check_vector = k8_machine_check;
+ /* Check for PPro style MCA; our caller has confirmed MCE support. */
+ if (!cpu_has(c, X86_FEATURE_MCA))
+ return 0;
+
+ x86_mce_vector_register(k8_machine_check);
cpu_nr = smp_processor_id();
- wmb();
rdmsrl(MSR_IA32_MCG_CAP, value);
if (value & MCG_CTL_P) /* Control register present ? */
for (i = 0; i < nr_mce_banks; i++) {
switch (i) {
case 4: /* Northbridge */
- /* Enable error reporting of all errors,
- * enable error checking and
- * disable sync flooding */
- wrmsrl(MSR_IA32_MC4_CTL, 0x02c3c008ffffffffULL);
+ /* Enable error reporting of all errors */
+ wrmsrl(MSR_IA32_MC4_CTL, 0xffffffffffffffffULL);
wrmsrl(MSR_IA32_MC4_STATUS, 0x0ULL);
break;
set_in_cr4(X86_CR4_MCE);
printk("CPU%i: AMD K8 machine check reporting enabled.\n", cpu_nr);
+
+ return 1;
}
#include <xen/smp.h>
#include <xen/timer.h>
#include <xen/event.h>
-#include <asm/processor.h>
+
+#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include "mce.h"
-#include "x86_mca.h"
static struct timer mce_timer;
-#define MCE_PERIOD MILLISECS(15000)
+#define MCE_PERIOD MILLISECS(10000)
#define MCE_MIN MILLISECS(2000)
#define MCE_MAX MILLISECS(30000)
static s_time_t period = MCE_PERIOD;
static int hw_threshold = 0;
static int adjust = 0;
+static int variable_period = 1;
/* The polling service routine:
* Collects information of correctable errors and notifies
*/
void mce_amd_checkregs(void *info)
{
- struct vcpu *vcpu = current;
- struct mc_info *mc_data;
- struct mcinfo_global mc_global;
- struct mcinfo_bank mc_info;
- uint64_t status, addrv, miscv;
- unsigned int i;
+ mctelem_cookie_t mctc;
+ struct mca_summary bs;
unsigned int event_enabled;
- unsigned int cpu_nr;
- int error_found;
- /* We don't need a slot yet. Only allocate one on error. */
- mc_data = NULL;
+ mctc = mcheck_mca_logout(MCA_POLLER, mca_allbanks, &bs);
- cpu_nr = smp_processor_id();
event_enabled = guest_enabled_event(dom0->vcpu[0], VIRQ_MCA);
- error_found = 0;
-
- memset(&mc_global, 0, sizeof(mc_global));
- mc_global.common.type = MC_TYPE_GLOBAL;
- mc_global.common.size = sizeof(mc_global);
-
- mc_global.mc_domid = vcpu->domain->domain_id; /* impacted domain */
- mc_global.mc_coreid = vcpu->processor; /* impacted physical cpu */
- BUG_ON(cpu_nr != vcpu->processor);
- mc_global.mc_core_threadid = 0;
- mc_global.mc_vcpuid = vcpu->vcpu_id; /* impacted vcpu */
-#if 0 /* TODO: on which socket is this physical core?
- It's not clear to me how to figure this out. */
- mc_global.mc_socketid = ???;
-#endif
- mc_global.mc_flags |= MC_FLAG_CORRECTABLE;
- rdmsrl(MSR_IA32_MCG_STATUS, mc_global.mc_gstatus);
-
- for (i = 0; i < nr_mce_banks; i++) {
- struct domain *d;
-
- rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
-
- if (!(status & MCi_STATUS_VAL))
- continue;
-
- if (mc_data == NULL) {
- /* Now we need a slot to fill in error telemetry. */
- mc_data = x86_mcinfo_getptr();
- BUG_ON(mc_data == NULL);
- x86_mcinfo_clear(mc_data);
- x86_mcinfo_add(mc_data, &mc_global);
- }
-
- memset(&mc_info, 0, sizeof(mc_info));
- mc_info.common.type = MC_TYPE_BANK;
- mc_info.common.size = sizeof(mc_info);
- mc_info.mc_bank = i;
- mc_info.mc_status = status;
-
- /* Increase polling frequency */
- error_found = 1;
-
- addrv = 0;
- if (status & MCi_STATUS_ADDRV) {
- rdmsrl(MSR_IA32_MC0_ADDR + i * 4, addrv);
-
- d = maddr_get_owner(addrv);
- if (d != NULL)
- mc_info.mc_domid = d->domain_id;
- }
- miscv = 0;
- if (status & MCi_STATUS_MISCV)
- rdmsrl(MSR_IA32_MC0_MISC + i * 4, miscv);
+ if (bs.errcnt && mctc != NULL) {
+ static uint64_t dumpcount = 0;
- mc_info.mc_addr = addrv;
- mc_info.mc_misc = miscv;
- x86_mcinfo_add(mc_data, &mc_info);
+ /* If Dom0 enabled the VIRQ_MCA event, then notify it.
+ * Otherwise, if dom0 has had plenty of time to register
+ * the virq handler but still hasn't, then dump telemetry
+ * to the Xen console. The call count may be incremented
+ * on multiple cpus at once and is indicative only - just
+ * a simple-minded attempt to avoid spamming the console
+ * for corrected errors in early startup. */
- if (mc_callback_bank_extended)
- mc_callback_bank_extended(mc_data, i, status);
-
- /* clear status */
- wrmsrl(MSR_IA32_MC0_STATUS + i * 4, 0x0ULL);
- wmb();
- }
-
- if (error_found > 0) {
- /* If Dom0 enabled the VIRQ_MCA event, then ... */
- if (event_enabled)
- /* ... notify it. */
+ if (event_enabled) {
+ mctelem_commit(mctc);
send_guest_global_virq(dom0, VIRQ_MCA);
- else
- /* ... or dump it */
- x86_mcinfo_dump(mc_data);
+ } else if (++dumpcount >= 10) {
+ x86_mcinfo_dump((struct mc_info *)mctelem_dataptr(mctc));
+ mctelem_dismiss(mctc);
+ } else {
+ mctelem_dismiss(mctc);
+ }
+
+ } else if (mctc != NULL) {
+ mctelem_dismiss(mctc);
}
- adjust += error_found;
+ /* adjust is global and all cpus may attempt to increment it without
+ * synchronisation, so they race and the final adjust count
+ * (number of cpus seeing any error) is approximate. We can
+ * guarantee that if any cpu observes an error that the
+ * adjust count is at least 1. */
+ if (bs.errcnt)
+ adjust++;
}
/* polling service routine invoker:
on_each_cpu(mce_amd_checkregs, data, 1, 1);
if (adjust > 0) {
- if ( !guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) {
+ if (!guest_enabled_event(dom0->vcpu[0], VIRQ_MCA) ) {
/* Dom0 did not enable VIRQ_MCA, so Xen is reporting. */
printk("MCE: polling routine found correctable error. "
" Use mcelog to parse above error output.\n");
uint64_t value;
uint32_t counter;
- rdmsrl(MSR_IA32_MC4_MISC, value);
+ mca_rdmsrl(MSR_IA32_MC4_MISC, value);
/* Only the error counter field is of interest
* Bit field is described in AMD K8 BKDG chapter 6.4.5.5
*/
value &= ~(0x60FFF00000000ULL);
/* Counter enable */
value |= (1ULL << 51);
- wrmsrl(MSR_IA32_MC4_MISC, value);
+ mca_wrmsrl(MSR_IA32_MC4_MISC, value);
wmb();
}
}
- if (adjust > 0) {
+ if (variable_period && adjust > 0) {
/* Increase polling frequency */
adjust++; /* adjust == 1 must have an effect */
period /= adjust;
- } else {
+ } else if (variable_period) {
/* Decrease polling frequency */
period *= 2;
}
- if (period > MCE_MAX) {
+ if (variable_period && period > MCE_MAX) {
/* limit: Poll at least every 30s */
period = MCE_MAX;
}
- if (period < MCE_MIN) {
+ if (variable_period && period < MCE_MIN) {
/* limit: Poll every 2s.
* When this is reached an uncorrectable error
* is expected to happen, if Dom0 does nothing.
/* The threshold bitfields in MSR_IA32_MC4_MISC has
* been introduced along with the SVME feature bit. */
- if (cpu_has(c, X86_FEATURE_SVME)) {
+ if (variable_period && cpu_has(c, X86_FEATURE_SVME)) {
uint64_t value;
/* hw threshold registers present */
#include <asm/msr.h>
#include "mce.h"
+#include "x86_mca.h"
/* Machine Check Handler For AMD Athlon/Duron */
static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code)
}
if (recover&2)
- panic ("CPU context corrupt");
+ mc_panic ("CPU context corrupt");
if (recover&1)
- panic ("Unable to continue");
+ mc_panic ("Unable to continue");
printk (KERN_EMERG "Attempting to continue.\n");
mcgstl &= ~(1<<2);
wrmsr (MSR_IA32_MCG_STATUS,mcgstl, mcgsth);
/* AMD K7 machine check */
-void amd_k7_mcheck_init(struct cpuinfo_x86 *c)
+int amd_k7_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
int i;
- machine_check_vector = k7_machine_check;
- wmb();
+ /* Check for PPro style MCA; our caller has confirmed MCE support. */
+ if (!cpu_has(c, X86_FEATURE_MCA))
+ return 0;
+
+ x86_mce_vector_register(k7_machine_check);
rdmsr (MSR_IA32_MCG_CAP, l, h);
if (l & (1<<8)) /* Control register present ? */
set_in_cr4 (X86_CR4_MCE);
printk (KERN_INFO "CPU%d: AMD K7 machine check reporting enabled.\n",
smp_processor_id());
+
+ return 1;
}
#include <xen/config.h>
#include <xen/smp.h>
#include <xen/errno.h>
+#include <xen/console.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+#include <xen/cpumask.h>
+#include <xen/event.h>
+#include <xen/guest_access.h>
-#include <asm/processor.h>
+#include <asm/processor.h>
#include <asm/system.h>
+#include <asm/msr.h>
#include "mce.h"
-#include "x86_mca.h"
int mce_disabled = 0;
unsigned int nr_mce_banks;
EXPORT_SYMBOL_GPL(nr_mce_banks); /* non-fatal.o */
-/* XXX For now a fixed array is used. Later this should be changed
- * to a dynamic allocated array with the size calculated in relation
- * to physical cpus present in the machine.
- * The more physical cpus are available, the more entries you need.
- */
-#define MAX_MCINFO 10
-
-struct mc_machine_notify {
- struct mc_info mc;
- uint32_t fetch_idx;
- uint32_t valid;
-};
+static void intpose_init(void);
+static void mcinfo_clear(struct mc_info *);
-struct mc_machine {
+#define SEG_PL(segsel) ((segsel) & 0x3)
+#define _MC_MSRINJ_F_REQ_HWCR_WREN (1 << 16)
- /* Array structure used for collecting machine check error telemetry. */
- struct mc_info mc[MAX_MCINFO];
+#if 1 /* XXFM switch to 0 for putback */
- /* We handle multiple machine check reports lockless by
- * iterating through the array using the producer/consumer concept.
- */
- /* Producer array index to fill with machine check error data.
- * Index must be increased atomically. */
- uint32_t error_idx;
-
- /* Consumer array index to fetch machine check error data from.
- * Index must be increased atomically. */
- uint32_t fetch_idx;
-
- /* Integer array holding the indeces of the mc array that allows
- * a Dom0 to notify a DomU to re-fetch the same machine check error
- * data. The notification and refetch also uses its own
- * producer/consumer mechanism, because Dom0 may decide to not report
- * every error to the impacted DomU.
- */
- struct mc_machine_notify notify[MAX_MCINFO];
+#define x86_mcerr(str, err) _x86_mcerr(str, err)
- /* Array index to get fetch_idx from.
- * Index must be increased atomically. */
- uint32_t notifyproducer_idx;
- uint32_t notifyconsumer_idx;
-};
+static int _x86_mcerr(const char *msg, int err)
+{
+ printk("x86_mcerr: %s, returning %d\n",
+ msg != NULL ? msg : "", err);
+ return err;
+}
+#else
+#define x86_mcerr(str,err)
+#endif
-/* Global variable with machine check information. */
-struct mc_machine mc_data;
+cpu_banks_t mca_allbanks;
/* Handle unconfigured int18 (should never happen) */
static void unexpected_machine_check(struct cpu_user_regs *regs, long error_code)
-{
+{
printk(XENLOG_ERR "CPU#%d: Unexpected int18 (Machine Check).\n",
smp_processor_id());
}
+static x86_mce_vector_t _machine_check_vector = unexpected_machine_check;
+
+void x86_mce_vector_register(x86_mce_vector_t hdlr)
+{
+ _machine_check_vector = hdlr;
+ wmb();
+}
+
/* Call the installed machine check handler for this CPU setup. */
-void (*machine_check_vector)(struct cpu_user_regs *regs, long error_code) = unexpected_machine_check;
+
+void machine_check_vector(struct cpu_user_regs *regs, long error_code)
+{
+ _machine_check_vector(regs, error_code);
+}
/* Init machine check callback handler
* It is used to collect additional information provided by newer
* CPU families/models without the need to duplicate the whole handler.
* This avoids having many handlers doing almost nearly the same and each
* with its own tweaks ands bugs. */
-int (*mc_callback_bank_extended)(struct mc_info *, uint16_t, uint64_t) = NULL;
+static x86_mce_callback_t mc_callback_bank_extended = NULL;
-
-static void amd_mcheck_init(struct cpuinfo_x86 *ci)
+void x86_mce_callback_register(x86_mce_callback_t cbfunc)
{
+ mc_callback_bank_extended = cbfunc;
+}
- switch (ci->x86) {
- case 6:
- amd_k7_mcheck_init(ci);
- break;
+/* Utility function to perform MCA bank telemetry readout and to push that
+ * telemetry towards an interested dom0 for logging and diagnosis.
+ * The caller - #MC handler or MCA poll function - must arrange that we
+ * do not migrate cpus. */
- case 0xf:
- amd_k8_mcheck_init(ci);
- break;
+/* XXFM Could add overflow counting? */
+mctelem_cookie_t mcheck_mca_logout(enum mca_source who, cpu_banks_t bankmask,
+ struct mca_summary *sp)
+{
+ struct vcpu *v = current;
+ struct domain *d;
+ uint64_t gstatus, status, addr, misc;
+ struct mcinfo_global mcg; /* on stack */
+ struct mcinfo_common *mic;
+ struct mcinfo_global *mig; /* on stack */
+ mctelem_cookie_t mctc = NULL;
+ uint32_t uc = 0, pcc = 0;
+ struct mc_info *mci = NULL;
+ mctelem_class_t which = MC_URGENT; /* XXXgcc */
+ unsigned int cpu_nr;
+ int errcnt = 0;
+ int i;
+ enum mca_extinfo cbret = MCA_EXTINFO_IGNORED;
- case 0x10:
- amd_f10_mcheck_init(ci);
- break;
+ cpu_nr = smp_processor_id();
+ BUG_ON(cpu_nr != v->processor);
- default:
- /* Assume that machine check support is available.
- * The minimum provided support is at least the K8. */
- amd_k8_mcheck_init(ci);
- }
-}
+ mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
-/* This has to be run for each processor */
-void mcheck_init(struct cpuinfo_x86 *c)
-{
- if (mce_disabled == 1) {
- printk(XENLOG_INFO "MCE support disabled by bootparam\n");
- return;
- }
-
- if (!cpu_has(c, X86_FEATURE_MCE)) {
- printk(XENLOG_INFO "CPU%i: No machine check support available\n",
- smp_processor_id());
- return;
+ memset(&mcg, 0, sizeof (mcg));
+ mcg.common.type = MC_TYPE_GLOBAL;
+ mcg.common.size = sizeof (mcg);
+ if (v != NULL && ((d = v->domain) != NULL)) {
+ mcg.mc_domid = d->domain_id;
+ mcg.mc_vcpuid = v->vcpu_id;
+ } else {
+ mcg.mc_domid = -1;
+ mcg.mc_vcpuid = -1;
}
+ mcg.mc_gstatus = gstatus; /* MCG_STATUS */
- memset(&mc_data, 0, sizeof(struct mc_machine));
-
- switch (c->x86_vendor) {
- case X86_VENDOR_AMD:
- amd_mcheck_init(c);
+ switch (who) {
+ case MCA_MCE_HANDLER:
+ mcg.mc_flags = MC_FLAG_MCE;
+ which = MC_URGENT;
break;
- case X86_VENDOR_INTEL:
-#ifndef CONFIG_X86_64
- if (c->x86==5)
- intel_p5_mcheck_init(c);
- if (c->x86==6)
- intel_p6_mcheck_init(c);
-#endif
- if (c->x86==15)
- intel_p4_mcheck_init(c);
+ case MCA_POLLER:
+ case MCA_RESET:
+ mcg.mc_flags = MC_FLAG_POLLED;
+ which = MC_NONURGENT;
break;
-#ifndef CONFIG_X86_64
- case X86_VENDOR_CENTAUR:
- if (c->x86==5)
- winchip_mcheck_init(c);
+ case MCA_CMCI_HANDLER:
+ mcg.mc_flags = MC_FLAG_CMCI;
+ which = MC_NONURGENT;
break;
-#endif
default:
- break;
+ BUG();
}
-}
+ /* Retrieve detector information */
+ x86_mc_get_cpu_info(cpu_nr, &mcg.mc_socketid,
+ &mcg.mc_coreid, &mcg.mc_core_threadid,
+ &mcg.mc_apicid, NULL, NULL, NULL);
-static void __init mcheck_disable(char *str)
-{
- mce_disabled = 1;
-}
-
-static void __init mcheck_enable(char *str)
-{
- mce_disabled = -1;
-}
+ for (i = 0; i < 32 && i < nr_mce_banks; i++) {
+ struct mcinfo_bank mcb; /* on stack */
-custom_param("nomce", mcheck_disable);
-custom_param("mce", mcheck_enable);
+ /* Skip bank if corresponding bit in bankmask is clear */
+ if (!test_bit(i, bankmask))
+ continue;
+ mca_rdmsrl(MSR_IA32_MC0_STATUS + i * 4, status);
+ if (!(status & MCi_STATUS_VAL))
+ continue; /* this bank has no valid telemetry */
+
+ /* If this is the first bank with valid MCA DATA, then
+ * try to reserve an entry from the urgent/nonurgent queue
+ * depending on whether we are called from an exception or
+ * a poller; this can fail (for example dom0 may not
+ * yet have consumed past telemetry). */
+ if (errcnt == 0) {
+ if ((mctc = mctelem_reserve(which)) != NULL) {
+ mci = mctelem_dataptr(mctc);
+ mcinfo_clear(mci);
+ }
+ }
-#include <xen/guest_access.h>
-#include <asm/traps.h>
+ memset(&mcb, 0, sizeof (mcb));
+ mcb.common.type = MC_TYPE_BANK;
+ mcb.common.size = sizeof (mcb);
+ mcb.mc_bank = i;
+ mcb.mc_status = status;
-struct mc_info *x86_mcinfo_getptr(void)
-{
- struct mc_info *mi;
- uint32_t entry, next;
-
- for (;;) {
- entry = mc_data.error_idx;
- smp_rmb();
- next = entry + 1;
- if (cmpxchg(&mc_data.error_idx, entry, next) == entry)
- break;
- }
+ /* form a mask of which banks have logged uncorrected errors */
+ if ((status & MCi_STATUS_UC) != 0)
+ uc |= (1 << i);
- mi = &(mc_data.mc[(entry % MAX_MCINFO)]);
- BUG_ON(mc_data.error_idx < mc_data.fetch_idx);
+ /* likewise for those with processor context corrupt */
+ if ((status & MCi_STATUS_PCC) != 0)
+ pcc |= (1 << i);
- return mi;
-}
+ addr = misc = 0;
-static int x86_mcinfo_matches_guest(const struct mc_info *mi,
- const struct domain *d, const struct vcpu *v)
-{
- struct mcinfo_common *mic;
- struct mcinfo_global *mig;
+ if (status & MCi_STATUS_ADDRV) {
+ mca_rdmsrl(MSR_IA32_MC0_ADDR + 4 * i, addr);
+ d = maddr_get_owner(addr);
+ if (d != NULL && (who == MCA_POLLER ||
+ who == MCA_CMCI_HANDLER))
+ mcb.mc_domid = d->domain_id;
+ }
- x86_mcinfo_lookup(mic, mi, MC_TYPE_GLOBAL);
- mig = (struct mcinfo_global *)mic;
- if (mig == NULL)
- return 0;
+ if (status & MCi_STATUS_MISCV)
+ mca_rdmsrl(MSR_IA32_MC0_MISC + 4 * i, misc);
- if (d->domain_id != mig->mc_domid)
- return 0;
+ mcb.mc_addr = addr;
+ mcb.mc_misc = misc;
- if (v->vcpu_id != mig->mc_vcpuid)
- return 0;
+ if (who == MCA_CMCI_HANDLER) {
+ mca_rdmsrl(MSR_IA32_MC0_CTL2 + i, mcb.mc_ctrl2);
+ rdtscll(mcb.mc_tsc);
+ }
- return 1;
-}
+ /* Increment the error count; if this is the first bank
+ * with a valid error then add the global info to the mcinfo. */
+ if (errcnt++ == 0 && mci != NULL)
+ x86_mcinfo_add(mci, &mcg);
+ /* Add the bank data */
+ if (mci != NULL)
+ x86_mcinfo_add(mci, &mcb);
-#define x86_mcinfo_mcdata(idx) (mc_data.mc[(idx % MAX_MCINFO)])
+ if (mc_callback_bank_extended && cbret != MCA_EXTINFO_GLOBAL) {
+ cbret = mc_callback_bank_extended(mci, i, status);
+ }
-static struct mc_info *x86_mcinfo_getfetchptr(uint32_t *fetch_idx,
- const struct domain *d, const struct vcpu *v)
-{
- struct mc_info *mi;
+ /* Clear status */
+ mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL);
+ wmb();
+ }
- /* This function is called from the fetch hypercall with
- * the mc_lock spinlock held. Thus, no need for locking here.
- */
- mi = &(x86_mcinfo_mcdata(mc_data.fetch_idx));
- if ((d != dom0) && !x86_mcinfo_matches_guest(mi, d, v)) {
- /* Bogus domU command detected. */
- *fetch_idx = 0;
- return NULL;
+ if (mci != NULL && errcnt > 0) {
+ x86_mcinfo_lookup(mic, mci, MC_TYPE_GLOBAL);
+ mig = (struct mcinfo_global *)mic;
+ if (pcc)
+ mcg.mc_flags |= MC_FLAG_UNCORRECTABLE;
+ else if (uc)
+ mcg.mc_flags |= MC_FLAG_RECOVERABLE;
+ else
+ mcg.mc_flags |= MC_FLAG_CORRECTABLE;
}
- *fetch_idx = mc_data.fetch_idx;
- mc_data.fetch_idx++;
- BUG_ON(mc_data.fetch_idx > mc_data.error_idx);
- return mi;
+ if (sp) {
+ sp->errcnt = errcnt;
+ sp->ripv = (gstatus & MCG_STATUS_RIPV) != 0;
+ sp->eipv = (gstatus & MCG_STATUS_EIPV) != 0;
+ sp->uc = uc;
+ sp->pcc = pcc;
+ }
+
+ return mci != NULL ? mctc : NULL; /* may be NULL */
}
+#define DOM_NORMAL 0
+#define DOM0_TRAP 1
+#define DOMU_TRAP 2
+#define DOMU_KILLED 4
-static void x86_mcinfo_marknotified(struct xen_mc_notifydomain *mc_notifydomain)
+/* Shared #MC handler. */
+void mcheck_cmn_handler(struct cpu_user_regs *regs, long error_code,
+ cpu_banks_t bankmask)
{
- struct mc_machine_notify *mn;
- struct mcinfo_common *mic = NULL;
- struct mcinfo_global *mig;
- struct domain *d;
- int i;
+ int xen_state_lost, dom0_state_lost, domU_state_lost;
+ struct vcpu *v = current;
+ struct domain *curdom = v->domain;
+ domid_t domid = curdom->domain_id;
+ int ctx_xen, ctx_dom0, ctx_domU;
+ uint32_t dom_state = DOM_NORMAL;
+ mctelem_cookie_t mctc = NULL;
+ struct mca_summary bs;
+ struct mc_info *mci = NULL;
+ int irqlocked = 0;
+ uint64_t gstatus;
+ int ripv;
+
+ /* This handler runs as interrupt gate. So IPIs from the
+ * polling service routine are deferred until we're finished.
+ */
+
+ /* Disable interrupts for the _vcpu_. It must not be re-scheduled to
+ * another physical CPU. */
+ vcpu_schedule_lock_irq(v);
+ irqlocked = 1;
+
+ /* Read global status; if it does not indicate machine check
+ * in progress then bail as long as we have a valid ip to return to. */
+ mca_rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
+ ripv = ((gstatus & MCG_STATUS_RIPV) != 0);
+ if (!(gstatus & MCG_STATUS_MCIP) && ripv) {
+ add_taint(TAINT_MACHINE_CHECK); /* questionable */
+ vcpu_schedule_unlock_irq(v);
+ irqlocked = 0;
+ goto cmn_handler_done;
+ }
- /* This function is called from the notifier hypercall with
- * the mc_notify_lock spinlock held. Thus, no need for locking here.
+ /* Go and grab error telemetry. We must choose whether to commit
+ * for logging or dismiss the cookie that is returned, and must not
+ * reference the cookie after that action.
*/
+ mctc = mcheck_mca_logout(MCA_MCE_HANDLER, bankmask, &bs);
+ if (mctc != NULL)
+ mci = (struct mc_info *)mctelem_dataptr(mctc);
+
+ /* Clear MCIP or another #MC will enter shutdown state */
+ gstatus &= ~MCG_STATUS_MCIP;
+ mca_wrmsrl(MSR_IA32_MCG_STATUS, gstatus);
+ wmb();
+
+ /* If no valid errors and our stack is intact, we're done */
+ if (ripv && bs.errcnt == 0) {
+ vcpu_schedule_unlock_irq(v);
+ irqlocked = 0;
+ goto cmn_handler_done;
+ }
- /* First invalidate entries for guests that disappeared after
- * notification (e.g. shutdown/crash). This step prevents the
- * notification array from filling up with stalling/leaking entries.
+ if (bs.uc || bs.pcc)
+ add_taint(TAINT_MACHINE_CHECK);
+
+ /* Machine check exceptions will usually be for UC and/or PCC errors,
+ * but it is possible to configure machine check for some classes
+ * of corrected error.
+ *
+ * UC errors could compromise any domain or the hypervisor
+ * itself - for example a cache writeback of modified data that
+ * turned out to be bad could be for data belonging to anyone, not
+ * just the current domain. In the absence of known data poisoning
+ * to prevent consumption of such bad data in the system we regard
+ * all UC errors as terminal. It may be possible to attempt some
+ * heuristics based on the address affected, which guests have
+ * mappings to that mfn etc.
+ *
+ * PCC errors apply to the current context.
+ *
+ * If MCG_STATUS indicates !RIPV then even a #MC that is not UC
+ * and not PCC is terminal - the return instruction pointer
+ * pushed onto the stack is bogus. If the interrupt context is
+ * the hypervisor or dom0 the game is over, otherwise we can
+ * limit the impact to a single domU but only if we trampoline
+ * somewhere safely - we can't return and unwind the stack.
+ * Since there is no trampoline in place we will treat !RIPV
+ * as terminal for any context.
*/
- for (i = mc_data.notifyconsumer_idx; i < mc_data.notifyproducer_idx; i++) {
- mn = &(mc_data.notify[(i % MAX_MCINFO)]);
- x86_mcinfo_lookup(mic, &mn->mc, MC_TYPE_GLOBAL);
- BUG_ON(mic == NULL);
- mig = (struct mcinfo_global *)mic;
- d = get_domain_by_id(mig->mc_domid);
- if (d == NULL) {
- /* Domain does not exist. */
- mn->valid = 0;
+ ctx_xen = SEG_PL(regs->cs) == 0;
+ ctx_dom0 = !ctx_xen && (domid == dom0->domain_id);
+ ctx_domU = !ctx_xen && !ctx_dom0;
+
+ xen_state_lost = bs.uc != 0 || (ctx_xen && (bs.pcc || !ripv)) ||
+ !ripv;
+ dom0_state_lost = bs.uc != 0 || (ctx_dom0 && (bs.pcc || !ripv));
+ domU_state_lost = bs.uc != 0 || (ctx_domU && (bs.pcc || !ripv));
+
+ if (xen_state_lost) {
+ /* Now we are going to panic anyway. Allow interrupts, so that
+ * printk on serial console can work. */
+ vcpu_schedule_unlock_irq(v);
+ irqlocked = 0;
+
+ printk("Terminal machine check exception occurred in "
+ "hypervisor context.\n");
+
+ /* If MCG_STATUS_EIPV indicates that the IP on the stack is related
+ * to the error, then it makes sense to print a stack trace.
+ * That can be useful for more detailed error analysis and/or
+ * error case studies to figure out, if we can clear
+ * xen_impacted and kill a DomU instead
+ * (i.e. if a guest only control structure is affected, but then
+ * we must ensure the bad pages are not re-used again).
+ */
+ if (bs.eipv) {
+ printk("MCE: Instruction Pointer is related to the "
+ "error, therefore print the execution state.\n");
+ show_execution_state(regs);
+ }
+
+ /* Commit the telemetry so that panic flow can find it. */
+ if (mctc != NULL) {
+ x86_mcinfo_dump(mci);
+ mctelem_commit(mctc);
}
- if ((!mn->valid) && (i == mc_data.notifyconsumer_idx))
- mc_data.notifyconsumer_idx++;
+ mc_panic("Hypervisor state lost due to machine check "
+ "exception.\n");
+ /*NOTREACHED*/
}
- /* Now put in the error telemetry. Since all error data fetchable
- * by domUs are uncorrectable errors, they are very important.
- * So we dump them before overriding them. When a guest takes that long,
- * then we can assume something bad already happened (crash, hang, etc.)
+ /*
+ * Xen hypervisor state is intact. If dom0 state is lost then
+ * give it a chance to decide what to do if it has registered
+ * a handler for this event, otherwise panic.
+ *
+ * XXFM Could add some Solaris dom0 contract kill here?
*/
- mn = &(mc_data.notify[(mc_data.notifyproducer_idx % MAX_MCINFO)]);
+ if (dom0_state_lost) {
+ if (guest_has_trap_callback(dom0, 0, TRAP_machine_check)) {
+ dom_state = DOM0_TRAP;
+ send_guest_trap(dom0, 0, TRAP_machine_check);
+ /* XXFM case of return with !ripv ??? */
+ } else {
+ /* Commit telemetry for panic flow. */
+ if (mctc != NULL) {
+ x86_mcinfo_dump(mci);
+ mctelem_commit(mctc);
+ }
+ mc_panic("Dom0 state lost due to machine check "
+ "exception\n");
+ /*NOTREACHED*/
+ }
+ }
+
+ /*
+ * If a domU has lost state then send it a trap if it has registered
+ * a handler, otherwise crash the domain.
+ * XXFM Revisit this functionality.
+ */
+ if (domU_state_lost) {
+ if (guest_has_trap_callback(v->domain, v->vcpu_id,
+ TRAP_machine_check)) {
+ dom_state = DOMU_TRAP;
+ send_guest_trap(curdom, v->vcpu_id,
+ TRAP_machine_check);
+ } else {
+ dom_state = DOMU_KILLED;
+ /* Enable interrupts. This basically results in
+ * calling sti on the *physical* cpu. But after
+ * domain_crash() the vcpu pointer is invalid.
+ * Therefore, we must unlock the irqs before killing
+ * it. */
+ vcpu_schedule_unlock_irq(v);
+ irqlocked = 0;
+
+ /* DomU is impacted. Kill it and continue. */
+ domain_crash(curdom);
+ }
+ }
- if (mn->valid) {
- struct mcinfo_common *mic = NULL;
- struct mcinfo_global *mig;
+ switch (dom_state) {
+ case DOM0_TRAP:
+ case DOMU_TRAP:
+ /* Enable interrupts. */
+ vcpu_schedule_unlock_irq(v);
+ irqlocked = 0;
- /* To not loose the information, we dump it. */
- x86_mcinfo_lookup(mic, &mn->mc, MC_TYPE_GLOBAL);
- BUG_ON(mic == NULL);
- mig = (struct mcinfo_global *)mic;
- printk(XENLOG_WARNING "Domain ID %u was notified by Dom0 to "
- "fetch machine check error telemetry. But Domain ID "
- "did not do that in time.\n",
- mig->mc_domid);
- x86_mcinfo_dump(&mn->mc);
+ /* guest softirqs and event callbacks are scheduled
+ * immediately after this handler exits. */
+ break;
+ case DOMU_KILLED:
+ /* Nothing to do here. */
+ break;
+
+ case DOM_NORMAL:
+ vcpu_schedule_unlock_irq(v);
+ irqlocked = 0;
+ break;
}
- memcpy(&mn->mc, &(x86_mcinfo_mcdata(mc_notifydomain->fetch_idx)),
- sizeof(struct mc_info));
- mn->fetch_idx = mc_notifydomain->fetch_idx;
- mn->valid = 1;
+cmn_handler_done:
+ BUG_ON(irqlocked);
+ BUG_ON(!ripv);
+
+ if (bs.errcnt) {
+ /* Not panicking, so forward telemetry to dom0 now if it
+ * is interested. */
+ if (guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
+ if (mctc != NULL)
+ mctelem_commit(mctc);
+ send_guest_global_virq(dom0, VIRQ_MCA);
+ } else {
+ x86_mcinfo_dump(mci);
+ if (mctc != NULL)
+ mctelem_dismiss(mctc);
+ }
+ } else if (mctc != NULL) {
+ mctelem_dismiss(mctc);
+ }
+}
- mc_data.notifyproducer_idx++;
+static int amd_mcheck_init(struct cpuinfo_x86 *ci)
+{
+ int rc = 0;
- /* By design there can never be more notifies than machine check errors.
- * If that ever happens, then we hit a bug. */
- BUG_ON(mc_data.notifyproducer_idx > mc_data.fetch_idx);
- BUG_ON(mc_data.notifyconsumer_idx > mc_data.notifyproducer_idx);
+ switch (ci->x86) {
+ case 6:
+ rc = amd_k7_mcheck_init(ci);
+ break;
+
+ case 0xf:
+ rc = amd_k8_mcheck_init(ci);
+ break;
+
+ case 0x10:
+ rc = amd_f10_mcheck_init(ci);
+ break;
+
+ default:
+ /* Assume that machine check support is available.
+ * The minimum provided support is at least the K8. */
+ rc = amd_k8_mcheck_init(ci);
+ }
+
+ return rc;
}
-static struct mc_info *x86_mcinfo_getnotifiedptr(uint32_t *fetch_idx,
- const struct domain *d, const struct vcpu *v)
+/* Check for the existence of machine check support. */
+int mce_available(struct cpuinfo_x86 *c)
{
- struct mc_machine_notify *mn = NULL;
- uint32_t i;
- int found;
+ return cpu_has(c, X86_FEATURE_MCE) && cpu_has(c, X86_FEATURE_MCA);
+}
- /* This function is called from the fetch hypercall with
- * the mc_notify_lock spinlock held. Thus, no need for locking here.
- */
+/*
+ * Check if bank 0 is usable for MCE. It isn't for AMD K7,
+ * and Intel P6 family before model 0x1a.
+ */
+int mce_firstbank(struct cpuinfo_x86 *c)
+{
+ if (c->x86 == 6) {
+ if (c->x86_vendor == X86_VENDOR_AMD)
+ return 1;
- /* The notifier data is filled in the order guests get notified, but
- * guests may fetch them in a different order. That's why we need
- * the game with valid/invalid entries. */
- found = 0;
- for (i = mc_data.notifyconsumer_idx; i < mc_data.notifyproducer_idx; i++) {
- mn = &(mc_data.notify[(i % MAX_MCINFO)]);
- if (!mn->valid) {
- if (i == mc_data.notifyconsumer_idx)
- mc_data.notifyconsumer_idx++;
- continue;
- }
- if (x86_mcinfo_matches_guest(&mn->mc, d, v)) {
- found = 1;
+ if (c->x86_vendor == X86_VENDOR_INTEL && c->x86_model < 0x1a)
+ return 1;
+ }
+
+ return 0;
+}
+
+/* This has to be run for each processor */
+void mcheck_init(struct cpuinfo_x86 *c)
+{
+ int inited = 0, i;
+
+ if (mce_disabled == 1) {
+ printk(XENLOG_INFO "MCE support disabled by bootparam\n");
+ return;
+ }
+
+ for (i = 0; i < MAX_NR_BANKS; i++)
+ set_bit(i,mca_allbanks);
+
+ /* Enforce at least MCE support in CPUID information. Individual
+ * families may also need to enforce a check for MCA support. */
+ if (!cpu_has(c, X86_FEATURE_MCE)) {
+ printk(XENLOG_INFO "CPU%i: No machine check support available\n",
+ smp_processor_id());
+ return;
+ }
+
+ intpose_init();
+ mctelem_init(sizeof (struct mc_info));
+
+ switch (c->x86_vendor) {
+ case X86_VENDOR_AMD:
+ inited = amd_mcheck_init(c);
+ break;
+
+ case X86_VENDOR_INTEL:
+ switch (c->x86) {
+ case 5:
+#ifndef CONFIG_X86_64
+ inited = intel_p5_mcheck_init(c);
+#endif
+ break;
+
+ case 6:
+ case 15:
+ inited = intel_mcheck_init(c);
break;
}
- }
+ break;
+
+#ifndef CONFIG_X86_64
+ case X86_VENDOR_CENTAUR:
+ if (c->x86==5) {
+ inited = winchip_mcheck_init(c);
+ }
+ break;
+#endif
- if (!found) {
- /* This domain has never been notified. This must be
- * a bogus domU command. */
- *fetch_idx = 0;
- return NULL;
+ default:
+ break;
}
- BUG_ON(mn == NULL);
- *fetch_idx = mn->fetch_idx;
- mn->valid = 0;
+ if (!inited)
+ printk(XENLOG_INFO "CPU%i: No machine check initialization\n",
+ smp_processor_id());
+}
+
+
+static void __init mcheck_disable(char *str)
+{
+ mce_disabled = 1;
+}
- BUG_ON(mc_data.notifyconsumer_idx > mc_data.notifyproducer_idx);
- return &mn->mc;
+static void __init mcheck_enable(char *str)
+{
+ mce_disabled = -1;
}
+custom_param("nomce", mcheck_disable);
+custom_param("mce", mcheck_enable);
-void x86_mcinfo_clear(struct mc_info *mi)
+static void mcinfo_clear(struct mc_info *mi)
{
memset(mi, 0, sizeof(struct mc_info));
x86_mcinfo_nentries(mi) = 0;
}
-
int x86_mcinfo_add(struct mc_info *mi, void *mcinfo)
{
int i;
end2 = (unsigned long)((uint8_t *)mic_index + mic->size);
if (end1 < end2)
- return -ENOSPC; /* No space. Can't add entry. */
+ return x86_mcerr("mcinfo_add: no more space", -ENOSPC);
/* there's enough space. add entry. */
memcpy(mic_index, mic, mic->size);
return 0;
}
-
/* Dump machine check information in a format,
* mcelog can parse. This is used only when
* Dom0 does not take the notification. */
if (mic == NULL)
return;
mc_global = (struct mcinfo_global *)mic;
- if (mc_global->mc_flags & MC_FLAG_UNCORRECTABLE) {
+ if (mc_global->mc_flags & MC_FLAG_MCE) {
printk(XENLOG_WARNING
"CPU%d: Machine Check Exception: %16"PRIx64"\n",
mc_global->mc_coreid, mc_global->mc_gstatus);
if (mic == NULL)
return;
if (mic->type != MC_TYPE_BANK)
- continue;
+ goto next;
mc_bank = (struct mcinfo_bank *)mic;
-
+
printk(XENLOG_WARNING "Bank %d: %16"PRIx64,
mc_bank->mc_bank,
mc_bank->mc_status);
printk(" at %16"PRIx64, mc_bank->mc_addr);
printk("\n");
+next:
mic = x86_mcinfo_next(mic); /* next entry */
if ((mic == NULL) || (mic->size == 0))
break;
} while (1);
}
+static void do_mc_get_cpu_info(void *v)
+{
+ int cpu = smp_processor_id();
+ int cindex, cpn;
+ struct cpuinfo_x86 *c;
+ xen_mc_logical_cpu_t *log_cpus, *xcp;
+ uint32_t junk, ebx;
+
+ log_cpus = v;
+ c = &cpu_data[cpu];
+ cindex = 0;
+ cpn = cpu - 1;
+
+ /*
+ * Deal with sparse masks, condensed into a contig array.
+ */
+ while (cpn >= 0) {
+ if (cpu_isset(cpn, cpu_online_map))
+ cindex++;
+ cpn--;
+ }
+
+ xcp = &log_cpus[cindex];
+ c = &cpu_data[cpu];
+ xcp->mc_cpunr = cpu;
+ x86_mc_get_cpu_info(cpu, &xcp->mc_chipid,
+ &xcp->mc_coreid, &xcp->mc_threadid,
+ &xcp->mc_apicid, &xcp->mc_ncores,
+ &xcp->mc_ncores_active, &xcp->mc_nthreads);
+ xcp->mc_cpuid_level = c->cpuid_level;
+ xcp->mc_family = c->x86;
+ xcp->mc_vendor = c->x86_vendor;
+ xcp->mc_model = c->x86_model;
+ xcp->mc_step = c->x86_mask;
+ xcp->mc_cache_size = c->x86_cache_size;
+ xcp->mc_cache_alignment = c->x86_cache_alignment;
+ memcpy(xcp->mc_vendorid, c->x86_vendor_id, sizeof xcp->mc_vendorid);
+ memcpy(xcp->mc_brandid, c->x86_model_id, sizeof xcp->mc_brandid);
+ memcpy(xcp->mc_cpu_caps, c->x86_capability, sizeof xcp->mc_cpu_caps);
+
+ /*
+ * This part needs to run on the CPU itself.
+ */
+ xcp->mc_nmsrvals = __MC_NMSRS;
+ xcp->mc_msrvalues[0].reg = MSR_IA32_MCG_CAP;
+ rdmsrl(MSR_IA32_MCG_CAP, xcp->mc_msrvalues[0].value);
+
+ if (c->cpuid_level >= 1) {
+ cpuid(1, &junk, &ebx, &junk, &junk);
+ xcp->mc_clusterid = (ebx >> 24) & 0xff;
+ } else
+ xcp->mc_clusterid = hard_smp_processor_id();
+}
+
+
+void x86_mc_get_cpu_info(unsigned cpu, uint32_t *chipid, uint16_t *coreid,
+ uint16_t *threadid, uint32_t *apicid,
+ unsigned *ncores, unsigned *ncores_active,
+ unsigned *nthreads)
+{
+ struct cpuinfo_x86 *c;
+
+ *apicid = cpu_physical_id(cpu);
+ c = &cpu_data[cpu];
+ if (c->apicid == BAD_APICID) {
+ *chipid = cpu;
+ *coreid = 0;
+ *threadid = 0;
+ if (ncores != NULL)
+ *ncores = 1;
+ if (ncores_active != NULL)
+ *ncores_active = 1;
+ if (nthreads != NULL)
+ *nthreads = 1;
+ } else {
+ *chipid = phys_proc_id[cpu];
+ if (c->x86_max_cores > 1)
+ *coreid = cpu_core_id[cpu];
+ else
+ *coreid = 0;
+ *threadid = c->apicid & ((1 << (c->x86_num_siblings - 1)) - 1);
+ if (ncores != NULL)
+ *ncores = c->x86_max_cores;
+ if (ncores_active != NULL)
+ *ncores_active = c->booted_cores;
+ if (nthreads != NULL)
+ *nthreads = c->x86_num_siblings;
+ }
+}
+
+#define INTPOSE_NENT 50
+
+static struct intpose_ent {
+ unsigned int cpu_nr;
+ uint64_t msr;
+ uint64_t val;
+} intpose_arr[INTPOSE_NENT];
+
+static void intpose_init(void)
+{
+ static int done;
+ int i;
+
+ if (done++ > 0)
+ return;
+
+ for (i = 0; i < INTPOSE_NENT; i++) {
+ intpose_arr[i].cpu_nr = -1;
+ }
+
+}
+
+struct intpose_ent *intpose_lookup(unsigned int cpu_nr, uint64_t msr,
+ uint64_t *valp)
+{
+ int i;
+
+ for (i = 0; i < INTPOSE_NENT; i++) {
+ if (intpose_arr[i].cpu_nr == cpu_nr &&
+ intpose_arr[i].msr == msr) {
+ if (valp != NULL)
+ *valp = intpose_arr[i].val;
+ return &intpose_arr[i];
+ }
+ }
+
+ return NULL;
+}
+
+static void intpose_add(unsigned int cpu_nr, uint64_t msr, uint64_t val)
+{
+ struct intpose_ent *ent;
+ int i;
+
+ if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) {
+ ent->val = val;
+ return;
+ }
+
+ for (i = 0, ent = &intpose_arr[0]; i < INTPOSE_NENT; i++, ent++) {
+ if (ent->cpu_nr == -1) {
+ ent->cpu_nr = cpu_nr;
+ ent->msr = msr;
+ ent->val = val;
+ return;
+ }
+ }
+
+ printk("intpose_add: interpose array full - request dropped\n");
+}
+
+void intpose_inval(unsigned int cpu_nr, uint64_t msr)
+{
+ struct intpose_ent *ent;
+
+ if ((ent = intpose_lookup(cpu_nr, msr, NULL)) != NULL) {
+ ent->cpu_nr = -1;
+ }
+}
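
/*
 * Not part of the patch: a minimal sketch of how the interpose array above
 * cooperates with the mca_rdmsrl()/mca_wrmsrl() wrappers declared in mce.h.
 * The helper name intpose_selftest and the status value are invented for
 * illustration only; nothing below is registered or called anywhere.
 */
static void intpose_selftest(void)
{
	uint64_t status;

	/* Fake a valid MC0_STATUS value for this cpu only. */
	intpose_add(smp_processor_id(), MSR_IA32_MC0_STATUS,
	            0xb200000000000000ULL);

	/* Readers going through mca_rdmsrl() now see the injected value. */
	mca_rdmsrl(MSR_IA32_MC0_STATUS, status);
	(void)status;

	/* The next mca_wrmsrl() invalidates the interposed entry and then
	 * writes the real register, so later reads see hardware again. */
	mca_wrmsrl(MSR_IA32_MC0_STATUS, 0x0ULL);
}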
+
+#define IS_MCA_BANKREG(r) \
+ ((r) >= MSR_IA32_MC0_CTL && \
+ (r) <= MSR_IA32_MC0_MISC + (nr_mce_banks - 1) * 4 && \
+ ((r) - MSR_IA32_MC0_CTL) % 4 != 0) /* excludes MCi_CTL */
+
+static int x86_mc_msrinject_verify(struct xen_mc_msrinject *mci)
+{
+ struct cpuinfo_x86 *c;
+ int i, errs = 0;
+
+ c = &cpu_data[smp_processor_id()];
+
+ for (i = 0; i < mci->mcinj_count; i++) {
+ uint64_t reg = mci->mcinj_msr[i].reg;
+ const char *reason = NULL;
+
+ if (IS_MCA_BANKREG(reg)) {
+ if (c->x86_vendor == X86_VENDOR_AMD) {
+ /* On AMD we can set MCi_STATUS_WREN in the
+ * HWCR MSR to allow non-zero writes to banks
+ * MSRs not to #GP. The injector in dom0
+ * should set that bit, but we detect when it
+ * is necessary and set it as a courtesy to
+ * avoid #GP in the hypervisor. */
+ mci->mcinj_flags |=
+ _MC_MSRINJ_F_REQ_HWCR_WREN;
+ continue;
+ } else {
+ /* No alternative but to interpose, so require
+ * that the injector specified interposition. */
+ if (!(mci->mcinj_flags &
+ MC_MSRINJ_F_INTERPOSE)) {
+ reason = "must specify interposition";
+ }
+ }
+ } else {
+ switch (reg) {
+ /* MSRs acceptable on all x86 cpus */
+ case MSR_IA32_MCG_STATUS:
+ break;
+
+ /* MSRs that the HV will take care of */
+ case MSR_K8_HWCR:
+ if (c->x86_vendor == X86_VENDOR_AMD)
+ reason = "HV will operate HWCR";
+ else
+ reason = "only supported on AMD";
+ break;
+
+ default:
+ reason = "not a recognized MCA MSR";
+ break;
+ }
+ }
+
+ if (reason != NULL) {
+ printk("HV MSR INJECT ERROR: MSR 0x%llx %s\n",
+ (unsigned long long)mci->mcinj_msr[i].reg, reason);
+ errs++;
+ }
+ }
+
+ return !errs;
+}
+
+static uint64_t x86_mc_hwcr_wren(void)
+{
+ uint64_t old;
+
+ rdmsrl(MSR_K8_HWCR, old);
+ if (!(old & K8_HWCR_MCi_STATUS_WREN)) {
+ uint64_t new = old | K8_HWCR_MCi_STATUS_WREN;
+ wrmsrl(MSR_K8_HWCR, new);
+ }
+
+ return old;
+}
+
+static void x86_mc_hwcr_wren_restore(uint64_t hwcr)
+{
+ if (!(hwcr & K8_HWCR_MCi_STATUS_WREN))
+ wrmsrl(MSR_K8_HWCR, hwcr);
+}
+
+static void x86_mc_msrinject(void *data)
+{
+ struct xen_mc_msrinject *mci = data;
+ struct mcinfo_msr *msr;
+ struct cpuinfo_x86 *c;
+ uint64_t hwcr = 0;
+ int intpose;
+ int i;
+
+ c = &cpu_data[smp_processor_id()];
+
+ if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN)
+ hwcr = x86_mc_hwcr_wren();
+
+ intpose = (mci->mcinj_flags & MC_MSRINJ_F_INTERPOSE) != 0;
+
+ for (i = 0, msr = &mci->mcinj_msr[0];
+ i < mci->mcinj_count; i++, msr++) {
+ printk("HV MSR INJECT (%s) target %u actual %u MSR 0x%llx "
+ "<-- 0x%llx\n",
+ intpose ? "interpose" : "hardware",
+ mci->mcinj_cpunr, smp_processor_id(),
+ (unsigned long long)msr->reg,
+ (unsigned long long)msr->value);
+
+ if (intpose)
+ intpose_add(mci->mcinj_cpunr, msr->reg, msr->value);
+ else
+ wrmsrl(msr->reg, msr->value);
+ }
+
+ if (mci->mcinj_flags & _MC_MSRINJ_F_REQ_HWCR_WREN)
+ x86_mc_hwcr_wren_restore(hwcr);
+}
+
+/*ARGSUSED*/
+static void x86_mc_mceinject(void *data)
+{
+ printk("Simulating #MC on cpu %d\n", smp_processor_id());
+ __asm__ __volatile__("int $0x12");
+}
+
+#if BITS_PER_LONG == 64
+
+#define ID2COOKIE(id) ((mctelem_cookie_t)(id))
+#define COOKIE2ID(c) ((uint64_t)(c))
+
+#elif BITS_PER_LONG == 32
+
+#define ID2COOKIE(id) ((mctelem_cookie_t)(uint32_t)((id) & 0xffffffffU))
+#define COOKIE2ID(c) ((uint64_t)(uint32_t)(c))
+
+#elif defined(BITS_PER_LONG)
+#error BITS_PER_LONG has unexpected value
+#else
+#error BITS_PER_LONG definition absent
+#endif
/* Machine Check Architecture Hypercall */
long do_mca(XEN_GUEST_HANDLE(xen_mc_t) u_xen_mc)
long ret = 0;
struct xen_mc curop, *op = &curop;
struct vcpu *v = current;
- struct domain *domU;
struct xen_mc_fetch *mc_fetch;
- struct xen_mc_notifydomain *mc_notifydomain;
- struct mc_info *mi;
- uint32_t flags;
- uint32_t fetch_idx;
- uint16_t vcpuid;
- /* Use a different lock for the notify hypercall in order to allow
- * a DomU to fetch mc data while Dom0 notifies another DomU. */
- static DEFINE_SPINLOCK(mc_lock);
- static DEFINE_SPINLOCK(mc_notify_lock);
+ struct xen_mc_physcpuinfo *mc_physcpuinfo;
+ uint32_t flags, cmdflags;
+ int nlcpu;
+ xen_mc_logical_cpu_t *log_cpus = NULL;
+ mctelem_cookie_t mctc;
+ mctelem_class_t which;
+ unsigned int target;
+ struct xen_mc_msrinject *mc_msrinject;
+ struct xen_mc_mceinject *mc_mceinject;
if ( copy_from_guest(op, u_xen_mc, 1) )
- return -EFAULT;
+ return x86_mcerr("do_mca: failed copyin of xen_mc_t", -EFAULT);
if ( op->interface_version != XEN_MCA_INTERFACE_VERSION )
- return -EACCES;
+ return x86_mcerr("do_mca: interface version mismatch", -EACCES);
- switch ( op->cmd ) {
+ switch (op->cmd) {
case XEN_MC_fetch:
- /* This hypercall is for any domain */
mc_fetch = &op->u.mc_fetch;
+ cmdflags = mc_fetch->flags;
+
+ /* This hypercall is for Dom0 only */
+ if (!IS_PRIV(v->domain) )
+ return x86_mcerr(NULL, -EPERM);
- switch (mc_fetch->flags) {
- case XEN_MC_CORRECTABLE:
- /* But polling mode is Dom0 only, because
- * correctable errors are reported to Dom0 only */
- if ( !IS_PRIV(v->domain) )
- return -EPERM;
+ switch (cmdflags & (XEN_MC_NONURGENT | XEN_MC_URGENT)) {
+ case XEN_MC_NONURGENT:
+ which = MC_NONURGENT;
break;
- case XEN_MC_TRAP:
+ case XEN_MC_URGENT:
+ which = MC_URGENT;
break;
+
default:
- return -EFAULT;
+ return x86_mcerr("do_mca fetch: bad cmdflags", -EINVAL);
}
flags = XEN_MC_OK;
- spin_lock(&mc_lock);
- if ( IS_PRIV(v->domain) ) {
- /* this must be Dom0. So a notify hypercall
- * can't have happened before. */
- mi = x86_mcinfo_getfetchptr(&fetch_idx, dom0, v);
+ if (cmdflags & XEN_MC_ACK) {
+ mctelem_cookie_t cookie = ID2COOKIE(mc_fetch->fetch_id);
+ mctelem_ack(which, cookie);
} else {
- /* Hypercall comes from an unprivileged domain */
- domU = v->domain;
- if (guest_has_trap_callback(dom0, 0, TRAP_machine_check)) {
- /* Dom0 must have notified this DomU before
- * via the notify hypercall. */
- mi = x86_mcinfo_getnotifiedptr(&fetch_idx, domU, v);
+ if (guest_handle_is_null(mc_fetch->data))
+ return x86_mcerr("do_mca fetch: guest buffer "
+ "invalid", -EINVAL);
+
+ if ((mctc = mctelem_consume_oldest_begin(which))) {
+ struct mc_info *mcip = mctelem_dataptr(mctc);
+ if (copy_to_guest(mc_fetch->data, mcip, 1)) {
+ ret = -EFAULT;
+ flags |= XEN_MC_FETCHFAILED;
+ mc_fetch->fetch_id = 0;
+ } else {
+ mc_fetch->fetch_id = COOKIE2ID(mctc);
+ }
+ mctelem_consume_oldest_end(mctc);
} else {
- /* Xen notified the DomU. */
- mi = x86_mcinfo_getfetchptr(&fetch_idx, domU, v);
+ /* There is no data */
+ flags |= XEN_MC_NODATA;
+ mc_fetch->fetch_id = 0;
}
+
+ mc_fetch->flags = flags;
+ if (copy_to_guest(u_xen_mc, op, 1) != 0)
+ ret = -EFAULT;
}
- if (mi) {
- memcpy(&mc_fetch->mc_info, mi,
- sizeof(struct mc_info));
- } else {
- /* There is no data for a bogus DomU command. */
- flags |= XEN_MC_NODATA;
- memset(&mc_fetch->mc_info, 0, sizeof(struct mc_info));
+ break;
+
+ case XEN_MC_notifydomain:
+ return x86_mcerr("do_mca notify unsupported", -EINVAL);
+
+ case XEN_MC_physcpuinfo:
+ if ( !IS_PRIV(v->domain) )
+ return x86_mcerr("do_mca cpuinfo", -EPERM);
+
+ mc_physcpuinfo = &op->u.mc_physcpuinfo;
+ nlcpu = num_online_cpus();
+
+ if (!guest_handle_is_null(mc_physcpuinfo->info)) {
+ if (mc_physcpuinfo->ncpus <= 0)
+ return x86_mcerr("do_mca cpuinfo: ncpus <= 0",
+ -EINVAL);
+ nlcpu = min(nlcpu, (int)mc_physcpuinfo->ncpus);
+ log_cpus = xmalloc_array(xen_mc_logical_cpu_t, nlcpu);
+ if (log_cpus == NULL)
+ return x86_mcerr("do_mca cpuinfo", -ENOMEM);
+
+ if (on_each_cpu(do_mc_get_cpu_info, log_cpus,
+ 1, 1) != 0) {
+ xfree(log_cpus);
+ return x86_mcerr("do_mca cpuinfo", -EIO);
+ }
}
- mc_fetch->flags = flags;
- mc_fetch->fetch_idx = fetch_idx;
+ mc_physcpuinfo->ncpus = nlcpu;
- if ( copy_to_guest(u_xen_mc, op, 1) )
- ret = -EFAULT;
+ if (copy_to_guest(u_xen_mc, op, 1)) {
+ if (log_cpus != NULL)
+ xfree(log_cpus);
+ return x86_mcerr("do_mca cpuinfo", -EFAULT);
+ }
- spin_unlock(&mc_lock);
+ if (!guest_handle_is_null(mc_physcpuinfo->info)) {
+ if (copy_to_guest(mc_physcpuinfo->info,
+ log_cpus, nlcpu))
+ ret = -EFAULT;
+ xfree(log_cpus);
+ }
break;
- case XEN_MC_notifydomain:
- /* This hypercall is for Dom0 only */
+ case XEN_MC_msrinject:
if ( !IS_PRIV(v->domain) )
- return -EPERM;
+ return x86_mcerr("do_mca inject", -EPERM);
- spin_lock(&mc_notify_lock);
+ if (nr_mce_banks == 0)
+ return x86_mcerr("do_mca inject", -ENODEV);
- mc_notifydomain = &op->u.mc_notifydomain;
- domU = get_domain_by_id(mc_notifydomain->mc_domid);
- vcpuid = mc_notifydomain->mc_vcpuid;
+ mc_msrinject = &op->u.mc_msrinject;
+ target = mc_msrinject->mcinj_cpunr;
- if ((domU == NULL) || (domU == dom0)) {
- /* It's not possible to notify a non-existent domain
- * or the dom0. */
- spin_unlock(&mc_notify_lock);
- return -EACCES;
- }
+ if (target >= NR_CPUS)
+ return x86_mcerr("do_mca inject: bad target", -EINVAL);
- if (vcpuid >= MAX_VIRT_CPUS) {
- /* It's not possible to notify a vcpu, Xen can't
- * assign to a domain. */
- spin_unlock(&mc_notify_lock);
- return -EACCES;
- }
+ if (!cpu_isset(target, cpu_online_map))
+ return x86_mcerr("do_mca inject: target offline",
+ -EINVAL);
- mc_notifydomain->flags = XEN_MC_OK;
-
- mi = &(x86_mcinfo_mcdata(mc_notifydomain->fetch_idx));
- if (!x86_mcinfo_matches_guest(mi, domU, domU->vcpu[vcpuid])) {
- /* The error telemetry is not for the guest, Dom0
- * wants to notify. */
- mc_notifydomain->flags |= XEN_MC_NOMATCH;
- } else if ( guest_has_trap_callback(domU, vcpuid,
- TRAP_machine_check) )
- {
- /* Send notification */
- if ( send_guest_trap(domU, vcpuid, TRAP_machine_check) )
- mc_notifydomain->flags |= XEN_MC_NOTDELIVERED;
- } else
- mc_notifydomain->flags |= XEN_MC_CANNOTHANDLE;
-
-#ifdef DEBUG
- /* sanity check - these two flags are mutually exclusive */
- if ((flags & XEN_MC_CANNOTHANDLE) && (flags & XEN_MC_NOTDELIVERED))
- BUG();
-#endif
+ if (mc_msrinject->mcinj_count == 0)
+ return 0;
- if ( copy_to_guest(u_xen_mc, op, 1) )
- ret = -EFAULT;
+ if (!x86_mc_msrinject_verify(mc_msrinject))
+ return x86_mcerr("do_mca inject: illegal MSR", -EINVAL);
- if (ret == 0) {
- x86_mcinfo_marknotified(mc_notifydomain);
- }
+ add_taint(TAINT_ERROR_INJECT);
+
+ on_selected_cpus(cpumask_of_cpu(target),
+ x86_mc_msrinject, mc_msrinject, 1, 1);
+
+ break;
+
+ case XEN_MC_mceinject:
+ if ( !IS_PRIV(v->domain) )
+ return x86_mcerr("do_mca #MC", -EPERM);
+
+ if (nr_mce_banks == 0)
+ return x86_mcerr("do_mca #MC", -ENODEV);
+
+ mc_mceinject = &op->u.mc_mceinject;
+ target = mc_mceinject->mceinj_cpunr;
+
+ if (target >= NR_CPUS)
+ return x86_mcerr("do_mca #MC: bad target", -EINVAL);
+
+ if (!cpu_isset(target, cpu_online_map))
+ return x86_mcerr("do_mca #MC: target offline", -EINVAL);
+
+ add_taint(TAINT_ERROR_INJECT);
+
+ on_selected_cpus(cpumask_of_cpu(target),
+ x86_mc_mceinject, mc_mceinject, 1, 1);
- spin_unlock(&mc_notify_lock);
break;
+
+ default:
+ return x86_mcerr("do_mca: bad command", -EINVAL);
}
return ret;
}
+
+void mc_panic(char *s)
+{
+ console_start_sync();
+ printk("Fatal machine check: %s\n", s);
+ printk("\n"
+ "****************************************\n"
+ "\n"
+ " The processor has reported a hardware error which cannot\n"
+ " be recovered from. Xen will now reboot the machine.\n");
+ panic("HARDWARE ERROR");
+}
+#ifndef _MCE_H
+
+#define _MCE_H
+
#include <xen/init.h>
+#include <xen/smp.h>
+#include <asm/types.h>
#include <asm/traps.h>
+#include <asm/atomic.h>
+#include <asm/percpu.h>
+
+#include "x86_mca.h"
+#include "mctelem.h"
/* Init functions */
-void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
-void amd_k7_mcheck_init(struct cpuinfo_x86 *c);
-void amd_k8_mcheck_init(struct cpuinfo_x86 *c);
-void amd_f10_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p4_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c);
-void intel_p6_mcheck_init(struct cpuinfo_x86 *c);
-void winchip_mcheck_init(struct cpuinfo_x86 *c);
-
-/* Function pointer used in the handlers to collect additional information
- * provided by newer CPU families/models without the need to duplicate
- * the whole handler resulting in various handlers each with its own
- * tweaks and bugs */
-extern int (*mc_callback_bank_extended)(struct mc_info *mi,
- uint16_t bank, uint64_t status);
+int amd_k7_mcheck_init(struct cpuinfo_x86 *c);
+int amd_k8_mcheck_init(struct cpuinfo_x86 *c);
+int amd_f10_mcheck_init(struct cpuinfo_x86 *c);
+
+int intel_p5_mcheck_init(struct cpuinfo_x86 *c);
+int winchip_mcheck_init(struct cpuinfo_x86 *c);
+int intel_mcheck_init(struct cpuinfo_x86 *c);
+void intel_mcheck_timer(struct cpuinfo_x86 *c);
+void mce_intel_feature_init(struct cpuinfo_x86 *c);
+void amd_nonfatal_mcheck_init(struct cpuinfo_x86 *c);
+int mce_available(struct cpuinfo_x86 *c);
+int mce_firstbank(struct cpuinfo_x86 *c);
/* Helper functions used for collecting error telemetry */
struct mc_info *x86_mcinfo_getptr(void);
-void x86_mcinfo_clear(struct mc_info *mi);
+void mc_panic(char *s);
+void x86_mc_get_cpu_info(unsigned, uint32_t *, uint16_t *, uint16_t *,
+ uint32_t *, uint32_t *, uint32_t *, uint32_t *);
+
+
+/* Register a handler for machine check exceptions. */
+typedef void (*x86_mce_vector_t)(struct cpu_user_regs *, long);
+extern void x86_mce_vector_register(x86_mce_vector_t);
+
+/* Common generic MCE handler that implementations may nominate
+ * via x86_mce_vector_register. */
+extern void mcheck_cmn_handler(struct cpu_user_regs *, long, cpu_banks_t);
+
+/* Read an MSR, checking for an interposed value first */
+extern struct intpose_ent *intpose_lookup(unsigned int, uint64_t,
+ uint64_t *);
+extern void intpose_inval(unsigned int, uint64_t);
+
+#define mca_rdmsrl(msr, var) do { \
+ if (intpose_lookup(smp_processor_id(), msr, &var) == NULL) \
+ rdmsrl(msr, var); \
+} while (0)
+
+/* Write an MSR, invalidating any interposed value */
+#define mca_wrmsrl(msr, val) do { \
+ intpose_inval(smp_processor_id(), msr); \
+ wrmsrl(msr, val); \
+} while (0)
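
For illustration only (not part of the patch): assuming a handler already holds a bank index in a local variable i, the interposition-aware macros are used like ordinary MSR accessors:

    uint64_t status;

    mca_rdmsrl(MSR_IA32_MC0_STATUS + 4 * i, status);   /* returns any interposed value first */
    if (status & MCi_STATUS_VAL)
        mca_wrmsrl(MSR_IA32_MC0_STATUS + 4 * i, 0x0ULL); /* clears the bank and any interposition */
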
+
+
+/* Utility function to "logout" all architectural MCA telemetry from the MCA
+ * banks of the current processor. A cookie is returned which may be
+ * used to reference the data so logged (the cookie can be NULL if
+ * no logout structures were available). The caller can also pass a pointer
+ * to a structure which will be completed with some summary information
+ * of the MCA data observed in the logout operation. */
+
+enum mca_source {
+ MCA_MCE_HANDLER,
+ MCA_POLLER,
+ MCA_CMCI_HANDLER,
+ MCA_RESET
+};
+
+enum mca_extinfo {
+ MCA_EXTINFO_LOCAL,
+ MCA_EXTINFO_GLOBAL,
+ MCA_EXTINFO_IGNORED
+};
+
+struct mca_summary {
+ uint32_t errcnt; /* number of banks with valid errors */
+ int ripv; /* meaningful on #MC */
+ int eipv; /* meaningful on #MC */
+ uint32_t uc; /* bitmask of banks with UC */
+ uint32_t pcc; /* bitmask of banks with PCC */
+};
+
+extern cpu_banks_t mca_allbanks;
+
+extern mctelem_cookie_t mcheck_mca_logout(enum mca_source, cpu_banks_t,
+ struct mca_summary *);
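
As a sketch of the intended polling usage (hedged; it mirrors the poller code later in this patch rather than defining new behaviour):

    struct mca_summary bs;
    mctelem_cookie_t mctc = mcheck_mca_logout(MCA_POLLER, mca_allbanks, &bs);

    if (bs.errcnt && mctc != NULL)
        mctelem_commit(mctc);      /* telemetry worth keeping: queue it for logging */
    else if (mctc != NULL)
        mctelem_dismiss(mctc);     /* nothing of interest: release the entry */
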
+
+/* Register a callback to be made during bank telemetry logout.
+ * This callback is only available to those machine check handlers
+ * that call to the common mcheck_cmn_handler or who use the common
+ * telemetry logout function mcheck_mca_logout in error polling.
+ *
+ * This can be used to collect additional information (typically non-
+ * architectural) provided by newer CPU families/models without the need
+ * to duplicate the whole handler resulting in various handlers each with
+ * its own tweaks and bugs. The callback receives a struct mc_info pointer
+ * which it can use with x86_mcinfo_add to add additional telemetry,
+ * the current MCA bank number we are reading telemetry from, and the
+ * MCi_STATUS value for that bank.
+ */
+typedef enum mca_extinfo (*x86_mce_callback_t)
+ (struct mc_info *, uint16_t, uint64_t);
+extern void x86_mce_callback_register(x86_mce_callback_t);
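
A hypothetical callback (my_bank_callback is illustrative only and is not defined anywhere in this patch) would follow this shape:

    static enum mca_extinfo
    my_bank_callback(struct mc_info *mi, uint16_t bank, uint64_t status)
    {
        /* optionally append extra telemetry via x86_mcinfo_add(mi, ...) */
        return MCA_EXTINFO_IGNORED;
    }

    /* during vendor MCA init: */
    x86_mce_callback_register(my_bank_callback);
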
+
int x86_mcinfo_add(struct mc_info *mi, void *mcinfo);
void x86_mcinfo_dump(struct mc_info *mi);
-/* Global variables */
-extern int mce_disabled;
-extern unsigned int nr_mce_banks;
+#endif /* _MCE_H */
--- /dev/null
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/irq.h>
+#include <xen/event.h>
+#include <xen/kernel.h>
+#include <xen/delay.h>
+#include <xen/smp.h>
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+#include "mce.h"
+#include "x86_mca.h"
+
+DEFINE_PER_CPU(cpu_banks_t, mce_banks_owned);
+
+static int nr_intel_ext_msrs = 0;
+static int cmci_support = 0;
+static int firstbank;
+
+#ifdef CONFIG_X86_MCE_THERMAL
+static void unexpected_thermal_interrupt(struct cpu_user_regs *regs)
+{
+ printk(KERN_ERR "Thermal: CPU%d: Unexpected LVT TMR interrupt!\n",
+ smp_processor_id());
+ add_taint(TAINT_MACHINE_CHECK);
+}
+
+/* P4/Xeon Thermal transition interrupt handler */
+static void intel_thermal_interrupt(struct cpu_user_regs *regs)
+{
+ u32 l, h;
+ unsigned int cpu = smp_processor_id();
+ static s_time_t next[NR_CPUS];
+
+ ack_APIC_irq();
+ if (NOW() < next[cpu])
+ return;
+
+ next[cpu] = NOW() + MILLISECS(5000);
+ rdmsr(MSR_IA32_THERM_STATUS, l, h);
+ if (l & 0x1) {
+ printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu);
+ printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n",
+ cpu);
+ add_taint(TAINT_MACHINE_CHECK);
+ } else {
+ printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu);
+ }
+}
+
+/* Thermal interrupt handler for this CPU setup */
+static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs)
+ = unexpected_thermal_interrupt;
+
+fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs)
+{
+ irq_enter();
+ vendor_thermal_interrupt(regs);
+ irq_exit();
+}
+
+/* P4/Xeon Thermal regulation detect and init */
+static void intel_init_thermal(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+ int tm2 = 0;
+ unsigned int cpu = smp_processor_id();
+
+ /* Thermal monitoring */
+ if (!cpu_has(c, X86_FEATURE_ACPI))
+ return; /* -ENODEV */
+
+ /* Clock modulation */
+ if (!cpu_has(c, X86_FEATURE_ACC))
+ return; /* -ENODEV */
+
+ /* first check if it's enabled already, in which case there might
+ * be some SMM goo which handles it, so we can't even put a handler
+ * since it might be delivered via SMI already -zwanem.
+ */
+ rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+ h = apic_read(APIC_LVTTHMR);
+ if ((l & (1<<3)) && (h & APIC_DM_SMI)) {
+ printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n",cpu);
+ return; /* -EBUSY */
+ }
+
+ if (cpu_has(c, X86_FEATURE_TM2) && (l & (1 << 13)))
+ tm2 = 1;
+
+ /* check whether a vector already exists, temporarily masked? */
+ if (h & APIC_VECTOR_MASK) {
+ printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already installed\n",
+ cpu, (h & APIC_VECTOR_MASK));
+ return; /* -EBUSY */
+ }
+
+ /* The temperature transition interrupt handler setup */
+ h = THERMAL_APIC_VECTOR; /* our delivery vector */
+ h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */
+ apic_write_around(APIC_LVTTHMR, h);
+
+ rdmsr (MSR_IA32_THERM_INTERRUPT, l, h);
+ wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h);
+
+ /* ok we're good to go... */
+ vendor_thermal_interrupt = intel_thermal_interrupt;
+
+ rdmsr (MSR_IA32_MISC_ENABLE, l, h);
+ wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h);
+
+ l = apic_read (APIC_LVTTHMR);
+ apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED);
+ printk (KERN_INFO "CPU%d: Thermal monitoring enabled (%s)\n",
+ cpu, tm2 ? "TM2" : "TM1");
+ return;
+}
+#endif /* CONFIG_X86_MCE_THERMAL */
+
+static enum mca_extinfo
+intel_get_extended_msrs(struct mc_info *mci, uint16_t bank, uint64_t status)
+{
+ struct mcinfo_extended mc_ext;
+
+ if (mci == NULL || nr_intel_ext_msrs == 0 || !(status & MCG_STATUS_EIPV))
+ return MCA_EXTINFO_IGNORED;
+
+ /* this function will be called when CAP(9).MCG_EXT_P = 1 */
+ memset(&mc_ext, 0, sizeof(struct mcinfo_extended));
+ mc_ext.common.type = MC_TYPE_EXTENDED;
+ mc_ext.common.size = sizeof(mc_ext);
+ mc_ext.mc_msrs = 10;
+
+ mc_ext.mc_msr[0].reg = MSR_IA32_MCG_EAX;
+ rdmsrl(MSR_IA32_MCG_EAX, mc_ext.mc_msr[0].value);
+ mc_ext.mc_msr[1].reg = MSR_IA32_MCG_EBX;
+ rdmsrl(MSR_IA32_MCG_EBX, mc_ext.mc_msr[1].value);
+ mc_ext.mc_msr[2].reg = MSR_IA32_MCG_ECX;
+ rdmsrl(MSR_IA32_MCG_ECX, mc_ext.mc_msr[2].value);
+
+ mc_ext.mc_msr[3].reg = MSR_IA32_MCG_EDX;
+ rdmsrl(MSR_IA32_MCG_EDX, mc_ext.mc_msr[3].value);
+ mc_ext.mc_msr[4].reg = MSR_IA32_MCG_ESI;
+ rdmsrl(MSR_IA32_MCG_ESI, mc_ext.mc_msr[4].value);
+ mc_ext.mc_msr[5].reg = MSR_IA32_MCG_EDI;
+ rdmsrl(MSR_IA32_MCG_EDI, mc_ext.mc_msr[5].value);
+
+ mc_ext.mc_msr[6].reg = MSR_IA32_MCG_EBP;
+ rdmsrl(MSR_IA32_MCG_EBP, mc_ext.mc_msr[6].value);
+ mc_ext.mc_msr[7].reg = MSR_IA32_MCG_ESP;
+ rdmsrl(MSR_IA32_MCG_ESP, mc_ext.mc_msr[7].value);
+ mc_ext.mc_msr[8].reg = MSR_IA32_MCG_EFLAGS;
+ rdmsrl(MSR_IA32_MCG_EFLAGS, mc_ext.mc_msr[8].value);
+ mc_ext.mc_msr[9].reg = MSR_IA32_MCG_EIP;
+ rdmsrl(MSR_IA32_MCG_EIP, mc_ext.mc_msr[9].value);
+
+ x86_mcinfo_add(mci, &mc_ext);
+
+ return MCA_EXTINFO_GLOBAL;
+}
+
+/* Below are for MCE handling */
+
+/* Log the worst error severity and the offending CPU;
+ * pick this CPU for further processing in softirq */
+static int severity_cpu = -1;
+static int worst = 0;
+
+/* Rendezvous mask for entry to second-round scanning in the MCE# handler */
+static cpumask_t scanned_cpus;
+/* Lock for entry to the critical section in the MCE# handler */
+static bool_t mce_enter_lock = 0;
+/* Record which CPUs are impacted by this MCE# */
+static cpumask_t impact_map;
+
+/* Rendezvous mask for the softirq entry point */
+static cpumask_t mced_cpus;
+/* Rendezvous mask for the softirq exit point */
+static cpumask_t finished_cpus;
+/* Lock for picking one processing CPU */
+static bool_t mce_process_lock = 0;
+
+/* Spinlock for vMCE# MSR virtualization data */
+static DEFINE_SPINLOCK(mce_locks);
+
+/* Local buffer for holding MCE# data temporarily, shared between the MCE
+ * handler and the softirq handler. The data are eventually committed for
+ * the Dom0 log and copied to per-domain data for guest vMCE# MSR
+ * virtualization.
+ * Note: if another MCA arrives while the local buffer is still being
+ * processed in softirq, we simply panic.
+ */
+
+struct mc_local_t
+{
+ bool_t in_use;
+ mctelem_cookie_t mctc[NR_CPUS];
+};
+static struct mc_local_t mc_local;
+
+/* This node list records errors impacting a domain. When an MCE#
+ * happens, each error bank impacting a domain gets an error node
+ * inserted at the tail of that domain's per-domain data for vMCE# MSR
+ * virtualization. When the guest has finished processing a vMCE#
+ * injection, the corresponding node is deleted.
+ * This node list exists solely for GUEST vMCE# MSR virtualization.
+ */
+static struct bank_entry* alloc_bank_entry(void) {
+ struct bank_entry *entry;
+
+ entry = xmalloc(struct bank_entry);
+ if (!entry) {
+ printk(KERN_ERR "MCE: malloc bank_entry failed\n");
+ return NULL;
+ }
+ memset(entry, 0x0, sizeof(*entry));
+ INIT_LIST_HEAD(&entry->list);
+ return entry;
+}
+
+/* Fill error bank info for vMCE# injection and GUEST vMCE#
+ * MSR virtualization data:
+ * 1) Log how many injections are pending for the impacted domain.
+ * 2) Copy the MCE# error bank to the impacted domain's node list,
+ *    for vMCE# MSR virtualization.
+ */
+
+static int fill_vmsr_data(int cpu, struct mcinfo_bank *mc_bank,
+ uint64_t gstatus) {
+ struct domain *d;
+ struct bank_entry *entry;
+
+ /* This error bank impacts one domain; we need to fill domain-related
+ * data for vMCE MSR virtualization and vMCE# injection */
+ if (mc_bank->mc_domid != (uint16_t)~0) {
+ d = get_domain_by_id(mc_bank->mc_domid);
+
+ /* Does not impact a valid domain; skip this bank's error */
+ if (!d) {
+ printk(KERN_DEBUG "MCE: no valid impacted domain found\n");
+ return 0;
+ }
+
+ entry = alloc_bank_entry();
+ entry->mci_status = mc_bank->mc_status;
+ entry->mci_addr = mc_bank->mc_addr;
+ entry->mci_misc = mc_bank->mc_misc;
+ entry->cpu = cpu;
+ entry->bank = mc_bank->mc_bank;
+
+ /* New error node; insert at the tail of the per-domain data */
+ list_add_tail(&entry->list, &d->arch.vmca_msrs.impact_header);
+ /* Fill MSR global status */
+ d->arch.vmca_msrs.mcg_status = gstatus;
+ /* New node impacts the domain; another vMCE# injection is needed */
+ d->arch.vmca_msrs.nr_injection++;
+
+ printk(KERN_DEBUG "MCE: Found error @[CPU%d BANK%d "
+ "status %"PRIx64" addr %"PRIx64" domid %d]\n ",
+ entry->cpu, mc_bank->mc_bank,
+ mc_bank->mc_status, mc_bank->mc_addr, mc_bank->mc_domid);
+ }
+ return 0;
+}
+
+static int mce_actions(void) {
+ int32_t cpu, ret;
+ struct mc_info *local_mi;
+ struct mcinfo_common *mic = NULL;
+ struct mcinfo_global *mc_global;
+ struct mcinfo_bank *mc_bank;
+
+ /* The spinlock serializes read/write access to the vMCE MSR
+ * virtualization data (per-domain vMCE# data)
+ */
+ spin_lock(&mce_locks);
+
+ /*
+ * If softirq is filling this buffer while another MCE# comes,
+ * simply panic
+ */
+ test_and_set_bool(mc_local.in_use);
+
+ for_each_cpu_mask(cpu, impact_map) {
+ if (mc_local.mctc[cpu] == NULL) {
+ printk(KERN_ERR "MCE: get reserved entry failed\n ");
+ ret = -1;
+ goto end;
+ }
+ local_mi = (struct mc_info*)mctelem_dataptr(mc_local.mctc[cpu]);
+ x86_mcinfo_lookup(mic, local_mi, MC_TYPE_GLOBAL);
+ if (mic == NULL) {
+ printk(KERN_ERR "MCE: get local buffer entry failed\n ");
+ ret = -1;
+ goto end;
+ }
+
+ mc_global = (struct mcinfo_global *)mic;
+
+ /* Processing bank information */
+ x86_mcinfo_lookup(mic, local_mi, MC_TYPE_BANK);
+
+ for ( ; mic && mic->size; mic = x86_mcinfo_next(mic) ) {
+ if (mic->type != MC_TYPE_BANK) {
+ continue;
+ }
+ mc_bank = (struct mcinfo_bank*)mic;
+ /* Fill vMCE# injection and vMCE# MSR virtualization related data */
+ if (fill_vmsr_data(cpu, mc_bank, mc_global->mc_gstatus) == -1) {
+ ret = -1;
+ goto end;
+ }
+
+ /* TODO: Add recovery actions here, such as page-offline, etc */
+ }
+ } /* end of impact_map loop */
+
+ ret = 0;
+
+end:
+
+ for_each_cpu_mask(cpu, impact_map) {
+ /* This reserved entry is processed, commit it */
+ if (mc_local.mctc[cpu] != NULL) {
+ mctelem_commit(mc_local.mctc[cpu]);
+ printk(KERN_DEBUG "MCE: Commit one URGENT ENTRY\n");
+ }
+ }
+
+ test_and_clear_bool(mc_local.in_use);
+ spin_unlock(&mce_locks);
+ return ret;
+}
+
+/* Softirq Handler for this MCE# processing */
+static void mce_softirq(void)
+{
+ int cpu = smp_processor_id();
+ cpumask_t affinity;
+
+ /* Wait until all cpus entered softirq */
+ while ( cpus_weight(mced_cpus) != num_online_cpus() ) {
+ cpu_relax();
+ }
+ /* No worst-error CPU was recorded; this should not happen */
+ if (severity_cpu == -1) {
+ printk(KERN_WARNING "MCE: severity_cpu not found!\n");
+ mc_panic("MCE: severity_cpu not found!");
+ return;
+ }
+ /* We choose severity_cpu for further processing */
+ if (severity_cpu == cpu) {
+
+ /* Step1: Fill DOM0 LOG buffer, vMCE injection buffer and
+ * vMCE MSRs virtualization buffer
+ */
+ if (mce_actions())
+ mc_panic("MCE recovery actions or filling of vMCE MSR "
+ "virtualization data failed!\n");
+
+ /* Step2: Send Log to DOM0 through vIRQ */
+ if (dom0 && guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
+ printk(KERN_DEBUG "MCE: send MCE# to DOM0 through virq\n");
+ send_guest_global_virq(dom0, VIRQ_MCA);
+ }
+
+ /* Step3: Inject vMCE to the impacted DOM. Currently we care about DOM0 only */
+ if (guest_has_trap_callback
+ (dom0, 0, TRAP_machine_check) &&
+ !test_and_set_bool(dom0->vcpu[0]->mce_pending)) {
+ dom0->vcpu[0]->cpu_affinity_tmp =
+ dom0->vcpu[0]->cpu_affinity;
+ cpus_clear(affinity);
+ cpu_set(cpu, affinity);
+ printk(KERN_DEBUG "MCE: CPU%d set affinity, old %d\n", cpu,
+ dom0->vcpu[0]->processor);
+ vcpu_set_affinity(dom0->vcpu[0], &affinity);
+ vcpu_kick(dom0->vcpu[0]);
+ }
+
+ /* Clean Data */
+ test_and_clear_bool(mce_process_lock);
+ cpus_clear(impact_map);
+ cpus_clear(scanned_cpus);
+ worst = 0;
+ cpus_clear(mced_cpus);
+ memset(&mc_local, 0x0, sizeof(mc_local));
+ }
+
+ cpu_set(cpu, finished_cpus);
+ wmb();
+ /* Leave until all cpus finished recovery actions in softirq */
+ while ( cpus_weight(finished_cpus) != num_online_cpus() ) {
+ cpu_relax();
+ }
+
+ cpus_clear(finished_cpus);
+ severity_cpu = -1;
+ printk(KERN_DEBUG "CPU%d exiting softirq\n", cpu);
+}
+
+/* Machine check owner judgement algorithm:
+ * When an error happens, all CPUs serially read their MSR banks.
+ * The first CPU to fetch an error bank's info clears that bank;
+ * later readers cannot get the info again. That first CPU is the
+ * actual mce_owner.
+ *
+ * A fatal (pcc=1) error might crash the machine before we are
+ * able to log it. To avoid losing the log, we adopt two-round
+ * scanning:
+ * Round1: simply scan. If pcc = 1 or ripv = 0 is found, simply reset.
+ * MCE banks are sticky, so at the next boot the MCE polling
+ * mechanism will collect and log those errors.
+ * Round2: do all MCE processing logic as normal.
+ */
+
+/* Simple scan. Panic when non-recoverable errors are found, so that
+ * the log is not lost.
+ */
+static void severity_scan(void)
+{
+ uint64_t status;
+ int32_t i;
+
+ /* TODO: For PCC = 0, we need further judgement. If the error cannot be
+ * recovered, we need to RESET to avoid losing the DOM0 log.
+ */
+ for ( i = 0; i < nr_mce_banks; i++) {
+ rdmsrl(MSR_IA32_MC0_STATUS + 4 * i , status);
+ if ( !(status & MCi_STATUS_VAL) )
+ continue;
+ /* MCE handler only handles UC error */
+ if ( !(status & MCi_STATUS_UC) )
+ continue;
+ if ( !(status & MCi_STATUS_EN) )
+ continue;
+ if (status & MCi_STATUS_PCC)
+ mc_panic("pcc = 1, cpu unable to continue\n");
+ }
+
+ /* TODO: Further judgement for later CPUs here, maybe with MCACOD assistance */
+ /* EIPV and RIPV are not a reliable way to judge the error severity */
+
+}
+
+
+static void intel_machine_check(struct cpu_user_regs * regs, long error_code)
+{
+ unsigned int cpu = smp_processor_id();
+ int32_t severity = 0;
+ uint64_t gstatus;
+ mctelem_cookie_t mctc = NULL;
+ struct mca_summary bs;
+
+ /* First round scanning */
+ severity_scan();
+ cpu_set(cpu, scanned_cpus);
+ while (cpus_weight(scanned_cpus) < num_online_cpus())
+ cpu_relax();
+
+ wmb();
+ /* All CPUs Finished first round scanning */
+ if (mc_local.in_use != 0) {
+ mc_panic("MCE: Local buffer is being processed, can't handle new MCE!\n");
+ return;
+ }
+
+ /* Enter Critical Section */
+ while (test_and_set_bool(mce_enter_lock)) {
+ udelay (1);
+ }
+
+ mctc = mcheck_mca_logout(MCA_MCE_HANDLER, mca_allbanks, &bs);
+ /* Local data points to the reserved entry; let the softirq
+ * handler process the local data */
+ if (!bs.errcnt) {
+ if (mctc != NULL)
+ mctelem_dismiss(mctc);
+ mc_local.mctc[cpu] = NULL;
+ cpu_set(cpu, mced_cpus);
+ test_and_clear_bool(mce_enter_lock);
+ raise_softirq(MACHINE_CHECK_SOFTIRQ);
+ return;
+ }
+ else if ( mctc != NULL) {
+ mc_local.mctc[cpu] = mctc;
+ }
+
+ if (bs.uc || bs.pcc)
+ add_taint(TAINT_MACHINE_CHECK);
+
+ if (bs.pcc) {
+ printk(KERN_WARNING "PCC=1 should have caused reset\n");
+ severity = 3;
+ }
+ else if (bs.uc) {
+ severity = 2;
+ }
+ else {
+ printk(KERN_WARNING "We should skip Correctable Error\n");
+ severity = 1;
+ }
+ /* This is the offending cpu! */
+ cpu_set(cpu, impact_map);
+
+ if ( severity > worst) {
+ worst = severity;
+ severity_cpu = cpu;
+ }
+ cpu_set(cpu, mced_cpus);
+ test_and_clear_bool(mce_enter_lock);
+ wmb();
+
+ /* Wait for all CPUs to leave the critical section */
+ while (cpus_weight(mced_cpus) < num_online_cpus())
+ cpu_relax();
+ /* Print MCE error */
+ x86_mcinfo_dump(mctelem_dataptr(mctc));
+
+ /* Pick one CPU to clear MCIP */
+ if (!test_and_set_bool(mce_process_lock)) {
+ rdmsrl(MSR_IA32_MCG_STATUS, gstatus);
+ wrmsrl(MSR_IA32_MCG_STATUS, gstatus & ~MCG_STATUS_MCIP);
+
+ if (worst >= 3) {
+ printk(KERN_WARNING "worst=3 should have caused RESET\n");
+ mc_panic("worst=3 should have caused RESET");
+ }
+ else {
+ printk(KERN_DEBUG "MCE: trying to recover\n");
+ }
+ }
+ raise_softirq(MACHINE_CHECK_SOFTIRQ);
+}
+
+static DEFINE_SPINLOCK(cmci_discover_lock);
+static DEFINE_PER_CPU(cpu_banks_t, no_cmci_banks);
+
+/*
+ * Discover bank sharing using the algorithm recommended in the SDM.
+ */
+static int do_cmci_discover(int i)
+{
+ unsigned msr = MSR_IA32_MC0_CTL2 + i;
+ u64 val;
+
+ rdmsrl(msr, val);
+ /* Some other CPU already owns this bank. */
+ if (val & CMCI_EN) {
+ clear_bit(i, __get_cpu_var(mce_banks_owned));
+ goto out;
+ }
+ wrmsrl(msr, val | CMCI_EN | CMCI_THRESHOLD);
+ rdmsrl(msr, val);
+
+ if (!(val & CMCI_EN)) {
+ /* This bank does not support CMCI. Polling timer has to handle it. */
+ set_bit(i, __get_cpu_var(no_cmci_banks));
+ return 0;
+ }
+ set_bit(i, __get_cpu_var(mce_banks_owned));
+out:
+ clear_bit(i, __get_cpu_var(no_cmci_banks));
+ return 1;
+}
+
+static void cmci_discover(void)
+{
+ unsigned long flags;
+ int i;
+ mctelem_cookie_t mctc;
+ struct mca_summary bs;
+
+ printk(KERN_DEBUG "CMCI: find owner on CPU%d\n", smp_processor_id());
+
+ spin_lock_irqsave(&cmci_discover_lock, flags);
+
+ for (i = 0; i < nr_mce_banks; i++)
+ if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+ do_cmci_discover(i);
+
+ spin_unlock_irqrestore(&cmci_discover_lock, flags);
+
+ /* Handle any CMCI that happened during the owner change.
+ * If a CMCI happened but was not processed immediately,
+ * MCi_STATUS (error count, bits 38~52) is not cleared and
+ * the CMCI interrupt will never be triggered again.
+ */
+
+ mctc = mcheck_mca_logout(
+ MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs);
+
+ if (bs.errcnt && mctc != NULL) {
+ if (guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
+ mctelem_commit(mctc);
+ send_guest_global_virq(dom0, VIRQ_MCA);
+ } else {
+ x86_mcinfo_dump(mctelem_dataptr(mctc));
+ mctelem_dismiss(mctc);
+ }
+ } else if (mctc != NULL)
+ mctelem_dismiss(mctc);
+
+ printk(KERN_DEBUG "CMCI: CPU%d owner_map[%lx], no_cmci_map[%lx]\n",
+ smp_processor_id(),
+ *((unsigned long *)__get_cpu_var(mce_banks_owned)),
+ *((unsigned long *)__get_cpu_var(no_cmci_banks)));
+}
+
+/*
+ * Define an owner for each bank. Banks can be shared between CPUs
+ * and to avoid reporting events multiple times always set up one
+ * CPU as owner.
+ *
+ * The assignment has to be redone when CPUs go offline and
+ * any of the owners goes away. Also pollers run in parallel so we
+ * have to be careful to update the banks in a way that doesn't
+ * lose or duplicate events.
+ */
+
+static void mce_set_owner(void)
+{
+ if (!cmci_support || mce_disabled == 1)
+ return;
+
+ cmci_discover();
+}
+
+static void __cpu_mcheck_distribute_cmci(void *unused)
+{
+ cmci_discover();
+}
+
+void cpu_mcheck_distribute_cmci(void)
+{
+ if (cmci_support && !mce_disabled)
+ on_each_cpu(__cpu_mcheck_distribute_cmci, NULL, 0, 0);
+}
+
+static void clear_cmci(void)
+{
+ int i;
+
+ if (!cmci_support || mce_disabled == 1)
+ return;
+
+ printk(KERN_DEBUG "CMCI: clear_cmci support on CPU%d\n",
+ smp_processor_id());
+
+ for (i = 0; i < nr_mce_banks; i++) {
+ unsigned msr = MSR_IA32_MC0_CTL2 + i;
+ u64 val;
+ if (!test_bit(i, __get_cpu_var(mce_banks_owned)))
+ continue;
+ rdmsrl(msr, val);
+ if (val & (CMCI_EN|CMCI_THRESHOLD_MASK))
+ wrmsrl(msr, val & ~(CMCI_EN|CMCI_THRESHOLD_MASK));
+ clear_bit(i, __get_cpu_var(mce_banks_owned));
+ }
+}
+
+void cpu_mcheck_disable(void)
+{
+ clear_in_cr4(X86_CR4_MCE);
+
+ if (cmci_support && !mce_disabled)
+ clear_cmci();
+}
+
+static void intel_init_cmci(struct cpuinfo_x86 *c)
+{
+ u32 l, apic;
+ int cpu = smp_processor_id();
+
+ if (!mce_available(c) || !cmci_support) {
+ printk(KERN_DEBUG "CMCI: CPU%d has no CMCI support\n", cpu);
+ return;
+ }
+
+ apic = apic_read(APIC_CMCI);
+ if ( apic & APIC_VECTOR_MASK )
+ {
+ printk(KERN_WARNING "CPU%d CMCI LVT vector (%#x) already installed\n",
+ cpu, ( apic & APIC_VECTOR_MASK ));
+ return;
+ }
+
+ apic = CMCI_APIC_VECTOR;
+ apic |= (APIC_DM_FIXED | APIC_LVT_MASKED);
+ apic_write_around(APIC_CMCI, apic);
+
+ l = apic_read(APIC_CMCI);
+ apic_write_around(APIC_CMCI, l & ~APIC_LVT_MASKED);
+}
+
+fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs)
+{
+ mctelem_cookie_t mctc;
+ struct mca_summary bs;
+
+ ack_APIC_irq();
+ irq_enter();
+
+ mctc = mcheck_mca_logout(
+ MCA_CMCI_HANDLER, __get_cpu_var(mce_banks_owned), &bs);
+
+ if (bs.errcnt && mctc != NULL) {
+ if (guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
+ mctelem_commit(mctc);
+ printk(KERN_DEBUG "CMCI: send CMCI to DOM0 through virq\n");
+ send_guest_global_virq(dom0, VIRQ_MCA);
+ } else {
+ x86_mcinfo_dump(mctelem_dataptr(mctc));
+ mctelem_dismiss(mctc);
+ }
+ } else if (mctc != NULL)
+ mctelem_dismiss(mctc);
+
+ irq_exit();
+}
+
+void mce_intel_feature_init(struct cpuinfo_x86 *c)
+{
+
+#ifdef CONFIG_X86_MCE_THERMAL
+ intel_init_thermal(c);
+#endif
+ intel_init_cmci(c);
+}
+
+uint64_t g_mcg_cap;
+static void mce_cap_init(struct cpuinfo_x86 *c)
+{
+ u32 l, h;
+
+ rdmsr (MSR_IA32_MCG_CAP, l, h);
+ /* For Guest vMCE usage */
+ g_mcg_cap = ((u64)h << 32 | l) & (~MCG_CMCI_P);
+
+ if ((l & MCG_CMCI_P) && cpu_has_apic)
+ cmci_support = 1;
+
+ nr_mce_banks = l & 0xff;
+ if (nr_mce_banks > MAX_NR_BANKS)
+ printk(KERN_WARNING "MCE: number of MCE banks exceeds MAX_NR_BANKS\n");
+ if (l & MCG_EXT_P)
+ {
+ nr_intel_ext_msrs = (l >> MCG_EXT_CNT) & 0xff;
+ printk (KERN_INFO "CPU%d: Intel Extended MCE MSRs (%d) available\n",
+ smp_processor_id(), nr_intel_ext_msrs);
+ }
+ firstbank = mce_firstbank(c);
+}
+
+static void mce_init(void)
+{
+ u32 l, h;
+ int i;
+ mctelem_cookie_t mctc;
+ struct mca_summary bs;
+
+ clear_in_cr4(X86_CR4_MCE);
+
+ /* Log the machine checks left over from the previous reset.
+ * This also clears all registers. */
+
+ mctc = mcheck_mca_logout(MCA_RESET, mca_allbanks, &bs);
+
+ /* in the boot up stage, don't inject to DOM0, but print out */
+ if (bs.errcnt && mctc != NULL) {
+ x86_mcinfo_dump(mctelem_dataptr(mctc));
+ mctelem_dismiss(mctc);
+ }
+
+ set_in_cr4(X86_CR4_MCE);
+ rdmsr (MSR_IA32_MCG_CAP, l, h);
+ if (l & MCG_CTL_P) /* Control register present ? */
+ wrmsr(MSR_IA32_MCG_CTL, 0xffffffff, 0xffffffff);
+
+ for (i = firstbank; i < nr_mce_banks; i++)
+ {
+ /* Some banks are shared across cores; use MCi_CTL to judge whether
+ * this bank has already been initialized by another core. */
+ rdmsr(MSR_IA32_MC0_CTL + 4*i, l, h);
+ if (!(l | h))
+ {
+ /* if ctl is 0, this bank has never been initialized */
+ printk(KERN_DEBUG "mce_init: init bank%d\n", i);
+ wrmsr (MSR_IA32_MC0_CTL + 4*i, 0xffffffff, 0xffffffff);
+ wrmsr (MSR_IA32_MC0_STATUS + 4*i, 0x0, 0x0);
+ }
+ }
+ if (firstbank) /* if cmci enabled, firstbank = 0 */
+ wrmsr (MSR_IA32_MC0_STATUS, 0x0, 0x0);
+}
+
+/* p4/p6 family have similar MCA initialization process */
+int intel_mcheck_init(struct cpuinfo_x86 *c)
+{
+ mce_cap_init(c);
+ printk (KERN_INFO "Intel machine check reporting enabled on CPU#%d.\n",
+ smp_processor_id());
+
+ /* machine check is available */
+ x86_mce_vector_register(intel_machine_check);
+ x86_mce_callback_register(intel_get_extended_msrs);
+
+ mce_init();
+ mce_intel_feature_init(c);
+ mce_set_owner();
+
+ open_softirq(MACHINE_CHECK_SOFTIRQ, mce_softirq);
+ return 1;
+}
+
+/* Guest vMCE# MSRs virtualization ops (rdmsr/wrmsr) */
+int intel_mce_wrmsr(u32 msr, u32 lo, u32 hi)
+{
+ struct domain *d = current->domain;
+ struct bank_entry *entry = NULL;
+ uint64_t value = (u64)hi << 32 | lo;
+ int ret = 1;
+
+ spin_lock(&mce_locks);
+ switch(msr)
+ {
+ case MSR_IA32_MCG_CTL:
+ if (value != (u64)~0x0 && value != 0x0) {
+ gdprintk(XENLOG_WARNING, "MCE: value written to MCG_CTL "
+ "should be all 0s or 1s\n");
+ ret = -1;
+ break;
+ }
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "MCE: wrmsr not in DOM context, skip\n");
+ break;
+ }
+ d->arch.vmca_msrs.mcg_ctl = value;
+ break;
+ case MSR_IA32_MCG_STATUS:
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "MCE: wrmsr not in DOM context, skip\n");
+ break;
+ }
+ d->arch.vmca_msrs.mcg_status = value;
+ gdprintk(XENLOG_DEBUG, "MCE: wrmsr MCG_STATUS %"PRIx64"\n", value);
+ break;
+ case MSR_IA32_MC0_CTL2:
+ case MSR_IA32_MC1_CTL2:
+ case MSR_IA32_MC2_CTL2:
+ case MSR_IA32_MC3_CTL2:
+ case MSR_IA32_MC4_CTL2:
+ case MSR_IA32_MC5_CTL2:
+ case MSR_IA32_MC6_CTL2:
+ case MSR_IA32_MC7_CTL2:
+ case MSR_IA32_MC8_CTL2:
+ gdprintk(XENLOG_WARNING, "We have disabled the CMCI capability; "
+ "the guest should not write this MSR!\n");
+ break;
+ case MSR_IA32_MC0_CTL:
+ case MSR_IA32_MC1_CTL:
+ case MSR_IA32_MC2_CTL:
+ case MSR_IA32_MC3_CTL:
+ case MSR_IA32_MC4_CTL:
+ case MSR_IA32_MC5_CTL:
+ case MSR_IA32_MC6_CTL:
+ case MSR_IA32_MC7_CTL:
+ case MSR_IA32_MC8_CTL:
+ if (value != (u64)~0x0 && value != 0x0) {
+ gdprintk(XENLOG_WARNING, "MCE: value written to MCi_CTL "
+ "should be all 0s or 1s\n");
+ ret = -1;
+ break;
+ }
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "MCE: wrmsr not in DOM context, skip\n");
+ break;
+ }
+ d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4] = value;
+ break;
+ case MSR_IA32_MC0_STATUS:
+ case MSR_IA32_MC1_STATUS:
+ case MSR_IA32_MC2_STATUS:
+ case MSR_IA32_MC3_STATUS:
+ case MSR_IA32_MC4_STATUS:
+ case MSR_IA32_MC5_STATUS:
+ case MSR_IA32_MC6_STATUS:
+ case MSR_IA32_MC7_STATUS:
+ case MSR_IA32_MC8_STATUS:
+ if (!d || is_idle_domain(d)) {
+ /* Just skip */
+ gdprintk(XENLOG_WARNING, "mce wrmsr: not in domain context!\n");
+ break;
+ }
+ /* Take the first entry of the list; it corresponds to the current
+ * vMCE# injection. When the guest has finished processing the
+ * vMCE#, this node will be deleted.
+ * Only the error bank is written; writes to other banks simply return.
+ */
+ if ( !list_empty(&d->arch.vmca_msrs.impact_header) ) {
+ entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == (msr - MSR_IA32_MC0_STATUS)/4 ) {
+ entry->mci_status = value;
+ }
+ gdprintk(XENLOG_DEBUG, "MCE: wrmsr mci_status in vMCE# context\n");
+ }
+ gdprintk(XENLOG_DEBUG, "MCE: wrmsr mci_status val:%"PRIx64"\n", value);
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+ spin_unlock(&mce_locks);
+ return ret;
+}
+
+int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi)
+{
+ struct domain *d = current->domain;
+ int ret = 1;
+ struct bank_entry *entry = NULL;
+
+ *lo = *hi = 0x0;
+ spin_lock(&mce_locks);
+ switch(msr)
+ {
+ case MSR_IA32_MCG_STATUS:
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n");
+ *lo = *hi = 0x0;
+ break;
+ }
+ *lo = (u32)d->arch.vmca_msrs.mcg_status;
+ *hi = (u32)(d->arch.vmca_msrs.mcg_status >> 32);
+ gdprintk(XENLOG_DEBUG, "MCE: rd MCG_STATUS lo %x hi %x\n", *lo, *hi);
+ break;
+ case MSR_IA32_MCG_CAP:
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n");
+ *lo = *hi = 0x0;
+ break;
+ }
+ *lo = (u32)d->arch.vmca_msrs.mcg_cap;
+ *hi = (u32)(d->arch.vmca_msrs.mcg_cap >> 32);
+ gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CAP lo %x hi %x\n", *lo, *hi);
+ break;
+ case MSR_IA32_MCG_CTL:
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n");
+ *lo = *hi = 0x0;
+ break;
+ }
+ *lo = (u32)d->arch.vmca_msrs.mcg_ctl;
+ *hi = (u32)(d->arch.vmca_msrs.mcg_ctl >> 32);
+ gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCG_CTL lo %x hi %x\n", *lo, *hi);
+ break;
+ case MSR_IA32_MC0_CTL2:
+ case MSR_IA32_MC1_CTL2:
+ case MSR_IA32_MC2_CTL2:
+ case MSR_IA32_MC3_CTL2:
+ case MSR_IA32_MC4_CTL2:
+ case MSR_IA32_MC5_CTL2:
+ case MSR_IA32_MC6_CTL2:
+ case MSR_IA32_MC7_CTL2:
+ case MSR_IA32_MC8_CTL2:
+ gdprintk(XENLOG_WARNING, "We have disabled the CMCI capability; "
+ "the guest should not read this MSR!\n");
+ break;
+ case MSR_IA32_MC0_CTL:
+ case MSR_IA32_MC1_CTL:
+ case MSR_IA32_MC2_CTL:
+ case MSR_IA32_MC3_CTL:
+ case MSR_IA32_MC4_CTL:
+ case MSR_IA32_MC5_CTL:
+ case MSR_IA32_MC6_CTL:
+ case MSR_IA32_MC7_CTL:
+ case MSR_IA32_MC8_CTL:
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n");
+ *lo = *hi = 0x0;
+ break;
+ }
+ *lo = (u32)d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4];
+ *hi =
+ (u32)(d->arch.vmca_msrs.mci_ctl[(msr - MSR_IA32_MC0_CTL)/4]
+ >> 32);
+ gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_CTL lo %x hi %x\n", *lo, *hi);
+ break;
+ case MSR_IA32_MC0_STATUS:
+ case MSR_IA32_MC1_STATUS:
+ case MSR_IA32_MC2_STATUS:
+ case MSR_IA32_MC3_STATUS:
+ case MSR_IA32_MC4_STATUS:
+ case MSR_IA32_MC5_STATUS:
+ case MSR_IA32_MC6_STATUS:
+ case MSR_IA32_MC7_STATUS:
+ case MSR_IA32_MC8_STATUS:
+ /* Only the error bank is read; reads of other banks simply return 0 */
+ *lo = *hi = 0x0;
+ gdprintk(XENLOG_DEBUG, "MCE: rdmsr mci_status\n");
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "mce_rdmsr: not in domain context!\n");
+ break;
+ }
+ if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
+ entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == (msr - MSR_IA32_MC0_STATUS)/4 ) {
+ *lo = entry->mci_status;
+ *hi = entry->mci_status >> 32;
+ gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_STATUS in vMCE# context "
+ "lo %x hi %x\n", *lo, *hi);
+ }
+ }
+ break;
+ case MSR_IA32_MC0_ADDR:
+ case MSR_IA32_MC1_ADDR:
+ case MSR_IA32_MC2_ADDR:
+ case MSR_IA32_MC3_ADDR:
+ case MSR_IA32_MC4_ADDR:
+ case MSR_IA32_MC5_ADDR:
+ case MSR_IA32_MC6_ADDR:
+ case MSR_IA32_MC7_ADDR:
+ case MSR_IA32_MC8_ADDR:
+ *lo = *hi = 0x0;
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "mce_rdmsr: not in domain context!\n");
+ break;
+ }
+ if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
+ entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == (msr - MSR_IA32_MC0_ADDR)/4 ) {
+ *lo = entry->mci_addr;
+ *hi = entry->mci_addr >> 32;
+ gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_ADDR in vMCE# context "
+ "lo %x hi %x\n", *lo, *hi);
+ }
+ }
+ break;
+ case MSR_IA32_MC0_MISC:
+ case MSR_IA32_MC1_MISC:
+ case MSR_IA32_MC2_MISC:
+ case MSR_IA32_MC3_MISC:
+ case MSR_IA32_MC4_MISC:
+ case MSR_IA32_MC5_MISC:
+ case MSR_IA32_MC6_MISC:
+ case MSR_IA32_MC7_MISC:
+ case MSR_IA32_MC8_MISC:
+ *lo = *hi = 0x0;
+ if (!d || is_idle_domain(d)) {
+ gdprintk(XENLOG_WARNING, "MCE: rdmsr not in domain context!\n");
+ break;
+ }
+ if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
+ entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ struct bank_entry, list);
+ if ( entry->bank == (msr - MSR_IA32_MC0_MISC)/4 ) {
+ *lo = entry->mci_misc;
+ *hi = entry->mci_misc >> 32;
+ gdprintk(XENLOG_DEBUG, "MCE: rdmsr MCi_MISC in vMCE# context "
+ " lo %x hi %x\n", *lo, *hi);
+ }
+ }
+ break;
+ default:
+ ret = 0;
+ break;
+ }
+ spin_unlock(&mce_locks);
+ return ret;
+}
+
+
--- /dev/null
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+/*
+ * mctelem.c - x86 Machine Check Telemetry Transport
+ */
+
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/kernel.h>
+#include <xen/config.h>
+#include <xen/smp.h>
+#include <xen/errno.h>
+#include <xen/sched.h>
+#include <xen/sched-if.h>
+#include <xen/cpumask.h>
+#include <xen/event.h>
+
+#include <asm/processor.h>
+#include <asm/system.h>
+#include <asm/msr.h>
+
+#include "mce.h"
+
+struct mctelem_ent {
+ struct mctelem_ent *mcte_next; /* next in chronological order */
+ struct mctelem_ent *mcte_prev; /* previous in chronological order */
+ uint32_t mcte_flags; /* See MCTE_F_* below */
+ uint32_t mcte_refcnt; /* Reference count */
+ void *mcte_data; /* corresponding data payload */
+};
+
+#define MCTE_F_HOME_URGENT 0x0001U /* free to urgent freelist */
+#define MCTE_F_HOME_NONURGENT 0x0002U /* free to nonurgent freelist */
+#define MCTE_F_CLASS_URGENT 0x0004U /* in use - urgent errors */
+#define MCTE_F_CLASS_NONURGENT 0x0008U /* in use - nonurgent errors */
+#define MCTE_F_STATE_FREE 0x0010U /* on a freelist */
+#define MCTE_F_STATE_UNCOMMITTED 0x0020U /* reserved; on no list */
+#define MCTE_F_STATE_COMMITTED 0x0040U /* on a committed list */
+#define MCTE_F_STATE_PROCESSING 0x0080U /* on a processing list */
+
+#define MCTE_F_MASK_HOME (MCTE_F_HOME_URGENT | MCTE_F_HOME_NONURGENT)
+#define MCTE_F_MASK_CLASS (MCTE_F_CLASS_URGENT | MCTE_F_CLASS_NONURGENT)
+#define MCTE_F_MASK_STATE (MCTE_F_STATE_FREE | \
+ MCTE_F_STATE_UNCOMMITTED | \
+ MCTE_F_STATE_COMMITTED | \
+ MCTE_F_STATE_PROCESSING)
+
+#define MCTE_HOME(tep) ((tep)->mcte_flags & MCTE_F_MASK_HOME)
+
+#define MCTE_CLASS(tep) ((tep)->mcte_flags & MCTE_F_MASK_CLASS)
+#define MCTE_SET_CLASS(tep, new) do { \
+ (tep)->mcte_flags &= ~MCTE_F_MASK_CLASS; \
+ (tep)->mcte_flags |= MCTE_F_CLASS_##new; } while (0)
+
+#define MCTE_STATE(tep) ((tep)->mcte_flags & MCTE_F_MASK_STATE)
+#define MCTE_TRANSITION_STATE(tep, old, new) do { \
+ BUG_ON(MCTE_STATE(tep) != (MCTE_F_STATE_##old)); \
+ (tep)->mcte_flags &= ~MCTE_F_MASK_STATE; \
+ (tep)->mcte_flags |= (MCTE_F_STATE_##new); } while (0)
+
+#define MC_URGENT_NENT 10
+#define MC_NONURGENT_NENT 20
+
+#define MC_NCLASSES (MC_NONURGENT + 1)
+
+#define COOKIE2MCTE(c) ((struct mctelem_ent *)(c))
+#define MCTE2COOKIE(tep) ((mctelem_cookie_t)(tep))
+
+static struct mc_telem_ctl {
+ /* Linked lists that thread the array members together.
+ *
+ * The free lists are singly-linked via mcte_next, and we allocate
+ * from them by atomically unlinking an element from the head.
+ * Consumed entries are returned to the head of the free list.
+ * When an entry is reserved off the free list it is not linked
+ * on any list until it is committed or dismissed.
+ *
+ * The committed list grows at the head and we do not maintain a
+ * tail pointer; insertions are performed atomically. The head
+ * thus has the most-recently committed telemetry, i.e. the
+ * list is in reverse chronological order. The committed list
+ * is singly-linked via mcte_prev pointers, and mcte_next is NULL.
+ * When we move telemetry from the committed list to the processing
+ * list we atomically unlink the committed list and keep a pointer
+ * to the head of that list; we then traverse the list following
+ * mcte_prev and fill in mcte_next to doubly-link the list, and then
+ * append the tail of the list onto the processing list. If we panic
+ * during this manipulation of the committed list we still have
+ * the pointer to its head so we can recover all entries during
+ * the panic flow (albeit in reverse chronological order).
+ *
+ * The processing list is updated in a controlled context, and
+ * we can lock it for updates. The head of the processing list
+ * always has the oldest telemetry, and we append (as above)
+ * at the tail of the processing list. */
+ struct mctelem_ent *mctc_free[MC_NCLASSES];
+ struct mctelem_ent *mctc_committed[MC_NCLASSES];
+ struct mctelem_ent *mctc_processing_head[MC_NCLASSES];
+ struct mctelem_ent *mctc_processing_tail[MC_NCLASSES];
+ /*
+ * Telemetry array
+ */
+ struct mctelem_ent *mctc_elems;
+} mctctl;
+
+/* Lock protecting all processing lists */
+static DEFINE_SPINLOCK(processing_lock);
+
+static void *cmpxchgptr(void *ptr, void *old, void *new)
+{
+ unsigned long *ulp = (unsigned long *)ptr;
+ unsigned long a = (unsigned long)old;
+ unsigned long b = (unsigned long)new;
+
+ return (void *)cmpxchg(ulp, a, b);
+}
+
+/* Free an entry to its native free list; the entry must not be linked on
+ * any list.
+ */
+static void mctelem_free(struct mctelem_ent *tep)
+{
+ mctelem_class_t target = MCTE_HOME(tep) == MCTE_F_HOME_URGENT ?
+ MC_URGENT : MC_NONURGENT;
+ struct mctelem_ent **freelp;
+ struct mctelem_ent *oldhead;
+
+ BUG_ON(tep->mcte_refcnt != 0);
+ BUG_ON(MCTE_STATE(tep) != MCTE_F_STATE_FREE);
+
+ tep->mcte_prev = NULL;
+ freelp = &mctctl.mctc_free[target];
+ for (;;) {
+ oldhead = *freelp;
+ tep->mcte_next = oldhead;
+ wmb();
+ if (cmpxchgptr(freelp, oldhead, tep) == oldhead)
+ break;
+ }
+}
+
+/* Increment the reference count of an entry that is not linked on to
+ * any list and which only the caller has a pointer to.
+ */
+static void mctelem_hold(struct mctelem_ent *tep)
+{
+ tep->mcte_refcnt++;
+}
+
+/* Increment the reference count on an entry that is linked at the head of
+ * a processing list. The caller is responsible for locking the list.
+ */
+static void mctelem_processing_hold(struct mctelem_ent *tep)
+{
+ int which = MCTE_CLASS(tep) == MCTE_F_CLASS_URGENT ?
+ MC_URGENT : MC_NONURGENT;
+
+ BUG_ON(tep != mctctl.mctc_processing_head[which]);
+ tep->mcte_refcnt++;
+}
+
+/* Decrement the reference count on an entry that is linked at the head of
+ * a processing list. The caller is responsible for locking the list.
+ */
+static void mctelem_processing_release(struct mctelem_ent *tep)
+{
+ int which = MCTE_CLASS(tep) == MCTE_F_CLASS_URGENT ?
+ MC_URGENT : MC_NONURGENT;
+
+ BUG_ON(tep != mctctl.mctc_processing_head[which]);
+ if (--tep->mcte_refcnt == 0) {
+ MCTE_TRANSITION_STATE(tep, PROCESSING, FREE);
+ mctctl.mctc_processing_head[which] = tep->mcte_next;
+ mctelem_free(tep);
+ }
+}
+
+void mctelem_init(int reqdatasz)
+{
+ static int called = 0;
+ static int datasz = 0, realdatasz = 0;
+ char *datarr;
+ int i;
+
+ BUG_ON(MC_URGENT != 0 || MC_NONURGENT != 1 || MC_NCLASSES != 2);
+
+ /* Called from mcheck_init for all processors; initialize for the
+ * first call only (no race here since the boot cpu completes
+ * init before others start up). */
+ if (++called == 1) {
+ realdatasz = reqdatasz;
+ datasz = (reqdatasz & ~0xf) + 0x10; /* 16 byte roundup */
+ } else {
+ BUG_ON(reqdatasz != realdatasz);
+ return;
+ }
+
+ if ((mctctl.mctc_elems = xmalloc_array(struct mctelem_ent,
+ MC_URGENT_NENT + MC_NONURGENT_NENT)) == NULL ||
+ (datarr = xmalloc_bytes((MC_URGENT_NENT + MC_NONURGENT_NENT) *
+ datasz)) == NULL) {
+ if (mctctl.mctc_elems)
+ xfree(mctctl.mctc_elems);
+ printk("Allocations for MCA telemetry failed\n");
+ return;
+ }
+
+ for (i = 0; i < MC_URGENT_NENT + MC_NONURGENT_NENT; i++) {
+ struct mctelem_ent *tep, **tepp;
+
+ tep = mctctl.mctc_elems + i;
+ tep->mcte_flags = MCTE_F_STATE_FREE;
+ tep->mcte_refcnt = 0;
+ tep->mcte_data = datarr + i * datasz;
+
+ if (i < MC_URGENT_NENT) {
+ tepp = &mctctl.mctc_free[MC_URGENT];
+ tep->mcte_flags |= MCTE_F_HOME_URGENT;
+ } else {
+ tepp = &mctctl.mctc_free[MC_NONURGENT];
+ tep->mcte_flags |= MCTE_F_HOME_NONURGENT;
+ }
+
+ tep->mcte_next = *tepp;
+ tep->mcte_prev = NULL;
+ *tepp = tep;
+ }
+}
+
+/* incremented non-atomically when reserve fails */
+static int mctelem_drop_count;
+
+/* Reserve a telemetry entry, or return NULL if none available.
+ * If we return an entry then the caller must subsequently call exactly one of
+ * mctelem_unreserve or mctelem_commit for that entry.
+ */
+mctelem_cookie_t mctelem_reserve(mctelem_class_t which)
+{
+ struct mctelem_ent **freelp;
+ struct mctelem_ent *oldhead, *newhead;
+ mctelem_class_t target = (which == MC_URGENT) ?
+ MC_URGENT : MC_NONURGENT;
+
+ freelp = &mctctl.mctc_free[target];
+ for (;;) {
+ if ((oldhead = *freelp) == NULL) {
+ if (which == MC_URGENT && target == MC_URGENT) {
+ /* raid the non-urgent freelist */
+ target = MC_NONURGENT;
+ freelp = &mctctl.mctc_free[target];
+ continue;
+ } else {
+ mctelem_drop_count++;
+ return (NULL);
+ }
+ }
+
+ newhead = oldhead->mcte_next;
+ if (cmpxchgptr(freelp, oldhead, newhead) == oldhead) {
+ struct mctelem_ent *tep = oldhead;
+
+ mctelem_hold(tep);
+ MCTE_TRANSITION_STATE(tep, FREE, UNCOMMITTED);
+ tep->mcte_next = NULL;
+ tep->mcte_prev = NULL;
+ if (which == MC_URGENT)
+ MCTE_SET_CLASS(tep, URGENT);
+ else
+ MCTE_SET_CLASS(tep, NONURGENT);
+ return MCTE2COOKIE(tep);
+ }
+ }
+}
+
+void *mctelem_dataptr(mctelem_cookie_t cookie)
+{
+ struct mctelem_ent *tep = COOKIE2MCTE(cookie);
+
+ return tep->mcte_data;
+}
+
+/* Release a previously reserved entry back to the freelist without
+ * submitting it for logging. The entry must not be linked on to any
+ * list - that's how mctelem_reserve handed it out.
+ */
+void mctelem_dismiss(mctelem_cookie_t cookie)
+{
+ struct mctelem_ent *tep = COOKIE2MCTE(cookie);
+
+ tep->mcte_refcnt--;
+ MCTE_TRANSITION_STATE(tep, UNCOMMITTED, FREE);
+ mctelem_free(tep);
+}
+
+/* Commit an entry with completed telemetry for logging. The caller must
+ * not reference the entry after this call. Note that we add entries
+ * at the head of the committed list, so that list therefore has entries
+ * in reverse chronological order.
+ */
+void mctelem_commit(mctelem_cookie_t cookie)
+{
+ struct mctelem_ent *tep = COOKIE2MCTE(cookie);
+ struct mctelem_ent **commlp;
+ struct mctelem_ent *oldhead;
+ mctelem_class_t target = MCTE_CLASS(tep) == MCTE_F_CLASS_URGENT ?
+ MC_URGENT : MC_NONURGENT;
+
+ BUG_ON(tep->mcte_next != NULL || tep->mcte_prev != NULL);
+ MCTE_TRANSITION_STATE(tep, UNCOMMITTED, COMMITTED);
+
+ commlp = &mctctl.mctc_committed[target];
+ for (;;) {
+ oldhead = *commlp;
+ tep->mcte_prev = oldhead;
+ wmb();
+ if (cmpxchgptr(commlp, oldhead, tep) == oldhead)
+ break;
+ }
+}
+
+/* Move telemetry from committed list to processing list, reversing the
+ * list into chronological order. The processing list has been
+ * locked by the caller, and may be non-empty. We append the
+ * reversed committed list on to the tail of the processing list.
+ * The committed list may grow even while we run, so use atomic
+ * operations to swap NULL into the committed list head.
+ *
+ * Note that "chronological order" means the order in which producers
+ * won additions to the processing list, which may not reflect the
+ * strict chronological order of the associated events if events are
+ * closely spaced in time and contend for the processing list at once.
+ */
+
+static struct mctelem_ent *dangling[MC_NCLASSES];
+
+static void mctelem_append_processing(mctelem_class_t which)
+{
+ mctelem_class_t target = which == MC_URGENT ?
+ MC_URGENT : MC_NONURGENT;
+ struct mctelem_ent **commlp = &mctctl.mctc_committed[target];
+ struct mctelem_ent **proclhp = &mctctl.mctc_processing_head[target];
+ struct mctelem_ent **procltp = &mctctl.mctc_processing_tail[target];
+ struct mctelem_ent *tep, *ltep;
+
+ /* Check for an empty list; no race since we hold the processing lock */
+ if (*commlp == NULL)
+ return;
+
+ /* Atomically unlink the committed list, and keep a pointer to
+ * the list we unlink in a well-known location so it can be
+ * picked up in panic code should we panic between this unlink
+ * and the append to the processing list. */
+ for (;;) {
+ dangling[target] = *commlp;
+ wmb();
+ if (cmpxchgptr(commlp, dangling[target], NULL) ==
+ dangling[target])
+ break;
+ }
+
+ if (dangling[target] == NULL)
+ return;
+
+ /* Traverse the list following the previous pointers (reverse
+ * chronological order). For each entry fill in the next pointer
+ * and transition the element state. */
+ for (tep = dangling[target], ltep = NULL; tep != NULL;
+ tep = tep->mcte_prev) {
+ MCTE_TRANSITION_STATE(tep, COMMITTED, PROCESSING);
+ tep->mcte_next = ltep;
+ ltep = tep;
+ }
+
+ /* ltep points to the head of a chronologically ordered linked
+ * list of telemetry entries ending at the most recent entry
+ * dangling[target] if mcte_next is followed; tack this on to
+ * the processing list.
+ */
+ if (*proclhp == NULL) {
+ *proclhp = ltep;
+ *procltp = dangling[target];
+ } else {
+ (*procltp)->mcte_next = ltep;
+ ltep->mcte_prev = *procltp;
+ *procltp = dangling[target];
+ }
+ wmb();
+ dangling[target] = NULL;
+ wmb();
+}
+
+mctelem_cookie_t mctelem_consume_oldest_begin(mctelem_class_t which)
+{
+ mctelem_class_t target = (which == MC_URGENT) ?
+ MC_URGENT : MC_NONURGENT;
+ struct mctelem_ent *tep;
+
+ spin_lock(&processing_lock);
+ mctelem_append_processing(target);
+ if ((tep = mctctl.mctc_processing_head[target]) == NULL) {
+ spin_unlock(&processing_lock);
+ return NULL;
+ }
+
+ mctelem_processing_hold(tep);
+ wmb();
+ spin_unlock(&processing_lock);
+ return MCTE2COOKIE(tep);
+}
+
+void mctelem_consume_oldest_end(mctelem_cookie_t cookie)
+{
+ struct mctelem_ent *tep = COOKIE2MCTE(cookie);
+
+ spin_lock(&processing_lock);
+ mctelem_processing_release(tep);
+ wmb();
+ spin_unlock(&processing_lock);
+}
+
+void mctelem_ack(mctelem_class_t which, mctelem_cookie_t cookie)
+{
+ mctelem_class_t target = (which == MC_URGENT) ?
+ MC_URGENT : MC_NONURGENT;
+ struct mctelem_ent *tep = COOKIE2MCTE(cookie);
+
+ if (tep == NULL)
+ return;
+
+ spin_lock(&processing_lock);
+ if (tep == mctctl.mctc_processing_head[target])
+ mctelem_processing_release(tep);
+ wmb();
+ spin_unlock(&processing_lock);
+}
--- /dev/null
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation, version 2 of the
+ * License.
+ */
+
+#ifndef _MCTELEM_H
+
+#define _MCTELEM_H
+
+#include <xen/init.h>
+#include <xen/smp.h>
+#include <asm/traps.h>
+
+/* Helper functions used for collecting error telemetry.
+ *
+ * mctelem_init preallocates a number of data areas for use during
+ * machine check data "logout". Two classes are distinguished -
+ * urgent uses, intended for use from machine check exception handlers,
+ * and non-urgent uses intended for use from error pollers.
+ * Associated with each logout entry of whatever class is a data area
+ * sized per the single argument to mctelem_init. mctelem_init should be
+ * called from MCA init code before anybody has the chance to change the
+ * machine check vector with mcheck_mca_logout or to use mcheck_mca_logout.
+ *
+ * To reserve an entry of a given class for use in logout, call
+ * mctelem_reserve (or use the common handler functions which do all this
+ * for you). This returns an opaque cookie, or NULL if no elements are
+ * available. Elements are reserved with an atomic operation so no deadlock
+ * will occur if, for example, a machine check exception interrupts a
+ * scheduled error poll. The implementation will raid free non-urgent
+ * entries if all urgent entries are in use when an urgent request is received.
+ * Once an entry is reserved the caller must eventually perform exactly
+ * one of two actions: mctelem_commit or mctelem_dismiss.
+ *
+ * On mctelem_commit the entry is appended to a processing list; mctelem_dismiss
+ * frees the element without processing. After either call the cookie
+ * must not be referenced again.
+ *
+ * To consume committed telemetry call mctelem_consume_oldest_begin
+ * which will return a cookie referencing the oldest (first committed)
+ * entry of the requested class. Access the associated data using
+ * mctelem_dataptr and when finished use mctelem_consume_oldest_end (in the
+ * begin .. end bracket you are guaranteed that the entry cannot be freed
+ * even if it is ack'd elsewhere). Once the ultimate consumer of the
+ * telemetry has processed it to stable storage it should acknowledge
+ * the telemetry quoting the cookie id, at which point we will free
+ * the element from the processing list.
+ */
+
+typedef struct mctelem_cookie *mctelem_cookie_t;
+
+typedef enum mctelem_class {
+ MC_URGENT,
+ MC_NONURGENT
+} mctelem_class_t;
+
+extern void mctelem_init(int);
+extern mctelem_cookie_t mctelem_reserve(mctelem_class_t);
+extern void *mctelem_dataptr(mctelem_cookie_t);
+extern void mctelem_commit(mctelem_cookie_t);
+extern void mctelem_dismiss(mctelem_cookie_t);
+extern mctelem_cookie_t mctelem_consume_oldest_begin(mctelem_class_t);
+extern void mctelem_consume_oldest_end(mctelem_cookie_t);
+extern void mctelem_ack(mctelem_class_t, mctelem_cookie_t);
+
+#endif
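
To make the consumer side of the contract above concrete, a minimal sketch of a hypothetical non-urgent consumer (not part of this patch) could look like:

    mctelem_cookie_t mctc = mctelem_consume_oldest_begin(MC_NONURGENT);

    if (mctc != NULL) {
        struct mc_info *mi = mctelem_dataptr(mctc);
        x86_mcinfo_dump(mi);                /* stand-in for copying to stable storage */
        mctelem_consume_oldest_end(mctc);
        mctelem_ack(MC_NONURGENT, mctc);    /* telemetry logged: free the entry */
    }
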
#include <xen/smp.h>
#include <xen/timer.h>
#include <xen/errno.h>
+#include <xen/event.h>
+#include <xen/sched.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/msr.h>
#include "mce.h"
-static int firstbank;
+static cpu_banks_t bankmask;
static struct timer mce_timer;
-#define MCE_PERIOD MILLISECS(15000)
+#define MCE_PERIOD MILLISECS(8000)
+#define MCE_PERIOD_MIN MILLISECS(2000)
+#define MCE_PERIOD_MAX MILLISECS(16000)
+
+static uint64_t period = MCE_PERIOD;
+static int adjust = 0;
+static int variable_period = 1;
static void mce_checkregs (void *info)
{
- u32 low, high;
- int i;
-
- for (i=firstbank; i<nr_mce_banks; i++) {
- rdmsr (MSR_IA32_MC0_STATUS+i*4, low, high);
-
- if (high & (1<<31)) {
- printk(KERN_INFO "MCE: The hardware reports a non "
- "fatal, correctable incident occurred on "
- "CPU %d.\n",
- smp_processor_id());
- printk (KERN_INFO "Bank %d: %08x%08x\n", i, high, low);
-
- /* Scrub the error so we don't pick it up in MCE_RATE seconds time. */
- wrmsr (MSR_IA32_MC0_STATUS+i*4, 0UL, 0UL);
-
- /* Serialize */
- wmb();
- add_taint(TAINT_MACHINE_CHECK);
+ mctelem_cookie_t mctc;
+ struct mca_summary bs;
+ static uint64_t dumpcount = 0;
+
+ mctc = mcheck_mca_logout(MCA_POLLER, bankmask, &bs);
+
+ if (bs.errcnt && mctc != NULL) {
+ adjust++;
+
+ /* If Dom0 enabled the VIRQ_MCA event, then notify it.
+ * Otherwise, if dom0 has had plenty of time to register
+ * the virq handler but still hasn't, then dump telemetry
+ * to the Xen console. The call count may be incremented
+ * on multiple cpus at once and is indicative only - just
+ * a simple-minded attempt to avoid spamming the console
+ * for corrected errors in early startup.
+ */
+
+ if (guest_enabled_event(dom0->vcpu[0], VIRQ_MCA)) {
+ mctelem_commit(mctc);
+ send_guest_global_virq(dom0, VIRQ_MCA);
+ } else if (++dumpcount >= 10) {
+ x86_mcinfo_dump((struct mc_info *)mctelem_dataptr(mctc));
+ mctelem_dismiss(mctc);
+ } else {
+ mctelem_dismiss(mctc);
}
+ } else if (mctc != NULL) {
+ mctelem_dismiss(mctc);
}
}
static void mce_work_fn(void *data)
{
on_each_cpu(mce_checkregs, NULL, 1, 1);
- set_timer(&mce_timer, NOW() + MCE_PERIOD);
+
+ if (variable_period) {
+ if (adjust)
+ period /= (adjust + 1);
+ else
+ period *= 2;
+ if (period > MCE_PERIOD_MAX)
+ period = MCE_PERIOD_MAX;
+ if (period < MCE_PERIOD_MIN)
+ period = MCE_PERIOD_MIN;
+ }
+
+ set_timer(&mce_timer, NOW() + period);
+ adjust = 0;
}
static int __init init_nonfatal_mce_checker(void)
struct cpuinfo_x86 *c = &boot_cpu_data;
/* Check for MCE support */
- if (!cpu_has(c, X86_FEATURE_MCE))
+ if (!mce_available(c))
return -ENODEV;
- /* Check for PPro style MCA */
- if (!cpu_has(c, X86_FEATURE_MCA))
- return -ENODEV;
+ memcpy(&bankmask, &mca_allbanks, sizeof (cpu_banks_t));
+ if (mce_firstbank(c) == 1)
+ clear_bit(0, bankmask);
/*
* Check for non-fatal errors every MCE_RATE s
switch (c->x86_vendor) {
case X86_VENDOR_AMD:
if (c->x86 == 6) { /* K7 */
- firstbank = 1;
init_timer(&mce_timer, mce_work_fn, NULL, 0);
set_timer(&mce_timer, NOW() + MCE_PERIOD);
break;
break;
case X86_VENDOR_INTEL:
- init_timer(&mce_timer, mce_work_fn, NULL, 0);
- set_timer(&mce_timer, NOW() + MCE_PERIOD);
+ /*
+ * The P5 family is different. P4/P6 and latest CPUs share the
+ * same polling methods.
+ */
+ if ( c->x86 != 5 )
+ {
+ init_timer(&mce_timer, mce_work_fn, NULL, 0);
+ set_timer(&mce_timer, NOW() + MCE_PERIOD);
+ }
break;
}
- printk(KERN_INFO "MCA: Machine check polling timer started.\n");
+ printk(KERN_INFO "mcheck_poll: Machine check polling timer started.\n");
return 0;
}
__initcall(init_nonfatal_mce_checker);
#include <asm/msr.h>
#include "mce.h"
+#include "x86_mca.h"
/* Machine check handler for Pentium class Intel */
-static fastcall void pentium_machine_check(struct cpu_user_regs * regs, long error_code)
+static void pentium_machine_check(struct cpu_user_regs * regs, long error_code)
{
u32 loaddr, hi, lotype;
rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi);
}
/* Set up machine check reporting for processors with Intel style MCE */
-void intel_p5_mcheck_init(struct cpuinfo_x86 *c)
+int intel_p5_mcheck_init(struct cpuinfo_x86 *c)
{
u32 l, h;
- /*Check for MCE support */
- if( !cpu_has(c, X86_FEATURE_MCE) )
- return;
-
/* Default P5 to off as its often misconnected */
if(mce_disabled != -1)
- return;
- machine_check_vector = pentium_machine_check;
- wmb();
+ return 0;
+ x86_mce_vector_register(pentium_machine_check);
/* Read registers before enabling */
rdmsr(MSR_IA32_P5_MC_ADDR, l, h);
/* Enable MCE */
set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id());
+
+ return 1;
}
#include "mce.h"
/* Machine check handler for WinChip C6 */
-static fastcall void winchip_machine_check(struct cpu_user_regs * regs, long error_code)
+static void winchip_machine_check(struct cpu_user_regs * regs, long error_code)
{
printk(KERN_EMERG "CPU0: Machine Check Exception.\n");
add_taint(TAINT_MACHINE_CHECK);
}
/* Set up machine check reporting on the Winchip C6 series */
-void winchip_mcheck_init(struct cpuinfo_x86 *c)
+int winchip_mcheck_init(struct cpuinfo_x86 *c)
{
u32 lo, hi;
- machine_check_vector = winchip_machine_check;
+
wmb();
+ x86_mce_vector_register(winchip_machine_check);
rdmsr(MSR_IDT_FCR1, lo, hi);
lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */
lo&= ~(1<<4); /* Enable MCE */
wrmsr(MSR_IDT_FCR1, lo, hi);
set_in_cr4(X86_CR4_MCE);
printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n");
+ return (1);
}
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#ifndef X86_MCA_H
+
+#define X86_MCA_H
+
/* The MCA/MCE MSRs should not be used anywhere else.
* They are cpu family/model specific and are only for use
/* Bitfield of the MSR_IA32_MCG_CAP register */
#define MCG_CAP_COUNT 0x00000000000000ffULL
#define MCG_CTL_P 0x0000000000000100ULL
-/* Bits 9-63 are reserved */
+#define MCG_EXT_P (1UL<<9)
+#define MCG_EXT_CNT (16)
+#define MCG_CMCI_P (1UL<<10)
+/* Other bits are reserved */
/* Bitfield of the MSR_IA32_MCG_STATUS register */
#define MCG_STATUS_RIPV 0x0000000000000001ULL
/* reserved bits */
#define MCi_STATUS_OTHER_RESERVED2 0x0180000000000000ULL
+/* Bitfield of MSR_K8_HWCR register */
+#define K8_HWCR_MCi_STATUS_WREN (1ULL << 18)
+
+/*Intel Specific bitfield*/
+#define CMCI_THRESHOLD 0x2
+
+#include <asm/domain.h>
+typedef DECLARE_BITMAP(cpu_banks_t, MAX_NR_BANKS);
+DECLARE_PER_CPU(cpu_banks_t, mce_banks_owned);
+
+/* The interfaces below are defined for MCA internal processing:
+ * a. pre_handler is called early in MCA ISR context, mainly for early
+ * need_reset detection so that no log entries are lost. It is also used
+ * to identify the impacted DOMAIN, if possible.
+ * b. mca_error_handler is actually an (error_action_index,
+ * recovery_handler pointer) pair. The defined recovery_handler
+ * performs the actual recovery operations, such as page_offline or
+ * cpu_offline, in softIRQ context when the per-bank MCA error matches
+ * the corresponding mca_code index. If pre_handler cannot identify the
+ * impacted domain, recovery_handler must figure it out.
+ */
+
+/* MCA error has been recovered successfully by the recovery action */
+#define MCA_RECOVERED (0x1 << 0)
+/* MCA error impacts the specified DOMAIN in the owner field below */
+#define MCA_OWNER (0x1 << 1)
+/* MCA error can't be recovered and needs a reset */
+#define MCA_NEED_RESET (0x1 << 2)
+/* MCA error needs further action in softIRQ context for recovery */
+#define MCA_MORE_ACTION (0x1 << 3)
+
+struct mca_handle_result
+{
+ uint32_t result;
+ /* Used when result & MCA_OWNER is set */
+ domid_t owner;
+ /* Used by mca_error_handler when result & MCA_RECOVERED is set */
+ struct recovery_action *action;
+};
+
+extern void (*mca_prehandler)( struct cpu_user_regs *regs,
+ struct mca_handle_result *result);
+
+struct mca_error_handler
+{
+ /* Assume the corresponding recovery action can be uniquely
+ * identified by mca_code. Otherwise, we might need to have
+ * a separate function to decode the corresponding actions
+ * for the particular mca error later.
+ */
+ uint16_t mca_code;
+ void (*recovery_handler)( struct mcinfo_bank *bank,
+ struct mcinfo_global *global,
+ struct mcinfo_extended *extension,
+ struct mca_handle_result *result);
+};
+
+/* Global variables */
+extern int mce_disabled;
+extern unsigned int nr_mce_banks;
+
+#endif /* X86_MCA_H */
DEFINE_PER_CPU(unsigned long, cr4);
static void default_idle(void);
+static void default_dead_idle(void);
void (*pm_idle) (void) = default_idle;
+void (*dead_idle) (void) = default_dead_idle;
static void paravirt_ctxt_switch_from(struct vcpu *v);
static void paravirt_ctxt_switch_to(struct vcpu *v);
local_irq_enable();
}
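+/* Default handler for a CPU that has been taken offline: halt forever. */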
+static void default_dead_idle(void)
+{
+ for ( ; ; )
+ halt();
+}
+
static void play_dead(void)
{
/*
/* With physical CPU hotplug, we should halt the cpu. */
local_irq_disable();
- for ( ; ; )
- halt();
+ (*dead_idle)();
}
void idle_loop(void)
}
else
{
- list_for_each_entry ( page, &d->page_list, list )
+ page_list_for_each ( page, &d->page_list )
{
- printk(" DomPage %p: caf=%08x, taf=%" PRtype_info "\n",
+ printk(" DomPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
}
}
- list_for_each_entry ( page, &d->xenpage_list, list )
+ if ( is_hvm_domain(d) )
+ {
+ p2m_pod_dump_data(d);
+ }
+
+ page_list_for_each ( page, &d->xenpage_list )
{
- printk(" XenPage %p: caf=%08x, taf=%" PRtype_info "\n",
+ printk(" XenPage %p: caf=%08lx, taf=%" PRtype_info "\n",
_p(page_to_mfn(page)),
page->count_info, page->u.inuse.type_info);
}
}
+struct domain *alloc_domain_struct(void)
+{
+ struct domain *d;
+ /*
+ * We pack the MFN of the domain structure into a 32-bit field within
+ * the page_info structure. Hence the MEMF_bits() restriction.
+ */
+ d = alloc_xenheap_pages(
+ get_order_from_bytes(sizeof(*d)), MEMF_bits(32 + PAGE_SHIFT));
+ if ( d != NULL )
+ memset(d, 0, sizeof(*d));
+ return d;
+}
+
+void free_domain_struct(struct domain *d)
+{
+ free_xenheap_pages(d, get_order_from_bytes(sizeof(*d)));
+}
+
struct vcpu *alloc_vcpu_struct(void)
{
struct vcpu *v;
- if ( (v = xmalloc(struct vcpu)) != NULL )
+ /*
+ * This structure contains embedded PAE PDPTEs, used when an HVM guest
+ * runs on shadow pagetables outside of 64-bit mode. In this case the CPU
+ * may require that the shadow CR3 points below 4GB, and hence the whole
+ * structure must satisfy this restriction. Thus we specify MEMF_bits(32).
+ */
+ v = alloc_xenheap_pages(get_order_from_bytes(sizeof(*v)), MEMF_bits(32));
+ if ( v != NULL )
memset(v, 0, sizeof(*v));
return v;
}
void free_vcpu_struct(struct vcpu *v)
{
- xfree(v);
+ free_xenheap_pages(v, get_order_from_bytes(sizeof(*v)));
}
#ifdef CONFIG_COMPAT
static int setup_compat_l4(struct vcpu *v)
{
- struct page_info *pg = alloc_domheap_page(NULL, 0);
+ struct page_info *pg;
l4_pgentry_t *l4tab;
+ pg = alloc_domheap_page(NULL, MEMF_node(vcpu_to_node(v)));
if ( pg == NULL )
return -ENOMEM;
/* This page needs to look like a pagetable so that it can be shadowed */
pg->u.inuse.type_info = PGT_l4_page_table|PGT_validated|1;
- l4tab = copy_page(page_to_virt(pg), idle_pg_table);
+ l4tab = page_to_virt(pg);
+ copy_page(l4tab, idle_pg_table);
l4tab[0] = l4e_empty();
l4tab[l4_table_offset(LINEAR_PT_VIRT_START)] =
l4e_from_page(pg, __PAGE_HYPERVISOR);
if ( is_idle_domain(d) )
{
v->arch.schedule_tail = continue_idle_domain;
- if ( v->vcpu_id )
- v->arch.cr3 = d->vcpu[0]->arch.cr3;
- else if ( !*idle_vcpu )
- v->arch.cr3 = __pa(idle_pg_table);
- else if ( !(v->arch.cr3 = clone_idle_pagetable(v)) )
- return -ENOMEM;
+ v->arch.cr3 = __pa(idle_pg_table);
}
v->arch.guest_context.ctrlreg[4] =
v->arch.perdomain_ptes =
d->arch.mm_perdomain_pt + (v->vcpu_id << GDT_LDT_VCPU_SHIFT);
+ spin_lock_init(&v->arch.shadow_ldt_lock);
+
return (is_pv_32on64_vcpu(v) ? setup_compat_l4(v) : 0);
}
hvm_vcpu_destroy(v);
}
+extern uint64_t g_mcg_cap;
int arch_domain_create(struct domain *d, unsigned int domcr_flags)
{
#ifdef __x86_64__
hvm_funcs.hap_supported &&
(domcr_flags & DOMCRF_hap);
+ d->arch.s3_integrity = !!(domcr_flags & DOMCRF_s3_integrity);
+
INIT_LIST_HEAD(&d->arch.pdev_list);
d->arch.relmem = RELMEM_not_started;
- INIT_LIST_HEAD(&d->arch.relmem_list);
+ INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
pdpt_order = get_order_from_bytes(PDPT_L1_ENTRIES * sizeof(l1_pgentry_t));
- d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order);
+ d->arch.mm_perdomain_pt = alloc_xenheap_pages(pdpt_order, 0);
if ( d->arch.mm_perdomain_pt == NULL )
goto fail;
memset(d->arch.mm_perdomain_pt, 0, PAGE_SIZE << pdpt_order);
if ( d->arch.ioport_caps == NULL )
goto fail;
- if ( (d->shared_info = alloc_xenheap_page()) == NULL )
+ /*
+ * The shared_info machine address must fit in a 32-bit field within a
+ * 32-bit guest's start_info structure. Hence we specify MEMF_bits(32).
+ */
+ if ( (d->shared_info = alloc_xenheap_pages(0, MEMF_bits(32))) == NULL )
goto fail;
clear_page(d->shared_info);
if ( (rc = iommu_domain_init(d)) != 0 )
goto fail;
+
+ /* For guest vMCE MSR virtualization */
+ d->arch.vmca_msrs.mcg_status = 0x0;
+ d->arch.vmca_msrs.mcg_cap = g_mcg_cap;
+ d->arch.vmca_msrs.mcg_ctl = (uint64_t)~0x0;
+ d->arch.vmca_msrs.nr_injection = 0;
+ memset(d->arch.vmca_msrs.mci_ctl, 0x1,
+ sizeof(d->arch.vmca_msrs.mci_ctl));
+ INIT_LIST_HEAD(&d->arch.vmca_msrs.impact_header);
+
}
if ( is_hvm_domain(d) )
* lost. The domain will get a spurious event, but it can cope.
*/
vcpu_info(v, evtchn_upcall_pending) = 1;
- for ( i = 0; i < BITS_PER_GUEST_LONG(d); i++ )
+ for ( i = 0; i < BITS_PER_EVTCHN_WORD(d); i++ )
set_bit(i, &vcpu_info(v, evtchn_pending_sel));
return 0;
}
}
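+/* Only non-idle PV vcpus need the full per-domain GDT mapping. */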
+static inline int need_full_gdt(struct vcpu *v)
+{
+ return (!is_hvm_vcpu(v) && !is_idle_vcpu(v));
+}
+
static void __context_switch(void)
{
struct cpu_user_regs *stack_regs = guest_cpu_user_regs();
- unsigned int i, cpu = smp_processor_id();
+ unsigned int cpu = smp_processor_id();
struct vcpu *p = per_cpu(curr_vcpu, cpu);
struct vcpu *n = current;
struct desc_struct *gdt;
- struct page_info *page;
struct desc_ptr gdt_desc;
ASSERT(p != n);
gdt = !is_pv_32on64_vcpu(n) ? per_cpu(gdt_table, cpu) :
per_cpu(compat_gdt_table, cpu);
- page = virt_to_page(gdt);
- for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
+ if ( need_full_gdt(n) )
{
- l1e_write(n->domain->arch.mm_perdomain_pt +
- (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE + i,
- l1e_from_page(page + i, __PAGE_HYPERVISOR));
+ struct page_info *page = virt_to_page(gdt);
+ unsigned int i;
+ for ( i = 0; i < NR_RESERVED_GDT_PAGES; i++ )
+ l1e_write(n->domain->arch.mm_perdomain_pt +
+ (n->vcpu_id << GDT_LDT_VCPU_SHIFT) +
+ FIRST_RESERVED_GDT_PAGE + i,
+ l1e_from_page(page + i, __PAGE_HYPERVISOR));
}
- if ( p->vcpu_id != n->vcpu_id )
+ if ( need_full_gdt(p) &&
+ ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(n)) )
{
gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
gdt_desc.base = (unsigned long)(gdt - FIRST_RESERVED_GDT_ENTRY);
write_ptbase(n);
- if ( p->vcpu_id != n->vcpu_id )
+ if ( need_full_gdt(n) &&
+ ((p->vcpu_id != n->vcpu_id) || !need_full_gdt(p)) )
{
+ gdt_desc.limit = LAST_RESERVED_GDT_BYTE;
gdt_desc.base = GDT_VIRT_START(n);
asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
}
flush_tlb_mask(next->vcpu_dirty_cpumask);
}
- local_irq_disable();
-
if ( is_hvm_vcpu(prev) && !list_empty(&prev->arch.hvm_vcpu.tm_list) )
pt_save_timer(prev);
+ local_irq_disable();
+
set_current(next);
if ( (per_cpu(curr_vcpu, cpu) == next) || is_idle_vcpu(next) )
#endif
static int relinquish_memory(
- struct domain *d, struct list_head *list, unsigned long type)
+ struct domain *d, struct page_list_head *list, unsigned long type)
{
- struct list_head *ent;
struct page_info *page;
unsigned long x, y;
int ret = 0;
/* Use a recursive lock, as we may enter 'free_domheap_page'. */
spin_lock_recursive(&d->page_alloc_lock);
- ent = list->next;
- while ( ent != list )
+ while ( (page = page_list_remove_head(list)) )
{
- page = list_entry(ent, struct page_info, list);
-
/* Grab a reference to the page so it won't disappear from under us. */
if ( unlikely(!get_page(page, d)) )
{
/* Couldn't get a reference -- someone is freeing this page. */
- ent = ent->next;
- list_move_tail(&page->list, &d->arch.relmem_list);
+ page_list_add_tail(page, &d->arch.relmem_list);
continue;
}
if ( test_and_clear_bit(_PGT_pinned, &page->u.inuse.type_info) )
- put_page_and_type(page);
+ ret = put_page_and_type_preemptible(page, 1);
+ switch ( ret )
+ {
+ case 0:
+ break;
+ case -EAGAIN:
+ case -EINTR:
+ page_list_add(page, list);
+ set_bit(_PGT_pinned, &page->u.inuse.type_info);
+ put_page(page);
+ goto out;
+ default:
+ BUG();
+ }
if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
put_page(page);
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- /*
- * Forcibly drop reference counts of page tables above top most (which
- * were skipped to prevent long latencies due to deep recursion - see
- * the special treatment in free_lX_table()).
- */
- y = page->u.inuse.type_info;
- if ( (type < PGT_root_page_table) &&
- unlikely(((y + PGT_type_mask) &
- (PGT_type_mask|PGT_validated)) == type) )
- {
- BUG_ON((y & PGT_count_mask) >=
- (page->count_info & PGC_count_mask));
- while ( y & PGT_count_mask )
- {
- put_page_and_type(page);
- y = page->u.inuse.type_info;
- }
- }
-#endif
-
/*
* Forcibly invalidate top-most, still valid page tables at this point
* to break circular 'linear page table' references as well as clean up
x & ~(PGT_validated|PGT_partial));
if ( likely(y == x) )
{
- if ( free_page_type(page, x, 0) != 0 )
+ /* No need for an atomic update of type_info here: no one else updates it. */
+ switch ( ret = free_page_type(page, x, 1) )
+ {
+ case 0:
+ break;
+ case -EINTR:
+ page_list_add(page, list);
+ page->u.inuse.type_info |= PGT_validated;
+ if ( x & PGT_partial )
+ put_page(page);
+ put_page(page);
+ ret = -EAGAIN;
+ goto out;
+ case -EAGAIN:
+ page_list_add(page, list);
+ page->u.inuse.type_info |= PGT_partial;
+ if ( x & PGT_partial )
+ put_page(page);
+ goto out;
+ default:
BUG();
+ }
+ if ( x & PGT_partial )
+ {
+ page->u.inuse.type_info--;
+ put_page(page);
+ }
break;
}
}
- /* Follow the list chain and /then/ potentially free the page. */
- ent = ent->next;
- list_move_tail(&page->list, &d->arch.relmem_list);
+ /* Put the page on the list and /then/ potentially free it. */
+ page_list_add_tail(page, &d->arch.relmem_list);
put_page(page);
if ( hypercall_preempt_check() )
}
}
- list_splice_init(&d->arch.relmem_list, list);
+ /* list is empty at this point. */
+ if ( !page_list_empty(&d->arch.relmem_list) )
+ {
+ *list = d->arch.relmem_list;
+ INIT_PAGE_LIST_HEAD(&d->arch.relmem_list);
+ }
out:
spin_unlock_recursive(&d->page_alloc_lock);
unmap_vcpu_info(v);
}
+ if ( d->arch.pirq_eoi_map != NULL )
+ {
+ unmap_domain_page_global(d->arch.pirq_eoi_map);
+ put_page_and_type(mfn_to_page(d->arch.pirq_eoi_map_mfn));
+ d->arch.pirq_eoi_map = NULL;
+ }
+
d->arch.relmem = RELMEM_xen;
/* fallthrough */
/* fallthrough */
case RELMEM_done:
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- ret = relinquish_memory(d, &d->page_list, PGT_l1_page_table);
- if ( ret )
- return ret;
-#endif
break;
default:
BUG();
}
- /* Free page used by xen oprofile buffer. */
- free_xenoprof_pages(d);
-
if ( is_hvm_domain(d) )
hvm_domain_relinquish_resources(d);
*eax = *ebx = *ecx = *edx = 0;
}
+void vcpu_kick(struct vcpu *v)
+{
+ /*
+ * NB1. 'pause_flags' and 'processor' must be checked /after/ update of
+ * pending flag. These values may fluctuate (after all, we hold no
+ * locks) but the key insight is that each change will cause
+ * evtchn_upcall_pending to be polled.
+ *
+ * NB2. We save the running flag across the unblock to avoid a needless
+ * IPI for domains that we IPI'd to unblock.
+ */
+ bool_t running = v->is_running;
+ vcpu_unblock(v);
+ if ( running && (in_irq() || (v != current)) )
+ cpu_raise_softirq(v->processor, VCPU_KICK_SOFTIRQ);
+}
+
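+/* Mark an event-channel upcall pending for @v and notify it, via a virtual
+ * interrupt for HVM vcpus or a kick for PV vcpus, unless one was already
+ * pending. */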
+void vcpu_mark_events_pending(struct vcpu *v)
+{
+ int already_pending = test_and_set_bit(
+ 0, (unsigned long *)&vcpu_info(v, evtchn_upcall_pending));
+
+ if ( already_pending )
+ return;
+
+ if ( is_hvm_vcpu(v) )
+ hvm_assert_evtchn_irq(v);
+ else
+ vcpu_kick(v);
+}
+
+static void vcpu_kick_softirq(void)
+{
+ /*
+ * Nothing to do here: we merely prevent notifiers from racing with checks
+ * executed on return to guest context with interrupts enabled. See, for
+ * example, xxx_intr_assist() executed on return to HVM guest context.
+ */
+}
+
+static int __init init_vcpu_kick_softirq(void)
+{
+ open_softirq(VCPU_KICK_SOFTIRQ, vcpu_kick_softirq);
+ return 0;
+}
+__initcall(init_vcpu_kick_softirq);
+
+
/*
* Local variables:
* mode: C
#include <xen/iocap.h>
#include <xen/bitops.h>
#include <xen/compat.h>
+#include <xen/libelf.h>
#include <asm/regs.h>
#include <asm/system.h>
#include <asm/io.h>
#include <asm/e820.h>
#include <public/version.h>
-#include <public/libelf.h>
+
+int __init bzimage_parse(
+ char *output, char **image_start, unsigned long *image_len);
extern unsigned long initial_images_nrpages(void);
extern void discard_initial_images(void);
int __init construct_dom0(
struct domain *d,
- unsigned long _image_start, unsigned long image_len,
+ unsigned long _image_base,
+ unsigned long _image_start, unsigned long image_len,
unsigned long _initrd_start, unsigned long initrd_len,
char *cmdline)
{
struct vcpu *v = d->vcpu[0];
unsigned long long value;
#if defined(__i386__)
+ char *image_base = (char *)_image_base; /* use lowmem mappings */
char *image_start = (char *)_image_start; /* use lowmem mappings */
char *initrd_start = (char *)_initrd_start; /* use lowmem mappings */
#elif defined(__x86_64__)
+ char *image_base = __va(_image_base);
char *image_start = __va(_image_start);
char *initrd_start = __va(_initrd_start);
#endif
nr_pages = compute_dom0_nr_pages();
+ if ( (rc = bzimage_parse(image_base, &image_start, &image_len)) != 0 )
+ return rc;
+
if ( (rc = elf_init(&elf, image_start, image_len)) != 0 )
return rc;
#ifdef VERBOSE
#endif
}
+ if ( (parms.p2m_base != UNSET_ADDR) && elf_32bit(&elf) )
+ {
+ printk(XENLOG_WARNING "P2M table base ignored\n");
+ parms.p2m_base = UNSET_ADDR;
+ }
+
domain_set_alloc_bitsize(d);
/*
vphysmap_end = vphysmap_start + (nr_pages * (!is_pv_32on64_domain(d) ?
sizeof(unsigned long) :
sizeof(unsigned int)));
+ if ( parms.p2m_base != UNSET_ADDR )
+ vphysmap_end = vphysmap_start;
vstartinfo_start = round_pgup(vphysmap_end);
vstartinfo_end = (vstartinfo_start +
sizeof(struct start_info) +
/* Ensure that our low-memory 1:1 mapping covers the allocation. */
page = alloc_domheap_pages(d, order, MEMF_bits(30));
#else
+ if ( parms.p2m_base != UNSET_ADDR )
+ {
+ vphysmap_start = parms.p2m_base;
+ vphysmap_end = vphysmap_start + nr_pages * sizeof(unsigned long);
+ }
page = alloc_domheap_pages(d, order, 0);
#endif
if ( page == NULL )
_p(v_start), _p(v_end));
printk(" ENTRY ADDRESS: %p\n", _p(parms.virt_entry));
- if ( ((v_end - v_start)>>PAGE_SHIFT) > nr_pages )
- {
- printk("Initial guest OS requires too much space\n"
- "(%luMB is greater than %luMB limit)\n",
- (v_end-v_start)>>20, nr_pages>>(20-PAGE_SHIFT));
- return -ENOMEM;
- }
-
mpt_alloc = (vpt_start - v_start) +
(unsigned long)pfn_to_paddr(alloc_spfn);
/* WARNING: The new domain must have its 'processor' field filled in! */
l3start = l3tab = (l3_pgentry_t *)mpt_alloc; mpt_alloc += PAGE_SIZE;
l2start = l2tab = (l2_pgentry_t *)mpt_alloc; mpt_alloc += 4*PAGE_SIZE;
- memcpy(l2tab, idle_pg_table_l2, 4*PAGE_SIZE);
- for (i = 0; i < 4; i++) {
+ for (i = 0; i < L3_PAGETABLE_ENTRIES; i++) {
+ copy_page(l2tab + i * L2_PAGETABLE_ENTRIES,
+ idle_pg_table_l2 + i * L2_PAGETABLE_ENTRIES);
l3tab[i] = l3e_from_paddr((u32)l2tab + i*PAGE_SIZE, L3_PROT);
l2tab[(LINEAR_PT_VIRT_START >> L2_PAGETABLE_SHIFT)+i] =
l2e_from_paddr((u32)l2tab + i*PAGE_SIZE, __PAGE_HYPERVISOR);
(void)alloc_vcpu(d, i, i % num_online_cpus());
/* Set up CR3 value for write_ptbase */
- if ( paging_mode_enabled(v->domain) )
+ if ( paging_mode_enabled(d) )
paging_update_paging_modes(v);
else
update_cr3(v);
- /* Install the new page tables. */
- local_irq_disable();
+ /* We run on dom0's page tables for the final part of the build process. */
write_ptbase(v);
/* Copy the OS image and free temporary buffer. */
(parms.virt_hypercall >= v_end) )
{
write_ptbase(current);
- local_irq_enable();
printk("Invalid HYPERCALL_PAGE field in ELF notes.\n");
return -1;
}
- hypercall_page_initialise(d, (void *)(unsigned long)parms.virt_hypercall);
+ hypercall_page_initialise(
+ d, (void *)(unsigned long)parms.virt_hypercall);
}
/* Copy the initial ramdisk. */
snprintf(si->magic, sizeof(si->magic), "xen-3.0-x86_%d%s",
elf_64bit(&elf) ? 64 : 32, parms.pae ? "p" : "");
+ count = d->tot_pages;
+#ifdef __x86_64__
+ /* Set up the phys->machine table if not part of the initial mapping. */
+ if ( parms.p2m_base != UNSET_ADDR )
+ {
+ unsigned long va = vphysmap_start;
+
+ if ( v_start <= vphysmap_end && vphysmap_start <= v_end )
+ panic("DOM0 P->M table overlaps initial mapping");
+
+ while ( va < vphysmap_end )
+ {
+ if ( d->tot_pages + ((round_pgup(vphysmap_end) - va)
+ >> PAGE_SHIFT) + 3 > nr_pages )
+ panic("Dom0 allocation too small for initial P->M table.\n");
+
+ l4tab = l4start + l4_table_offset(va);
+ if ( !l4e_get_intpte(*l4tab) )
+ {
+ page = alloc_domheap_page(d, 0);
+ if ( !page )
+ break;
+ /* No mapping, PGC_allocated + page-table page. */
+ page->count_info = PGC_allocated | 2;
+ page->u.inuse.type_info =
+ PGT_l3_page_table | PGT_validated | 1;
+ clear_page(page_to_virt(page));
+ *l4tab = l4e_from_page(page, L4_PROT);
+ }
+ l3tab = page_to_virt(l4e_get_page(*l4tab));
+ l3tab += l3_table_offset(va);
+ if ( !l3e_get_intpte(*l3tab) )
+ {
+ if ( cpu_has_page1gb &&
+ !(va & ((1UL << L3_PAGETABLE_SHIFT) - 1)) &&
+ vphysmap_end >= va + (1UL << L3_PAGETABLE_SHIFT) &&
+ (page = alloc_domheap_pages(d,
+ L3_PAGETABLE_SHIFT -
+ PAGE_SHIFT,
+ 0)) != NULL )
+ {
+ *l3tab = l3e_from_page(page,
+ L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+ va += 1UL << L3_PAGETABLE_SHIFT;
+ continue;
+ }
+ if ( (page = alloc_domheap_page(d, 0)) == NULL )
+ break;
+ else
+ {
+ /* No mapping, PGC_allocated + page-table page. */
+ page->count_info = PGC_allocated | 2;
+ page->u.inuse.type_info =
+ PGT_l2_page_table | PGT_validated | 1;
+ clear_page(page_to_virt(page));
+ *l3tab = l3e_from_page(page, L3_PROT);
+ }
+ }
+ l2tab = page_to_virt(l3e_get_page(*l3tab));
+ l2tab += l2_table_offset(va);
+ if ( !l2e_get_intpte(*l2tab) )
+ {
+ if ( !(va & ((1UL << L2_PAGETABLE_SHIFT) - 1)) &&
+ vphysmap_end >= va + (1UL << L2_PAGETABLE_SHIFT) &&
+ (page = alloc_domheap_pages(d,
+ L2_PAGETABLE_SHIFT -
+ PAGE_SHIFT,
+ 0)) != NULL )
+ {
+ *l2tab = l2e_from_page(page,
+ L1_PROT|_PAGE_DIRTY|_PAGE_PSE);
+ va += 1UL << L2_PAGETABLE_SHIFT;
+ continue;
+ }
+ if ( (page = alloc_domheap_page(d, 0)) == NULL )
+ break;
+ else
+ {
+ /* No mapping, PGC_allocated + page-table page. */
+ page->count_info = PGC_allocated | 2;
+ page->u.inuse.type_info =
+ PGT_l1_page_table | PGT_validated | 1;
+ clear_page(page_to_virt(page));
+ *l2tab = l2e_from_page(page, L2_PROT);
+ }
+ }
+ l1tab = page_to_virt(l2e_get_page(*l2tab));
+ l1tab += l1_table_offset(va);
+ BUG_ON(l1e_get_intpte(*l1tab));
+ page = alloc_domheap_page(d, 0);
+ if ( !page )
+ break;
+ *l1tab = l1e_from_page(page, L1_PROT|_PAGE_DIRTY);
+ va += PAGE_SIZE;
+ va &= PAGE_MASK;
+ }
+ if ( !page )
+ panic("Not enough RAM for DOM0 P->M table.\n");
+ }
+#endif
+
/* Write the phys->machine and machine->phys table entries. */
- for ( pfn = 0; pfn < d->tot_pages; pfn++ )
+ for ( pfn = 0; pfn < count; pfn++ )
{
mfn = pfn + alloc_spfn;
#ifndef NDEBUG
((unsigned int *)vphysmap_start)[pfn] = mfn;
set_gpfn_from_mfn(mfn, pfn);
}
+ si->first_p2m_pfn = pfn;
+ si->nr_p2m_frames = d->tot_pages - count;
+ page_list_for_each ( page, &d->page_list )
+ {
+ mfn = page_to_mfn(page);
+ if ( get_gpfn_from_mfn(mfn) >= count )
+ {
+ BUG_ON(is_pv_32bit_domain(d));
+ if ( !page->u.inuse.type_info &&
+ !get_page_and_type(page, d, PGT_writable_page) )
+ BUG();
+ ((unsigned long *)vphysmap_start)[pfn] = mfn;
+ set_gpfn_from_mfn(mfn, pfn);
+ ++pfn;
+#ifndef NDEBUG
+ ++alloc_epfn;
+#endif
+ }
+ }
+ BUG_ON(pfn != d->tot_pages);
while ( pfn < nr_pages )
{
if ( (page = alloc_chunk(d, nr_pages - d->tot_pages)) == NULL )
xlat_start_info(si, XLAT_start_info_console_dom0);
#endif
- /* Reinstate the caller's page tables. */
+ /* Return to idle domain's page tables. */
write_ptbase(current);
- local_irq_enable();
#if defined(__i386__)
/* Destroy low mappings - they were only for our convenience. */
struct domain *d = rcu_lock_domain_by_id(domctl->domain);
unsigned long max_pfns = domctl->u.getmemlist.max_pfns;
uint64_t mfn;
- struct list_head *list_ent;
+ struct page_info *page;
ret = -EINVAL;
if ( d != NULL )
goto getmemlist_out;
}
- ret = 0;
- list_ent = d->page_list.next;
- for ( i = 0; (i < max_pfns) && (list_ent != &d->page_list); i++ )
+ ret = i = 0;
+ page_list_for_each(page, &d->page_list)
{
- mfn = page_to_mfn(list_entry(
- list_ent, struct page_info, list));
+ if ( i >= max_pfns )
+ break;
+ mfn = page_to_mfn(page);
if ( copy_to_guest_offset(domctl->u.getmemlist.buffer,
i, &mfn, 1) )
{
ret = -EFAULT;
break;
}
- list_ent = mfn_to_page(mfn)->list.next;
+ ++i;
}
spin_unlock(&d->page_alloc_lock);
case XEN_DOMCTL_sethvmcontext:
{
- struct hvm_domain_context c;
- struct domain *d;
+ struct hvm_domain_context c = { .size = domctl->u.hvmcontext.size };
+ struct domain *d;
- c.cur = 0;
- c.size = domctl->u.hvmcontext.size;
- c.data = NULL;
-
ret = -ESRCH;
if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
break;
case XEN_DOMCTL_gethvmcontext:
{
- struct hvm_domain_context c;
- struct domain *d;
+ struct hvm_domain_context c = { 0 };
+ struct domain *d;
ret = -ESRCH;
if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
if ( !is_hvm_domain(d) )
goto gethvmcontext_out;
- c.cur = 0;
c.size = hvm_save_size(d);
- c.data = NULL;
if ( guest_handle_is_null(domctl->u.hvmcontext.buffer) )
{
}
break;
+ case XEN_DOMCTL_gethvmcontext_partial:
+ {
+ struct domain *d;
+
+ ret = -ESRCH;
+ if ( (d = rcu_lock_domain_by_id(domctl->domain)) == NULL )
+ break;
+
+ ret = xsm_hvmcontext(d, domctl->cmd);
+ if ( ret )
+ goto gethvmcontext_partial_out;
+
+ ret = -EINVAL;
+ if ( !is_hvm_domain(d) )
+ goto gethvmcontext_partial_out;
+
+ domain_pause(d);
+ ret = hvm_save_one(d, domctl->u.hvmcontext_partial.type,
+ domctl->u.hvmcontext_partial.instance,
+ domctl->u.hvmcontext_partial.buffer);
+ domain_unpause(d);
+
+ gethvmcontext_partial_out:
+ rcu_unlock_domain(d);
+ }
+ break;
+
+
case XEN_DOMCTL_set_address_size:
{
struct domain *d;
break;
}
- domctl->u.address_size.size = BITS_PER_GUEST_LONG(d);
+ domctl->u.address_size.size =
+ is_pv_32on64_domain(d) ? 32 : BITS_PER_LONG;
ret = 0;
rcu_unlock_domain(d);
}
ret = -EINVAL;
- if ( device_assigned(bus, devfn) )
- {
- gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: "
- "%x:%x:%x already assigned, or non-existent\n",
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
- put_domain(d);
- break;
- }
ret = assign_device(d, bus, devfn);
if ( ret )
put_domain(d);
break;
}
-
- if ( !device_assigned(bus, devfn) )
- {
- put_domain(d);
- break;
- }
-
ret = 0;
- deassign_device(d, bus, devfn);
+ spin_lock(&pcidevs_lock);
+ ret = deassign_device(d, bus, devfn);
+ spin_unlock(&pcidevs_lock);
gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = -ESRCH;
if ( iommu_enabled )
+ {
+ spin_lock(&pcidevs_lock);
ret = pt_irq_create_bind_vtd(d, bind);
+ spin_unlock(&pcidevs_lock);
+ }
if ( ret < 0 )
gdprintk(XENLOG_ERR, "pt_irq_create_bind failed!\n");
break;
bind = &(domctl->u.bind_pt_irq);
if ( iommu_enabled )
+ {
+ spin_lock(&pcidevs_lock);
ret = pt_irq_destroy_bind_vtd(d, bind);
+ spin_unlock(&pcidevs_lock);
+ }
if ( ret < 0 )
gdprintk(XENLOG_ERR, "pt_irq_destroy_bind failed!\n");
rcu_unlock_domain(d);
}
break;
+ case XEN_DOMCTL_debug_op:
+ {
+ struct domain *d;
+ struct vcpu *v;
+
+ ret = -ESRCH;
+ d = rcu_lock_domain_by_id(domctl->domain);
+ if ( d == NULL )
+ break;
+
+ ret = -EINVAL;
+ if ( (domctl->u.debug_op.vcpu >= MAX_VIRT_CPUS) ||
+ ((v = d->vcpu[domctl->u.debug_op.vcpu]) == NULL) )
+ goto debug_op_out;
+
+ ret = -EINVAL;
+ if ( !is_hvm_domain(d))
+ goto debug_op_out;
+
+ ret = hvm_debug_op(v, domctl->u.debug_op.op);
+
+ debug_op_out:
+ rcu_unlock_domain(d);
+ }
+ break;
+
default:
ret = -ENOSYS;
break;
if ( is_hvm_vcpu(v) )
{
+ struct segment_register sreg;
memset(c.nat->ctrlreg, 0, sizeof(c.nat->ctrlreg));
c.nat->ctrlreg[0] = v->arch.hvm_vcpu.guest_cr[0];
c.nat->ctrlreg[2] = v->arch.hvm_vcpu.guest_cr[2];
c.nat->ctrlreg[3] = v->arch.hvm_vcpu.guest_cr[3];
c.nat->ctrlreg[4] = v->arch.hvm_vcpu.guest_cr[4];
+ hvm_get_segment_register(v, x86_seg_cs, &sreg);
+ c.nat->user_regs.cs = sreg.sel;
+ hvm_get_segment_register(v, x86_seg_ss, &sreg);
+ c.nat->user_regs.ss = sreg.sel;
+ hvm_get_segment_register(v, x86_seg_ds, &sreg);
+ c.nat->user_regs.ds = sreg.sel;
+ hvm_get_segment_register(v, x86_seg_es, &sreg);
+ c.nat->user_regs.es = sreg.sel;
+ hvm_get_segment_register(v, x86_seg_fs, &sreg);
+ c.nat->user_regs.fs = sreg.sel;
+ hvm_get_segment_register(v, x86_seg_gs, &sreg);
+ c.nat->user_regs.gs = sreg.sel;
}
else
{
#include <xen/config.h>
#include <xen/init.h>
#include <xen/lib.h>
+#include <xen/mm.h>
#include <xen/compat.h>
#include <xen/dmi.h>
#include <asm/e820.h>
-#include <asm/mm.h>
#include <asm/page.h>
/* opt_mem: Limit of physical RAM. Any RAM beyond this point is ignored. */
reserve_dmi_region();
}
-/* Reserve RAM area (@s,@e) in the specified e820 map. */
-int __init reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e)
+int __init e820_change_range_type(
+ struct e820map *e820, uint64_t s, uint64_t e,
+ uint32_t orig_type, uint32_t new_type)
{
uint64_t rs = 0, re = 0;
int i;
break;
}
- if ( (i == e820->nr_map) || (e820->map[i].type != E820_RAM) )
+ if ( (i == e820->nr_map) || (e820->map[i].type != orig_type) )
return 0;
if ( (s == rs) && (e == re) )
{
- /* Complete excision. */
- memmove(&e820->map[i], &e820->map[i+1],
- (e820->nr_map-i-1) * sizeof(e820->map[0]));
- e820->nr_map--;
- }
- else if ( s == rs )
- {
- /* Truncate start. */
- e820->map[i].addr += e - s;
- e820->map[i].size -= e - s;
+ e820->map[i].type = new_type;
}
- else if ( e == re )
+ else if ( (s == rs) || (e == re) )
{
- /* Truncate end. */
- e820->map[i].size -= e - s;
- }
- else if ( e820->nr_map < ARRAY_SIZE(e820->map) )
- {
- /* Split in two. */
+ if ( (e820->nr_map + 1) > ARRAY_SIZE(e820->map) )
+ goto overflow;
+
memmove(&e820->map[i+1], &e820->map[i],
(e820->nr_map-i) * sizeof(e820->map[0]));
e820->nr_map++;
- e820->map[i].size = s - rs;
- i++;
- e820->map[i].addr = e;
- e820->map[i].size = re - e;
- }
- else
- {
- /* e820map is at maximum size. We have to leak some space. */
- if ( (s - rs) > (re - e) )
+
+ if ( s == rs )
{
- printk("e820 overflow: leaking RAM %"PRIx64"-%"PRIx64"\n", e, re);
- e820->map[i].size = s - rs;
+ e820->map[i].size = e - s;
+ e820->map[i].type = new_type;
+ e820->map[i+1].addr = e;
+ e820->map[i+1].size = re - e;
}
else
{
- printk("e820 overflow: leaking RAM %"PRIx64"-%"PRIx64"\n", rs, s);
- e820->map[i].addr = e;
- e820->map[i].size = re - e;
+ e820->map[i].size = s - rs;
+ e820->map[i+1].addr = s;
+ e820->map[i+1].size = e - s;
+ e820->map[i+1].type = new_type;
}
}
+ else if ( e820->nr_map+1 < ARRAY_SIZE(e820->map) )
+ {
+ if ( (e820->nr_map + 2) > ARRAY_SIZE(e820->map) )
+ goto overflow;
+
+ memmove(&e820->map[i+2], &e820->map[i],
+ (e820->nr_map-i) * sizeof(e820->map[0]));
+ e820->nr_map += 2;
+
+ e820->map[i].size = s - rs;
+ e820->map[i+1].addr = s;
+ e820->map[i+1].size = e - s;
+ e820->map[i+1].type = new_type;
+ e820->map[i+2].addr = e;
+ e820->map[i+2].size = re - e;
+ }
+
+ /* Finally, look for any opportunities to merge adjacent e820 entries. */
+ for ( i = 0; i < (e820->nr_map - 1); i++ )
+ {
+ if ( (e820->map[i].type != e820->map[i+1].type) ||
+ ((e820->map[i].addr + e820->map[i].size) != e820->map[i+1].addr) )
+ continue;
+ e820->map[i].size += e820->map[i+1].size;
+ memmove(&e820->map[i+1], &e820->map[i+2],
+ (e820->nr_map-i-2) * sizeof(e820->map[0]));
+ e820->nr_map--;
+ i--;
+ }
return 1;
+
+ overflow:
+ printk("Overflow in e820 while reserving region %"PRIx64"-%"PRIx64"\n",
+ s, e);
+ return 0;
+}
+
+/* Set E820_RAM area (@s,@e) as RESERVED in specified e820 map. */
+int __init reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e)
+{
+ return e820_change_range_type(e820, s, e, E820_RAM, E820_RESERVED);
}
unsigned long __init init_e820(
#include <xen/timer.h>
#include <xen/smp.h>
#include <xen/softirq.h>
+#include <xen/irq.h>
#include <asm/fixmap.h>
#include <asm/div64.h>
#include <asm/hpet.h>
-
-#define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1))
+#include <asm/msi.h>
+#include <mach_apic.h>
#define MAX_DELTA_NS MILLISECS(10*1000)
#define MIN_DELTA_NS MICROSECS(20)
+#define MAX_HPET_NUM 32
+
+#define HPET_EVT_USED_BIT 2
+#define HPET_EVT_USED (1 << HPET_EVT_USED_BIT)
+
struct hpet_event_channel
{
unsigned long mult;
cpumask_t cpumask;
spinlock_t lock;
void (*event_handler)(struct hpet_event_channel *);
-};
-static struct hpet_event_channel hpet_event;
+
+ unsigned int idx; /* physical channel idx */
+ int cpu; /* msi target */
+ unsigned int vector;/* msi vector */
+ unsigned int flags; /* HPET_EVT_x */
+} __cacheline_aligned;
+static struct hpet_event_channel legacy_hpet_event;
+static struct hpet_event_channel hpet_events[MAX_HPET_NUM];
+static unsigned int num_hpets_used; /* msi hpet channels used for broadcast */
+
+DEFINE_PER_CPU(struct hpet_event_channel *, cpu_bc_channel);
+
+static int vector_channel[NR_IRQS] = {[0 ... NR_IRQS-1] = -1};
+
+#define vector_to_channel(vector) vector_channel[vector]
unsigned long hpet_address;
+void msi_compose_msg(struct pci_dev *pdev, int vector, struct msi_msg *msg);
+
+/* force_hpet_broadcast: if true, force use of HPET broadcast to work around
+ the local APIC timer stopping in deep C-states when the PIT is disabled */
+int force_hpet_broadcast;
+boolean_param("hpetbroadcast", force_hpet_broadcast);
+
/*
* Calculate a multiplication factor for scaled math, which is used to convert
* nanoseconds based values to clock ticks:
return (unsigned long) tmp;
}
-static int hpet_legacy_next_event(unsigned long delta)
+static int hpet_next_event(unsigned long delta, int timer)
{
uint32_t cnt, cmp;
unsigned long flags;
local_irq_save(flags);
cnt = hpet_read32(HPET_COUNTER);
cmp = cnt + delta;
- hpet_write32(cmp, HPET_T0_CMP);
+ hpet_write32(cmp, HPET_Tn_CMP(timer));
cmp = hpet_read32(HPET_COUNTER);
local_irq_restore(flags);
if ( expire == STIME_MAX )
{
/* We assume it will take a long time for the timer to wrap. */
- hpet_write32(0, HPET_T0_CMP);
+ hpet_write32(0, HPET_Tn_CMP(ch->idx));
return 0;
}
delta = max_t(int64_t, delta, MIN_DELTA_NS);
delta = ns2ticks(delta, ch->shift, ch->mult);
- ret = hpet_legacy_next_event(delta);
+ ret = hpet_next_event(delta, ch->idx);
while ( ret && force )
{
delta += delta;
- ret = hpet_legacy_next_event(delta);
+ ret = hpet_next_event(delta, ch->idx);
}
return ret;
s_time_t now, next_event;
int cpu;
- spin_lock(&ch->lock);
+ spin_lock_irq(&ch->lock);
again:
ch->next_event = STIME_MAX;
if ( reprogram_hpet_evt_channel(ch, next_event, now, 0) )
goto again;
}
- spin_unlock(&ch->lock);
+ spin_unlock_irq(&ch->lock);
+}
+
+static void hpet_interrupt_handler(int vector, void *data,
+ struct cpu_user_regs *regs)
+{
+ struct hpet_event_channel *ch = (struct hpet_event_channel *)data;
+ if ( !ch->event_handler )
+ {
+ printk(XENLOG_WARNING "Spurious HPET timer interrupt on HPET timer %d\n", ch->idx);
+ return;
+ }
+
+ ch->event_handler(ch);
+}
+
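+/* Unmask the HPET channel behind @vector by enabling FSB (MSI) delivery
+ * in its Tn config register. */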
+static void hpet_msi_unmask(unsigned int vector)
+{
+ unsigned long cfg;
+ int ch_idx = vector_to_channel(vector);
+ struct hpet_event_channel *ch;
+
+ BUG_ON(ch_idx < 0);
+ ch = &hpet_events[ch_idx];
+
+ cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
+ cfg |= HPET_TN_FSB;
+ hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+}
+
+static void hpet_msi_mask(unsigned int vector)
+{
+ unsigned long cfg;
+ int ch_idx = vector_to_channel(vector);
+ struct hpet_event_channel *ch;
+
+ BUG_ON(ch_idx < 0);
+ ch = &hpet_events[ch_idx];
+
+ cfg = hpet_read32(HPET_Tn_CFG(ch->idx));
+ cfg &= ~HPET_TN_FSB;
+ hpet_write32(cfg, HPET_Tn_CFG(ch->idx));
+}
+
+static void hpet_msi_write(unsigned int vector, struct msi_msg *msg)
+{
+ int ch_idx = vector_to_channel(vector);
+ struct hpet_event_channel *ch;
+
+ BUG_ON(ch_idx < 0);
+ ch = &hpet_events[ch_idx];
+
+ hpet_write32(msg->data, HPET_Tn_ROUTE(ch->idx));
+ hpet_write32(msg->address_lo, HPET_Tn_ROUTE(ch->idx) + 4);
+}
+
+static void hpet_msi_read(unsigned int vector, struct msi_msg *msg)
+{
+ int ch_idx = vector_to_channel(vector);
+ struct hpet_event_channel *ch;
+
+ BUG_ON(ch_idx < 0);
+ ch = &hpet_events[ch_idx];
+
+ msg->data = hpet_read32(HPET_Tn_ROUTE(ch->idx));
+ msg->address_lo = hpet_read32(HPET_Tn_ROUTE(ch->idx) + 4);
+ msg->address_hi = 0;
+}
+
+static unsigned int hpet_msi_startup(unsigned int vector)
+{
+ hpet_msi_unmask(vector);
+ return 0;
+}
+
+static void hpet_msi_shutdown(unsigned int vector)
+{
+ hpet_msi_mask(vector);
+}
+
+static void hpet_msi_ack(unsigned int vector)
+{
+ ack_APIC_irq();
+}
+
+static void hpet_msi_end(unsigned int vector)
+{
}
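+/* Retarget the channel's MSI to one of the CPUs in @mask by rewriting the
+ * MSI address/data registers. */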
+static void hpet_msi_set_affinity(unsigned int vector, cpumask_t mask)
+{
+ struct msi_msg msg;
+ unsigned int dest;
+ cpumask_t tmp;
+
+ cpus_and(tmp, mask, cpu_online_map);
+ if ( cpus_empty(tmp) )
+ mask = TARGET_CPUS;
+
+ dest = cpu_mask_to_apicid(mask);
+
+ hpet_msi_read(vector, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(vector);
+ msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
+ msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+
+ hpet_msi_write(vector, &msg);
+ irq_desc[vector].affinity = mask;
+}
+
+/*
+ * IRQ Chip for MSI HPET Devices,
+ */
+static struct hw_interrupt_type hpet_msi_type = {
+ .typename = "HPET-MSI",
+ .startup = hpet_msi_startup,
+ .shutdown = hpet_msi_shutdown,
+ .enable = hpet_msi_unmask,
+ .disable = hpet_msi_mask,
+ .ack = hpet_msi_ack,
+ .end = hpet_msi_end,
+ .set_affinity = hpet_msi_set_affinity,
+};
+
+static int hpet_setup_msi_irq(unsigned int vector)
+{
+ int ret;
+ struct msi_msg msg;
+ struct hpet_event_channel *ch = &hpet_events[vector_to_channel(vector)];
+
+ irq_desc[vector].handler = &hpet_msi_type;
+ ret = request_irq_vector(vector, hpet_interrupt_handler,
+ 0, "HPET", ch);
+ if ( ret < 0 )
+ return ret;
+
+ msi_compose_msg(NULL, vector, &msg);
+ hpet_msi_write(vector, &msg);
+
+ return 0;
+}
+
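+/* Allocate an interrupt vector for the given HPET channel and set it up
+ * for MSI delivery. */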
+static int hpet_assign_irq(struct hpet_event_channel *ch)
+{
+ unsigned int vector;
+
+ vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
+ if ( !vector )
+ return -EINVAL;
+
+ irq_vector[vector] = vector;
+ vector_irq[vector] = vector;
+ vector_channel[vector] = ch - &hpet_events[0];
+
+ if ( hpet_setup_msi_irq(vector) )
+ {
+ irq_vector[vector] = 0;
+ vector_irq[vector] = FREE_TO_ASSIGN_IRQ;
+ vector_channel[vector] = -1;
+ return -EINVAL;
+ }
+
+ ch->vector = vector;
+ return 0;
+}
+
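+/* Scan the HPET for timers with FSB (MSI) delivery capability and assign an
+ * interrupt to each, up to one channel per possible CPU. Returns the number
+ * of channels usable for broadcast. */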
+static int hpet_fsb_cap_lookup(void)
+{
+ unsigned int id;
+ unsigned int num_chs, num_chs_used;
+ int i;
+
+ id = hpet_read32(HPET_ID);
+
+ num_chs = ((id & HPET_ID_NUMBER) >> HPET_ID_NUMBER_SHIFT);
+ num_chs++; /* The ID register reports the number of timers minus one */
+
+ num_chs_used = 0;
+ for ( i = 0; i < num_chs; i++ )
+ {
+ struct hpet_event_channel *ch = &hpet_events[num_chs_used];
+ unsigned long cfg = hpet_read32(HPET_Tn_CFG(i));
+
+ /* Only consider HPET timer with MSI support */
+ if ( !(cfg & HPET_TN_FSB_CAP) )
+ continue;
+
+ ch->flags = 0;
+ ch->idx = i;
+
+ if ( hpet_assign_irq(ch) )
+ continue;
+
+ /* set default irq affinity */
+ ch->cpu = num_chs_used;
+ per_cpu(cpu_bc_channel, ch->cpu) = ch;
+ irq_desc[ch->vector].handler->
+ set_affinity(ch->vector, cpumask_of_cpu(ch->cpu));
+
+ num_chs_used++;
+
+ if ( num_chs_used == num_possible_cpus() )
+ break;
+ }
+
+ printk(XENLOG_INFO
+ "HPET: %d timers in total, %d timers will be used for broadcast\n",
+ num_chs, num_chs_used);
+
+ return num_chs_used;
+}
+
+static int next_channel;
+static spinlock_t next_lock = SPIN_LOCK_UNLOCKED;
+
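+/* Pick a broadcast channel for @cpu: prefer an unused channel, otherwise
+ * share the next channel in round-robin order. */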
+static struct hpet_event_channel *hpet_get_channel(int cpu)
+{
+ int i;
+ int next;
+ struct hpet_event_channel *ch;
+
+ spin_lock(&next_lock);
+ next = next_channel = (next_channel + 1) % num_hpets_used;
+ spin_unlock(&next_lock);
+
+ /* try unused channel first */
+ for ( i = next; i < next + num_hpets_used; i++ )
+ {
+ ch = &hpet_events[i % num_hpets_used];
+ if ( !test_and_set_bit(HPET_EVT_USED_BIT, &ch->flags) )
+ {
+ ch->cpu = cpu;
+ return ch;
+ }
+ }
+
+ /* share an in-use channel */
+ ch = &hpet_events[next];
+ if ( !test_and_set_bit(HPET_EVT_USED_BIT, &ch->flags) )
+ ch->cpu = cpu;
+
+ return ch;
+}
+
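+/* Attach @cpu to a shared broadcast channel, taking ownership (and
+ * retargeting the MSI) if the channel currently has no owner. */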
+static void hpet_attach_channel_share(int cpu, struct hpet_event_channel *ch)
+{
+ per_cpu(cpu_bc_channel, cpu) = ch;
+
+ /* try to be the channel owner again while holding the lock */
+ if ( !test_and_set_bit(HPET_EVT_USED_BIT, &ch->flags) )
+ ch->cpu = cpu;
+
+ if ( ch->cpu != cpu )
+ return;
+
+ /* set irq affinity */
+ irq_desc[ch->vector].handler->
+ set_affinity(ch->vector, cpumask_of_cpu(ch->cpu));
+}
+
+static void hpet_detach_channel_share(int cpu)
+{
+ struct hpet_event_channel *ch = per_cpu(cpu_bc_channel, cpu);
+
+ per_cpu(cpu_bc_channel, cpu) = NULL;
+
+ if ( cpu != ch->cpu )
+ return;
+
+ if ( cpus_empty(ch->cpumask) )
+ {
+ ch->cpu = -1;
+ clear_bit(HPET_EVT_USED_BIT, &ch->flags);
+ return;
+ }
+
+ ch->cpu = first_cpu(ch->cpumask);
+ /* set irq affinity */
+ irq_desc[ch->vector].handler->
+ set_affinity(ch->vector, cpumask_of_cpu(ch->cpu));
+}
+
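+/* These hooks are installed only when broadcast channels must be shared,
+ * i.e. when there are fewer usable MSI HPET channels than possible CPUs. */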
+static void (*hpet_attach_channel)(int cpu, struct hpet_event_channel *ch);
+static void (*hpet_detach_channel)(int cpu);
+
void hpet_broadcast_init(void)
{
u64 hpet_rate;
u32 hpet_id, cfg;
+ int i;
hpet_rate = hpet_setup();
if ( hpet_rate == 0 )
return;
+ num_hpets_used = hpet_fsb_cap_lookup();
+ if ( num_hpets_used > 0 )
+ {
+ /* Stop HPET legacy interrupts */
+ cfg = hpet_read32(HPET_CFG);
+ cfg &= ~HPET_CFG_LEGACY;
+ hpet_write32(cfg, HPET_CFG);
+
+ for ( i = 0; i < num_hpets_used; i++ )
+ {
+ /* set HPET Tn as oneshot */
+ cfg = hpet_read32(HPET_Tn_CFG(hpet_events[i].idx));
+ cfg &= ~HPET_TN_PERIODIC;
+ cfg |= HPET_TN_ENABLE | HPET_TN_32BIT;
+ hpet_write32(cfg, HPET_Tn_CFG(hpet_events[i].idx));
+
+ hpet_events[i].mult = div_sc((unsigned long)hpet_rate,
+ 1000000000ul, 32);
+ hpet_events[i].shift = 32;
+ hpet_events[i].next_event = STIME_MAX;
+ hpet_events[i].event_handler = handle_hpet_broadcast;
+ spin_lock_init(&hpet_events[i].lock);
+ }
+
+ if ( num_hpets_used < num_possible_cpus() )
+ {
+ hpet_attach_channel = hpet_attach_channel_share;
+ hpet_detach_channel = hpet_detach_channel_share;
+ }
+
+ return;
+ }
+
hpet_id = hpet_read32(HPET_ID);
- if ( !(hpet_id & HPET_ID_LEGSUP) )
+ if ( !(hpet_id & HPET_ID_LEGSUP) || !force_hpet_broadcast )
return;
/* Start HPET legacy interrupts */
* The period is a femto seconds value. We need to calculate the scaled
* math multiplication factor for nanosecond to hpet tick conversion.
*/
- hpet_event.mult = div_sc((unsigned long)hpet_rate, 1000000000ul, 32);
- hpet_event.shift = 32;
- hpet_event.next_event = STIME_MAX;
- hpet_event.event_handler = handle_hpet_broadcast;
- spin_lock_init(&hpet_event.lock);
+ legacy_hpet_event.mult = div_sc((unsigned long)hpet_rate, 1000000000ul, 32);
+ legacy_hpet_event.shift = 32;
+ legacy_hpet_event.next_event = STIME_MAX;
+ legacy_hpet_event.event_handler = handle_hpet_broadcast;
+ legacy_hpet_event.idx = 0;
+ legacy_hpet_event.flags = 0;
+ spin_lock_init(&legacy_hpet_event.lock);
+
+ for_each_cpu(i)
+ per_cpu(cpu_bc_channel, i) = &legacy_hpet_event;
}
void hpet_broadcast_enter(void)
{
- struct hpet_event_channel *ch = &hpet_event;
+ int cpu = smp_processor_id();
+ struct hpet_event_channel *ch = per_cpu(cpu_bc_channel, cpu);
+ if ( !ch )
+ ch = hpet_get_channel(cpu);
+ BUG_ON( !ch );
+
+ ASSERT(!local_irq_is_enabled());
spin_lock(&ch->lock);
+ if ( hpet_attach_channel )
+ hpet_attach_channel(cpu, ch);
+
disable_APIC_timer();
- cpu_set(smp_processor_id(), ch->cpumask);
+ cpu_set(cpu, ch->cpumask);
/* reprogram if current cpu expire time is nearer */
if ( this_cpu(timer_deadline) < ch->next_event )
void hpet_broadcast_exit(void)
{
- struct hpet_event_channel *ch = &hpet_event;
int cpu = smp_processor_id();
+ struct hpet_event_channel *ch = per_cpu(cpu_bc_channel, cpu);
+
+ BUG_ON( !ch );
spin_lock_irq(&ch->lock);
reprogram_hpet_evt_channel(ch, STIME_MAX, 0, 0);
}
+ if ( hpet_detach_channel )
+ hpet_detach_channel(cpu);
+
spin_unlock_irq(&ch->lock);
}
int hpet_broadcast_is_available(void)
{
- return (hpet_event.event_handler == handle_hpet_broadcast);
+ return (legacy_hpet_event.event_handler == handle_hpet_broadcast
+ || num_hpets_used > 0);
}
int hpet_legacy_irq_tick(void)
{
- if ( !hpet_event.event_handler )
+ if ( !legacy_hpet_event.event_handler )
return 0;
- hpet_event.event_handler(&hpet_event);
+ legacy_hpet_event.event_handler(&legacy_hpet_event);
return 1;
}
u64 hpet_setup(void)
{
static u64 hpet_rate;
- static int initialised;
+ static u32 system_reset_latch;
u32 hpet_id, hpet_period, cfg;
int i;
- if ( initialised )
+ if ( system_reset_latch == system_reset_counter )
return hpet_rate;
- initialised = 1;
+ system_reset_latch = system_reset_counter;
if ( hpet_address == 0 )
return 0;
set_fixmap_nocache(FIX_HPET_BASE, hpet_address);
hpet_id = hpet_read32(HPET_ID);
- if ( hpet_id == 0 )
+ if ( (hpet_id & HPET_ID_REV) == 0 )
{
- printk("BAD HPET vendor id.\n");
+ printk("BAD HPET revision id.\n");
return 0;
}
for ( i = 0; i <= ((hpet_id >> 8) & 31); i++ )
{
- cfg = hpet_read32(HPET_T0_CFG + i*0x20);
+ cfg = hpet_read32(HPET_Tn_CFG(i));
cfg &= ~HPET_TN_ENABLE;
- hpet_write32(cfg & ~HPET_TN_ENABLE, HPET_T0_CFG);
+ hpet_write32(cfg, HPET_Tn_CFG(i));
}
cfg = hpet_read32(HPET_CFG);
#include <xen/lib.h>
#include <xen/sched.h>
#include <xen/paging.h>
+#include <xen/trace.h>
#include <asm/event.h>
#include <asm/hvm/emulate.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/support.h>
+#define HVMTRACE_IO_ASSIST_WRITE 0x200
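+/* Emit a trace record for a port I/O or MMIO assist when tracing is active. */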
+static void hvmtrace_io_assist(int is_mmio, ioreq_t *p)
+{
+ unsigned int size, event;
+ unsigned char buffer[12];
+
+ if ( likely(!tb_init_done) )
+ return;
+
+ event = is_mmio ? TRC_HVM_MMIO_ASSIST : TRC_HVM_IO_ASSIST;
+ if ( !p->dir )
+ event |= HVMTRACE_IO_ASSIST_WRITE;
+
+ *(uint64_t *)buffer = p->addr;
+ size = (p->addr != (u32)p->addr) ? 8 : 4;
+ if ( size == 8 )
+ event |= TRC_64_FLAG;
+
+ if ( !p->data_is_ptr )
+ {
+ *(uint32_t *)&buffer[size] = p->data;
+ size += 4;
+ }
+
+ trace_var(event, 0/*!cycles*/, size, buffer);
+}
+
static int hvmemul_do_io(
int is_mmio, paddr_t addr, unsigned long *reps, int size,
paddr_t ram_gpa, int dir, int df, void *p_data)
p->data = value;
p->io_count++;
+ hvmtrace_io_assist(is_mmio, p);
+
if ( is_mmio )
{
rc = hvm_mmio_intercept(p);
if ( (rc = hvm_msr_read_intercept(&_regs)) != 0 )
return rc;
- *val = ((uint64_t)(uint32_t)_regs.edx << 32) || (uint32_t)_regs.eax;
+ *val = ((uint64_t)(uint32_t)_regs.edx << 32) | (uint32_t)_regs.eax;
return X86EMUL_OKAY;
}
~0ULL : (tick) * (h)->hpet_to_ns_scale) >> 10))
#define timer_config(h, n) (h->hpet.timers[n].config)
+#define timer_enabled(h, n) (timer_config(h, n) & HPET_TN_ENABLE)
#define timer_is_periodic(h, n) (timer_config(h, n) & HPET_TN_PERIODIC)
#define timer_is_32bit(h, n) (timer_config(h, n) & HPET_TN_32BIT)
#define hpet_enabled(h) (h->hpet.config & HPET_CFG_ENABLE)
((timer_config(h, n) & HPET_TN_INT_ROUTE_CAP_MASK) \
>> HPET_TN_INT_ROUTE_CAP_SHIFT)
-#define hpet_time_after(a, b) ((int32_t)(b) - (int32_t)(a) < 0)
-#define hpet_time_after64(a, b) ((int64_t)(b) - (int64_t)(a) < 0)
+static inline uint64_t hpet_read_maincounter(HPETState *h)
+{
+ ASSERT(spin_is_locked(&h->lock));
+
+ if ( hpet_enabled(h) )
+ return guest_time_hpet(h->vcpu) + h->mc_offset;
+ else
+ return h->hpet.mc64;
+}
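+/* Return the current comparator for timer @tn, advancing a periodic timer's
+ * comparator by whole elapsed periods and truncating it in 32-bit mode. */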
+static uint64_t hpet_get_comparator(HPETState *h, unsigned int tn)
+{
+ uint64_t comparator;
+ uint64_t elapsed;
+
+ comparator = h->hpet.comparator64[tn];
+ if ( timer_is_periodic(h, tn) )
+ {
+ /* update comparator by number of periods elapsed since last update */
+ uint64_t period = h->hpet.period[tn];
+ if (period)
+ {
+ elapsed = hpet_read_maincounter(h) + period - 1 - comparator;
+ comparator += (elapsed / period) * period;
+ h->hpet.comparator64[tn] = comparator;
+ }
+ }
+
+ /* truncate if timer is in 32 bit mode */
+ if ( timer_is_32bit(h, tn) )
+ comparator = (uint32_t)comparator;
+ h->hpet.timers[tn].cmp = comparator;
+ return comparator;
+}
static inline uint64_t hpet_read64(HPETState *h, unsigned long addr)
{
addr &= ~7;
case HPET_STATUS:
return h->hpet.isr;
case HPET_COUNTER:
- return h->hpet.mc64;
+ return hpet_read_maincounter(h);
case HPET_T0_CFG:
case HPET_T1_CFG:
case HPET_T2_CFG:
case HPET_T0_CMP:
case HPET_T1_CMP:
case HPET_T2_CMP:
- return h->hpet.timers[(addr - HPET_T0_CMP) >> 5].cmp;
+ return hpet_get_comparator(h, (addr - HPET_T0_CMP) >> 5);
case HPET_T0_ROUTE:
case HPET_T1_ROUTE:
case HPET_T2_ROUTE:
return 0;
}
-static inline uint64_t hpet_read_maincounter(HPETState *h)
-{
- ASSERT(spin_is_locked(&h->lock));
-
- if ( hpet_enabled(h) )
- return guest_time_hpet(h->vcpu) + h->mc_offset;
- else
- return h->hpet.mc64;
-}
-
static int hpet_read(
struct vcpu *v, unsigned long addr, unsigned long length,
unsigned long *pval)
spin_lock(&h->lock);
val = hpet_read64(h, addr);
- if ( (addr & ~7) == HPET_COUNTER )
- val = hpet_read_maincounter(h);
result = val;
if ( length != 8 )
{
ASSERT(tn < HPET_TIMER_NUM);
ASSERT(spin_is_locked(&h->lock));
- stop_timer(&h->timers[tn]);
+ destroy_periodic_time(&h->pt[tn]);
+ /* read the comparator to get it updated so a read while stopped will
+ * return the expected value. */
+ hpet_get_comparator(h, tn);
}
/* the number of HPET tick that stands for
static void hpet_set_timer(HPETState *h, unsigned int tn)
{
uint64_t tn_cmp, cur_tick, diff;
+ unsigned int irq;
+ unsigned int oneshot;
ASSERT(tn < HPET_TIMER_NUM);
ASSERT(spin_is_locked(&h->lock));
pit_stop_channel0_irq(pit);
}
- tn_cmp = h->hpet.timers[tn].cmp;
+ if ( !timer_enabled(h, tn) )
+ return;
+
+ tn_cmp = hpet_get_comparator(h, tn);
cur_tick = hpet_read_maincounter(h);
if ( timer_is_32bit(h, tn) )
{
diff = (timer_is_32bit(h, tn) && (-diff > HPET_TINY_TIME_SPAN))
? (uint32_t)diff : 0;
- set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, diff));
+ if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) )
+ /* if LegacyReplacementRoute bit is set, HPET specification requires
+ timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
+ timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
+ irq = (tn == 0) ? 0 : 8;
+ else
+ irq = timer_int_route(h, tn);
+
+ /*
+ * diff is the time from now until the timer should fire. For a periodic
+ * timer we also need the period, which may differ because time may have
+ * elapsed between when the comparator was written and when the timer was
+ * enabled (now).
+ */
+ oneshot = !timer_is_periodic(h, tn);
+ create_periodic_time(h->vcpu, &h->pt[tn],
+ hpet_tick_to_ns(h, diff),
+ oneshot ? 0 : hpet_tick_to_ns(h, h->hpet.period[tn]),
+ irq, NULL, NULL);
}
static inline uint64_t hpet_fixup_reg(
uint64_t old_val, new_val;
int tn, i;
+ /* Accumulate a bit mask of timers whose state is changed by this write. */
+ unsigned long start_timers = 0;
+ unsigned long stop_timers = 0;
+#define set_stop_timer(n) (__set_bit((n), &stop_timers))
+#define set_start_timer(n) (__set_bit((n), &start_timers))
+#define set_restart_timer(n) (set_stop_timer(n),set_start_timer(n))
+
addr &= HPET_MMAP_SIZE-1;
if ( hpet_check_access_length(addr, length) != 0 )
spin_lock(&h->lock);
old_val = hpet_read64(h, addr);
- if ( (addr & ~7) == HPET_COUNTER )
- old_val = hpet_read_maincounter(h);
-
new_val = val;
if ( length != 8 )
new_val = hpet_fixup_reg(
/* Enable main counter and interrupt generation. */
h->mc_offset = h->hpet.mc64 - guest_time_hpet(h->vcpu);
for ( i = 0; i < HPET_TIMER_NUM; i++ )
- hpet_set_timer(h, i);
+ {
+ h->hpet.comparator64[i] =
+ h->hpet.timers[i].config & HPET_TN_32BIT ?
+ (uint32_t)h->hpet.timers[i].cmp :
+ h->hpet.timers[i].cmp;
+ if ( timer_enabled(h, i) )
+ set_start_timer(i);
+ }
}
else if ( (old_val & HPET_CFG_ENABLE) && !(new_val & HPET_CFG_ENABLE) )
{
/* Halt main counter and disable interrupt generation. */
h->hpet.mc64 = h->mc_offset + guest_time_hpet(h->vcpu);
for ( i = 0; i < HPET_TIMER_NUM; i++ )
- hpet_stop_timer(h, i);
+ if ( timer_enabled(h, i) )
+ set_stop_timer(i);
}
break;
case HPET_COUNTER:
+ h->hpet.mc64 = new_val;
if ( hpet_enabled(h) )
+ {
gdprintk(XENLOG_WARNING,
"HPET: writing main counter but it's not halted!\n");
- h->hpet.mc64 = new_val;
+ for ( i = 0; i < HPET_TIMER_NUM; i++ )
+ if ( timer_enabled(h, i) )
+ set_restart_timer(i);
+ }
break;
case HPET_T0_CFG:
h->hpet.timers[tn].cmp = (uint32_t)h->hpet.timers[tn].cmp;
h->hpet.period[tn] = (uint32_t)h->hpet.period[tn];
}
-
+ if ( hpet_enabled(h) )
+ {
+ if ( new_val & HPET_TN_ENABLE )
+ {
+ if ( (new_val ^ old_val) & HPET_TN_PERIODIC )
+ /* timer is enabled but switching mode to/from periodic/
+ * one-shot, stop and restart the vpt timer to get it in
+ * the right mode. */
+ set_restart_timer(tn);
+ else if ( (new_val & HPET_TN_32BIT) &&
+ !(old_val & HPET_TN_32BIT) )
+ /* switching from 64 bit to 32 bit mode could cause the
+ * timer's next fire time, or period, to change. */
+ set_restart_timer(tn);
+ else if ( !(old_val & HPET_TN_ENABLE) )
+ /* transition from timer disabled to timer enabled. */
+ set_start_timer(tn);
+ }
+ else if ( old_val & HPET_TN_ENABLE )
+ /* transition from timer enabled to timer disabled. */
+ set_stop_timer(tn);
+ }
break;
case HPET_T0_CMP:
tn = (addr - HPET_T0_CMP) >> 5;
if ( timer_is_32bit(h, tn) )
new_val = (uint32_t)new_val;
- if ( !timer_is_periodic(h, tn) ||
- (h->hpet.timers[tn].config & HPET_TN_SETVAL) )
- h->hpet.timers[tn].cmp = new_val;
- else
+ h->hpet.timers[tn].cmp = new_val;
+ if ( h->hpet.timers[tn].config & HPET_TN_SETVAL )
+ /*
+ * When SETVAL is one, software is able to "directly set a periodic
+ * timer's accumulator." That is, set the comparator without
+ * adjusting the period. Much the same as just setting the
+ * comparator on an enabled one-shot timer.
+ *
+ * This configuration bit clears when the comparator is written.
+ */
+ h->hpet.timers[tn].config &= ~HPET_TN_SETVAL;
+ else if ( timer_is_periodic(h, tn) )
{
/*
* Clamp period to reasonable min/max values:
- * - minimum is 900us, same as timers controlled by vpt.c
+ * - minimum is 100us, same as timers controlled by vpt.c
* - maximum is to prevent overflow in time_after() calculations
*/
- if ( hpet_tick_to_ns(h, new_val) < MICROSECS(900) )
- new_val = (MICROSECS(900) << 10) / h->hpet_to_ns_scale;
+ if ( hpet_tick_to_ns(h, new_val) < MICROSECS(100) )
+ new_val = (MICROSECS(100) << 10) / h->hpet_to_ns_scale;
new_val &= (timer_is_32bit(h, tn) ? ~0u : ~0ull) >> 1;
h->hpet.period[tn] = new_val;
}
- h->hpet.timers[tn].config &= ~HPET_TN_SETVAL;
- if ( hpet_enabled(h) )
- hpet_set_timer(h, tn);
+ h->hpet.comparator64[tn] = new_val;
+ if ( hpet_enabled(h) && timer_enabled(h, tn) )
+ set_restart_timer(tn);
break;
case HPET_T0_ROUTE:
break;
}
+ /* Stop/start timers whose state was changed by this write. */
+ while (stop_timers)
+ {
+ i = find_first_set_bit(stop_timers);
+ __clear_bit(i, &stop_timers);
+ hpet_stop_timer(h, i);
+ }
+
+ while (start_timers)
+ {
+ i = find_first_set_bit(start_timers);
+ __clear_bit(i, &start_timers);
+ hpet_set_timer(h, i);
+ }
+
+#undef set_stop_timer
+#undef set_start_timer
+#undef set_restart_timer
+
spin_unlock(&h->lock);
out:
.write_handler = hpet_write
};
-static void hpet_route_interrupt(HPETState *h, unsigned int tn)
-{
- unsigned int tn_int_route = timer_int_route(h, tn);
- struct domain *d = h->vcpu->domain;
-
- ASSERT(spin_is_locked(&h->lock));
-
- if ( (tn <= 1) && (h->hpet.config & HPET_CFG_LEGACY) )
- {
- /* if LegacyReplacementRoute bit is set, HPET specification requires
- timer0 be routed to IRQ0 in NON-APIC or IRQ2 in the I/O APIC,
- timer1 be routed to IRQ8 in NON-APIC or IRQ8 in the I/O APIC. */
- int isa_irq = (tn == 0) ? 0 : 8;
- hvm_isa_irq_deassert(d, isa_irq);
- hvm_isa_irq_assert(d, isa_irq);
- return;
- }
-
- if ( !(timer_int_route_cap(h, tn) & (1U << tn_int_route)) )
- {
- gdprintk(XENLOG_ERR,
- "HPET: timer%u: invalid interrupt route config\n", tn);
- domain_crash(d);
- return;
- }
-
- /* We support only edge-triggered interrupt. */
- spin_lock(&d->arch.hvm_domain.irq_lock);
- vioapic_irq_positive_edge(d, tn_int_route);
- spin_unlock(&d->arch.hvm_domain.irq_lock);
-}
-
-static void hpet_timer_fn(void *opaque)
-{
- struct HPET_timer_fn_info *htfi = opaque;
- HPETState *h = htfi->hs;
- unsigned int tn = htfi->tn;
-
- spin_lock(&h->lock);
-
- if ( !hpet_enabled(h) )
- {
- spin_unlock(&h->lock);
- return;
- }
-
- if ( timer_config(h, tn) & HPET_TN_ENABLE )
- hpet_route_interrupt(h, tn);
-
- if ( timer_is_periodic(h, tn) && (h->hpet.period[tn] != 0) )
- {
- uint64_t mc = hpet_read_maincounter(h), period = h->hpet.period[tn];
- if ( timer_is_32bit(h, tn) )
- {
- while ( hpet_time_after(mc, h->hpet.timers[tn].cmp) )
- h->hpet.timers[tn].cmp = (uint32_t)(
- h->hpet.timers[tn].cmp + period);
- }
- else
- {
- while ( hpet_time_after64(mc, h->hpet.timers[tn].cmp) )
- h->hpet.timers[tn].cmp += period;
- }
- set_timer(&h->timers[tn], NOW() + hpet_tick_to_ns(h, period));
- }
-
- spin_unlock(&h->lock);
-}
-
-void hpet_migrate_timers(struct vcpu *v)
-{
- struct HPETState *h = &v->domain->arch.hvm_domain.pl_time.vhpet;
- int i;
-
- if ( v != h->vcpu )
- return;
-
- for ( i = 0; i < HPET_TIMER_NUM; i++ )
- migrate_timer(&h->timers[i], v->processor);
-}
static int hpet_save(struct domain *d, hvm_domain_context_t *h)
{
C(isr);
C(mc64);
C(timers[0].config);
- C(timers[0].cmp);
C(timers[0].fsb);
C(timers[1].config);
- C(timers[1].cmp);
C(timers[1].fsb);
C(timers[2].config);
- C(timers[2].cmp);
C(timers[2].fsb);
C(period[0]);
C(period[1]);
C(period[2]);
#undef C
+ /* save the 64 bit comparator in the 64 bit timer[n].cmp field
+ * regardless of whether or not the timer is in 32 bit mode. */
+ rec->timers[0].cmp = hp->hpet.comparator64[0];
+ rec->timers[1].cmp = hp->hpet.comparator64[1];
+ rec->timers[2].cmp = hp->hpet.comparator64[2];
}
spin_unlock(&hp->lock);
{
HPETState *hp = &d->arch.hvm_domain.pl_time.vhpet;
struct hvm_hw_hpet *rec;
+ uint64_t cmp;
int i;
spin_lock(&hp->lock);
h->cur += HVM_SAVE_LENGTH(HPET);
#define C(x) hp->hpet.x = rec->x
- C(capability);
- C(config);
- C(isr);
- C(mc64);
- C(timers[0].config);
- C(timers[0].cmp);
- C(timers[0].fsb);
- C(timers[1].config);
- C(timers[1].cmp);
- C(timers[1].fsb);
- C(timers[2].config);
- C(timers[2].cmp);
- C(timers[2].fsb);
- C(period[0]);
- C(period[1]);
- C(period[2]);
+ C(capability);
+ C(config);
+ C(isr);
+ C(mc64);
+ /* The following define will generate a compiler error if HPET_TIMER_NUM
+ * changes. This indicates an incompatibility with previously saved state. */
+#define HPET_TIMER_NUM 3
+ for ( i = 0; i < HPET_TIMER_NUM; i++ )
+ {
+ C(timers[i].config);
+ C(timers[i].fsb);
+ C(period[i]);
+ /* restore the hidden 64 bit comparator and truncate the timer's
+ * visible comparator field if in 32 bit mode. */
+ cmp = rec->timers[i].cmp;
+ hp->hpet.comparator64[i] = cmp;
+ if ( timer_is_32bit(hp, i) )
+ cmp = (uint32_t)cmp;
+ hp->hpet.timers[i].cmp = cmp;
+ }
#undef C
/* Recalculate the offset between the main counter and guest time */
hp->mc_offset = hp->hpet.mc64 - guest_time_hpet(hp->vcpu);
-
- /* Restart the timers */
- for ( i = 0; i < HPET_TIMER_NUM; i++ )
- if ( hpet_enabled(hp) )
- hpet_set_timer(hp, i);
+ /* restart all timers */
+
+ if ( hpet_enabled(hp) )
+ for ( i = 0; i < HPET_TIMER_NUM; i++ )
+ if ( timer_enabled(hp, i) )
+ hpet_set_timer(hp, i);
+
spin_unlock(&hp->lock);
return 0;
h->hpet.timers[i].config =
HPET_TN_INT_ROUTE_CAP | HPET_TN_SIZE_CAP | HPET_TN_PERIODIC_CAP;
h->hpet.timers[i].cmp = ~0ULL;
- h->timer_fn_info[i].hs = h;
- h->timer_fn_info[i].tn = i;
- init_timer(&h->timers[i], hpet_timer_fn, &h->timer_fn_info[i],
- v->processor);
+ h->pt[i].source = PTSRC_isa;
}
}
int i;
HPETState *h = &d->arch.hvm_domain.pl_time.vhpet;
- for ( i = 0; i < HPET_TIMER_NUM; i++ )
- kill_timer(&h->timers[i]);
+ spin_lock(&h->lock);
+
+ if ( hpet_enabled(h) )
+ for ( i = 0; i < HPET_TIMER_NUM; i++ )
+ if ( timer_enabled(h, i) )
+ hpet_stop_timer(h, i);
+
+ spin_unlock(&h->lock);
}
void hpet_reset(struct domain *d)
*/
#include <xen/config.h>
+#include <xen/ctype.h>
#include <xen/init.h>
#include <xen/lib.h>
#include <xen/trace.h>
printk("HVM: %s enabled\n", fns->name);
/*
- * Allow direct access to the PC debug port (it is often used for I/O
- * delays, but the vmexits simply slow things down).
+ * Allow direct access to the PC debug ports 0x80 and 0xed (they are
+ * often used for I/O delays, but the vmexits simply slow things down).
*/
memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
__clear_bit(0x80, hvm_io_bitmap);
+ __clear_bit(0xed, hvm_io_bitmap);
hvm_funcs = *fns;
hvm_enabled = 1;
void hvm_migrate_timers(struct vcpu *v)
{
rtc_migrate_timers(v);
- hpet_migrate_timers(v);
pt_migrate(v);
}
BUG_ON(bytes != 1);
+ /* Accept only printable characters, newline, and horizontal tab. */
+ if ( !isprint(c) && (c != '\n') && (c != '\t') )
+ return X86EMUL_OKAY;
+
spin_lock(&hd->pbuf_lock);
hd->pbuf[hd->pbuf_idx++] = c;
if ( (hd->pbuf_idx == (sizeof(hd->pbuf) - 2)) || (c == '\n') )
spin_lock_init(&d->arch.hvm_domain.irq_lock);
spin_lock_init(&d->arch.hvm_domain.uc_lock);
+ INIT_LIST_HEAD(&d->arch.hvm_domain.msixtbl_list);
+ spin_lock_init(&d->arch.hvm_domain.msixtbl_list_lock);
+
hvm_init_guest_time(d);
d->arch.hvm_domain.params[HVM_PARAM_HPET_ENABLED] = 1;
return rc;
}
+extern void msixtbl_pt_cleanup(struct domain *d);
+
void hvm_domain_relinquish_resources(struct domain *d)
{
hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.ioreq);
hvm_destroy_ioreq_page(d, &d->arch.hvm_domain.buf_ioreq);
+ msixtbl_pt_cleanup(d);
+
/* Stop all asynchronous timer actions. */
rtc_deinit(d);
if ( d->vcpu[0] != NULL )
return -EINVAL;
}
+ /* Older Xen versions used to save the segment arbytes directly
+ * from the VMCS on Intel hosts. Detect this and rearrange them
+ * into the struct segment_register format. */
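+    /* (The VMCS access-rights format keeps AVL/L/DB/G in bits 12-15 with
+     * bits 8-11 clear, whereas struct segment_register packs them
+     * contiguously into bits 8-11; the macro below shifts them down when
+     * the old layout is detected.) */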
+#define UNFOLD_ARBYTES(_r) \
+ if ( (_r & 0xf000) && !(_r & 0x0f00) ) \
+ _r = ((_r & 0xff) | ((_r >> 4) & 0xf00))
+ UNFOLD_ARBYTES(ctxt.cs_arbytes);
+ UNFOLD_ARBYTES(ctxt.ds_arbytes);
+ UNFOLD_ARBYTES(ctxt.es_arbytes);
+ UNFOLD_ARBYTES(ctxt.fs_arbytes);
+ UNFOLD_ARBYTES(ctxt.gs_arbytes);
+ UNFOLD_ARBYTES(ctxt.ss_arbytes);
+ UNFOLD_ARBYTES(ctxt.tr_arbytes);
+ UNFOLD_ARBYTES(ctxt.ldtr_arbytes);
+#undef UNFOLD_ARBYTES
+
/* Architecture-specific vmcs/vmcb bits */
if ( hvm_funcs.load_cpu_ctxt(v, &ctxt) < 0 )
return -EINVAL;
if ( flags & HVMCOPY_to_guest )
{
- if ( p2mt != p2m_ram_ro )
+ if ( p2mt == p2m_ram_ro )
+ {
+ static unsigned long lastpage;
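+            /* Log at most once per distinct gfn in a row, to avoid
+             * flooding the console with repeated warnings. */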
+ if ( xchg(&lastpage, gfn) != gfn )
+ gdprintk(XENLOG_DEBUG, "guest attempted write to read-only"
+ " memory page. gfn=%#lx, mfn=%#lx\n",
+ gfn, mfn);
+ }
+ else
{
memcpy(p, buf, count);
paging_mark_dirty(curr->domain, mfn);
msr_content = var_range_base[index];
break;
+ case MSR_K8_ENABLE_C1E:
+ /* There's no point in letting the guest see C-States.
+ * Further, this AMD-only register may be accessed if this HVM guest
+ * has been migrated to an Intel host. This fixes a guest crash
+ * in this case.
+ */
+ msr_content = 0;
+ break;
+
default:
return hvm_funcs.msr_read_intercept(regs);
}
return rc;
}
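+
+/* Only a small whitelist of vcpu operations is exposed to HVM guests;
+ * everything else returns -ENOSYS. */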
+static long hvm_vcpu_op(
+ int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
+{
+ long rc;
+
+ switch ( cmd )
+ {
+ case VCPUOP_register_runstate_memory_area:
+ case VCPUOP_get_runstate_info:
+ rc = do_vcpu_op(cmd, vcpuid, arg);
+ break;
+ default:
+ rc = -ENOSYS;
+ break;
+ }
+
+ return rc;
+}
+
typedef unsigned long hvm_hypercall_t(
unsigned long, unsigned long, unsigned long, unsigned long, unsigned long);
static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
[ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
[ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
+ [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
HYPERCALL(xen_version),
HYPERCALL(event_channel_op),
HYPERCALL(sched_op),
return rc;
}
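+
+/* 32-bit compat variant of the vcpu-op whitelist above, forwarding the
+ * permitted operations to compat_vcpu_op(). */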
+static long hvm_vcpu_op_compat32(
+ int cmd, int vcpuid, XEN_GUEST_HANDLE(void) arg)
+{
+ long rc;
+
+ switch ( cmd )
+ {
+ case VCPUOP_register_runstate_memory_area:
+ case VCPUOP_get_runstate_info:
+ rc = compat_vcpu_op(cmd, vcpuid, arg);
+ break;
+ default:
+ rc = -ENOSYS;
+ break;
+ }
+
+ return rc;
+}
+
static hvm_hypercall_t *hvm_hypercall64_table[NR_hypercalls] = {
[ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op,
[ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
+ [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op,
HYPERCALL(xen_version),
HYPERCALL(event_channel_op),
HYPERCALL(sched_op),
static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
[ __HYPERVISOR_memory_op ] = (hvm_hypercall_t *)hvm_memory_op_compat32,
[ __HYPERVISOR_grant_table_op ] = (hvm_hypercall_t *)hvm_grant_table_op,
+ [ __HYPERVISOR_vcpu_op ] = (hvm_hypercall_t *)hvm_vcpu_op_compat32,
HYPERCALL(xen_version),
HYPERCALL(event_channel_op),
HYPERCALL(sched_op),
void hvm_vcpu_reset_state(struct vcpu *v, uint16_t cs, uint16_t ip)
{
- struct domain *d = current->domain;
+ struct domain *d = v->domain;
struct vcpu_guest_context *ctxt;
struct segment_register reg;
return rc;
}
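+
+/* Switch single-stepping of an HVM vcpu on or off.  This relies on the
+ * CPU's Monitor Trap Flag and is rejected when that is unavailable. */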
+int hvm_debug_op(struct vcpu *v, int32_t op)
+{
+ int rc;
+
+ switch ( op )
+ {
+ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON:
+ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF:
+ rc = -ENOSYS;
+ if ( !cpu_has_monitor_trap_flag )
+ break;
+ rc = 0;
+ vcpu_pause(v);
+ v->arch.hvm_vcpu.single_step =
+ (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON);
+ vcpu_unpause(v); /* guest will latch new state */
+ break;
+ default:
+ rc = -ENOSYS;
+ break;
+ }
+
+ return rc;
+}
+
+
/*
* Local variables:
* mode: C
case 2:
case 3:
/* Periodic timer. */
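+        /* The fourth create_periodic_time() argument is now the reload
+         * period (0 for a one-shot timer) rather than a one-shot flag. */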
- create_periodic_time(v, &pit->pt0, period, 0, 0, pit_time_fired,
+ create_periodic_time(v, &pit->pt0, period, period, 0, pit_time_fired,
&pit->count_load_time[channel]);
break;
case 1:
case 4:
/* One-shot timer. */
- create_periodic_time(v, &pit->pt0, period, 0, 1, pit_time_fired,
+ create_periodic_time(v, &pit->pt0, period, 0, 0, pit_time_fired,
&pit->count_load_time[channel]);
break;
default:
extern struct hvm_mmio_handler hpet_mmio_handler;
extern struct hvm_mmio_handler vlapic_mmio_handler;
extern struct hvm_mmio_handler vioapic_mmio_handler;
+extern struct hvm_mmio_handler msixtbl_mmio_handler;
-#define HVM_MMIO_HANDLER_NR 3
+#define HVM_MMIO_HANDLER_NR 4
static struct hvm_mmio_handler *hvm_mmio_handlers[HVM_MMIO_HANDLER_NR] =
{
&hpet_mmio_handler,
&vlapic_mmio_handler,
- &vioapic_mmio_handler
+ &vioapic_mmio_handler,
+ &msixtbl_mmio_handler
};
static int hvm_mmio_access(struct vcpu *v,
static uint8_t effective_mm_type(struct mtrr_state *m,
uint64_t pat,
paddr_t gpa,
- uint32_t pte_flags)
+ uint32_t pte_flags,
+ uint8_t gmtrr_mtype)
{
uint8_t mtrr_mtype, pat_value, effective;
-
- mtrr_mtype = get_mtrr_type(m, gpa);
+
+    /* If get_pat_flags() supplies a dedicated guest MTRR type, just use it
+     * instead of looking one up from the guest's MTRR ranges. */
+ if ( gmtrr_mtype == NO_HARDCODE_MEM_TYPE )
+ mtrr_mtype = get_mtrr_type(m, gpa);
+ else
+ mtrr_mtype = gmtrr_mtype;
pat_value = page_pat_type(pat, pte_flags);
uint32_t get_pat_flags(struct vcpu *v,
uint32_t gl1e_flags,
paddr_t gpaddr,
- paddr_t spaddr)
+ paddr_t spaddr,
+ uint8_t gmtrr_mtype)
{
uint8_t guest_eff_mm_type;
uint8_t shadow_mtrr_type;
/* 1. Get the effective memory type of guest physical address,
* with the pair of guest MTRR and PAT
*/
- guest_eff_mm_type = effective_mm_type(g, pat, gpaddr, gl1e_flags);
+ guest_eff_mm_type = effective_mm_type(g, pat, gpaddr,
+ gl1e_flags, gmtrr_mtype);
/* 2. Get the memory type of host physical address, with MTRR */
shadow_mtrr_type = get_mtrr_type(&mtrr_state, spaddr);
*/
if ( pat_entry_value == INVALID_MEM_TYPE )
{
- gdprintk(XENLOG_WARNING,
- "Conflict occurs for a given guest l1e flags:%x "
- "at %"PRIx64" (the effective mm type:%d), "
- "because the host mtrr type is:%d\n",
- gl1e_flags, (uint64_t)gpaddr, guest_eff_mm_type,
- shadow_mtrr_type);
+ struct domain *d = v->domain;
+ p2m_type_t p2mt;
+ gfn_to_mfn(d, paddr_to_pfn(gpaddr), &p2mt);
+ if (p2m_is_ram(p2mt))
+ gdprintk(XENLOG_WARNING,
+ "Conflict occurs for a given guest l1e flags:%x "
+ "at %"PRIx64" (the effective mm type:%d), "
+ "because the host mtrr type is:%d\n",
+ gl1e_flags, (uint64_t)gpaddr, guest_eff_mm_type,
+ shadow_mtrr_type);
pat_entry_value = PAT_TYPE_UNCACHABLE;
}
/* 4. Get the pte flags */
1, HVMSR_PER_VCPU);
uint8_t epte_get_entry_emt(
- struct domain *d, unsigned long gfn, unsigned long mfn)
+ struct domain *d, unsigned long gfn,
+ unsigned long mfn, uint8_t *igmt, int direct_mmio)
{
uint8_t gmtrr_mtype, hmtrr_mtype;
uint32_t type;
struct vcpu *v = current;
+ *igmt = 0;
+
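+    /* *igmt is set when the type returned here can be used unconditionally
+     * (no IOMMU in use, or a snooping IOMMU), i.e. the guest's own
+     * memory-type choice need not be honoured. */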
if ( (current->domain != d) && ((v = d->vcpu[0]) == NULL) )
return MTRR_TYPE_WRBACK;
if ( hvm_get_mem_pinned_cacheattr(d, gfn, &type) )
return type;
+ if ( !iommu_enabled )
+ {
+ *igmt = 1;
+ return MTRR_TYPE_WRBACK;
+ }
+
+ if ( direct_mmio )
+ return MTRR_TYPE_UNCACHABLE;
+
+ if ( iommu_snoop )
+ {
+ *igmt = 1;
+ return MTRR_TYPE_WRBACK;
+ }
+
gmtrr_mtype = get_mtrr_type(&v->arch.hvm_vcpu.mtrr, (gfn << PAGE_SHIFT));
hmtrr_mtype = get_mtrr_type(&mtrr_state, (mfn << PAGE_SHIFT));
return ((gmtrr_mtype <= hmtrr_mtype) ? gmtrr_mtype : hmtrr_mtype);
period = 1 << (period_code - 1); /* period in 32 Khz cycles */
period = DIV_ROUND((period * 1000000000ULL), 32768); /* period in ns */
- create_periodic_time(v, &s->pt, period, RTC_IRQ,
- 0, rtc_periodic_cb, s);
+ create_periodic_time(v, &s->pt, period, period, RTC_IRQ,
+ rtc_periodic_cb, s);
}
else
{
static void rtc_set_time(RTCState *s)
{
struct tm *tm = &s->current_tm;
+ struct domain *d = vrtc_domain(s);
unsigned long before, after; /* XXX s_time_t */
ASSERT(spin_is_locked(&s->lock));
after = mktime(tm->tm_year, tm->tm_mon, tm->tm_mday,
tm->tm_hour, tm->tm_min, tm->tm_sec);
+
+ /* We use the guest's setting of the RTC to define the local-time
+ * offset for this domain. */
+ d->time_offset_seconds += (after - before);
+ update_domain_wallclock_time(d);
+ /* Also tell qemu-dm about it so it will be remembered for next boot. */
send_timeoffset_req(after - before);
}
if ( !cpu_has_svm_nrips || (vmcb->nextrip <= vmcb->rip) )
return 0;
+#ifndef NDEBUG
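+    /* Debug builds sanity-check that this helper is only used for #VMEXIT
+     * codes for which the next-RIP field is expected to be valid. */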
switch ( vmcb->exitcode )
{
case VMEXIT_CR0_READ... VMEXIT_DR15_WRITE:
/* faults due to instruction intercepts */
/* (exitcodes 84-95) are reserved */
case VMEXIT_IDTR_READ ... VMEXIT_TR_WRITE:
- case VMEXIT_RDTSC ... VMEXIT_SWINT:
- case VMEXIT_INVD ... VMEXIT_INVLPGA:
+ case VMEXIT_RDTSC ... VMEXIT_MSR:
case VMEXIT_VMRUN ... VMEXIT_MWAIT_CONDITIONAL:
- case VMEXIT_IOIO:
/* ...and the rest of the #VMEXITs */
case VMEXIT_CR0_SEL_WRITE:
- case VMEXIT_MSR:
case VMEXIT_EXCEPTION_BP:
- return vmcb->nextrip - vmcb->rip;
+ break;
+ default:
+ BUG();
}
-
- return 0;
+#endif
+
+ return vmcb->nextrip - vmcb->rip;
}
/* First byte: Length. Following bytes: Opcode bytes. */
#endif
ENTRY(svm_asm_do_resume)
+ call svm_intr_assist
+
get_current(bx)
CLGI
jnz .Lsvm_process_softirqs
call svm_asid_handle_vmrun
- call svm_intr_assist
cmpb $0,addr_of(tb_init_done)
jnz .Lsvm_trace
ASSERT(intack.source != hvm_intsrc_none);
- HVMTRACE_2D(INJ_VIRQ, 0x0, /*fake=*/ 1);
+ HVMTRACE_3D(INTR_WINDOW, intack.vector, intack.source,
+ vmcb->eventinj.fields.v?vmcb->eventinj.fields.vector:-1);
/*
* Create a dummy virtual interrupt to intercept as soon as the
vmcb->general1_intercepts |= GENERAL1_INTERCEPT_VINTR;
}
-extern int vmsi_deliver(struct domain *d, int pirq);
-static int hvm_pci_msi_assert(struct domain *d, int pirq)
-{
- return vmsi_deliver(d, pirq);
-}
-
-static void svm_dirq_assist(struct vcpu *v)
-{
- unsigned int irq;
- uint32_t device, intx;
- struct domain *d = v->domain;
- struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
- struct dev_intx_gsi_link *digl;
-
- if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
- return;
-
- for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
- irq < NR_IRQS;
- irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
- {
- if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
- continue;
-
- spin_lock(&d->event_lock);
- if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
- {
- hvm_pci_msi_assert(d, irq);
- spin_unlock(&d->event_lock);
- continue;
- }
-
- stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]);
-
- list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
- {
- device = digl->device;
- intx = digl->intx;
- hvm_pci_intx_assert(d, device, intx);
- hvm_irq_dpci->mirq[irq].pending++;
- }
-
- /*
- * Set a timer to see if the guest can finish the interrupt or not. For
- * example, the guest OS may unmask the PIC during boot, before the
- * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
- * guest will never deal with the irq, then the physical interrupt line
- * will never be deasserted.
- */
- set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
- NOW() + PT_IRQ_TIME_OUT);
- spin_unlock(&d->event_lock);
- }
-}
-
asmlinkage void svm_intr_assist(void)
{
struct vcpu *v = current;
/* Crank the handle on interrupt state. */
pt_update_irq(v);
- svm_dirq_assist(v);
+ hvm_dirq_assist(v);
do {
intack = hvm_vcpu_has_pending_irq(v);
{
case x86_seg_cs:
memcpy(reg, &vmcb->cs, sizeof(*reg));
+ reg->attr.fields.g = reg->limit > 0xFFFFF;
break;
case x86_seg_ds:
memcpy(reg, &vmcb->ds, sizeof(*reg));
+ if ( reg->attr.fields.type != 0 )
+ reg->attr.fields.type |= 0x1;
break;
case x86_seg_es:
memcpy(reg, &vmcb->es, sizeof(*reg));
+ if ( reg->attr.fields.type != 0 )
+ reg->attr.fields.type |= 0x1;
break;
case x86_seg_fs:
svm_sync_vmcb(v);
memcpy(reg, &vmcb->fs, sizeof(*reg));
+ if ( reg->attr.fields.type != 0 )
+ reg->attr.fields.type |= 0x1;
break;
case x86_seg_gs:
svm_sync_vmcb(v);
memcpy(reg, &vmcb->gs, sizeof(*reg));
+ if ( reg->attr.fields.type != 0 )
+ reg->attr.fields.type |= 0x1;
break;
case x86_seg_ss:
memcpy(reg, &vmcb->ss, sizeof(*reg));
reg->attr.fields.dpl = vmcb->cpl;
+ if ( reg->attr.fields.type == 0 )
+ reg->attr.fields.db = 0;
break;
case x86_seg_tr:
svm_sync_vmcb(v);
memcpy(reg, &vmcb->tr, sizeof(*reg));
+ reg->attr.fields.type |= 0x2;
break;
case x86_seg_gdtr:
memcpy(reg, &vmcb->gdtr, sizeof(*reg));
struct vmcb_struct *vmcb = curr->arch.hvm_svm.vmcb;
eventinj_t event = vmcb->eventinj;
+ switch ( trapnr )
+ {
+ case TRAP_debug:
+ if ( guest_cpu_user_regs()->eflags & X86_EFLAGS_TF )
+ {
+ __restore_debug_registers(curr);
+ vmcb->dr6 |= 0x4000;
+ }
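+        /* fall through */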
+ case TRAP_int3:
+ if ( curr->domain->debugger_attached )
+ {
+ /* Debug/Int3: Trap to debugger. */
+ domain_pause_for_debugger();
+ return;
+ }
+ }
+
if ( unlikely(event.fields.v) &&
(event.fields.type == X86_EVENTTYPE_HW_EXCEPTION) )
{
{
HVMTRACE_2D(INJ_EXC, trapnr, errcode);
}
-
- if ( (trapnr == TRAP_debug) &&
- (guest_cpu_user_regs()->eflags & X86_EFLAGS_TF) )
- {
- __restore_debug_registers(curr);
- vmcb->dr6 |= 0x4000;
- }
}
static int svm_event_pending(struct vcpu *v)
* If this GFN is emulated MMIO or marked as read-only, pass the fault
* to the mmio handler.
*/
- mfn = gfn_to_mfn_current(gfn, &p2mt);
+ mfn = gfn_to_mfn_type_current(gfn, &p2mt, p2m_guest);
if ( (p2mt == p2m_mmio_dm) || (p2mt == p2m_ram_ro) )
{
if ( !handle_mmio() )
CR_INTERCEPT_CR8_WRITE);
/* I/O and MSR permission bitmaps. */
- arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE));
+ arch_svm->msrpm = alloc_xenheap_pages(get_order_from_bytes(MSRPM_SIZE), 0);
if ( arch_svm->msrpm == NULL )
return -ENOMEM;
memset(arch_svm->msrpm, 0xff, MSRPM_SIZE);
}
else
#endif
- target = apic_round_robin(vioapic_domain(vioapic),
- vector, deliver_bitmask);
+ target = apic_lowest_prio(vioapic_domain(vioapic),
+ deliver_bitmask);
if ( target != NULL )
{
ioapic_inj_irq(vioapic, target, vector, trig_mode, delivery_mode);
/* Viridian CPUID 4000004, Implementation Recommendations. */
#define CPUID4A_MSR_BASED_APIC (1 << 3)
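+/* Recommend relaxed timing: the guest should tolerate delayed timer
+ * interrupts rather than treating them as watchdog failures. */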
+#define CPUID4A_RELAX_TIMER_INT (1 << 5)
int cpuid_viridian_leaves(unsigned int leaf, unsigned int *eax,
unsigned int *ebx, unsigned int *ecx,
if ( (d->arch.hvm_domain.viridian.guest_os_id.raw == 0) ||
(d->arch.hvm_domain.viridian.guest_os_id.fields.os < 4) )
break;
- *eax = CPUID4A_MSR_BASED_APIC;
+ *eax = (CPUID4A_MSR_BASED_APIC |
+ CPUID4A_RELAX_TIMER_INT);
*ebx = 2047; /* long spin count */
break;
}
}
/* This function is used by both ioapic and lapic. The bitmap is for vcpu_id. */
-struct vlapic *apic_round_robin(
- struct domain *d, uint8_t vector, uint32_t bitmap)
+struct vlapic *apic_lowest_prio(struct domain *d, uint32_t bitmap)
{
- int next, old;
- struct vlapic *target = NULL;
+ int old = d->arch.hvm_domain.irq.round_robin_prev_vcpu;
+ uint32_t ppr, target_ppr = UINT_MAX;
+ struct vlapic *vlapic, *target = NULL;
+ struct vcpu *v;
- old = next = d->arch.hvm_domain.irq.round_robin_prev_vcpu;
+ if ( unlikely((v = d->vcpu[old]) == NULL) )
+ return NULL;
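+    /* Walk the vcpu list, starting just after the vcpu chosen last time,
+     * and pick the enabled vLAPIC with the lowest processor priority (PPR)
+     * among those named in the destination bitmap. */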
do {
- if ( ++next == MAX_VIRT_CPUS )
- next = 0;
- if ( (d->vcpu[next] == NULL) || !test_bit(next, &bitmap) )
- continue;
- target = vcpu_vlapic(d->vcpu[next]);
- if ( vlapic_enabled(target) )
- break;
- target = NULL;
- } while ( next != old );
+ v = v->next_in_list ? : d->vcpu[0];
+ vlapic = vcpu_vlapic(v);
+ if ( test_bit(v->vcpu_id, &bitmap) && vlapic_enabled(vlapic) &&
+ ((ppr = vlapic_get_ppr(vlapic)) < target_ppr) )
+ {
+ target = vlapic;
+ target_ppr = ppr;
+ }
+ } while ( v->vcpu_id != old );
- d->arch.hvm_domain.irq.round_robin_prev_vcpu = next;
+ if ( target != NULL )
+ d->arch.hvm_domain.irq.round_robin_prev_vcpu =
+ vlapic_vcpu(target)->vcpu_id;
return target;
}
if ( delivery_mode == APIC_DM_LOWEST )
{
- target = apic_round_robin(vlapic_domain(v), vector, lpr_map);
+ target = apic_lowest_prio(vlapic_domain(v), lpr_map);
if ( target != NULL )
rc = vlapic_accept_irq(vlapic_vcpu(target), delivery_mode,
vector, level, trig_mode);
(uint32_t)val * vlapic->hw.timer_divisor;
vlapic_set_reg(vlapic, APIC_TMICT, val);
- create_periodic_time(current, &vlapic->pt, period, vlapic->pt.irq,
- !vlapic_lvtt_period(vlapic), vlapic_pt_cb,
+ create_periodic_time(current, &vlapic->pt, period,
+ vlapic_lvtt_period(vlapic) ? period : 0,
+ vlapic->pt.irq, vlapic_pt_cb,
&vlapic->timer_last_update);
vlapic->timer_last_update = vlapic->pt.last_plt_gtime;
period = ((uint64_t)APIC_BUS_CYCLE_NS *
(uint32_t)tmict * s->hw.timer_divisor);
s->pt.irq = vlapic_get_reg(s, APIC_LVTT) & APIC_VECTOR_MASK;
- create_periodic_time(vlapic_vcpu(s), &s->pt, period, s->pt.irq,
- !vlapic_lvtt_period(s), vlapic_pt_cb,
+ create_periodic_time(vlapic_vcpu(s), &s->pt, period,
+ vlapic_lvtt_period(s) ? period : 0,
+ s->pt.irq, vlapic_pt_cb,
&s->timer_last_update);
s->timer_last_update = s->pt.last_plt_gtime;
}
"vector=%x trig_mode=%x\n",
dest, dest_mode, delivery_mode, vector, trig_mode);
- if ( !test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags) )
+ if ( !( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_GUEST_MSI ) )
{
gdprintk(XENLOG_WARNING, "pirq %x not msi \n", pirq);
return 0;
{
case dest_LowestPrio:
{
- target = apic_round_robin(d, vector, deliver_bitmask);
+ target = apic_lowest_prio(d, deliver_bitmask);
if ( target != NULL )
vmsi_inj_irq(d, target, vector, trig_mode, delivery_mode);
else
return 1;
}
+/* MSI-X mask bit hypervisor interception */
+struct msixtbl_entry
+{
+ struct list_head list;
+ atomic_t refcnt; /* how many bind_pt_irq called for the device */
+
+    /* TODO: resolve the potential race with destruction of the pdev */
+ struct pci_dev *pdev;
+ unsigned long gtable; /* gpa of msix table */
+ unsigned long table_len;
+ unsigned long table_flags[MAX_MSIX_TABLE_ENTRIES / BITS_PER_LONG + 1];
+
+ struct rcu_head rcu;
+};
+
+static struct msixtbl_entry *msixtbl_find_entry(
+ struct vcpu *v, unsigned long addr)
+{
+ struct msixtbl_entry *entry;
+ struct domain *d = v->domain;
+
+ list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+ if ( addr >= entry->gtable &&
+ addr < entry->gtable + entry->table_len )
+ return entry;
+
+ return NULL;
+}
+
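+/* Translate a guest-physical address within an intercepted MSI-X table
+ * into Xen's fixmap mapping of the corresponding physical table page. */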
+static void __iomem *msixtbl_addr_to_virt(
+ struct msixtbl_entry *entry, unsigned long addr)
+{
+ int idx, nr_page;
+
+ if ( !entry )
+ return NULL;
+
+ nr_page = (addr >> PAGE_SHIFT) -
+ (entry->gtable >> PAGE_SHIFT);
+
+ if ( !entry->pdev )
+ return NULL;
+
+ idx = entry->pdev->msix_table_idx[nr_page];
+ if ( !idx )
+ return NULL;
+
+ return (void *)(fix_to_virt(idx) +
+ (addr & ((1UL << PAGE_SHIFT) - 1)));
+}
+
+static int msixtbl_read(
+ struct vcpu *v, unsigned long address,
+ unsigned long len, unsigned long *pval)
+{
+ unsigned long offset;
+ struct msixtbl_entry *entry;
+ void *virt;
+ int r = X86EMUL_UNHANDLEABLE;
+
+ rcu_read_lock();
+
+ if ( len != 4 )
+ goto out;
+
+ offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
+ if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET)
+ goto out;
+
+ entry = msixtbl_find_entry(v, address);
+ virt = msixtbl_addr_to_virt(entry, address);
+ if ( !virt )
+ goto out;
+
+ *pval = readl(virt);
+ r = X86EMUL_OKAY;
+
+out:
+ rcu_read_unlock();
+ return r;
+}
+
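+/* Only 32-bit accesses to an entry's vector-control (mask) word are handled
+ * here and applied straight to the physical table.  Writes to the address
+ * or data words, and any mask write that follows them, are left to the
+ * device model. */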
+static int msixtbl_write(struct vcpu *v, unsigned long address,
+ unsigned long len, unsigned long val)
+{
+ unsigned long offset;
+ struct msixtbl_entry *entry;
+ void *virt;
+ int nr_entry;
+ int r = X86EMUL_UNHANDLEABLE;
+
+ rcu_read_lock();
+
+ if ( len != 4 )
+ goto out;
+
+ entry = msixtbl_find_entry(v, address);
+    nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
+
+ offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
+ if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET)
+ {
+ set_bit(nr_entry, &entry->table_flags);
+ goto out;
+ }
+
+ /* exit to device model if address/data has been modified */
+ if ( test_and_clear_bit(nr_entry, &entry->table_flags) )
+ goto out;
+
+ virt = msixtbl_addr_to_virt(entry, address);
+ if ( !virt )
+ goto out;
+
+ writel(val, virt);
+ r = X86EMUL_OKAY;
+
+out:
+ rcu_read_unlock();
+ return r;
+}
+
+static int msixtbl_range(struct vcpu *v, unsigned long addr)
+{
+ struct msixtbl_entry *entry;
+ void *virt;
+
+ rcu_read_lock();
+
+ entry = msixtbl_find_entry(v, addr);
+ virt = msixtbl_addr_to_virt(entry, addr);
+
+ rcu_read_unlock();
+
+ return !!virt;
+}
+
+struct hvm_mmio_handler msixtbl_mmio_handler = {
+ .check_handler = msixtbl_range,
+ .read_handler = msixtbl_read,
+ .write_handler = msixtbl_write
+};
+
+static void add_msixtbl_entry(struct domain *d,
+ struct pci_dev *pdev,
+ uint64_t gtable,
+ struct msixtbl_entry *entry)
+{
+ u32 len;
+
+ memset(entry, 0, sizeof(struct msixtbl_entry));
+
+ INIT_LIST_HEAD(&entry->list);
+ INIT_RCU_HEAD(&entry->rcu);
+ atomic_set(&entry->refcnt, 0);
+
+ len = pci_msix_get_table_len(pdev);
+ entry->table_len = len;
+ entry->pdev = pdev;
+ entry->gtable = (unsigned long) gtable;
+
+ list_add_rcu(&entry->list, &d->arch.hvm_domain.msixtbl_list);
+}
+
+static void free_msixtbl_entry(struct rcu_head *rcu)
+{
+ struct msixtbl_entry *entry;
+
+ entry = container_of (rcu, struct msixtbl_entry, rcu);
+
+ xfree(entry);
+}
+
+static void del_msixtbl_entry(struct msixtbl_entry *entry)
+{
+ list_del_rcu(&entry->list);
+ call_rcu(&entry->rcu, free_msixtbl_entry);
+}
+
+int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable)
+{
+ irq_desc_t *irq_desc;
+ struct msi_desc *msi_desc;
+ struct pci_dev *pdev;
+ struct msixtbl_entry *entry, *new_entry;
+ int r = -EINVAL;
+
+ ASSERT(spin_is_locked(&pcidevs_lock));
+
+ /*
+     * Calling xmalloc() with IRQs disabled makes check_lock() fail on the
+     * xenpool->lock, so allocate the entry beforehand.
+ */
+ new_entry = xmalloc(struct msixtbl_entry);
+ if ( !new_entry )
+ return -ENOMEM;
+
+ irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+ if ( !irq_desc )
+ {
+ xfree(new_entry);
+ return r;
+ }
+
+ if ( irq_desc->handler != &pci_msi_type )
+ goto out;
+
+ msi_desc = irq_desc->msi_desc;
+ if ( !msi_desc )
+ goto out;
+
+ pdev = msi_desc->dev;
+
+ spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+ list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+ if ( pdev == entry->pdev )
+ goto found;
+
+ entry = new_entry;
+ new_entry = NULL;
+ add_msixtbl_entry(d, pdev, gtable, entry);
+
+found:
+ atomic_inc(&entry->refcnt);
+ spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+ r = 0;
+
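+    /* new_entry is NULL here if it was consumed by add_msixtbl_entry();
+     * xfree(NULL) is a no-op, so only an unused allocation gets freed. */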
+out:
+ spin_unlock_irq(&irq_desc->lock);
+ xfree(new_entry);
+ return r;
+}
+
+void msixtbl_pt_unregister(struct domain *d, int pirq)
+{
+ irq_desc_t *irq_desc;
+ struct msi_desc *msi_desc;
+ struct pci_dev *pdev;
+ struct msixtbl_entry *entry;
+
+ ASSERT(spin_is_locked(&pcidevs_lock));
+
+ irq_desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+ if ( !irq_desc )
+ return;
+
+ if ( irq_desc->handler != &pci_msi_type )
+ goto out;
+
+ msi_desc = irq_desc->msi_desc;
+ if ( !msi_desc )
+ goto out;
+
+ pdev = msi_desc->dev;
+
+ spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+ list_for_each_entry( entry, &d->arch.hvm_domain.msixtbl_list, list )
+ if ( pdev == entry->pdev )
+ goto found;
+
+ spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+out:
+ spin_unlock_irq(&irq_desc->lock);
+ return;
+
+found:
+    if ( atomic_dec_and_test(&entry->refcnt) )
+ del_msixtbl_entry(entry);
+
+ spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+ spin_unlock_irq(&irq_desc->lock);
+}
+
+void msixtbl_pt_cleanup(struct domain *d)
+{
+ struct msixtbl_entry *entry, *temp;
+ unsigned long flags;
+
+    /* msixtbl_list_lock must be acquired with IRQs disabled for check_lock() */
+ local_irq_save(flags);
+ spin_lock(&d->arch.hvm_domain.msixtbl_list_lock);
+
+ list_for_each_entry_safe( entry, temp,
+ &d->arch.hvm_domain.msixtbl_list, list )
+ del_msixtbl_entry(entry);
+
+ spin_unlock(&d->arch.hvm_domain.msixtbl_list_lock);
+ local_irq_restore(flags);
+}
.globl vmx_asm_do_vmentry
vmx_asm_do_vmentry:
+ call vmx_intr_assist
+
get_current(bx)
cli
cmpl $0,(r(dx),r(ax),1)
jnz .Lvmx_process_softirqs
- call vmx_intr_assist
-
- testb $0xff,VCPU_vmx_emul(r(bx))
- jnz .Lvmx_goto_realmode
+ testb $0xff,VCPU_vmx_emulate(r(bx))
+ jnz .Lvmx_goto_emulator
+ testb $0xff,VCPU_vmx_realmode(r(bx))
+ jz .Lvmx_not_realmode
+ cmpw $0,VCPU_vm86_seg_mask(r(bx))
+ jnz .Lvmx_goto_emulator
+ call_with_regs(vmx_enter_realmode)
+.Lvmx_not_realmode:
mov VCPU_hvm_guest_cr2(r(bx)),r(ax)
mov r(ax),%cr2
call vmx_trace_vmentry
/*.Lvmx_resume:*/
VMRESUME
+ sti
call vm_resume_fail
ud2
.Lvmx_launch:
VMLAUNCH
+ sti
call vm_launch_fail
ud2
-.Lvmx_goto_realmode:
+.Lvmx_goto_emulator:
sti
call_with_regs(vmx_realmode)
jmp vmx_asm_do_vmentry
ASSERT(intack.source != hvm_intsrc_none);
+ if ( unlikely(tb_init_done) )
+ {
+ unsigned int intr = __vmread(VM_ENTRY_INTR_INFO);
+ HVMTRACE_3D(INTR_WINDOW, intack.vector, intack.source,
+ (intr & INTR_INFO_VALID_MASK) ? intr & 0xff : -1);
+ }
+
if ( (intack.source == hvm_intsrc_nmi) && cpu_has_vmx_vnmi )
{
/*
}
}
-extern int vmsi_deliver(struct domain *d, int pirq);
-static int hvm_pci_msi_assert(struct domain *d, int pirq)
-{
- return vmsi_deliver(d, pirq);
-}
-
-static void vmx_dirq_assist(struct vcpu *v)
-{
- unsigned int irq;
- uint32_t device, intx;
- struct domain *d = v->domain;
- struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
- struct dev_intx_gsi_link *digl;
-
- if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
- return;
-
- for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
- irq < NR_IRQS;
- irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
- {
- if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
- continue;
-
- spin_lock(&d->event_lock);
- if ( test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[irq].flags) )
- {
- hvm_pci_msi_assert(d, irq);
- spin_unlock(&d->event_lock);
- continue;
- }
-
- stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]);
-
- list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
- {
- device = digl->device;
- intx = digl->intx;
- hvm_pci_intx_assert(d, device, intx);
- hvm_irq_dpci->mirq[irq].pending++;
- }
-
- /*
- * Set a timer to see if the guest can finish the interrupt or not. For
- * example, the guest OS may unmask the PIC during boot, before the
- * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
- * guest will never deal with the irq, then the physical interrupt line
- * will never be deasserted.
- */
- set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
- NOW() + PT_IRQ_TIME_OUT);
- spin_unlock(&d->event_lock);
- }
-}
-
asmlinkage void vmx_intr_assist(void)
{
struct hvm_intack intack;
unsigned int tpr_threshold = 0;
enum hvm_intblk intblk;
+    /* Block event injection while single-stepping with the Monitor Trap Flag. */
+ if ( unlikely(v->arch.hvm_vcpu.single_step) )
+ {
+ v->arch.hvm_vmx.exec_control |= CPU_BASED_MONITOR_TRAP_FLAG;
+ __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+ return;
+ }
+
/* Crank the handle on interrupt state. */
pt_update_irq(v);
- vmx_dirq_assist(v);
+ hvm_dirq_assist(v);
do {
intack = hvm_vcpu_has_pending_irq(v);
if ( intack.source == hvm_intsrc_nmi )
{
- vmx_inject_nmi(v);
+ vmx_inject_nmi();
}
else
{
HVMTRACE_2D(INJ_VIRQ, intack.vector, /*fake=*/ 0);
- vmx_inject_extint(v, intack.vector);
+ vmx_inject_extint(intack.vector);
pt_intr_post(v, intack);
}
frame[1] = csr->sel;
frame[2] = regs->eflags & ~X86_EFLAGS_RF;
- if ( hvmemul_ctxt->ctxt.addr_size == 32 )
+ /* We can't test hvmemul_ctxt->ctxt.sp_size: it may not be initialised. */
+ if ( hvmemul_ctxt->seg_reg[x86_seg_ss].attr.fields.db )
{
regs->esp -= 6;
pstk = regs->esp;
static void realmode_emulate_one(struct hvm_emulate_ctxt *hvmemul_ctxt)
{
struct vcpu *curr = current;
- unsigned long seg_reg_dirty;
uint32_t intr_info;
int rc;
- seg_reg_dirty = hvmemul_ctxt->seg_reg_dirty;
- hvmemul_ctxt->seg_reg_dirty = 0;
+ perfc_incr(realmode_emulations);
rc = hvm_emulate_one(hvmemul_ctxt);
- if ( test_bit(x86_seg_cs, &hvmemul_ctxt->seg_reg_dirty) )
- {
- curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_CS;
- if ( hvmemul_get_seg_reg(x86_seg_cs, hvmemul_ctxt)->sel & 3 )
- curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_CS;
- }
-
- if ( test_bit(x86_seg_ss, &hvmemul_ctxt->seg_reg_dirty) )
- {
- curr->arch.hvm_vmx.vmxemul &= ~VMXEMUL_BAD_SS;
- if ( hvmemul_get_seg_reg(x86_seg_ss, hvmemul_ctxt)->sel & 3 )
- curr->arch.hvm_vmx.vmxemul |= VMXEMUL_BAD_SS;
- }
-
- hvmemul_ctxt->seg_reg_dirty |= seg_reg_dirty;
-
if ( rc == X86EMUL_UNHANDLEABLE )
{
gdprintk(XENLOG_ERR, "Failed to emulate insn.\n");
hvmemul_ctxt->exn_insn_len = 0;
}
- if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
+ if ( unlikely(curr->domain->debugger_attached) &&
+ ((hvmemul_ctxt->exn_vector == TRAP_debug) ||
+ (hvmemul_ctxt->exn_vector == TRAP_int3)) )
+ {
+ domain_pause_for_debugger();
+ }
+ else if ( curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE )
{
gdprintk(XENLOG_ERR, "Exception %02x in protected mode.\n",
hvmemul_ctxt->exn_vector);
goto fail;
}
-
- realmode_deliver_exception(
- hvmemul_ctxt->exn_vector,
- hvmemul_ctxt->exn_insn_len,
- hvmemul_ctxt);
+ else
+ {
+ realmode_deliver_exception(
+ hvmemul_ctxt->exn_vector,
+ hvmemul_ctxt->exn_insn_len,
+ hvmemul_ctxt);
+ }
}
return;
intr_info = 0;
}
- while ( curr->arch.hvm_vmx.vmxemul &&
+ curr->arch.hvm_vmx.vmx_emulate = 1;
+ while ( curr->arch.hvm_vmx.vmx_emulate &&
!softirq_pending(smp_processor_id()) &&
(curr->arch.hvm_vcpu.io_state == HVMIO_none) )
{
* in real mode, because we don't emulate protected-mode IDT vectoring.
*/
if ( unlikely(!(++emulations & 15)) &&
- !(curr->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) &&
+ curr->arch.hvm_vmx.vmx_realmode &&
hvm_local_events_need_delivery(curr) )
break;
+
realmode_emulate_one(&hvmemul_ctxt);
+
+        /* Keep emulating only while some segment state is still unsafe. */
+ if ( curr->arch.hvm_vmx.vmx_realmode )
+ curr->arch.hvm_vmx.vmx_emulate =
+ (curr->arch.hvm_vmx.vm86_segment_mask != 0);
+ else
+ curr->arch.hvm_vmx.vmx_emulate =
+ ((hvmemul_ctxt.seg_reg[x86_seg_cs].sel & 3)
+ || (hvmemul_ctxt.seg_reg[x86_seg_ss].sel & 3));
}
- if ( !curr->arch.hvm_vmx.vmxemul )
+ /* Need to emulate next time if we've started an IO operation */
+ if ( curr->arch.hvm_vcpu.io_state != HVMIO_none )
+ curr->arch.hvm_vmx.vmx_emulate = 1;
+
+ if ( !curr->arch.hvm_vmx.vmx_emulate && !curr->arch.hvm_vmx.vmx_realmode )
{
/*
* Cannot enter protected mode with bogus selector RPLs and DPLs.
static u32 vmcs_revision_id __read_mostly;
+static void __init vmx_display_features(void)
+{
+ int printed = 0;
+
+ printk("VMX: Supported advanced features:\n");
+
+#define P(p,s) if ( p ) { printk(" - %s\n", s); printed = 1; }
+ P(cpu_has_vmx_virtualize_apic_accesses, "APIC MMIO access virtualisation");
+ P(cpu_has_vmx_tpr_shadow, "APIC TPR shadow");
+ P(cpu_has_vmx_ept, "Extended Page Tables (EPT)");
+ P(cpu_has_vmx_vpid, "Virtual-Processor Identifiers (VPID)");
+ P(cpu_has_vmx_vnmi, "Virtual NMI");
+ P(cpu_has_vmx_msr_bitmap, "MSR direct-access bitmap");
+#undef P
+
+ if ( !printed )
+ printk(" - none\n");
+}
+
static u32 adjust_vmx_controls(u32 ctl_min, u32 ctl_opt, u32 msr)
{
u32 vmx_msr_low, vmx_msr_high, ctl = ctl_min | ctl_opt;
(opt_softtsc ? CPU_BASED_RDTSC_EXITING : 0));
opt = (CPU_BASED_ACTIVATE_MSR_BITMAP |
CPU_BASED_TPR_SHADOW |
+ CPU_BASED_MONITOR_TRAP_FLAG |
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS);
_vmx_cpu_based_exec_control = adjust_vmx_controls(
min, opt, MSR_IA32_VMX_PROCBASED_CTLS);
#endif
min = VM_EXIT_ACK_INTR_ON_EXIT;
- opt = 0;
+ opt = VM_EXIT_SAVE_GUEST_PAT | VM_EXIT_LOAD_HOST_PAT;
#ifdef __x86_64__
min |= VM_EXIT_IA32E_MODE;
#endif
_vmx_vmexit_control = adjust_vmx_controls(
min, opt, MSR_IA32_VMX_EXIT_CTLS);
- min = opt = 0;
+ min = 0;
+ opt = VM_ENTRY_LOAD_GUEST_PAT;
_vmx_vmentry_control = adjust_vmx_controls(
min, opt, MSR_IA32_VMX_ENTRY_CTLS);
vmx_vmexit_control = _vmx_vmexit_control;
vmx_vmentry_control = _vmx_vmentry_control;
cpu_has_vmx_ins_outs_instr_info = !!(vmx_basic_msr_high & (1U<<22));
+ vmx_display_features();
}
else
{
{
unsigned int cpu = smp_processor_id();
+ __vmwrite(HOST_GDTR_BASE,
+ (unsigned long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY));
__vmwrite(HOST_IDTR_BASE, (unsigned long)idt_tables[cpu]);
__vmwrite(HOST_TR_SELECTOR, TSS_ENTRY << 3);
/* VMCS controls. */
__vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmx_pin_based_exec_control);
- __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
- __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
v->arch.hvm_vmx.exec_control = vmx_cpu_based_exec_control;
v->arch.hvm_vmx.secondary_exec_control = vmx_secondary_exec_control;
else
{
v->arch.hvm_vmx.secondary_exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+ vmx_vmexit_control &= ~(VM_EXIT_SAVE_GUEST_PAT |
+ VM_EXIT_LOAD_HOST_PAT);
+ vmx_vmentry_control &= ~VM_ENTRY_LOAD_GUEST_PAT;
}
+    /* Do not enable Monitor Trap Flag until single-step debugging starts. */
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
+
__vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+ __vmwrite(VM_EXIT_CONTROLS, vmx_vmexit_control);
+ __vmwrite(VM_ENTRY_CONTROLS, vmx_vmentry_control);
+
if ( cpu_has_vmx_secondary_exec_control )
__vmwrite(SECONDARY_VM_EXEC_CONTROL,
v->arch.hvm_vmx.secondary_exec_control);
vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_CS);
vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_ESP);
vmx_disable_intercept_for_msr(v, MSR_IA32_SYSENTER_EIP);
+ if ( cpu_has_vmx_pat && paging_mode_hap(d) )
+ vmx_disable_intercept_for_msr(v, MSR_IA32_CR_PAT);
}
/* I/O access bitmap. */
__vmwrite(IO_BITMAP_A, virt_to_maddr((char *)hvm_io_bitmap + 0));
__vmwrite(IO_BITMAP_B, virt_to_maddr((char *)hvm_io_bitmap + PAGE_SIZE));
- /* Host GDTR base. */
- __vmwrite(HOST_GDTR_BASE, GDT_VIRT_START(v));
-
/* Host data selectors. */
__vmwrite(HOST_SS_SELECTOR, __HYPERVISOR_DS);
__vmwrite(HOST_DS_SELECTOR, __HYPERVISOR_DS);
__vmwrite(VIRTUAL_PROCESSOR_ID, v->arch.hvm_vmx.vpid);
}
+ if ( cpu_has_vmx_pat && paging_mode_hap(d) )
+ {
+ u64 host_pat, guest_pat;
+
+ rdmsrl(MSR_IA32_CR_PAT, host_pat);
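+        /* 0x0007040600070406 is the architectural power-on default PAT. */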
+ guest_pat = 0x7040600070406ULL;
+
+ __vmwrite(HOST_PAT, host_pat);
+ __vmwrite(GUEST_PAT, guest_pat);
+#ifdef __i386__
+ __vmwrite(HOST_PAT_HIGH, host_pat >> 32);
+ __vmwrite(GUEST_PAT_HIGH, guest_pat >> 32);
+#endif
+ }
+
vmx_vmcs_exit(v);
paging_update_paging_modes(v); /* will update HOST & GUEST_CR3 as reqd */
if ( unlikely(v->arch.hvm_vcpu.debug_state_latch != debug_state) )
{
unsigned long intercepts = __vmread(EXCEPTION_BITMAP);
- unsigned long mask = (1U << TRAP_debug) | (1U << TRAP_int3);
+ unsigned long mask = 1u << TRAP_int3;
+
+ if ( !cpu_has_monitor_trap_flag )
+ mask |= 1u << TRAP_debug;
+
v->arch.hvm_vcpu.debug_state_latch = debug_state;
if ( debug_state )
intercepts |= mask;
reset_stack_and_jump(vmx_asm_do_vmentry);
}
-static void vmx_dump_sel(char *name, enum x86_segment seg)
-{
- struct segment_register sreg;
- hvm_get_segment_register(current, seg, &sreg);
- printk("%s: sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016llx\n",
- name, sreg.sel, sreg.attr.bytes, sreg.limit,
- (unsigned long long)sreg.base);
-}
-
static unsigned long vmr(unsigned long field)
{
int rc;
return rc ? 0 : val;
}
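+
+/* Dump a guest segment straight from the VMCS, deriving the AR-bytes,
+ * limit and base encodings from their fixed offsets relative to the
+ * selector field's encoding. */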
+static void vmx_dump_sel(char *name, uint32_t selector)
+{
+ uint32_t sel, attr, limit;
+ uint64_t base;
+ sel = vmr(selector);
+ attr = vmr(selector + (GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR));
+ limit = vmr(selector + (GUEST_ES_LIMIT - GUEST_ES_SELECTOR));
+ base = vmr(selector + (GUEST_ES_BASE - GUEST_ES_SELECTOR));
+ printk("%s: sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016"PRIx64"\n",
+ name, sel, attr, limit, base);
+}
+
+static void vmx_dump_sel2(char *name, uint32_t lim)
+{
+ uint32_t limit;
+ uint64_t base;
+ limit = vmr(lim);
+ base = vmr(lim + (GUEST_GDTR_BASE - GUEST_GDTR_LIMIT));
+ printk("%s: limit=0x%08x, base=0x%016"PRIx64"\n",
+ name, limit, base);
+}
+
void vmcs_dump_vcpu(struct vcpu *v)
{
struct cpu_user_regs *regs = &v->arch.guest_context.user_regs;
(unsigned long long)vmr(GUEST_SYSENTER_ESP),
(int)vmr(GUEST_SYSENTER_CS),
(unsigned long long)vmr(GUEST_SYSENTER_EIP));
- vmx_dump_sel("CS", x86_seg_cs);
- vmx_dump_sel("DS", x86_seg_ds);
- vmx_dump_sel("SS", x86_seg_ss);
- vmx_dump_sel("ES", x86_seg_es);
- vmx_dump_sel("FS", x86_seg_fs);
- vmx_dump_sel("GS", x86_seg_gs);
- vmx_dump_sel("GDTR", x86_seg_gdtr);
- vmx_dump_sel("LDTR", x86_seg_ldtr);
- vmx_dump_sel("IDTR", x86_seg_idtr);
- vmx_dump_sel("TR", x86_seg_tr);
+ vmx_dump_sel("CS", GUEST_CS_SELECTOR);
+ vmx_dump_sel("DS", GUEST_DS_SELECTOR);
+ vmx_dump_sel("SS", GUEST_SS_SELECTOR);
+ vmx_dump_sel("ES", GUEST_ES_SELECTOR);
+ vmx_dump_sel("FS", GUEST_FS_SELECTOR);
+ vmx_dump_sel("GS", GUEST_GS_SELECTOR);
+ vmx_dump_sel2("GDTR", GUEST_GDTR_LIMIT);
+ vmx_dump_sel("LDTR", GUEST_LDTR_SELECTOR);
+ vmx_dump_sel2("IDTR", GUEST_IDTR_LIMIT);
+ vmx_dump_sel("TR", GUEST_TR_SELECTOR);
+ printk("Guest PAT = 0x%08x%08x\n",
+ (uint32_t)vmr(GUEST_PAT_HIGH), (uint32_t)vmr(GUEST_PAT));
x = (unsigned long long)vmr(TSC_OFFSET_HIGH) << 32;
x |= (uint32_t)vmr(TSC_OFFSET);
printk("TSC Offset = %016llx\n", x);
(unsigned long long)vmr(HOST_SYSENTER_ESP),
(int)vmr(HOST_SYSENTER_CS),
(unsigned long long)vmr(HOST_SYSENTER_EIP));
+ printk("Host PAT = 0x%08x%08x\n",
+ (uint32_t)vmr(HOST_PAT_HIGH), (uint32_t)vmr(HOST_PAT));
printk("*** Control State ***\n");
printk("PinBased=%08x CPUBased=%08x SecondaryExec=%08x\n",
#include <asm/hvm/vpt.h>
#include <public/hvm/save.h>
#include <asm/hvm/trace.h>
+#include <asm/xenoprof.h>
enum handler_return { HNDL_done, HNDL_unhandled, HNDL_exception_raised };
{
vmx_destroy_vmcs(v);
vpmu_destroy(v);
+ passive_domain_destroy(v);
}
#ifdef __x86_64__
check_long_mode:
if ( !(hvm_long_mode_enabled(v)) )
{
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ vmx_inject_hw_exception(TRAP_gp_fault, 0);
return HNDL_exception_raised;
}
break;
uncanonical_address:
HVM_DBG_LOG(DBG_LEVEL_0, "Not cano address of msr write %x", ecx);
gp_fault:
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ vmx_inject_hw_exception(TRAP_gp_fault, 0);
exception_raised:
return HNDL_exception_raised;
}
wrmsrl(msr_index[i], host_msr_state->msrs[i]);
clear_bit(i, &host_msr_state->flags);
}
-
- if ( cpu_has_nx && !(read_efer() & EFER_NX) )
- write_efer(read_efer() | EFER_NX);
}
static void vmx_save_guest_msrs(struct vcpu *v)
clear_bit(i, &guest_flags);
}
- if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & (EFER_NX | EFER_SCE) )
+ if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & EFER_SCE )
{
HVM_DBG_LOG(DBG_LEVEL_2,
"restore guest's EFER with value %lx",
v->arch.hvm_vcpu.guest_efer);
- write_efer((read_efer() & ~(EFER_NX | EFER_SCE)) |
- (v->arch.hvm_vcpu.guest_efer & (EFER_NX | EFER_SCE)));
+ write_efer((read_efer() & ~EFER_SCE) |
+ (v->arch.hvm_vcpu.guest_efer & EFER_SCE));
}
}
#else /* __i386__ */
#define vmx_save_host_msrs() ((void)0)
-
-static void vmx_restore_host_msrs(void)
-{
- if ( cpu_has_nx && !(read_efer() & EFER_NX) )
- write_efer(read_efer() | EFER_NX);
-}
+#define vmx_restore_host_msrs() ((void)0)
#define vmx_save_guest_msrs(v) ((void)0)
-
-static void vmx_restore_guest_msrs(struct vcpu *v)
-{
- if ( (v->arch.hvm_vcpu.guest_efer ^ read_efer()) & EFER_NX )
- {
- HVM_DBG_LOG(DBG_LEVEL_2,
- "restore guest's EFER with value %lx",
- v->arch.hvm_vcpu.guest_efer);
- write_efer((read_efer() & ~EFER_NX) |
- (v->arch.hvm_vcpu.guest_efer & EFER_NX));
- }
-}
+#define vmx_restore_guest_msrs(v) ((void)0)
static enum handler_return long_mode_do_msr_read(struct cpu_user_regs *regs)
{
vpmu_load(v);
}
+
+/* SDM volume 3b section 22.3.1.2: we can only enter virtual 8086 mode
+ * if all of CS, SS, DS, ES, FS and GS are 16bit ring-3 data segments.
+ * The guest thinks it's got ring-0 segments, so we need to fudge
+ * things. We store the ring-3 version in the VMCS to avoid lots of
+ * shuffling on vmenter and vmexit, and translate in these accessors. */
+
+#define rm_cs_attr (((union segment_attributes) { \
+ .fields = { .type = 0xb, .s = 1, .dpl = 0, .p = 1, .avl = 0, \
+ .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
+#define rm_ds_attr (((union segment_attributes) { \
+ .fields = { .type = 0x3, .s = 1, .dpl = 0, .p = 1, .avl = 0, \
+ .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
+#define vm86_ds_attr (((union segment_attributes) { \
+ .fields = { .type = 0x3, .s = 1, .dpl = 3, .p = 1, .avl = 0, \
+ .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
+#define vm86_tr_attr (((union segment_attributes) { \
+ .fields = { .type = 0xb, .s = 0, .dpl = 0, .p = 1, .avl = 0, \
+ .l = 0, .db = 0, .g = 0, .pad = 0 } }).bytes)
+
static void vmx_get_segment_register(struct vcpu *v, enum x86_segment seg,
struct segment_register *reg)
{
/* Unusable flag is folded into Present flag. */
if ( attr & (1u<<16) )
reg->attr.fields.p = 0;
+
+ /* Adjust for virtual 8086 mode */
+ if ( v->arch.hvm_vmx.vmx_realmode && seg <= x86_seg_tr
+ && !(v->arch.hvm_vmx.vm86_segment_mask & (1u << seg)) )
+ {
+ struct segment_register *sreg = &v->arch.hvm_vmx.vm86_saved_seg[seg];
+ if ( seg == x86_seg_tr )
+ *reg = *sreg;
+ else if ( reg->base != sreg->base || seg == x86_seg_ss )
+ {
+ /* If the guest's reloaded the segment, remember the new version.
+ * We can't tell if the guest reloaded the segment with another
+ * one that has the same base. By default we assume it hasn't,
+ * since we don't want to lose big-real-mode segment attributes,
+ * but for SS we assume it has: the Ubuntu graphical bootloader
+ * does this and gets badly confused if we leave the old SS in
+ * place. */
+ reg->attr.bytes = (seg == x86_seg_cs ? rm_cs_attr : rm_ds_attr);
+ *sreg = *reg;
+ }
+ else
+ {
+ /* Always give realmode guests a selector that matches the base
+ * but keep the attr and limit from before */
+ *reg = *sreg;
+ reg->sel = reg->base >> 4;
+ }
+ }
}
static void vmx_set_segment_register(struct vcpu *v, enum x86_segment seg,
struct segment_register *reg)
{
- uint32_t attr;
+ uint32_t attr, sel, limit;
+ uint64_t base;
+ sel = reg->sel;
attr = reg->attr.bytes;
+ limit = reg->limit;
+ base = reg->base;
+
+ /* Adjust CS/SS/DS/ES/FS/GS/TR for virtual 8086 mode */
+ if ( v->arch.hvm_vmx.vmx_realmode && seg <= x86_seg_tr )
+ {
+ /* Remember the proper contents */
+ v->arch.hvm_vmx.vm86_saved_seg[seg] = *reg;
+
+ if ( seg == x86_seg_tr )
+ {
+ if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS] )
+ {
+ sel = 0;
+ attr = vm86_tr_attr;
+ limit = 0xff;
+ base = v->domain->arch.hvm_domain.params[HVM_PARAM_VM86_TSS];
+ v->arch.hvm_vmx.vm86_segment_mask &= ~(1u << seg);
+ }
+ else
+ v->arch.hvm_vmx.vm86_segment_mask |= (1u << seg);
+ }
+ else
+ {
+ /* Try to fake it out as a 16bit data segment. This could
+ * cause confusion for the guest if it reads the selector,
+ * but otherwise we have to emulate if *any* segment hasn't
+ * been reloaded. */
+ if ( base < 0x100000 && !(base & 0xf) && limit >= 0xffff
+ && reg->attr.fields.p )
+ {
+ sel = base >> 4;
+ attr = vm86_ds_attr;
+ limit = 0xffff;
+ v->arch.hvm_vmx.vm86_segment_mask &= ~(1u << seg);
+ }
+ else
+ v->arch.hvm_vmx.vm86_segment_mask |= (1u << seg);
+ }
+ }
+
attr = ((attr & 0xf00) << 4) | (attr & 0xff);
/* Not-present must mean unusable. */
attr |= (1u << 16);
/* VMX has strict consistency requirement for flag G. */
- attr |= !!(reg->limit >> 20) << 15;
+ attr |= !!(limit >> 20) << 15;
vmx_vmcs_enter(v);
switch ( seg )
{
case x86_seg_cs:
- __vmwrite(GUEST_CS_SELECTOR, reg->sel);
- __vmwrite(GUEST_CS_LIMIT, reg->limit);
- __vmwrite(GUEST_CS_BASE, reg->base);
+ __vmwrite(GUEST_CS_SELECTOR, sel);
+ __vmwrite(GUEST_CS_LIMIT, limit);
+ __vmwrite(GUEST_CS_BASE, base);
__vmwrite(GUEST_CS_AR_BYTES, attr);
break;
case x86_seg_ds:
- __vmwrite(GUEST_DS_SELECTOR, reg->sel);
- __vmwrite(GUEST_DS_LIMIT, reg->limit);
- __vmwrite(GUEST_DS_BASE, reg->base);
+ __vmwrite(GUEST_DS_SELECTOR, sel);
+ __vmwrite(GUEST_DS_LIMIT, limit);
+ __vmwrite(GUEST_DS_BASE, base);
__vmwrite(GUEST_DS_AR_BYTES, attr);
break;
case x86_seg_es:
- __vmwrite(GUEST_ES_SELECTOR, reg->sel);
- __vmwrite(GUEST_ES_LIMIT, reg->limit);
- __vmwrite(GUEST_ES_BASE, reg->base);
+ __vmwrite(GUEST_ES_SELECTOR, sel);
+ __vmwrite(GUEST_ES_LIMIT, limit);
+ __vmwrite(GUEST_ES_BASE, base);
__vmwrite(GUEST_ES_AR_BYTES, attr);
break;
case x86_seg_fs:
- __vmwrite(GUEST_FS_SELECTOR, reg->sel);
- __vmwrite(GUEST_FS_LIMIT, reg->limit);
- __vmwrite(GUEST_FS_BASE, reg->base);
+ __vmwrite(GUEST_FS_SELECTOR, sel);
+ __vmwrite(GUEST_FS_LIMIT, limit);
+ __vmwrite(GUEST_FS_BASE, base);
__vmwrite(GUEST_FS_AR_BYTES, attr);
break;
case x86_seg_gs:
- __vmwrite(GUEST_GS_SELECTOR, reg->sel);
- __vmwrite(GUEST_GS_LIMIT, reg->limit);
- __vmwrite(GUEST_GS_BASE, reg->base);
+ __vmwrite(GUEST_GS_SELECTOR, sel);
+ __vmwrite(GUEST_GS_LIMIT, limit);
+ __vmwrite(GUEST_GS_BASE, base);
__vmwrite(GUEST_GS_AR_BYTES, attr);
break;
case x86_seg_ss:
- __vmwrite(GUEST_SS_SELECTOR, reg->sel);
- __vmwrite(GUEST_SS_LIMIT, reg->limit);
- __vmwrite(GUEST_SS_BASE, reg->base);
+ __vmwrite(GUEST_SS_SELECTOR, sel);
+ __vmwrite(GUEST_SS_LIMIT, limit);
+ __vmwrite(GUEST_SS_BASE, base);
__vmwrite(GUEST_SS_AR_BYTES, attr);
break;
case x86_seg_tr:
- __vmwrite(GUEST_TR_SELECTOR, reg->sel);
- __vmwrite(GUEST_TR_LIMIT, reg->limit);
- __vmwrite(GUEST_TR_BASE, reg->base);
+ __vmwrite(GUEST_TR_SELECTOR, sel);
+ __vmwrite(GUEST_TR_LIMIT, limit);
+ __vmwrite(GUEST_TR_BASE, base);
        /* VMX checks that the busy flag (bit 1) is set. */
__vmwrite(GUEST_TR_AR_BYTES, attr | 2);
break;
case x86_seg_gdtr:
- __vmwrite(GUEST_GDTR_LIMIT, reg->limit);
- __vmwrite(GUEST_GDTR_BASE, reg->base);
+ __vmwrite(GUEST_GDTR_LIMIT, limit);
+ __vmwrite(GUEST_GDTR_BASE, base);
break;
case x86_seg_idtr:
- __vmwrite(GUEST_IDTR_LIMIT, reg->limit);
- __vmwrite(GUEST_IDTR_BASE, reg->base);
+ __vmwrite(GUEST_IDTR_LIMIT, limit);
+ __vmwrite(GUEST_IDTR_BASE, base);
break;
case x86_seg_ldtr:
- __vmwrite(GUEST_LDTR_SELECTOR, reg->sel);
- __vmwrite(GUEST_LDTR_LIMIT, reg->limit);
- __vmwrite(GUEST_LDTR_BASE, reg->base);
+ __vmwrite(GUEST_LDTR_SELECTOR, sel);
+ __vmwrite(GUEST_LDTR_LIMIT, limit);
+ __vmwrite(GUEST_LDTR_BASE, base);
__vmwrite(GUEST_LDTR_AR_BYTES, attr);
break;
default:
switch ( cr )
{
case 0: {
+ int realmode;
unsigned long hw_cr0_mask =
X86_CR0_NE | X86_CR0_PG | X86_CR0_PE;
vmx_fpu_enter(v);
}
- v->arch.hvm_vmx.vmxemul &= ~VMXEMUL_REALMODE;
- if ( !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE) )
- v->arch.hvm_vmx.vmxemul |= VMXEMUL_REALMODE;
+ realmode = !(v->arch.hvm_vcpu.guest_cr[0] & X86_CR0_PE);
+ if ( realmode != v->arch.hvm_vmx.vmx_realmode )
+ {
+ enum x86_segment s;
+ struct segment_register reg[x86_seg_tr + 1];
+
+ /* Entering or leaving real mode: adjust the segment registers.
+ * Need to read them all either way, as realmode reads can update
+ * the saved values we'll use when returning to prot mode. */
+ for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
+            vmx_get_segment_register(v, s, &reg[s]);
+ v->arch.hvm_vmx.vmx_realmode = realmode;
+
+ if ( realmode )
+ {
+ for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
+                    vmx_set_segment_register(v, s, &reg[s]);
+ v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME;
+ __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
+ __vmwrite(EXCEPTION_BITMAP, 0xffffffff);
+ }
+ else
+ {
+ for ( s = x86_seg_cs ; s <= x86_seg_tr ; s++ )
+ if ( !(v->arch.hvm_vmx.vm86_segment_mask & (1<<s)) )
+ vmx_set_segment_register(
+ v, s, &v->arch.hvm_vmx.vm86_saved_seg[s]);
+ v->arch.hvm_vcpu.hw_cr[4] =
+ ((v->arch.hvm_vcpu.hw_cr[4] & ~X86_CR4_VME)
+ |(v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_VME));
+ __vmwrite(GUEST_CR4, v->arch.hvm_vcpu.hw_cr[4]);
+ __vmwrite(EXCEPTION_BITMAP,
+ HVM_TRAP_MASK
+ | (paging_mode_hap(v->domain) ?
+ 0 : (1U << TRAP_page_fault))
+ | (1U << TRAP_no_device));
+ }
+ }
v->arch.hvm_vcpu.hw_cr[0] =
v->arch.hvm_vcpu.guest_cr[0] | hw_cr0_mask;
if ( paging_mode_hap(v->domain) )
v->arch.hvm_vcpu.hw_cr[4] &= ~X86_CR4_PAE;
v->arch.hvm_vcpu.hw_cr[4] |= v->arch.hvm_vcpu.guest_cr[4];
+ if ( v->arch.hvm_vmx.vmx_realmode )
+ v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_VME;
if ( paging_mode_hap(v->domain) && !hvm_paging_enabled(v) )
{
v->arch.hvm_vcpu.hw_cr[4] |= X86_CR4_PSE;
#endif
if ( v == current )
- write_efer((read_efer() & ~(EFER_NX|EFER_SCE)) |
- (v->arch.hvm_vcpu.guest_efer & (EFER_NX|EFER_SCE)));
+ write_efer((read_efer() & ~EFER_SCE) |
+ (v->arch.hvm_vcpu.guest_efer & EFER_SCE));
}
static void vmx_flush_guest_tlbs(void)
}
}
-static void __vmx_inject_exception(
- struct vcpu *v, int trap, int type, int error_code)
+static void __vmx_inject_exception(int trap, int type, int error_code)
{
unsigned long intr_fields;
+ struct vcpu *curr = current;
/*
* NB. Callers do not need to worry about clearing STI/MOV-SS blocking:
__vmwrite(VM_ENTRY_INTR_INFO, intr_fields);
- if ( trap == TRAP_page_fault )
- HVMTRACE_LONG_2D(PF_INJECT, error_code,
- TRC_PAR_LONG(v->arch.hvm_vcpu.guest_cr[2]));
- else
- HVMTRACE_2D(INJ_EXC, trap, error_code);
+ /* Can't inject exceptions in virtual 8086 mode because they would
+ * use the protected-mode IDT. Emulate at the next vmenter instead. */
+ if ( curr->arch.hvm_vmx.vmx_realmode )
+ curr->arch.hvm_vmx.vmx_emulate = 1;
}
-void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code)
+void vmx_inject_hw_exception(int trap, int error_code)
{
unsigned long intr_info = __vmread(VM_ENTRY_INTR_INFO);
+ struct vcpu *curr = current;
+
+ switch ( trap )
+ {
+ case TRAP_debug:
+ if ( guest_cpu_user_regs()->eflags & X86_EFLAGS_TF )
+ {
+ __restore_debug_registers(curr);
+ write_debugreg(6, read_debugreg(6) | 0x4000);
+ }
+ if ( cpu_has_monitor_trap_flag )
+ break;
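+        /* fall through */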
+ case TRAP_int3:
+ if ( curr->domain->debugger_attached )
+ {
+ /* Debug/Int3: Trap to debugger. */
+ domain_pause_for_debugger();
+ return;
+ }
+ }
if ( unlikely(intr_info & INTR_INFO_VALID_MASK) &&
(((intr_info >> 8) & 7) == X86_EVENTTYPE_HW_EXCEPTION) )
error_code = 0;
}
- __vmx_inject_exception(v, trap, X86_EVENTTYPE_HW_EXCEPTION, error_code);
+ __vmx_inject_exception(trap, X86_EVENTTYPE_HW_EXCEPTION, error_code);
+
+ if ( trap == TRAP_page_fault )
+ HVMTRACE_LONG_2D(PF_INJECT, error_code,
+ TRC_PAR_LONG(current->arch.hvm_vcpu.guest_cr[2]));
+ else
+ HVMTRACE_2D(INJ_EXC, trap, error_code);
}
-void vmx_inject_extint(struct vcpu *v, int trap)
+void vmx_inject_extint(int trap)
{
- __vmx_inject_exception(v, trap, X86_EVENTTYPE_EXT_INTR,
+ __vmx_inject_exception(trap, X86_EVENTTYPE_EXT_INTR,
HVM_DELIVER_NO_ERROR_CODE);
}
-void vmx_inject_nmi(struct vcpu *v)
+void vmx_inject_nmi(void)
{
- __vmx_inject_exception(v, 2, X86_EVENTTYPE_NMI,
+ __vmx_inject_exception(2, X86_EVENTTYPE_NMI,
HVM_DELIVER_NO_ERROR_CODE);
}
static void vmx_inject_exception(
unsigned int trapnr, int errcode, unsigned long cr2)
{
- struct vcpu *curr = current;
-
- vmx_inject_hw_exception(curr, trapnr, errcode);
-
if ( trapnr == TRAP_page_fault )
- curr->arch.hvm_vcpu.guest_cr[2] = cr2;
+ current->arch.hvm_vcpu.guest_cr[2] = cr2;
- if ( (trapnr == TRAP_debug) &&
- (guest_cpu_user_regs()->eflags & X86_EFLAGS_TF) )
- {
- __restore_debug_registers(curr);
- write_debugreg(6, read_debugreg(6) | 0x4000);
- }
+ vmx_inject_hw_exception(trapnr, errcode);
}
static int vmx_event_pending(struct vcpu *v)
static void vmx_set_info_guest(struct vcpu *v)
{
+ unsigned long intr_shadow;
+
vmx_vmcs_enter(v);
+
__vmwrite(GUEST_DR7, v->arch.guest_context.debugreg[7]);
+
+ /*
+ * If the interruptibility-state field indicates blocking by STI,
+ * setting the TF flag in the EFLAGS may cause VM entry to fail
+ * and crash the guest. See SDM 3B 22.3.1.5.
+     * Resetting the VMX_INTR_SHADOW_STI flag looks hackish, but setting
+     * GUEST_PENDING_DBG_EXCEPTIONS.BS here would cause an immediate
+     * vmexit and hence make no progress.
+ */
+ intr_shadow = __vmread(GUEST_INTERRUPTIBILITY_INFO);
+ if ( v->domain->debugger_attached &&
+ (v->arch.guest_context.user_regs.eflags & X86_EFLAGS_TF) &&
+ (intr_shadow & VMX_INTR_SHADOW_STI) )
+ {
+ intr_shadow &= ~VMX_INTR_SHADOW_STI;
+ __vmwrite(GUEST_INTERRUPTIBILITY_INFO, intr_shadow);
+ }
+
vmx_vmcs_exit(v);
}
}
if ( regs->eflags & X86_EFLAGS_TF )
- vmx_inject_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE, 0);
+ vmx_inject_hw_exception(TRAP_debug, HVM_DELIVER_NO_ERROR_CODE);
}
static void vmx_fpu_dirty_intercept(void)
{
u64 msr_content = 0;
u32 ecx = regs->ecx, eax, edx;
- struct vcpu *v = current;
HVM_DBG_LOG(DBG_LEVEL_1, "ecx=%x", ecx);
default:
if ( vpmu_do_rdmsr(regs) )
goto done;
+ if ( passive_domain_do_rdmsr(regs) )
+ goto done;
switch ( long_mode_do_msr_read(regs) )
{
case HNDL_unhandled:
return X86EMUL_OKAY;
gp_fault:
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ vmx_inject_hw_exception(TRAP_gp_fault, 0);
return X86EMUL_EXCEPTION;
}
if ( (rc < 0) ||
(vmx_add_host_load_msr(ecx) < 0) )
- vmx_inject_hw_exception(v, TRAP_machine_check, 0);
+ vmx_inject_hw_exception(TRAP_machine_check, 0);
else
{
__vmwrite(GUEST_IA32_DEBUGCTL, msr_content);
default:
if ( vpmu_do_wrmsr(regs) )
return X86EMUL_OKAY;
+ if ( passive_domain_do_wrmsr(regs) )
+ return X86EMUL_OKAY;
if ( wrmsr_viridian_regs(ecx, regs->eax, regs->edx) )
break;
return X86EMUL_OKAY;
gp_fault:
- vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ vmx_inject_hw_exception(TRAP_gp_fault, 0);
return X86EMUL_EXCEPTION;
}
fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs);
fastcall void smp_error_interrupt(struct cpu_user_regs *regs);
fastcall void smp_pmu_apic_interrupt(struct cpu_user_regs *regs);
-#ifdef CONFIG_X86_MCE_P4THERMAL
+fastcall void smp_cmci_interrupt(struct cpu_user_regs *regs);
+#ifdef CONFIG_X86_MCE_THERMAL
fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs);
#endif
case ERROR_APIC_VECTOR:
smp_error_interrupt(regs);
break;
+ case CMCI_APIC_VECTOR:
+ smp_cmci_interrupt(regs);
+ break;
case PMU_APIC_VECTOR:
smp_pmu_apic_interrupt(regs);
break;
-#ifdef CONFIG_X86_MCE_P4THERMAL
+#ifdef CONFIG_X86_MCE_THERMAL
case THERMAL_APIC_VECTOR:
smp_thermal_interrupt(regs);
break;
{
unsigned long gla_validity = qualification & EPT_GLA_VALIDITY_MASK;
struct domain *d = current->domain;
- unsigned long gfn = gpa >> PAGE_SHIFT;
+ unsigned long gla, gfn = gpa >> PAGE_SHIFT;
mfn_t mfn;
p2m_type_t t;
- if ( unlikely(qualification & EPT_GAW_VIOLATION) )
- {
- gdprintk(XENLOG_ERR, "EPT violation: guest physical address %"PRIpaddr
- " exceeded its width limit.\n", gpa);
- goto crash;
- }
+ mfn = gfn_to_mfn_guest(d, gfn, &t);
- if ( unlikely(gla_validity == EPT_GLA_VALIDITY_RSVD) ||
- unlikely(gla_validity == EPT_GLA_VALIDITY_PDPTR_LOAD) )
+ /* There are three legitimate reasons for taking an EPT violation.
+ * One is a guest access to MMIO space. */
+ if ( gla_validity == EPT_GLA_VALIDITY_MATCH && p2m_is_mmio(t) )
{
- gdprintk(XENLOG_ERR, "EPT violation: reserved bit or "
- "pdptr load violation.\n");
- goto crash;
+ handle_mmio();
+ return;
}
- mfn = gfn_to_mfn(d, gfn, &t);
- if ( (t != p2m_ram_ro) && p2m_is_ram(t) && paging_mode_log_dirty(d) )
+ /* The second is log-dirty mode, writing to a read-only page;
+ * The third is populating a populate-on-demand page. */
+ if ( (gla_validity == EPT_GLA_VALIDITY_MATCH
+ || gla_validity == EPT_GLA_VALIDITY_GPT_WALK)
+ && p2m_is_ram(t) && (t != p2m_ram_ro) )
{
- paging_mark_dirty(d, mfn_x(mfn));
- p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
- flush_tlb_mask(d->domain_dirty_cpumask);
+ if ( paging_mode_log_dirty(d) )
+ {
+ paging_mark_dirty(d, mfn_x(mfn));
+ p2m_change_type(d, gfn, p2m_ram_logdirty, p2m_ram_rw);
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ }
return;
}
- /* This can only happen in log-dirty mode, writing back A/D bits. */
- if ( unlikely(gla_validity == EPT_GLA_VALIDITY_GPT_WALK) )
- goto crash;
-
- ASSERT(gla_validity == EPT_GLA_VALIDITY_MATCH);
- handle_mmio();
-
- return;
+ /* Everything else is an error. */
+ gla = __vmread(GUEST_LINEAR_ADDRESS);
+ gdprintk(XENLOG_ERR, "EPT violation %#lx (%c%c%c/%c%c%c), "
+ "gpa %#"PRIpaddr", mfn %#lx, type %i.\n",
+ qualification,
+ (qualification & EPT_READ_VIOLATION) ? 'r' : '-',
+ (qualification & EPT_WRITE_VIOLATION) ? 'w' : '-',
+ (qualification & EPT_EXEC_VIOLATION) ? 'x' : '-',
+ (qualification & EPT_EFFECTIVE_READ) ? 'r' : '-',
+ (qualification & EPT_EFFECTIVE_WRITE) ? 'w' : '-',
+ (qualification & EPT_EFFECTIVE_EXEC) ? 'x' : '-',
+ gpa, mfn_x(mfn), t);
+
+ if ( qualification & EPT_GAW_VIOLATION )
+ gdprintk(XENLOG_ERR, " --- GPA too wide (max %u bits)\n",
+ 9 * (unsigned) d->arch.hvm_domain.vmx.ept_control.gaw + 21);
+
+ switch ( gla_validity )
+ {
+ case EPT_GLA_VALIDITY_PDPTR_LOAD:
+ gdprintk(XENLOG_ERR, " --- PDPTR load failed\n");
+ break;
+ case EPT_GLA_VALIDITY_GPT_WALK:
+ gdprintk(XENLOG_ERR, " --- guest PT walk to %#lx failed\n", gla);
+ break;
+ case EPT_GLA_VALIDITY_RSVD:
+ gdprintk(XENLOG_ERR, " --- GLA_validity 2 (reserved)\n");
+ break;
+ case EPT_GLA_VALIDITY_MATCH:
+ gdprintk(XENLOG_ERR, " --- guest access to %#lx failed\n", gla);
+ break;
+ }
- crash:
domain_crash(d);
}
domain_crash(curr->domain);
}
+asmlinkage void vmx_enter_realmode(struct cpu_user_regs *regs)
+{
+ struct vcpu *v = current;
+
+ /* Adjust RFLAGS to enter virtual 8086 mode with IOPL == 3. Since
+ * we have CR4.VME == 1 and our own TSS with an empty interrupt
+ * redirection bitmap, all software INTs will be handled by vm86 */
+ v->arch.hvm_vmx.vm86_saved_eflags = regs->eflags;
+ regs->eflags |= (X86_EFLAGS_VM | X86_EFLAGS_IOPL);
+}
+
asmlinkage void vmx_vmexit_handler(struct cpu_user_regs *regs)
{
unsigned int exit_reason, idtv_info;
perfc_incra(vmexits, exit_reason);
- if ( exit_reason != EXIT_REASON_EXTERNAL_INTERRUPT )
- local_irq_enable();
+ /* Handle the interrupt we missed before allowing any more in. */
+ if ( exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT )
+ vmx_do_extint(regs);
+
+ /* Now enable interrupts so it's safe to take locks. */
+ local_irq_enable();
if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
return vmx_failed_vmentry(exit_reason, regs);
+ if ( v->arch.hvm_vmx.vmx_realmode )
+ {
+ unsigned int vector;
+
+ /* Put RFLAGS back the way the guest wants it */
+ regs->eflags &= ~(X86_EFLAGS_VM | X86_EFLAGS_IOPL);
+ regs->eflags |= (v->arch.hvm_vmx.vm86_saved_eflags & X86_EFLAGS_IOPL);
+
+ /* Unless this exit was for an interrupt, we've hit something
+ * vm86 can't handle. Try again, using the emulator. */
+ switch ( exit_reason )
+ {
+ case EXIT_REASON_EXCEPTION_NMI:
+ vector = __vmread(VM_EXIT_INTR_INFO) & INTR_INFO_VECTOR_MASK;
+ if ( vector != TRAP_page_fault
+ && vector != TRAP_nmi
+ && vector != TRAP_machine_check )
+ {
+ perfc_incr(realmode_exits);
+ v->arch.hvm_vmx.vmx_emulate = 1;
+ return;
+ }
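+ /* Fall through: page faults, NMIs and machine checks are
+ * handled by the normal exit-reason dispatch below. */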
+ case EXIT_REASON_EXTERNAL_INTERRUPT:
+ case EXIT_REASON_INIT:
+ case EXIT_REASON_SIPI:
+ case EXIT_REASON_PENDING_VIRT_INTR:
+ case EXIT_REASON_PENDING_VIRT_NMI:
+ case EXIT_REASON_MACHINE_CHECK:
+ break;
+ default:
+ v->arch.hvm_vmx.vmx_emulate = 1;
+ perfc_incr(realmode_exits);
+ return;
+ }
+ }
+
hvm_maybe_deassert_evtchn_irq();
/* Event delivery caused this intercept? Queue for redelivery. */
*/
exit_qualification = __vmread(EXIT_QUALIFICATION);
write_debugreg(6, exit_qualification | 0xffff0ff0);
- if ( !v->domain->debugger_attached )
+ if ( !v->domain->debugger_attached || cpu_has_monitor_trap_flag )
goto exit_and_crash;
domain_pause_for_debugger();
break;
}
v->arch.hvm_vcpu.guest_cr[2] = exit_qualification;
- vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
+ vmx_inject_hw_exception(TRAP_page_fault, regs->error_code);
break;
case TRAP_nmi:
if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) !=
break;
}
case EXIT_REASON_EXTERNAL_INTERRUPT:
- vmx_do_extint(regs);
+ /* Already handled above. */
break;
case EXIT_REASON_TRIPLE_FAULT:
hvm_triple_fault();
case EXIT_REASON_VMWRITE:
case EXIT_REASON_VMXOFF:
case EXIT_REASON_VMXON:
- vmx_inject_hw_exception(v, TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
+ vmx_inject_hw_exception(TRAP_invalid_op, HVM_DELIVER_NO_ERROR_CODE);
break;
case EXIT_REASON_TPR_BELOW_THRESHOLD:
case EXIT_REASON_IO_INSTRUCTION:
case EXIT_REASON_APIC_ACCESS:
if ( !handle_mmio() )
- hvm_inject_exception(TRAP_gp_fault, 0, 0);
+ vmx_inject_hw_exception(TRAP_gp_fault, 0);
break;
case EXIT_REASON_INVD:
break;
}
+ case EXIT_REASON_MONITOR_TRAP_FLAG:
+ {
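+ /* The monitor trap flag is a one-shot control: clear it and,
+ * if a debugger requested single-stepping, hand over to it. */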
+ v->arch.hvm_vmx.exec_control &= ~CPU_BASED_MONITOR_TRAP_FLAG;
+ __vmwrite(CPU_BASED_VM_EXEC_CONTROL, v->arch.hvm_vmx.exec_control);
+ if ( v->domain->debugger_attached && v->arch.hvm_vcpu.single_step )
+ domain_pause_for_debugger();
+ break;
+ }
+
default:
exit_and_crash:
gdprintk(XENLOG_ERR, "Bad vmexit (reason %x)\n", exit_reason);
#include <asm/hvm/vmx/vpmu.h>
#include <asm/hvm/vmx/vpmu_core2.h>
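+/* Core 2 fixed-function performance counter MSRs. */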
+u32 core2_counters_msr[] = {
+ MSR_CORE_PERF_FIXED_CTR0,
+ MSR_CORE_PERF_FIXED_CTR1,
+ MSR_CORE_PERF_FIXED_CTR2};
+
+/* Core 2 Non-architectural Performance Control MSRs. */
+u32 core2_ctrls_msr[] = {
+ MSR_CORE_PERF_FIXED_CTR_CTRL,
+ MSR_IA32_PEBS_ENABLE,
+ MSR_IA32_DS_AREA};
+
+struct pmumsr core2_counters = {
+ 3,
+ core2_counters_msr
+};
+
+struct pmumsr core2_ctrls = {
+ 3,
+ core2_ctrls_msr
+};
static int arch_pmc_cnt;
static int core2_get_pmc_count(void)
return 0;
if ( unlikely(!(vpmu->flags & VPMU_CONTEXT_ALLOCATED)) &&
- !core2_vpmu_alloc_resource(current) )
+ (vpmu->context != NULL ||
+ !core2_vpmu_alloc_resource(current)) )
return 0;
vpmu->flags |= VPMU_CONTEXT_ALLOCATED;
case MSR_CORE_PERF_GLOBAL_STATUS:
gdprintk(XENLOG_INFO, "Can not write readonly MSR: "
"MSR_PERF_GLOBAL_STATUS(0x38E)!\n");
- vmx_inject_hw_exception(current, TRAP_gp_fault, 0);
+ vmx_inject_hw_exception(TRAP_gp_fault, 0);
return 1;
case MSR_IA32_PEBS_ENABLE:
if ( msr_content & 1 )
struct vpmu_struct *vpmu = vcpu_vpmu(v);
struct core2_vpmu_context *core2_vpmu_cxt = vpmu->context;
- if ( !vpmu->flags & VPMU_CONTEXT_ALLOCATED )
+ if ( !(vpmu->flags & VPMU_CONTEXT_ALLOCATED) )
return;
xfree(core2_vpmu_cxt->pmu_enable);
xfree(vpmu->context);
if ( cpu_has_vmx_msr_bitmap )
core2_vpmu_unset_msr_bitmap(v->arch.hvm_vmx.msr_bitmap);
release_pmu_ownship(PMU_OWNER_HVM);
+ vpmu->flags &= ~VPMU_CONTEXT_ALLOCATED;
}
struct arch_vpmu_ops core2_vpmu_ops = {
/* prio = ffs(mask ROR vpic->priority_add); */
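+ /* The "q" constraint keeps mask in a byte-addressable register,
+ * as required by the %b1 operand modifier. */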
asm ( "ror %%cl,%b1 ; bsf %1,%0"
- : "=r" (prio) : "r" ((uint32_t)mask), "c" (vpic->priority_add) );
+ : "=r" (prio) : "q" ((uint32_t)mask), "c" (vpic->priority_add) );
return prio;
}
set_timer(&pt->timer, pt->scheduled);
}
- vcpu_kick(pt->vcpu);
+ if ( !pt_irq_masked(pt) )
+ vcpu_kick(pt->vcpu);
pt_unlock(pt);
}
}
void create_periodic_time(
- struct vcpu *v, struct periodic_time *pt, uint64_t period,
- uint8_t irq, char one_shot, time_cb *cb, void *data)
+ struct vcpu *v, struct periodic_time *pt, uint64_t delta,
+ uint64_t period, uint8_t irq, time_cb *cb, void *data)
{
ASSERT(pt->source != 0);
pt->do_not_freeze = 0;
pt->irq_issued = 0;
- /* Periodic timer must be at least 0.9ms. */
- if ( (period < 900000) && !one_shot )
+ /* Periodic timer must be at least 0.1ms. */
+ if ( (period < 100000) && period )
{
if ( !test_and_set_bool(pt->warned_timeout_too_short) )
gdprintk(XENLOG_WARNING, "HVM_PlatformTime: program too "
"small period %"PRIu64"\n", period);
- period = 900000;
+ period = 100000;
}
pt->period = period;
pt->last_plt_gtime = hvm_get_guest_time(pt->vcpu);
pt->irq = irq;
pt->period_cycles = (u64)period;
- pt->one_shot = one_shot;
- pt->scheduled = NOW() + period;
- /*
- * Offset LAPIC ticks from other timer ticks. Otherwise guests which use
- * LAPIC ticks for process accounting can see long sequences of process
- * ticks incorrectly accounted to interrupt processing.
- */
- if ( pt->source == PTSRC_lapic )
- pt->scheduled += period >> 1;
+ pt->one_shot = !period;
+ pt->scheduled = NOW() + delta;
+
+ if ( !pt->one_shot )
+ {
+ if ( v->domain->arch.hvm_domain.params[HVM_PARAM_VPT_ALIGN] )
+ {
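+ /* Align the first expiry to a period boundary (HVM_PARAM_VPT_ALIGN). */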
+ pt->scheduled = align_timer(pt->scheduled, pt->period);
+ }
+ else if ( pt->source == PTSRC_lapic )
+ {
+ /*
+ * Offset LAPIC ticks from other timer ticks. Otherwise guests
+ * which use LAPIC ticks for process accounting can see long
+ * sequences of process ticks incorrectly accounted to interrupt
+ * processing (seen with RHEL3 guest).
+ */
+ pt->scheduled += delta >> 1;
+ }
+ }
+
pt->cb = cb;
pt->priv = data;
BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR)
BUILD_SMP_INTERRUPT(pmu_apic_interrupt,PMU_APIC_VECTOR)
BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR)
+BUILD_SMP_INTERRUPT(cmci_interrupt,CMCI_APIC_VECTOR)
#define IRQ(x,y) \
IRQ##x##y##_interrupt
init_8259A(0);
- for ( i = 0; i < NR_IRQS; i++ )
+ for ( i = 0; i < NR_VECTORS; i++ )
{
irq_desc[i].status = IRQ_DISABLED;
irq_desc[i].handler = &no_irq_type;
}
/* Never allocate the hypercall vector or Linux/BSD fast-trap vector. */
- vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN;
- vector_irq[0x80] = NEVER_ASSIGN;
+ vector_irq[HYPERCALL_VECTOR] = NEVER_ASSIGN_IRQ;
+ vector_irq[0x80] = NEVER_ASSIGN_IRQ;
apic_intr_init();
static struct { int pin, apic; } ioapic_i8259 = { -1, -1 };
static DEFINE_SPINLOCK(ioapic_lock);
-static DEFINE_SPINLOCK(vector_lock);
int skip_ioapic_setup;
static struct irq_pin_list {
int apic, pin, next;
-} irq_2_pin[PIN_MAP_SIZE];
+} irq_2_pin[PIN_MAP_SIZE] = {
+ [0 ... PIN_MAP_SIZE-1].pin = -1
+};
static int irq_2_pin_free_entry = NR_IRQS;
-int vector_irq[NR_VECTORS] __read_mostly = {
- [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN};
-
/*
* The common case is 1:1 IRQ<->pin mappings. Sometimes there are
* shared ISA-space IRQs, so we have to support them. We are super
}
/* irq_vectors is indexed by the sum of all RTEs in all I/O APICs. */
-u8 irq_vector[NR_IRQ_VECTORS] __read_mostly;
-
-int free_irq_vector(int vector)
-{
- int irq;
-
- BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
-
- spin_lock(&vector_lock);
- if ((irq = vector_irq[vector]) == AUTO_ASSIGN)
- vector_irq[vector] = FREE_TO_ASSIGN;
- spin_unlock(&vector_lock);
-
- return (irq == AUTO_ASSIGN) ? 0 : -EINVAL;
-}
-
-int assign_irq_vector(int irq)
-{
- static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
- unsigned vector;
-
- BUG_ON(irq >= NR_IRQ_VECTORS);
-
- spin_lock(&vector_lock);
-
- if ((irq != AUTO_ASSIGN) && (IO_APIC_VECTOR(irq) > 0)) {
- spin_unlock(&vector_lock);
- return IO_APIC_VECTOR(irq);
- }
-
- vector = current_vector;
- while (vector_irq[vector] != FREE_TO_ASSIGN) {
- vector += 8;
- if (vector > LAST_DYNAMIC_VECTOR)
- vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
-
- if (vector == current_vector) {
- spin_unlock(&vector_lock);
- return -ENOSPC;
- }
- }
-
- current_vector = vector;
- vector_irq[vector] = irq;
- if (irq != AUTO_ASSIGN)
- IO_APIC_VECTOR(irq) = vector;
-
- spin_unlock(&vector_lock);
-
- return vector;
-}
+u8 irq_vector[NR_IRQS] __read_mostly;
static struct hw_interrupt_type ioapic_level_type;
static struct hw_interrupt_type ioapic_edge_type;
int i, apic;
unsigned long flags;
- for (i = 0; i < PIN_MAP_SIZE; i++) {
- irq_2_pin[i].pin = -1;
- irq_2_pin[i].next = 0;
- }
-
/* Initialise dynamic irq_2_pin free list. */
for (i = NR_IRQS; i < PIN_MAP_SIZE; i++)
irq_2_pin[i].next = i + 1;
static int __init timer_irq_works(void)
{
extern unsigned long pit0_ticks;
- unsigned long t1;
+ unsigned long t1, flags;
t1 = pit0_ticks;
mb();
+ local_save_flags(flags);
local_irq_enable();
/* Let ten ticks pass... */
mdelay((10 * 1000) / HZ);
+ local_irq_restore(flags);
/*
* Expect a few ticks at least, to be sure some possible
.set_affinity = set_ioapic_affinity_vector,
};
-static void mask_msi_vector(unsigned int vector)
-{
- mask_msi_irq(vector);
-}
-
-static void unmask_msi_vector(unsigned int vector)
-{
- unmask_msi_irq(vector);
-}
-
static unsigned int startup_msi_vector(unsigned int vector)
{
- dprintk(XENLOG_INFO, "startup msi vector %x\n", vector);
- unmask_msi_irq(vector);
+ unmask_msi_vector(vector);
return 0;
}
static void ack_msi_vector(unsigned int vector)
{
- ack_APIC_irq();
+ if ( msi_maskable_irq(irq_desc[vector].msi_desc) )
+ ack_APIC_irq(); /* ACKTYPE_NONE */
}
static void end_msi_vector(unsigned int vector)
{
+ if ( !msi_maskable_irq(irq_desc[vector].msi_desc) )
+ ack_APIC_irq(); /* ACKTYPE_EOI */
}
static void shutdown_msi_vector(unsigned int vector)
{
- dprintk(XENLOG_INFO, "shutdown msi vector %x\n", vector);
- mask_msi_irq(vector);
+ mask_msi_vector(vector);
}
static void set_msi_affinity_vector(unsigned int vector, cpumask_t cpu_mask)
{
set_native_irq_info(vector, cpu_mask);
- set_msi_irq_affinity(vector, cpu_mask);
+ set_msi_affinity(vector, cpu_mask);
}
/*
{
int apic1, pin1, apic2, pin2;
int vector;
+ unsigned long flags;
+
+ local_irq_save(flags);
/*
* get/set the timer IRQ vector:
*/
unmask_IO_APIC_irq(0);
if (timer_irq_works()) {
+ local_irq_restore(flags);
if (disable_timer_pin_1 > 0)
clear_IO_APIC_pin(apic1, pin1);
return;
*/
setup_ExtINT_IRQ0_pin(apic2, pin2, vector);
if (timer_irq_works()) {
+ local_irq_restore(flags);
printk("works.\n");
if (pin1 != -1)
replace_pin_at_irq(0, apic1, pin1, apic2, pin2);
enable_8259A_irq(0);
if (timer_irq_works()) {
+ local_irq_restore(flags);
printk(" works.\n");
return;
}
unlock_ExtINT_logic();
+ local_irq_restore(flags);
+
if (timer_irq_works()) {
printk(" works.\n");
return;
*/
#define PIC_IRQS (1 << PIC_CASCADE_IR)
+static struct IO_APIC_route_entry *ioapic_pm_state;
+
+void ioapic_pm_state_alloc(void)
+{
+ int i, nr_entry = 0;
+
+ for (i = 0; i < nr_ioapics; i++)
+ nr_entry += nr_ioapic_registers[i];
+
+ ioapic_pm_state = _xmalloc(sizeof(struct IO_APIC_route_entry)*nr_entry,
+ sizeof(struct IO_APIC_route_entry));
+ BUG_ON(ioapic_pm_state == NULL);
+}
+
void __init setup_IO_APIC(void)
{
enable_IO_APIC();
init_IO_APIC_traps();
check_timer();
print_IO_APIC();
+ ioapic_pm_state_alloc();
register_keyhandler('z', print_IO_APIC_keyhandler, "print ioapic info");
}
-struct IO_APIC_route_entry *ioapic_pm_state=NULL;
-
-void ioapic_pm_state_alloc(void)
+void ioapic_suspend(void)
{
- int i, nr_entry = 0;
-
- if (ioapic_pm_state != NULL)
- return;
-
- for (i = 0; i < nr_ioapics; i++)
- nr_entry += nr_ioapic_registers[i];
-
- ioapic_pm_state = _xmalloc(sizeof(struct IO_APIC_route_entry)*nr_entry,
- sizeof(struct IO_APIC_route_entry));
-}
-
-int ioapic_suspend(void)
-{
- struct IO_APIC_route_entry *entry;
+ struct IO_APIC_route_entry *entry = ioapic_pm_state;
unsigned long flags;
- int apic,i;
-
- ioapic_pm_state_alloc();
-
- if (ioapic_pm_state == NULL) {
- printk("Cannot suspend ioapic due to lack of memory\n");
- return 1;
- }
-
- entry = ioapic_pm_state;
+ int apic, i;
spin_lock_irqsave(&ioapic_lock, flags);
for (apic = 0; apic < nr_ioapics; apic++) {
}
}
spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return 0;
}
-int ioapic_resume(void)
+void ioapic_resume(void)
{
- struct IO_APIC_route_entry *entry;
+ struct IO_APIC_route_entry *entry = ioapic_pm_state;
unsigned long flags;
union IO_APIC_reg_00 reg_00;
- int i,apic;
-
- if (ioapic_pm_state == NULL){
- printk("Cannot resume ioapic due to lack of memory\n");
- return 1;
- }
-
- entry = ioapic_pm_state;
+ int i, apic;
spin_lock_irqsave(&ioapic_lock, flags);
for (apic = 0; apic < nr_ioapics; apic++){
}
}
spin_unlock_irqrestore(&ioapic_lock, flags);
-
- return 0;
}
/* --------------------------------------------------------------------------
if ( new_rte.vector >= FIRST_DYNAMIC_VECTOR )
new_irq = vector_irq[new_rte.vector];
- if ( (old_irq != new_irq) && (old_irq != -1) && IO_APIC_IRQ(old_irq) )
+ if ( (old_irq != new_irq) && (old_irq >= 0) && IO_APIC_IRQ(old_irq) )
{
if ( irq_desc[IO_APIC_VECTOR(old_irq)].action )
{
remove_pin_at_irq(old_irq, apic, pin);
}
- if ( (new_irq != -1) && IO_APIC_IRQ(new_irq) )
+ if ( (new_irq >= 0) && IO_APIC_IRQ(new_irq) )
{
if ( irq_desc[IO_APIC_VECTOR(new_irq)].action )
{
#include <xen/iommu.h>
#include <asm/msi.h>
#include <asm/current.h>
+#include <asm/flushtlb.h>
#include <public/physdev.h>
/* opt_noirqbalance: If true, software IRQ balancing/affinity is disabled. */
int opt_noirqbalance = 0;
boolean_param("noirqbalance", opt_noirqbalance);
-irq_desc_t irq_desc[NR_IRQS];
+irq_desc_t irq_desc[NR_VECTORS];
+
+static DEFINE_SPINLOCK(vector_lock);
+int vector_irq[NR_VECTORS] __read_mostly = {
+ [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
+};
static void __do_IRQ_guest(int vector);
atomic_t irq_err_count;
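+/* Return an automatically-assigned vector to the free pool. */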
+int free_irq_vector(int vector)
+{
+ int irq;
+
+ BUG_ON((vector > LAST_DYNAMIC_VECTOR) || (vector < FIRST_DYNAMIC_VECTOR));
+
+ spin_lock(&vector_lock);
+ if ((irq = vector_irq[vector]) == AUTO_ASSIGN_IRQ)
+ vector_irq[vector] = FREE_TO_ASSIGN_IRQ;
+ spin_unlock(&vector_lock);
+
+ return (irq == AUTO_ASSIGN_IRQ) ? 0 : -EINVAL;
+}
+
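+/* Allocate a dynamic vector for 'irq', reusing any vector it already
+ * has; new vectors are handed out in strides of 8 across the range. */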
+int assign_irq_vector(int irq)
+{
+ static unsigned current_vector = FIRST_DYNAMIC_VECTOR;
+ unsigned vector;
+
+ BUG_ON(irq >= NR_IRQS);
+
+ spin_lock(&vector_lock);
+
+ if ((irq != AUTO_ASSIGN_IRQ) && (IO_APIC_VECTOR(irq) > 0)) {
+ spin_unlock(&vector_lock);
+ return IO_APIC_VECTOR(irq);
+ }
+
+ vector = current_vector;
+ while (vector_irq[vector] != FREE_TO_ASSIGN_IRQ) {
+ vector += 8;
+ if (vector > LAST_DYNAMIC_VECTOR)
+ vector = FIRST_DYNAMIC_VECTOR + ((vector + 1) & 7);
+
+ if (vector == current_vector) {
+ spin_unlock(&vector_lock);
+ return -ENOSPC;
+ }
+ }
+
+ current_vector = vector;
+ vector_irq[vector] = irq;
+ if (irq != AUTO_ASSIGN_IRQ)
+ IO_APIC_VECTOR(irq) = vector;
+
+ spin_unlock(&vector_lock);
+
+ return vector;
+}
+
asmlinkage void do_IRQ(struct cpu_user_regs *regs)
{
unsigned int vector = regs->entry_vector;
spin_unlock(&desc->lock);
}
-int request_irq(unsigned int irq,
+int request_irq_vector(unsigned int vector,
void (*handler)(int, void *, struct cpu_user_regs *),
unsigned long irqflags, const char * devname, void *dev_id)
{
* which interrupt is which (messes up the interrupt freeing
* logic etc).
*/
- if (irq >= NR_IRQS)
+ if (vector >= NR_VECTORS)
return -EINVAL;
if (!handler)
return -EINVAL;
action->name = devname;
action->dev_id = dev_id;
- retval = setup_irq(irq, action);
+ retval = setup_irq_vector(vector, action);
if (retval)
xfree(action);
return retval;
}
-void free_irq(unsigned int irq)
+void release_irq_vector(unsigned int vector)
{
- unsigned int vector = irq_to_vector(irq);
- irq_desc_t *desc = &irq_desc[vector];
+ irq_desc_t *desc = &irq_desc[vector];
unsigned long flags;
spin_lock_irqsave(&desc->lock,flags);
desc->action = NULL;
desc->depth = 1;
desc->status |= IRQ_DISABLED;
- desc->handler->shutdown(irq);
+ desc->handler->shutdown(vector);
spin_unlock_irqrestore(&desc->lock,flags);
/* Wait to make sure it's not being used on another CPU */
do { smp_mb(); } while ( desc->status & IRQ_INPROGRESS );
}
-int setup_irq(unsigned int irq, struct irqaction *new)
+int setup_irq_vector(unsigned int vector, struct irqaction *new)
{
- unsigned int vector = irq_to_vector(irq);
- irq_desc_t *desc = &irq_desc[vector];
+ irq_desc_t *desc = &irq_desc[vector];
unsigned long flags;
spin_lock_irqsave(&desc->lock,flags);
static DEFINE_PER_CPU(struct pending_eoi, pending_eoi[NR_VECTORS]);
#define pending_eoi_sp(p) ((p)[NR_VECTORS-1].vector)
-static struct timer irq_guest_eoi_timer[NR_IRQS];
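+/* Set/clear a pirq's bit in the domain's pirq_eoi_map, if the domain
+ * has registered one. */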
+static inline void set_pirq_eoi(struct domain *d, unsigned int irq)
+{
+ if ( d->arch.pirq_eoi_map )
+ set_bit(irq, d->arch.pirq_eoi_map);
+}
+
+static inline void clear_pirq_eoi(struct domain *d, unsigned int irq)
+{
+ if ( d->arch.pirq_eoi_map )
+ clear_bit(irq, d->arch.pirq_eoi_map);
+}
+
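+/* Clear any pending guest-EOI state on this descriptor: drop the
+ * per-domain eoi-map bits and re-enable the vector. */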
+static void _irq_guest_eoi(irq_desc_t *desc)
+{
+ irq_guest_action_t *action = (irq_guest_action_t *)desc->action;
+ unsigned int i, vector = desc - irq_desc;
+
+ if ( !(desc->status & IRQ_GUEST_EOI_PENDING) )
+ return;
+
+ for ( i = 0; i < action->nr_guests; ++i )
+ clear_pirq_eoi(action->guest[i],
+ domain_vector_to_irq(action->guest[i], vector));
+
+ desc->status &= ~(IRQ_INPROGRESS|IRQ_GUEST_EOI_PENDING);
+ desc->handler->enable(vector);
+}
+
+static struct timer irq_guest_eoi_timer[NR_VECTORS];
static void irq_guest_eoi_timer_fn(void *data)
{
irq_desc_t *desc = data;
- unsigned vector = desc - irq_desc;
unsigned long flags;
spin_lock_irqsave(&desc->lock, flags);
- desc->status &= ~IRQ_INPROGRESS;
- desc->handler->enable(vector);
+ _irq_guest_eoi(desc);
spin_unlock_irqrestore(&desc->lock, flags);
}
if ( already_pending == action->nr_guests )
{
- desc->handler->disable(vector);
stop_timer(&irq_guest_eoi_timer[vector]);
+ desc->handler->disable(vector);
+ desc->status |= IRQ_GUEST_EOI_PENDING;
+ for ( i = 0; i < already_pending; ++i )
+ {
+ d = action->guest[i];
+ set_pirq_eoi(d, domain_vector_to_irq(d, vector));
+ /*
+ * Could check here whether the guest unmasked the event by now
+ * (or perhaps just re-issue the send_guest_pirq()), and if it
+ * can now accept the event,
+ * - clear all the pirq_eoi bits we already set,
+ * - re-enable the vector, and
+ * - skip the timer setup below.
+ */
+ }
init_timer(&irq_guest_eoi_timer[vector],
irq_guest_eoi_timer_fn, desc, smp_processor_id());
set_timer(&irq_guest_eoi_timer[vector], NOW() + MILLISECS(1));
}
/* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */
-static void flush_ready_eoi(void *unused)
+static void flush_ready_eoi(void)
{
struct pending_eoi *peoi = this_cpu(pending_eoi);
irq_desc_t *desc;
__set_eoi_ready(desc);
spin_unlock(&desc->lock);
- flush_ready_eoi(NULL);
+ flush_ready_eoi();
}
static void __pirq_guest_eoi(struct domain *d, int irq)
action = (irq_guest_action_t *)desc->action;
vector = desc - irq_desc;
- ASSERT(!test_bit(irq, d->pirq_mask) ||
- (action->ack_type != ACKTYPE_NONE));
+ if ( action->ack_type == ACKTYPE_NONE )
+ {
+ ASSERT(!test_bit(irq, d->pirq_mask));
+ stop_timer(&irq_guest_eoi_timer[vector]);
+ _irq_guest_eoi(desc);
+ }
if ( unlikely(!test_and_clear_bit(irq, d->pirq_mask)) ||
unlikely(--action->in_flight != 0) )
{
__set_eoi_ready(desc);
spin_unlock(&desc->lock);
- flush_ready_eoi(NULL);
+ flush_ready_eoi();
local_irq_enable();
}
else
}
extern int ioapic_ack_new;
-int pirq_acktype(struct domain *d, int irq)
+static int pirq_acktype(struct domain *d, int irq)
{
irq_desc_t *desc;
unsigned int vector;
/*
* Edge-triggered IO-APIC and LAPIC interrupts need no final
* acknowledgement: we ACK early during interrupt processing.
- * MSIs are treated as edge-triggered interrupts.
*/
if ( !strcmp(desc->handler->typename, "IO-APIC-edge") ||
- !strcmp(desc->handler->typename, "local-APIC-edge") ||
- !strcmp(desc->handler->typename, "PCI-MSI") )
+ !strcmp(desc->handler->typename, "local-APIC-edge") )
return ACKTYPE_NONE;
+ /*
+ * MSIs are treated as edge-triggered interrupts, except
+ * when there is no proper way to mask them.
+ */
+ if ( desc->handler == &pci_msi_type )
+ return msi_maskable_irq(desc->msi_desc) ? ACKTYPE_NONE : ACKTYPE_EOI;
+
/*
* Level-triggered IO-APIC interrupts need to be acknowledged on the CPU
* on which they were received. This is because we tickle the LAPIC to EOI.
{
unsigned int vector;
irq_desc_t *desc;
- irq_guest_action_t *action;
+ irq_guest_action_t *action, *newaction = NULL;
int rc = 0;
cpumask_t cpumask = CPU_MASK_NONE;
retry:
desc = domain_spin_lock_irq_desc(v->domain, irq, NULL);
if ( desc == NULL )
- return -EINVAL;
+ {
+ rc = -EINVAL;
+ goto out;
+ }
action = (irq_guest_action_t *)desc->action;
vector = desc - irq_desc;
"Cannot bind IRQ %d to guest. In use by '%s'.\n",
irq, desc->action->name);
rc = -EBUSY;
- goto out;
+ goto unlock_out;
}
- action = xmalloc(irq_guest_action_t);
- if ( (desc->action = (struct irqaction *)action) == NULL )
+ if ( newaction == NULL )
{
+ spin_unlock_irq(&desc->lock);
+ if ( (newaction = xmalloc(irq_guest_action_t)) != NULL )
+ goto retry;
gdprintk(XENLOG_INFO,
- "Cannot bind IRQ %d to guest. Out of memory.\n",
- irq);
+ "Cannot bind IRQ %d to guest. Out of memory.\n",
+ irq);
rc = -ENOMEM;
goto out;
}
+ action = newaction;
+ desc->action = (struct irqaction *)action;
+ newaction = NULL;
+
action->nr_guests = 0;
action->in_flight = 0;
action->shareable = will_share;
}
else if ( !will_share || !action->shareable )
{
- gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. "
- "Will not share with others.\n",
- irq);
+ gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. %s.\n",
+ irq,
+ will_share ?
+ "Others do not share" :
+ "Will not share with others");
rc = -EBUSY;
- goto out;
+ goto unlock_out;
}
else if ( action->nr_guests == IRQ_MAX_GUESTS )
{
gdprintk(XENLOG_INFO, "Cannot bind IRQ %d to guest. "
"Already at max share.\n", irq);
rc = -EBUSY;
- goto out;
+ goto unlock_out;
}
action->guest[action->nr_guests++] = v->domain;
- out:
+ if ( action->ack_type != ACKTYPE_NONE )
+ set_pirq_eoi(v->domain, irq);
+ else
+ clear_pirq_eoi(v->domain, irq);
+
+ unlock_out:
spin_unlock_irq(&desc->lock);
+ out:
+ if ( newaction != NULL )
+ xfree(newaction);
return rc;
}
-static void __pirq_guest_unbind(struct domain *d, int irq, irq_desc_t *desc)
+static irq_guest_action_t *__pirq_guest_unbind(
+ struct domain *d, int irq, irq_desc_t *desc)
{
unsigned int vector;
irq_guest_action_t *action;
spin_lock_irq(&desc->lock);
}
break;
+ case ACKTYPE_NONE:
+ stop_timer(&irq_guest_eoi_timer[vector]);
+ _irq_guest_eoi(desc);
+ break;
}
/*
BUG_ON(test_bit(irq, d->pirq_mask));
if ( action->nr_guests != 0 )
- return;
+ return NULL;
BUG_ON(action->in_flight != 0);
BUG_ON(!cpus_empty(action->cpu_eoi_map));
desc->action = NULL;
- xfree(action);
desc->status &= ~IRQ_GUEST;
desc->status &= ~IRQ_INPROGRESS;
kill_timer(&irq_guest_eoi_timer[vector]);
desc->handler->shutdown(vector);
+
+ /* Caller frees the old guest descriptor block. */
+ return action;
}
void pirq_guest_unbind(struct domain *d, int irq)
{
+ irq_guest_action_t *oldaction = NULL;
irq_desc_t *desc;
int vector;
}
else
{
- __pirq_guest_unbind(d, irq, desc);
+ oldaction = __pirq_guest_unbind(d, irq, desc);
}
spin_unlock_irq(&desc->lock);
+
+ if ( oldaction != NULL )
+ xfree(oldaction);
}
int pirq_guest_force_unbind(struct domain *d, int irq)
{
irq_desc_t *desc;
- irq_guest_action_t *action;
+ irq_guest_action_t *action, *oldaction = NULL;
int i, bound = 0;
WARN_ON(!spin_is_locked(&d->event_lock));
goto out;
bound = 1;
- __pirq_guest_unbind(d, irq, desc);
+ oldaction = __pirq_guest_unbind(d, irq, desc);
out:
spin_unlock_irq(&desc->lock);
+
+ if ( oldaction != NULL )
+ xfree(oldaction);
+
return bound;
}
if ( type == MAP_PIRQ_TYPE_GSI )
{
- for ( i = 16; i < NR_PIRQS; i++ )
+ for ( i = 16; i < NR_IRQS; i++ )
if ( !d->arch.pirq_vector[i] )
break;
- if ( i == NR_PIRQS )
+ if ( i == NR_IRQS )
return -ENOSPC;
}
else
{
- for ( i = NR_PIRQS - 1; i >= 16; i-- )
+ for ( i = NR_IRQS - 1; i >= 16; i-- )
if ( !d->arch.pirq_vector[i] )
break;
if ( i == 16 )
int old_vector, old_pirq;
irq_desc_t *desc;
unsigned long flags;
+ struct msi_desc *msi_desc;
+ struct pci_dev *pdev = NULL;
+ ASSERT(spin_is_locked(&pcidevs_lock));
ASSERT(spin_is_locked(&d->event_lock));
if ( !IS_PRIV(current->domain) )
return -EPERM;
- if ( pirq < 0 || pirq >= NR_PIRQS || vector < 0 || vector >= NR_VECTORS )
+ if ( pirq < 0 || pirq >= NR_IRQS || vector < 0 || vector >= NR_VECTORS )
{
dprintk(XENLOG_G_ERR, "dom%d: invalid pirq %d or vector %d\n",
d->domain_id, pirq, vector);
return -EINVAL;
}
- old_vector = d->arch.pirq_vector[pirq];
- old_pirq = d->arch.vector_pirq[vector];
+ old_vector = domain_irq_to_vector(d, pirq);
+ old_pirq = domain_vector_to_irq(d, vector);
if ( (old_vector && (old_vector != vector) ) ||
(old_pirq && (old_pirq != pirq)) )
}
desc = &irq_desc[vector];
- spin_lock_irqsave(&desc->lock, flags);
if ( type == MAP_PIRQ_TYPE_MSI )
{
struct msi_info *msi = (struct msi_info *)data;
+
+ ret = -ENODEV;
+ if ( !cpu_has_apic )
+ goto done;
+
+ pdev = pci_get_pdev(msi->bus, msi->devfn);
+ ret = pci_enable_msi(msi, &msi_desc);
+ if ( ret )
+ goto done;
+
+ spin_lock_irqsave(&desc->lock, flags);
+
if ( desc->handler != &no_irq_type )
dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n",
- d->domain_id, vector);
+ d->domain_id, vector);
desc->handler = &pci_msi_type;
- ret = pci_enable_msi(msi);
- if ( ret )
- goto done;
+ d->arch.pirq_vector[pirq] = vector;
+ d->arch.vector_pirq[vector] = pirq;
+ setup_msi_irq(pdev, msi_desc);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+ else
+ {
+ spin_lock_irqsave(&desc->lock, flags);
+ d->arch.pirq_vector[pirq] = vector;
+ d->arch.vector_pirq[vector] = pirq;
+ spin_unlock_irqrestore(&desc->lock, flags);
}
- d->arch.pirq_vector[pirq] = vector;
- d->arch.vector_pirq[vector] = pirq;
-
-done:
- spin_unlock_irqrestore(&desc->lock, flags);
+ done:
return ret;
}
irq_desc_t *desc;
int vector, ret = 0;
bool_t forced_unbind;
+ struct msi_desc *msi_desc = NULL;
- if ( (pirq < 0) || (pirq >= NR_PIRQS) )
+ if ( (pirq < 0) || (pirq >= NR_IRQS) )
return -EINVAL;
if ( !IS_PRIV(current->domain) )
return -EINVAL;
+ ASSERT(spin_is_locked(&pcidevs_lock));
ASSERT(spin_is_locked(&d->event_lock));
- vector = d->arch.pirq_vector[pirq];
+ vector = domain_irq_to_vector(d, pirq);
if ( vector <= 0 )
{
dprintk(XENLOG_G_ERR, "dom%d: pirq %d not mapped\n",
d->domain_id, pirq);
desc = &irq_desc[vector];
+
+ if ( (msi_desc = desc->msi_desc) != NULL )
+ pci_disable_msi(msi_desc);
+
spin_lock_irqsave(&desc->lock, flags);
- BUG_ON(vector != d->arch.pirq_vector[pirq]);
+ BUG_ON(vector != domain_irq_to_vector(d, pirq));
- if ( desc->msi_desc )
- pci_disable_msi(vector);
+ if ( msi_desc )
+ teardown_msi_vector(vector);
if ( desc->handler == &pci_msi_type )
- {
desc->handler = &no_irq_type;
- free_irq_vector(vector);
- }
if ( !forced_unbind )
{
}
spin_unlock_irqrestore(&desc->lock, flags);
+ if ( msi_desc )
+ {
+ msi_free_vector(msi_desc);
+ free_irq_vector(vector);
+ }
ret = irq_deny_access(d, pirq);
if ( ret )
{
int i;
+ spin_lock(&pcidevs_lock);
spin_lock(&d->event_lock);
- for ( i = 0; i < NR_PIRQS; i++ )
+ for ( i = 0; i < NR_IRQS; i++ )
if ( d->arch.pirq_vector[i] > 0 )
unmap_domain_pirq(d, i);
spin_unlock(&d->event_lock);
+ spin_unlock(&pcidevs_lock);
}
extern void dump_ioapic_irq_info(void);
(test_bit(d->pirq_to_evtchn[irq],
&shared_info(d, evtchn_pending)) ?
'P' : '-'),
- (test_bit(d->pirq_to_evtchn[irq]/BITS_PER_GUEST_LONG(d),
+ (test_bit(d->pirq_to_evtchn[irq] /
+ BITS_PER_EVTCHN_WORD(d),
&vcpu_info(d->vcpu[0], evtchn_pending_sel)) ?
'S' : '-'),
(test_bit(d->pirq_to_evtchn[irq],
void fixup_irqs(cpumask_t map)
{
- unsigned int irq, sp;
+ unsigned int vector, sp;
static int warned;
irq_guest_action_t *action;
struct pending_eoi *peoi;
+ irq_desc_t *desc;
+ unsigned long flags;
/* Direct all future interrupts away from this CPU. */
- for ( irq = 0; irq < NR_IRQS; irq++ )
+ for ( vector = 0; vector < NR_VECTORS; vector++ )
{
cpumask_t mask;
- if ( irq == 2 )
+ if ( vector_to_irq(vector) == 2 )
continue;
- cpus_and(mask, irq_desc[irq].affinity, map);
+ desc = &irq_desc[vector];
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ cpus_and(mask, desc->affinity, map);
if ( any_online_cpu(mask) == NR_CPUS )
{
- printk("Breaking affinity for irq %i\n", irq);
+ printk("Breaking affinity for vector %u (irq %i)\n",
+ vector, vector_to_irq(vector));
mask = map;
}
- if ( irq_desc[irq].handler->set_affinity )
- irq_desc[irq].handler->set_affinity(irq, mask);
- else if ( irq_desc[irq].action && !(warned++) )
- printk("Cannot set affinity for irq %i\n", irq);
+ if ( desc->handler->set_affinity )
+ desc->handler->set_affinity(vector, mask);
+ else if ( desc->action && !(warned++) )
+ printk("Cannot set affinity for vector %u (irq %i)\n",
+ vector, vector_to_irq(vector));
+
+ spin_unlock_irqrestore(&desc->lock, flags);
}
/* Service any interrupts that beat us in the re-direction race. */
local_irq_disable();
/* Clean up cpu_eoi_map of every interrupt to exclude this CPU. */
- for ( irq = 0; irq < NR_IRQS; irq++ )
+ for ( vector = 0; vector < NR_VECTORS; vector++ )
{
- if ( !(irq_desc[irq].status & IRQ_GUEST) )
+ if ( !(irq_desc[vector].status & IRQ_GUEST) )
continue;
- action = (irq_guest_action_t *)irq_desc[irq].action;
+ action = (irq_guest_action_t *)irq_desc[vector].action;
cpu_clear(smp_processor_id(), action->cpu_eoi_map);
}
peoi = this_cpu(pending_eoi);
for ( sp = 0; sp < pending_eoi_sp(peoi); sp++ )
peoi[sp].ready = 1;
- flush_ready_eoi(NULL);
+ flush_ready_eoi();
}
#endif
VMCOREINFO_SYMBOL(dom_xen);
VMCOREINFO_SYMBOL(dom_io);
+#ifdef CONFIG_X86_32
+ VMCOREINFO_SYMBOL(xenheap_phys_end);
+#endif
#ifdef CONFIG_X86_PAE
VMCOREINFO_SYMBOL_ALIAS(pgd_l3, idle_pg_table);
#endif
char buffer[1];
};
-static void microcode_fini_cpu(int cpu)
+static void __microcode_fini_cpu(int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
- spin_lock(µcode_mutex);
- xfree(uci->mc.valid_mc);
- uci->mc.valid_mc = NULL;
- uci->valid = 0;
- spin_unlock(µcode_mutex);
+ xfree(uci->mc.mc_valid);
+ memset(uci, 0, sizeof(*uci));
}
-static int collect_cpu_info(int cpu)
+static void microcode_fini_cpu(int cpu)
{
- int err = 0;
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
-
- memset(uci, 0, sizeof(*uci));
- err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
- if ( !err )
- uci->valid = 1;
-
- return err;
+ spin_lock(µcode_mutex);
+ __microcode_fini_cpu(cpu);
+ spin_unlock(µcode_mutex);
}
-static int microcode_resume_cpu(int cpu)
+int microcode_resume_cpu(int cpu)
{
int err = 0;
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
gdprintk(XENLOG_INFO, "microcode: CPU%d resumed\n", cpu);
- if ( !uci->mc.valid_mc )
+ if ( !uci->mc.mc_valid )
return -EIO;
/*
return err;
}
- if ( memcmp(&nsig, &uci->cpu_sig, sizeof(nsig)) )
+ if ( microcode_ops->microcode_resume_match(cpu, &nsig) )
+ {
+ return microcode_ops->apply_microcode(cpu);
+ }
+ else
{
microcode_fini_cpu(cpu);
- /* Should we look for a new ucode here? */
return -EIO;
}
-
- err = microcode_ops->apply_microcode(cpu);
-
- return err;
}
static int microcode_update_cpu(const void *buf, size_t size)
spin_lock(µcode_mutex);
- /*
- * Check if the system resume is in progress (uci->valid != NULL),
- * otherwise just request a firmware:
- */
- if ( uci->valid )
- {
- err = microcode_resume_cpu(cpu);
- }
+ err = microcode_ops->collect_cpu_info(cpu, &uci->cpu_sig);
+ if ( likely(!err) )
+ err = microcode_ops->cpu_request_microcode(cpu, buf, size);
else
- {
- err = collect_cpu_info(cpu);
- if ( !err && uci->valid )
- err = microcode_ops->cpu_request_microcode(cpu, buf, size);
- }
+ __microcode_fini_cpu(cpu);
spin_unlock(µcode_mutex);
error = info->error;
xfree(info);
return error;
-
}
int microcode_update(XEN_GUEST_HANDLE(const_void) buf, unsigned long len)
#define MC_HEADER_SIZE (sizeof(struct microcode_header_amd))
#define DEFAULT_UCODE_TOTALSIZE (DEFAULT_UCODE_DATASIZE + MC_HEADER_SIZE)
#define DWSIZE (sizeof(uint32_t))
-/* For now we support a fixed ucode total size only */
-#define get_totalsize(mc) \
- ((((struct microcode_amd *)mc)->hdr.mc_patch_data_len * 28) \
- + MC_HEADER_SIZE)
/* serialize access to the physical write */
static DEFINE_SPINLOCK(microcode_update_lock);
struct equiv_cpu_entry *equiv_cpu_table;
-static long install_equiv_cpu_table(const void *, uint32_t, long);
-
static int collect_cpu_info(int cpu, struct cpu_signature *csig)
{
struct cpuinfo_x86 *c = &cpu_data[cpu];
+ uint32_t dummy;
memset(csig, 0, sizeof(*csig));
{
printk(KERN_ERR "microcode: CPU%d not a capable AMD processor\n",
cpu);
- return -1;
+ return -EINVAL;
}
- asm volatile (
- "movl %1, %%ecx; rdmsr"
- : "=a" (csig->rev)
- : "i" (MSR_AMD_PATCHLEVEL) : "ecx" );
+ rdmsr(MSR_AMD_PATCHLEVEL, csig->rev, dummy);
printk(KERN_INFO "microcode: collect_cpu_info: patch_id=0x%x\n",
csig->rev);
return 0;
}
-static int get_matching_microcode(void *mc, int cpu)
+static int microcode_fits(void *mc, int cpu)
{
struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
struct microcode_header_amd *mc_header = mc;
- unsigned long total_size = get_totalsize(mc_header);
- void *new_mc;
unsigned int current_cpu_id;
- unsigned int equiv_cpu_id = 0x00;
+ unsigned int equiv_cpu_id = 0x0;
unsigned int i;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
- /* This is a tricky part. We might be called from a write operation
- * to the device file instead of the usual process of firmware
- * loading. This routine needs to be able to distinguish both
- * cases. This is done by checking if there already is a equivalent
- * CPU table installed. If not, we're written through
- * /dev/cpu/microcode.
- * Since we ignore all checks. The error case in which going through
- * firmware loading and that table is not loaded has already been
- * checked earlier.
- */
if ( equiv_cpu_table == NULL )
{
printk(KERN_INFO "microcode: CPU%d microcode update with "
{
if ( current_cpu_id == equiv_cpu_table[i].installed_cpu )
{
- equiv_cpu_id = equiv_cpu_table[i].equiv_cpu;
+ equiv_cpu_id = equiv_cpu_table[i].equiv_cpu & 0xffff;
break;
}
}
if ( !equiv_cpu_id )
{
printk(KERN_ERR "microcode: CPU%d cpu_id "
- "not found in equivalent cpu table \n", cpu);
- return 0;
- }
-
- if ( (mc_header->processor_rev_id[0]) != (equiv_cpu_id & 0xff) )
- {
- printk(KERN_INFO
- "microcode: CPU%d patch does not match "
- "(patch is %x, cpu extended is %x) \n",
- cpu, mc_header->processor_rev_id[0],
- (equiv_cpu_id & 0xff));
- return 0;
+ "not found in equivalent cpu table\n", cpu);
+ return -EINVAL;
}
- if ( (mc_header->processor_rev_id[1]) != ((equiv_cpu_id >> 16) & 0xff) )
+ if ( (mc_header->processor_rev_id) != equiv_cpu_id )
{
printk(KERN_INFO "microcode: CPU%d patch does not match "
"(patch is %x, cpu base id is %x) \n",
- cpu, mc_header->processor_rev_id[1],
- ((equiv_cpu_id >> 16) & 0xff));
- return 0;
+ cpu, mc_header->processor_rev_id, equiv_cpu_id);
+ return -EINVAL;
}
if ( mc_header->patch_id <= uci->cpu_sig.rev )
- return 0;
+ return -EINVAL;
printk(KERN_INFO "microcode: CPU%d found a matching microcode "
"update with version 0x%x (current=0x%x)\n",
cpu, mc_header->patch_id, uci->cpu_sig.rev);
- out:
- new_mc = xmalloc_bytes(UCODE_MAX_SIZE);
- if ( new_mc == NULL )
- {
- printk(KERN_ERR "microcode: error, can't allocate memory\n");
- return -ENOMEM;
- }
- memset(new_mc, 0, UCODE_MAX_SIZE);
-
- /* free previous update file */
- xfree(uci->mc.mc_amd);
-
- memcpy(new_mc, mc, total_size);
-
- uci->mc.mc_amd = new_mc;
- return 1;
+out:
+ return 0;
}
static int apply_microcode(int cpu)
{
unsigned long flags;
- uint32_t eax, edx, rev;
- int cpu_num = raw_smp_processor_id();
- struct ucode_cpu_info *uci = ucode_cpu_info + cpu_num;
- uint64_t addr;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+ uint32_t rev, dummy;
+ struct microcode_amd *mc_amd = uci->mc.mc_amd;
/* We should bind the task to the CPU */
- BUG_ON(cpu_num != cpu);
+ BUG_ON(raw_smp_processor_id() != cpu);
- if ( uci->mc.mc_amd == NULL )
+ if ( mc_amd == NULL )
return -EINVAL;
spin_lock_irqsave(µcode_update_lock, flags);
- addr = (unsigned long)&uci->mc.mc_amd->hdr.data_code;
- edx = (uint32_t)(addr >> 32);
- eax = (uint32_t)addr;
-
- asm volatile (
- "movl %0, %%ecx; wrmsr" :
- : "i" (MSR_AMD_PATCHLOADER), "a" (eax), "d" (edx) : "ecx" );
+ wrmsrl(MSR_AMD_PATCHLOADER, (unsigned long)&mc_amd->hdr.data_code);
/* get patch id after patching */
- asm volatile (
- "movl %1, %%ecx; rdmsr"
- : "=a" (rev)
- : "i" (MSR_AMD_PATCHLEVEL) : "ecx");
+ rdmsr(MSR_AMD_PATCHLEVEL, rev, dummy);
spin_unlock_irqrestore(µcode_update_lock, flags);
/* check current patch id and patch's id for match */
- if ( rev != uci->mc.mc_amd->hdr.patch_id )
+ if ( rev != mc_amd->hdr.patch_id )
{
printk(KERN_ERR "microcode: CPU%d update from revision "
- "0x%x to 0x%x failed\n", cpu_num,
- uci->mc.mc_amd->hdr.patch_id, rev);
+ "0x%x to 0x%x failed\n", cpu,
+ mc_amd->hdr.patch_id, rev);
return -EIO;
}
printk("microcode: CPU%d updated from revision "
"0x%x to 0x%x \n",
- cpu_num, uci->cpu_sig.rev, uci->mc.mc_amd->hdr.patch_id);
+ cpu, uci->cpu_sig.rev, mc_amd->hdr.patch_id);
uci->cpu_sig.rev = rev;
return 0;
}
-static long get_next_ucode_from_buffer_amd(void **mc, const void *buf,
- unsigned long size, long offset)
+static int get_next_ucode_from_buffer_amd(void *mc, const void *buf,
+ size_t size, unsigned long *offset)
{
struct microcode_header_amd *mc_header;
- unsigned long total_size;
- const uint8_t *buf_pos = buf;
+ size_t total_size;
+ const uint8_t *bufp = buf;
+ unsigned long off;
+
+ off = *offset;
/* No more data */
- if ( offset >= size )
- return 0;
+ if ( off >= size )
+ return 1;
- if ( buf_pos[offset] != UCODE_UCODE_TYPE )
+ if ( bufp[off] != UCODE_UCODE_TYPE )
{
printk(KERN_ERR "microcode: error! "
"Wrong microcode payload type field\n");
return -EINVAL;
}
- mc_header = (struct microcode_header_amd *)(&buf_pos[offset+8]);
+ mc_header = (struct microcode_header_amd *)(&bufp[off+8]);
- total_size = (unsigned long) (buf_pos[offset+4] +
- (buf_pos[offset+5] << 8));
+ total_size = (unsigned long) (bufp[off+4] + (bufp[off+5] << 8));
printk(KERN_INFO "microcode: size %lu, total_size %lu, offset %ld\n",
- size, total_size, offset);
+ (unsigned long)size, total_size, off);
- if ( (offset + total_size) > size )
+ if ( (off + total_size) > size )
{
printk(KERN_ERR "microcode: error! Bad data in microcode data file\n");
return -EINVAL;
}
- *mc = xmalloc_bytes(UCODE_MAX_SIZE);
- if ( *mc == NULL )
- {
- printk(KERN_ERR "microcode: error! "
- "Can not allocate memory for microcode patch\n");
- return -ENOMEM;
- }
+ memset(mc, 0, UCODE_MAX_SIZE);
+ memcpy(mc, (const void *)(&bufp[off + 8]), total_size);
- memset(*mc, 0, UCODE_MAX_SIZE);
- memcpy(*mc, (const void *)(buf + offset + 8), total_size);
+ *offset = off + total_size + 8;
- return offset + total_size + 8;
+ return 0;
}
-static long install_equiv_cpu_table(const void *buf,
- uint32_t size, long offset)
+static int install_equiv_cpu_table(const void *buf, uint32_t size,
+ unsigned long *offset)
{
const uint32_t *buf_pos = buf;
+ unsigned long off;
+
+ off = *offset;
+ *offset = 0;
/* No more data */
- if ( offset >= size )
- return 0;
+ if ( off >= size )
+ return -EINVAL;
if ( buf_pos[1] != UCODE_EQUIV_CPU_TABLE_TYPE )
{
printk(KERN_ERR "microcode: error! "
- "Wrong microcode equivalnet cpu table type field\n");
- return 0;
+ "Wrong microcode equivalent cpu table type field\n");
+ return -EINVAL;
}
if ( size == 0 )
{
printk(KERN_ERR "microcode: error! "
"Wrong microcode equivalnet cpu table length\n");
- return 0;
+ return -EINVAL;
}
equiv_cpu_table = xmalloc_bytes(size);
{
printk(KERN_ERR "microcode: error, can't allocate "
"memory for equiv CPU table\n");
- return 0;
+ return -ENOMEM;
}
memset(equiv_cpu_table, 0, size);
memcpy(equiv_cpu_table, (const void *)&buf_pos[3], size);
- return size + 12; /* add header length */
+ *offset = size + 12; /* add header length */
+
+ return 0;
}
static int cpu_request_microcode(int cpu, const void *buf, size_t size)
{
const uint32_t *buf_pos;
- long offset = 0;
+ unsigned long offset = 0;
int error = 0;
+ int ret;
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
void *mc;
/* We should bind the task to the CPU */
return -EINVAL;
}
- offset = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), offset);
- if ( !offset )
+ error = install_equiv_cpu_table(buf, (uint32_t)(buf_pos[2]), &offset);
+ if ( error )
{
printk(KERN_ERR "microcode: installing equivalent cpu table failed\n");
return -EINVAL;
}
- while ( (offset =
- get_next_ucode_from_buffer_amd(&mc, buf, size, offset)) > 0 )
+ mc = xmalloc_bytes(UCODE_MAX_SIZE);
+ if ( mc == NULL )
{
- error = get_matching_microcode(mc, cpu);
- if ( error < 0 )
- break;
- /*
- * It's possible the data file has multiple matching ucode,
- * lets keep searching till the latest version
- */
- if ( error == 1 )
- error = apply_microcode(cpu);
- xfree(mc);
+ printk(KERN_ERR "microcode: error! "
+ "Can not allocate memory for microcode patch\n");
+ error = -ENOMEM;
+ goto out;
}
- if ( offset > 0 )
+
+ /* implicitly validates uci->mc.mc_valid */
+ uci->mc.mc_amd = mc;
+
+ /*
+ * It's possible the data file has multiple matching ucode;
+ * let's keep searching till the latest version.
+ */
+ while ( (ret = get_next_ucode_from_buffer_amd(mc, buf, size, &offset)) == 0)
{
+ error = microcode_fits(mc, cpu);
+ if (error != 0)
+ continue;
+
+ error = apply_microcode(cpu);
+ if (error == 0)
+ break;
+ }
+
+ /* On success, keep the microcode patch so it can be
+ * re-applied on resume.
+ */
+ if ( error )
+ {
xfree(mc);
- xfree(equiv_cpu_table);
- equiv_cpu_table = NULL;
+ mc = NULL;
}
- if ( offset < 0 )
- error = offset;
+ uci->mc.mc_amd = mc;
+
+out:
+ xfree(equiv_cpu_table);
+ equiv_cpu_table = NULL;
return error;
}
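+/* No signature-based resume matching for AMD; always report no match
+ * so the resume path falls back to discarding the saved state. */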
+static int microcode_resume_match(int cpu, struct cpu_signature *nsig)
+{
+ return 0;
+}
+
static struct microcode_ops microcode_amd_ops = {
- .get_matching_microcode = get_matching_microcode,
+ .microcode_resume_match = microcode_resume_match,
.cpu_request_microcode = cpu_request_microcode,
.collect_cpu_info = collect_cpu_info,
.apply_microcode = apply_microcode,
struct cpuinfo_x86 *c = &cpu_data[cpu_num];
unsigned int val[2];
+ BUG_ON(cpu_num != smp_processor_id());
+
memset(csig, 0, sizeof(*csig));
if ( (c->x86_vendor != X86_VENDOR_INTEL) || (c->x86 < 6) ||
long offset = 0;
int error = 0;
void *mc;
+ unsigned int matching_count = 0;
/* We should bind the task to the CPU */
BUG_ON(cpu != raw_smp_processor_id());
*/
if ( error == 1 )
{
- apply_microcode(cpu);
+ matching_count++;
error = 0;
}
xfree(mc);
if ( offset < 0 )
error = offset;
+ if ( !error && matching_count )
+ apply_microcode(cpu);
+
return error;
}
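+/* A saved patch may be re-applied on resume only if the CPU signature
+ * and platform flags still match and the saved revision is newer than
+ * the one the CPU currently reports. */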
+static int microcode_resume_match(int cpu, struct cpu_signature *nsig)
+{
+ struct ucode_cpu_info *uci = ucode_cpu_info + cpu;
+
+ return (sigmatch(nsig->sig, uci->cpu_sig.sig, nsig->pf, uci->cpu_sig.pf) &&
+ (uci->cpu_sig.rev > nsig->rev));
+}
+
static struct microcode_ops microcode_intel_ops = {
- .get_matching_microcode = get_matching_microcode,
+ .microcode_resume_match = microcode_resume_match,
.cpu_request_microcode = cpu_request_microcode,
.collect_cpu_info = collect_cpu_info,
.apply_microcode = apply_microcode,
#define PAGE_CACHE_ATTRS (_PAGE_PAT|_PAGE_PCD|_PAGE_PWT)
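+/* allowhugepage: Permit guests to create superpage (PSE) mappings. */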
+int opt_allow_hugepage;
+boolean_param("allowhugepage", opt_allow_hugepage);
+
#define l1_disallow_mask(d) \
((d != dom_io) && \
(rangeset_is_empty((d)->iomem_caps) && \
#define l3_disallow_mask(d) L3_DISALLOW_MASK
#endif
-static void queue_deferred_ops(struct domain *d, unsigned int ops)
-{
- ASSERT(d == current->domain);
- this_cpu(percpu_mm_info).deferred_ops |= ops;
-}
-
void __init init_frametable(void)
{
unsigned long nr_pages, page_step, i, mfn;
}
memset(frame_table, 0, nr_pages << PAGE_SHIFT);
-
-#if defined(__x86_64__)
- for ( i = 0; i < max_page; i ++ )
- spin_lock_init(&frame_table[i].lock);
-#endif
}
void __init arch_init_memory(void)
subarch_init_memory();
}
-int memory_is_conventional_ram(paddr_t p)
+int page_is_ram_type(unsigned long mfn, unsigned long mem_type)
{
+ uint64_t maddr = pfn_to_paddr(mfn);
int i;
for ( i = 0; i < e820.nr_map; i++ )
{
- if ( (e820.map[i].type == E820_RAM) &&
- (e820.map[i].addr <= p) &&
- (e820.map[i].size > p) )
+ switch ( e820.map[i].type )
+ {
+ case E820_RAM:
+ if ( mem_type & RAM_TYPE_CONVENTIONAL )
+ break;
+ continue;
+ case E820_RESERVED:
+ if ( mem_type & RAM_TYPE_RESERVED )
+ break;
+ continue;
+ case E820_UNUSABLE:
+ if ( mem_type & RAM_TYPE_UNUSABLE )
+ break;
+ continue;
+ case E820_ACPI:
+ case E820_NVS:
+ if ( mem_type & RAM_TYPE_ACPI )
+ break;
+ continue;
+ default:
+ /* unknown */
+ continue;
+ }
+
+ /* Test the range. */
+ if ( (e820.map[i].addr <= maddr) &&
+ ((e820.map[i].addr + e820.map[i].size) >= (maddr + PAGE_SIZE)) )
return 1;
}
page_set_owner(page, d);
wmb(); /* install valid domain ptr before updating refcnt. */
- ASSERT(page->count_info == 0);
+ ASSERT((page->count_info & ~PGC_xen_heap) == 0);
/* Only add to the allocation list if the domain isn't dying. */
if ( !d->is_dying )
page->count_info |= PGC_allocated | 1;
if ( unlikely(d->xenheap_pages++ == 0) )
get_knownalive_domain(d);
- list_add_tail(&page->list, &d->xenpage_list);
+ page_list_add_tail(page, &d->xenpage_list);
}
spin_unlock(&d->page_alloc_lock);
#else
/*
* In debug builds we shadow a selection of <4GB PDPTs to exercise code paths.
- * We cannot safely shadow the idle page table, nor shadow (v1) page tables
- * (detected by lack of an owning domain). As required for correctness, we
+ * We cannot safely shadow the idle page table, nor shadow page tables
+ * (detected by zero reference count). As required for correctness, we
* always shadow PDPTs above 4GB.
*/
-#define l3tab_needs_shadow(mfn) \
- (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
- (page_get_owner(mfn_to_page(mfn)) != NULL) && \
- ((mfn) & 1)) || /* odd MFNs are shadowed */ \
+#define l3tab_needs_shadow(mfn) \
+ (((((mfn) << PAGE_SHIFT) != __pa(idle_pg_table)) && \
+ (mfn_to_page(mfn)->count_info & PGC_count_mask) && \
+ ((mfn) & 1)) || /* odd MFNs are shadowed */ \
((mfn) >= 0x100000))
#endif
}
-static void invalidate_shadow_ldt(struct vcpu *v)
+static void invalidate_shadow_ldt(struct vcpu *v, int flush)
{
int i;
unsigned long pfn;
struct page_info *page;
-
+
+ BUG_ON(unlikely(in_irq()));
+
+ spin_lock(&v->arch.shadow_ldt_lock);
+
if ( v->arch.shadow_ldt_mapcnt == 0 )
- return;
+ goto out;
v->arch.shadow_ldt_mapcnt = 0;
put_page_and_type(page);
}
- /* Dispose of the (now possibly invalid) mappings from the TLB. */
- if ( v == current )
- queue_deferred_ops(v->domain, DOP_FLUSH_TLB | DOP_RELOAD_LDT);
- else
- flush_tlb_mask(v->domain->domain_dirty_cpumask);
+ /* Rid TLBs of stale mappings (guest mappings and shadow mappings). */
+ if ( flush )
+ flush_tlb_mask(v->vcpu_dirty_cpumask);
+
+ out:
+ spin_unlock(&v->arch.shadow_ldt_lock);
}
nl1e = l1e_from_pfn(mfn, l1e_get_flags(l1e) | _PAGE_RW);
+ spin_lock(&v->arch.shadow_ldt_lock);
l1e_write(&v->arch.perdomain_ptes[off + 16], nl1e);
v->arch.shadow_ldt_mapcnt++;
+ spin_unlock(&v->arch.shadow_ldt_lock);
return 1;
}
static int get_page_and_type_from_pagenr(unsigned long page_nr,
unsigned long type,
struct domain *d,
+ int partial,
int preemptible)
{
struct page_info *page = mfn_to_page(page_nr);
int rc;
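+ /*
+ * A negative 'partial' means the caller already holds a general
+ * reference on this page from an earlier, preempted attempt, so
+ * neither take a new reference here nor drop one on failure.
+ */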
- if ( unlikely(!get_page_from_pagenr(page_nr, d)) )
+ if ( likely(partial >= 0) &&
+ unlikely(!get_page_from_pagenr(page_nr, d)) )
return -EINVAL;
rc = (preemptible ?
get_page_type_preemptible(page, type) :
(get_page_type(page, type) ? 0 : -EINVAL));
- if ( rc )
+ if ( unlikely(rc) && partial >= 0 )
put_page(page);
return rc;
}
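+/* Take (or drop) a reference on a leaf data page, additionally taking
+ * a writable type reference when the mapping is writeable. */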
+static int get_data_page(
+ struct page_info *page, struct domain *d, int writeable)
+{
+ int rc;
+
+ if ( writeable )
+ rc = get_page_and_type(page, d, PGT_writable_page);
+ else
+ rc = get_page(page, d);
+
+ return rc;
+}
+
+static void put_data_page(
+ struct page_info *page, int writeable)
+{
+ if ( writeable )
+ put_page_and_type(page);
+ else
+ put_page(page);
+}
+
/*
* We allow root tables to map each other (a.k.a. linear page tables). It
* needs some special care with reference counts and access permissions:
int is_iomem_page(unsigned long mfn)
{
- return (!mfn_valid(mfn) || (page_get_owner(mfn_to_page(mfn)) == dom_io));
+ struct page_info *page;
+
+ if ( !mfn_valid(mfn) )
+ return 1;
+
+ /* Caller must know that it is an iomem page, or a reference is held. */
+ page = mfn_to_page(mfn);
+ ASSERT((page->count_info & PGC_count_mask) != 0);
+
+ return (page_get_owner(page) == dom_io);
}
uint32_t l1f = l1e_get_flags(l1e);
struct vcpu *curr = current;
struct domain *owner;
- int okay;
if ( !(l1f & _PAGE_PRESENT) )
return 1;
return 0;
}
- if ( is_iomem_page(mfn) )
+ if ( !mfn_valid(mfn) ||
+ (owner = page_get_owner_and_reference(page)) == dom_io )
{
+ /* Only needed the reference to confirm dom_io ownership. */
+ if ( mfn_valid(mfn) )
+ put_page(page);
+
/* DOMID_IO reverts to caller for privilege checks. */
if ( d == dom_io )
d = curr->domain;
return 1;
}
+ if ( owner == NULL )
+ goto could_not_pin;
+
/*
* Let privileged domains transfer the right to map their target
* domain's pages. This is used to allow stub-domain pvfb export to dom0,
* until pvfb supports granted mappings. At that time this minor hack
* can go away.
*/
- owner = page_get_owner(page);
- if ( unlikely(d != owner) && (owner != NULL) &&
- (d != curr->domain) && IS_PRIV_FOR(d, owner) )
+ if ( unlikely(d != owner) && (d != curr->domain) && IS_PRIV_FOR(d, owner) )
d = owner;
/* Foreign mappings into guests in shadow external mode don't
* contribute to writeable mapping refcounts. (This allows the
* qemu-dm helper process in dom0 to map the domain's memory without
* messing up the count of "real" writable mappings.) */
- okay = (((l1f & _PAGE_RW) &&
- !(unlikely(paging_mode_external(d) && (d != curr->domain))))
- ? get_page_and_type(page, d, PGT_writable_page)
- : get_page(page, d));
- if ( !okay )
- {
- MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
- " for dom%d",
- mfn, get_gpfn_from_mfn(mfn),
- l1e_get_intpte(l1e), d->domain_id);
- }
- else if ( pte_flags_to_cacheattr(l1f) !=
- ((page->count_info >> PGC_cacheattr_base) & 7) )
+ if ( (l1f & _PAGE_RW) &&
+ !(paging_mode_external(d) && (d != curr->domain)) &&
+ !get_page_type(page, PGT_writable_page) )
+ goto could_not_pin;
+
+ if ( pte_flags_to_cacheattr(l1f) !=
+ ((page->count_info >> PGC_cacheattr_base) & 7) )
{
- uint32_t x, nx, y = page->count_info;
- uint32_t cacheattr = pte_flags_to_cacheattr(l1f);
+ unsigned long x, nx, y = page->count_info;
+ unsigned long cacheattr = pte_flags_to_cacheattr(l1f);
if ( is_xen_heap_page(page) )
{
#endif
}
- return okay;
+ return 1;
+
+ could_not_pin:
+ MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
+ " for dom%d",
+ mfn, get_gpfn_from_mfn(mfn),
+ l1e_get_intpte(l1e), d->domain_id);
+ if ( owner != NULL )
+ put_page(page);
+ return 0;
}
get_page_from_l2e(
l2_pgentry_t l2e, unsigned long pfn, struct domain *d)
{
+ unsigned long mfn = l2e_get_pfn(l2e);
int rc;
if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
return -EINVAL;
}
- rc = get_page_and_type_from_pagenr(
- l2e_get_pfn(l2e), PGT_l1_page_table, d, 0);
- if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
- rc = 0;
+ if ( !(l2e_get_flags(l2e) & _PAGE_PSE) )
+ {
+ rc = get_page_and_type_from_pagenr(mfn, PGT_l1_page_table, d, 0, 0);
+ if ( unlikely(rc == -EINVAL) && get_l2_linear_pagetable(l2e, pfn, d) )
+ rc = 0;
+ }
+ else if ( !opt_allow_hugepage || (mfn & (L1_PAGETABLE_ENTRIES-1)) )
+ {
+ rc = -EINVAL;
+ }
+ else
+ {
+ unsigned long m = mfn;
+ int writeable = !!(l2e_get_flags(l2e) & _PAGE_RW);
+
+ do {
+ rc = get_data_page(mfn_to_page(m), d, writeable);
+ if ( unlikely(!rc) )
+ {
+ while ( m-- > mfn )
+ put_data_page(mfn_to_page(m), writeable);
+ return -EINVAL;
+ }
+ } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+
+#ifdef __x86_64__
+ map_pages_to_xen(
+ (unsigned long)mfn_to_virt(mfn), mfn, L1_PAGETABLE_ENTRIES,
+ PAGE_HYPERVISOR | l2e_get_flags(l2e));
+#endif
+ }
return rc;
}
define_get_linear_pagetable(l3);
static int
get_page_from_l3e(
- l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int preemptible)
+ l3_pgentry_t l3e, unsigned long pfn, struct domain *d, int partial, int preemptible)
{
int rc;
}
rc = get_page_and_type_from_pagenr(
- l3e_get_pfn(l3e), PGT_l2_page_table, d, preemptible);
+ l3e_get_pfn(l3e), PGT_l2_page_table, d, partial, preemptible);
if ( unlikely(rc == -EINVAL) && get_l3_linear_pagetable(l3e, pfn, d) )
rc = 0;
define_get_linear_pagetable(l4);
static int
get_page_from_l4e(
- l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int preemptible)
+ l4_pgentry_t l4e, unsigned long pfn, struct domain *d, int partial, int preemptible)
{
int rc;
}
rc = get_page_and_type_from_pagenr(
- l4e_get_pfn(l4e), PGT_l3_page_table, d, preemptible);
+ l4e_get_pfn(l4e), PGT_l3_page_table, d, partial, preemptible);
if ( unlikely(rc == -EINVAL) && get_l4_linear_pagetable(l4e, pfn, d) )
rc = 0;
(d == e) )
{
for_each_vcpu ( d, v )
- invalidate_shadow_ldt(v);
+ invalidate_shadow_ldt(v, 1);
}
put_page(page);
}
*/
static int put_page_from_l2e(l2_pgentry_t l2e, unsigned long pfn)
{
- if ( (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
- (l2e_get_pfn(l2e) != pfn) )
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) || (l2e_get_pfn(l2e) == pfn) )
+ return 1;
+
+ if ( l2e_get_flags(l2e) & _PAGE_PSE )
+ {
+ unsigned long mfn = l2e_get_pfn(l2e), m = mfn;
+ int writeable = l2e_get_flags(l2e) & _PAGE_RW;
+
+ ASSERT(!(mfn & (L1_PAGETABLE_ENTRIES-1)));
+ do {
+ put_data_page(mfn_to_page(m), writeable);
+ } while ( m++ < (mfn + (L1_PAGETABLE_ENTRIES-1)) );
+ }
+ else
{
put_page_and_type(l2e_get_page(l2e));
- return 0;
}
- return 1;
+
+ return 0;
}
+static int __put_page_type(struct page_info *, int preemptible);
static int put_page_from_l3e(l3_pgentry_t l3e, unsigned long pfn,
- int preemptible)
+ int partial, int preemptible)
{
- if ( (l3e_get_flags(l3e) & _PAGE_PRESENT) &&
- (l3e_get_pfn(l3e) != pfn) )
- return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
- return 1;
+ if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) || (l3e_get_pfn(l3e) == pfn) )
+ return 1;
+
+#ifdef __x86_64__
+ if ( unlikely(l3e_get_flags(l3e) & _PAGE_PSE) )
+ {
+ unsigned long mfn = l3e_get_pfn(l3e);
+ int writeable = l3e_get_flags(l3e) & _PAGE_RW;
+
+ ASSERT(!(mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1)));
+ do {
+ put_data_page(mfn_to_page(mfn), writeable);
+ } while ( ++mfn & ((1UL << (L3_PAGETABLE_SHIFT - PAGE_SHIFT)) - 1) );
+
+ return 0;
+ }
+#endif
+
+ if ( unlikely(partial > 0) )
+ return __put_page_type(l3e_get_page(l3e), preemptible);
+
+ return put_page_and_type_preemptible(l3e_get_page(l3e), preemptible);
}
#if CONFIG_PAGING_LEVELS >= 4
static int put_page_from_l4e(l4_pgentry_t l4e, unsigned long pfn,
- int preemptible)
+ int partial, int preemptible)
{
if ( (l4e_get_flags(l4e) & _PAGE_PRESENT) &&
(l4e_get_pfn(l4e) != pfn) )
+ {
+ if ( unlikely(partial > 0) )
+ return __put_page_type(l4e_get_page(l4e), preemptible);
return put_page_and_type_preemptible(l4e_get_page(l4e), preemptible);
+ }
return 1;
}
#endif
for ( i = 0; i < PDPT_L2_ENTRIES; i++ )
{
l2e = l2e_from_page(
- virt_to_page(page_get_owner(page)->arch.mm_perdomain_pt) + i,
+ virt_to_page(d->arch.mm_perdomain_pt) + i,
__PAGE_HYPERVISOR);
l2e_write(&pl2e[l2_table_offset(PERDOMAIN_VIRT_START) + i], l2e);
}
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
unsigned int i;
- int rc = 0;
+ int rc = 0, partial = page->partial_pte;
#if CONFIG_PAGING_LEVELS == 3
/*
if ( is_pv_32on64_domain(d) )
memset(pl3e + 4, 0, (L3_PAGETABLE_ENTRIES - 4) * sizeof(*pl3e));
- for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES; i++ )
+ for ( i = page->nr_validated_ptes; i < L3_PAGETABLE_ENTRIES;
+ i++, partial = 0 )
{
if ( is_pv_32bit_domain(d) && (i == 3) )
{
rc = get_page_and_type_from_pagenr(l3e_get_pfn(pl3e[i]),
PGT_l2_page_table |
PGT_pae_xen_l2,
- d, preemptible);
+ d, partial, preemptible);
}
else if ( !is_guest_l3_slot(i) ||
- (rc = get_page_from_l3e(pl3e[i], pfn, d, preemptible)) > 0 )
+ (rc = get_page_from_l3e(pl3e[i], pfn, d,
+ partial, preemptible)) > 0 )
continue;
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: 1;
}
else if ( rc == -EINTR && i )
{
if ( !is_guest_l3_slot(i) )
continue;
unadjust_guest_l3e(pl3e[i], d);
- put_page_from_l3e(pl3e[i], pfn, 0);
+ put_page_from_l3e(pl3e[i], pfn, 0, 0);
}
}
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
unsigned int i;
- int rc = 0;
+ int rc = 0, partial = page->partial_pte;
- for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES; i++ )
+ for ( i = page->nr_validated_ptes; i < L4_PAGETABLE_ENTRIES;
+ i++, partial = 0 )
{
if ( !is_guest_l4_slot(d, i) ||
- (rc = get_page_from_l4e(pl4e[i], pfn, d, preemptible)) > 0 )
+ (rc = get_page_from_l4e(pl4e[i], pfn, d,
+ partial, preemptible)) > 0 )
continue;
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: 1;
}
else if ( rc == -EINTR )
{
MEM_LOG("Failure in alloc_l4_table: entry %d", i);
while ( i-- > 0 )
if ( is_guest_l4_slot(d, i) )
- put_page_from_l4e(pl4e[i], pfn, 0);
+ put_page_from_l4e(pl4e[i], pfn, 0, 0);
}
if ( rc < 0 )
return rc;
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l3_pgentry_t *pl3e;
- unsigned int i = page->nr_validated_ptes - !page->partial_pte;
- int rc = 0;
-
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- if ( d->arch.relmem == RELMEM_l3 )
- return 0;
-#endif
+ int rc = 0, partial = page->partial_pte;
+ unsigned int i = page->nr_validated_ptes - !partial;
pl3e = map_domain_page(pfn);
do {
if ( is_guest_l3_slot(i) )
{
- rc = put_page_from_l3e(pl3e[i], pfn, preemptible);
+ rc = put_page_from_l3e(pl3e[i], pfn, partial, preemptible);
+ if ( rc < 0 )
+ break;
+ partial = 0;
if ( rc > 0 )
continue;
- if ( rc )
- break;
unadjust_guest_l3e(pl3e[i], d);
}
} while ( i-- );
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: -1;
}
else if ( rc == -EINTR && i < L3_PAGETABLE_ENTRIES - 1 )
{
struct domain *d = page_get_owner(page);
unsigned long pfn = page_to_mfn(page);
l4_pgentry_t *pl4e = page_to_virt(page);
- unsigned int i = page->nr_validated_ptes - !page->partial_pte;
- int rc = 0;
-
-#ifdef DOMAIN_DESTRUCT_AVOID_RECURSION
- if ( d->arch.relmem == RELMEM_l4 )
- return 0;
-#endif
+ int rc = 0, partial = page->partial_pte;
+ unsigned int i = page->nr_validated_ptes - !partial;
do {
if ( is_guest_l4_slot(d, i) )
- rc = put_page_from_l4e(pl4e[i], pfn, preemptible);
- } while ( rc >= 0 && i-- );
+ rc = put_page_from_l4e(pl4e[i], pfn, partial, preemptible);
+ if ( rc < 0 )
+ break;
+ partial = 0;
+ } while ( i-- );
if ( rc == -EAGAIN )
{
page->nr_validated_ptes = i;
- page->partial_pte = 1;
+ page->partial_pte = partial ?: -1;
}
else if ( rc == -EINTR && i < L4_PAGETABLE_ENTRIES - 1 )
{
#define free_l4_table(page, preemptible) (-EINVAL)
#endif
-static void page_lock(struct page_info *page)
+static int page_lock(struct page_info *page)
{
-#if defined(__i386__)
- while ( unlikely(test_and_set_bit(_PGC_locked, &page->count_info)) )
- while ( test_bit(_PGC_locked, &page->count_info) )
+ unsigned long x, nx;
+
+ do {
+ while ( (x = page->u.inuse.type_info) & PGT_locked )
cpu_relax();
-#else
- spin_lock(&page->lock);
-#endif
+ nx = x + (1 | PGT_locked);
+ if ( !(x & PGT_validated) ||
+ !(x & PGT_count_mask) ||
+ !(nx & PGT_count_mask) )
+ return 0;
+ } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x );
+
+ return 1;
}
static void page_unlock(struct page_info *page)
{
-#if defined(__i386__)
- clear_bit(_PGC_locked, &page->count_info);
-#else
- spin_unlock(&page->lock);
-#endif
+ unsigned long x, nx, y = page->u.inuse.type_info;
+
+ do {
+ x = y;
+ nx = x - (1 | PGT_locked);
+ } while ( (y = cmpxchg(&page->u.inuse.type_info, x, nx)) != x );
}
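/*
 * Minimal caller sketch (hypothetical helper, for illustration only):
 * page_lock() now also takes a type-count reference, so it succeeds only
 * for validated page tables with a non-zero type count, and each
 * successful call must be paired with page_unlock().
 */
static int example_update_under_lock(struct page_info *pg)
{
    if ( !page_lock(pg) )          /* not a validated page table */
        return 0;
    /* ... modify a page-table entry while holding the per-page lock ... */
    page_unlock(pg);               /* drops both the lock bit and the ref */
    return 1;
}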
/* How to write an entry to the guest pagetables.
(_m), (_v), (_ad))
/* Update the L1 entry at pl1e to new value nl1e. */
-static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
- unsigned long gl1mfn, int preserve_ad)
+static int mod_l1_entry(l1_pgentry_t *pl1e, l1_pgentry_t nl1e,
+ unsigned long gl1mfn, int preserve_ad,
+ struct vcpu *vcpu)
{
l1_pgentry_t ol1e;
- struct vcpu *curr = current;
- struct domain *d = curr->domain;
+ struct domain *d = vcpu->domain;
unsigned long mfn;
- struct page_info *l1pg = mfn_to_page(gl1mfn);
+ p2m_type_t p2mt;
int rc = 1;
- page_lock(l1pg);
-
if ( unlikely(__copy_from_user(&ol1e, pl1e, sizeof(ol1e)) != 0) )
- return page_unlock(l1pg), 0;
+ return 0;
if ( unlikely(paging_mode_refcounts(d)) )
{
- rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr, preserve_ad);
- page_unlock(l1pg);
+ rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, vcpu, preserve_ad);
return rc;
}
if ( l1e_get_flags(nl1e) & _PAGE_PRESENT )
{
/* Translate foreign guest addresses. */
- mfn = gmfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e));
- if ( unlikely(mfn == INVALID_MFN) )
- return page_unlock(l1pg), 0;
+ mfn = mfn_x(gfn_to_mfn(FOREIGNDOM, l1e_get_pfn(nl1e), &p2mt));
+ if ( !p2m_is_ram(p2mt) || unlikely(mfn == INVALID_MFN) )
+ return 0;
ASSERT((mfn & ~(PADDR_MASK >> PAGE_SHIFT)) == 0);
nl1e = l1e_from_pfn(mfn, l1e_get_flags(nl1e));
if ( unlikely(l1e_get_flags(nl1e) & l1_disallow_mask(d)) )
{
- page_unlock(l1pg);
MEM_LOG("Bad L1 flags %x",
l1e_get_flags(nl1e) & l1_disallow_mask(d));
return 0;
if ( !l1e_has_changed(ol1e, nl1e, _PAGE_RW | _PAGE_PRESENT) )
{
adjust_guest_l1e(nl1e, d);
- rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+ rc = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, vcpu,
preserve_ad);
- page_unlock(l1pg);
return rc;
}
if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
- return page_unlock(l1pg), 0;
+ return 0;
adjust_guest_l1e(nl1e, d);
- if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+ if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, vcpu,
preserve_ad)) )
{
ol1e = nl1e;
rc = 0;
}
}
- else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, curr,
+ else if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, vcpu,
preserve_ad)) )
{
- page_unlock(l1pg);
return 0;
}
- page_unlock(l1pg);
put_page_from_l1e(ol1e, d);
return rc;
}
static int mod_l2_entry(l2_pgentry_t *pl2e,
l2_pgentry_t nl2e,
unsigned long pfn,
- unsigned long type,
- int preserve_ad)
+ int preserve_ad,
+ struct vcpu *vcpu)
{
l2_pgentry_t ol2e;
- struct vcpu *curr = current;
- struct domain *d = curr->domain;
+ struct domain *d = vcpu->domain;
struct page_info *l2pg = mfn_to_page(pfn);
+ unsigned long type = l2pg->u.inuse.type_info;
int rc = 1;
if ( unlikely(!is_guest_l2_slot(d, type, pgentry_ptr_to_slot(pl2e))) )
return 0;
}
- page_lock(l2pg);
-
if ( unlikely(__copy_from_user(&ol2e, pl2e, sizeof(ol2e)) != 0) )
- return page_unlock(l2pg), 0;
+ return 0;
if ( l2e_get_flags(nl2e) & _PAGE_PRESENT )
{
if ( unlikely(l2e_get_flags(nl2e) & L2_DISALLOW_MASK) )
{
- page_unlock(l2pg);
MEM_LOG("Bad L2 flags %x",
l2e_get_flags(nl2e) & L2_DISALLOW_MASK);
return 0;
if ( !l2e_has_changed(ol2e, nl2e, _PAGE_PRESENT) )
{
adjust_guest_l2e(nl2e, d);
- rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr, preserve_ad);
- page_unlock(l2pg);
+ rc = UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu, preserve_ad);
return rc;
}
if ( unlikely(get_page_from_l2e(nl2e, pfn, d) < 0) )
- return page_unlock(l2pg), 0;
+ return 0;
adjust_guest_l2e(nl2e, d);
- if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
+ if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
preserve_ad)) )
{
ol2e = nl2e;
rc = 0;
}
}
- else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, curr,
+ else if ( unlikely(!UPDATE_ENTRY(l2, pl2e, ol2e, nl2e, pfn, vcpu,
preserve_ad)) )
{
- page_unlock(l2pg);
return 0;
}
- page_unlock(l2pg);
put_page_from_l2e(ol2e, pfn);
return rc;
}
l3_pgentry_t nl3e,
unsigned long pfn,
int preserve_ad,
- int preemptible)
+ int preemptible,
+ struct vcpu *vcpu)
{
l3_pgentry_t ol3e;
- struct vcpu *curr = current;
- struct domain *d = curr->domain;
- struct page_info *l3pg = mfn_to_page(pfn);
+ struct domain *d = vcpu->domain;
int rc = 0;
if ( unlikely(!is_guest_l3_slot(pgentry_ptr_to_slot(pl3e))) )
if ( is_pv_32bit_domain(d) && (pgentry_ptr_to_slot(pl3e) >= 3) )
return -EINVAL;
- page_lock(l3pg);
-
if ( unlikely(__copy_from_user(&ol3e, pl3e, sizeof(ol3e)) != 0) )
- return page_unlock(l3pg), -EFAULT;
+ return -EFAULT;
if ( l3e_get_flags(nl3e) & _PAGE_PRESENT )
{
if ( unlikely(l3e_get_flags(nl3e) & l3_disallow_mask(d)) )
{
- page_unlock(l3pg);
MEM_LOG("Bad L3 flags %x",
l3e_get_flags(nl3e) & l3_disallow_mask(d));
return -EINVAL;
if ( !l3e_has_changed(ol3e, nl3e, _PAGE_PRESENT) )
{
adjust_guest_l3e(nl3e, d);
- rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr, preserve_ad);
- page_unlock(l3pg);
+ rc = UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu, preserve_ad);
return rc ? 0 : -EFAULT;
}
- rc = get_page_from_l3e(nl3e, pfn, d, preemptible);
+ rc = get_page_from_l3e(nl3e, pfn, d, 0, preemptible);
if ( unlikely(rc < 0) )
- return page_unlock(l3pg), rc;
+ return rc;
rc = 0;
adjust_guest_l3e(nl3e, d);
- if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
+ if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu,
preserve_ad)) )
{
ol3e = nl3e;
rc = -EFAULT;
}
}
- else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, curr,
+ else if ( unlikely(!UPDATE_ENTRY(l3, pl3e, ol3e, nl3e, pfn, vcpu,
preserve_ad)) )
{
- page_unlock(l3pg);
return -EFAULT;
}
pae_flush_pgd(pfn, pgentry_ptr_to_slot(pl3e), nl3e);
}
- page_unlock(l3pg);
- put_page_from_l3e(ol3e, pfn, 0);
+ put_page_from_l3e(ol3e, pfn, 0, 0);
return rc;
}
l4_pgentry_t nl4e,
unsigned long pfn,
int preserve_ad,
- int preemptible)
+ int preemptible,
+ struct vcpu *vcpu)
{
- struct vcpu *curr = current;
- struct domain *d = curr->domain;
+ struct domain *d = vcpu->domain;
l4_pgentry_t ol4e;
- struct page_info *l4pg = mfn_to_page(pfn);
int rc = 0;
if ( unlikely(!is_guest_l4_slot(d, pgentry_ptr_to_slot(pl4e))) )
return -EINVAL;
}
- page_lock(l4pg);
-
if ( unlikely(__copy_from_user(&ol4e, pl4e, sizeof(ol4e)) != 0) )
- return page_unlock(l4pg), -EFAULT;
+ return -EFAULT;
if ( l4e_get_flags(nl4e) & _PAGE_PRESENT )
{
if ( unlikely(l4e_get_flags(nl4e) & L4_DISALLOW_MASK) )
{
- page_unlock(l4pg);
MEM_LOG("Bad L4 flags %x",
l4e_get_flags(nl4e) & L4_DISALLOW_MASK);
return -EINVAL;
if ( !l4e_has_changed(ol4e, nl4e, _PAGE_PRESENT) )
{
adjust_guest_l4e(nl4e, d);
- rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr, preserve_ad);
- page_unlock(l4pg);
+ rc = UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu, preserve_ad);
return rc ? 0 : -EFAULT;
}
- rc = get_page_from_l4e(nl4e, pfn, d, preemptible);
+ rc = get_page_from_l4e(nl4e, pfn, d, 0, preemptible);
if ( unlikely(rc < 0) )
- return page_unlock(l4pg), rc;
+ return rc;
rc = 0;
adjust_guest_l4e(nl4e, d);
- if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
+ if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu,
preserve_ad)) )
{
ol4e = nl4e;
rc = -EFAULT;
}
}
- else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, curr,
+ else if ( unlikely(!UPDATE_ENTRY(l4, pl4e, ol4e, nl4e, pfn, vcpu,
preserve_ad)) )
{
- page_unlock(l4pg);
return -EFAULT;
}
- page_unlock(l4pg);
- put_page_from_l4e(ol4e, pfn, 0);
+ put_page_from_l4e(ol4e, pfn, 0, 0);
return rc;
}
void put_page(struct page_info *page)
{
- u32 nx, x, y = page->count_info;
+ unsigned long nx, x, y = page->count_info;
do {
+ ASSERT((y & PGC_count_mask) != 0);
x = y;
nx = x - 1;
}
}
+struct domain *page_get_owner_and_reference(struct page_info *page)
+{
+ unsigned long x, y = page->count_info;
+
+ do {
+ x = y;
+ /*
+ * Count == 0: Page is not allocated, so we cannot take a reference.
+ * Count == -1: Reference count would wrap, which is invalid.
+ * Count == -2: Remaining unused ref is reserved for get_page_light().
+ */
+ if ( unlikely(((x + 2) & PGC_count_mask) <= 2) )
+ return NULL;
+ }
+ while ( (y = cmpxchg(&page->count_info, x, x + 1)) != x );
+
+ return page_get_owner(page);
+}
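/*
 * Worked example for the check above: with PGC_count_mask == 2^N - 1,
 * ((x + 2) & PGC_count_mask) <= 2 holds exactly when the masked count is
 * 0 (yields 2), the mask value i.e. "-1" (yields 1), or mask - 1 i.e.
 * "-2" (yields 0) -- precisely the three cases listed in the comment.
 */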
+
+
int get_page(struct page_info *page, struct domain *domain)
{
- u32 x, nx, y = page->count_info;
- u32 d, nd = page->u.inuse._domain;
- u32 _domain = pickle_domptr(domain);
+ struct domain *owner = page_get_owner_and_reference(page);
+
+ if ( likely(owner == domain) )
+ return 1;
+
+ if ( owner != NULL )
+ put_page(page);
+
+ if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
+ gdprintk(XENLOG_INFO,
+ "Error pfn %lx: rd=%p, od=%p, caf=%08lx, taf=%"
+ PRtype_info "\n",
+ page_to_mfn(page), domain, owner,
+ page->count_info, page->u.inuse.type_info);
+ return 0;
+}
+
+/*
+ * Special version of get_page() to be used exclusively when
+ * - a page is known to already have a non-zero reference count
+ * - the page does not need its owner to be checked
+ * - it will not be called more than once without dropping the thus
+ * acquired reference again.
+ * Due to get_page() reserving one reference, this call cannot fail.
+ */
+static void get_page_light(struct page_info *page)
+{
+ unsigned long x, nx, y = page->count_info;
do {
x = y;
nx = x + 1;
- d = nd;
- if ( unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */
- unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
- unlikely(d != _domain) ) /* Wrong owner? */
- {
- if ( !_shadow_mode_refcounts(domain) && !domain->is_dying )
- gdprintk(XENLOG_INFO,
- "Error pfn %lx: rd=%p, od=%p, caf=%08x, taf=%"
- PRtype_info "\n",
- page_to_mfn(page), domain, unpickle_domptr(d),
- x, page->u.inuse.type_info);
- return 0;
- }
- asm volatile (
- LOCK_PREFIX "cmpxchg8b %2"
- : "=d" (nd), "=a" (y),
- "=m" (*(volatile u64 *)(&page->count_info))
- : "0" (d), "1" (x), "c" (d), "b" (nx) );
+ BUG_ON(!(x & PGC_count_mask)); /* Not allocated? */
+ BUG_ON(!(nx & PGC_count_mask)); /* Overflow? */
+ y = cmpxchg(&page->count_info, x, nx);
}
- while ( unlikely(nd != d) || unlikely(y != x) );
-
- return 1;
+ while ( unlikely(y != x) );
}
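/*
 * How get_page_light() is used below: whenever validation or devalidation
 * is interrupted and PGT_partial gets set, an extra general reference is
 * pinned so the page cannot be freed while in the partial state; the
 * matching put_page() is issued in the type get/put paths where
 * PGT_partial is seen and cleared again.
 */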
-
static int alloc_page_type(struct page_info *page, unsigned long type,
int preemptible)
{
rc = alloc_segdesc_page(page);
break;
default:
- printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%x\n",
+ printk("Bad type in alloc_page_type %lx t=%" PRtype_info " c=%lx\n",
type, page->u.inuse.type_info,
page->count_info);
rc = -EINVAL;
wmb();
if ( rc == -EAGAIN )
{
+ get_page_light(page);
page->u.inuse.type_info |= PGT_partial;
}
else if ( rc == -EINTR )
{
ASSERT(rc < 0);
MEM_LOG("Error while validating mfn %lx (pfn %lx) for type %"
- PRtype_info ": caf=%08x taf=%" PRtype_info,
+ PRtype_info ": caf=%08lx taf=%" PRtype_info,
page_to_mfn(page), get_gpfn_from_mfn(page_to_mfn(page)),
type, page->count_info, page->u.inuse.type_info);
page->u.inuse.type_info = 0;
unsigned long gmfn;
int rc;
- if ( likely(owner != NULL) )
+ if ( likely(owner != NULL) && unlikely(paging_mode_enabled(owner)) )
{
- /*
- * We have to flush before the next use of the linear mapping
- * (e.g., update_va_mapping()) or we could end up modifying a page
- * that is no longer a page table (and hence screw up ref counts).
- */
- if ( current->domain == owner )
- queue_deferred_ops(owner, DOP_FLUSH_ALL_TLBS);
- else
- flush_tlb_mask(owner->domain_dirty_cpumask);
-
- if ( unlikely(paging_mode_enabled(owner)) )
- {
- /* A page table is dirtied when its type count becomes zero. */
- paging_mark_dirty(owner, page_to_mfn(page));
+ /* A page table is dirtied when its type count becomes zero. */
+ paging_mark_dirty(owner, page_to_mfn(page));
- if ( shadow_mode_refcounts(owner) )
- return 0;
+ if ( shadow_mode_refcounts(owner) )
+ return 0;
- gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
- ASSERT(VALID_M2P(gmfn));
- shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
- }
+ gmfn = mfn_to_gmfn(owner, page_to_mfn(page));
+ ASSERT(VALID_M2P(gmfn));
+ shadow_remove_all_shadows(owner->vcpu[0], _mfn(gmfn));
}
if ( !(type & PGT_partial) )
page->nr_validated_ptes = 1U << PAGETABLE_ORDER;
page->partial_pte = 0;
}
+
switch ( type & PGT_type_mask )
{
case PGT_l1_page_table:
BUG();
}
+ return rc;
+}
+
+
+static int __put_final_page_type(
+ struct page_info *page, unsigned long type, int preemptible)
+{
+ int rc = free_page_type(page, type, preemptible);
+
/* No need for atomic update of type_info here: noone else updates it. */
if ( rc == 0 )
{
}
else if ( rc == -EINTR )
{
- ASSERT(!(page->u.inuse.type_info &
- (PGT_count_mask|PGT_validated|PGT_partial)));
+ ASSERT((page->u.inuse.type_info &
+ (PGT_count_mask|PGT_validated|PGT_partial)) == 1);
if ( !(shadow_mode_enabled(page_get_owner(page)) &&
(page->count_info & PGC_page_table)) )
page->tlbflush_timestamp = tlbflush_current_time();
{
BUG_ON(rc != -EAGAIN);
wmb();
+ get_page_light(page);
page->u.inuse.type_info |= PGT_partial;
}
int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
+ int rc = 0;
for ( ; ; )
{
x, nx)) != x) )
continue;
/* We cleared the 'valid bit' so we do the clean up. */
- return free_page_type(page, x, preemptible);
+ rc = __put_final_page_type(page, x, preemptible);
+ if ( x & PGT_partial )
+ put_page(page);
+ break;
}
/*
return -EINTR;
}
- return 0;
+ return rc;
}
int preemptible)
{
unsigned long nx, x, y = page->u.inuse.type_info;
+ int rc = 0;
ASSERT(!(type & ~(PGT_type_mask | PGT_pae_xen_l2)));
page->nr_validated_ptes = 0;
page->partial_pte = 0;
}
- return alloc_page_type(page, type, preemptible);
+ rc = alloc_page_type(page, type, preemptible);
}
- return 0;
+ if ( (x & PGT_partial) && !(nx & PGT_partial) )
+ put_page(page);
+
+ return rc;
}
void put_page_type(struct page_info *page)
int new_guest_cr3(unsigned long mfn)
{
- struct vcpu *v = current;
- struct domain *d = v->domain;
+ struct vcpu *curr = current;
+ struct domain *d = curr->domain;
int okay;
unsigned long old_base_mfn;
okay = paging_mode_refcounts(d)
? 0 /* Old code was broken, but what should it be? */
: mod_l4_entry(
- __va(pagetable_get_paddr(v->arch.guest_table)),
+ __va(pagetable_get_paddr(curr->arch.guest_table)),
l4e_from_pfn(
mfn,
(_PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_ACCESSED)),
- pagetable_get_pfn(v->arch.guest_table), 0, 0) == 0;
+ pagetable_get_pfn(curr->arch.guest_table), 0, 0, curr) == 0;
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new compat baseptr %lx", mfn);
return 0;
}
- invalidate_shadow_ldt(v);
- write_ptbase(v);
+ invalidate_shadow_ldt(curr, 0);
+ write_ptbase(curr);
return 1;
}
#endif
okay = paging_mode_refcounts(d)
? get_page_from_pagenr(mfn, d)
- : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0);
+ : !get_page_and_type_from_pagenr(mfn, PGT_root_page_table, d, 0, 0);
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new baseptr %lx", mfn);
return 0;
}
- invalidate_shadow_ldt(v);
+ invalidate_shadow_ldt(curr, 0);
- old_base_mfn = pagetable_get_pfn(v->arch.guest_table);
+ old_base_mfn = pagetable_get_pfn(curr->arch.guest_table);
- v->arch.guest_table = pagetable_from_pfn(mfn);
- update_cr3(v);
+ curr->arch.guest_table = pagetable_from_pfn(mfn);
+ update_cr3(curr);
- write_ptbase(v);
+ write_ptbase(curr);
if ( likely(old_base_mfn != 0) )
{
flush_tlb_local();
}
+ /*
+ * Do this after flushing TLBs, to ensure we see fresh LDT mappings
+ * via the linear pagetable mapping.
+ */
if ( deferred_ops & DOP_RELOAD_LDT )
(void)map_ldt_shadow_page(0);
return pmask;
}
+#ifdef __i386__
+static inline void *fixmap_domain_page(unsigned long mfn)
+{
+ unsigned int cpu = smp_processor_id();
+ void *ptr = (void *)fix_to_virt(FIX_PAE_HIGHMEM_0 + cpu);
+
+ l1e_write(fix_pae_highmem_pl1e - cpu,
+ l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
+ flush_tlb_one_local(ptr);
+ return ptr;
+}
+static inline void fixunmap_domain_page(const void *ptr)
+{
+ unsigned int cpu = virt_to_fix((unsigned long)ptr) - FIX_PAE_HIGHMEM_0;
+
+ l1e_write(fix_pae_highmem_pl1e - cpu, l1e_empty());
+ this_cpu(make_cr3_timestamp) = this_cpu(tlbflush_time);
+}
+#else
+#define fixmap_domain_page(mfn) mfn_to_virt(mfn)
+#define fixunmap_domain_page(ptr) ((void)(ptr))
+#endif
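/*
 * Rationale for the split above: on 32-bit (PAE) Xen not every MFN is
 * covered by the direct map, so a per-CPU FIX_PAE_HIGHMEM fixmap slot is
 * temporarily pointed at the target frame for the clear/copy operations
 * below; on x86-64 the direct map suffices and the helpers reduce to
 * mfn_to_virt().
 */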
+
int do_mmuext_op(
XEN_GUEST_HANDLE(mmuext_op_t) uops,
unsigned int count,
unsigned long mfn = 0, gmfn = 0, type;
unsigned int done = 0;
struct page_info *page;
- struct vcpu *v = current;
- struct domain *d = v->domain;
+ struct vcpu *curr = current;
+ struct domain *d = curr->domain;
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
{
if ( paging_mode_refcounts(FOREIGNDOM) )
break;
- rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 1);
+ rc = get_page_and_type_from_pagenr(mfn, type, FOREIGNDOM, 0, 1);
okay = !rc;
if ( unlikely(!okay) )
{
okay = get_page_from_pagenr(mfn, d);
else
okay = !get_page_and_type_from_pagenr(
- mfn, PGT_root_page_table, d, 0);
+ mfn, PGT_root_page_table, d, 0, 0);
if ( unlikely(!okay) )
{
MEM_LOG("Error while installing new mfn %lx", mfn);
}
}
- old_mfn = pagetable_get_pfn(v->arch.guest_table_user);
- v->arch.guest_table_user = pagetable_from_pfn(mfn);
+ old_mfn = pagetable_get_pfn(curr->arch.guest_table_user);
+ curr->arch.guest_table_user = pagetable_from_pfn(mfn);
if ( old_mfn != 0 )
{
case MMUEXT_INVLPG_LOCAL:
if ( !paging_mode_enabled(d)
- || paging_invlpg(v, op.arg1.linear_addr) != 0 )
+ || paging_invlpg(curr, op.arg1.linear_addr) != 0 )
flush_tlb_one_local(op.arg1.linear_addr);
break;
}
case MMUEXT_TLB_FLUSH_ALL:
- flush_tlb_mask(d->domain_dirty_cpumask);
+ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
break;
case MMUEXT_INVLPG_ALL:
okay = 0;
MEM_LOG("Bad args to SET_LDT: ptr=%lx, ents=%lx", ptr, ents);
}
- else if ( (v->arch.guest_context.ldt_ents != ents) ||
- (v->arch.guest_context.ldt_base != ptr) )
+ else if ( (curr->arch.guest_context.ldt_ents != ents) ||
+ (curr->arch.guest_context.ldt_base != ptr) )
{
- invalidate_shadow_ldt(v);
- v->arch.guest_context.ldt_base = ptr;
- v->arch.guest_context.ldt_ents = ents;
- load_LDT(v);
+ invalidate_shadow_ldt(curr, 0);
+ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
+ curr->arch.guest_context.ldt_base = ptr;
+ curr->arch.guest_context.ldt_ents = ents;
+ load_LDT(curr);
this_cpu(percpu_mm_info).deferred_ops &= ~DOP_RELOAD_LDT;
if ( ents != 0 )
this_cpu(percpu_mm_info).deferred_ops |= DOP_RELOAD_LDT;
break;
}
+ case MMUEXT_CLEAR_PAGE:
+ {
+ unsigned char *ptr;
+
+ okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
+ FOREIGNDOM, 0, 0);
+ if ( unlikely(!okay) )
+ {
+ MEM_LOG("Error while clearing mfn %lx", mfn);
+ break;
+ }
+
+ /* A page is dirtied when it's being cleared. */
+ paging_mark_dirty(d, mfn);
+
+ ptr = fixmap_domain_page(mfn);
+ clear_page(ptr);
+ fixunmap_domain_page(ptr);
+
+ put_page_and_type(page);
+ break;
+ }
+
+ case MMUEXT_COPY_PAGE:
+ {
+ const unsigned char *src;
+ unsigned char *dst;
+ unsigned long src_mfn;
+
+ src_mfn = gmfn_to_mfn(FOREIGNDOM, op.arg2.src_mfn);
+ okay = get_page_from_pagenr(src_mfn, FOREIGNDOM);
+ if ( unlikely(!okay) )
+ {
+ MEM_LOG("Error while copying from mfn %lx", src_mfn);
+ break;
+ }
+
+ okay = !get_page_and_type_from_pagenr(mfn, PGT_writable_page,
+ FOREIGNDOM, 0, 0);
+ if ( unlikely(!okay) )
+ {
+ put_page(mfn_to_page(src_mfn));
+ MEM_LOG("Error while copying to mfn %lx", mfn);
+ break;
+ }
+
+ /* A page is dirtied when it's being copied to. */
+ paging_mark_dirty(d, mfn);
+
+ src = map_domain_page(src_mfn);
+ dst = fixmap_domain_page(mfn);
+ copy_page(dst, src);
+ fixunmap_domain_page(dst);
+ unmap_domain_page(src);
+
+ put_page_and_type(page);
+ put_page(mfn_to_page(src_mfn));
+ break;
+ }
+
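/*
 * Guest-side usage sketch for the two new sub-ops (illustrative only;
 * the field names assume the usual mmuext_op layout, cf. arg2.src_mfn
 * above):
 *
 *   struct mmuext_op op = { .cmd = MMUEXT_COPY_PAGE };
 *   op.arg1.mfn = dst_mfn;       destination, grabbed as a writable page
 *   op.arg2.src_mfn = src_mfn;   source frame to copy from
 *   rc = HYPERVISOR_mmuext_op(&op, 1, NULL, DOMID_SELF);
 *
 * MMUEXT_CLEAR_PAGE takes only arg1.mfn and zeroes that frame.
 */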
default:
MEM_LOG("Invalid extended pt command 0x%x", op.cmd);
rc = -ENOSYS;
struct page_info *page;
int rc = 0, okay = 1, i = 0;
unsigned int cmd, done = 0;
- struct vcpu *v = current;
- struct domain *d = v->domain;
- unsigned long type_info;
+ struct domain *d = current->domain;
struct domain_mmap_cache mapcache;
if ( unlikely(count & MMU_UPDATE_PREEMPTED) )
(unsigned long)(req.ptr & ~PAGE_MASK));
page = mfn_to_page(mfn);
- switch ( (type_info = page->u.inuse.type_info) & PGT_type_mask )
+ if ( page_lock(page) )
{
- case PGT_l1_page_table:
- case PGT_l2_page_table:
- case PGT_l3_page_table:
- case PGT_l4_page_table:
- {
- if ( paging_mode_refcounts(d) )
- {
- MEM_LOG("mmu update on auto-refcounted domain!");
- break;
- }
-
- if ( unlikely(!get_page_type(
- page, type_info & (PGT_type_mask|PGT_pae_xen_l2))) )
- goto not_a_pt;
-
- switch ( type_info & PGT_type_mask )
+ switch ( page->u.inuse.type_info & PGT_type_mask )
{
case PGT_l1_page_table:
{
l1_pgentry_t l1e = l1e_from_intpte(req.val);
okay = mod_l1_entry(va, l1e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD,
+ current);
}
break;
case PGT_l2_page_table:
{
l2_pgentry_t l2e = l2e_from_intpte(req.val);
- okay = mod_l2_entry(va, l2e, mfn, type_info,
- cmd == MMU_PT_UPDATE_PRESERVE_AD);
+ okay = mod_l2_entry(va, l2e, mfn,
+ cmd == MMU_PT_UPDATE_PRESERVE_AD,
+ current);
}
break;
case PGT_l3_page_table:
{
l3_pgentry_t l3e = l3e_from_intpte(req.val);
rc = mod_l3_entry(va, l3e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1,
+ current);
okay = !rc;
}
break;
{
l4_pgentry_t l4e = l4e_from_intpte(req.val);
rc = mod_l4_entry(va, l4e, mfn,
- cmd == MMU_PT_UPDATE_PRESERVE_AD, 1);
+ cmd == MMU_PT_UPDATE_PRESERVE_AD, 1,
+ current);
okay = !rc;
}
break;
#endif
+ case PGT_writable_page:
+ perfc_incr(writable_mmu_updates);
+ okay = paging_write_guest_entry(
+ current, va, req.val, _mfn(mfn));
+ break;
}
-
- put_page_type(page);
+ page_unlock(page);
if ( rc == -EINTR )
rc = -EAGAIN;
}
- break;
-
- default:
- not_a_pt:
+ else if ( get_page_type(page, PGT_writable_page) )
{
- if ( unlikely(!get_page_type(page, PGT_writable_page)) )
- break;
-
perfc_incr(writable_mmu_updates);
-
- okay = paging_write_guest_entry(v, va, req.val, _mfn(mfn));
-
+ okay = paging_write_guest_entry(
+ current, va, req.val, _mfn(mfn));
put_page_type(page);
}
- break;
- }
unmap_domain_page_with_cache(va, &mapcache);
-
put_page(page);
break;
void *va;
unsigned long gmfn, mfn;
struct page_info *page;
- u32 type;
l1_pgentry_t ol1e;
struct domain *d = v->domain;
va = (void *)((unsigned long)va + ((unsigned long)pte_addr & ~PAGE_MASK));
page = mfn_to_page(mfn);
- type = page->u.inuse.type_info & PGT_type_mask;
- if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
+ if ( !page_lock(page) )
{
- MEM_LOG("Grant map attempted to update a non-L1 page");
rc = GNTST_general_error;
goto failed;
}
- page_lock(page);
+ if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(page);
+ rc = GNTST_general_error;
+ goto failed;
+ }
ol1e = *(l1_pgentry_t *)va;
if ( !UPDATE_ENTRY(l1, (l1_pgentry_t *)va, ol1e, nl1e, mfn, v, 0) )
{
page_unlock(page);
- put_page_type(page);
rc = GNTST_general_error;
goto failed;
}
if ( !paging_mode_refcounts(d) )
put_page_from_l1e(ol1e, d);
- put_page_type(page);
-
failed:
unmap_domain_page(va);
put_page(page);
void *va;
unsigned long gmfn, mfn;
struct page_info *page;
- u32 type;
l1_pgentry_t ol1e;
gmfn = addr >> PAGE_SHIFT;
va = (void *)((unsigned long)va + ((unsigned long)addr & ~PAGE_MASK));
page = mfn_to_page(mfn);
- type = page->u.inuse.type_info & PGT_type_mask;
- if ( (type != PGT_l1_page_table) || !get_page_type(page, type) )
+ if ( !page_lock(page) )
{
- MEM_LOG("Grant map attempted to update a non-L1 page");
rc = GNTST_general_error;
goto failed;
}
- page_lock(page);
+ if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(page);
+ rc = GNTST_general_error;
+ goto failed;
+ }
ol1e = *(l1_pgentry_t *)va;
page_unlock(page);
MEM_LOG("PTE entry %lx for address %"PRIx64" doesn't match frame %lx",
(unsigned long)l1e_get_intpte(ol1e), addr, frame);
- put_page_type(page);
rc = GNTST_general_error;
goto failed;
}
{
page_unlock(page);
MEM_LOG("Cannot delete PTE entry at %p", va);
- put_page_type(page);
rc = GNTST_general_error;
goto failed;
}
page_unlock(page);
- put_page_type(page);
failed:
unmap_domain_page(va);
MEM_LOG("Could not find L1 PTE for address %lx", va);
return GNTST_general_error;
}
+
+ if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+ {
+ guest_unmap_l1e(v, pl1e);
+ return GNTST_general_error;
+ }
+
l1pg = mfn_to_page(gl1mfn);
- page_lock(l1pg);
+ if ( !page_lock(l1pg) )
+ {
+ put_page(l1pg);
+ guest_unmap_l1e(v, pl1e);
+ return GNTST_general_error;
+ }
+
+ if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(l1pg);
+ put_page(l1pg);
+ guest_unmap_l1e(v, pl1e);
+ return GNTST_general_error;
+ }
+
ol1e = *pl1e;
okay = UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0);
+
page_unlock(l1pg);
+ put_page(l1pg);
guest_unmap_l1e(v, pl1e);
- pl1e = NULL;
-
- if ( !okay )
- return GNTST_general_error;
- if ( !paging_mode_refcounts(d) )
+ if ( okay && !paging_mode_refcounts(d) )
put_page_from_l1e(ol1e, d);
- return GNTST_okay;
+ return okay ? GNTST_okay : GNTST_general_error;
}
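/*
 * The sequence above (and in the grant helpers that follow) replaces the
 * old get_page_type(page, PGT_l1_page_table) protocol: take a general
 * reference on gl1mfn, acquire the per-page lock, and only then re-check
 * that the frame is still typed as an L1 page table before touching the
 * PTE.
 */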
static int replace_grant_va_mapping(
return GNTST_general_error;
}
+ if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+ {
+ rc = GNTST_general_error;
+ goto out;
+ }
+
l1pg = mfn_to_page(gl1mfn);
- page_lock(l1pg);
+ if ( !page_lock(l1pg) )
+ {
+ rc = GNTST_general_error;
+ put_page(l1pg);
+ goto out;
+ }
+
+ if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ rc = GNTST_general_error;
+ goto unlock_and_out;
+ }
+
ol1e = *pl1e;
/* Check that the virtual address supplied is actually mapped to frame. */
if ( unlikely(l1e_get_pfn(ol1e) != frame) )
{
- page_unlock(l1pg);
MEM_LOG("PTE entry %lx for address %lx doesn't match frame %lx",
l1e_get_pfn(ol1e), addr, frame);
rc = GNTST_general_error;
- goto out;
+ goto unlock_and_out;
}
/* Delete pagetable entry. */
if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, nl1e, gl1mfn, v, 0)) )
{
- page_unlock(l1pg);
MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
rc = GNTST_general_error;
- goto out;
+ goto unlock_and_out;
}
+ unlock_and_out:
page_unlock(l1pg);
-
+ put_page(l1pg);
out:
guest_unmap_l1e(v, pl1e);
return rc;
if ( !(flags & GNTMAP_readonly) )
l1e_add_flags(pte,_PAGE_RW);
+ l1e_add_flags(pte,
+ ((flags >> _GNTMAP_guest_avail0) * _PAGE_AVAIL0)
+ & _PAGE_AVAIL);
+
l1e_add_flags(pte, cacheattr_to_pte_flags(cache_flags >> 5));
if ( flags & GNTMAP_contains_pte )
return GNTST_general_error;
}
+ if ( !get_page_from_pagenr(gl1mfn, current->domain) )
+ {
+ guest_unmap_l1e(curr, pl1e);
+ return GNTST_general_error;
+ }
+
l1pg = mfn_to_page(gl1mfn);
- page_lock(l1pg);
+ if ( !page_lock(l1pg) )
+ {
+ put_page(l1pg);
+ guest_unmap_l1e(curr, pl1e);
+ return GNTST_general_error;
+ }
+
+ if ( (l1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(l1pg);
+ put_page(l1pg);
+ guest_unmap_l1e(curr, pl1e);
+ return GNTST_general_error;
+ }
+
ol1e = *pl1e;
if ( unlikely(!UPDATE_ENTRY(l1, pl1e, ol1e, l1e_empty(),
gl1mfn, curr, 0)) )
{
page_unlock(l1pg);
+ put_page(l1pg);
MEM_LOG("Cannot delete PTE entry at %p", (unsigned long *)pl1e);
guest_unmap_l1e(curr, pl1e);
return GNTST_general_error;
}
page_unlock(l1pg);
+ put_page(l1pg);
guest_unmap_l1e(curr, pl1e);
rc = replace_grant_va_mapping(addr, frame, ol1e, curr);
int steal_page(
struct domain *d, struct page_info *page, unsigned int memflags)
{
- u32 _d, _nd, x, y;
+ unsigned long x, y;
spin_lock(&d->page_alloc_lock);
+ if ( is_xen_heap_page(page) || (page_get_owner(page) != d) )
+ goto fail;
+
/*
- * The tricky bit: atomically release ownership while there is just one
- * benign reference to the page (PGC_allocated). If that reference
- * disappears then the deallocation routine will safely spin.
+ * We require there is just one reference (PGC_allocated). We temporarily
+ * drop this reference now so that we can safely swizzle the owner.
*/
- _d = pickle_domptr(d);
- _nd = page->u.inuse._domain;
- y = page->count_info;
+ y = page->count_info;
do {
x = y;
- if ( unlikely((x & (PGC_count_mask|PGC_allocated)) !=
- (1 | PGC_allocated)) || unlikely(_nd != _d) )
- {
- MEM_LOG("gnttab_transfer: Bad page %p: ed=%p(%u), sd=%p,"
- " caf=%08x, taf=%" PRtype_info "\n",
- (void *) page_to_mfn(page),
- d, d->domain_id, unpickle_domptr(_nd), x,
- page->u.inuse.type_info);
- spin_unlock(&d->page_alloc_lock);
- return -1;
- }
- asm volatile (
- LOCK_PREFIX "cmpxchg8b %2"
- : "=d" (_nd), "=a" (y),
- "=m" (*(volatile u64 *)(&page->count_info))
- : "0" (_d), "1" (x), "c" (NULL), "b" (x) );
- } while (unlikely(_nd != _d) || unlikely(y != x));
+ if ( (x & (PGC_count_mask|PGC_allocated)) != (1 | PGC_allocated) )
+ goto fail;
+ y = cmpxchg(&page->count_info, x, x & ~PGC_count_mask);
+ } while ( y != x );
- /*
- * Unlink from 'd'. At least one reference remains (now anonymous), so
- * noone else is spinning to try to delete this page from 'd'.
- */
+ /* Swizzle the owner then reinstate the PGC_allocated reference. */
+ page_set_owner(page, NULL);
+ y = page->count_info;
+ do {
+ x = y;
+ BUG_ON((x & (PGC_count_mask|PGC_allocated)) != PGC_allocated);
+ } while ( (y = cmpxchg(&page->count_info, x, x | 1)) != x );
+
+ /* Unlink from original owner. */
if ( !(memflags & MEMF_no_refcount) )
d->tot_pages--;
- list_del(&page->list);
+ page_list_del(page, &d->page_list);
spin_unlock(&d->page_alloc_lock);
-
return 0;
+
+ fail:
+ spin_unlock(&d->page_alloc_lock);
+ MEM_LOG("Bad page %p: ed=%p(%u), sd=%p, caf=%08lx, taf=%" PRtype_info,
+ (void *)page_to_mfn(page), d, d->domain_id,
+ page_get_owner(page), page->count_info, page->u.inuse.type_info);
+ return -1;
}
int do_update_va_mapping(unsigned long va, u64 val64,
l1_pgentry_t val = l1e_from_intpte(val64);
struct vcpu *v = current;
struct domain *d = v->domain;
+ struct page_info *gl1pg;
l1_pgentry_t *pl1e;
unsigned long vmask, bmap_ptr, gl1mfn;
cpumask_t pmask;
- int rc = 0;
+ int rc;
perfc_incr(calls_to_update_va);
- if ( unlikely(!access_ok(va, 1) && !paging_mode_external(d)) )
- return -EINVAL;
-
rc = xsm_update_va_mapping(d, FOREIGNDOM, val);
if ( rc )
return rc;
+ rc = -EINVAL;
pl1e = guest_map_l1e(v, va, &gl1mfn);
+ if ( unlikely(!pl1e || !get_page_from_pagenr(gl1mfn, d)) )
+ goto out;
- if ( unlikely(!pl1e || !mod_l1_entry(pl1e, val, gl1mfn, 0)) )
- rc = -EINVAL;
+ gl1pg = mfn_to_page(gl1mfn);
+ if ( !page_lock(gl1pg) )
+ {
+ put_page(gl1pg);
+ goto out;
+ }
+ if ( (gl1pg->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(gl1pg);
+ put_page(gl1pg);
+ goto out;
+ }
+
+ rc = mod_l1_entry(pl1e, val, gl1mfn, 0, v) ? 0 : -EINVAL;
+
+ page_unlock(gl1pg);
+ put_page(gl1pg);
+
+ out:
if ( pl1e )
guest_unmap_l1e(v, pl1e);
- pl1e = NULL;
-
- process_deferred_ops();
switch ( flags & UVMF_FLUSHTYPE_MASK )
{
switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
{
case UVMF_LOCAL:
- flush_tlb_local();
+ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_TLB;
break;
case UVMF_ALL:
- flush_tlb_mask(d->domain_dirty_cpumask);
+ this_cpu(percpu_mm_info).deferred_ops |= DOP_FLUSH_ALL_TLBS;
break;
default:
+ if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
+ break;
if ( unlikely(!is_pv_32on64_domain(d) ?
get_user(vmask, (unsigned long *)bmap_ptr) :
get_user(vmask, (unsigned int *)bmap_ptr)) )
- rc = -EFAULT;
+ rc = -EFAULT, vmask = 0;
pmask = vcpumask_to_pcpumask(d, vmask);
+ if ( cpu_isset(smp_processor_id(), pmask) )
+ this_cpu(percpu_mm_info).deferred_ops &= ~DOP_FLUSH_TLB;
flush_tlb_mask(pmask);
break;
}
break;
case UVMF_INVLPG:
+ if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_ALL_TLBS )
+ break;
switch ( (bmap_ptr = flags & ~UVMF_FLUSHTYPE_MASK) )
{
case UVMF_LOCAL:
+ if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
+ break;
if ( !paging_mode_enabled(d) ||
(paging_invlpg(v, va) != 0) )
flush_tlb_one_local(va);
if ( unlikely(!is_pv_32on64_domain(d) ?
get_user(vmask, (unsigned long *)bmap_ptr) :
get_user(vmask, (unsigned int *)bmap_ptr)) )
- rc = -EFAULT;
+ rc = -EFAULT, vmask = 0;
pmask = vcpumask_to_pcpumask(d, vmask);
+ if ( this_cpu(percpu_mm_info).deferred_ops & DOP_FLUSH_TLB )
+ cpu_clear(smp_processor_id(), pmask);
flush_tlb_one_mask(pmask, va);
break;
}
break;
}
+ process_deferred_ops();
+
return rc;
}
spin_unlock(&d->grant_table->lock);
break;
- case XENMAPSPACE_mfn:
- {
- if ( get_page_from_pagenr(xatp.idx, d) ) {
- mfn = xatp.idx;
- page = mfn_to_page(mfn);
- }
+ case XENMAPSPACE_gmfn:
+ xatp.idx = gmfn_to_mfn(d, xatp.idx);
+ if ( !get_page_from_pagenr(xatp.idx, d) )
+ break;
+ mfn = xatp.idx;
+ page = mfn_to_page(mfn);
break;
- }
default:
break;
}
break;
}
- case XENMEM_remove_from_physmap:
- {
- struct xen_remove_from_physmap xrfp;
- unsigned long mfn;
- struct domain *d;
-
- if ( copy_from_guest(&xrfp, arg, 1) )
- return -EFAULT;
-
- rc = rcu_lock_target_domain_by_id(xrfp.domid, &d);
- if ( rc != 0 )
- return rc;
-
- if ( xsm_remove_from_physmap(current->domain, d) )
- {
- rcu_unlock_domain(d);
- return -EPERM;
- }
-
- domain_lock(d);
-
- mfn = gmfn_to_mfn(d, xrfp.gpfn);
-
- if ( mfn_valid(mfn) )
- guest_physmap_remove_page(d, xrfp.gpfn, mfn, 0);
-
- domain_unlock(d);
-
- rcu_unlock_domain(d);
-
- break;
- }
-
case XENMEM_set_memory_map:
{
struct xen_foreign_memory_map fmap;
return 0;
}
+ case XENMEM_set_pod_target:
+ case XENMEM_get_pod_target:
+ {
+ xen_pod_target_t target;
+ struct domain *d;
+
+ /* Support DOMID_SELF? */
+ if ( !IS_PRIV(current->domain) )
+ return -EINVAL;
+
+ if ( copy_from_guest(&target, arg, 1) )
+ return -EFAULT;
+
+ rc = rcu_lock_target_domain_by_id(target.domid, &d);
+ if ( rc != 0 )
+ return rc;
+
+ if ( op == XENMEM_set_pod_target )
+ {
+ if ( target.target_pages > d->max_pages )
+ {
+ rc = -EINVAL;
+ goto pod_target_out_unlock;
+ }
+
+ rc = p2m_pod_set_mem_target(d, target.target_pages);
+ }
+
+ target.tot_pages = d->tot_pages;
+ target.pod_cache_pages = d->arch.p2m->pod.count;
+ target.pod_entries = d->arch.p2m->pod.entry_count;
+
+ if ( copy_to_guest(arg, &target, 1) )
+ {
+ rc = -EFAULT;
+ goto pod_target_out_unlock;
+ }
+
+ pod_target_out_unlock:
+ rcu_unlock_domain(d);
+ return rc;
+ }
+
default:
return subarch_memory_op(op, arg);
}
/* Attempt to read the PTE that maps the VA being accessed. */
guest_get_eff_l1e(v, addr, &pte);
- page = l1e_get_page(pte);
/* We are looking only for read-only mappings of p.t. pages. */
if ( ((l1e_get_flags(pte) & (_PAGE_PRESENT|_PAGE_RW)) != _PAGE_PRESENT) ||
- !mfn_valid(l1e_get_pfn(pte)) ||
- ((page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table) ||
- ((page->u.inuse.type_info & PGT_count_mask) == 0) ||
- (page_get_owner(page) != d) )
+ !get_page_from_pagenr(l1e_get_pfn(pte), d) )
goto bail;
+ page = l1e_get_page(pte);
+ if ( !page_lock(page) )
+ {
+ put_page(page);
+ goto bail;
+ }
+
+ if ( (page->u.inuse.type_info & PGT_type_mask) != PGT_l1_page_table )
+ {
+ page_unlock(page);
+ put_page(page);
+ goto bail;
+ }
+
ptwr_ctxt.ctxt.regs = regs;
ptwr_ctxt.ctxt.force_writeback = 0;
ptwr_ctxt.ctxt.addr_size = ptwr_ctxt.ctxt.sp_size =
ptwr_ctxt.cr2 = addr;
ptwr_ctxt.pte = pte;
- page_lock(page);
rc = x86_emulate(&ptwr_ctxt.ctxt, &ptwr_emulate_ops);
+
page_unlock(page);
+ put_page(page);
+
if ( rc == X86EMUL_UNHANDLEABLE )
goto bail;
{
if ( l3e_get_flags(ol3e) & _PAGE_GLOBAL )
flush_flags |= FLUSH_TLB_GLOBAL;
- if ( (l1f_to_lNf(l3e_get_flags(ol3e)) ^ flags) &
+ if ( (lNf_to_l1f(l3e_get_flags(ol3e)) ^ flags) &
PAGE_CACHE_ATTRS )
flush_flags |= FLUSH_CACHE;
flush_area(virt, flush_flags);
void memguard_init(void)
{
unsigned long start = max_t(unsigned long, xen_phys_start, 1UL << 20);
+#ifdef __i386__
map_pages_to_xen(
(unsigned long)__va(start),
start >> PAGE_SHIFT,
(xenheap_phys_end - start) >> PAGE_SHIFT,
__PAGE_HYPERVISOR|MAP_SMALL_PAGES);
-#ifdef __x86_64__
+#else
+ map_pages_to_xen(
+ (unsigned long)__va(start),
+ start >> PAGE_SHIFT,
+ (__pa(&_end) + PAGE_SIZE - 1 - start) >> PAGE_SHIFT,
+ __PAGE_HYPERVISOR|MAP_SMALL_PAGES);
BUG_ON(start != xen_phys_start);
map_pages_to_xen(
XEN_VIRT_START,
obj-y += paging.o
obj-y += p2m.o
+obj-y += guest_walk_2.o
+obj-y += guest_walk_3.o
+obj-$(x86_64) += guest_walk_4.o
+
+guest_walk_%.o: guest_walk.c Makefile
+ $(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
--- /dev/null
+/******************************************************************************
+ * arch/x86/mm/guest_walk.c
+ *
+ * Pagetable walker for guest memory accesses.
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#include <xen/types.h>
+#include <xen/mm.h>
+#include <xen/paging.h>
+#include <xen/domain_page.h>
+#include <xen/sched.h>
+#include <asm/page.h>
+#include <asm/guest_pt.h>
+
+
+/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
+static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
+{
+ static uint32_t flags[] = {
+ /* I/F - Usr Wr */
+ /* 0 0 0 0 */ _PAGE_PRESENT,
+ /* 0 0 0 1 */ _PAGE_PRESENT|_PAGE_RW,
+ /* 0 0 1 0 */ _PAGE_PRESENT|_PAGE_USER,
+ /* 0 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
+ /* 0 1 0 0 */ _PAGE_PRESENT,
+ /* 0 1 0 1 */ _PAGE_PRESENT|_PAGE_RW,
+ /* 0 1 1 0 */ _PAGE_PRESENT|_PAGE_USER,
+ /* 0 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
+ /* 1 0 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
+ /* 1 0 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
+ /* 1 0 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
+ /* 1 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
+ /* 1 1 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
+ /* 1 1 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
+ /* 1 1 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
+ /* 1 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
+ };
+
+ /* Don't demand not-NX if the CPU wouldn't enforce it. */
+ if ( !guest_supports_nx(v) )
+ pfec &= ~PFEC_insn_fetch;
+
+ /* Don't demand R/W if the CPU wouldn't enforce it. */
+ if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v))
+ && !(pfec & PFEC_user_mode) )
+ pfec &= ~PFEC_write_access;
+
+ return flags[(pfec & 0x1f) >> 1];
+}
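/*
 * Worked example, assuming the usual PFEC_* encodings (present=1, write=2,
 * user=4, rsvd=8, insn fetch=16): a user-mode write fault has
 * pfec = PFEC_write_access|PFEC_user_mode = 6, so (pfec & 0x1f) >> 1 == 3
 * and the walk demands _PAGE_PRESENT|_PAGE_RW|_PAGE_USER at every level.
 */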
+
+/* Modify a guest pagetable entry to set the Accessed and Dirty bits.
+ * Returns non-zero if it actually writes to guest memory. */
+static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
+{
+ guest_intpte_t old, new;
+
+ old = *(guest_intpte_t *)walk_p;
+ new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
+ if ( old != new )
+ {
+ /* Write the new entry into the walk, and try to write it back
+ * into the guest table as well. If the guest table has changed
+ * under our feet then leave it alone. */
+ *(guest_intpte_t *)walk_p = new;
+ if ( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old )
+ return 1;
+ }
+ return 0;
+}
+
+
+/* Walk the guest pagetables, after the manner of a hardware walker. */
+uint32_t
+guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
+ uint32_t pfec, mfn_t top_mfn, void *top_map)
+{
+ struct domain *d = v->domain;
+ p2m_type_t p2mt;
+ guest_l1e_t *l1p = NULL;
+ guest_l2e_t *l2p = NULL;
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+ guest_l3e_t *l3p = NULL;
+ guest_l4e_t *l4p;
+#endif
+ uint32_t gflags, mflags, rc = 0;
+ int pse;
+
+ perfc_incr(guest_walk);
+ memset(gw, 0, sizeof(*gw));
+ gw->va = va;
+
+ /* Mandatory bits that must be set in every entry. We invert NX, to
+ * calculate as if there were an "X" bit that allowed access.
+ * We will accumulate, in rc, the set of flags that are missing. */
+ mflags = mandatory_flags(v, pfec);
+
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+
+ /* Get the l4e from the top level table and check its flags */
+ gw->l4mfn = top_mfn;
+ l4p = (guest_l4e_t *) top_map;
+ gw->l4e = l4p[guest_l4_table_offset(va)];
+ gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
+ rc |= ((gflags & mflags) ^ mflags);
+ if ( rc & _PAGE_PRESENT ) goto out;
+
+ /* Map the l3 table */
+ gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
+ if ( !p2m_is_ram(p2mt) )
+ {
+ rc |= _PAGE_PRESENT;
+ goto out;
+ }
+ ASSERT(mfn_valid(mfn_x(gw->l3mfn)));
+
+ /* Get the l3e and check its flags */
+ l3p = map_domain_page(mfn_x(gw->l3mfn));
+ gw->l3e = l3p[guest_l3_table_offset(va)];
+ gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
+ rc |= ((gflags & mflags) ^ mflags);
+ if ( rc & _PAGE_PRESENT )
+ goto out;
+
+#else /* PAE only... */
+
+ /* Get the l3e and check its flags */
+ gw->l3e = ((guest_l3e_t *) top_map)[guest_l3_table_offset(va)];
+ if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) )
+ {
+ rc |= _PAGE_PRESENT;
+ goto out;
+ }
+
+#endif /* PAE or 64... */
+
+ /* Map the l2 table */
+ gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
+ if ( !p2m_is_ram(p2mt) )
+ {
+ rc |= _PAGE_PRESENT;
+ goto out;
+ }
+ ASSERT(mfn_valid(mfn_x(gw->l2mfn)));
+
+ /* Get the l2e */
+ l2p = map_domain_page(mfn_x(gw->l2mfn));
+ gw->l2e = l2p[guest_l2_table_offset(va)];
+
+#else /* 32-bit only... */
+
+ /* Get l2e from the top level table */
+ gw->l2mfn = top_mfn;
+ l2p = (guest_l2e_t *) top_map;
+ gw->l2e = l2p[guest_l2_table_offset(va)];
+
+#endif /* All levels... */
+
+ gflags = guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
+ rc |= ((gflags & mflags) ^ mflags);
+ if ( rc & _PAGE_PRESENT )
+ goto out;
+
+ pse = (guest_supports_superpages(v) &&
+ (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE));
+
+ if ( pse )
+ {
+ /* Special case: this guest VA is in a PSE superpage, so there's
+ * no guest l1e. We make one up so that the propagation code
+ * can generate a shadow l1 table. Start with the gfn of the
+ * first 4k-page of the superpage. */
+ gfn_t start = guest_l2e_get_gfn(gw->l2e);
+ /* Grant full access in the l1e, since all the guest entry's
+ * access controls are enforced in the shadow l2e. */
+ int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
+ _PAGE_ACCESSED|_PAGE_DIRTY);
+ /* Import cache-control bits. Note that _PAGE_PAT is actually
+ * _PAGE_PSE, and it is always set. We will clear it in case
+ * _PAGE_PSE_PAT (bit 12, i.e. first bit of gfn) is clear. */
+ flags |= (guest_l2e_get_flags(gw->l2e)
+ & (_PAGE_PAT|_PAGE_PWT|_PAGE_PCD));
+ if ( !(gfn_x(start) & 1) )
+ /* _PAGE_PSE_PAT not set: remove _PAGE_PAT from flags. */
+ flags &= ~_PAGE_PAT;
+
+ /* Increment the pfn by the right number of 4k pages.
+ * The ~0x1 is to mask out the PAT bit mentioned above. */
+ start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
+ gw->l1e = guest_l1e_from_gfn(start, flags);
+ gw->l1mfn = _mfn(INVALID_MFN);
+ }
+ else
+ {
+ /* Not a superpage: carry on and find the l1e. */
+ gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
+ if ( !p2m_is_ram(p2mt) )
+ {
+ rc |= _PAGE_PRESENT;
+ goto out;
+ }
+ ASSERT(mfn_valid(mfn_x(gw->l1mfn)));
+ l1p = map_domain_page(mfn_x(gw->l1mfn));
+ gw->l1e = l1p[guest_l1_table_offset(va)];
+ gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
+ rc |= ((gflags & mflags) ^ mflags);
+ }
+
+ /* Go back and set accessed and dirty bits only if the walk was a
+ * success. Although the PRMs say higher-level _PAGE_ACCESSED bits
+ * get set whenever a lower-level PT is used, at least some hardware
+ * walkers only set them on a successful walk, so we do the same. */
+ if ( rc == 0 )
+ {
+#if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
+ if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
+ paging_mark_dirty(d, mfn_x(gw->l4mfn));
+ if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) )
+ paging_mark_dirty(d, mfn_x(gw->l3mfn));
+#endif
+ if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
+ (pse && (pfec & PFEC_write_access))) )
+ paging_mark_dirty(d, mfn_x(gw->l2mfn));
+ if ( !pse )
+ {
+ if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
+ (pfec & PFEC_write_access)) )
+ paging_mark_dirty(d, mfn_x(gw->l1mfn));
+ }
+ }
+
+ out:
+#if GUEST_PAGING_LEVELS == 4
+ if ( l3p ) unmap_domain_page(l3p);
+#endif
+#if GUEST_PAGING_LEVELS >= 3
+ if ( l2p ) unmap_domain_page(l2p);
+#endif
+ if ( l1p ) unmap_domain_page(l1p);
+
+ return rc;
+}
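/*
 * Return value convention, as relied upon by the callers below: 0 means
 * the access is permitted and gw->l1e describes the final mapping; a
 * non-zero result is the set of mandatory flags found missing, with
 * _PAGE_PRESENT indicating a not-present entry (or a gfn with no RAM
 * backing), which callers reflect by clearing PFEC_page_present.
 */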
guest_levels = $(subst level,,$(filter %level,$(subst ., ,$(subst _, ,$(1)))))
guest_walk_defns = -DGUEST_PAGING_LEVELS=$(call guest_levels,$(1))
-guest_walk_%level.o: guest_walk.c $(HDRS) Makefile
+guest_walk_%level.o: guest_walk.c Makefile
$(CC) $(CFLAGS) $(call guest_walk_defns,$(@F)) -c $< -o $@
* Place - Suite 330, Boston, MA 02111-1307 USA.
*/
-#include <xen/config.h>
-#include <xen/types.h>
-#include <xen/mm.h>
+
#include <xen/domain_page.h>
-#include <asm/page.h>
-#include <xen/event.h>
+#include <xen/paging.h>
+#include <xen/config.h>
#include <xen/sched.h>
-#include <asm/hvm/svm/vmcb.h>
-#include <asm/domain.h>
-#include <asm/paging.h>
-#include <asm/p2m.h>
-#include <asm/hap.h>
-
-#include "private.h"
#define _hap_gva_to_gfn(levels) hap_gva_to_gfn_##levels##level
#define hap_gva_to_gfn(levels) _hap_gva_to_gfn(levels)
-#if GUEST_PAGING_LEVELS > CONFIG_PAGING_LEVELS
+#if GUEST_PAGING_LEVELS <= CONFIG_PAGING_LEVELS
-unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
- struct vcpu *v, unsigned long gva, uint32_t *pfec)
-{
- gdprintk(XENLOG_ERR,
- "Guest paging level is greater than host paging level!\n");
- domain_crash(v->domain);
- return INVALID_GFN;
-}
-
-#else
-
-#if GUEST_PAGING_LEVELS == 2
-#include "../page-guest32.h"
-#define l1_pgentry_t l1_pgentry_32_t
-#define l2_pgentry_t l2_pgentry_32_t
-#undef l2e_get_flags
-#define l2e_get_flags(x) l2e_get_flags_32(x)
-#undef l1e_get_flags
-#define l1e_get_flags(x) l1e_get_flags_32(x)
-#endif
+#include <asm/guest_pt.h>
unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
struct vcpu *v, unsigned long gva, uint32_t *pfec)
{
- unsigned long gcr3 = v->arch.hvm_vcpu.guest_cr[3];
- int mode = GUEST_PAGING_LEVELS;
- int lev, index;
- paddr_t gpa = 0;
- unsigned long gpfn, mfn;
+ unsigned long cr3;
+ uint32_t missing;
+ mfn_t top_mfn;
+ void *top_map;
p2m_type_t p2mt;
- int success = 1;
+ walk_t gw;
- l1_pgentry_t *l1e;
- l2_pgentry_t *l2e;
-#if GUEST_PAGING_LEVELS >= 3
- l3_pgentry_t *l3e;
-#endif
-#if GUEST_PAGING_LEVELS >= 4
- l4_pgentry_t *l4e;
-#endif
-
- gpfn = (gcr3 >> PAGE_SHIFT);
- for ( lev = mode; lev >= 1; lev-- )
+ /* Get the top-level table's MFN */
+ cr3 = v->arch.hvm_vcpu.guest_cr[3];
+ top_mfn = gfn_to_mfn(v->domain, _gfn(cr3 >> PAGE_SHIFT), &p2mt);
+ if ( !p2m_is_ram(p2mt) )
{
- mfn = mfn_x(gfn_to_mfn_current(gpfn, &p2mt));
- if ( !p2m_is_ram(p2mt) )
- {
- HAP_PRINTK("bad pfn=0x%lx from gva=0x%lx at lev%d\n", gpfn, gva,
- lev);
- success = 0;
- break;
- }
- ASSERT(mfn_valid(mfn));
-
- index = (gva >> PT_SHIFT[mode][lev]) & (PT_ENTRIES[mode][lev]-1);
-
-#if GUEST_PAGING_LEVELS >= 4
- if ( lev == 4 )
- {
- l4e = map_domain_page(mfn);
- if ( !(l4e_get_flags(l4e[index]) & _PAGE_PRESENT) )
- {
- HAP_PRINTK("Level 4 entry not present at index = %d\n", index);
- success = 0;
- }
- gpfn = l4e_get_pfn(l4e[index]);
- unmap_domain_page(l4e);
- }
-#endif
+ pfec[0] &= ~PFEC_page_present;
+ return INVALID_GFN;
+ }
-#if GUEST_PAGING_LEVELS >= 3
- if ( lev == 3 )
- {
- l3e = map_domain_page(mfn);
+ /* Map the top-level table and call the tree-walker */
+ ASSERT(mfn_valid(mfn_x(top_mfn)));
+ top_map = map_domain_page(mfn_x(top_mfn));
#if GUEST_PAGING_LEVELS == 3
- index += ((gcr3 >> 5) & 127) * 4;
-#endif
- if ( !(l3e_get_flags(l3e[index]) & _PAGE_PRESENT) )
- {
- HAP_PRINTK("Level 3 entry not present at index = %d\n", index);
- success = 0;
- }
- gpfn = l3e_get_pfn(l3e[index]);
- unmap_domain_page(l3e);
- }
+ top_map += (cr3 & ~(PAGE_MASK | 31));
#endif
+ missing = guest_walk_tables(v, gva, &gw, pfec[0], top_mfn, top_map);
+ unmap_domain_page(top_map);
+
+ /* Interpret the answer */
+ if ( missing == 0 )
+ return gfn_x(guest_l1e_get_gfn(gw.l1e));
+
+ if ( missing & _PAGE_PRESENT )
+ pfec[0] &= ~PFEC_page_present;
+
+ return INVALID_GFN;
+}
- if ( lev == 2 )
- {
- l2e = map_domain_page(mfn);
- if ( !(l2e_get_flags(l2e[index]) & _PAGE_PRESENT) )
- {
- HAP_PRINTK("Level 2 entry not present at index = %d\n", index);
- success = 0;
- }
-
- if ( l2e_get_flags(l2e[index]) & _PAGE_PSE )
- {
- paddr_t mask = ((paddr_t)1 << PT_SHIFT[mode][2]) - 1;
- HAP_PRINTK("guest page table is PSE\n");
- gpa = (l2e_get_intpte(l2e[index]) & ~mask) + (gva & mask);
- unmap_domain_page(l2e);
- break; /* last level page table, jump out from here */
- }
-
- gpfn = l2e_get_pfn(l2e[index]);
- unmap_domain_page(l2e);
- }
-
- if ( lev == 1 )
- {
- l1e = map_domain_page(mfn);
- if ( !(l1e_get_flags(l1e[index]) & _PAGE_PRESENT) )
- {
- HAP_PRINTK("Level 1 entry not present at index = %d\n", index);
- success = 0;
- }
- gpfn = l1e_get_pfn(l1e[index]);
- gpa = (l1e_get_intpte(l1e[index]) & PAGE_MASK) + (gva &~PAGE_MASK);
- unmap_domain_page(l1e);
- }
-
- if ( success != 1 ) /* error happened, jump out */
- break;
- }
-
- gpa &= PADDR_MASK;
- HAP_PRINTK("success = %d, gva = %lx, gpa = %lx\n", success, gva, gpa);
+#else
- return (!success ? INVALID_GFN : ((paddr_t)gpa >> PAGE_SHIFT));
+unsigned long hap_gva_to_gfn(GUEST_PAGING_LEVELS)(
+ struct vcpu *v, unsigned long gva, uint32_t *pfec)
+{
+ gdprintk(XENLOG_ERR,
+ "Guest paging level is greater than host paging level!\n");
+ domain_crash(v->domain);
+ return INVALID_GFN;
}
#endif
+
/*
* Local variables:
* mode: C
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/************************************************/
/* HAP LOG DIRTY SUPPORT */
ASSERT(hap_locked_by_me(d));
- if ( unlikely(list_empty(&d->arch.paging.hap.freelist)) )
+ pg = page_list_remove_head(&d->arch.paging.hap.freelist);
+ if ( unlikely(!pg) )
return NULL;
- pg = list_entry(d->arch.paging.hap.freelist.next, struct page_info, list);
- list_del(&pg->list);
d->arch.paging.hap.free_pages--;
p = hap_map_domain_page(page_to_mfn(pg));
ASSERT(hap_locked_by_me(d));
d->arch.paging.hap.free_pages++;
- list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
+ page_list_add_tail(pg, &d->arch.paging.hap.freelist);
}
static struct page_info *hap_alloc_p2m_page(struct domain *d)
d->arch.paging.hap.total_pages--;
d->arch.paging.hap.p2m_pages++;
page_set_owner(pg, d);
- pg->count_info = 1;
+ pg->count_info |= 1;
}
hap_unlock(d);
ASSERT(page_get_owner(pg) == d);
/* Should have just the one ref we gave it in alloc_p2m_page() */
if ( (pg->count_info & PGC_count_mask) != 1 )
- HAP_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+ HAP_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
pg->count_info, pg->u.inuse.type_info);
- pg->count_info = 0;
+ pg->count_info &= ~PGC_count_mask;
/* Free should not decrement domain's total allocation, since
* these pages were allocated without an owner. */
page_set_owner(pg, NULL);
}
d->arch.paging.hap.free_pages++;
d->arch.paging.hap.total_pages++;
- list_add_tail(&pg->list, &d->arch.paging.hap.freelist);
+ page_list_add_tail(pg, &d->arch.paging.hap.freelist);
}
else if ( d->arch.paging.hap.total_pages > pages )
{
/* Need to return memory to domheap */
- ASSERT(!list_empty(&d->arch.paging.hap.freelist));
- pg = list_entry(d->arch.paging.hap.freelist.next,
- struct page_info, list);
- list_del(&pg->list);
+ pg = page_list_remove_head(&d->arch.paging.hap.freelist);
+ ASSERT(pg);
d->arch.paging.hap.free_pages--;
d->arch.paging.hap.total_pages--;
- pg->count_info = 0;
free_domheap_page(pg);
}
void hap_domain_init(struct domain *d)
{
hap_lock_init(d);
- INIT_LIST_HEAD(&d->arch.paging.hap.freelist);
+ INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
/* This domain will use HAP for log-dirty mode */
paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
hap_write_p2m_entry(struct vcpu *v, unsigned long gfn, l1_pgentry_t *p,
mfn_t table_mfn, l1_pgentry_t new, unsigned int level)
{
+ uint32_t old_flags;
+
hap_lock(v->domain);
+ old_flags = l1e_get_flags(*p);
safe_write_pte(p, new);
+ if ( (old_flags & _PAGE_PRESENT)
+ && (level == 1 || (level == 2 && (old_flags & _PAGE_PSE))) )
+ flush_tlb_mask(v->domain->domain_dirty_cpumask);
+
#if CONFIG_PAGING_LEVELS == 3
/* install P2M in monitor table for PAE Xen */
if ( level == 3 )
pg->count_info = 1;
pg->u.inuse.type_info = 1 | PGT_validated;
- list_add_tail(&pg->list, &d->arch.p2m->pages);
+ page_list_add_tail(pg, &d->arch.p2m->pages);
ept_entry->emt = 0;
+ ept_entry->igmt = 0;
ept_entry->sp_avail = 0;
ept_entry->avail1 = 0;
ept_entry->mfn = page_to_mfn(pg);
}
}
+/*
+ * ept_set_entry() computes 'need_modify_vtd_table' for itself,
+ * by observing whether any gfn->mfn translations are modified.
+ */
static int
ept_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
unsigned int order, p2m_type_t p2mt)
u32 index;
int i, rv = 0, ret = 0;
int walk_level = order / EPT_TABLE_ORDER;
+ int direct_mmio = (p2mt == p2m_mmio_direct);
+ uint8_t igmt = 0;
+ int need_modify_vtd_table = 1;
/* we only support 4k and 2m pages now */
{
if ( mfn_valid(mfn_x(mfn)) || (p2mt == p2m_mmio_direct) )
{
- /* Track the highest gfn for which we have ever had a valid mapping */
- if ( gfn > d->arch.p2m->max_mapped_pfn )
- d->arch.p2m->max_mapped_pfn = gfn;
- ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
+ ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn),
+ &igmt, direct_mmio);
+ ept_entry->igmt = igmt;
ept_entry->sp_avail = walk_level ? 1 : 0;
if ( ret == GUEST_TABLE_SUPER_PAGE )
{
- ept_entry->mfn = mfn_x(mfn) - offset;
+ if ( ept_entry->mfn == (mfn_x(mfn) - offset) )
+ need_modify_vtd_table = 0;
+ else
+ ept_entry->mfn = mfn_x(mfn) - offset;
+
if ( ept_entry->avail1 == p2m_ram_logdirty &&
p2mt == p2m_ram_rw )
for ( i = 0; i < 512; i++ )
paging_mark_dirty(d, mfn_x(mfn)-offset+i);
}
else
- ept_entry->mfn = mfn_x(mfn);
+ {
+ if ( ept_entry->mfn == mfn_x(mfn) )
+ need_modify_vtd_table = 0;
+ else
+ ept_entry->mfn = mfn_x(mfn);
+ }
ept_entry->avail1 = p2mt;
ept_entry->rsvd = 0;
{
split_ept_entry = split_table + i;
split_ept_entry->emt = epte_get_entry_emt(d,
- gfn-offset+i, split_mfn+i);
+ gfn-offset+i, split_mfn+i,
+ &igmt, direct_mmio);
+ split_ept_entry->igmt = igmt;
+
split_ept_entry->sp_avail = 0;
split_ept_entry->mfn = split_mfn+i;
/* Set the destination 4k page as normal */
split_ept_entry = split_table + offset;
- split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn));
- split_ept_entry->mfn = mfn_x(mfn);
+ split_ept_entry->emt = epte_get_entry_emt(d, gfn, mfn_x(mfn),
+ &igmt, direct_mmio);
+ split_ept_entry->igmt = igmt;
+
+ if ( split_ept_entry->mfn == mfn_x(mfn) )
+ need_modify_vtd_table = 0;
+ else
+ split_ept_entry->mfn = mfn_x(mfn);
split_ept_entry->avail1 = p2mt;
ept_p2m_type_to_flags(split_ept_entry, p2mt);
unmap_domain_page(split_table);
}
+ /* Track the highest gfn for which we have ever had a valid mapping */
+ if ( mfn_valid(mfn_x(mfn))
+ && (gfn + (1UL << order) - 1 > d->arch.p2m->max_mapped_pfn) )
+ d->arch.p2m->max_mapped_pfn = gfn + (1UL << order) - 1;
+
/* Success */
rv = 1;
/* Now the p2m table is not shared with vt-d page table */
- if ( iommu_enabled && is_hvm_domain(d) )
+ if ( iommu_enabled && is_hvm_domain(d)
+ && need_modify_vtd_table )
{
if ( p2mt == p2m_ram_rw )
{
}
/* Read ept p2m entries */
-static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t)
+static mfn_t ept_get_entry(struct domain *d, unsigned long gfn, p2m_type_t *t,
+ p2m_query_t q)
{
ept_entry_t *table =
map_domain_page(mfn_x(pagetable_get_mfn(d->arch.phys_table)));
return content;
}
-static mfn_t ept_get_entry_current(unsigned long gfn, p2m_type_t *t)
+static mfn_t ept_get_entry_current(unsigned long gfn, p2m_type_t *t,
+ p2m_query_t q)
{
- return ept_get_entry(current->domain, gfn, t);
+ return ept_get_entry(current->domain, gfn, t, q);
+}
+
+/* Test whether the new emt/igmt values would differ from the old ones.
+ * Returns 0 if they are the same (no need to reset the EPT entry),
+ * 1 if the entry needs to be rewritten.
+ */
+static int need_modify_ept_entry(struct domain *d, unsigned long gfn,
+ unsigned long mfn, uint8_t o_igmt,
+ uint8_t o_emt, p2m_type_t p2mt)
+{
+ uint8_t igmt, emt;
+ emt = epte_get_entry_emt(d, gfn, mfn, &igmt,
+ (p2mt == p2m_mmio_direct));
+ if ( (emt == o_emt) && (igmt == o_igmt) )
+ return 0;
+ return 1;
}
void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
uint64_t epte;
int order = 0;
unsigned long mfn;
+ uint8_t o_igmt, o_emt;
for ( gfn = start_gfn; gfn <= end_gfn; gfn++ )
{
mfn = (epte & EPTE_MFN_MASK) >> PAGE_SHIFT;
if ( !mfn_valid(mfn) )
continue;
- p2mt = (epte & EPTE_AVAIL1_MASK) >> 8;
+ p2mt = (epte & EPTE_AVAIL1_MASK) >> EPTE_AVAIL1_SHIFT;
+ o_igmt = (epte & EPTE_IGMT_MASK) >> EPTE_IGMT_SHIFT;
+ o_emt = (epte & EPTE_EMT_MASK) >> EPTE_EMT_SHIFT;
order = 0;
if ( epte & EPTE_SUPER_PAGE_MASK )
* Set emt for super page.
*/
order = EPT_TABLE_ORDER;
- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ if ( need_modify_ept_entry(d, gfn, mfn,
+ o_igmt, o_emt, p2mt) )
+ ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
gfn += 0x1FF;
}
else
{
- /* change emt for partial entries of the 2m area */
- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ /* change emt for partial entries of the 2m area. */
+ if ( need_modify_ept_entry(d, gfn, mfn,
+ o_igmt, o_emt, p2mt) )
+ ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
gfn = ((gfn >> EPT_TABLE_ORDER) << EPT_TABLE_ORDER) + 0x1FF;
}
}
else /* gfn assigned with 4k */
- ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ {
+ if ( need_modify_ept_entry(d, gfn, mfn,
+ o_igmt, o_emt, p2mt) )
+ ept_set_entry(d, gfn, _mfn(mfn), order, p2mt);
+ }
}
}
#ifndef __HAP_PRIVATE_H__
#define __HAP_PRIVATE_H__
-#include <asm/flushtlb.h>
-#include <asm/hvm/support.h>
-
/********************************************/
/* GUEST TRANSLATION FUNCS */
/********************************************/
unsigned long hap_gva_to_gfn_4level(struct vcpu *v, unsigned long gva,
uint32_t *pfec);
-/********************************************/
-/* MISC DEFINITIONS */
-/********************************************/
-
-/* PT_SHIFT describes the amount by which a virtual address is shifted right
- * to right justify the portion to be used for indexing into a page
- * table, given the guest memory model (i.e. number of levels) and the level
- * of the page table being accessed. The idea is from Virtual Iron's code.
- */
-static const int PT_SHIFT[][5] =
- { /* ------ level ------ nr_levels */
- /* 1 2 3 4 */
- { 0, 0, 0, 0, 0}, /* 0 not used */
- { 0, 0, 0, 0, 0}, /* 1 not used */
- { 0, 12, 22, 0, 0}, /* 2 */
- { 0, 12, 21, 30, 0}, /* 3 */
- { 0, 12, 21, 30, 39} /* 4 */
- };
-
-/* PT_ENTRIES describes the number of entries in a page table, given the
- * memory model (i.e. number of levels) and the level of the page table
- * being considered. This idea from Virtual Iron's shadow code*/
-static const int PT_ENTRIES[][5] =
- { /* ------ level ------ nr_levels */
- /* 1 2 3 4 */
- { 0, 0, 0, 0, 0}, /* 0 not used */
- { 0, 0, 0, 0, 0}, /* 1 not used */
- { 0, 1024, 1024, 0, 0}, /* 2 */
- { 0, 512, 512, 4, 0}, /* 3 */
- { 0, 512, 512, 512, 512} /* 4 */
- };
#endif /* __SVM_NPT_H__ */
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/* PTE flags for the various types of p2m entry */
return flags;
case p2m_mmio_direct:
return flags | P2M_BASE_FLAGS | _PAGE_RW | _PAGE_PCD;
+ case p2m_populate_on_demand:
+ return flags;
}
}
+#if P2M_AUDIT
+static void audit_p2m(struct domain *d);
+#else
+# define audit_p2m(_d) do { (void)(_d); } while(0)
+#endif /* P2M_AUDIT */
// Find the next level's P2M entry, checking for out-of-range gfn's...
// Returns NULL on error.
shift, max)) )
return 0;
- if ( !(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) )
+ /* PoD: Not present doesn't imply empty. */
+ if ( !l1e_get_flags(*p2m_entry) )
{
struct page_info *pg = d->arch.p2m->alloc_page(d);
if ( pg == NULL )
return 0;
- list_add_tail(&pg->list, &d->arch.p2m->pages);
+ page_list_add_tail(pg, &d->arch.p2m->pages);
pg->u.inuse.type_info = type | 1 | PGT_validated;
- pg->count_info = 1;
+ pg->count_info |= 1;
new_entry = l1e_from_pfn(mfn_x(page_to_mfn(pg)),
__PAGE_HYPERVISOR|_PAGE_USER);
}
}
- ASSERT(l1e_get_flags(*p2m_entry) & _PAGE_PRESENT);
+ ASSERT(l1e_get_flags(*p2m_entry) & (_PAGE_PRESENT|_PAGE_PSE));
/* split single large page into 4KB page in P2M table */
if ( type == PGT_l1_page_table && (l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
struct page_info *pg = d->arch.p2m->alloc_page(d);
if ( pg == NULL )
return 0;
- list_add_tail(&pg->list, &d->arch.p2m->pages);
+ page_list_add_tail(pg, &d->arch.p2m->pages);
pg->u.inuse.type_info = PGT_l1_page_table | 1 | PGT_validated;
- pg->count_info = 1;
+ pg->count_info |= 1;
/* New splintered mappings inherit the flags of the old superpage,
* with a little reorganisation for the _PAGE_PSE_PAT bit. */
return 1;
}
+/*
+ * Populate-on-demand functionality
+ */
+static
+int set_p2m_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
+ unsigned int page_order, p2m_type_t p2mt);
+
+int
+p2m_pod_cache_add(struct domain *d,
+ struct page_info *page,
+ unsigned long order)
+{
+ int i;
+ struct page_info *p;
+ struct p2m_domain *p2md = d->arch.p2m;
+
+#ifndef NDEBUG
+ mfn_t mfn;
+
+ mfn = page_to_mfn(page);
+
+ /* Check to make sure this is a contiguous region */
+ if( mfn_x(mfn) & ((1 << order) - 1) )
+ {
+ printk("%s: mfn %lx not aligned order %lu! (mask %lx)\n",
+ __func__, mfn_x(mfn), order, ((1UL << order) - 1));
+ return -1;
+ }
+
+ for(i=0; i < 1 << order ; i++) {
+ struct domain * od;
+
+ p = mfn_to_page(_mfn(mfn_x(mfn) + i));
+ od = page_get_owner(p);
+ if(od != d)
+ {
+ printk("%s: mfn %lx expected owner d%d, got owner d%d!\n",
+ __func__, mfn_x(mfn), d->domain_id,
+ od?od->domain_id:-1);
+ return -1;
+ }
+ }
+#endif
+
+ spin_lock(&d->page_alloc_lock);
+
+ /* First, take all pages off the domain list */
+ for(i=0; i < 1 << order ; i++)
+ {
+ p = page + i;
+ page_list_del(p, &d->page_list);
+ }
+
+ /* Then add the first one to the appropriate populate-on-demand list */
+ switch(order)
+ {
+ case 9:
+ page_list_add_tail(page, &p2md->pod.super); /* lock: page_alloc */
+ p2md->pod.count += 1 << order;
+ break;
+ case 0:
+ page_list_add_tail(page, &p2md->pod.single); /* lock: page_alloc */
+ p2md->pod.count += 1 ;
+ break;
+ default:
+ BUG();
+ }
+
+ spin_unlock(&d->page_alloc_lock);
+
+ return 0;
+}
+
+/* Get a page of size order from the populate-on-demand cache. Will break
+ * down 2-meg pages into singleton pages automatically. Returns NULL if
+ * a superpage is requested and no superpages are available. Must be called
+ * with d->page_alloc_lock held. */
+static struct page_info * p2m_pod_cache_get(struct domain *d,
+ unsigned long order)
+{
+ struct p2m_domain *p2md = d->arch.p2m;
+ struct page_info *p = NULL;
+ int i;
+
+ if ( order == 9 && page_list_empty(&p2md->pod.super) )
+ {
+ return NULL;
+ }
+ else if ( order == 0 && page_list_empty(&p2md->pod.single) )
+ {
+ unsigned long mfn;
+ struct page_info *q;
+
+ BUG_ON( page_list_empty(&p2md->pod.super) );
+
+ /* Break up a superpage to make single pages. NB count doesn't
+ * need to be adjusted. */
+ printk("%s: Breaking up superpage.\n", __func__);
+ p = page_list_remove_head(&p2md->pod.super);
+ mfn = mfn_x(page_to_mfn(p));
+
+ for ( i=0; i<(1<<9); i++ )
+ {
+ q = mfn_to_page(_mfn(mfn+i));
+ page_list_add_tail(q, &p2md->pod.single);
+ }
+ }
+
+ switch ( order )
+ {
+ case 9:
+ BUG_ON( page_list_empty(&p2md->pod.super) );
+ p = page_list_remove_head(&p2md->pod.super);
+ p2md->pod.count -= 1 << order; /* Lock: page_alloc */
+ break;
+ case 0:
+ BUG_ON( page_list_empty(&p2md->pod.single) );
+ p = page_list_remove_head(&p2md->pod.single);
+ p2md->pod.count -= 1;
+ break;
+ default:
+ BUG();
+ }
+
+ /* Put the pages back on the domain page_list */
+ for ( i = 0 ; i < (1 << order) ; i++ )
+ {
+ BUG_ON(page_get_owner(p + i) != d);
+ page_list_add_tail(p + i, &d->page_list);
+ }
+
+ return p;
+}
+
+/* Set the size of the cache, allocating or freeing as necessary. */
+static int
+p2m_pod_set_cache_target(struct domain *d, unsigned long pod_target)
+{
+ struct p2m_domain *p2md = d->arch.p2m;
+ int ret = 0;
+
+ /* Increasing the target */
+ while ( pod_target > p2md->pod.count )
+ {
+ struct page_info * page;
+ int order;
+
+ if ( (pod_target - p2md->pod.count) >= (1 << 9) )
+ order = 9;
+ else
+ order = 0;
+
+ page = alloc_domheap_pages(d, order, 0);
+ if ( unlikely(page == NULL) )
+ goto out;
+
+ p2m_pod_cache_add(d, page, order);
+ }
+
+ /* Decreasing the target */
+ /* We hold the p2m lock here, so we don't need to worry about
+ * cache disappearing under our feet. */
+ while ( pod_target < p2md->pod.count )
+ {
+ struct page_info * page;
+ int order, i;
+
+ /* Grab the lock before checking that pod.super is empty, or the last
+ * entries may disappear before we grab the lock. */
+ spin_lock(&d->page_alloc_lock);
+
+ if ( (p2md->pod.count - pod_target) > (1 << 9)
+ && !page_list_empty(&p2md->pod.super) )
+ order = 9;
+ else
+ order = 0;
+
+ page = p2m_pod_cache_get(d, order);
+
+ ASSERT(page != NULL);
+
+ spin_unlock(&d->page_alloc_lock);
+
+ /* Then free them */
+ for ( i = 0 ; i < (1 << order) ; i++ )
+ {
+ /* Copied from common/memory.c:guest_remove_page() */
+ if ( unlikely(!get_page(page+i, d)) )
+ {
+ gdprintk(XENLOG_INFO, "Bad page free for domain %u\n", d->domain_id);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if ( test_and_clear_bit(_PGT_pinned, &(page+i)->u.inuse.type_info) )
+ put_page_and_type(page+i);
+
+ if ( test_and_clear_bit(_PGC_allocated, &(page+i)->count_info) )
+ put_page(page+i);
+
+ put_page(page+i);
+ }
+ }
+
+out:
+ return ret;
+}
+
+/*
+ * The "right behavior" here requires some careful thought. First, some
+ * definitions:
+ * + M: static_max
+ * + B: number of pages the balloon driver has ballooned down to.
+ * + P: Number of populated pages.
+ * + T: Old target
+ * + T': New target
+ *
+ * The following equations should hold:
+ * 0 <= P <= T <= B <= M
+ * d->arch.p2m->pod.entry_count == B - P
+ * d->tot_pages == P + d->arch.p2m->pod.count
+ *
+ * Now we have the following potential cases to cover:
+ * B < T' : Set the PoD cache size equal to the number of outstanding PoD
+ * entries. The balloon driver will deflate the balloon to give back
+ * the remainder of the RAM to the guest OS.
+ * T < T' < B : Increase the PoD cache size.
+ * T' < T <= B : Here we have a choice. We could decrease the size of the cache
+ * and get the memory back right away. However, that means every time we
+ * reduce the memory target we risk the guest attempting to populate the
+ * memory before the balloon driver has reached its new target. It is safer
+ * never to reduce the cache size here, and only do so when the balloon driver
+ * frees PoD ranges.
+ *
+ * If there are many zero pages, we could reach the target also by doing
+ * zero sweeps and marking the ranges PoD; but the balloon driver will have
+ * to free this memory eventually anyway, so we don't actually gain that much
+ * by doing so.
+ *
+ * NB that the equation (B<T') may require adjustment to the cache
+ * size as PoD pages are freed as well; i.e., freeing a PoD-backed
+ * entry when pod.entry_count == pod.count requires us to reduce both
+ * pod.entry_count and pod.count.
+ */
+int
+p2m_pod_set_mem_target(struct domain *d, unsigned long target)
+{
+ unsigned pod_target;
+ struct p2m_domain *p2md = d->arch.p2m;
+ int ret = 0;
+ unsigned long populated;
+
+ /* P == B: Nothing to do. */
+ if ( p2md->pod.entry_count == 0 )
+ goto out;
+
+ /* T' < B: Don't reduce the cache size; let the balloon driver
+ * take care of it. */
+ if ( target < d->tot_pages )
+ goto out;
+
+ populated = d->tot_pages - p2md->pod.count;
+
+ pod_target = target - populated;
+
+ /* B < T': Set the cache size equal to # of outstanding entries,
+ * let the balloon driver fill in the rest. */
+ if ( pod_target > p2md->pod.entry_count )
+ pod_target = p2md->pod.entry_count;
+
+ ASSERT( pod_target > p2md->pod.count );
+
+ ret = p2m_pod_set_cache_target(d, pod_target);
+
+out:
+ return ret;
+}
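
The arithmetic in p2m_pod_set_mem_target() follows directly from the invariants documented above (P populated pages, B ballooned size, a cache of pod.count pages, and pod.entry_count outstanding entries). The following is a minimal standalone sketch of that calculation, not hypervisor code; the parameter names mirror the fields used in the patch but everything here is illustrative.

#include <stdio.h>

/* Standalone sketch of the target calculation above (illustrative names,
 * not the hypervisor's API). */
static unsigned long compute_pod_target(unsigned long new_target,   /* T' */
                                        unsigned long tot_pages,    /* P + cache */
                                        unsigned long cache,        /* pod.count */
                                        unsigned long entries)      /* B - P */
{
    unsigned long populated, pod_target;

    if ( entries == 0 || new_target < tot_pages )
        return cache;                   /* nothing to do / balloon driver's job */

    populated = tot_pages - cache;      /* P */
    pod_target = new_target - populated;

    if ( pod_target > entries )         /* B < T': cap at outstanding entries */
        pod_target = entries;

    return pod_target;
}

int main(void)
{
    /* P=900 populated, cache=100 (tot_pages=1000), 300 outstanding PoD
     * entries (so B=1200); raising the target to T'=1100 yields a cache of 200. */
    printf("%lu\n", compute_pod_target(1100, 1000, 100, 300));
    return 0;
}
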
+
+void
+p2m_pod_empty_cache(struct domain *d)
+{
+ struct p2m_domain *p2md = d->arch.p2m;
+ struct page_info *page;
+
+ spin_lock(&d->page_alloc_lock);
+
+ while ( (page = page_list_remove_head(&p2md->pod.super)) )
+ {
+ int i;
+
+ for ( i = 0 ; i < (1 << 9) ; i++ )
+ {
+ BUG_ON(page_get_owner(page + i) != d);
+ page_list_add_tail(page + i, &d->page_list);
+ }
+
+ p2md->pod.count -= 1<<9;
+ }
+
+ while ( (page = page_list_remove_head(&p2md->pod.single)) )
+ {
+ BUG_ON(page_get_owner(page) != d);
+ page_list_add_tail(page, &d->page_list);
+
+ p2md->pod.count -= 1;
+ }
+
+ BUG_ON(p2md->pod.count != 0);
+
+ spin_unlock(&d->page_alloc_lock);
+}
+
+/* This function is needed for two reasons:
+ * + To properly handle clearing of PoD entries
+ * + To "steal back" memory being freed for the PoD cache, rather than
+ * releasing it.
+ *
+ * Once both of these tasks have been completed, we can return and
+ * allow decrease_reservation() to handle everything else.
+ */
+int
+p2m_pod_decrease_reservation(struct domain *d,
+ xen_pfn_t gpfn,
+ unsigned int order)
+{
+ struct p2m_domain *p2md = d->arch.p2m;
+ int ret=0;
+ int i;
+
+ int steal_for_cache = 0;
+ int pod = 0, nonpod = 0, ram = 0;
+
+
+ /* If we don't have any outstanding PoD entries, let things take their
+ * course */
+ if ( p2md->pod.entry_count == 0 )
+ goto out;
+
+ /* Figure out if we need to steal some freed memory for our cache */
+ steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
+
+ p2m_lock(p2md);
+ audit_p2m(d);
+
+ /* See what's in here. */
+ /* FIXME: Add contiguous; query for PSE entries? */
+ for ( i=0; i<(1<<order); i++)
+ {
+ p2m_type_t t;
+
+ gfn_to_mfn_query(d, gpfn + i, &t);
+
+ if ( t == p2m_populate_on_demand )
+ pod++;
+ else
+ {
+ nonpod++;
+ if ( p2m_is_ram(t) )
+ ram++;
+ }
+ }
+
+ /* No populate-on-demand? Don't need to steal anything? Then we're done!*/
+ if(!pod && !steal_for_cache)
+ goto out_unlock;
+
+ if ( !nonpod )
+ {
+ /* All PoD: Mark the whole region invalid and tell caller
+ * we're done. */
+ set_p2m_entry(d, gpfn, _mfn(INVALID_MFN), order, p2m_invalid);
+ p2md->pod.entry_count-=(1<<order); /* Lock: p2m */
+ BUG_ON(p2md->pod.entry_count < 0);
+ ret = 1;
+ goto out_unlock;
+ }
+
+ /* FIXME: Steal contig 2-meg regions for cache */
+
+ /* Process as long as:
+ * + There are PoD entries to handle, or
+ * + There is ram left, and we want to steal it
+ */
+ for ( i=0;
+ i<(1<<order) && (pod>0 || (steal_for_cache && ram > 0));
+ i++)
+ {
+ mfn_t mfn;
+ p2m_type_t t;
+
+ mfn = gfn_to_mfn_query(d, gpfn + i, &t);
+ if ( t == p2m_populate_on_demand )
+ {
+ set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
+ p2md->pod.entry_count--; /* Lock: p2m */
+ BUG_ON(p2md->pod.entry_count < 0);
+ pod--;
+ }
+ else if ( steal_for_cache && p2m_is_ram(t) )
+ {
+ struct page_info *page;
+
+ ASSERT(mfn_valid(mfn));
+
+ page = mfn_to_page(mfn);
+
+ set_p2m_entry(d, gpfn + i, _mfn(INVALID_MFN), 0, p2m_invalid);
+ set_gpfn_from_mfn(mfn_x(mfn), INVALID_M2P_ENTRY);
+
+ p2m_pod_cache_add(d, page, 0);
+
+ steal_for_cache = ( p2md->pod.entry_count > p2md->pod.count );
+
+ nonpod--;
+ ram--;
+ }
+ }
+
+ /* If we've reduced our "liabilities" beyond our "assets", free some */
+ if ( p2md->pod.entry_count < p2md->pod.count )
+ {
+ printk("%s: reducing PoD cache to %d entries\n", __func__, p2md->pod.entry_count);
+ p2m_pod_set_cache_target(d, p2md->pod.entry_count);
+ }
+
+ /* If there are no more non-PoD entries, tell decrease_reservation() that
+ * there's nothing left to do. */
+ if ( nonpod == 0 )
+ ret = 1;
+
+out_unlock:
+ audit_p2m(d);
+ p2m_unlock(p2md);
+
+out:
+ return ret;
+}
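
For readers following the decrease_reservation path: each freed gfn in the range is handled in one of three ways; a PoD entry is simply cleared, a RAM page is stolen for the cache while outstanding entries still exceed the cache size, or the page is left for ordinary freeing. Below is a counter-only model of that per-page decision, illustrative rather than hypervisor code.

/* Counter-only model of the per-page decision in the loop above.
 * 'entries' is the number of outstanding PoD entries, 'cache' the number
 * of pages held in the PoD cache; both are plain counters here. */
enum page_kind { POD_ENTRY, RAM_PAGE, OTHER };

static void account_freed_page(enum page_kind k, long *entries, long *cache)
{
    if ( k == POD_ENTRY )
        (*entries)--;                   /* clear the PoD entry */
    else if ( k == RAM_PAGE && *entries > *cache )
        (*cache)++;                     /* steal the backing page for the cache */
    /* otherwise let ordinary page freeing proceed */
}
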
+
+void
+p2m_pod_dump_data(struct domain *d)
+{
+ struct p2m_domain *p2md = d->arch.p2m;
+
+ printk(" PoD entries=%d cachesize=%d\n",
+ p2md->pod.entry_count, p2md->pod.count);
+}
+
+#define superpage_aligned(_x) (((_x)&((1<<9)-1))==0)
+
+/* Search for all-zero superpages to be reclaimed as superpages for the
+ * PoD cache. Must be called w/ p2m lock held, page_alloc lock not held. */
+static int
+p2m_pod_zero_check_superpage(struct domain *d, unsigned long gfn)
+{
+ mfn_t mfn, mfn0 = _mfn(INVALID_MFN);
+ p2m_type_t type, type0 = 0;
+ unsigned long * map = NULL;
+ int ret=0, reset = 0;
+ int i, j;
+ int max_ref = 1;
+
+ if ( !superpage_aligned(gfn) )
+ goto out;
+
+ /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
+ if ( paging_mode_shadow(d) )
+ max_ref++;
+
+ /* Look up the mfns, checking that they form one contiguous, aligned
+ * 2-meg range of the same type. */
+ for ( i=0; i<(1<<9); i++ )
+ {
+
+ mfn = gfn_to_mfn_query(d, gfn + i, &type);
+
+ if ( i == 0 )
+ {
+ mfn0 = mfn;
+ type0 = type;
+ }
+
+ /* Conditions that must be met to reclaim this range as a superpage:
+ * + All gfns are ram types
+ * + All gfns have the same type
+ * + All of the mfns are allocated to a domain
+ * + None of the mfns are used as pagetables
+ * + The first mfn is 2-meg aligned
+ * + All the other mfns are in sequence
+ * Adding for good measure:
+ * + None of the mfns are likely to be mapped elsewhere (refcount
+ * 2 or less for shadow, 1 for hap)
+ */
+ if ( !p2m_is_ram(type)
+ || type != type0
+ || ( (mfn_to_page(mfn)->count_info & PGC_allocated) == 0 )
+ || ( (mfn_to_page(mfn)->count_info & PGC_page_table) != 0 )
+ || ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > max_ref )
+ || !( ( i == 0 && superpage_aligned(mfn_x(mfn0)) )
+ || ( i != 0 && mfn_x(mfn) == (mfn_x(mfn0) + i) ) ) )
+ goto out;
+ }
+
+ /* Now, do a quick check to see if it may be zero before unmapping. */
+ for ( i=0; i<(1<<9); i++ )
+ {
+ /* Quick zero-check */
+ map = map_domain_page(mfn_x(mfn0) + i);
+
+ for ( j=0; j<16; j++ )
+ if( *(map+j) != 0 )
+ break;
+
+ unmap_domain_page(map);
+
+ if ( j < 16 )
+ goto out;
+
+ }
+
+ /* Try to remove the page, restoring old mapping if it fails. */
+ set_p2m_entry(d, gfn,
+ _mfn(POPULATE_ON_DEMAND_MFN), 9,
+ p2m_populate_on_demand);
+
+ /* Make sure none of the MFNs are used elsewhere... for example, mapped
+ * via the grant table interface, or by qemu. Allow one refcount for
+ * being allocated to the domain. */
+ for ( i=0; i < (1<<9); i++ )
+ {
+ mfn = _mfn(mfn_x(mfn0) + i);
+ if ( (mfn_to_page(mfn)->count_info & PGC_count_mask) > 1 )
+ {
+ reset = 1;
+ goto out_reset;
+ }
+ }
+
+ /* Finally, do a full zero-check */
+ for ( i=0; i < (1<<9); i++ )
+ {
+ map = map_domain_page(mfn_x(mfn0) + i);
+
+ for ( j=0; j<PAGE_SIZE/sizeof(*map); j++ )
+ if( *(map+j) != 0 )
+ {
+ reset = 1;
+ break;
+ }
+
+ unmap_domain_page(map);
+
+ if ( reset )
+ goto out_reset;
+ }
+
+ /* Finally! We've passed all the checks, and can add the mfn superpage
+ * back on the PoD cache, and account for the new p2m PoD entries */
+ p2m_pod_cache_add(d, mfn_to_page(mfn0), 9);
+ d->arch.p2m->pod.entry_count += (1<<9);
+
+out_reset:
+ if ( reset )
+ set_p2m_entry(d, gfn, mfn0, 9, type0);
+
+out:
+ return ret;
+}
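
Both this routine and p2m_pod_zero_check() below use the same two-stage test: a cheap probe of the first 16 words of each page, followed by a full scan only when the probe finds nothing. A self-contained sketch of that pattern, where page_words stands in for PAGE_SIZE/sizeof(unsigned long):

#include <stddef.h>

#define PROBE_WORDS 16

/* Two-stage zero check: bail out early on the first few words, and only
 * scan the whole page if the quick probe passed. */
static int page_is_zero(const unsigned long *map, size_t page_words)
{
    size_t j;

    for ( j = 0; j < PROBE_WORDS; j++ )     /* quick probe */
        if ( map[j] != 0 )
            return 0;

    for ( ; j < page_words; j++ )           /* full scan */
        if ( map[j] != 0 )
            return 0;

    return 1;
}
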
+
+static void
+p2m_pod_zero_check(struct domain *d, unsigned long *gfns, int count)
+{
+ mfn_t mfns[count];
+ p2m_type_t types[count];
+ unsigned long * map[count];
+
+ int i, j;
+ int max_ref = 1;
+
+ /* Allow an extra refcount for one shadow pt mapping in shadowed domains */
+ if ( paging_mode_shadow(d) )
+ max_ref++;
+
+ /* First, get the gfn list, translate to mfns, and map the pages. */
+ for ( i=0; i<count; i++ )
+ {
+ mfns[i] = gfn_to_mfn_query(d, gfns[i], types + i);
+ /* If this is ram, and not a pagetable, and probably not mapped
+ elsewhere, map it; otherwise, skip. */
+ if ( p2m_is_ram(types[i])
+ && ( (mfn_to_page(mfns[i])->count_info & PGC_allocated) != 0 )
+ && ( (mfn_to_page(mfns[i])->count_info & PGC_page_table) == 0 )
+ && ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) <= max_ref ) )
+ map[i] = map_domain_page(mfn_x(mfns[i]));
+ else
+ map[i] = NULL;
+ }
+
+ /* Then, go through and check for zeroed pages, removing write permission
+ * for those with zeroes. */
+ for ( i=0; i<count; i++ )
+ {
+ if(!map[i])
+ continue;
+
+ /* Quick zero-check */
+ for ( j=0; j<16; j++ )
+ if( *(map[i]+j) != 0 )
+ break;
+
+ if ( j < 16 )
+ {
+ unmap_domain_page(map[i]);
+ map[i] = NULL;
+ continue;
+ }
+
+ /* Try to remove the page, restoring old mapping if it fails. */
+ set_p2m_entry(d, gfns[i],
+ _mfn(POPULATE_ON_DEMAND_MFN), 0,
+ p2m_populate_on_demand);
+
+ /* See if the page was successfully unmapped. (Allow one refcount
+ * for being allocated to a domain.) */
+ if ( (mfn_to_page(mfns[i])->count_info & PGC_count_mask) > 1 )
+ {
+ unmap_domain_page(map[i]);
+ map[i] = NULL;
+
+ set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
+
+ continue;
+ }
+ }
+
+ /* Now check each page for real */
+ for ( i=0; i < count; i++ )
+ {
+ if(!map[i])
+ continue;
+
+ for ( j=0; j<PAGE_SIZE/sizeof(*map[i]); j++ )
+ if( *(map[i]+j) != 0 )
+ break;
+
+ /* See comment in p2m_pod_zero_check_superpage() re gnttab
+ * check timing. */
+ if ( j < PAGE_SIZE/sizeof(*map[i]) )
+ {
+ set_p2m_entry(d, gfns[i], mfns[i], 0, types[i]);
+ continue;
+ }
+ else
+ {
+ /* Add to cache, and account for the new p2m PoD entry */
+ p2m_pod_cache_add(d, mfn_to_page(mfns[i]), 0);
+ d->arch.p2m->pod.entry_count++;
+ }
+
+ unmap_domain_page(map[i]);
+ map[i] = NULL;
+ }
+
+}
+
+#define POD_SWEEP_LIMIT 1024
+static void
+p2m_pod_emergency_sweep_super(struct domain *d)
+{
+ struct p2m_domain *p2md = d->arch.p2m;
+ unsigned long i, start, limit;
+
+ if ( p2md->pod.reclaim_super == 0 )
+ {
+ p2md->pod.reclaim_super = (p2md->pod.max_guest>>9)<<9;
+ p2md->pod.reclaim_super -= (1<<9);
+ }
+
+ start = p2md->pod.reclaim_super;
+ limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
+
+ for ( i=p2md->pod.reclaim_super ; i > 0 ; i-=(1<<9) )
+ {
+ p2m_pod_zero_check_superpage(d, i);
+ /* Stop if we're past our limit and we have found *something*.
+ *
+ * NB that this is a zero-sum game; we're increasing our cache size
+ * by increasing our 'debt'. Since we hold the p2m lock,
+ * (entry_count - count) must remain the same. */
+ if ( !page_list_empty(&p2md->pod.super) && i < limit )
+ break;
+ }
+
+ p2md->pod.reclaim_super = i ? i - (1<<9) : 0;
+
+}
+
+#define POD_SWEEP_STRIDE 16
+static void
+p2m_pod_emergency_sweep(struct domain *d)
+{
+ struct p2m_domain *p2md = d->arch.p2m;
+ unsigned long gfns[POD_SWEEP_STRIDE];
+ unsigned long i, j=0, start, limit;
+ p2m_type_t t;
+
+
+ if ( p2md->pod.reclaim_single == 0 )
+ p2md->pod.reclaim_single = p2md->pod.max_guest;
+
+ start = p2md->pod.reclaim_single;
+ limit = (start > POD_SWEEP_LIMIT) ? (start - POD_SWEEP_LIMIT) : 0;
+
+ /* FIXME: Figure out how to avoid superpages */
+ for ( i=p2md->pod.reclaim_single ; i > 0 ; i-- )
+ {
+ gfn_to_mfn_query(d, i, &t );
+ if ( p2m_is_ram(t) )
+ {
+ gfns[j] = i;
+ j++;
+ BUG_ON(j > POD_SWEEP_STRIDE);
+ if ( j == POD_SWEEP_STRIDE )
+ {
+ p2m_pod_zero_check(d, gfns, j);
+ j = 0;
+ }
+ }
+ /* Stop if we're past our limit and we have found *something*.
+ *
+ * NB that this is a zero-sum game; we're increasing our cache size
+ * by re-increasing our 'debt'. Since we hold the p2m lock,
+ * (entry_count - count) must remain the same. */
+ if ( p2md->pod.count > 0 && i < limit )
+ break;
+ }
+
+ if ( j )
+ p2m_pod_zero_check(d, gfns, j);
+
+ p2md->pod.reclaim_single = i ? i - 1 : i;
+
+}
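
The emergency sweeps walk gfns downward from a saved resume point, batch candidates in groups of POD_SWEEP_STRIDE, and stop early once they are POD_SWEEP_LIMIT entries past the start and have reclaimed something. The toy model below shows just that control flow; the "is reclaimable" test is faked as "even index", whereas the patch uses gfn_to_mfn_query() and p2m_pod_zero_check().

#include <stdio.h>

#define SWEEP_STRIDE 16
#define SWEEP_LIMIT  1024

static unsigned long reclaimed;

/* Stand-in for p2m_pod_zero_check(): pretend every even index is a
 * reclaimable zero page. */
static void check_batch(const unsigned long *batch, int n)
{
    int i;

    for ( i = 0; i < n; i++ )
        if ( (batch[i] & 1) == 0 )
            reclaimed++;
}

static unsigned long sweep(unsigned long start)
{
    unsigned long batch[SWEEP_STRIDE], i;
    unsigned long limit = (start > SWEEP_LIMIT) ? start - SWEEP_LIMIT : 0;
    int j = 0;

    for ( i = start; i > 0; i-- )
    {
        batch[j++] = i;
        if ( j == SWEEP_STRIDE )
        {
            check_batch(batch, j);
            j = 0;
        }
        /* Past the limit and something reclaimed: stop for now. */
        if ( reclaimed > 0 && i < limit )
            break;
    }
    if ( j )
        check_batch(batch, j);

    return i ? i - 1 : 0;               /* resume point for the next sweep */
}

int main(void)
{
    printf("resume at %lu, reclaimed %lu pages\n", sweep(4096), reclaimed);
    return 0;
}
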
+
+static int
+p2m_pod_demand_populate(struct domain *d, unsigned long gfn,
+ mfn_t table_mfn,
+ l1_pgentry_t *p2m_entry,
+ unsigned int order,
+ p2m_query_t q)
+{
+ struct page_info *p = NULL; /* Compiler warnings */
+ unsigned long gfn_aligned;
+ mfn_t mfn;
+ l1_pgentry_t entry_content = l1e_empty();
+ struct p2m_domain *p2md = d->arch.p2m;
+ int i;
+
+ /* We need to grab the p2m lock here and re-check the entry to make
+ * sure that someone else hasn't populated it for us, then hold it
+ * until we're done. */
+ p2m_lock(p2md);
+ audit_p2m(d);
+
+ /* Check to make sure this is still PoD */
+ if ( p2m_flags_to_type(l1e_get_flags(*p2m_entry)) != p2m_populate_on_demand )
+ {
+ p2m_unlock(p2md);
+ return 0;
+ }
+
+ /* If we're low, start a sweep */
+ if ( order == 9 && page_list_empty(&p2md->pod.super) )
+ p2m_pod_emergency_sweep_super(d);
+
+ if ( page_list_empty(&p2md->pod.single) &&
+ ( ( order == 0 )
+ || (order == 9 && page_list_empty(&p2md->pod.super) ) ) )
+ p2m_pod_emergency_sweep(d);
+
+ /* Keep track of the highest gfn demand-populated by a guest fault */
+ if ( q == p2m_guest && gfn > p2md->pod.max_guest )
+ p2md->pod.max_guest = gfn;
+
+ spin_lock(&d->page_alloc_lock);
+
+ if ( p2md->pod.count == 0 )
+ goto out_of_memory;
+
+ /* Get a page from the cache. A NULL return value indicates that the
+ * 2-meg range should be marked singleton PoD, and retried. */
+ if ( (p = p2m_pod_cache_get(d, order)) == NULL )
+ goto remap_and_retry;
+
+ mfn = page_to_mfn(p);
+
+ BUG_ON((mfn_x(mfn) & ((1 << order)-1)) != 0);
+
+ spin_unlock(&d->page_alloc_lock);
+
+ /* Fill in the entry in the p2m */
+ switch ( order )
+ {
+ case 9:
+ {
+ l2_pgentry_t l2e_content;
+
+ l2e_content = l2e_from_pfn(mfn_x(mfn),
+ p2m_type_to_flags(p2m_ram_rw) | _PAGE_PSE);
+
+ entry_content.l1 = l2e_content.l2;
+ }
+ break;
+ case 0:
+ entry_content = l1e_from_pfn(mfn_x(mfn),
+ p2m_type_to_flags(p2m_ram_rw));
+ break;
+
+ }
+
+ gfn_aligned = (gfn >> order) << order;
+
+ paging_write_p2m_entry(d, gfn_aligned, p2m_entry, table_mfn,
+ entry_content, (order==9)?2:1);
+
+ for( i = 0 ; i < (1UL << order) ; i++ )
+ set_gpfn_from_mfn(mfn_x(mfn) + i, gfn_aligned + i);
+
+ p2md->pod.entry_count -= (1 << order); /* Lock: p2m */
+ BUG_ON(p2md->pod.entry_count < 0);
+ audit_p2m(d);
+ p2m_unlock(p2md);
+
+ return 0;
+out_of_memory:
+ spin_unlock(&d->page_alloc_lock);
+ audit_p2m(d);
+ p2m_unlock(p2md);
+ printk("%s: Out of populate-on-demand memory!\n", __func__);
+ domain_crash(d);
+ return -1;
+remap_and_retry:
+ BUG_ON(order != 9);
+ spin_unlock(&d->page_alloc_lock);
+
+ /* Remap this 2-meg region in singleton chunks */
+ gfn_aligned = (gfn>>order)<<order;
+ for(i=0; i<(1<<order); i++)
+ set_p2m_entry(d, gfn_aligned+i, _mfn(POPULATE_ON_DEMAND_MFN), 0,
+ p2m_populate_on_demand);
+ audit_p2m(d);
+ p2m_unlock(p2md);
+ return 0;
+}
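
p2m_pod_demand_populate() falls back in two steps: if a 2-meg fault cannot be satisfied from the superpage cache, the whole 2-meg range is remapped as 512 singleton PoD entries and the fault is retried at 4k; only a completely empty cache is fatal. Below is a counter-only model of that fallback, purely illustrative (the real code crashes the domain in the out-of-memory case):

/* Counter-only model of the demand-populate fallback. 'super' and
 * 'single' stand in for the pod.super and pod.single cache lists. */
static long super = 1, single = 0;

/* Returns 0 on success, 1 if the caller should remap as 4k and retry,
 * -1 when the cache is exhausted entirely. */
static int demand_populate(unsigned int order)
{
    if ( super == 0 && single == 0 )
        return -1;                          /* out of PoD memory */

    if ( order == 9 )
    {
        if ( super == 0 )
            return 1;                       /* remap as 512 4k entries, retry */
        super--;
        return 0;
    }

    if ( single == 0 )
    {
        super--;                            /* break a superpage into singletons */
        single += 512;
    }
    single--;
    return 0;
}
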
+
// Returns 0 on error (out of memory)
static int
p2m_set_entry(struct domain *d, unsigned long gfn, mfn_t mfn,
L2_PAGETABLE_ENTRIES);
ASSERT(p2m_entry);
+ /* FIXME: Deal with 4k replaced by 2meg pages */
if ( (l1e_get_flags(*p2m_entry) & _PAGE_PRESENT) &&
!(l1e_get_flags(*p2m_entry) & _PAGE_PSE) )
{
goto out;
}
- if ( mfn_valid(mfn) )
+ if ( mfn_valid(mfn) || p2m_is_magic(p2mt) )
l2e_content = l2e_from_pfn(mfn_x(mfn),
p2m_type_to_flags(p2mt) | _PAGE_PSE);
else
}
/* Track the highest gfn for which we have ever had a valid mapping */
- if ( mfn_valid(mfn) && (gfn > d->arch.p2m->max_mapped_pfn) )
+ if ( mfn_valid(mfn)
+ && (gfn + (1UL << page_order) - 1 > d->arch.p2m->max_mapped_pfn) )
d->arch.p2m->max_mapped_pfn = gfn + (1UL << page_order) - 1;
if ( iommu_enabled && (is_hvm_domain(d) || need_iommu(d)) )
}
static mfn_t
-p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t)
+p2m_gfn_to_mfn(struct domain *d, unsigned long gfn, p2m_type_t *t,
+ p2m_query_t q)
{
mfn_t mfn;
paddr_t addr = ((paddr_t)gfn) << PAGE_SHIFT;
l2e = map_domain_page(mfn_x(mfn));
l2e += l2_table_offset(addr);
+
+pod_retry_l2:
if ( (l2e_get_flags(*l2e) & _PAGE_PRESENT) == 0 )
{
+ /* PoD: Try to populate a 2-meg chunk */
+ if ( p2m_flags_to_type(l2e_get_flags(*l2e)) == p2m_populate_on_demand )
+ {
+ if ( q != p2m_query ) {
+ if( !p2m_pod_demand_populate(d, gfn, mfn,
+ (l1_pgentry_t *)l2e, 9, q) )
+ goto pod_retry_l2;
+ } else
+ *t = p2m_populate_on_demand;
+ }
+
unmap_domain_page(l2e);
return _mfn(INVALID_MFN);
}
l1e = map_domain_page(mfn_x(mfn));
l1e += l1_table_offset(addr);
+pod_retry_l1:
if ( (l1e_get_flags(*l1e) & _PAGE_PRESENT) == 0 )
{
+ /* PoD: Try to populate */
+ if ( p2m_flags_to_type(l1e_get_flags(*l1e)) == p2m_populate_on_demand )
+ {
+ if ( q != p2m_query ) {
+ if( !p2m_pod_demand_populate(d, gfn, mfn,
+ (l1_pgentry_t *)l1e, 0, q) )
+ goto pod_retry_l1;
+ } else
+ *t = p2m_populate_on_demand;
+ }
+
unmap_domain_page(l1e);
return _mfn(INVALID_MFN);
}
}
/* Read the current domain's p2m table (through the linear mapping). */
-static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+static mfn_t p2m_gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t,
+ p2m_query_t q)
{
mfn_t mfn = _mfn(INVALID_MFN);
p2m_type_t p2mt = p2m_mmio_dm;
if ( gfn <= current->domain->arch.p2m->max_mapped_pfn )
{
- l1_pgentry_t l1e = l1e_empty();
+ l1_pgentry_t l1e = l1e_empty(), *p2m_entry;
l2_pgentry_t l2e = l2e_empty();
int ret;
ASSERT(gfn < (RO_MPT_VIRT_END - RO_MPT_VIRT_START)
/ sizeof(l1_pgentry_t));
+ /*
+ * Read & process L2
+ */
+ p2m_entry = &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START)
+ + l2_linear_offset(addr)];
+
+ pod_retry_l2:
ret = __copy_from_user(&l2e,
- &__linear_l1_table[l1_linear_offset(RO_MPT_VIRT_START) + l2_linear_offset(addr)],
+ p2m_entry,
sizeof(l2e));
+ if ( ret != 0
+ || !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ {
+ if( (l2e_get_flags(l2e) & _PAGE_PSE)
+ && ( p2m_flags_to_type(l2e_get_flags(l2e))
+ == p2m_populate_on_demand ) )
+ {
+ /* The read has succeeded, so we know that the mapping
+ * exists at this point. */
+ if ( q != p2m_query )
+ {
+ if( !p2m_pod_demand_populate(current->domain, gfn, mfn,
+ p2m_entry, 9, q) )
+ goto pod_retry_l2;
+
+ /* Allocation failed. */
+ p2mt = p2m_invalid;
+ printk("%s: allocation failed!\n", __func__);
+ goto out;
+ }
+ else
+ {
+ p2mt = p2m_populate_on_demand;
+ goto out;
+ }
+ }
+
+ goto pod_retry_l1;
+ }
- if ( (ret == 0) && (l2e_get_flags(l2e) & _PAGE_PRESENT) &&
- (l2e_get_flags(l2e) & _PAGE_PSE) )
+ if (l2e_get_flags(l2e) & _PAGE_PSE)
{
p2mt = p2m_flags_to_type(l2e_get_flags(l2e));
ASSERT(l2e_get_pfn(l2e) != INVALID_MFN || !p2m_is_ram(p2mt));
+
if ( p2m_is_valid(p2mt) )
mfn = _mfn(l2e_get_pfn(l2e) + l1_table_offset(addr));
else
p2mt = p2m_mmio_dm;
+
+ goto out;
}
- else
- {
-
- /* Need to __copy_from_user because the p2m is sparse and this
- * part might not exist */
- ret = __copy_from_user(&l1e,
- &phys_to_machine_mapping[gfn],
- sizeof(l1e));
+
+ /*
+ * Read and process L1
+ */
+
+ /* Need to __copy_from_user because the p2m is sparse and this
+ * part might not exist */
+ pod_retry_l1:
+ p2m_entry = &phys_to_machine_mapping[gfn];
+
+ ret = __copy_from_user(&l1e,
+ p2m_entry,
+ sizeof(l1e));
- if ( ret == 0 ) {
- p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
- ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
- if ( p2m_is_valid(p2mt) )
- mfn = _mfn(l1e_get_pfn(l1e));
- else
- /* XXX see above */
- p2mt = p2m_mmio_dm;
+ if ( ret == 0 ) {
+ p2mt = p2m_flags_to_type(l1e_get_flags(l1e));
+ ASSERT(l1e_get_pfn(l1e) != INVALID_MFN || !p2m_is_ram(p2mt));
+
+ if ( p2m_flags_to_type(l1e_get_flags(l1e))
+ == p2m_populate_on_demand )
+ {
+ /* The read has succeeded, so we know that the mapping
+ * exists at this point. */
+ if ( q != p2m_query )
+ {
+ if( !p2m_pod_demand_populate(current->domain, gfn, mfn,
+ (l1_pgentry_t *)p2m_entry, 0,
+ q) )
+ goto pod_retry_l1;
+
+ /* Allocation failed. */
+ p2mt = p2m_invalid;
+ goto out;
+ }
+ else
+ {
+ p2mt = p2m_populate_on_demand;
+ goto out;
+ }
}
+
+ if ( p2m_is_valid(p2mt) )
+ mfn = _mfn(l1e_get_pfn(l1e));
+ else
+ /* XXX see above */
+ p2mt = p2m_mmio_dm;
}
}
-
+out:
*t = p2mt;
return mfn;
}
memset(p2m, 0, sizeof(*p2m));
p2m_lock_init(p2m);
- INIT_LIST_HEAD(&p2m->pages);
+ INIT_PAGE_LIST_HEAD(&p2m->pages);
+ INIT_PAGE_LIST_HEAD(&p2m->pod.super);
+ INIT_PAGE_LIST_HEAD(&p2m->pod.single);
p2m->set_entry = p2m_set_entry;
p2m->get_entry = p2m_gfn_to_mfn;
{
mfn_t mfn = _mfn(INVALID_MFN);
- struct list_head *entry;
struct page_info *page, *p2m_top;
unsigned int page_count = 0;
unsigned long gfn = -1UL;
p2m_unlock(p2m);
return -ENOMEM;
}
- list_add_tail(&p2m_top->list, &p2m->pages);
+ page_list_add_tail(p2m_top, &p2m->pages);
p2m_top->count_info = 1;
p2m_top->u.inuse.type_info =
goto error;
/* Copy all existing mappings from the page list and m2p */
- for ( entry = d->page_list.next;
- entry != &d->page_list;
- entry = entry->next )
+ page_list_for_each(page, &d->page_list)
{
- page = list_entry(entry, struct page_info, list);
mfn = page_to_mfn(page);
gfn = get_gpfn_from_mfn(mfn_x(mfn));
page_count++;
/* Return all the p2m pages to Xen.
* We know we don't have any extra mappings to these pages */
{
- struct list_head *entry, *n;
struct page_info *pg;
struct p2m_domain *p2m = d->arch.p2m;
p2m_lock(p2m);
d->arch.phys_table = pagetable_null();
- list_for_each_safe(entry, n, &p2m->pages)
- {
- pg = list_entry(entry, struct page_info, list);
- list_del(entry);
+ while ( (pg = page_list_remove_head(&p2m->pages)) )
p2m->free_page(d, pg);
- }
p2m_unlock(p2m);
}
struct page_info *page;
struct domain *od;
unsigned long mfn, gfn, m2pfn, lp2mfn = 0;
+ int entry_count = 0;
mfn_t p2mfn;
unsigned long orphans_d = 0, orphans_i = 0, mpbad = 0, pmbad = 0;
int test_linear;
if ( test_linear )
flush_tlb_local();
+ spin_lock(&d->page_alloc_lock);
+
/* Audit part one: walk the domain's page allocation list, checking
* the m2p entries. */
for ( entry = d->page_list.next;
continue;
}
- p2mfn = gfn_to_mfn_foreign(d, gfn, &type);
+ p2mfn = gfn_to_mfn_type_foreign(d, gfn, &type, p2m_query);
if ( mfn_x(p2mfn) != mfn )
{
mpbad++;
if ( test_linear && (gfn <= d->arch.p2m->max_mapped_pfn) )
{
- lp2mfn = mfn_x(gfn_to_mfn_current(gfn, &type));
+ lp2mfn = mfn_x(gfn_to_mfn_query(d, gfn, &type));
if ( lp2mfn != mfn_x(p2mfn) )
{
P2M_PRINTK("linear mismatch gfn %#lx -> mfn %#lx "
// mfn, gfn, p2mfn, lp2mfn);
}
+ spin_unlock(&d->page_alloc_lock);
+
/* Audit part two: walk the domain's p2m table, checking the entries. */
if ( pagetable_get_pfn(d->arch.phys_table) != 0 )
{
{
if ( !(l2e_get_flags(l2e[i2]) & _PAGE_PRESENT) )
{
+ if ( (l2e_get_flags(l2e[i2]) & _PAGE_PSE)
+ && ( p2m_flags_to_type(l2e_get_flags(l2e[i2]))
+ == p2m_populate_on_demand ) )
+ entry_count+=(1<<9);
gfn += 1 << (L2_PAGETABLE_SHIFT - PAGE_SHIFT);
continue;
}
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++)
{
m2pfn = get_gpfn_from_mfn(mfn+i1);
- if ( m2pfn != (gfn + i) )
+ if ( m2pfn != (gfn + i1) )
{
pmbad++;
P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
for ( i1 = 0; i1 < L1_PAGETABLE_ENTRIES; i1++, gfn++ )
{
if ( !(l1e_get_flags(l1e[i1]) & _PAGE_PRESENT) )
+ {
+ if ( p2m_flags_to_type(l1e_get_flags(l1e[i1]))
+ == p2m_populate_on_demand )
+ entry_count++;
continue;
+ }
mfn = l1e_get_pfn(l1e[i1]);
ASSERT(mfn_valid(_mfn(mfn)));
m2pfn = get_gpfn_from_mfn(mfn);
if ( m2pfn != gfn )
{
pmbad++;
+ printk("mismatch: gfn %#lx -> mfn %#lx"
+ " -> gfn %#lx\n", gfn, mfn, m2pfn);
P2M_PRINTK("mismatch: gfn %#lx -> mfn %#lx"
" -> gfn %#lx\n", gfn, mfn, m2pfn);
BUG();
}
+ if ( entry_count != d->arch.p2m->pod.entry_count )
+ {
+ printk("%s: refcounted entry count %d, audit count %d!\n",
+ __func__,
+ d->arch.p2m->pod.entry_count,
+ entry_count);
+ BUG();
+ }
+
//P2M_PRINTK("p2m audit complete\n");
//if ( orphans_i | orphans_d | mpbad | pmbad )
// P2M_PRINTK("p2m audit found %lu orphans (%lu inval %lu debug)\n",
P2M_PRINTK("p2m audit found %lu odd p2m, %lu bad m2p entries\n",
pmbad, mpbad);
}
-#else
-#define audit_p2m(_d) do { (void)(_d); } while(0)
#endif /* P2M_AUDIT */
p2m_unlock(d->arch.p2m);
}
+#if CONFIG_PAGING_LEVELS == 3
+static int gfn_check_limit(
+ struct domain *d, unsigned long gfn, unsigned int order)
+{
+ /*
+ * 32bit AMD nested paging does not support guests above 4GB due to a
+ * hardware translation limit. This is enforced by checking that the
+ * populated range does not extend beyond gfn 0xfffffUL (the 4GB boundary).
+ */
+ if ( !paging_mode_hap(d) || ((gfn + (1ul << order)) <= 0x100000UL) ||
+ (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) )
+ return 0;
+
+ if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
+ dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
+ " 4GB: specify 'hap=0' domain config option.\n",
+ d->domain_id);
+
+ return -EINVAL;
+}
+#else
+#define gfn_check_limit(d, g, o) 0
+#endif
+
+int
+guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
+ unsigned int order)
+{
+ struct p2m_domain *p2md = d->arch.p2m;
+ unsigned long i;
+ p2m_type_t ot;
+ mfn_t omfn;
+ int pod_count = 0;
+ int rc = 0;
+
+ BUG_ON(!paging_mode_translate(d));
+
+ rc = gfn_check_limit(d, gfn, order);
+ if ( rc != 0 )
+ return rc;
+
+ p2m_lock(p2md);
+ audit_p2m(d);
+
+ P2M_DEBUG("mark pod: gfn=%#lx order=%u\n", gfn, order);
+
+ /* Make sure all gpfns are unused */
+ for ( i = 0; i < (1UL << order); i++ )
+ {
+ omfn = gfn_to_mfn_query(d, gfn + i, &ot);
+ if ( p2m_is_ram(ot) )
+ {
+ printk("%s: gfn_to_mfn returned type %d!\n",
+ __func__, ot);
+ rc = -EBUSY;
+ goto out;
+ }
+ else if ( ot == p2m_populate_on_demand )
+ {
+ /* Count how many PoD entries we'll be replacing if successful */
+ pod_count++;
+ }
+ }
+
+ /* Now, actually do the two-way mapping */
+ if ( !set_p2m_entry(d, gfn, _mfn(POPULATE_ON_DEMAND_MFN), order,
+ p2m_populate_on_demand) )
+ rc = -EINVAL;
+ else
+ {
+ p2md->pod.entry_count += 1 << order; /* Lock: p2m */
+ p2md->pod.entry_count -= pod_count;
+ BUG_ON(p2md->pod.entry_count < 0);
+ }
+
+ audit_p2m(d);
+ p2m_unlock(p2md);
+
+out:
+ return rc;
+
+}
+
int
guest_physmap_add_entry(struct domain *d, unsigned long gfn,
unsigned long mfn, unsigned int page_order,
unsigned long i, ogfn;
p2m_type_t ot;
mfn_t omfn;
+ int pod_count = 0;
int rc = 0;
if ( !paging_mode_translate(d) )
return 0;
}
-#if CONFIG_PAGING_LEVELS == 3
- /*
- * 32bit PAE nested paging does not support over 4GB guest due to
- * hardware translation limit. This limitation is checked by comparing
- * gfn with 0xfffffUL.
- */
- if ( paging_mode_hap(d) && (gfn > 0xfffffUL) )
- {
- if ( !test_and_set_bool(d->arch.hvm_domain.svm.npt_4gb_warning) )
- dprintk(XENLOG_WARNING, "Dom%d failed to populate memory beyond"
- " 4GB: specify 'hap=0' domain config option.\n",
- d->domain_id);
- return -EINVAL;
- }
-#endif
+ rc = gfn_check_limit(d, gfn, page_order);
+ if ( rc != 0 )
+ return rc;
p2m_lock(d->arch.p2m);
audit_p2m(d);
/* First, remove m->p mappings for existing p->m mappings */
for ( i = 0; i < (1UL << page_order); i++ )
{
- omfn = gfn_to_mfn(d, gfn, &ot);
+ omfn = gfn_to_mfn_query(d, gfn + i, &ot);
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
- set_gpfn_from_mfn(mfn_x(omfn)+i, INVALID_M2P_ENTRY);
+ set_gpfn_from_mfn(mfn_x(omfn), INVALID_M2P_ENTRY);
+ }
+ else if ( ot == p2m_populate_on_demand )
+ {
+ /* Count how many PoD entries we'll be replacing if successful */
+ pod_count++;
}
}
/* Then, look for m->p mappings for this range and deal with them */
for ( i = 0; i < (1UL << page_order); i++ )
{
- ogfn = mfn_to_gfn(d, _mfn(mfn));
+ ogfn = mfn_to_gfn(d, _mfn(mfn+i));
if (
#ifdef __x86_64__
(ogfn != 0x5555555555555555L)
(ogfn != 0x55555555L)
#endif
&& (ogfn != INVALID_M2P_ENTRY)
- && (ogfn != gfn) )
+ && (ogfn != gfn + i) )
{
/* This machine frame is already mapped at another physical
* address */
P2M_DEBUG("aliased! mfn=%#lx, old gfn=%#lx, new gfn=%#lx\n",
- mfn, ogfn, gfn);
- omfn = gfn_to_mfn(d, ogfn, &ot);
+ mfn + i, ogfn, gfn + i);
+ omfn = gfn_to_mfn_query(d, ogfn, &ot);
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
P2M_DEBUG("old gfn=%#lx -> mfn %#lx\n",
ogfn , mfn_x(omfn));
- if ( mfn_x(omfn) == mfn )
- p2m_remove_page(d, ogfn, mfn, 0);
+ if ( mfn_x(omfn) == (mfn + i) )
+ p2m_remove_page(d, ogfn, mfn + i, 0);
}
}
}
if ( !set_p2m_entry(d, gfn, _mfn(INVALID_MFN), page_order,
p2m_invalid) )
rc = -EINVAL;
+ else
+ {
+ d->arch.p2m->pod.entry_count -= pod_count; /* Lock: p2m */
+ BUG_ON(d->arch.p2m->pod.entry_count < 0);
+ }
}
audit_p2m(d);
if ( !paging_mode_translate(d) )
return 0;
- omfn = gfn_to_mfn(d, gfn, &ot);
+ omfn = gfn_to_mfn_query(d, gfn, &ot);
if ( p2m_is_ram(ot) )
{
ASSERT(mfn_valid(omfn));
/************************************************/
/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
#undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/* The log-dirty lock. This protects the log-dirty bitmap from
* concurrent accesses (and teardowns, etc).
/* clean up log dirty resources. */
paging_log_dirty_teardown(d);
+
+ /* Move populate-on-demand cache back to domain_list for destruction */
+ p2m_pod_empty_cache(d);
}
/* Call once all of the references to the domain have gone away */
obj-$(x86_32) += common.o guest_2.o guest_3.o
obj-$(x86_64) += common.o guest_2.o guest_3.o guest_4.o
-guest_%.o: multi.c $(HDRS) Makefile
+guest_%.o: multi.c Makefile
$(CC) $(CFLAGS) -DGUEST_PAGING_LEVELS=$* -c $< -o $@
int i;
shadow_lock_init(d);
for ( i = 0; i <= SHADOW_MAX_ORDER; i++ )
- INIT_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
- INIT_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
- INIT_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
+ INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.freelists[i]);
+ INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.p2m_freelist);
+ INIT_PAGE_LIST_HEAD(&d->arch.paging.shadow.pinned_shadows);
/* Use shadow pagetables for log-dirty support */
paging_log_dirty_init(d, shadow_enable_log_dirty,
idx = (idx + 1) % SHADOW_OOS_PAGES;
if ( mfn_x(oos[idx]) == mfn_x(gmfn) )
{
+ int i;
+ for ( i = 0; i < SHADOW_OOS_FIXUPS; i++ )
+ {
+ if ( mfn_valid(oos_fixup[idx].smfn[i])
+ && (mfn_x(oos_fixup[idx].smfn[i]) == mfn_x(smfn))
+ && (oos_fixup[idx].off[i] == off) )
+ return;
+ }
+
next = oos_fixup[idx].next;
if ( mfn_x(oos_fixup[idx].smfn[next]) != INVALID_MFN )
for ( ; order <= shadow_max_order(d); ++order )
{
unsigned int n = count;
- const struct list_head *p;
+ const struct page_info *sp;
- list_for_each ( p, &d->arch.paging.shadow.freelists[order] )
+ page_list_for_each ( sp, &d->arch.paging.shadow.freelists[order] )
if ( --n == 0 )
return 1;
count = (count + 1) >> 1;
* non-Xen mappings in this top-level shadow mfn */
static void shadow_unhook_mappings(struct vcpu *v, mfn_t smfn)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
- switch ( sp->type )
+ struct page_info *sp = mfn_to_page(smfn);
+ switch ( sp->u.sh.type )
{
case SH_type_l2_32_shadow:
SHADOW_INTERNAL_NAME(sh_unhook_32b_mappings, 2)(v,smfn);
break;
#endif
default:
- SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->type);
+ SHADOW_ERROR("top-level shadow has bad type %08x\n", sp->u.sh.type);
BUG();
}
}
/* Convert smfn to gfn */
unsigned long gfn;
ASSERT(mfn_valid(smfn));
- gfn = mfn_to_gfn(d, _mfn(mfn_to_shadow_page(smfn)->backpointer));
+ gfn = mfn_to_gfn(d, _mfn(mfn_to_page(smfn)->v.sh.back));
__trace_var(TRC_SHADOW_PREALLOC_UNPIN, 0/*!tsc*/,
sizeof(gfn), (unsigned char*)&gfn);
}
/* Need a vpcu for calling unpins; for now, since we don't have
* per-vcpu shadows, any will do */
struct vcpu *v, *v2;
- struct list_head *l, *t;
- struct shadow_page_info *sp;
+ struct page_info *sp, *t;
mfn_t smfn;
int i;
/* Stage one: walk the list of pinned pages, unpinning them */
perfc_incr(shadow_prealloc_1);
- list_for_each_backwards_safe(l, t, &d->arch.paging.shadow.pinned_shadows)
+ page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
{
- sp = list_entry(l, struct shadow_page_info, list);
- smfn = shadow_page_to_mfn(sp);
+ smfn = page_to_mfn(sp);
/* Unpin this top-level shadow */
trace_shadow_prealloc_unpin(d, smfn);
* this domain's shadows */
static void shadow_blow_tables(struct domain *d)
{
- struct list_head *l, *t;
- struct shadow_page_info *sp;
+ struct page_info *sp, *t;
struct vcpu *v = d->vcpu[0];
mfn_t smfn;
int i;
ASSERT(v != NULL);
/* Pass one: unpin all pinned pages */
- list_for_each_backwards_safe(l,t, &d->arch.paging.shadow.pinned_shadows)
+ page_list_for_each_safe_reverse(sp, t, &d->arch.paging.shadow.pinned_shadows)
{
- sp = list_entry(l, struct shadow_page_info, list);
- smfn = shadow_page_to_mfn(sp);
+ smfn = page_to_mfn(sp);
sh_unpin(v, smfn);
}
__initcall(shadow_blow_tables_keyhandler_init);
#endif /* !NDEBUG */
+static inline struct page_info *
+next_shadow(const struct page_info *sp)
+{
+ return sp->next_shadow ? mfn_to_page(_mfn(sp->next_shadow)) : NULL;
+}
+
+static inline void
+set_next_shadow(struct page_info *sp, struct page_info *next)
+{
+ sp->next_shadow = next ? mfn_x(page_to_mfn(next)) : 0;
+}
+
/* Allocate another shadow's worth of (contiguous, aligned) pages,
* and fill in the type and backpointer fields of their page_infos.
* Never fails to allocate. */
u32 shadow_type,
unsigned long backpointer)
{
- struct shadow_page_info *sp = NULL;
+ struct page_info *sp = NULL;
unsigned int order = shadow_order(shadow_type);
cpumask_t mask;
void *p;
/* Find smallest order which can satisfy the request. */
for ( i = order; i <= SHADOW_MAX_ORDER; i++ )
- if ( !list_empty(&d->arch.paging.shadow.freelists[i]) )
+ if ( (sp = page_list_remove_head(&d->arch.paging.shadow.freelists[i])) )
goto found;
/* If we get here, we failed to allocate. This should never happen.
BUG();
found:
- sp = list_entry(d->arch.paging.shadow.freelists[i].next,
- struct shadow_page_info, list);
- list_del(&sp->list);
-
/* We may have to halve the chunk a number of times. */
while ( i != order )
{
i--;
- sp->order = i;
- list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[i]);
+ sp->v.free.order = i;
+ page_list_add_tail(sp, &d->arch.paging.shadow.freelists[i]);
sp += 1 << i;
}
d->arch.paging.shadow.free_pages -= 1 << order;
flush_tlb_mask(mask);
}
/* Now safe to clear the page for reuse */
- p = sh_map_domain_page(shadow_page_to_mfn(sp+i));
+ p = sh_map_domain_page(page_to_mfn(sp+i));
ASSERT(p != NULL);
clear_page(p);
sh_unmap_domain_page(p);
- INIT_LIST_HEAD(&sp[i].list);
- sp[i].type = shadow_type;
- sp[i].pinned = 0;
- sp[i].count = 0;
- sp[i].backpointer = backpointer;
- sp[i].next_shadow = NULL;
+ INIT_PAGE_LIST_ENTRY(&sp[i].list);
+ sp[i].u.sh.type = shadow_type;
+ sp[i].u.sh.pinned = 0;
+ sp[i].u.sh.count = 0;
+ sp[i].v.sh.back = backpointer;
+ set_next_shadow(&sp[i], NULL);
perfc_incr(shadow_alloc_count);
}
- return shadow_page_to_mfn(sp);
+ return page_to_mfn(sp);
}
/* Return some shadow pages to the pool. */
void shadow_free(struct domain *d, mfn_t smfn)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
u32 shadow_type;
unsigned long order;
unsigned long mask;
ASSERT(shadow_locked_by_me(d));
perfc_incr(shadow_free);
- shadow_type = sp->type;
+ shadow_type = sp->u.sh.type;
ASSERT(shadow_type != SH_type_none);
ASSERT(shadow_type != SH_type_p2m_table);
order = shadow_order(shadow_type);
}
#endif
/* Strip out the type: this is now a free shadow page */
- sp[i].type = 0;
+ sp[i].u.sh.type = 0;
/* Remember the TLB timestamp so we will know whether to flush
* TLBs when we reuse the page. Because the destructors leave the
* contents of the pages in place, we can delay TLB flushes until
for ( ; order < shadow_max_order(d); ++order )
{
mask = 1 << order;
- if ( (mfn_x(shadow_page_to_mfn(sp)) & mask) ) {
+ if ( (mfn_x(page_to_mfn(sp)) & mask) ) {
/* Merge with predecessor block? */
- if ( ((sp-mask)->type != PGT_none) || ((sp-mask)->order != order) )
+ if ( ((sp-mask)->u.sh.type != PGT_none) ||
+ ((sp-mask)->v.free.order != order) )
break;
- list_del(&(sp-mask)->list);
sp -= mask;
+ page_list_del(sp, &d->arch.paging.shadow.freelists[order]);
} else {
/* Merge with successor block? */
- if ( ((sp+mask)->type != PGT_none) || ((sp+mask)->order != order) )
+ if ( ((sp+mask)->u.sh.type != PGT_none) ||
+ ((sp+mask)->v.free.order != order) )
break;
- list_del(&(sp+mask)->list);
+ page_list_del(sp + mask, &d->arch.paging.shadow.freelists[order]);
}
}
- sp->order = order;
- list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
+ sp->v.free.order = order;
+ page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
}
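
shadow_alloc() and shadow_free() manage the shadow pool as a buddy allocator: allocation halves a larger free block until it matches the requested order, and freeing coalesces a block with its buddy (located by the (mfn & mask) test above) for as long as the buddy is also free at the same order. A toy model of the coalescing step, with free_order[] standing in for the per-page type/order fields; it is a sketch under those assumptions, not the shadow code itself.

#define MAX_ORDER 9

/* Buddy coalescing: a free block of size 1<<order merges with its buddy
 * (index with bit 'order' flipped) while the buddy is also a free block
 * head of the same order. free_order[i] holds the order at a free block
 * head, or -1 otherwise. Returns the final order; the merged block is
 * recorded at its (possibly lower) head index. */
static unsigned int coalesce(signed char *free_order, unsigned long idx,
                             unsigned int order)
{
    while ( order < MAX_ORDER )
    {
        unsigned long buddy = idx ^ (1UL << order);

        if ( free_order[buddy] != (signed char)order )
            break;                /* buddy busy or split: stop merging */

        free_order[buddy] = -1;   /* take the buddy off its free list */
        if ( buddy < idx )
            idx = buddy;          /* merged block starts at the lower half */
        order++;
    }
    free_order[idx] = order;      /* put the merged block back, once */
    return order;
}
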
/* Divert some memory from the pool to be used by the p2m mapping.
* believed to be a concern.
*/
page_set_owner(&pg[i], d);
- pg[i].count_info = 1;
- list_add_tail(&pg[i].list, &d->arch.paging.shadow.p2m_freelist);
+ pg[i].count_info |= 1;
+ page_list_add_tail(&pg[i], &d->arch.paging.shadow.p2m_freelist);
}
return 1;
}
static struct page_info *
shadow_alloc_p2m_page(struct domain *d)
{
- struct list_head *entry;
struct page_info *pg;
mfn_t mfn;
void *p;
shadow_lock(d);
- if ( list_empty(&d->arch.paging.shadow.p2m_freelist) &&
+ if ( page_list_empty(&d->arch.paging.shadow.p2m_freelist) &&
!sh_alloc_p2m_pages(d) )
{
shadow_unlock(d);
return NULL;
}
- entry = d->arch.paging.shadow.p2m_freelist.next;
- list_del(entry);
+ pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist);
shadow_unlock(d);
- pg = list_entry(entry, struct page_info, list);
mfn = page_to_mfn(pg);
p = sh_map_domain_page(mfn);
clear_page(p);
/* Should have just the one ref we gave it in alloc_p2m_page() */
if ( (pg->count_info & PGC_count_mask) != 1 )
{
- SHADOW_ERROR("Odd p2m page count c=%#x t=%"PRtype_info"\n",
+ SHADOW_ERROR("Odd p2m page count c=%#lx t=%"PRtype_info"\n",
pg->count_info, pg->u.inuse.type_info);
}
- pg->count_info = 0;
+ pg->count_info &= ~PGC_count_mask;
/* Free should not decrement domain's total allocation, since
* these pages were allocated without an owner. */
page_set_owner(pg, NULL);
-#if defined(__x86_64__)
- spin_lock_init(&pg->lock);
-#endif
free_domheap_pages(pg, 0);
d->arch.paging.shadow.p2m_pages--;
perfc_decr(shadow_alloc_count);
unsigned int pages,
int *preempted)
{
- struct shadow_page_info *sp;
+ struct page_info *sp;
unsigned int lower_bound;
unsigned int j, order = shadow_max_order(d);
if ( d->arch.paging.shadow.total_pages < pages )
{
/* Need to allocate more memory from domheap */
- sp = (struct shadow_page_info *)
+ sp = (struct page_info *)
alloc_domheap_pages(NULL, order, MEMF_node(domain_to_node(d)));
if ( sp == NULL )
{
d->arch.paging.shadow.total_pages += 1 << order;
for ( j = 0; j < 1U << order; j++ )
{
- sp[j].type = 0;
- sp[j].pinned = 0;
- sp[j].count = 0;
- sp[j].mbz = 0;
+ sp[j].u.sh.type = 0;
+ sp[j].u.sh.pinned = 0;
+ sp[j].u.sh.count = 0;
sp[j].tlbflush_timestamp = 0; /* Not in any TLB */
}
- sp->order = order;
- list_add_tail(&sp->list, &d->arch.paging.shadow.freelists[order]);
+ sp->v.free.order = order;
+ page_list_add_tail(sp, &d->arch.paging.shadow.freelists[order]);
}
else if ( d->arch.paging.shadow.total_pages > pages )
{
/* Need to return memory to domheap */
_shadow_prealloc(d, order, 1);
- ASSERT(!list_empty(&d->arch.paging.shadow.freelists[order]));
- sp = list_entry(d->arch.paging.shadow.freelists[order].next,
- struct shadow_page_info, list);
- list_del(&sp->list);
-#if defined(__x86_64__)
+ sp = page_list_remove_head(&d->arch.paging.shadow.freelists[order]);
+ ASSERT(sp);
/*
- * Re-instate lock field which we overwrite with shadow_page_info.
- * This was safe, since the lock is only used on guest pages.
+ * The pages were allocated anonymously, but the owner field
+ * gets overwritten in normal use, so it needs to be cleared here.
*/
for ( j = 0; j < 1U << order; j++ )
- spin_lock_init(&((struct page_info *)sp)[j].lock);
-#endif
+ page_set_owner(&((struct page_info *)sp)[j], NULL);
d->arch.paging.shadow.free_pages -= 1 << order;
d->arch.paging.shadow.total_pages -= 1 << order;
free_domheap_pages((struct page_info *)sp, order);
static void sh_hash_audit_bucket(struct domain *d, int bucket)
/* Audit one bucket of the hash table */
{
- struct shadow_page_info *sp, *x;
+ struct page_info *sp, *x;
if ( !(SHADOW_AUDIT_ENABLE) )
return;
while ( sp )
{
/* Not a shadow? */
- BUG_ON( sp->mbz != 0 );
+ BUG_ON( (sp->count_info & PGC_count_mask) != 0 );
/* Bogus type? */
- BUG_ON( sp->type == 0 );
- BUG_ON( sp->type > SH_type_max_shadow );
+ BUG_ON( sp->u.sh.type == 0 );
+ BUG_ON( sp->u.sh.type > SH_type_max_shadow );
/* Wrong bucket? */
- BUG_ON( sh_hash(sp->backpointer, sp->type) != bucket );
+ BUG_ON( sh_hash(sp->v.sh.back, sp->u.sh.type) != bucket );
/* Duplicate entry? */
- for ( x = sp->next_shadow; x; x = x->next_shadow )
- BUG_ON( x->backpointer == sp->backpointer && x->type == sp->type );
+ for ( x = next_shadow(sp); x; x = next_shadow(x) )
+ BUG_ON( x->v.sh.back == sp->v.sh.back &&
+ x->u.sh.type == sp->u.sh.type );
/* Follow the backpointer to the guest pagetable */
- if ( sp->type != SH_type_fl1_32_shadow
- && sp->type != SH_type_fl1_pae_shadow
- && sp->type != SH_type_fl1_64_shadow )
+ if ( sp->u.sh.type != SH_type_fl1_32_shadow
+ && sp->u.sh.type != SH_type_fl1_pae_shadow
+ && sp->u.sh.type != SH_type_fl1_64_shadow )
{
- struct page_info *gpg = mfn_to_page(_mfn(sp->backpointer));
+ struct page_info *gpg = mfn_to_page(_mfn(sp->v.sh.back));
/* Bad shadow flags on guest page? */
- BUG_ON( !(gpg->shadow_flags & (1<<sp->type)) );
+ BUG_ON( !(gpg->shadow_flags & (1<<sp->u.sh.type)) );
/* Bad type count on guest page? */
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
- if ( sp->type == SH_type_l1_32_shadow
- || sp->type == SH_type_l1_pae_shadow
- || sp->type == SH_type_l1_64_shadow )
+ if ( sp->u.sh.type == SH_type_l1_32_shadow
+ || sp->u.sh.type == SH_type_l1_pae_shadow
+ || sp->u.sh.type == SH_type_l1_64_shadow )
{
if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
&& (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
{
if ( !page_is_out_of_sync(gpg) )
{
- SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
+ SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
" and not OOS but has typecount %#lx\n",
- sp->backpointer,
- mfn_x(shadow_page_to_mfn(sp)),
+ sp->v.sh.back,
+ mfn_x(page_to_mfn(sp)),
gpg->u.inuse.type_info);
BUG();
}
if ( (gpg->u.inuse.type_info & PGT_type_mask) == PGT_writable_page
&& (gpg->u.inuse.type_info & PGT_count_mask) != 0 )
{
- SHADOW_ERROR("MFN %#lx shadowed (by %#"PRI_mfn")"
+ SHADOW_ERROR("MFN %#"PRpgmfn" shadowed (by %#"PRI_mfn")"
" but has typecount %#lx\n",
- sp->backpointer, mfn_x(shadow_page_to_mfn(sp)),
+ sp->v.sh.back, mfn_x(page_to_mfn(sp)),
gpg->u.inuse.type_info);
BUG();
}
}
/* That entry was OK; on we go */
- sp = sp->next_shadow;
+ sp = next_shadow(sp);
}
}
* Returns 0 for success, 1 for error. */
static int shadow_hash_alloc(struct domain *d)
{
- struct shadow_page_info **table;
+ struct page_info **table;
ASSERT(shadow_locked_by_me(d));
ASSERT(!d->arch.paging.shadow.hash_table);
- table = xmalloc_array(struct shadow_page_info *, SHADOW_HASH_BUCKETS);
+ table = xmalloc_array(struct page_info *, SHADOW_HASH_BUCKETS);
if ( !table ) return 1;
memset(table, 0,
- SHADOW_HASH_BUCKETS * sizeof (struct shadow_page_info *));
+ SHADOW_HASH_BUCKETS * sizeof (struct page_info *));
d->arch.paging.shadow.hash_table = table;
return 0;
}
* or INVALID_MFN if it doesn't exist */
{
struct domain *d = v->domain;
- struct shadow_page_info *sp, *prev;
+ struct page_info *sp, *prev;
key_t key;
ASSERT(shadow_locked_by_me(d));
prev = NULL;
while(sp)
{
- if ( sp->backpointer == n && sp->type == t )
+ if ( sp->v.sh.back == n && sp->u.sh.type == t )
{
/* Pull-to-front if 'sp' isn't already the head item */
if ( unlikely(sp != d->arch.paging.shadow.hash_table[key]) )
{
if ( unlikely(d->arch.paging.shadow.hash_walking != 0) )
/* Can't reorder: someone is walking the hash chains */
- return shadow_page_to_mfn(sp);
+ return page_to_mfn(sp);
else
{
ASSERT(prev);
/* Delete sp from the list */
prev->next_shadow = sp->next_shadow;
/* Re-insert it at the head of the list */
- sp->next_shadow = d->arch.paging.shadow.hash_table[key];
+ set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
d->arch.paging.shadow.hash_table[key] = sp;
}
}
{
perfc_incr(shadow_hash_lookup_head);
}
- return shadow_page_to_mfn(sp);
+ return page_to_mfn(sp);
}
prev = sp;
- sp = sp->next_shadow;
+ sp = next_shadow(sp);
}
perfc_incr(shadow_hash_lookup_miss);
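
The lookup above keeps hot shadows near the head of each bucket by moving a hit to the front of its singly linked chain, unless a walker currently holds the chains. A self-contained sketch of that move-to-front pattern on a plain singly linked list follows; the node type and names are hypothetical, not Xen's page_info plumbing.

#include <stddef.h>
#include <stdio.h>

struct node {
    int key;
    struct node *next;
};

/* Look up 'key' in the chain at *head; on a hit that is not already the
 * head, unlink it and re-insert it at the front (move-to-front). */
static struct node *lookup_mtf(struct node **head, int key)
{
    struct node *prev = NULL, *n = *head;

    while ( n )
    {
        if ( n->key == key )
        {
            if ( prev )                 /* not already the head item */
            {
                prev->next = n->next;   /* unlink from its position */
                n->next = *head;        /* relink at the front */
                *head = n;
            }
            return n;
        }
        prev = n;
        n = n->next;
    }
    return NULL;                        /* miss */
}

int main(void)
{
    struct node c = { 3, NULL }, b = { 2, &c }, a = { 1, &b };
    struct node *head = &a;
    lookup_mtf(&head, 3);
    printf("head key after lookup: %d\n", head->key);   /* prints 3 */
    return 0;
}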
/* Put a mapping (n,t)->smfn into the hash table */
{
struct domain *d = v->domain;
- struct shadow_page_info *sp;
+ struct page_info *sp;
key_t key;
ASSERT(shadow_locked_by_me(d));
sh_hash_audit_bucket(d, key);
/* Insert this shadow at the top of the bucket */
- sp = mfn_to_shadow_page(smfn);
- sp->next_shadow = d->arch.paging.shadow.hash_table[key];
+ sp = mfn_to_page(smfn);
+ set_next_shadow(sp, d->arch.paging.shadow.hash_table[key]);
d->arch.paging.shadow.hash_table[key] = sp;
sh_hash_audit_bucket(d, key);
/* Excise the mapping (n,t)->smfn from the hash table */
{
struct domain *d = v->domain;
- struct shadow_page_info *sp, *x;
+ struct page_info *sp, *x;
key_t key;
ASSERT(shadow_locked_by_me(d));
key = sh_hash(n, t);
sh_hash_audit_bucket(d, key);
- sp = mfn_to_shadow_page(smfn);
+ sp = mfn_to_page(smfn);
if ( d->arch.paging.shadow.hash_table[key] == sp )
/* Easy case: we're deleting the head item. */
- d->arch.paging.shadow.hash_table[key] = sp->next_shadow;
+ d->arch.paging.shadow.hash_table[key] = next_shadow(sp);
else
{
/* Need to search for the one we want */
{
ASSERT(x); /* We can't have hit the end, since our target is
* still in the chain somewhere... */
- if ( x->next_shadow == sp )
+ if ( next_shadow(x) == sp )
{
x->next_shadow = sp->next_shadow;
break;
}
- x = x->next_shadow;
+ x = next_shadow(x);
}
}
- sp->next_shadow = NULL;
+ set_next_shadow(sp, NULL);
sh_hash_audit_bucket(d, key);
}
{
int i, done = 0;
struct domain *d = v->domain;
- struct shadow_page_info *x;
+ struct page_info *x;
/* Say we're here, to stop hash-lookups reordering the chains */
ASSERT(shadow_locked_by_me(d));
/* WARNING: This is not safe against changes to the hash table.
* The callback *must* return non-zero if it has inserted or
* deleted anything from the hash (lookups are OK, though). */
- for ( x = d->arch.paging.shadow.hash_table[i]; x; x = x->next_shadow )
+ for ( x = d->arch.paging.shadow.hash_table[i]; x; x = next_shadow(x) )
{
- if ( callback_mask & (1 << x->type) )
+ if ( callback_mask & (1 << x->u.sh.type) )
{
- ASSERT(x->type <= 15);
- ASSERT(callbacks[x->type] != NULL);
- done = callbacks[x->type](v, shadow_page_to_mfn(x),
- callback_mfn);
+ ASSERT(x->u.sh.type <= 15);
+ ASSERT(callbacks[x->u.sh.type] != NULL);
+ done = callbacks[x->u.sh.type](v, page_to_mfn(x),
+ callback_mfn);
if ( done ) break;
}
}
void sh_destroy_shadow(struct vcpu *v, mfn_t smfn)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
- unsigned int t = sp->type;
+ struct page_info *sp = mfn_to_page(smfn);
+ unsigned int t = sp->u.sh.type;
SHADOW_PRINTK("smfn=%#lx\n", mfn_x(smfn));
t == SH_type_fl1_64_shadow ||
t == SH_type_monitor_table ||
(is_pv_32on64_vcpu(v) && t == SH_type_l4_64_shadow) ||
- (page_get_owner(mfn_to_page(_mfn(sp->backpointer)))
+ (page_get_owner(mfn_to_page(_mfn(sp->v.sh.back)))
== v->domain));
/* The down-shifts here are so that the switch statement is on nice
{
unsigned long old_count = (pg->u.inuse.type_info & PGT_count_mask);
mfn_t last_smfn = _mfn(v->arch.paging.shadow.last_writeable_pte_smfn);
- int shtype = mfn_to_shadow_page(last_smfn)->type;
+ int shtype = mfn_to_page(last_smfn)->u.sh.type;
if ( callbacks[shtype] )
callbacks[shtype](v, last_smfn, gmfn);
int sh_remove_write_access_from_sl1p(struct vcpu *v, mfn_t gmfn,
mfn_t smfn, unsigned long off)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
ASSERT(mfn_valid(smfn));
ASSERT(mfn_valid(gmfn));
- if ( sp->type == SH_type_l1_32_shadow
- || sp->type == SH_type_fl1_32_shadow )
+ if ( sp->u.sh.type == SH_type_l1_32_shadow
+ || sp->u.sh.type == SH_type_fl1_32_shadow )
{
return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,2)
(v, gmfn, smfn, off);
}
#if CONFIG_PAGING_LEVELS >= 3
- else if ( sp->type == SH_type_l1_pae_shadow
- || sp->type == SH_type_fl1_pae_shadow )
+ else if ( sp->u.sh.type == SH_type_l1_pae_shadow
+ || sp->u.sh.type == SH_type_fl1_pae_shadow )
return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,3)
(v, gmfn, smfn, off);
#if CONFIG_PAGING_LEVELS >= 4
- else if ( sp->type == SH_type_l1_64_shadow
- || sp->type == SH_type_fl1_64_shadow )
+ else if ( sp->u.sh.type == SH_type_l1_64_shadow
+ || sp->u.sh.type == SH_type_fl1_64_shadow )
return SHADOW_INTERNAL_NAME(sh_rm_write_access_from_sl1p,4)
(v, gmfn, smfn, off);
#endif
&& (page->u.inuse.type_info & PGT_count_mask) == 0) )
{
SHADOW_ERROR("can't find all mappings of mfn %lx: "
- "c=%08x t=%08lx\n", mfn_x(gmfn),
+ "c=%08lx t=%08lx\n", mfn_x(gmfn),
page->count_info, page->u.inuse.type_info);
}
}
/* Follow this shadow's up-pointer, if it has one, and remove the reference
* found there. Returns 1 if that was the only reference to this shadow */
{
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
mfn_t pmfn;
void *vaddr;
int rc;
- ASSERT(sp->type > 0);
- ASSERT(sp->type < SH_type_max_shadow);
- ASSERT(sp->type != SH_type_l2_32_shadow);
- ASSERT(sp->type != SH_type_l2_pae_shadow);
- ASSERT(sp->type != SH_type_l2h_pae_shadow);
- ASSERT(sp->type != SH_type_l4_64_shadow);
+ ASSERT(sp->u.sh.type > 0);
+ ASSERT(sp->u.sh.type < SH_type_max_shadow);
+ ASSERT(sp->u.sh.type != SH_type_l2_32_shadow);
+ ASSERT(sp->u.sh.type != SH_type_l2_pae_shadow);
+ ASSERT(sp->u.sh.type != SH_type_l2h_pae_shadow);
+ ASSERT(sp->u.sh.type != SH_type_l4_64_shadow);
if (sp->up == 0) return 0;
pmfn = _mfn(sp->up >> PAGE_SHIFT);
ASSERT(l1e_get_pfn(*(l1_pgentry_t *)vaddr) == mfn_x(smfn));
/* Is this the only reference to this shadow? */
- rc = (sp->count == 1) ? 1 : 0;
+ rc = (sp->u.sh.count == 1) ? 1 : 0;
/* Blank the offending entry */
- switch (sp->type)
+ switch (sp->u.sh.type)
{
case SH_type_l1_32_shadow:
case SH_type_l2_32_shadow:
{
struct vcpu *v;
mfn_t mfn;
- struct list_head *entry, *n;
struct page_info *pg;
ASSERT(d->is_dying);
}
#endif /* (SHADOW_OPTIMIZATIONS & (SHOPT_VIRTUAL_TLB|SHOPT_OUT_OF_SYNC)) */
- list_for_each_safe(entry, n, &d->arch.paging.shadow.p2m_freelist)
- {
- list_del(entry);
- pg = list_entry(entry, struct page_info, list);
+ while ( (pg = page_list_remove_head(&d->arch.paging.shadow.p2m_freelist)) )
shadow_free_p2m_page(d, pg);
- }
if ( d->arch.paging.shadow.total_pages != 0 )
{
for ( i = 0; i < nr; i++ ) {
mfn_t mfn = gfn_to_mfn(d, begin_pfn + i, &t);
struct page_info *page;
- u32 count_info;
int dirty = 0;
paddr_t sl1ma = d->dirty_vram->sl1ma[i];
else
{
page = mfn_to_page(mfn);
- count_info = page->u.inuse.type_info & PGT_count_mask;
- switch (count_info)
+ switch (page->u.inuse.type_info & PGT_count_mask)
{
case 0:
/* No guest reference, nothing to track. */
#include <asm/hvm/hvm.h>
#include <asm/hvm/cacheattr.h>
#include <asm/mtrr.h>
+#include <asm/guest_pt.h>
#include "private.h"
#include "types.h"
put_page(mfn_to_page(gmfn));
}
-/**************************************************************************/
-/* CPU feature support querying */
-
-static inline int
-guest_supports_superpages(struct vcpu *v)
-{
- /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
- * CR4.PSE is set or the guest is in PAE or long mode.
- * It's also used in the dummy PT for vcpus with CR4.PG cleared. */
- return (is_hvm_vcpu(v) &&
- (GUEST_PAGING_LEVELS != 2
- || !hvm_paging_enabled(v)
- || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
-}
-
-static inline int
-guest_supports_nx(struct vcpu *v)
-{
- if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
- return 0;
- if ( !is_hvm_vcpu(v) )
- return cpu_has_nx;
- return hvm_nx_enabled(v);
-}
-
/**************************************************************************/
/* Functions for walking the guest page tables */
-/* Flags that are needed in a pagetable entry, with the sense of NX inverted */
-static uint32_t mandatory_flags(struct vcpu *v, uint32_t pfec)
-{
- static uint32_t flags[] = {
- /* I/F - Usr Wr */
- /* 0 0 0 0 */ _PAGE_PRESENT,
- /* 0 0 0 1 */ _PAGE_PRESENT|_PAGE_RW,
- /* 0 0 1 0 */ _PAGE_PRESENT|_PAGE_USER,
- /* 0 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
- /* 0 1 0 0 */ _PAGE_PRESENT,
- /* 0 1 0 1 */ _PAGE_PRESENT|_PAGE_RW,
- /* 0 1 1 0 */ _PAGE_PRESENT|_PAGE_USER,
- /* 0 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER,
- /* 1 0 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
- /* 1 0 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
- /* 1 0 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
- /* 1 0 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
- /* 1 1 0 0 */ _PAGE_PRESENT|_PAGE_NX_BIT,
- /* 1 1 0 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_NX_BIT,
- /* 1 1 1 0 */ _PAGE_PRESENT|_PAGE_USER|_PAGE_NX_BIT,
- /* 1 1 1 1 */ _PAGE_PRESENT|_PAGE_RW|_PAGE_USER|_PAGE_NX_BIT,
- };
-
- /* Don't demand not-NX if the CPU wouldn't enforce it. */
- if ( !guest_supports_nx(v) )
- pfec &= ~PFEC_insn_fetch;
-
- /* Don't demand R/W if the CPU wouldn't enforce it. */
- if ( is_hvm_vcpu(v) && unlikely(!hvm_wp_enabled(v))
- && !(pfec & PFEC_user_mode) )
- pfec &= ~PFEC_write_access;
-
- return flags[(pfec & 0x1f) >> 1];
-}
-
-/* Modify a guest pagetable entry to set the Accessed and Dirty bits.
- * Returns non-zero if it actually writes to guest memory. */
-static uint32_t set_ad_bits(void *guest_p, void *walk_p, int set_dirty)
+static inline uint32_t
+sh_walk_guest_tables(struct vcpu *v, unsigned long va, walk_t *gw,
+ uint32_t pfec)
{
- guest_intpte_t old, new;
- int ret = 0;
-
- old = *(guest_intpte_t *)walk_p;
- new = old | _PAGE_ACCESSED | (set_dirty ? _PAGE_DIRTY : 0);
- if ( old != new )
- {
- /* Write the new entry into the walk, and try to write it back
- * into the guest table as well. If the guest table has changed
- * under out feet then leave it alone. */
- *(guest_intpte_t *)walk_p = new;
- if( cmpxchg(((guest_intpte_t *)guest_p), old, new) == old )
- ret = 1;
-
- /* FIXME -- this code is longer than necessary */
- if(set_dirty)
- TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_AD);
- else
- TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_SET_A);
- }
- return ret;
+ return guest_walk_tables(v, va, gw, pfec,
+#if GUEST_PAGING_LEVELS == 3 /* PAE */
+ _mfn(INVALID_MFN),
+ v->arch.paging.shadow.gl3e
+#else /* 32 or 64 */
+ pagetable_get_mfn(v->arch.guest_table),
+ v->arch.paging.shadow.guest_vtable
+#endif
+ );
}
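
The wrapper above passes a different root to guest_walk_tables depending on GUEST_PAGING_LEVELS, because the same source is compiled once per guest paging mode and the choice is resolved at build time rather than at run time. A toy, self-contained illustration of that per-build selection; the macro name and values are invented, not the shadow code's build machinery:

#include <stdio.h>

/* Hypothetical build-time selector, standing in for GUEST_PAGING_LEVELS.
 * A real multi-mode build compiles the file several times with different
 * values defined on the compiler command line. */
#ifndef PAGING_LEVELS
#define PAGING_LEVELS 3
#endif

static const char *top_level_source(void)
{
#if PAGING_LEVELS == 3                 /* PAE: cached top-level entries */
    return "per-vcpu l3e cache";
#else                                  /* 2- or 4-level: walk from the root */
    return "guest_table root";
#endif
}

int main(void)
{
    printf("levels=%d, walk starts from: %s\n",
           PAGING_LEVELS, top_level_source());
    return 0;
}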
/* This validation is called with lock held, and after write permission
* Return 1 to indicate success and 0 for inconsistency
*/
static inline uint32_t
-shadow_check_gwalk(struct vcpu *v, unsigned long va, walk_t *gw)
+shadow_check_gwalk(struct vcpu *v, unsigned long va, walk_t *gw, int version)
{
struct domain *d = v->domain;
guest_l1e_t *l1p;
ASSERT(shadow_locked_by_me(d));
- if ( gw->version ==
- atomic_read(&d->arch.paging.shadow.gtable_dirty_version) )
- return 1;
+ if ( version == atomic_read(&d->arch.paging.shadow.gtable_dirty_version) )
+ return 1;
/* We may consider caching guest page mapping from last
* guest table walk. However considering this check happens
return rc;
}
-/* Walk the guest pagetables, after the manner of a hardware walker.
- *
- * Inputs: a vcpu, a virtual address, a walk_t to fill, a
- * pointer to a pagefault code
- *
- * We walk the vcpu's guest pagetables, filling the walk_t with what we
- * see and adding any Accessed and Dirty bits that are needed in the
- * guest entries. Using the pagefault code, we check the permissions as
- * we go. For the purposes of reading pagetables we treat all non-RAM
- * memory as contining zeroes.
- *
- * The walk is done in a lock-free style, with some sanity check postponed
- * after grabbing shadow lock later. Those delayed checks will make sure
- * no inconsistent mapping being translated into shadow page table.
- *
- * Returns 0 for success, or the set of permission bits that we failed on
- * if the walk did not complete.
- * N.B. This is different from the old return code but almost no callers
- * checked the old return code anyway.
- */
-static uint32_t
-guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw, uint32_t pfec)
-{
- struct domain *d = v->domain;
- p2m_type_t p2mt;
- guest_l1e_t *l1p = NULL;
- guest_l2e_t *l2p = NULL;
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
- guest_l3e_t *l3p = NULL;
- guest_l4e_t *l4p;
-#endif
- uint32_t gflags, mflags, rc = 0;
- int pse;
-
- perfc_incr(shadow_guest_walk);
- memset(gw, 0, sizeof(*gw));
- gw->va = va;
-
- gw->version = atomic_read(&d->arch.paging.shadow.gtable_dirty_version);
- rmb();
-
- /* Mandatory bits that must be set in every entry. We invert NX, to
- * calculate as if there were an "X" bit that allowed access.
- * We will accumulate, in rc, the set of flags that are missing. */
- mflags = mandatory_flags(v, pfec);
-
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
-
- /* Get the l4e from the top level table and check its flags*/
- gw->l4mfn = pagetable_get_mfn(v->arch.guest_table);
- l4p = ((guest_l4e_t *)v->arch.paging.shadow.guest_vtable);
- gw->l4e = l4p[guest_l4_table_offset(va)];
- gflags = guest_l4e_get_flags(gw->l4e) ^ _PAGE_NX_BIT;
- rc |= ((gflags & mflags) ^ mflags);
- if ( rc & _PAGE_PRESENT ) goto out;
-
- /* Map the l3 table */
- gw->l3mfn = gfn_to_mfn(d, guest_l4e_get_gfn(gw->l4e), &p2mt);
- if ( !p2m_is_ram(p2mt) )
- {
- rc |= _PAGE_PRESENT;
- goto out;
- }
- ASSERT(mfn_valid(gw->l3mfn));
-
- /* Get the l3e and check its flags*/
- l3p = sh_map_domain_page(gw->l3mfn);
- gw->l3e = l3p[guest_l3_table_offset(va)];
- gflags = guest_l3e_get_flags(gw->l3e) ^ _PAGE_NX_BIT;
- rc |= ((gflags & mflags) ^ mflags);
- if ( rc & _PAGE_PRESENT )
- goto out;
-
-#else /* PAE only... */
-
- /* Get l3e from the cache of the top level table and check its flag */
- gw->l3e = v->arch.paging.shadow.gl3e[guest_l3_table_offset(va)];
- if ( !(guest_l3e_get_flags(gw->l3e) & _PAGE_PRESENT) )
- {
- rc |= _PAGE_PRESENT;
- goto out;
- }
-
-#endif /* PAE or 64... */
-
- /* Map the l2 table */
- gw->l2mfn = gfn_to_mfn(d, guest_l3e_get_gfn(gw->l3e), &p2mt);
- if ( !p2m_is_ram(p2mt) )
- {
- rc |= _PAGE_PRESENT;
- goto out;
- }
- ASSERT(mfn_valid(gw->l2mfn));
-
- /* Get the l2e */
- l2p = sh_map_domain_page(gw->l2mfn);
- gw->l2e = l2p[guest_l2_table_offset(va)];
-
-#else /* 32-bit only... */
-
- /* Get l2e from the top level table */
- gw->l2mfn = pagetable_get_mfn(v->arch.guest_table);
- l2p = ((guest_l2e_t *)v->arch.paging.shadow.guest_vtable);
- gw->l2e = l2p[guest_l2_table_offset(va)];
-
-#endif /* All levels... */
-
- gflags = guest_l2e_get_flags(gw->l2e) ^ _PAGE_NX_BIT;
- rc |= ((gflags & mflags) ^ mflags);
- if ( rc & _PAGE_PRESENT )
- goto out;
-
- pse = (guest_supports_superpages(v) &&
- (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE));
-
- if ( pse )
- {
- /* Special case: this guest VA is in a PSE superpage, so there's
- * no guest l1e. We make one up so that the propagation code
- * can generate a shadow l1 table. Start with the gfn of the
- * first 4k-page of the superpage. */
- gfn_t start = guest_l2e_get_gfn(gw->l2e);
- /* Grant full access in the l1e, since all the guest entry's
- * access controls are enforced in the shadow l2e. */
- int flags = (_PAGE_PRESENT|_PAGE_USER|_PAGE_RW|
- _PAGE_ACCESSED|_PAGE_DIRTY);
- /* PSE level 2 entries use bit 12 for PAT; propagate it to bit 7
- * of the level 1. */
- if ( (guest_l2e_get_flags(gw->l2e) & _PAGE_PSE_PAT) )
- flags |= _PAGE_PAT;
- /* Copy the cache-control bits to the l1 as well, because we
- * can't represent PAT in the (non-PSE) shadow l2e. :(
- * This could cause problems if a guest ever maps an area of
- * memory with superpages using more than one caching mode. */
- flags |= guest_l2e_get_flags(gw->l2e) & (_PAGE_PWT|_PAGE_PCD);
- /* Increment the pfn by the right number of 4k pages.
- * The ~0x1 is to mask out the PAT bit mentioned above. */
- start = _gfn((gfn_x(start) & ~0x1) + guest_l1_table_offset(va));
- gw->l1e = guest_l1e_from_gfn(start, flags);
- gw->l1mfn = _mfn(INVALID_MFN);
- }
- else
- {
- /* Not a superpage: carry on and find the l1e. */
- gw->l1mfn = gfn_to_mfn(d, guest_l2e_get_gfn(gw->l2e), &p2mt);
- if ( !p2m_is_ram(p2mt) )
- {
- rc |= _PAGE_PRESENT;
- goto out;
- }
- ASSERT(mfn_valid(gw->l1mfn));
- l1p = sh_map_domain_page(gw->l1mfn);
- gw->l1e = l1p[guest_l1_table_offset(va)];
- gflags = guest_l1e_get_flags(gw->l1e) ^ _PAGE_NX_BIT;
- rc |= ((gflags & mflags) ^ mflags);
- }
-
- /* Go back and set accessed and dirty bits only if the walk was a
- * success. Although the PRMs say higher-level _PAGE_ACCESSED bits
- * get set whenever a lower-level PT is used, at least some hardware
- * walkers behave this way. */
- if ( rc == 0 )
- {
-#if GUEST_PAGING_LEVELS == 4 /* 64-bit only... */
- if ( set_ad_bits(l4p + guest_l4_table_offset(va), &gw->l4e, 0) )
- paging_mark_dirty(d, mfn_x(gw->l4mfn));
- if ( set_ad_bits(l3p + guest_l3_table_offset(va), &gw->l3e, 0) )
- paging_mark_dirty(d, mfn_x(gw->l3mfn));
-#endif
- if ( set_ad_bits(l2p + guest_l2_table_offset(va), &gw->l2e,
- (pse && (pfec & PFEC_write_access))) )
- paging_mark_dirty(d, mfn_x(gw->l2mfn));
- if ( !pse )
- {
- if ( set_ad_bits(l1p + guest_l1_table_offset(va), &gw->l1e,
- (pfec & PFEC_write_access)) )
- paging_mark_dirty(d, mfn_x(gw->l1mfn));
- }
- }
-
- out:
-#if GUEST_PAGING_LEVELS == 4
- if ( l3p ) sh_unmap_domain_page(l3p);
-#endif
-#if GUEST_PAGING_LEVELS >= 3
- if ( l2p ) sh_unmap_domain_page(l2p);
-#endif
- if ( l1p ) sh_unmap_domain_page(l1p);
-
- return rc;
-}
-
-/* Given a walk_t, translate the gw->va into the guest's notion of the
- * corresponding frame number. */
-static inline gfn_t
-guest_walk_to_gfn(walk_t *gw)
-{
- if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
- return _gfn(INVALID_GFN);
- return guest_l1e_get_gfn(gw->l1e);
-}
-
-/* Given a walk_t, translate the gw->va into the guest's notion of the
- * corresponding physical address. */
-static inline paddr_t
-guest_walk_to_gpa(walk_t *gw)
-{
- if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
- return 0;
- return guest_l1e_get_paddr(gw->l1e) + (gw->va & ~PAGE_MASK);
-}
-
-#if 0 /* Keep for debugging */
-/* Pretty-print the contents of a guest-walk */
-static inline void print_gw(walk_t *gw)
-{
- SHADOW_PRINTK("GUEST WALK TO %#lx:\n", gw->va);
-#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
-#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
- SHADOW_PRINTK(" l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn));
- SHADOW_PRINTK(" l4e=%" SH_PRI_gpte "\n", gw->l4e.l4);
- SHADOW_PRINTK(" l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn));
-#endif /* PAE or 64... */
- SHADOW_PRINTK(" l3e=%" SH_PRI_gpte "\n", gw->l3e.l3);
-#endif /* All levels... */
- SHADOW_PRINTK(" l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn));
- SHADOW_PRINTK(" l2e=%" SH_PRI_gpte "\n", gw->l2e.l2);
- SHADOW_PRINTK(" l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn));
- SHADOW_PRINTK(" l1e=%" SH_PRI_gpte "\n", gw->l1e.l1);
-}
-#endif /* 0 */
-
#if SHADOW_AUDIT & SHADOW_AUDIT_ENTRIES
/* Lightweight audit: pass all the shadows associated with this guest walk
* through the audit mechanisms */
// XXX -- this is expensive, but it's easy to cobble together...
// FIXME!
- if ( guest_walk_tables(v, addr, &gw, PFEC_page_present) == 0
+ if ( sh_walk_guest_tables(v, addr, &gw, PFEC_page_present) == 0
&& mfn_valid(gw.l1mfn) )
{
if ( gl1mfn )
// XXX -- this is expensive, but it's easy to cobble together...
// FIXME!
- (void) guest_walk_tables(v, addr, &gw, PFEC_page_present);
+ (void) sh_walk_guest_tables(v, addr, &gw, PFEC_page_present);
*(guest_l1e_t *)eff_l1e = gw.l1e;
}
#endif /* CONFIG == GUEST (== SHADOW) */
!is_xen_heap_mfn(mfn_x(target_mfn)) )
{
unsigned int type;
+
+ /* Compute the PAT index for the shadow page entry when VT-d is
+ * enabled and a device is assigned:
+ * 1) direct MMIO: compute the PAT index with gMTRR=UC and gPAT.
+ * 2) if snoop control is enabled, compute the PAT index as WB.
+ * 3) if snoop control is disabled, compute the PAT index with
+ * gMTRR and gPAT.
+ */
if ( hvm_get_mem_pinned_cacheattr(d, gfn_x(target_gfn), &type) )
sflags |= pat_type_2_pte_flags(type);
else if ( d->arch.hvm_domain.is_in_uc_mode )
sflags |= pat_type_2_pte_flags(PAT_TYPE_UNCACHABLE);
+ else if ( p2mt == p2m_mmio_direct )
+ sflags |= get_pat_flags(v,
+ gflags,
+ gfn_to_paddr(target_gfn),
+ ((paddr_t)mfn_x(target_mfn)) << PAGE_SHIFT,
+ MTRR_TYPE_UNCACHABLE);
+ else if ( iommu_snoop )
+ sflags |= pat_type_2_pte_flags(PAT_TYPE_WRBACK);
else
sflags |= get_pat_flags(v,
gflags,
gfn_to_paddr(target_gfn),
- ((paddr_t)mfn_x(target_mfn)) << PAGE_SHIFT);
+ ((paddr_t)mfn_x(target_mfn)) << PAGE_SHIFT,
+ NO_HARDCODE_MEM_TYPE);
}
// Set the A&D bits for higher level shadows.
domain_crash(v->domain);
return SHADOW_SET_ERROR;
}
-#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
- shadow_resync_all(v, 0);
-#endif
}
/* Write the new entry */
domain_crash(v->domain);
return SHADOW_SET_ERROR;
}
-#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
- shadow_resync_all(v, 0);
-#endif
}
/* Write the new entry */
}
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
{
- struct shadow_page_info *sp = mfn_to_shadow_page(sl1mfn);
- mfn_t gl1mfn = _mfn(sp->backpointer);
+ struct page_info *sp = mfn_to_page(sl1mfn);
+ mfn_t gl1mfn = _mfn(sp->v.sh.back);
/* If the shadow is a fl1 then the backpointer contains
the GFN instead of the GMFN, and it's definitely not
OOS. */
- if ( (sp->type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
+ if ( (sp->u.sh.type != SH_type_fl1_shadow) && mfn_valid(gl1mfn)
&& mfn_is_out_of_sync(gl1mfn) )
sh_resync(v, gl1mfn);
}
mfn_t sl1mfn,
struct domain *d)
{
- mfn_t mfn;
+ mfn_t mfn = shadow_l1e_get_mfn(new_sl1e);
+ int flags = shadow_l1e_get_flags(new_sl1e);
unsigned long gfn;
- if ( !d->dirty_vram ) return;
-
- mfn = shadow_l1e_get_mfn(new_sl1e);
-
- if ( !mfn_valid(mfn) ) return; /* m2p for mmio_direct may not exist */
+ if ( !d->dirty_vram /* tracking disabled? */
+ || !(flags & _PAGE_RW) /* read-only mapping? */
+ || !mfn_valid(mfn) ) /* mfn can be invalid in mmio_direct */
+ return;
gfn = mfn_to_gfn(d, mfn);
- if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
+ if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) )
+ {
unsigned long i = gfn - d->dirty_vram->begin_pfn;
struct page_info *page = mfn_to_page(mfn);
- u32 count_info = page->u.inuse.type_info & PGT_count_mask;
- if ( count_info == 1 )
+ if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
/* Initial guest reference, record it */
d->dirty_vram->sl1ma[i] = pfn_to_paddr(mfn_x(sl1mfn))
| ((unsigned long)sl1e & ~PAGE_MASK);
mfn_t sl1mfn,
struct domain *d)
{
- mfn_t mfn;
+ mfn_t mfn = shadow_l1e_get_mfn(old_sl1e);
+ int flags = shadow_l1e_get_flags(old_sl1e);
unsigned long gfn;
- if ( !d->dirty_vram ) return;
-
- mfn = shadow_l1e_get_mfn(old_sl1e);
-
- if ( !mfn_valid(mfn) ) return;
+ if ( !d->dirty_vram /* tracking disabled? */
+ || !(flags & _PAGE_RW) /* read-only mapping? */
+ || !mfn_valid(mfn) ) /* mfn can be invalid in mmio_direct */
+ return;
gfn = mfn_to_gfn(d, mfn);
- if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) ) {
+ if ( (gfn >= d->dirty_vram->begin_pfn) && (gfn < d->dirty_vram->end_pfn) )
+ {
unsigned long i = gfn - d->dirty_vram->begin_pfn;
struct page_info *page = mfn_to_page(mfn);
- u32 count_info = page->u.inuse.type_info & PGT_count_mask;
int dirty = 0;
paddr_t sl1ma = pfn_to_paddr(mfn_x(sl1mfn))
| ((unsigned long)sl1e & ~PAGE_MASK);
- if ( count_info == 1 ) {
+ if ( (page->u.inuse.type_info & PGT_count_mask) == 1 )
+ {
/* Last reference */
if ( d->dirty_vram->sl1ma[i] == INVALID_PADDR ) {
/* We didn't know it was that one, let's say it is dirty */
dirty = 1;
- } else {
+ }
+ else
+ {
ASSERT(d->dirty_vram->sl1ma[i] == sl1ma);
d->dirty_vram->sl1ma[i] = INVALID_PADDR;
- if ( shadow_l1e_get_flags(old_sl1e) & _PAGE_DIRTY )
+ if ( flags & _PAGE_DIRTY )
dirty = 1;
}
- } else {
+ }
+ else
+ {
/* We had more than one reference, just consider the page dirty. */
dirty = 1;
/* Check that it's not the one we recorded. */
- if ( d->dirty_vram->sl1ma[i] == sl1ma ) {
+ if ( d->dirty_vram->sl1ma[i] == sl1ma )
+ {
/* Too bad, we remembered the wrong one... */
d->dirty_vram->sl1ma[i] = INVALID_PADDR;
- } else {
+ }
+ else
+ {
/* Ok, our recorded sl1e is still pointing to this page, let's
* just hope it will remain. */
}
}
- if ( dirty ) {
+ if ( dirty )
+ {
d->dirty_vram->dirty_bitmap[i / 8] |= 1 << (i % 8);
d->dirty_vram->last_dirty = NOW();
}
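
The dirty-VRAM paths above index the tracking structures by the frame's offset into the tracked range: i = gfn - begin_pfn selects both the recorded sl1ma slot and bit i of the dirty bitmap (byte i / 8, bit i % 8). A standalone sketch of just that bitmap arithmetic, with made-up range bounds and helper names:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Hypothetical tracked range, mirroring begin_pfn/end_pfn above. */
#define BEGIN_PFN 0xA0000UL
#define END_PFN   0xA0100UL
#define NR_PFNS   (END_PFN - BEGIN_PFN)

static uint8_t dirty_bitmap[(NR_PFNS + 7) / 8];

static void mark_dirty(unsigned long gfn)
{
    if ( gfn < BEGIN_PFN || gfn >= END_PFN )
        return;                               /* outside the tracked range */
    unsigned long i = gfn - BEGIN_PFN;        /* offset into the range */
    dirty_bitmap[i / 8] |= 1 << (i % 8);      /* byte i/8, bit i%8 */
}

int main(void)
{
    memset(dirty_bitmap, 0, sizeof(dirty_bitmap));
    mark_dirty(0xA0003UL);
    printf("byte 0 = %#x\n", dirty_bitmap[0]);   /* bit 3 set -> 0x8 */
    return 0;
}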
do { \
int _i; \
shadow_l1e_t *_sp = sh_map_domain_page((_sl1mfn)); \
- ASSERT(mfn_to_shadow_page(_sl1mfn)->type == SH_type_l1_shadow \
- || mfn_to_shadow_page(_sl1mfn)->type == SH_type_fl1_shadow); \
+ ASSERT(mfn_to_page(_sl1mfn)->u.sh.type == SH_type_l1_shadow \
+ || mfn_to_page(_sl1mfn)->u.sh.type == SH_type_fl1_shadow);\
for ( _i = 0; _i < SHADOW_L1_PAGETABLE_ENTRIES; _i++ ) \
{ \
(_sl1e) = _sp + _i; \
do { \
int _i, _j, __done = 0; \
int _xen = !shadow_mode_external(_dom); \
- ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_32_shadow); \
+ ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_32_shadow);\
for ( _j = 0; _j < 4 && !__done; _j++ ) \
{ \
shadow_l2e_t *_sp = sh_map_domain_page(_sl2mfn); \
int _i; \
int _xen = !shadow_mode_external(_dom); \
shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \
- ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_pae_shadow \
- || mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_pae_shadow);\
+ ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_pae_shadow \
+ || mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow);\
for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
if ( (!(_xen)) \
- || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_pae_shadow\
+ || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_pae_shadow\
|| ((_i + (3 * SHADOW_L2_PAGETABLE_ENTRIES)) \
< (HYPERVISOR_VIRT_START >> SHADOW_L2_PAGETABLE_SHIFT)) ) \
{ \
int _i; \
int _xen = !shadow_mode_external(_dom); \
shadow_l2e_t *_sp = sh_map_domain_page((_sl2mfn)); \
- ASSERT(mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2_64_shadow || \
- mfn_to_shadow_page(_sl2mfn)->type == SH_type_l2h_64_shadow); \
+ ASSERT(mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2_64_shadow ||\
+ mfn_to_page(_sl2mfn)->u.sh.type == SH_type_l2h_64_shadow);\
for ( _i = 0; _i < SHADOW_L2_PAGETABLE_ENTRIES; _i++ ) \
{ \
if ( (!(_xen)) \
|| !is_pv_32on64_domain(_dom) \
- || mfn_to_shadow_page(_sl2mfn)->type != SH_type_l2h_64_shadow \
+ || mfn_to_page(_sl2mfn)->u.sh.type != SH_type_l2h_64_shadow\
|| (_i < COMPAT_L2_PAGETABLE_FIRST_XEN_SLOT(_dom)) ) \
{ \
(_sl2e) = _sp + _i; \
do { \
int _i; \
shadow_l3e_t *_sp = sh_map_domain_page((_sl3mfn)); \
- ASSERT(mfn_to_shadow_page(_sl3mfn)->type == SH_type_l3_64_shadow); \
+ ASSERT(mfn_to_page(_sl3mfn)->u.sh.type == SH_type_l3_64_shadow);\
for ( _i = 0; _i < SHADOW_L3_PAGETABLE_ENTRIES; _i++ ) \
{ \
(_sl3e) = _sp + _i; \
shadow_l4e_t *_sp = sh_map_domain_page((_sl4mfn)); \
int _xen = !shadow_mode_external(_dom); \
int _i; \
- ASSERT(mfn_to_shadow_page(_sl4mfn)->type == SH_type_l4_64_shadow); \
+ ASSERT(mfn_to_page(_sl4mfn)->u.sh.type == SH_type_l4_64_shadow);\
for ( _i = 0; _i < SHADOW_L4_PAGETABLE_ENTRIES; _i++ ) \
{ \
if ( (!(_xen)) || is_guest_l4_slot(_dom, _i) ) \
&& shadow_type != SH_type_l2h_pae_shadow
&& shadow_type != SH_type_l4_64_shadow )
/* Lower-level shadow, not yet linked form a higher level */
- mfn_to_shadow_page(smfn)->up = 0;
+ mfn_to_page(smfn)->up = 0;
#if GUEST_PAGING_LEVELS == 4
#if (SHADOW_OPTIMIZATIONS & SHOPT_LINUX_L3_TOPLEVEL)
* of them, decide that this isn't an old linux guest, and stop
* pinning l3es. This is not very quick but it doesn't happen
* very often. */
- struct list_head *l, *t;
- struct shadow_page_info *sp;
+ struct page_info *sp, *t;
struct vcpu *v2;
int l4count = 0, vcpus = 0;
- list_for_each(l, &v->domain->arch.paging.shadow.pinned_shadows)
+ page_list_for_each(sp, &v->domain->arch.paging.shadow.pinned_shadows)
{
- sp = list_entry(l, struct shadow_page_info, list);
- if ( sp->type == SH_type_l4_64_shadow )
+ if ( sp->u.sh.type == SH_type_l4_64_shadow )
l4count++;
}
for_each_vcpu ( v->domain, v2 )
if ( l4count > 2 * vcpus )
{
/* Unpin all the pinned l3 tables, and don't pin any more. */
- list_for_each_safe(l, t, &v->domain->arch.paging.shadow.pinned_shadows)
+ page_list_for_each_safe(sp, t, &v->domain->arch.paging.shadow.pinned_shadows)
{
- sp = list_entry(l, struct shadow_page_info, list);
- if ( sp->type == SH_type_l3_64_shadow )
- sh_unpin(v, shadow_page_to_mfn(sp));
+ if ( sp->u.sh.type == SH_type_l3_64_shadow )
+ sh_unpin(v, page_to_mfn(sp));
}
v->domain->arch.paging.shadow.opt_flags &= ~SHOPT_LINUX_L3_TOPLEVEL;
}
static shadow_l3e_t * shadow_get_and_create_l3e(struct vcpu *v,
walk_t *gw,
mfn_t *sl3mfn,
- fetch_type_t ft)
+ fetch_type_t ft,
+ int *resync)
{
mfn_t sl4mfn;
shadow_l4e_t *sl4e;
ASSERT((r & SHADOW_SET_FLUSH) == 0);
if ( r & SHADOW_SET_ERROR )
return NULL;
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+ *resync |= 1;
+#endif
+
}
/* Now follow it down a level. Guaranteed to succeed. */
return sh_linear_l3_table(v) + shadow_l3_linear_offset(gw->va);
static shadow_l2e_t * shadow_get_and_create_l2e(struct vcpu *v,
walk_t *gw,
mfn_t *sl2mfn,
- fetch_type_t ft)
+ fetch_type_t ft,
+ int *resync)
{
#if GUEST_PAGING_LEVELS >= 4 /* 64bit... */
mfn_t sl3mfn = _mfn(INVALID_MFN);
shadow_l3e_t *sl3e;
if ( !mfn_valid(gw->l2mfn) ) return NULL; /* No guest page. */
/* Get the l3e */
- sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft);
+ sl3e = shadow_get_and_create_l3e(v, gw, &sl3mfn, ft, resync);
if ( sl3e == NULL ) return NULL;
if ( shadow_l3e_get_flags(*sl3e) & _PAGE_PRESENT )
{
ASSERT((r & SHADOW_SET_FLUSH) == 0);
if ( r & SHADOW_SET_ERROR )
return NULL;
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+ *resync |= 1;
+#endif
+
}
/* Now follow it down a level. Guaranteed to succeed. */
return sh_linear_l2_table(v) + shadow_l2_linear_offset(gw->va);
fetch_type_t ft)
{
mfn_t sl2mfn;
+ int resync = 0;
shadow_l2e_t *sl2e;
/* Get the l2e */
- sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft);
+ sl2e = shadow_get_and_create_l2e(v, gw, &sl2mfn, ft, &resync);
if ( sl2e == NULL ) return NULL;
+
/* Install the sl1 in the l2e if it wasn't there or if we need to
* re-do it to fix a PSE dirty bit. */
if ( shadow_l2e_get_flags(*sl2e) & _PAGE_PRESENT
ASSERT((r & SHADOW_SET_FLUSH) == 0);
if ( r & SHADOW_SET_ERROR )
return NULL;
+
/* This next line is important: in 32-on-PAE and 32-on-64 modes,
* the guest l1 table has an 8k shadow, and we need to return
* the right mfn of the pair. This call will set it for us as a
* compiled out.) */
(void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
}
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+ /* All pages walked are now pagetables. Safe to resync pages
+ * in case level 4 or 3 shadows were set. */
+ if ( resync )
+ shadow_resync_all(v, 0);
+#endif
+
/* Now follow it down a level. Guaranteed to succeed. */
return sh_linear_l1_table(v) + shadow_l1_linear_offset(gw->va);
}
void sh_destroy_l4_shadow(struct vcpu *v, mfn_t smfn)
{
shadow_l4e_t *sl4e;
- u32 t = mfn_to_shadow_page(smfn)->type;
+ u32 t = mfn_to_page(smfn)->u.sh.type;
mfn_t gmfn, sl4mfn;
SHADOW_DEBUG(DESTROY_SHADOW,
ASSERT(t == SH_type_l4_shadow);
/* Record that the guest page isn't shadowed any more (in this type) */
- gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+ gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
/* Decrement refcounts of all the old entries */
void sh_destroy_l3_shadow(struct vcpu *v, mfn_t smfn)
{
shadow_l3e_t *sl3e;
- u32 t = mfn_to_shadow_page(smfn)->type;
+ u32 t = mfn_to_page(smfn)->u.sh.type;
mfn_t gmfn, sl3mfn;
SHADOW_DEBUG(DESTROY_SHADOW,
ASSERT(t == SH_type_l3_shadow);
/* Record that the guest page isn't shadowed any more (in this type) */
- gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+ gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
void sh_destroy_l2_shadow(struct vcpu *v, mfn_t smfn)
{
shadow_l2e_t *sl2e;
- u32 t = mfn_to_shadow_page(smfn)->type;
+ u32 t = mfn_to_page(smfn)->u.sh.type;
mfn_t gmfn, sl2mfn;
SHADOW_DEBUG(DESTROY_SHADOW,
#endif
/* Record that the guest page isn't shadowed any more (in this type) */
- gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+ gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
{
struct domain *d = v->domain;
shadow_l1e_t *sl1e;
- u32 t = mfn_to_shadow_page(smfn)->type;
+ u32 t = mfn_to_page(smfn)->u.sh.type;
SHADOW_DEBUG(DESTROY_SHADOW,
"%s(%05lx)\n", __func__, mfn_x(smfn));
/* Record that the guest page isn't shadowed any more (in this type) */
if ( t == SH_type_fl1_shadow )
{
- gfn_t gfn = _gfn(mfn_to_shadow_page(smfn)->backpointer);
+ gfn_t gfn = _gfn(mfn_to_page(smfn)->v.sh.back);
delete_fl1_shadow_status(v, gfn, smfn);
}
else
{
- mfn_t gmfn = _mfn(mfn_to_shadow_page(smfn)->backpointer);
+ mfn_t gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
delete_shadow_status(v, gmfn, t, smfn);
shadow_demote(v, gmfn, t);
}
void sh_destroy_monitor_table(struct vcpu *v, mfn_t mmfn)
{
struct domain *d = v->domain;
- ASSERT(mfn_to_shadow_page(mmfn)->type == SH_type_monitor_table);
+ ASSERT(mfn_to_page(mmfn)->u.sh.type == SH_type_monitor_table);
#if (CONFIG_PAGING_LEVELS == 4) && (SHADOW_PAGING_LEVELS != 4)
{
if ( guest_l4e_get_flags(new_gl4e) & _PAGE_PRESENT )
{
gfn_t gl3gfn = guest_l4e_get_gfn(new_gl4e);
- mfn_t gl3mfn = gfn_to_mfn(d, gl3gfn, &p2mt);
+ mfn_t gl3mfn = gfn_to_mfn_query(d, gl3gfn, &p2mt);
if ( p2m_is_ram(p2mt) )
sl3mfn = get_shadow_status(v, gl3mfn, SH_type_l3_shadow);
- else
+ else if ( p2mt != p2m_populate_on_demand )
result |= SHADOW_SET_ERROR;
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+ if ( mfn_valid(sl3mfn) )
+ shadow_resync_all(v, 0);
+#endif
}
l4e_propagate_from_guest(v, new_gl4e, sl3mfn, &new_sl4e, ft_prefetch);
if ( guest_l3e_get_flags(new_gl3e) & _PAGE_PRESENT )
{
gfn_t gl2gfn = guest_l3e_get_gfn(new_gl3e);
- mfn_t gl2mfn = gfn_to_mfn(v->domain, gl2gfn, &p2mt);
+ mfn_t gl2mfn = gfn_to_mfn_query(v->domain, gl2gfn, &p2mt);
if ( p2m_is_ram(p2mt) )
sl2mfn = get_shadow_status(v, gl2mfn, SH_type_l2_shadow);
- else
+ else if ( p2mt != p2m_populate_on_demand )
result |= SHADOW_SET_ERROR;
+
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+ if ( mfn_valid(sl2mfn) )
+ shadow_resync_all(v, 0);
+#endif
}
l3e_propagate_from_guest(v, new_gl3e, sl2mfn, &new_sl3e, ft_prefetch);
result |= shadow_set_l3e(v, sl3p, new_sl3e, sl3mfn);
}
else
{
- mfn_t gl1mfn = gfn_to_mfn(v->domain, gl1gfn, &p2mt);
+ mfn_t gl1mfn = gfn_to_mfn_query(v->domain, gl1gfn, &p2mt);
if ( p2m_is_ram(p2mt) )
- sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
- else
+ sl1mfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
+ else if ( p2mt != p2m_populate_on_demand )
result |= SHADOW_SET_ERROR;
}
}
#if SHADOW_PAGING_LEVELS == 3
reserved_xen_slot =
- ((mfn_to_shadow_page(sl2mfn)->type == SH_type_l2h_pae_shadow) &&
+ ((mfn_to_page(sl2mfn)->u.sh.type == SH_type_l2h_pae_shadow) &&
(shadow_index
>= (L2_PAGETABLE_FIRST_XEN_SLOT & (L2_PAGETABLE_ENTRIES-1))));
#else /* SHADOW_PAGING_LEVELS == 2 */
perfc_incr(shadow_validate_gl1e_calls);
gfn = guest_l1e_get_gfn(new_gl1e);
- gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+ gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
l1e_propagate_from_guest(v, new_gl1e, gmfn, &new_sl1e, ft_prefetch, p2mt);
result |= shadow_set_l1e(v, sl1p, new_sl1e, sl1mfn);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
- gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+ gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
if ( mfn_valid(gl1mfn)
&& mfn_is_out_of_sync(gl1mfn) )
{
shadow_l1e_t nsl1e;
gfn = guest_l1e_get_gfn(gl1e);
- gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+ gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
l1e_propagate_from_guest(v, gl1e, gmfn, &nsl1e, ft_prefetch, p2mt);
rc |= shadow_set_l1e(v, sl1p, nsl1e, sl1mfn);
* called in the *mode* of the vcpu that unsynced it. Clear? Good. */
int sh_safe_not_to_sync(struct vcpu *v, mfn_t gl1mfn)
{
- struct shadow_page_info *sp;
+ struct page_info *sp;
mfn_t smfn;
smfn = get_shadow_status(v, gl1mfn, SH_type_l1_shadow);
ASSERT(mfn_valid(smfn)); /* Otherwise we would not have been called */
/* Up to l2 */
- sp = mfn_to_shadow_page(smfn);
- if ( sp->count != 1 || !sp->up )
+ sp = mfn_to_page(smfn);
+ if ( sp->u.sh.count != 1 || !sp->up )
return 0;
smfn = _mfn(sp->up >> PAGE_SHIFT);
ASSERT(mfn_valid(smfn));
#if (SHADOW_PAGING_LEVELS == 4)
/* up to l3 */
- sp = mfn_to_shadow_page(smfn);
- if ( sp->count != 1 || !sp->up )
+ sp = mfn_to_page(smfn);
+ if ( sp->u.sh.count != 1 || !sp->up )
return 0;
smfn = _mfn(sp->up >> PAGE_SHIFT);
ASSERT(mfn_valid(smfn));
/* up to l4 */
- sp = mfn_to_shadow_page(smfn);
- if ( sp->count != 1
+ sp = mfn_to_page(smfn);
+ if ( sp->u.sh.count != 1
|| sh_type_is_pinnable(v, SH_type_l3_64_shadow) || !sp->up )
return 0;
smfn = _mfn(sp->up >> PAGE_SHIFT);
ASSERT(mfn_valid(smfn));
+#endif
-#if (GUEST_PAGING_LEVELS == 2)
+#if (GUEST_PAGING_LEVELS == 2 && SHADOW_PAGING_LEVELS == 3)
/* In 2-on-3 shadow mode the up pointer contains the link to the
* shadow page, but the shadow_table contains only the first of the
* four pages that makes the PAE top shadow tables. */
smfn = _mfn(mfn_x(smfn) & ~0x3UL);
-#endif
-
#endif
if ( pagetable_get_pfn(v->arch.shadow_table[0]) == mfn_x(smfn)
/* Look at the gfn that the l1e is pointing at */
gfn = guest_l1e_get_gfn(gl1e);
- gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+ gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
/* Propagate the entry. */
l1e_propagate_from_guest(v, gl1e, gmfn, &sl1e, ft_prefetch, p2mt);
fetch_type_t ft = 0;
p2m_type_t p2mt;
uint32_t rc;
+ int version;
#if SHADOW_OPTIMIZATIONS & SHOPT_FAST_EMULATION
int fast_emul = 0;
#endif
writes to an out of sync page. */
if ( mfn_valid(gmfn) && mfn_is_out_of_sync(gmfn) )
{
+ fast_emul = 0;
v->arch.paging.last_write_emul_ok = 0;
goto page_fault_slow_path;
}
+ shadow_l2_linear_offset(va)),
sizeof(sl2e)) != 0)
|| !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
- || !mfn_valid(gl1mfn = _mfn(mfn_to_shadow_page(
- shadow_l2e_get_mfn(sl2e))->backpointer))
+ || !mfn_valid(gl1mfn = _mfn(mfn_to_page(
+ shadow_l2e_get_mfn(sl2e))->v.sh.back))
|| unlikely(mfn_is_out_of_sync(gl1mfn)) )
{
/* Hit the slow path as if there had been no
}
rewalk:
- rc = guest_walk_tables(v, va, &gw, regs->error_code);
+
+ /* The walk is done in a lock-free style, with some sanity checks
+ * postponed until after the shadow lock is taken. Those delayed
+ * checks make sure no inconsistent mapping is translated into the
+ * shadow page table. */
+ version = atomic_read(&d->arch.paging.shadow.gtable_dirty_version);
+ rmb();
+ rc = sh_walk_guest_tables(v, va, &gw, regs->error_code);
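
The rewalk path snapshots gtable_dirty_version before the lock-free guest walk and re-checks it later, under the shadow lock, in shadow_check_gwalk; a mismatch forces a rewalk. The same snapshot-then-validate pattern, reduced to a single-threaded sketch with hypothetical names (a real implementation needs the appropriate barriers and atomics, as the rmb() above hints):

#include <stdio.h>

/* Hypothetical shared state: a version counter bumped by writers and a
 * value the reader wants a consistent view of. */
static unsigned int dirty_version;
static int shared_value;

static void writer_update(int v)
{
    shared_value = v;
    dirty_version++;            /* publish that a change happened */
}

/* Reader: snapshot the version, do the unlocked work, then validate.
 * Returns 0 when the snapshot went stale and the caller must retry. */
static int read_consistent(int *out)
{
    unsigned int version = dirty_version;   /* snapshot (rmb() in real code) */
    int copy = shared_value;                /* lock-free "walk" */
    if ( version != dirty_version )         /* re-check before committing */
        return 0;                           /* stale: caller must rewalk */
    *out = copy;
    return 1;
}

int main(void)
{
    int v;
    writer_update(42);
    while ( !read_consistent(&v) )
        ;                                   /* rewalk, as the fault handler does */
    printf("consistent value: %d\n", v);
    return 0;
}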
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
regs->error_code &= ~PFEC_page_present;
/* What mfn is the guest trying to access? */
gfn = guest_l1e_get_gfn(gw.l1e);
- gmfn = gfn_to_mfn(d, gfn, &p2mt);
+ gmfn = gfn_to_mfn_guest(d, gfn, &p2mt);
if ( shadow_mode_refcounts(d) &&
(!p2m_is_valid(p2mt) || (!p2m_is_mmio(p2mt) && !mfn_valid(gmfn))) )
shadow_lock(d);
TRACE_CLEAR_PATH_FLAGS;
+
+ /* Make sure there is enough free shadow memory to build a chain of
+ * shadow tables. (We never allocate a top-level shadow on this path,
+ * only a 32b l1, pae l1, or 64b l3+2+1. Note that while
+ * SH_type_l1_shadow isn't correct in the latter case, all page
+ * tables are the same size there.)
+ *
+ * Preallocate shadow pages *before* removing writable accesses,
+ * otherwise an OOS L1 might be demoted and promoted again with
+ * writable mappings. */
+ shadow_prealloc(d,
+ SH_type_l1_shadow,
+ GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1);
rc = gw_remove_write_accesses(v, va, &gw);
}
#endif /* OOS */
- if ( !shadow_check_gwalk(v, va, &gw) )
+ if ( !shadow_check_gwalk(v, va, &gw, version) )
{
perfc_incr(shadow_inconsistent_gwalk);
shadow_unlock(d);
shadow_audit_tables(v);
sh_audit_gw(v, &gw);
- /* Make sure there is enough free shadow memory to build a chain of
- * shadow tables. (We never allocate a top-level shadow on this path,
- * only a 32b l1, pae l1, or 64b l3+2+1. Note that while
- * SH_type_l1_shadow isn't correct in the latter case, all page
- * tables are the same size there.) */
- shadow_prealloc(d,
- SH_type_l1_shadow,
- GUEST_PAGING_LEVELS < 4 ? 1 : GUEST_PAGING_LEVELS - 1);
-
/* Acquire the shadow. This must happen before we figure out the rights
* for the shadow entry, since we might promote a page here. */
ptr_sl1e = shadow_get_and_create_l1e(v, &gw, &sl1mfn, ft);
goto mmio;
}
- /* Log attempts to write to read-only memory */
+ /* Ignore attempts to write to read-only memory. */
if ( (p2mt == p2m_ram_ro) && (ft == ft_demand_write) )
{
- static unsigned long lastpage = 0;
+ static unsigned long lastpage;
if ( xchg(&lastpage, va & PAGE_MASK) != (va & PAGE_MASK) )
gdprintk(XENLOG_DEBUG, "guest attempted write to read-only memory"
" page. va page=%#lx, mfn=%#lx\n",
// easier than invalidating all of the individual 4K pages).
//
sl1mfn = shadow_l2e_get_mfn(sl2e);
- if ( mfn_to_shadow_page(sl1mfn)->type
+ if ( mfn_to_page(sl1mfn)->u.sh.type
== SH_type_fl1_shadow )
{
flush_tlb_local();
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Check to see if the SL1 is out of sync. */
{
- mfn_t gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+ mfn_t gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
struct page_info *pg = mfn_to_page(gl1mfn);
if ( mfn_valid(gl1mfn)
&& page_is_out_of_sync(pg) )
}
sl1mfn = shadow_l2e_get_mfn(sl2e);
- gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+ gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
pg = mfn_to_page(gl1mfn);
if ( likely(sh_mfn_is_a_page_table(gl1mfn)
return vtlb_gfn;
#endif /* (SHADOW_OPTIMIZATIONS & SHOPT_VIRTUAL_TLB) */
- if ( guest_walk_tables(v, va, &gw, pfec[0]) != 0 )
+ if ( sh_walk_guest_tables(v, va, &gw, pfec[0]) != 0 )
{
if ( !(guest_l1e_get_flags(gw.l1e) & _PAGE_PRESENT) )
pfec[0] &= ~PFEC_page_present;
/* Need to repin the old toplevel shadow if it's been unpinned
* by shadow_prealloc(): in PV mode we're still running on this
* shadow and it's not safe to free it yet. */
- if ( !mfn_to_shadow_page(old_smfn)->pinned && !sh_pin(v, old_smfn) )
+ if ( !mfn_to_page(old_smfn)->u.sh.pinned && !sh_pin(v, old_smfn) )
{
SHADOW_ERROR("can't re-pin %#lx\n", mfn_x(old_smfn));
domain_crash(v->domain);
if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
{
gl2gfn = guest_l3e_get_gfn(gl3e[i]);
- gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+ gl2mfn = gfn_to_mfn_query(d, gl2gfn, &p2mt);
if ( p2m_is_ram(p2mt) )
flush |= sh_remove_write_access(v, gl2mfn, 2, 0);
}
if ( guest_l3e_get_flags(gl3e[i]) & _PAGE_PRESENT )
{
gl2gfn = guest_l3e_get_gfn(gl3e[i]);
- gl2mfn = gfn_to_mfn(d, gl2gfn, &p2mt);
+ gl2mfn = gfn_to_mfn_query(d, gl2gfn, &p2mt);
if ( p2m_is_ram(p2mt) )
sh_set_toplevel_shadow(v, i, gl2mfn, (i == 3)
? SH_type_l2h_shadow
{
int r;
shadow_l1e_t *sl1p, sl1e;
- struct shadow_page_info *sp;
+ struct page_info *sp;
ASSERT(mfn_valid(gmfn));
ASSERT(mfn_valid(smfn));
- sp = mfn_to_shadow_page(smfn);
+ sp = mfn_to_page(smfn);
- if ( sp->mbz != 0
- || (sp->type != SH_type_l1_shadow
- && sp->type != SH_type_fl1_shadow) )
+ if ( ((sp->count_info & PGC_count_mask) != 0)
+ || (sp->u.sh.type != SH_type_l1_shadow
+ && sp->u.sh.type != SH_type_fl1_shadow) )
goto fail;
sl1p = sh_map_domain_page(smfn);
void sh_clear_shadow_entry(struct vcpu *v, void *ep, mfn_t smfn)
/* Blank out a single shadow entry */
{
- switch ( mfn_to_shadow_page(smfn)->type )
+ switch ( mfn_to_page(smfn)->u.sh.type )
{
case SH_type_l1_shadow:
(void) shadow_set_l1e(v, ep, shadow_l1e_empty(), smfn); break;
&& (mfn_x(shadow_l2e_get_mfn(*sl2e)) == mfn_x(sl1mfn)) )
{
(void) shadow_set_l2e(v, sl2e, shadow_l2e_empty(), sl2mfn);
- if ( mfn_to_shadow_page(sl1mfn)->type == 0 )
+ if ( mfn_to_page(sl1mfn)->u.sh.type == 0 )
/* This breaks us cleanly out of the FOREACH macro */
done = 1;
}
&& (mfn_x(shadow_l3e_get_mfn(*sl3e)) == mfn_x(sl2mfn)) )
{
(void) shadow_set_l3e(v, sl3e, shadow_l3e_empty(), sl3mfn);
- if ( mfn_to_shadow_page(sl2mfn)->type == 0 )
+ if ( mfn_to_page(sl2mfn)->u.sh.type == 0 )
/* This breaks us cleanly out of the FOREACH macro */
done = 1;
}
&& (mfn_x(shadow_l4e_get_mfn(*sl4e)) == mfn_x(sl3mfn)) )
{
(void) shadow_set_l4e(v, sl4e, shadow_l4e_empty(), sl4mfn);
- if ( mfn_to_shadow_page(sl3mfn)->type == 0 )
+ if ( mfn_to_page(sl3mfn)->u.sh.type == 0 )
/* This breaks us cleanly out of the FOREACH macro */
done = 1;
}
}
/* Translate the GFN to an MFN */
- mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
+ /* PoD: query only if shadow lock is held (to avoid deadlock) */
+ if ( shadow_locked_by_me(v->domain) )
+ mfn = gfn_to_mfn_query(v->domain, _gfn(gfn), &p2mt);
+ else
+ mfn = gfn_to_mfn(v->domain, _gfn(gfn), &p2mt);
+
if ( p2mt == p2m_ram_ro )
return _mfn(READONLY_GFN);
if ( !p2m_is_ram(p2mt) )
int done = 0;
/* Follow the backpointer */
- gl1mfn = _mfn(mfn_to_shadow_page(sl1mfn)->backpointer);
+ gl1mfn = _mfn(mfn_to_page(sl1mfn)->v.sh.back);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Out-of-sync l1 shadows can contain anything: just check the OOS hash */
{
gfn = guest_l1e_get_gfn(*gl1e);
mfn = shadow_l1e_get_mfn(*sl1e);
- gmfn = gfn_to_mfn(v->domain, gfn, &p2mt);
+ gmfn = gfn_to_mfn_query(v->domain, gfn, &p2mt);
if ( mfn_x(gmfn) != mfn_x(mfn) )
AUDIT_FAIL(1, "bad translation: gfn %" SH_PRI_gfn
" --> %" PRI_mfn " != mfn %" PRI_mfn,
int done = 0;
/* Follow the backpointer */
- gl2mfn = _mfn(mfn_to_shadow_page(sl2mfn)->backpointer);
+ gl2mfn = _mfn(mfn_to_page(sl2mfn)->v.sh.back);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Only L1's may be out of sync. */
mfn = shadow_l2e_get_mfn(*sl2e);
gmfn = (guest_l2e_get_flags(*gl2e) & _PAGE_PSE)
? get_fl1_shadow_status(v, gfn)
- : get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt),
+ : get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt),
SH_type_l1_shadow);
if ( mfn_x(gmfn) != mfn_x(mfn) )
AUDIT_FAIL(2, "bad translation: gfn %" SH_PRI_gfn
" --> %" PRI_mfn " != mfn %" PRI_mfn,
gfn_x(gfn),
(guest_l2e_get_flags(*gl2e) & _PAGE_PSE) ? 0
- : mfn_x(gfn_to_mfn(v->domain, gfn, &p2mt)),
+ : mfn_x(gfn_to_mfn_query(v->domain, gfn, &p2mt)),
mfn_x(gmfn), mfn_x(mfn));
}
});
int done = 0;
/* Follow the backpointer */
- gl3mfn = _mfn(mfn_to_shadow_page(sl3mfn)->backpointer);
+ gl3mfn = _mfn(mfn_to_page(sl3mfn)->v.sh.back);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Only L1's may be out of sync. */
{
gfn = guest_l3e_get_gfn(*gl3e);
mfn = shadow_l3e_get_mfn(*sl3e);
- gmfn = get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt),
+ gmfn = get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt),
((GUEST_PAGING_LEVELS == 3 ||
is_pv_32on64_vcpu(v))
&& !shadow_mode_external(v->domain)
int done = 0;
/* Follow the backpointer */
- gl4mfn = _mfn(mfn_to_shadow_page(sl4mfn)->backpointer);
+ gl4mfn = _mfn(mfn_to_page(sl4mfn)->v.sh.back);
#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* Only L1's may be out of sync. */
{
gfn = guest_l4e_get_gfn(*gl4e);
mfn = shadow_l4e_get_mfn(*sl4e);
- gmfn = get_shadow_status(v, gfn_to_mfn(v->domain, gfn, &p2mt),
+ gmfn = get_shadow_status(v, gfn_to_mfn_query(v->domain, gfn, &p2mt),
SH_type_l3_shadow);
if ( mfn_x(gmfn) != mfn_x(mfn) )
AUDIT_FAIL(4, "bad translation: gfn %" SH_PRI_gfn
#undef GUEST_LEVELS
#endif /* CONFIG_PAGING_LEVELS == 4 */
-/******************************************************************************
- * Page metadata for shadow pages.
- */
-
-struct shadow_page_info
-{
- union {
- /* When in use, guest page we're a shadow of */
- unsigned long backpointer;
- /* When free, order of the freelist we're on */
- unsigned int order;
- };
- union {
- /* When in use, next shadow in this hash chain */
- struct shadow_page_info *next_shadow;
- /* When free, TLB flush time when freed */
- u32 tlbflush_timestamp;
- };
- struct {
- unsigned int type:5; /* What kind of shadow is this? */
- unsigned int pinned:1; /* Is the shadow pinned? */
- unsigned int count:26; /* Reference count */
- u32 mbz; /* Must be zero: this is where the owner
- * field lives in a non-shadow page */
- } __attribute__((packed));
- union {
- /* For unused shadow pages, a list of pages of this order;
- * for pinnable shadows, if pinned, a list of other pinned shadows
- * (see sh_type_is_pinnable() below for the definition of
- * "pinnable" shadow types). */
- struct list_head list;
- /* For non-pinnable shadows, a higher entry that points at us */
- paddr_t up;
- };
-};
-
-/* The structure above *must* be no larger than a struct page_info
- * from mm.h, since we'll be using the same space in the frametable.
- * Also, the mbz field must line up with the owner field of normal
- * pages, so they look properly like anonymous/xen pages. */
-static inline void shadow_check_page_struct_offsets(void) {
- BUILD_BUG_ON(sizeof (struct shadow_page_info) > sizeof (struct page_info));
- BUILD_BUG_ON(offsetof(struct shadow_page_info, mbz) !=
- offsetof(struct page_info, u.inuse._domain));
-};
-
/* Shadow type codes */
#define SH_type_none (0U) /* on the shadow free list */
#define SH_type_min_shadow (1U)
* MFN/page-info handling
*/
-// Override mfn_to_page from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
+/* Override macros from asm/page.h to make them work with mfn_t */
#undef mfn_to_page
-#define mfn_to_page(_m) (frame_table + mfn_x(_m))
-#define mfn_to_shadow_page(_m) ((struct shadow_page_info *)mfn_to_page(_m))
-
-// Override page_to_mfn from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
-#undef page_to_mfn
-#define page_to_mfn(_pg) (_mfn((_pg) - frame_table))
-#define shadow_page_to_mfn(_spg) (page_to_mfn((struct page_info *)_spg))
-
-// Override mfn_valid from asm/page.h, which was #include'd above,
-// in order to make it work with our mfn type.
+#define mfn_to_page(_m) __mfn_to_page(mfn_x(_m))
#undef mfn_valid
-#define mfn_valid(_mfn) (mfn_x(_mfn) < max_page)
+#define mfn_valid(_mfn) __mfn_valid(mfn_x(_mfn))
+#undef page_to_mfn
+#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
/* Override pagetable_t <-> struct page_info conversions to work with mfn_t */
#undef pagetable_get_page
static inline int sh_get_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
{
u32 x, nx;
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
ASSERT(mfn_valid(smfn));
- x = sp->count;
+ x = sp->u.sh.count;
nx = x + 1;
if ( unlikely(nx >= 1U<<26) )
{
- SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRtype_info " smfn=%lx\n",
- sp->backpointer, mfn_x(smfn));
+ SHADOW_PRINTK("shadow ref overflow, gmfn=%" PRpgmfn " smfn=%lx\n",
+ sp->v.sh.back, mfn_x(smfn));
return 0;
}
/* Guarded by the shadow lock, so no need for atomic update */
- sp->count = nx;
+ sp->u.sh.count = nx;
/* We remember the first shadow entry that points to each shadow. */
if ( entry_pa != 0
- && !sh_type_is_pinnable(v, sp->type)
+ && !sh_type_is_pinnable(v, sp->u.sh.type)
&& sp->up == 0 )
sp->up = entry_pa;
static inline void sh_put_ref(struct vcpu *v, mfn_t smfn, paddr_t entry_pa)
{
u32 x, nx;
- struct shadow_page_info *sp = mfn_to_shadow_page(smfn);
+ struct page_info *sp = mfn_to_page(smfn);
ASSERT(mfn_valid(smfn));
- ASSERT(sp->mbz == 0);
+ ASSERT(!(sp->count_info & PGC_count_mask));
/* If this is the entry in the up-pointer, remove it */
if ( entry_pa != 0
- && !sh_type_is_pinnable(v, sp->type)
+ && !sh_type_is_pinnable(v, sp->u.sh.type)
&& sp->up == entry_pa )
sp->up = 0;
- x = sp->count;
+ x = sp->u.sh.count;
nx = x - 1;
if ( unlikely(x == 0) )
{
SHADOW_ERROR("shadow ref underflow, smfn=%lx oc=%08x t=%#x\n",
- mfn_x(smfn), sp->count, sp->type);
+ mfn_x(smfn), sp->u.sh.count, sp->u.sh.type);
BUG();
}
/* Guarded by the shadow lock, so no need for atomic update */
- sp->count = nx;
+ sp->u.sh.count = nx;
if ( unlikely(nx == 0) )
sh_destroy_shadow(v, smfn);
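
sh_get_ref and sh_put_ref above keep the shadow reference count in a narrow field, so the increment path has to reject overflow explicitly (the 1U<<26 check) and the decrement path treats underflow as a fatal error before destroying the shadow at zero. A compact sketch of that guarded counter; the field widths and return conventions below are illustrative only, not the shadow code's real layout:

#include <stdio.h>

#define COUNT_BITS 26
#define COUNT_MAX  ((1U << COUNT_BITS) - 1)

struct shadow_meta {
    unsigned int count  : COUNT_BITS;   /* narrow refcount, as above */
    unsigned int type   : 5;
    unsigned int pinned : 1;
};

/* Take a reference; fail (return 0) instead of wrapping on overflow. */
static int get_ref(struct shadow_meta *sp)
{
    if ( sp->count >= COUNT_MAX )
        return 0;                       /* caller must handle the failure */
    sp->count++;
    return 1;
}

/* Drop a reference; underflow is a fatal logic error. */
static int put_ref(struct shadow_meta *sp)
{
    if ( sp->count == 0 )
    {
        fprintf(stderr, "refcount underflow\n");
        return -1;                      /* stands in for BUG() */
    }
    return --sp->count;                 /* 0 means "destroy the shadow" */
}

int main(void)
{
    struct shadow_meta sp = { 0, 0, 0 };
    get_ref(&sp);
    get_ref(&sp);
    put_ref(&sp);
    printf("count: %u\n", sp.count);    /* prints 1 */
    return 0;
}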
* Returns 0 for failure, 1 for success. */
static inline int sh_pin(struct vcpu *v, mfn_t smfn)
{
- struct shadow_page_info *sp;
+ struct page_info *sp;
ASSERT(mfn_valid(smfn));
- sp = mfn_to_shadow_page(smfn);
- ASSERT(sh_type_is_pinnable(v, sp->type));
- if ( sp->pinned )
+ sp = mfn_to_page(smfn);
+ ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
+ if ( sp->u.sh.pinned )
{
/* Already pinned: take it out of the pinned-list so it can go
* at the front */
- list_del(&sp->list);
+ page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
}
else
{
/* Not pinned: pin it! */
if ( !sh_get_ref(v, smfn, 0) )
return 0;
- sp->pinned = 1;
+ sp->u.sh.pinned = 1;
}
/* Put it at the head of the list of pinned shadows */
- list_add(&sp->list, &v->domain->arch.paging.shadow.pinned_shadows);
+ page_list_add(sp, &v->domain->arch.paging.shadow.pinned_shadows);
return 1;
}
* of pinned shadows, and release the extra ref. */
static inline void sh_unpin(struct vcpu *v, mfn_t smfn)
{
- struct shadow_page_info *sp;
+ struct page_info *sp;
ASSERT(mfn_valid(smfn));
- sp = mfn_to_shadow_page(smfn);
- ASSERT(sh_type_is_pinnable(v, sp->type));
- if ( sp->pinned )
+ sp = mfn_to_page(smfn);
+ ASSERT(sh_type_is_pinnable(v, sp->u.sh.type));
+ if ( sp->u.sh.pinned )
{
- sp->pinned = 0;
- list_del(&sp->list);
+ sp->u.sh.pinned = 0;
+ page_list_del(sp, &v->domain->arch.paging.shadow.pinned_shadows);
sp->up = 0; /* in case this stops being a pinnable type in future */
sh_put_ref(v, smfn, 0);
}
})
#endif
+/* Override gfn_to_mfn to work with gfn_t */
+#undef gfn_to_mfn_query
+#define gfn_to_mfn_query(d, g, t) _gfn_to_mfn_type((d), gfn_x(g), (t), p2m_query)
+#undef gfn_to_mfn_guest
+#define gfn_to_mfn_guest(d, g, t) _gfn_to_mfn_type((d), gfn_x(g), (t), p2m_guest)
-/* Type of the guest's frame numbers */
-TYPE_SAFE(unsigned long,gfn)
-#define SH_PRI_gfn "05lx"
-
-#define VALID_GFN(m) (m != INVALID_GFN)
-
-static inline int
-valid_gfn(gfn_t m)
-{
- return VALID_GFN(gfn_x(m));
-}
-
-static inline paddr_t
-gfn_to_paddr(gfn_t gfn)
-{
- return ((paddr_t)gfn_x(gfn)) << PAGE_SHIFT;
-}
-
-/* Override gfn_to_mfn to work with gfn_t */
-#undef gfn_to_mfn
-#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), gfn_x(g), (t))
+/* The shadow types needed for the various levels. */
#if GUEST_PAGING_LEVELS == 2
-
-#include "../page-guest32.h"
-
-#define GUEST_L1_PAGETABLE_ENTRIES 1024
-#define GUEST_L2_PAGETABLE_ENTRIES 1024
-#define GUEST_L1_PAGETABLE_SHIFT 12
-#define GUEST_L2_PAGETABLE_SHIFT 22
-
-/* Types of the guest's page tables */
-typedef l1_pgentry_32_t guest_l1e_t;
-typedef l2_pgentry_32_t guest_l2e_t;
-typedef intpte_32_t guest_intpte_t;
-
-/* Access functions for them */
-static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
-{ return l1e_get_paddr_32(gl1e); }
-static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
-{ return l2e_get_paddr_32(gl2e); }
-
-static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
-{ return _gfn(l1e_get_paddr_32(gl1e) >> PAGE_SHIFT); }
-static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
-{ return _gfn(l2e_get_paddr_32(gl2e) >> PAGE_SHIFT); }
-
-static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
-{ return l1e_get_flags_32(gl1e); }
-static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
-{ return l2e_get_flags_32(gl2e); }
-
-static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags)
-{ l1e_add_flags_32(gl1e, flags); return gl1e; }
-static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags)
-{ l2e_add_flags_32(gl2e, flags); return gl2e; }
-
-static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
-{ return l1e_from_pfn_32(gfn_x(gfn), flags); }
-static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
-{ return l2e_from_pfn_32(gfn_x(gfn), flags); }
-
-#define guest_l1_table_offset(a) l1_table_offset_32(a)
-#define guest_l2_table_offset(a) l2_table_offset_32(a)
-
-/* The shadow types needed for the various levels. */
#define SH_type_l1_shadow SH_type_l1_32_shadow
#define SH_type_l2_shadow SH_type_l2_32_shadow
#define SH_type_fl1_shadow SH_type_fl1_32_shadow
-
-#else /* GUEST_PAGING_LEVELS != 2 */
-
-#if GUEST_PAGING_LEVELS == 3
-#define GUEST_L1_PAGETABLE_ENTRIES 512
-#define GUEST_L2_PAGETABLE_ENTRIES 512
-#define GUEST_L3_PAGETABLE_ENTRIES 4
-#define GUEST_L1_PAGETABLE_SHIFT 12
-#define GUEST_L2_PAGETABLE_SHIFT 21
-#define GUEST_L3_PAGETABLE_SHIFT 30
-#else /* GUEST_PAGING_LEVELS == 4 */
-#define GUEST_L1_PAGETABLE_ENTRIES 512
-#define GUEST_L2_PAGETABLE_ENTRIES 512
-#define GUEST_L3_PAGETABLE_ENTRIES 512
-#define GUEST_L4_PAGETABLE_ENTRIES 512
-#define GUEST_L1_PAGETABLE_SHIFT 12
-#define GUEST_L2_PAGETABLE_SHIFT 21
-#define GUEST_L3_PAGETABLE_SHIFT 30
-#define GUEST_L4_PAGETABLE_SHIFT 39
-#endif
-
-/* Types of the guest's page tables */
-typedef l1_pgentry_t guest_l1e_t;
-typedef l2_pgentry_t guest_l2e_t;
-typedef l3_pgentry_t guest_l3e_t;
-#if GUEST_PAGING_LEVELS >= 4
-typedef l4_pgentry_t guest_l4e_t;
-#endif
-typedef intpte_t guest_intpte_t;
-
-/* Access functions for them */
-static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
-{ return l1e_get_paddr(gl1e); }
-static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
-{ return l2e_get_paddr(gl2e); }
-static inline paddr_t guest_l3e_get_paddr(guest_l3e_t gl3e)
-{ return l3e_get_paddr(gl3e); }
-#if GUEST_PAGING_LEVELS >= 4
-static inline paddr_t guest_l4e_get_paddr(guest_l4e_t gl4e)
-{ return l4e_get_paddr(gl4e); }
-#endif
-
-static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
-{ return _gfn(l1e_get_paddr(gl1e) >> PAGE_SHIFT); }
-static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
-{ return _gfn(l2e_get_paddr(gl2e) >> PAGE_SHIFT); }
-static inline gfn_t guest_l3e_get_gfn(guest_l3e_t gl3e)
-{ return _gfn(l3e_get_paddr(gl3e) >> PAGE_SHIFT); }
-#if GUEST_PAGING_LEVELS >= 4
-static inline gfn_t guest_l4e_get_gfn(guest_l4e_t gl4e)
-{ return _gfn(l4e_get_paddr(gl4e) >> PAGE_SHIFT); }
-#endif
-
-static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
-{ return l1e_get_flags(gl1e); }
-static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
-{ return l2e_get_flags(gl2e); }
-static inline u32 guest_l3e_get_flags(guest_l3e_t gl3e)
-{ return l3e_get_flags(gl3e); }
-#if GUEST_PAGING_LEVELS >= 4
-static inline u32 guest_l4e_get_flags(guest_l4e_t gl4e)
-{ return l4e_get_flags(gl4e); }
-#endif
-
-static inline guest_l1e_t guest_l1e_add_flags(guest_l1e_t gl1e, u32 flags)
-{ l1e_add_flags(gl1e, flags); return gl1e; }
-static inline guest_l2e_t guest_l2e_add_flags(guest_l2e_t gl2e, u32 flags)
-{ l2e_add_flags(gl2e, flags); return gl2e; }
-static inline guest_l3e_t guest_l3e_add_flags(guest_l3e_t gl3e, u32 flags)
-{ l3e_add_flags(gl3e, flags); return gl3e; }
-#if GUEST_PAGING_LEVELS >= 4
-static inline guest_l4e_t guest_l4e_add_flags(guest_l4e_t gl4e, u32 flags)
-{ l4e_add_flags(gl4e, flags); return gl4e; }
-#endif
-
-static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
-{ return l1e_from_pfn(gfn_x(gfn), flags); }
-static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
-{ return l2e_from_pfn(gfn_x(gfn), flags); }
-static inline guest_l3e_t guest_l3e_from_gfn(gfn_t gfn, u32 flags)
-{ return l3e_from_pfn(gfn_x(gfn), flags); }
-#if GUEST_PAGING_LEVELS >= 4
-static inline guest_l4e_t guest_l4e_from_gfn(gfn_t gfn, u32 flags)
-{ return l4e_from_pfn(gfn_x(gfn), flags); }
-#endif
-
-#define guest_l1_table_offset(a) l1_table_offset(a)
-#define guest_l2_table_offset(a) l2_table_offset(a)
-#define guest_l3_table_offset(a) l3_table_offset(a)
-#define guest_l4_table_offset(a) l4_table_offset(a)
-
-/* The shadow types needed for the various levels. */
-#if GUEST_PAGING_LEVELS == 3
+#elif GUEST_PAGING_LEVELS == 3
#define SH_type_l1_shadow SH_type_l1_pae_shadow
#define SH_type_fl1_shadow SH_type_fl1_pae_shadow
#define SH_type_l2_shadow SH_type_l2_pae_shadow
#define SH_type_l4_shadow SH_type_l4_64_shadow
#endif
-#endif /* GUEST_PAGING_LEVELS != 2 */
-
-
-/* Type used for recording a walk through guest pagetables. It is
- * filled in by the pagetable walk function, and also used as a cache
- * for later walks. When we encounter a suporpage l2e, we fabricate an
- * l1e for propagation to the shadow (for splintering guest superpages
- * into many shadow l1 entries). */
-typedef struct shadow_walk_t walk_t;
-struct shadow_walk_t
-{
- unsigned long va; /* Address we were looking for */
-#if GUEST_PAGING_LEVELS >= 3
-#if GUEST_PAGING_LEVELS >= 4
- guest_l4e_t l4e; /* Guest's level 4 entry */
-#endif
- guest_l3e_t l3e; /* Guest's level 3 entry */
-#endif
- guest_l2e_t l2e; /* Guest's level 2 entry */
- guest_l1e_t l1e; /* Guest's level 1 entry (or fabrication) */
-#if GUEST_PAGING_LEVELS >= 4
- mfn_t l4mfn; /* MFN that the level 4 entry was in */
- mfn_t l3mfn; /* MFN that the level 3 entry was in */
-#endif
- mfn_t l2mfn; /* MFN that the level 2 entry was in */
- mfn_t l1mfn; /* MFN that the level 1 entry was in */
- int version; /* Saved guest dirty version */
-};
-
/* macros for dealing with the naming of the internal function names of the
* shadow code's external entry points.
*/
#define MFN_FITS_IN_HVM_CR3(_MFN) !(mfn_x(_MFN) >> 20)
#endif
-#define SH_PRI_pte PRIpte
-
-#if GUEST_PAGING_LEVELS == 2
-#define SH_PRI_gpte "08x"
-#else /* GUEST_PAGING_LEVELS >= 3 */
-#ifndef __x86_64__
-#define SH_PRI_gpte "016llx"
-#else
-#define SH_PRI_gpte "016lx"
-#endif
-#endif /* GUEST_PAGING_LEVELS >= 3 */
+#define SH_PRI_pte PRIpte
+#define SH_PRI_gpte PRI_gpte
+#define SH_PRI_gfn PRI_gfn
#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
/* bitmap indicate which fixed map is free */
DEFINE_SPINLOCK(msix_fixmap_lock);
-DECLARE_BITMAP(msix_fixmap_pages, MAX_MSIX_PAGES);
+DECLARE_BITMAP(msix_fixmap_pages, FIX_MSIX_MAX_PAGES);
static int msix_fixmap_alloc(void)
{
- int i;
- int rc = -1;
+ int i, rc = -ENOMEM;
spin_lock(&msix_fixmap_lock);
- for ( i = 0; i < MAX_MSIX_PAGES; i++ )
+ for ( i = 0; i < FIX_MSIX_MAX_PAGES; i++ )
if ( !test_bit(i, &msix_fixmap_pages) )
break;
- if ( i == MAX_MSIX_PAGES )
+ if ( i == FIX_MSIX_MAX_PAGES )
goto out;
rc = FIX_MSIX_IO_RESERV_BASE + i;
set_bit(i, &msix_fixmap_pages);
static void msix_fixmap_free(int idx)
{
- if ( idx < FIX_MSIX_IO_RESERV_BASE )
- return;
-
spin_lock(&msix_fixmap_lock);
- clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
+ if ( idx >= FIX_MSIX_IO_RESERV_BASE )
+ clear_bit(idx - FIX_MSIX_IO_RESERV_BASE, &msix_fixmap_pages);
spin_unlock(&msix_fixmap_lock);
}
+static int msix_get_fixmap(struct pci_dev *dev, unsigned long table_paddr,
+ unsigned long entry_paddr)
+{
+ int nr_page, idx;
+
+ nr_page = (entry_paddr >> PAGE_SHIFT) - (table_paddr >> PAGE_SHIFT);
+
+ if ( nr_page < 0 || nr_page >= MAX_MSIX_TABLE_PAGES )
+ return -EINVAL;
+
+ spin_lock(&dev->msix_table_lock);
+ if ( dev->msix_table_refcnt[nr_page]++ == 0 )
+ {
+ idx = msix_fixmap_alloc();
+ if ( idx < 0 )
+ {
+ dev->msix_table_refcnt[nr_page]--;
+ goto out;
+ }
+ set_fixmap_nocache(idx, entry_paddr);
+ dev->msix_table_idx[nr_page] = idx;
+ }
+ else
+ idx = dev->msix_table_idx[nr_page];
+
+ out:
+ spin_unlock(&dev->msix_table_lock);
+ return idx;
+}
+
+static void msix_put_fixmap(struct pci_dev *dev, int idx)
+{
+ int i;
+ unsigned long start;
+
+ spin_lock(&dev->msix_table_lock);
+ for ( i = 0; i < MAX_MSIX_TABLE_PAGES; i++ )
+ {
+ if ( dev->msix_table_idx[i] == idx )
+ break;
+ }
+ if ( i == MAX_MSIX_TABLE_PAGES )
+ goto out;
+
+ if ( --dev->msix_table_refcnt[i] == 0 )
+ {
+ start = fix_to_virt(idx);
+ destroy_xen_mappings(start, start + PAGE_SIZE);
+ msix_fixmap_free(idx);
+ dev->msix_table_idx[i] = 0;
+ }
+
+ out:
+ spin_unlock(&dev->msix_table_lock);
+}
+
/*
* MSI message composition
*/
-static void msi_compose_msg(struct pci_dev *pdev, int vector,
+void msi_compose_msg(struct pci_dev *pdev, int vector,
struct msi_msg *msg)
{
unsigned dest;
msg->address_lo =
MSI_ADDR_BASE_LO |
((INT_DEST_MODE == 0) ?
- MSI_ADDR_DESTMODE_PHYS:
- MSI_ADDR_DESTMODE_LOGIC) |
+ MSI_ADDR_DESTMODE_PHYS:
+ MSI_ADDR_DESTMODE_LOGIC) |
((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_ADDR_REDIRECTION_CPU:
- MSI_ADDR_REDIRECTION_LOWPRI) |
+ MSI_ADDR_REDIRECTION_CPU:
+ MSI_ADDR_REDIRECTION_LOWPRI) |
MSI_ADDR_DEST_ID(dest);
msg->data =
MSI_DATA_TRIGGER_EDGE |
MSI_DATA_LEVEL_ASSERT |
((INT_DELIVERY_MODE != dest_LowestPrio) ?
- MSI_DATA_DELIVERY_FIXED:
- MSI_DATA_DELIVERY_LOWPRI) |
+ MSI_DATA_DELIVERY_FIXED:
+ MSI_DATA_DELIVERY_LOWPRI) |
MSI_DATA_VECTOR(vector);
}
}
case PCI_CAP_ID_MSIX:
{
void __iomem *base;
- base = entry->mask_base +
- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+ base = entry->mask_base;
msg->address_lo = readl(base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
msg->address_hi = readl(base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
static int unset_vector_msi(int vector)
{
+ ASSERT(spin_is_locked(&irq_desc[vector].lock));
+
if ( vector >= NR_VECTORS )
{
dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n",
}
irq_desc[vector].msi_desc = NULL;
+
return 0;
}
case PCI_CAP_ID_MSIX:
{
void __iomem *base;
- base = entry->mask_base +
- entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+ base = entry->mask_base;
writel(msg->address_lo,
- base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
+ base + PCI_MSIX_ENTRY_LOWER_ADDR_OFFSET);
writel(msg->address_hi,
- base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
+ base + PCI_MSIX_ENTRY_UPPER_ADDR_OFFSET);
writel(msg->data, base + PCI_MSIX_ENTRY_DATA_OFFSET);
break;
}
entry->msg = *msg;
}
-void set_msi_irq_affinity(unsigned int irq, cpumask_t mask)
+void set_msi_affinity(unsigned int vector, cpumask_t mask)
{
- struct msi_desc *desc = irq_desc[irq].msi_desc;
+ struct msi_desc *desc = irq_desc[vector].msi_desc;
struct msi_msg msg;
unsigned int dest;
dest = cpu_mask_to_apicid(mask);
if ( !desc )
- return;
+ return;
- ASSERT(spin_is_locked(&irq_desc[irq].lock));
- spin_lock(&desc->dev->lock);
+ ASSERT(spin_is_locked(&irq_desc[vector].lock));
read_msi_msg(desc, &msg);
msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
msg.address_lo |= MSI_ADDR_DEST_ID(dest);
write_msi_msg(desc, &msg);
- spin_unlock(&desc->dev->lock);
}
static void msi_set_enable(struct pci_dev *dev, int enable)
}
}
-static void msix_flush_writes(unsigned int irq)
+static void msix_flush_writes(unsigned int vector)
{
- struct msi_desc *entry = irq_desc[irq].msi_desc;
+ struct msi_desc *entry = irq_desc[vector].msi_desc;
BUG_ON(!entry || !entry->dev);
switch (entry->msi_attrib.type) {
break;
case PCI_CAP_ID_MSIX:
{
- int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+ int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
readl(entry->mask_base + offset);
break;
}
}
}
-static void msi_set_mask_bit(unsigned int irq, int flag)
+int msi_maskable_irq(const struct msi_desc *entry)
+{
+ BUG_ON(!entry);
+ return entry->msi_attrib.type != PCI_CAP_ID_MSI
+ || entry->msi_attrib.maskbit;
+}
+
+static void msi_set_mask_bit(unsigned int vector, int flag)
{
- struct msi_desc *entry = irq_desc[irq].msi_desc;
+ struct msi_desc *entry = irq_desc[vector].msi_desc;
- ASSERT(spin_is_locked(&irq_desc[irq].lock));
+ ASSERT(spin_is_locked(&irq_desc[vector].lock));
BUG_ON(!entry || !entry->dev);
switch (entry->msi_attrib.type) {
case PCI_CAP_ID_MSI:
mask_bits &= ~(1);
mask_bits |= flag;
pci_conf_write32(bus, slot, func, pos, mask_bits);
- } else {
- msi_set_enable(entry->dev, !flag);
}
break;
case PCI_CAP_ID_MSIX:
{
- int offset = entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE +
- PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
+ int offset = PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET;
writel(flag, entry->mask_base + offset);
readl(entry->mask_base + offset);
break;
entry->msi_attrib.masked = !!flag;
}
-void mask_msi_irq(unsigned int irq)
+void mask_msi_vector(unsigned int vector)
{
- msi_set_mask_bit(irq, 1);
- msix_flush_writes(irq);
+ msi_set_mask_bit(vector, 1);
+ msix_flush_writes(vector);
}
-void unmask_msi_irq(unsigned int irq)
+void unmask_msi_vector(unsigned int vector)
{
- msi_set_mask_bit(irq, 0);
- msix_flush_writes(irq);
+ msi_set_mask_bit(vector, 0);
+ msix_flush_writes(vector);
}
static struct msi_desc* alloc_msi_entry(void)
return entry;
}
-static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
+int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc)
{
struct msi_msg msg;
return 0;
}
-static void teardown_msi_vector(int vector)
+void teardown_msi_vector(int vector)
{
unset_vector_msi(vector);
}
-static void msi_free_vector(int vector)
+int msi_free_vector(struct msi_desc *entry)
{
- struct msi_desc *entry;
-
- ASSERT(spin_is_locked(&irq_desc[vector].lock));
- entry = irq_desc[vector].msi_desc;
- teardown_msi_vector(vector);
-
if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
{
unsigned long start;
- writel(1, entry->mask_base + entry->msi_attrib.entry_nr
- * PCI_MSIX_ENTRY_SIZE
- + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
+ writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
start = (unsigned long)entry->mask_base & ~(PAGE_SIZE - 1);
- msix_fixmap_free(virt_to_fix(start));
- destroy_xen_mappings(start, start + PAGE_SIZE);
+ msix_put_fixmap(entry->dev, virt_to_fix(start));
}
list_del(&entry->list);
xfree(entry);
+ return 0;
}
static struct msi_desc *find_msi_entry(struct pci_dev *dev,
* multiple messages. A return of zero indicates the successful setup
* of an entry zero with the new MSI irq or non-zero for otherwise.
**/
-static int msi_capability_init(struct pci_dev *dev, int vector)
+static int msi_capability_init(struct pci_dev *dev,
+ int vector,
+ struct msi_desc **desc)
{
struct msi_desc *entry;
- int pos, ret;
+ int pos;
u16 control;
u8 bus = dev->bus;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
+ ASSERT(spin_is_locked(&pcidevs_lock));
pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSI);
control = pci_conf_read16(bus, slot, func, msi_control_reg(pos));
/* MSI Entry Initialization */
entry->vector = vector;
if ( is_mask_bit_support(control) )
entry->mask_base = (void __iomem *)(long)msi_mask_bits_reg(pos,
- is_64bit_address(control));
+ is_64bit_address(control));
entry->dev = dev;
if ( entry->msi_attrib.maskbit )
{
unsigned int maskbits, temp;
/* All MSIs are unmasked by default, Mask them all */
maskbits = pci_conf_read32(bus, slot, func,
- msi_mask_bits_reg(pos, is_64bit_address(control)));
+ msi_mask_bits_reg(pos, is_64bit_address(control)));
temp = (1 << multi_msi_capable(control));
temp = ((temp - 1) & ~temp);
maskbits |= temp;
pci_conf_write32(bus, slot, func,
- msi_mask_bits_reg(pos, is_64bit_address(control)),
- maskbits);
+ msi_mask_bits_reg(pos, is_64bit_address(control)),
+ maskbits);
}
list_add_tail(&entry->list, &dev->msi_list);
- /* Configure MSI capability structure */
- ret = setup_msi_irq(dev, entry);
- if ( ret )
- {
- msi_free_vector(vector);
- return ret;
- }
-
+ *desc = entry;
/* Restore the original MSI enabled bits */
pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
* single MSI-X irq. A return of zero indicates the successful setup of
* requested MSI-X entries with allocated irqs or non-zero for otherwise.
**/
-static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi)
+static int msix_capability_init(struct pci_dev *dev,
+ struct msi_info *msi,
+ struct msi_desc **desc)
{
struct msi_desc *entry;
int pos;
u16 control;
- unsigned long phys_addr;
- u32 table_offset;
+ unsigned long table_paddr, entry_paddr;
+ u32 table_offset, entry_offset;
u8 bir;
void __iomem *base;
int idx;
u8 slot = PCI_SLOT(dev->devfn);
u8 func = PCI_FUNC(dev->devfn);
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ ASSERT(desc);
+
pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */
table_offset = pci_conf_read32(bus, slot, func, msix_table_offset_reg(pos));
bir = (u8)(table_offset & PCI_MSIX_FLAGS_BIRMASK);
table_offset &= ~PCI_MSIX_FLAGS_BIRMASK;
- phys_addr = msi->table_base + table_offset;
- idx = msix_fixmap_alloc();
+ entry_offset = msi->entry_nr * PCI_MSIX_ENTRY_SIZE;
+
+ table_paddr = msi->table_base + table_offset;
+ entry_paddr = table_paddr + entry_offset;
+ idx = msix_get_fixmap(dev, table_paddr, entry_paddr);
if ( idx < 0 )
{
xfree(entry);
- return -ENOMEM;
+ return idx;
}
- set_fixmap_nocache(idx, phys_addr);
- base = (void *)(fix_to_virt(idx) + (phys_addr & ((1UL << PAGE_SHIFT) - 1)));
+ base = (void *)(fix_to_virt(idx) + (entry_paddr & ((1UL << PAGE_SHIFT) - 1)));
entry->msi_attrib.type = PCI_CAP_ID_MSIX;
entry->msi_attrib.is_64 = 1;
list_add_tail(&entry->list, &dev->msi_list);
- setup_msi_irq(dev, entry);
+ /* Mask interrupt here */
+ writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
- /* Set MSI-X enabled bits */
+ *desc = entry;
+ /* Restore MSI-X enabled bits */
pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
return 0;
* indicates the successful setup of an entry zero with the new MSI
* irq or non-zero for otherwise.
**/
-static int __pci_enable_msi(struct msi_info *msi)
+static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
{
int status;
struct pci_dev *pdev;
- pdev = pci_lock_pdev(msi->bus, msi->devfn);
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ pdev = pci_get_pdev(msi->bus, msi->devfn);
if ( !pdev )
- return -ENODEV;
+ return -ENODEV;
if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) )
{
- spin_unlock(&pdev->lock);
dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on "
- "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
- PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
+ "device %02x:%02x.%01x.\n", msi->vector, msi->bus,
+ PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
return 0;
}
- status = msi_capability_init(pdev, msi->vector);
- spin_unlock(&pdev->lock);
+ status = msi_capability_init(pdev, msi->vector, desc);
return status;
}
-static void __pci_disable_msi(int vector)
+static void __pci_disable_msi(struct msi_desc *entry)
{
- struct msi_desc *entry;
struct pci_dev *dev;
int pos;
u16 control;
u8 bus, slot, func;
- entry = irq_desc[vector].msi_desc;
- if ( !entry )
- return;
- /*
- * Lock here is safe. msi_desc can not be removed without holding
- * both irq_desc[].lock (which we do) and pdev->lock.
- */
- spin_lock(&entry->dev->lock);
dev = entry->dev;
bus = dev->bus;
slot = PCI_SLOT(dev->devfn);
BUG_ON(list_empty(&dev->msi_list));
- msi_free_vector(vector);
-
- pci_conf_write16(bus, slot, func, msi_control_reg(pos), control);
- spin_unlock(&dev->lock);
}
/**
* of irqs available. Driver should use the returned value to re-send
* its request.
**/
-static int __pci_enable_msix(struct msi_info *msi)
+static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc)
{
int status, pos, nr_entries;
struct pci_dev *pdev;
u8 slot = PCI_SLOT(msi->devfn);
u8 func = PCI_FUNC(msi->devfn);
- pdev = pci_lock_pdev(msi->bus, msi->devfn);
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ pdev = pci_get_pdev(msi->bus, msi->devfn);
if ( !pdev )
- return -ENODEV;
+ return -ENODEV;
pos = pci_find_cap_offset(msi->bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos));
nr_entries = multi_msix_capable(control);
- if (msi->entry_nr > nr_entries)
- {
- spin_unlock(&pdev->lock);
+ if (msi->entry_nr >= nr_entries)
return -EINVAL;
- }
if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) )
{
- spin_unlock(&pdev->lock);
dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on "
"device %02x:%02x.%01x.\n", msi->vector, msi->bus,
PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn));
return 0;
}
- status = msix_capability_init(pdev, msi);
- spin_unlock(&pdev->lock);
+ status = msix_capability_init(pdev, msi, desc);
return status;
}
-static void __pci_disable_msix(int vector)
+static void __pci_disable_msix(struct msi_desc *entry)
{
- struct msi_desc *entry;
struct pci_dev *dev;
int pos;
u16 control;
u8 bus, slot, func;
- entry = irq_desc[vector].msi_desc;
- if ( !entry )
- return;
- /*
- * Lock here is safe. msi_desc can not be removed without holding
- * both irq_desc[].lock (which we do) and pdev->lock.
- */
- spin_lock(&entry->dev->lock);
dev = entry->dev;
bus = dev->bus;
slot = PCI_SLOT(dev->devfn);
pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
- msi_set_enable(dev, 0);
+ msix_set_enable(dev, 0);
BUG_ON(list_empty(&dev->msi_list));
- msi_free_vector(vector);
+ writel(1, entry->mask_base + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET);
pci_conf_write16(bus, slot, func, msix_control_reg(pos), control);
- spin_unlock(&dev->lock);
}
-int pci_enable_msi(struct msi_info *msi)
+/*
+ * Notice: only construct the msi_desc
+ * no change to irq_desc here, and the interrupt is masked
+ */
+int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc)
{
- ASSERT(spin_is_locked(&irq_desc[msi->vector].lock));
+ ASSERT(spin_is_locked(&pcidevs_lock));
- return msi->table_base ? __pci_enable_msix(msi) :
- __pci_enable_msi(msi);
+ return msi->table_base ? __pci_enable_msix(msi, desc) :
+ __pci_enable_msi(msi, desc);
}
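/*
 * Illustrative sketch (not part of this patch): with the enable path split
 * as above, a caller is expected to hold pcidevs_lock, have pci_enable_msi()
 * construct the (still masked) msi_desc, and only then bind it to the vector
 * via setup_msi_irq().  The helper name and error handling below are
 * assumptions for illustration only.
 */
static int example_bind_msi(struct pci_dev *pdev, struct msi_info *msi)
{
    struct msi_desc *desc;
    int rc;

    ASSERT(spin_is_locked(&pcidevs_lock));

    rc = pci_enable_msi(msi, &desc);     /* builds msi_desc; IRQ stays masked */
    if ( rc )
        return rc;

    rc = setup_msi_irq(pdev, desc);      /* hook the descriptor up to irq_desc */
    if ( rc )
        msi_free_vector(desc);           /* undo the allocation on failure */

    return rc;
}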
-void pci_disable_msi(int vector)
+/*
+ * Device only, no irq_desc
+ */
+void pci_disable_msi(struct msi_desc *msi_desc)
{
- irq_desc_t *desc = &irq_desc[vector];
- ASSERT(spin_is_locked(&desc->lock));
- if ( !desc->msi_desc )
- return;
-
- if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
- __pci_disable_msi(vector);
- else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
- __pci_disable_msix(vector);
+ if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI )
+ __pci_disable_msi(msi_desc);
+ else if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX )
+ __pci_disable_msix(msi_desc);
}
static void msi_free_vectors(struct pci_dev* dev)
{
struct msi_desc *entry, *tmp;
irq_desc_t *desc;
- unsigned long flags;
+ unsigned long flags, vector;
-retry:
list_for_each_entry_safe( entry, tmp, &dev->msi_list, list )
{
- desc = &irq_desc[entry->vector];
+ vector = entry->vector;
+ desc = &irq_desc[vector];
+ pci_disable_msi(entry);
- local_irq_save(flags);
- if ( !spin_trylock(&desc->lock) )
- {
- local_irq_restore(flags);
- goto retry;
- }
+ spin_lock_irqsave(&desc->lock, flags);
+
+ teardown_msi_vector(vector);
if ( desc->handler == &pci_msi_type )
{
desc->handler = &no_irq_type;
}
- msi_free_vector(entry->vector);
spin_unlock_irqrestore(&desc->lock, flags);
+ msi_free_vector(entry);
}
}
msi_free_vectors(pdev);
}
+int pci_restore_msi_state(struct pci_dev *pdev)
+{
+ unsigned long flags;
+ int vector;
+ struct msi_desc *entry, *tmp;
+ irq_desc_t *desc;
+
+ ASSERT(spin_is_locked(&pcidevs_lock));
+
+ if (!pdev)
+ return -EINVAL;
+
+ list_for_each_entry_safe( entry, tmp, &pdev->msi_list, list )
+ {
+ vector = entry->vector;
+ desc = &irq_desc[vector];
+
+ spin_lock_irqsave(&desc->lock, flags);
+
+ ASSERT(desc->msi_desc == entry);
+
+ if (desc->msi_desc != entry)
+ {
+ dprintk(XENLOG_ERR, "Restore MSI for dev %x:%x not set before?\n",
+ pdev->bus, pdev->devfn);
+ spin_unlock_irqrestore(&desc->lock, flags);
+ return -EINVAL;
+ }
+
+ if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ msi_set_enable(pdev, 0);
+ else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
+ msix_set_enable(pdev, 0);
+
+ write_msi_msg(entry, &entry->msg);
+
+ msi_set_mask_bit(vector, entry->msi_attrib.masked);
+
+ if ( entry->msi_attrib.type == PCI_CAP_ID_MSI )
+ msi_set_enable(pdev, 1);
+ else if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX )
+ msix_set_enable(pdev, 1);
+
+ spin_unlock_irqrestore(&desc->lock, flags);
+ }
+
+ return 0;
+}
+
+unsigned int pci_msix_get_table_len(struct pci_dev *pdev)
+{
+ int pos;
+ u16 control;
+ u8 bus, slot, func;
+ unsigned int len;
+
+ bus = pdev->bus;
+ slot = PCI_SLOT(pdev->devfn);
+ func = PCI_FUNC(pdev->devfn);
+
+ pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX);
+ if ( !pos )
+ return 0;
+
+ control = pci_conf_read16(bus, slot, func, msix_control_reg(pos));
+ len = msix_table_size(control) * PCI_MSIX_ENTRY_SIZE;
+
+ return len;
+}
#define P6_EVNTSEL_INT (1 << 20)
#define P6_EVNTSEL_OS (1 << 17)
#define P6_EVNTSEL_USR (1 << 16)
-#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
-#define P6_NMI_EVENT P6_EVENT_CPU_CLOCKS_NOT_HALTED
+#define P6_EVENT_CPU_CLOCKS_NOT_HALTED 0x79
+#define CORE_EVENT_CPU_CLOCKS_NOT_HALTED 0x3c
#define P4_ESCR_EVENT_SELECT(N) ((N)<<25)
#define P4_CCCR_OVF_PMI0 (1<<26)
printk("\n");
- /* now that we know it works we can reduce NMI frequency to
- something more reasonable; makes a difference in some configs */
+ /*
+ * Now that we know it works we can reduce NMI frequency to
+ * something more reasonable; makes a difference in some configs.
+ * There's a limit to how slow we can go because writing the perfctr
+ * MSRs only sets the low 32 bits, with the top 8 bits sign-extended
+ * from those, so it's not possible to set up a delay larger than
+ * 2^31 cycles and smaller than (2^40 - 2^31) cycles.
+ * (Intel SDM, section 18.22.2)
+ */
if ( nmi_watchdog == NMI_LOCAL_APIC )
- nmi_hz = 1;
+ nmi_hz = max(1ul, cpu_khz >> 20);
return 0;
}
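/*
 * Illustrative arithmetic (not part of this patch): the watchdog counter is
 * reloaded with roughly -(cycles per NMI) = -(cpu_khz * 1000 / nmi_hz).  With
 * nmi_hz = cpu_khz >> 20 that is about -(1000 << 20) ~= -2^30 cycles, safely
 * inside the 2^31 bound described above.  The helper name is an assumption,
 * for illustration only.
 */
static uint64_t example_watchdog_reload(unsigned long cpu_khz, unsigned int nmi_hz)
{
    uint64_t cycles = (uint64_t)cpu_khz * 1000 / nmi_hz; /* cycles between NMIs */

    BUG_ON(cycles >= (1UL << 31)); /* must survive the sign-extended MSR write */
    return cycles;
}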
wrmsr(MSR_K7_EVNTSEL0, evntsel, 0);
}
-static void __pminit setup_p6_watchdog(void)
+static void __pminit setup_p6_watchdog(unsigned counter)
{
unsigned int evntsel;
evntsel = P6_EVNTSEL_INT
| P6_EVNTSEL_OS
| P6_EVNTSEL_USR
- | P6_NMI_EVENT;
+ | counter;
wrmsr(MSR_P6_EVNTSEL0, evntsel, 0);
write_watchdog_counter("P6_PERFCTR0");
nmi_perfctr_msr = MSR_P4_IQ_PERFCTR0;
nmi_p4_cccr_val = P4_NMI_IQ_CCCR0;
- if ( smp_num_siblings == 2 )
+ if ( boot_cpu_data.x86_num_siblings == 2 )
nmi_p4_cccr_val |= P4_CCCR_OVF_PMI1;
if (!(misc_enable & MSR_IA32_MISC_ENABLE_PEBS_UNAVAIL))
case X86_VENDOR_INTEL:
switch (boot_cpu_data.x86) {
case 6:
- setup_p6_watchdog();
+ setup_p6_watchdog((boot_cpu_data.x86_model < 14)
+ ? P6_EVENT_CPU_CLOCKS_NOT_HALTED
+ : CORE_EVENT_CPU_CLOCKS_NOT_HALTED);
break;
case 15:
if (!setup_p4_watchdog())
for_each_online_node(i)
page_num_node[i] = 0;
- list_for_each_entry(page, &d->page_list, list)
+ page_list_for_each(page, &d->page_list)
{
i = phys_to_nid(page_to_mfn(page) << PAGE_SHIFT);
page_num_node[i]++;
static char *cpu_type;
extern int is_active(struct domain *d);
+extern int is_passive(struct domain *d);
+
+static int passive_domain_msr_op_checks(struct cpu_user_regs *regs, int *typep, int *indexp)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(current);
+ if ( model == NULL )
+ return 0;
+ if ( model->is_arch_pmu_msr == NULL )
+ return 0;
+ if ( !model->is_arch_pmu_msr((u64)regs->ecx, typep, indexp) )
+ return 0;
+
+ if ( !(vpmu->flags & PASSIVE_DOMAIN_ALLOCATED) )
+ if ( ! model->allocated_msr(current) )
+ return 0;
+ return 1;
+}
+
+int passive_domain_do_rdmsr(struct cpu_user_regs *regs)
+{
+ u64 msr_content;
+ int type, index;
+
+    if ( !passive_domain_msr_op_checks(regs, &type, &index) )
+ return 0;
+
+ model->load_msr(current, type, index, &msr_content);
+ regs->eax = msr_content & 0xFFFFFFFF;
+ regs->edx = msr_content >> 32;
+ return 1;
+}
+
+int passive_domain_do_wrmsr(struct cpu_user_regs *regs)
+{
+ u64 msr_content;
+ int type, index;
+
+    if ( !passive_domain_msr_op_checks(regs, &type, &index) )
+ return 0;
+
+ msr_content = (u32)regs->eax | ((u64)regs->edx << 32);
+ model->save_msr(current, type, index, msr_content);
+ return 1;
+}
+
+void passive_domain_destroy(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ if ( vpmu->flags & PASSIVE_DOMAIN_ALLOCATED )
+ model->free_msr(v);
+}
static int nmi_callback(struct cpu_user_regs *regs, int cpu)
{
if ( ovf && is_active(current->domain) && !xen_mode )
send_guest_vcpu_virq(current, VIRQ_XENOPROF);
+ if ( ovf == 2 )
+ test_and_set_bool(current->nmi_pending);
return 1;
}
model = &op_p4_spec;
return 1;
#else
- switch (smp_num_siblings) {
+ switch (current_cpu_data.x86_num_siblings) {
case 1:
*cpu_type = "i386/p4";
model = &op_p4_spec;
case 14:
*cpu_type = "i386/core";
break;
- case 15: case 23:
- *cpu_type = "i386/core_2";
- ppro_has_global_ctrl = 1;
- break;
+ case 15:
+ case 23:
case 26:
+ case 29:
*cpu_type = "i386/core_2";
ppro_has_global_ctrl = 1;
break;
static inline void setup_num_counters(void)
{
#ifdef CONFIG_SMP
- if (smp_num_siblings == 2)
+ if (boot_cpu_data.x86_num_siblings == 2) /* XXX */
num_counters = NUM_COUNTERS_HT2;
#endif
}
static int inline addr_increment(void)
{
#ifdef CONFIG_SMP
- return smp_num_siblings == 2 ? 2 : 1;
+ return boot_cpu_data.x86_num_siblings == 2 ? 2 : 1;
#else
return 1;
#endif
#include <xen/sched.h>
#include <asm/regs.h>
#include <asm/current.h>
+#include <asm/hvm/vmx/vpmu.h>
+#include <asm/hvm/vmx/vpmu_core2.h>
#include "op_x86_model.h"
#include "op_counter.h"
#define CTRL_SET_KERN(val,k) (val |= ((k & 1) << 17))
#define CTRL_SET_UM(val, m) (val |= (m << 8))
#define CTRL_SET_EVENT(val, e) (val |= e)
-
+#define IS_ACTIVE(val) (val & (1 << 22) )
+#define IS_ENABLE(val) (val & (1 << 20) )
static unsigned long reset_value[NUM_COUNTERS];
int ppro_has_global_ctrl = 0;
+extern int is_passive(struct domain *d);
static void ppro_fill_in_addresses(struct op_msrs * const msrs)
{
int ovf = 0;
unsigned long eip = regs->eip;
int mode = xenoprofile_get_mode(current, regs);
+ struct arch_msr_pair *msrs_content = vcpu_vpmu(current)->context;
for (i = 0 ; i < NUM_COUNTERS; ++i) {
if (!reset_value[i])
if (CTR_OVERFLOWED(low)) {
xenoprof_log_event(current, regs, eip, mode, i);
CTR_WRITE(reset_value[i], msrs, i);
- ovf = 1;
+ if ( is_passive(current->domain) && (mode != 2) &&
+ (vcpu_vpmu(current)->flags & PASSIVE_DOMAIN_ALLOCATED) )
+ {
+ if ( IS_ACTIVE(msrs_content[i].control) )
+ {
+ msrs_content[i].counter = (low | (u64)high << 32);
+ if ( IS_ENABLE(msrs_content[i].control) )
+ ovf = 2;
+ }
+ }
+ if ( !ovf )
+ ovf = 1;
}
}
wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
}
+static int ppro_is_arch_pmu_msr(u64 msr_index, int *type, int *index)
+{
+ if ( (msr_index >= MSR_IA32_PERFCTR0) &&
+ (msr_index < (MSR_IA32_PERFCTR0 + NUM_COUNTERS)) )
+ {
+ *type = MSR_TYPE_ARCH_COUNTER;
+ *index = msr_index - MSR_IA32_PERFCTR0;
+ return 1;
+ }
+ if ( (msr_index >= MSR_P6_EVNTSEL0) &&
+ (msr_index < (MSR_P6_EVNTSEL0 + NUM_CONTROLS)) )
+ {
+ *type = MSR_TYPE_ARCH_CTRL;
+ *index = msr_index - MSR_P6_EVNTSEL0;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int ppro_allocate_msr(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+ struct arch_msr_pair *msr_content;
+
+ msr_content = xmalloc_bytes( sizeof(struct arch_msr_pair) * NUM_COUNTERS );
+ if ( !msr_content )
+ goto out;
+ memset(msr_content, 0, sizeof(struct arch_msr_pair) * NUM_COUNTERS);
+ vpmu->context = (void *)msr_content;
+ vpmu->flags = 0;
+ vpmu->flags |= PASSIVE_DOMAIN_ALLOCATED;
+ return 1;
+out:
+ gdprintk(XENLOG_WARNING, "Insufficient memory for oprofile, oprofile is "
+ "unavailable on domain %d vcpu %d.\n",
+ v->vcpu_id, v->domain->domain_id);
+ return 0;
+}
+
+static void ppro_free_msr(struct vcpu *v)
+{
+ struct vpmu_struct *vpmu = vcpu_vpmu(v);
+
+ if ( !(vpmu->flags & PASSIVE_DOMAIN_ALLOCATED) )
+ return;
+ xfree(vpmu->context);
+ vpmu->flags &= ~PASSIVE_DOMAIN_ALLOCATED;
+}
+
+static void ppro_load_msr(struct vcpu *v, int type, int index, u64 *msr_content)
+{
+ struct arch_msr_pair *msrs = vcpu_vpmu(v)->context;
+ switch ( type )
+ {
+ case MSR_TYPE_ARCH_COUNTER:
+ *msr_content = msrs[index].counter;
+ break;
+ case MSR_TYPE_ARCH_CTRL:
+ *msr_content = msrs[index].control;
+ break;
+ }
+}
+
+static void ppro_save_msr(struct vcpu *v, int type, int index, u64 msr_content)
+{
+ struct arch_msr_pair *msrs = vcpu_vpmu(v)->context;
+
+ switch ( type )
+ {
+ case MSR_TYPE_ARCH_COUNTER:
+ msrs[index].counter = msr_content;
+ break;
+ case MSR_TYPE_ARCH_CTRL:
+ msrs[index].control = msr_content;
+ break;
+ }
+}
struct op_x86_model_spec const op_ppro_spec = {
.num_counters = NUM_COUNTERS,
.setup_ctrs = &ppro_setup_ctrs,
.check_ctrs = &ppro_check_ctrs,
.start = &ppro_start,
- .stop = &ppro_stop
+ .stop = &ppro_stop,
+ .is_arch_pmu_msr = &ppro_is_arch_pmu_msr,
+ .allocated_msr = &ppro_allocate_msr,
+ .free_msr = &ppro_free_msr,
+ .load_msr = &ppro_load_msr,
+ .save_msr = &ppro_save_msr
};
struct cpu_user_regs * const regs);
void (*start)(struct op_msrs const * const msrs);
void (*stop)(struct op_msrs const * const msrs);
+ int (*is_arch_pmu_msr)(u64 msr_index, int *type, int *index);
+ int (*allocated_msr)(struct vcpu *v);
+ void (*free_msr)(struct vcpu *v);
+ void (*load_msr)(struct vcpu * const v, int type, int index, u64 *msr_content);
+ void (*save_msr)(struct vcpu * const v, int type, int index, u64 msr_content);
};
extern struct op_x86_model_spec const op_ppro_spec;
#include <public/xen.h>
#include <public/physdev.h>
#include <xsm/xsm.h>
+#include <asm/p2m.h>
#ifndef COMPAT
typedef long ret_t;
ret = -EINVAL;
goto free_domain;
}
- vector = IO_APIC_VECTOR(map->index);
+ vector = domain_irq_to_vector(current->domain, map->index);
if ( !vector )
{
dprintk(XENLOG_G_ERR, "dom%d: map irq with no vector %d\n",
case MAP_PIRQ_TYPE_MSI:
vector = map->index;
if ( vector == -1 )
- vector = assign_irq_vector(AUTO_ASSIGN);
+ vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
if ( vector < 0 || vector >= NR_VECTORS )
{
goto free_domain;
}
+ spin_lock(&pcidevs_lock);
/* Verify or get pirq. */
spin_lock(&d->event_lock);
+ pirq = domain_vector_to_irq(d, vector);
if ( map->pirq < 0 )
{
- if ( d->arch.vector_pirq[vector] )
+ if ( pirq )
{
dprintk(XENLOG_G_ERR, "dom%d: %d:%d already mapped to %d\n",
d->domain_id, map->index, map->pirq,
- d->arch.vector_pirq[vector]);
- pirq = d->arch.vector_pirq[vector];
+ pirq);
if ( pirq < 0 )
{
ret = -EBUSY;
}
else
{
- if ( d->arch.vector_pirq[vector] &&
- d->arch.vector_pirq[vector] != map->pirq )
+ if ( pirq && pirq != map->pirq )
{
dprintk(XENLOG_G_ERR, "dom%d: vector %d conflicts with irq %d\n",
d->domain_id, map->index, map->pirq);
done:
spin_unlock(&d->event_lock);
+ spin_unlock(&pcidevs_lock);
if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) && (map->index == -1) )
free_irq_vector(vector);
free_domain:
if ( d == NULL )
return -ESRCH;
+ spin_lock(&pcidevs_lock);
spin_lock(&d->event_lock);
ret = unmap_domain_pirq(d, unmap->pirq);
spin_unlock(&d->event_lock);
+ spin_unlock(&pcidevs_lock);
rcu_unlock_domain(d);
ret = -EFAULT;
if ( copy_from_guest(&eoi, arg, 1) != 0 )
break;
+ ret = -EINVAL;
+ if ( eoi.irq < 0 || eoi.irq >= NR_IRQS )
+ break;
+ if ( v->domain->arch.pirq_eoi_map )
+ evtchn_unmask(v->domain->pirq_to_evtchn[eoi.irq]);
ret = pirq_guest_eoi(v->domain, eoi.irq);
break;
}
+ case PHYSDEVOP_pirq_eoi_gmfn: {
+ struct physdev_pirq_eoi_gmfn info;
+ unsigned long mfn;
+
+ BUILD_BUG_ON(NR_IRQS > (PAGE_SIZE * 8));
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&info, arg, 1) != 0 )
+ break;
+
+ ret = -EINVAL;
+ mfn = gmfn_to_mfn(current->domain, info.gmfn);
+ if ( !mfn_valid(mfn) ||
+ !get_page_and_type(mfn_to_page(mfn), v->domain,
+ PGT_writable_page) )
+ break;
+
+ if ( cmpxchg(&v->domain->arch.pirq_eoi_map_mfn, 0, mfn) != 0 )
+ {
+ put_page_and_type(mfn_to_page(mfn));
+ ret = -EBUSY;
+ break;
+ }
+
+ v->domain->arch.pirq_eoi_map = map_domain_page_global(mfn);
+ if ( v->domain->arch.pirq_eoi_map == NULL )
+ {
+ v->domain->arch.pirq_eoi_map_mfn = 0;
+ put_page_and_type(mfn_to_page(mfn));
+ ret = -ENOSPC;
+ break;
+ }
+
+ ret = 0;
+ break;
+ }
+
/* Legacy since 0x00030202. */
case PHYSDEVOP_IRQ_UNMASK_NOTIFY: {
ret = pirq_guest_unmask(v->domain);
if ( (irq < 0) || (irq >= NR_IRQS) )
break;
irq_status_query.flags = 0;
- if ( pirq_acktype(v->domain, irq) != 0 )
- irq_status_query.flags |= XENIRQSTAT_needs_eoi;
+ /*
+ * Even edge-triggered or message-based IRQs can need masking from
+     * time to time. If the guest is not dynamically checking for this
+ * via the new pirq_eoi_map mechanism, it must conservatively always
+ * execute the EOI hypercall. In practice, this only really makes a
+ * difference for maskable MSI sources, and if those are supported
+ * then dom0 is probably modern anyway.
+ */
+ irq_status_query.flags |= XENIRQSTAT_needs_eoi;
if ( pirq_shared(v->domain, irq) )
irq_status_query.flags |= XENIRQSTAT_shared;
ret = copy_to_guest(arg, &irq_status_query, 1) ? -EFAULT : 0;
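/*
 * Illustrative guest-side sketch (not part of this patch): a guest that has
 * registered a bitmap page via PHYSDEVOP_pirq_eoi_gmfn can avoid issuing the
 * EOI hypercall unconditionally by testing the per-pirq bit first.  The
 * bitmap variable name is an assumption; only the hypercall interface comes
 * from this patch.
 */
static void example_guest_pirq_eoi(int pirq, unsigned long *pirq_needs_eoi_map)
{
    struct physdev_eoi eoi = { .irq = pirq };

    if ( test_bit(pirq, pirq_needs_eoi_map) )
        HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
}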
irq_op.vector = assign_irq_vector(irq);
+ spin_lock(&pcidevs_lock);
spin_lock(&dom0->event_lock);
ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector,
MAP_PIRQ_TYPE_GSI, NULL);
spin_unlock(&dom0->event_lock);
+ spin_unlock(&pcidevs_lock);
if ( copy_to_guest(arg, &irq_op, 1) != 0 )
ret = -EFAULT;
break;
}
+ case PHYSDEVOP_manage_pci_add_ext: {
+ struct physdev_manage_pci_ext manage_pci_ext;
+ struct pci_dev_info pdev_info;
+
+ ret = -EPERM;
+ if ( !IS_PRIV(current->domain) )
+ break;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&manage_pci_ext, arg, 1) != 0 )
+ break;
+
+ ret = -EINVAL;
+ if ( (manage_pci_ext.is_extfn > 1) || (manage_pci_ext.is_virtfn > 1) )
+ break;
+
+ pdev_info.is_extfn = manage_pci_ext.is_extfn;
+ pdev_info.is_virtfn = manage_pci_ext.is_virtfn;
+ pdev_info.physfn.bus = manage_pci_ext.physfn.bus;
+ pdev_info.physfn.devfn = manage_pci_ext.physfn.devfn;
+ ret = pci_add_device_ext(manage_pci_ext.bus,
+ manage_pci_ext.devfn,
+ &pdev_info);
+ break;
+ }
+
+ case PHYSDEVOP_restore_msi: {
+ struct physdev_restore_msi restore_msi;
+ struct pci_dev *pdev;
+
+ ret = -EPERM;
+ if ( !IS_PRIV(v->domain) )
+ break;
+
+ ret = -EFAULT;
+ if ( copy_from_guest(&restore_msi, arg, 1) != 0 )
+ break;
+
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev(restore_msi.bus, restore_msi.devfn);
+ ret = pdev ? pci_restore_msi_state(pdev) : -ENODEV;
+ spin_unlock(&pcidevs_lock);
+ break;
+ }
default:
ret = -ENOSYS;
break;
return cpu_frequency_change(this_cpu(freq));
}
-int xenpf_copy_px_states(struct processor_performance *pxpt,
- struct xen_processor_performance *dom0_px_info)
-{
- if (!pxpt || !dom0_px_info)
- return -EINVAL;
- return copy_from_compat(pxpt->states, dom0_px_info->states,
- dom0_px_info->state_count);
-}
-
ret_t do_platform_op(XEN_GUEST_HANDLE(xen_platform_op_t) u_xenpf_op)
{
ret_t ret = 0;
for_each_cpu_mask ( cpu, cpumap )
{
if ( (v = idle_vcpu[cpu]) != NULL )
- {
- idletime = v->runstate.time[RUNSTATE_running];
- if ( v->is_running )
- idletime += now - v->runstate.state_entry_time;
- }
- else
- {
- idletime = 0;
cpu_clear(cpu, cpumap);
- }
+ idletime = get_cpu_idle_time(cpu);
ret = -EFAULT;
if ( copy_to_guest_offset(idletimes, cpu, &idletime, 1) )
switch ( op->u.set_pminfo.type )
{
case XEN_PM_PX:
- {
-
- ret = set_px_pminfo(op->u.set_pminfo.id,
- &op->u.set_pminfo.perf);
+ if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+ {
+ ret = -ENOSYS;
+ break;
+ }
+ ret = set_px_pminfo(op->u.set_pminfo.id, &op->u.set_pminfo.perf);
break;
- }
case XEN_PM_CX:
if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_CX) )
#include <xsm/xsm.h>
#include <asm/tboot.h>
+int __init bzimage_headroom(char *image_start, unsigned long image_length);
+
#if defined(CONFIG_X86_64)
#define BOOTSTRAP_DIRECTMAP_END (1UL << 32) /* 4GB */
#define maddr_to_bootstrap_virt(m) maddr_to_virt(m)
extern u8 boot_edid_info[128];
extern struct boot_video_info boot_vid_info;
-/*
- * opt_xenheap_megabytes: Size of Xen heap in megabytes, excluding the
- * page_info table and allocation bitmap.
- */
-static unsigned int opt_xenheap_megabytes = XENHEAP_DEFAULT_MB;
-#if defined(CONFIG_X86_64)
-integer_param("xenheap_megabytes", opt_xenheap_megabytes);
-#endif
-
/* opt_nosmp: If true, secondary processors are ignored. */
static int opt_nosmp = 0;
boolean_param("nosmp", opt_nosmp);
/* **** Linux config option: propagated to domain0. */
/* xen_cpuidle: xen control cstate. */
-/*static*/ int xen_cpuidle;
+/*static*/ int xen_cpuidle = 1;
boolean_param("cpuidle", xen_cpuidle);
int early_boot = 1;
cpumask_t cpu_present_map;
unsigned long xen_phys_start;
+unsigned long allocator_bitmap_end;
+#ifdef CONFIG_X86_32
/* Limits of Xen heap, used to initialise the allocator. */
-unsigned long xenheap_phys_start, xenheap_phys_end;
+unsigned long xenheap_initial_phys_start, xenheap_phys_end;
+#endif
extern void arch_init_memory(void);
extern void init_IRQ(void);
for ( ; ; ) halt(); \
} while (0)
-static unsigned long __initdata initial_images_start, initial_images_end;
+static unsigned long __initdata initial_images_base;
+static unsigned long __initdata initial_images_start;
+static unsigned long __initdata initial_images_end;
unsigned long __init initial_images_nrpages(void)
{
- ASSERT(!(initial_images_start & ~PAGE_MASK));
+ ASSERT(!(initial_images_base & ~PAGE_MASK));
ASSERT(!(initial_images_end & ~PAGE_MASK));
return ((initial_images_end >> PAGE_SHIFT) -
- (initial_images_start >> PAGE_SHIFT));
+ (initial_images_base >> PAGE_SHIFT));
}
void __init discard_initial_images(void)
{
- init_domheap_pages(initial_images_start, initial_images_end);
+ init_domheap_pages(initial_images_base, initial_images_end);
}
extern char __per_cpu_start[], __per_cpu_data_end[], __per_cpu_end[];
static void __init init_idle_domain(void)
{
struct domain *idle_domain;
- unsigned int i;
/* Domain creation requires that scheduler structures are initialised. */
scheduler_init();
idle_vcpu[0] = this_cpu(curr_vcpu) = current;
setup_idle_pagetable();
-
- for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
- idle_domain->arch.mm_perdomain_pt[FIRST_RESERVED_GDT_PAGE + i] =
- l1e_from_page(virt_to_page(boot_cpu_gdt_table) + i,
- __PAGE_HYPERVISOR);
-
}
static void __init srat_detect_node(int cpu)
extern char __init_begin[], __init_end[];
/* Free (or page-protect) the init areas. */
+ memset(__init_begin, 0xcc, __init_end - __init_begin); /* int3 poison */
#ifndef MEMORY_GUARD
init_xenheap_pages(__pa(__init_begin), __pa(__init_end));
#endif
unsigned int initrdidx = 1;
multiboot_info_t *mbi = __va(mbi_p);
module_t *mod = (module_t *)__va(mbi->mods_addr);
- unsigned long nr_pages, modules_length;
+ unsigned long nr_pages, modules_length, modules_headroom;
int i, e820_warn = 0, bytes = 0;
struct ns16550_defaults ns16550 = {
.data_bits = 8,
parse_video_info();
set_current((struct vcpu *)0xfffff000); /* debug sanity */
+ idle_vcpu[0] = current;
set_processor_id(0); /* needed early, for smp_processor_id() */
if ( cpu_has_efer )
rdmsrl(MSR_EFER, this_cpu(efer));
/* Sanitise the raw E820 map to produce a final clean version. */
max_page = init_e820(memmap_type, e820_raw, &e820_raw_nr);
-#ifdef CONFIG_X86_64
- /*
- * On x86/64 we are able to account for the allocation bitmap
- * (allocated in common/page_alloc.c:init_boot_allocator()) stealing
- * from the Xen heap. Here we make the Xen heap appropriately larger.
- */
- opt_xenheap_megabytes += (max_page / 8) >> 20;
-#endif
-
- /*
- * Since there are some stubs getting built on the stacks which use
- * direct calls/jumps, the heap must be confined to the lower 2G so
- * that those branches can reach their targets.
- */
- if ( opt_xenheap_megabytes > 2048 )
- opt_xenheap_megabytes = 2048;
-
/* Create a temporary copy of the E820 map. */
memcpy(&boot_e820, &e820, sizeof(e820));
* x86/64, we relocate Xen to higher memory.
*/
modules_length = mod[mbi->mods_count-1].mod_end - mod[0].mod_start;
+
+ /* ensure mod[0] is mapped before parsing */
+ bootstrap_map(mod[0].mod_start, mod[0].mod_end);
+ modules_headroom = bzimage_headroom(
+ (char *)(unsigned long)mod[0].mod_start,
+ (unsigned long)(mod[0].mod_end - mod[0].mod_start));
+
for ( i = boot_e820.nr_map-1; i >= 0; i-- )
{
uint64_t s, e, mask = (1UL << L2_PAGETABLE_SHIFT) - 1;
s >> PAGE_SHIFT, (e-s) >> PAGE_SHIFT, PAGE_HYPERVISOR);
#if defined(CONFIG_X86_64)
+/* Relocate Xen image, allocation bitmap, and one page of padding. */
+#define reloc_size ((__pa(&_end) + max_page/8 + PAGE_SIZE + mask) & ~mask)
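/*
 * Illustrative sizing (not part of this patch): on a 4GiB machine,
 * max_page = 2^20 pages, so the boot allocator bitmap adds max_page/8 =
 * 128KiB on top of the Xen image plus one page of padding, with the total
 * rounded up to the next 2MiB boundary by the "+ mask ... & ~mask" step.
 */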
/* Is the region suitable for relocating Xen? */
- if ( !xen_phys_start && (((e-s) >> 20) >= opt_xenheap_megabytes) )
+ if ( !xen_phys_start && ((e-s) >= reloc_size) )
{
extern l2_pgentry_t l2_xenmap[];
l4_pgentry_t *pl4e;
int i, j, k;
/* Select relocation address. */
- e = (e - (opt_xenheap_megabytes << 20)) & ~mask;
+ e -= reloc_size;
xen_phys_start = e;
bootsym(trampoline_xen_phys_start) = e;
#endif
/* Is the region suitable for relocating the multiboot modules? */
- if ( !initial_images_start && (s < e) && ((e-s) >= modules_length) )
+ if ( !initial_images_start && (s < e) &&
+ ((e-s) >= (modules_length+modules_headroom)) )
{
initial_images_end = e;
e = (e - modules_length) & PAGE_MASK;
initial_images_start = e;
- move_memory(initial_images_start,
+ e -= modules_headroom;
+ initial_images_base = e;
+ move_memory(initial_images_start,
mod[0].mod_start, mod[mbi->mods_count-1].mod_end);
}
if ( !initial_images_start )
EARLY_FAIL("Not enough memory to relocate the dom0 kernel image.\n");
- reserve_e820_ram(&boot_e820, initial_images_start, initial_images_end);
+ reserve_e820_ram(&boot_e820, initial_images_base, initial_images_end);
- /* Initialise Xen heap and boot heap. */
- xenheap_phys_start = init_boot_allocator(__pa(&_end));
- xenheap_phys_end = opt_xenheap_megabytes << 20;
-#if defined(CONFIG_X86_64)
+ /* Initialise boot heap. */
+ allocator_bitmap_end = init_boot_allocator(__pa(&_end));
+#if defined(CONFIG_X86_32)
+ xenheap_initial_phys_start = allocator_bitmap_end;
+ xenheap_phys_end = DIRECTMAP_MBYTES << 20;
+#else
if ( !xen_phys_start )
EARLY_FAIL("Not enough memory to relocate Xen.\n");
- xenheap_phys_end += xen_phys_start;
- reserve_e820_ram(&boot_e820, xen_phys_start,
- xen_phys_start + (opt_xenheap_megabytes<<20));
+ reserve_e820_ram(&boot_e820, __pa(&_start), allocator_bitmap_end);
#endif
/* Late kexec reservation (dynamic start address). */
numa_initmem_init(0, max_page);
- /* Initialise the Xen heap, skipping RAM holes. */
- init_xenheap_pages(xenheap_phys_start, xenheap_phys_end);
- nr_pages = (xenheap_phys_end - xenheap_phys_start) >> PAGE_SHIFT;
-#ifdef __x86_64__
- init_xenheap_pages(xen_phys_start, __pa(&_start));
- nr_pages += (__pa(&_start) - xen_phys_start) >> PAGE_SHIFT;
- vesa_init();
-#endif
- xenheap_phys_start = xen_phys_start;
+#if defined(CONFIG_X86_32)
+ /* Initialise the Xen heap. */
+ init_xenheap_pages(xenheap_initial_phys_start, xenheap_phys_end);
+ nr_pages = (xenheap_phys_end - xenheap_initial_phys_start) >> PAGE_SHIFT;
printk("Xen heap: %luMB (%lukB)\n",
nr_pages >> (20 - PAGE_SHIFT),
nr_pages << (PAGE_SHIFT - 10));
+#endif
end_boot_allocator();
-
early_boot = 0;
+#if defined(CONFIG_X86_64)
+ vesa_init();
+#endif
+
softirq_init();
early_cpu_init();
set_in_cr4(X86_CR4_OSFXSR);
if ( cpu_has_xmm )
set_in_cr4(X86_CR4_OSXMMEXCPT);
+
+ local_irq_enable();
+
#ifdef CONFIG_X86_64
vesa_mtrr_init();
#endif
smp_prepare_cpus(max_cpus);
+ spin_debug_enable();
+
/*
* Initialise higher-level timer functions. We do this fairly late
* (post-SMP) because the time bases and scale factors need to be updated
serial_init_postirq();
- BUG_ON(!local_irq_is_enabled());
-
for_each_present_cpu ( i )
{
if ( num_online_cpus() >= max_cpus )
if ( opt_watchdog )
watchdog_enable();
+
+ if ( !tboot_protect_mem_regions() )
+ panic("Could not protect TXT memory regions\n");
/* Create initial domain 0. */
- dom0 = domain_create(0, 0, DOM0_SSIDREF);
+ dom0 = domain_create(0, DOMCRF_s3_integrity, DOM0_SSIDREF);
if ( (dom0 == NULL) || (alloc_vcpu(dom0, 0, 0) == NULL) )
panic("Error creating domain 0\n");
* above our heap. The second module, if present, is an initrd ramdisk.
*/
if ( construct_dom0(dom0,
- initial_images_start,
+ initial_images_base,
+ initial_images_start,
mod[0].mod_end-mod[0].mod_start,
_initrd_start,
_initrd_len,
int xen_in_range(paddr_t start, paddr_t end)
{
- start = max_t(paddr_t, start, xenheap_phys_start);
- end = min_t(paddr_t, end, xenheap_phys_end);
-
- return start < end;
+ int i;
+ static struct {
+ paddr_t s, e;
+ } xen_regions[5];
+
+ /* initialize first time */
+ if ( !xen_regions[0].s )
+ {
+ extern char __init_begin[], __per_cpu_start[], __per_cpu_end[],
+ __bss_start[];
+ extern unsigned long allocator_bitmap_end;
+
+ /* S3 resume code (and other real mode trampoline code) */
+ xen_regions[0].s = bootsym_phys(trampoline_start);
+ xen_regions[0].e = bootsym_phys(trampoline_end);
+ /* hypervisor code + data */
+ xen_regions[1].s =__pa(&_stext);
+ xen_regions[1].e = __pa(&__init_begin);
+ /* per-cpu data */
+ xen_regions[2].s = __pa(&__per_cpu_start);
+ xen_regions[2].e = __pa(&__per_cpu_end);
+ /* bss + boot allocator bitmap */
+ xen_regions[3].s = __pa(&__bss_start);
+ xen_regions[3].e = allocator_bitmap_end;
+ /* frametable */
+ xen_regions[4].s = (unsigned long)frame_table;
+ xen_regions[4].e = (unsigned long)frame_table +
+ PFN_UP(max_page * sizeof(*frame_table));
+ }
+
+ for ( i = 0; i < ARRAY_SIZE(xen_regions); i++ )
+ {
+ if ( (start < xen_regions[i].e) && (end > xen_regions[i].s) )
+ return 1;
+ }
+
+ return 0;
}
/*
watchdog_disable();
console_start_sync();
+ spin_debug_disable();
local_irq_enable();
/* Set if we find a B stepping CPU */
static int __devinitdata smp_b_stepping;
-/* Number of siblings per CPU package */
-int smp_num_siblings = 1;
-#ifdef CONFIG_X86_HT
-EXPORT_SYMBOL(smp_num_siblings);
-#endif
-
/* Package ID of each logical CPU */
int phys_proc_id[NR_CPUS] __read_mostly = {[0 ... NR_CPUS-1] = BAD_APICID};
static int __devinitdata tsc_sync_disabled;
/* Per CPU bogomips and other parameters */
-struct cpuinfo_x86 cpu_data[NR_CPUS] __cacheline_aligned;
+struct cpuinfo_x86 cpu_data[NR_CPUS];
EXPORT_SYMBOL(cpu_data);
u32 x86_cpu_to_apicid[NR_CPUS] __read_mostly =
/* State of each CPU. */
DEFINE_PER_CPU(int, cpu_state) = { 0 };
-static void *stack_base[NR_CPUS] __cacheline_aligned;
+static void *stack_base[NR_CPUS];
static DEFINE_SPINLOCK(cpu_add_remove_lock);
/*
/*
* Save our processor parameters
*/
- smp_store_cpu_info(cpuid);
-
- disable_APIC_timer();
+ smp_store_cpu_info(cpuid);
/*
* Allow the master to continue.
cpu_set(cpu, cpu_sibling_setup_map);
- if (smp_num_siblings > 1) {
+ if (c[cpu].x86_num_siblings > 1) {
for_each_cpu_mask(i, cpu_sibling_setup_map) {
if (phys_proc_id[cpu] == phys_proc_id[i] &&
cpu_core_id[cpu] == cpu_core_id[i]) {
cpu_set(cpu, cpu_sibling_map[cpu]);
}
- if (current_cpu_data.x86_max_cores == 1) {
+ if (c[cpu].x86_max_cores == 1) {
cpu_core_map[cpu] = cpu_sibling_map[cpu];
c[cpu].booted_cores = 1;
return;
{
unsigned char idt_load[10];
- /* If IDT table exists since last hotplug, reuse it */
- if (!idt_tables[cpu]) {
- idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
- memcpy(idt_tables[cpu], idt_table,
- IDT_ENTRIES*sizeof(idt_entry_t));
- }
-
*(unsigned short *)(&idt_load[0]) = (IDT_ENTRIES*sizeof(idt_entry_t))-1;
*(unsigned long *)(&idt_load[2]) = (unsigned long)idt_tables[cpu];
__asm__ __volatile__ ( "lidt %0" : "=m" (idt_load) );
/* We can take interrupts now: we're officially "up". */
local_irq_enable();
+ microcode_resume_cpu(cpu);
+
wmb();
startup_cpu_idle_loop();
}
return cpu;
}
-static struct vcpu *prepare_idle_vcpu(unsigned int cpu)
-{
- if (idle_vcpu[cpu])
- return idle_vcpu[cpu];
-
- return alloc_idle_vcpu(cpu);
-}
-
static void *prepare_idle_stack(unsigned int cpu)
{
if (!stack_base[cpu])
- stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER);
+ stack_base[cpu] = alloc_xenheap_pages(STACK_ORDER, 0);
return stack_base[cpu];
}
*/
{
unsigned long boot_error;
- unsigned int i;
+ unsigned int order;
int timeout;
unsigned long start_eip;
unsigned short nmi_high = 0, nmi_low = 0;
booting_cpu = cpu;
- v = prepare_idle_vcpu(cpu);
+ v = alloc_idle_vcpu(cpu);
BUG_ON(v == NULL);
/* start_eip had better be page-aligned! */
gdt = per_cpu(gdt_table, cpu);
if (gdt == boot_cpu_gdt_table) {
- i = get_order_from_pages(NR_RESERVED_GDT_PAGES);
+ order = get_order_from_pages(NR_RESERVED_GDT_PAGES);
#ifdef __x86_64__
#ifdef CONFIG_COMPAT
- page = alloc_domheap_pages(NULL, i,
+ page = alloc_domheap_pages(NULL, order,
MEMF_node(cpu_to_node(cpu)));
per_cpu(compat_gdt_table, cpu) = gdt = page_to_virt(page);
memcpy(gdt, boot_cpu_compat_gdt_table,
NR_RESERVED_GDT_PAGES * PAGE_SIZE);
gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
#endif
- page = alloc_domheap_pages(NULL, i,
+ page = alloc_domheap_pages(NULL, order,
MEMF_node(cpu_to_node(cpu)));
per_cpu(gdt_table, cpu) = gdt = page_to_virt(page);
#else
- per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(i);
+ per_cpu(gdt_table, cpu) = gdt = alloc_xenheap_pages(order, 0);
#endif
memcpy(gdt, boot_cpu_gdt_table,
NR_RESERVED_GDT_PAGES * PAGE_SIZE);
gdt[PER_CPU_GDT_ENTRY - FIRST_RESERVED_GDT_ENTRY].a = cpu;
}
- for (i = 0; i < NR_RESERVED_GDT_PAGES; ++i)
- v->domain->arch.mm_perdomain_pt
- [(v->vcpu_id << GDT_LDT_VCPU_SHIFT) +
- FIRST_RESERVED_GDT_PAGE + i]
- = l1e_from_page(virt_to_page(gdt) + i,
- __PAGE_HYPERVISOR);
-
#ifdef __i386__
if (!per_cpu(doublefault_tss, cpu)) {
per_cpu(doublefault_tss, cpu) = alloc_xenheap_page();
}
#endif
+ if (!idt_tables[cpu]) {
+ idt_tables[cpu] = xmalloc_array(idt_entry_t, IDT_ENTRIES);
+ memcpy(idt_tables[cpu], idt_table,
+ IDT_ENTRIES*sizeof(idt_entry_t));
+ }
+
/*
* This grunge runs the startup process for
* the targeted processor.
mdelay(1);
local_irq_disable();
- cpufreq_del_cpu(cpu);
-
time_suspend();
+ cpu_mcheck_disable();
+
remove_siblinginfo(cpu);
cpu_clear(cpu, map);
void __cpu_die(unsigned int cpu)
{
/* We don't do anything here: idle task is faking death itself. */
- unsigned int i;
+ unsigned int i = 0;
- for (i = 0; i < 10; i++) {
+ for (;;) {
/* They ack this in play_dead by setting CPU_DEAD */
if (per_cpu(cpu_state, cpu) == CPU_DEAD) {
- printk ("CPU %d is now offline\n", cpu);
+ printk ("CPU %u is now offline\n", cpu);
return;
}
mdelay(100);
mb();
process_pending_timers();
+ if ((++i % 10) == 0)
+ printk(KERN_ERR "CPU %u still not dead...\n", cpu);
}
- printk(KERN_ERR "CPU %u didn't die...\n", cpu);
}
static int take_cpu_down(void *unused)
printk("Prepare to bring CPU%d down...\n", cpu);
+ cpufreq_del_cpu(cpu);
+
err = stop_machine_run(take_cpu_down, NULL, cpu);
- if ( err < 0 )
+ if (err < 0)
goto out;
__cpu_die(cpu);
- if (cpu_online(cpu)) {
- printk("Bad state (DEAD, but in online map) on CPU%d\n", cpu);
- err = -EBUSY;
- }
+ BUG_ON(cpu_online(cpu));
+
+ cpu_mcheck_distribute_cmci();
+
out:
spin_unlock(&cpu_add_remove_lock);
return err;
* cpu_callin_map is set during AP kickstart process. Its reset
* when a cpu is taken offline from cpu_exit_clear().
*/
- if (!cpu_isset(cpu, cpu_callin_map))
+ if (!cpu_isset(cpu, cpu_callin_map)) {
ret = __smp_prepare_cpu(cpu);
+ smpboot_restore_warm_reset_vector();
+ }
if (ret)
return -EIO;
long arch_do_sysctl(
struct xen_sysctl *sysctl, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl)
{
- long ret = 0;
+ long ret = 0, status;
switch ( sysctl->cmd )
{
{
unsigned int cpu = sysctl->u.cpu_hotplug.cpu;
+ if (cpu_present(cpu)) {
+ status = cpu_online(cpu) ? XEN_CPU_HOTPLUG_STATUS_ONLINE :
+ XEN_CPU_HOTPLUG_STATUS_OFFLINE;
+ } else {
+ status = -EINVAL;
+ }
+
switch ( sysctl->u.cpu_hotplug.op )
{
case XEN_SYSCTL_CPU_HOTPLUG_ONLINE:
ret = cpu_up(cpu);
+ /*
+ * In the case of a true hotplug, this CPU wasn't present
+ * before, so return the 'new' status for it.
+ */
+ if (ret == 0 && status == -EINVAL)
+ status = XEN_CPU_HOTPLUG_STATUS_NEW;
break;
case XEN_SYSCTL_CPU_HOTPLUG_OFFLINE:
ret = continue_hypercall_on_cpu(
0, cpu_down_helper, (void *)(unsigned long)cpu);
break;
+ case XEN_SYSCTL_CPU_HOTPLUG_STATUS:
+ ret = 0;
+ break;
default:
ret = -EINVAL;
break;
}
+
+ /*
+ * If the operation was successful, return the old status.
+ */
+ if (ret >= 0)
+ ret = status;
}
break;
#include <xen/types.h>
#include <xen/lib.h>
#include <xen/sched.h>
+#include <xen/domain_page.h>
+#include <xen/iommu.h>
#include <asm/fixmap.h>
#include <asm/page.h>
#include <asm/processor.h>
+#include <asm/e820.h>
#include <asm/tboot.h>
+#include <crypto/vmac.h>
/* tboot=<physical address of shared page> */
static char opt_tboot[20] = "";
/* Global pointer to shared data; NULL means no measured launch. */
tboot_shared_t *g_tboot_shared;
+static vmac_t domain_mac; /* MAC for all domains during S3 */
+static vmac_t xenheap_mac; /* MAC for xen heap during S3 */
+static vmac_t frametable_mac; /* MAC for frame table during S3 */
+
static const uuid_t tboot_shared_uuid = TBOOT_SHARED_UUID;
+/* used by tboot_protect_mem_regions() and/or tboot_parse_dmar_table() */
+static uint64_t txt_heap_base, txt_heap_size;
+static uint64_t sinit_base, sinit_size;
+
+/*
+ * TXT configuration registers (offsets from TXT_{PUB, PRIV}_CONFIG_REGS_BASE)
+ */
+
+#define TXT_PUB_CONFIG_REGS_BASE 0xfed30000
+#define TXT_PRIV_CONFIG_REGS_BASE 0xfed20000
+
+/* # pages for each config regs space - used by fixmap */
+#define NR_TXT_CONFIG_PAGES ((TXT_PUB_CONFIG_REGS_BASE - \
+ TXT_PRIV_CONFIG_REGS_BASE) >> PAGE_SHIFT)
+
+/* offsets from pub/priv config space */
+#define TXTCR_SINIT_BASE 0x0270
+#define TXTCR_SINIT_SIZE 0x0278
+#define TXTCR_HEAP_BASE 0x0300
+#define TXTCR_HEAP_SIZE 0x0308
+
+extern char __init_begin[], __per_cpu_start[], __per_cpu_end[], __bss_start[];
+extern unsigned long allocator_bitmap_end;
+
+#define SHA1_SIZE 20
+typedef uint8_t sha1_hash_t[SHA1_SIZE];
+
+typedef struct __packed {
+ uint32_t version; /* currently 6 */
+ sha1_hash_t bios_acm_id;
+ uint32_t edx_senter_flags;
+ uint64_t mseg_valid;
+ sha1_hash_t sinit_hash;
+ sha1_hash_t mle_hash;
+ sha1_hash_t stm_hash;
+ sha1_hash_t lcp_policy_hash;
+ uint32_t lcp_policy_control;
+ uint32_t rlp_wakeup_addr;
+ uint32_t reserved;
+ uint32_t num_mdrs;
+ uint32_t mdrs_off;
+ uint32_t num_vtd_dmars;
+ uint32_t vtd_dmars_off;
+} sinit_mle_data_t;
+
void __init tboot_probe(void)
{
tboot_shared_t *tboot_shared;
unsigned long p_tboot_shared;
+ uint32_t map_base, map_size;
+ unsigned long map_addr;
/* Look for valid page-aligned address for shared page. */
p_tboot_shared = simple_strtoul(opt_tboot, NULL, 0);
/* Map and check for tboot UUID. */
set_fixmap(FIX_TBOOT_SHARED_BASE, p_tboot_shared);
tboot_shared = (tboot_shared_t *)fix_to_virt(FIX_TBOOT_SHARED_BASE);
+ if ( tboot_shared == NULL )
+ return;
if ( memcmp(&tboot_shared_uuid, (uuid_t *)tboot_shared, sizeof(uuid_t)) )
return;
+ /* new tboot_shared (w/ GAS support, integrity, etc.) is not backwards
+ compatible */
+ if ( tboot_shared->version < 4 ) {
+ printk("unsupported version of tboot (%u)\n", tboot_shared->version);
+ return;
+ }
+
g_tboot_shared = tboot_shared;
printk("TBOOT: found shared page at phys addr %lx:\n", p_tboot_shared);
printk(" version: %d\n", tboot_shared->version);
printk(" log_addr: 0x%08x\n", tboot_shared->log_addr);
- printk(" shutdown_entry32: 0x%08x\n", tboot_shared->shutdown_entry32);
- printk(" shutdown_entry64: 0x%08x\n", tboot_shared->shutdown_entry64);
- printk(" shutdown_type: %d\n", tboot_shared->shutdown_type);
- printk(" s3_tb_wakeup_entry: 0x%08x\n", tboot_shared->s3_tb_wakeup_entry);
- printk(" s3_k_wakeup_entry: 0x%08x\n", tboot_shared->s3_k_wakeup_entry);
- printk(" &acpi_sinfo: 0x%p\n", &tboot_shared->acpi_sinfo);
- if ( tboot_shared->version >= 0x02 )
+ printk(" shutdown_entry: 0x%08x\n", tboot_shared->shutdown_entry);
+ printk(" tboot_base: 0x%08x\n", tboot_shared->tboot_base);
+ printk(" tboot_size: 0x%x\n", tboot_shared->tboot_size);
+
+ /* these will be needed by tboot_protect_mem_regions() and/or
+ tboot_parse_dmar_table(), so get them now */
+
+ map_base = PFN_DOWN(TXT_PUB_CONFIG_REGS_BASE);
+ map_size = PFN_UP(NR_TXT_CONFIG_PAGES * PAGE_SIZE);
+ map_addr = (unsigned long)__va(map_base << PAGE_SHIFT);
+ if ( map_pages_to_xen(map_addr, map_base, map_size, __PAGE_HYPERVISOR) )
+ return;
+
+ /* TXT Heap */
+ txt_heap_base =
+ *(uint64_t *)__va(TXT_PUB_CONFIG_REGS_BASE + TXTCR_HEAP_BASE);
+ txt_heap_size =
+ *(uint64_t *)__va(TXT_PUB_CONFIG_REGS_BASE + TXTCR_HEAP_SIZE);
+
+ /* SINIT */
+ sinit_base =
+ *(uint64_t *)__va(TXT_PUB_CONFIG_REGS_BASE + TXTCR_SINIT_BASE);
+ sinit_size =
+ *(uint64_t *)__va(TXT_PUB_CONFIG_REGS_BASE + TXTCR_SINIT_SIZE);
+
+ destroy_xen_mappings((unsigned long)__va(map_base << PAGE_SHIFT),
+ (unsigned long)__va((map_base + map_size) << PAGE_SHIFT));
+}
+
+/* definitions from xen/drivers/passthrough/vtd/iommu.h
+ * used to walk through vtd page tables */
+#define LEVEL_STRIDE (9)
+#define PTE_NUM (1<<LEVEL_STRIDE)
+#define dma_pte_present(p) (((p).val & 3) != 0)
+#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
+#define agaw_to_level(val) ((val)+2)
+struct dma_pte {
+ u64 val;
+};
+
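+/* MAC (hash) every VT-d DMA page-table page reachable from pt_maddr,
+ * recursing through present entries down to the leaf level. */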
+static void update_iommu_mac(vmac_ctx_t *ctx, uint64_t pt_maddr, int level)
+{
+ int i;
+ struct dma_pte *pt_vaddr, *pte;
+ int next_level = level - 1;
+
+ if ( pt_maddr == 0 )
+ return;
+
+ pt_vaddr = (struct dma_pte *)map_domain_page(pt_maddr >> PAGE_SHIFT_4K);
+ vmac_update((void *)pt_vaddr, PAGE_SIZE, ctx);
+
+ for ( i = 0; i < PTE_NUM; i++ )
+ {
+ pte = &pt_vaddr[i];
+ if ( !dma_pte_present(*pte) )
+ continue;
+
+ if ( next_level >= 1 )
+ update_iommu_mac(ctx, dma_pte_addr(*pte), next_level);
+ }
+
+ unmap_domain_page(pt_vaddr);
+}
+
+#define is_page_in_use(page) \
+ ((page->count_info & PGC_count_mask) != 0 || page->count_info == 0)
+
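+/* MAC every in-use, non-xenheap page that is currently serving as a page table. */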
+static void update_pagetable_mac(vmac_ctx_t *ctx)
+{
+ unsigned long mfn;
+
+ for ( mfn = 0; mfn < max_page; mfn++ )
+ {
+ struct page_info *page = mfn_to_page(mfn);
+ if ( is_page_in_use(page) && !is_xen_heap_page(page) ) {
+ if ( page->count_info & PGC_page_table ) {
+ void *pg = map_domain_page(mfn);
+ vmac_update(pg, PAGE_SIZE, ctx);
+ unmap_domain_page(pg);
+ }
+ }
+ }
+}
+
+static void tboot_gen_domain_integrity(const uint8_t key[TB_KEY_SIZE],
+ vmac_t *mac)
+{
+ struct domain *d;
+ struct page_info *page;
+ uint8_t nonce[16] = {};
+ vmac_ctx_t ctx;
+
+ vmac_set_key((uint8_t *)key, &ctx);
+ for_each_domain( d )
+ {
+ if ( !d->arch.s3_integrity )
+ continue;
+ printk("MACing Domain %u\n", d->domain_id);
+
+ page_list_for_each(page, &d->page_list)
+ {
+ void *pg;
+ pg = map_domain_page(page_to_mfn(page));
+ vmac_update(pg, PAGE_SIZE, &ctx);
+ unmap_domain_page(pg);
+ }
+
+ if ( !is_idle_domain(d) )
+ {
+ struct hvm_iommu *hd = domain_hvm_iommu(d);
+ update_iommu_mac(&ctx, hd->pgd_maddr, agaw_to_level(hd->agaw));
+ }
+ }
+
+ /* MAC all shadow page tables */
+ update_pagetable_mac(&ctx);
+
+ *mac = vmac(NULL, 0, nonce, NULL, &ctx);
+
+ printk("MAC for domains is: 0x%08"PRIx64"\n", *mac);
+
+ /* wipe ctx to ensure key is not left in memory */
+ memset(&ctx, 0, sizeof(ctx));
+}
+
+static void tboot_gen_xenheap_integrity(const uint8_t key[TB_KEY_SIZE],
+ vmac_t *mac)
+{
+ unsigned long mfn;
+ uint8_t nonce[16] = {};
+ vmac_ctx_t ctx;
+
+ vmac_set_key((uint8_t *)key, &ctx);
+ for ( mfn = 0; mfn < max_page; mfn++ )
{
- printk(" tboot_base: 0x%08x\n", tboot_shared->tboot_base);
- printk(" tboot_size: 0x%x\n", tboot_shared->tboot_size);
+ struct page_info *page = __mfn_to_page(mfn);
+ if ( is_page_in_use(page) && is_xen_heap_page(page) ) {
+ void *pg = mfn_to_virt(mfn);
+ vmac_update((uint8_t *)pg, PAGE_SIZE, &ctx);
+ }
}
+ *mac = vmac(NULL, 0, nonce, NULL, &ctx);
+
+ printk("MAC for xenheap is: 0x%08"PRIx64"\n", *mac);
+
+ /* wipe ctx to ensure key is not left in memory */
+ memset(&ctx, 0, sizeof(ctx));
+}
+
+static void tboot_gen_frametable_integrity(const uint8_t key[TB_KEY_SIZE],
+ vmac_t *mac)
+{
+ uint8_t nonce[16] = {};
+ vmac_ctx_t ctx;
+
+ vmac_set_key((uint8_t *)key, &ctx);
+ *mac = vmac((uint8_t *)frame_table,
+ PFN_UP(max_page * sizeof(*frame_table)), nonce, NULL, &ctx);
+
+ printk("MAC for frametable is: 0x%08"PRIx64"\n", *mac);
+
+ /* wipe ctx to ensure key is not left in memory */
+ memset(&ctx, 0, sizeof(ctx));
}
void tboot_shutdown(uint32_t shutdown_type)
local_irq_disable();
+    /*
+     * We may be called from an interrupt context, so to prevent
+     * 'ASSERT(!in_irq());' in alloc_domheap_pages(), decrease the count.
+     */
+ while ( in_irq() )
+ irq_exit();
+
/* Create identity map for tboot shutdown code. */
- if ( g_tboot_shared->version >= 0x02 )
- {
- map_base = PFN_DOWN(g_tboot_shared->tboot_base);
- map_size = PFN_UP(g_tboot_shared->tboot_size);
- }
- else
- {
- map_base = 0;
- map_size = PFN_UP(0xa0000);
- }
+ /* do before S3 integrity because mapping tboot may change xenheap */
+ map_base = PFN_DOWN(g_tboot_shared->tboot_base);
+ map_size = PFN_UP(g_tboot_shared->tboot_size);
err = map_pages_to_xen(map_base << PAGE_SHIFT, map_base, map_size,
__PAGE_HYPERVISOR);
- if ( err != 0 )
- {
+ if ( err != 0 ) {
printk("error (0x%x) mapping tboot pages (mfns) @ 0x%x, 0x%x\n", err,
map_base, map_size);
return;
}
+ /* if this is S3 then set regions to MAC */
+ if ( shutdown_type == TB_SHUTDOWN_S3 ) {
+ /*
+ * Xen regions for tboot to MAC
+ */
+ g_tboot_shared->num_mac_regions = 5;
+ /* S3 resume code (and other real mode trampoline code) */
+ g_tboot_shared->mac_regions[0].start = bootsym_phys(trampoline_start);
+ g_tboot_shared->mac_regions[0].size = bootsym_phys(trampoline_end) -
+ bootsym_phys(trampoline_start);
+ /* hypervisor code + data */
+ g_tboot_shared->mac_regions[1].start = (uint64_t)__pa(&_stext);
+ g_tboot_shared->mac_regions[1].size = __pa(&__init_begin) -
+ __pa(&_stext);
+ /* per-cpu data */
+ g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__per_cpu_start);
+ g_tboot_shared->mac_regions[2].size = __pa(&__per_cpu_end) -
+ __pa(&__per_cpu_start);
+ /* bss */
+ g_tboot_shared->mac_regions[3].start = (uint64_t)__pa(&__bss_start);
+ g_tboot_shared->mac_regions[3].size = __pa(&_end) - __pa(&__bss_start);
+ /* boot allocator bitmap */
+ g_tboot_shared->mac_regions[4].start = (uint64_t)__pa(&_end);
+ g_tboot_shared->mac_regions[4].size = allocator_bitmap_end -
+ __pa(&_end);
+
+ /*
+ * MAC domains and other Xen memory
+ */
+ /* Xen has no better entropy source for MAC key than tboot's */
+ /* MAC domains first in case it perturbs xenheap */
+ tboot_gen_domain_integrity(g_tboot_shared->s3_key, &domain_mac);
+ tboot_gen_frametable_integrity(g_tboot_shared->s3_key, &frametable_mac);
+ tboot_gen_xenheap_integrity(g_tboot_shared->s3_key, &xenheap_mac);
+ }
+
write_ptbase(idle_vcpu[0]);
-#ifdef __x86_64__
- asm volatile ( "call *%%rdi" :: "D" (g_tboot_shared->shutdown_entry64) );
-#else
- asm volatile ( "call *%0" :: "r" (g_tboot_shared->shutdown_entry32) );
-#endif
+ ((void(*)(void))(unsigned long)g_tboot_shared->shutdown_entry)();
BUG(); /* should not reach here */
}
return (g_tboot_shared != NULL);
}
-int tboot_in_range(paddr_t start, paddr_t end)
+int __init tboot_protect_mem_regions(void)
{
- if ( g_tboot_shared == NULL || g_tboot_shared->version < 0x02 )
+ int rc;
+
+ if ( !tboot_in_measured_env() )
+ return 1;
+
+ /* TXT Heap */
+ if ( txt_heap_base == 0 )
+ return 0;
+ rc = e820_change_range_type(
+ &e820, txt_heap_base, txt_heap_base + txt_heap_size,
+ E820_RESERVED, E820_UNUSABLE);
+ if ( !rc )
return 0;
- start = max_t(paddr_t, start, g_tboot_shared->tboot_base);
- end = min_t(paddr_t, end,
- g_tboot_shared->tboot_base + g_tboot_shared->tboot_size);
-
- return start < end;
+ /* SINIT */
+ if ( sinit_base == 0 )
+ return 0;
+ rc = e820_change_range_type(
+ &e820, sinit_base, sinit_base + sinit_size,
+ E820_RESERVED, E820_UNUSABLE);
+ if ( !rc )
+ return 0;
+
+ /* TXT Private Space */
+ rc = e820_change_range_type(
+ &e820, TXT_PRIV_CONFIG_REGS_BASE,
+ TXT_PRIV_CONFIG_REGS_BASE + NR_TXT_CONFIG_PAGES * PAGE_SIZE,
+ E820_RESERVED, E820_UNUSABLE);
+ if ( !rc )
+ return 0;
+
+ return 1;
+}
+
+int __init tboot_parse_dmar_table(acpi_table_handler dmar_handler)
+{
+ uint32_t map_base, map_size;
+ unsigned long map_vaddr;
+ void *heap_ptr;
+ struct acpi_table_header *dmar_table;
+ int rc;
+
+ if ( !tboot_in_measured_env() )
+ return acpi_table_parse(ACPI_SIG_DMAR, dmar_handler);
+
+ /* ACPI tables may not be DMA protected by tboot, so use DMAR copy */
+ /* SINIT saved in SinitMleData in TXT heap (which is DMA protected) */
+
+ if ( txt_heap_base == 0 )
+ return 1;
+
+ /* map TXT heap into Xen addr space */
+ map_base = PFN_DOWN(txt_heap_base);
+ map_size = PFN_UP(txt_heap_size);
+ map_vaddr = (unsigned long)__va(map_base << PAGE_SHIFT);
+ if ( map_pages_to_xen(map_vaddr, map_base, map_size, __PAGE_HYPERVISOR) )
+ return 1;
+
+ /* walk heap to SinitMleData */
+ heap_ptr = __va(txt_heap_base);
+ /* skip BiosData */
+ heap_ptr += *(uint64_t *)heap_ptr;
+ /* skip OsMleData */
+ heap_ptr += *(uint64_t *)heap_ptr;
+ /* skip OsSinitData */
+ heap_ptr += *(uint64_t *)heap_ptr;
+ /* now points to SinitMleDataSize; set to SinitMleData */
+ heap_ptr += sizeof(uint64_t);
+ /* get addr of DMAR table */
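+    /* (vtd_dmars_off is relative to the SinitMleDataSize field, hence the adjustment) */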
+ dmar_table = (struct acpi_table_header *)(heap_ptr +
+ ((sinit_mle_data_t *)heap_ptr)->vtd_dmars_off - sizeof(uint64_t));
+
+ rc = dmar_handler(dmar_table);
+
+ destroy_xen_mappings(
+ (unsigned long)__va(map_base << PAGE_SHIFT),
+ (unsigned long)__va((map_base + map_size) << PAGE_SHIFT));
+
+    /* acpi_parse_dmar() zaps the ACPI DMAR signature in the TXT heap copy, */
+    /* but dom0 will read the real table, so we must zap it there too */
+ dmar_table = NULL;
+ acpi_get_table(ACPI_SIG_DMAR, 0, &dmar_table);
+ if ( dmar_table != NULL )
+ ((struct acpi_table_dmar *)dmar_table)->header.signature[0] = '\0';
+
+ return rc;
+}
+
+int tboot_s3_resume(void)
+{
+ vmac_t mac;
+
+ if ( !tboot_in_measured_env() )
+ return 0;
+
+ /* need to do these in reverse order of shutdown */
+ tboot_gen_xenheap_integrity(g_tboot_shared->s3_key, &mac);
+ if ( mac != xenheap_mac )
+ return -1;
+
+ tboot_gen_frametable_integrity(g_tboot_shared->s3_key, &mac);
+ if ( mac != frametable_mac )
+ return -2;
+
+ tboot_gen_domain_integrity(g_tboot_shared->s3_key, &mac);
+ if ( mac != domain_mac )
+ return -3;
+
+ return 0;
}
/*
struct cpu_time {
u64 local_tsc_stamp;
- u64 cstate_tsc_stamp;
s_time_t stime_local_stamp;
s_time_t stime_master_stamp;
struct time_scale tsc_scale;
- u64 cstate_plt_count_stamp;
};
struct platform_timesource {
+ char *id;
char *name;
u64 frequency;
u64 (*read_counter)(void);
+ int (*init)(struct platform_timesource *);
+ void (*resume)(struct platform_timesource *);
int counter_bits;
};
#define EPOCH MILLISECS(1000)
static struct timer calibration_timer;
-/* TSC is invariant on C state entry? */
-static bool_t tsc_invariant;
-
/*
* We simulate a 32-bit platform timer from the 16-bit PIT ch2 counter.
* Otherwise overflow happens too quickly (~50ms) for us to guarantee that
return product;
}
+/* Compute the reciprocal of the given time_scale. */
+static inline struct time_scale scale_reciprocal(struct time_scale scale)
+{
+ struct time_scale reciprocal;
+ u32 dividend;
+
+ dividend = 0x80000000u;
+ reciprocal.shift = 1 - scale.shift;
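+    /* Normalise the dividend so the 64/32-bit divide below cannot overflow. */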
+ while ( unlikely(dividend >= scale.mul_frac) )
+ {
+ dividend >>= 1;
+ reciprocal.shift++;
+ }
+
+ asm (
+ "divl %4"
+ : "=a" (reciprocal.mul_frac), "=d" (dividend)
+ : "0" (0), "1" (dividend), "r" (scale.mul_frac) );
+
+ return reciprocal;
+}
+
/*
* cpu_mask that denotes the CPUs that needs timer interrupt coming in as
* IPIs in place of local APIC timers
return count32;
}
-static void init_pit(struct platform_timesource *pts)
+static int init_pit(struct platform_timesource *pts)
{
- pts->name = "PIT";
- pts->frequency = CLOCK_TICK_RATE;
- pts->read_counter = read_pit_count;
- pts->counter_bits = 32;
using_pit = 1;
+ return 1;
}
+static struct platform_timesource plt_pit =
+{
+ .id = "pit",
+ .name = "PIT",
+ .frequency = CLOCK_TICK_RATE,
+ .read_counter = read_pit_count,
+ .counter_bits = 32,
+ .init = init_pit
+};
+
/************************************************************
* PLATFORM TIMER 2: HIGH PRECISION EVENT TIMER (HPET)
*/
if ( hpet_rate == 0 )
return 0;
- pts->name = "HPET";
pts->frequency = hpet_rate;
- pts->read_counter = read_hpet_count;
- pts->counter_bits = 32;
-
return 1;
}
+static void resume_hpet(struct platform_timesource *pts)
+{
+ u64 hpet_rate = hpet_setup();
+
+ BUG_ON(hpet_rate == 0);
+ pts->frequency = hpet_rate;
+}
+
+static struct platform_timesource plt_hpet =
+{
+ .id = "hpet",
+ .name = "HPET",
+ .read_counter = read_hpet_count,
+ .counter_bits = 32,
+ .init = init_hpet,
+ .resume = resume_hpet
+};
+
/************************************************************
* PLATFORM TIMER 3: IBM 'CYCLONE' TIMER
*/
printk(KERN_ERR "Cyclone: Could not find valid CBAR value.\n");
return 0;
}
-
+
/* Enable timer and map the counter register. */
*(map_cyclone_reg(base + CYCLONE_PMCC_OFFSET)) = 1;
*(map_cyclone_reg(base + CYCLONE_MPCS_OFFSET)) = 1;
cyclone_timer = map_cyclone_reg(base + CYCLONE_MPMC_OFFSET);
-
- pts->name = "IBM Cyclone";
- pts->frequency = CYCLONE_TIMER_FREQ;
- pts->read_counter = read_cyclone_count;
- pts->counter_bits = 32;
-
return 1;
}
+static struct platform_timesource plt_cyclone =
+{
+ .id = "cyclone",
+ .name = "IBM Cyclone",
+ .frequency = CYCLONE_TIMER_FREQ,
+ .read_counter = read_cyclone_count,
+ .counter_bits = 32,
+ .init = init_cyclone
+};
+
/************************************************************
* PLATFORM TIMER 4: ACPI PM TIMER
*/
if ( pmtmr_ioport == 0 )
return 0;
- pts->name = "ACPI PM Timer";
- pts->frequency = ACPI_PM_FREQUENCY;
- pts->read_counter = read_pmtimer_count;
- pts->counter_bits = 24;
-
return 1;
}
+static struct platform_timesource plt_pmtimer =
+{
+ .id = "acpi",
+ .name = "ACPI PM Timer",
+ .frequency = ACPI_PM_FREQUENCY,
+ .read_counter = read_pmtimer_count,
+ .counter_bits = 24,
+ .init = init_pmtimer
+};
+
+static struct time_scale pmt_scale;
+static struct time_scale pmt_scale_r;
+static __init int init_pmtmr_scale(void)
+{
+ set_time_scale(&pmt_scale, ACPI_PM_FREQUENCY);
+ pmt_scale_r = scale_reciprocal(pmt_scale);
+ return 0;
+}
+__initcall(init_pmtmr_scale);
+
+uint64_t acpi_pm_tick_to_ns(uint64_t ticks)
+{
+ return scale_delta(ticks, &pmt_scale);
+}
+
+uint64_t ns_to_acpi_pm_tick(uint64_t ns)
+{
+ return scale_delta(ns, &pmt_scale_r);
+}
+
/************************************************************
* GENERIC PLATFORM TIMER INFRASTRUCTURE
*/
{
u64 count;
s_time_t stamp;
+ unsigned long flags;
- spin_lock_irq(&platform_timer_lock);
+ spin_lock_irqsave(&platform_timer_lock, flags);
count = plt_stamp64 + ((plt_src.read_counter() - plt_stamp) & plt_mask);
stamp = __read_platform_stime(count);
stime_platform_stamp = stamp;
platform_timer_stamp = count;
- spin_unlock_irq(&platform_timer_lock);
+ spin_unlock_irqrestore(&platform_timer_lock, flags);
}
static void resume_platform_timer(void)
{
- /* No change in platform_stime across suspend/resume. */
- platform_timer_stamp = plt_stamp64;
+    /* The timer source may have been reset while coming back from S3 to S0. */
+ if ( plt_src.resume )
+ plt_src.resume(&plt_src);
+
+ plt_stamp64 = platform_timer_stamp;
plt_stamp = plt_src.read_counter();
}
static void init_platform_timer(void)
{
- struct platform_timesource *pts = &plt_src;
- int rc = -1;
+ static struct platform_timesource * const plt_timers[] = {
+ &plt_cyclone, &plt_hpet, &plt_pmtimer, &plt_pit
+ };
+
+ struct platform_timesource *pts = NULL;
+ int i, rc = -1;
if ( opt_clocksource[0] != '\0' )
{
- if ( !strcmp(opt_clocksource, "pit") )
- rc = (init_pit(pts), 1);
- else if ( !strcmp(opt_clocksource, "hpet") )
- rc = init_hpet(pts);
- else if ( !strcmp(opt_clocksource, "cyclone") )
- rc = init_cyclone(pts);
- else if ( !strcmp(opt_clocksource, "acpi") )
- rc = init_pmtimer(pts);
+ for ( i = 0; i < ARRAY_SIZE(plt_timers); i++ )
+ {
+ pts = plt_timers[i];
+ if ( !strcmp(opt_clocksource, pts->id) )
+ {
+ rc = pts->init(pts);
+ break;
+ }
+ }
if ( rc <= 0 )
printk("WARNING: %s clocksource '%s'.\n",
opt_clocksource);
}
- if ( (rc <= 0) &&
- !init_cyclone(pts) &&
- !init_hpet(pts) &&
- !init_pmtimer(pts) )
- init_pit(pts);
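+    /* Fall back to the first platform timer that initialises successfully. */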
+ if ( rc <= 0 )
+ {
+ for ( i = 0; i < ARRAY_SIZE(plt_timers); i++ )
+ {
+ pts = plt_timers[i];
+ if ( (rc = pts->init(pts)) > 0 )
+ break;
+ }
+ }
+
+ BUG_ON(rc <= 0);
plt_mask = (u64)~0ull >> (64 - pts->counter_bits);
plt_overflow_period = scale_delta(
1ull << (pts->counter_bits-1), &plt_scale);
init_timer(&plt_overflow_timer, plt_overflow, NULL, 0);
+ plt_src = *pts;
plt_overflow(NULL);
platform_timer_stamp = plt_stamp64;
+ stime_platform_stamp = NOW();
printk("Platform timer is %s %s\n",
freq_string(pts->frequency), pts->name);
}
-void cstate_save_tsc(void)
+void cstate_restore_tsc(void)
{
struct cpu_time *t = &this_cpu(cpu_time);
+ struct time_scale sys_to_tsc = scale_reciprocal(t->tsc_scale);
+ s_time_t stime_delta;
+ u64 tsc_delta;
- if ( tsc_invariant )
+ if ( boot_cpu_has(X86_FEATURE_NOSTOP_TSC) )
return;
- t->cstate_plt_count_stamp = plt_src.read_counter();
- rdtscll(t->cstate_tsc_stamp);
-}
-
-void cstate_restore_tsc(void)
-{
- struct cpu_time *t = &this_cpu(cpu_time);
- u64 plt_count_delta, tsc_delta;
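+    /* Reconstruct the TSC from the system time elapsed since the last calibration stamp. */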
+ stime_delta = read_platform_stime() - t->stime_master_stamp;
+ if ( stime_delta < 0 )
+ stime_delta = 0;
- if ( tsc_invariant )
- return;
+ tsc_delta = scale_delta(stime_delta, &sys_to_tsc);
- plt_count_delta = (plt_src.read_counter() -
- t->cstate_plt_count_stamp) & plt_mask;
- tsc_delta = scale_delta(plt_count_delta, &plt_scale) * cpu_khz/1000000UL;
- wrmsrl(MSR_IA32_TSC, t->cstate_tsc_stamp + tsc_delta);
+ wrmsrl(MSR_IA32_TSC, t->local_tsc_stamp + tsc_delta);
}
/***************************************************************************
/* The overall calibration scale multiplier. */
u32 calibration_mul_frac;
+ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+ {
+ /* Atomically read cpu_calibration struct and write cpu_time struct. */
+ local_irq_disable();
+ t->local_tsc_stamp = c->local_tsc_stamp;
+ t->stime_local_stamp = c->stime_master_stamp;
+ t->stime_master_stamp = c->stime_master_stamp;
+ local_irq_enable();
+ update_vcpu_system_time(current);
+ goto out;
+ }
+
prev_tsc = t->local_tsc_stamp;
prev_local_stime = t->stime_local_stamp;
prev_master_stime = t->stime_master_stamp;
*/
struct calibration_rendezvous {
cpumask_t cpu_calibration_map;
- atomic_t nr_cpus;
+ atomic_t count_start;
+ atomic_t count_end;
s_time_t master_stime;
+ u64 master_tsc_stamp;
};
+#define NR_LOOPS 5
+
static void time_calibration_rendezvous(void *_r)
{
+ int i;
struct cpu_calibration *c = &this_cpu(cpu_calibration);
struct calibration_rendezvous *r = _r;
unsigned int total_cpus = cpus_weight(r->cpu_calibration_map);
- if ( smp_processor_id() == 0 )
- {
- while ( atomic_read(&r->nr_cpus) != (total_cpus - 1) )
- cpu_relax();
- r->master_stime = read_platform_stime();
- mb(); /* write r->master_stime /then/ signal */
- atomic_inc(&r->nr_cpus);
- }
- else
+ /*
+     * Loop several times so that cache effects cannot enlarge the
+     * TSC difference among CPUs.
+ */
+ for ( i = 0; i < NR_LOOPS; i++ )
{
- atomic_inc(&r->nr_cpus);
- while ( atomic_read(&r->nr_cpus) != total_cpus )
- cpu_relax();
- mb(); /* receive signal /then/ read r->master_stime */
+ if ( smp_processor_id() == 0 )
+ {
+ while ( atomic_read(&r->count_start) != (total_cpus - 1) )
+ mb();
+
+ if ( r->master_stime == 0 )
+ {
+ r->master_stime = read_platform_stime();
+ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+ rdtscll(r->master_tsc_stamp);
+ }
+ atomic_set(&r->count_end, 0);
+ wmb();
+ atomic_inc(&r->count_start);
+
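+            /* Load the master TSC value only on the final loop iteration (see comment above). */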
+ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+ i == NR_LOOPS - 1 )
+ write_tsc((u32)r->master_tsc_stamp, (u32)(r->master_tsc_stamp >> 32));
+
+            while ( atomic_read(&r->count_end) != (total_cpus - 1) )
+ mb();
+ atomic_set(&r->count_start, 0);
+ wmb();
+ atomic_inc(&r->count_end);
+ }
+ else
+ {
+ atomic_inc(&r->count_start);
+ while ( atomic_read(&r->count_start) != total_cpus )
+ mb();
+
+ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) &&
+ i == NR_LOOPS - 1 )
+ write_tsc((u32)r->master_tsc_stamp, (u32)(r->master_tsc_stamp >> 32));
+
+ atomic_inc(&r->count_end);
+            while ( atomic_read(&r->count_end) != total_cpus )
+ mb();
+ }
}
rdtscll(c->local_tsc_stamp);
{
struct calibration_rendezvous r = {
.cpu_calibration_map = cpu_online_map,
- .nr_cpus = ATOMIC_INIT(0)
+ .count_start = ATOMIC_INIT(0),
+ .count_end = ATOMIC_INIT(0),
+ .master_stime = 0
};
/* @wait=1 because we must wait for all cpus before freeing @r. */
local_irq_save(flags);
rdtscll(t->local_tsc_stamp);
- now = !plt_src.read_counter ? 0 : read_platform_stime();
+ now = read_platform_stime();
local_irq_restore(flags);
t->stime_master_stamp = now;
/* Late init function (after all CPUs are booted). */
int __init init_xen_time(void)
{
- local_irq_disable();
-
- /* check if TSC is invariant during deep C state
- this is a new feature introduced by Nehalem*/
- if ( cpuid_edx(0x80000007) & (1u<<8) )
- tsc_invariant = 1;
+ /* If we have constant TSCs then scale factor can be shared. */
+ if ( boot_cpu_has(X86_FEATURE_CONSTANT_TSC) )
+ {
+ int cpu;
+ for_each_cpu ( cpu )
+ per_cpu(cpu_time, cpu).tsc_scale = per_cpu(cpu_time, 0).tsc_scale;
+ }
open_softirq(TIME_CALIBRATE_SOFTIRQ, local_time_calibration);
- init_percpu_time();
-
- stime_platform_stamp = 0;
- init_platform_timer();
+ /* System time (get_s_time()) starts ticking from now. */
+ rdtscll(this_cpu(cpu_time).local_tsc_stamp);
+ /* NB. get_cmos_time() can take over one second to execute. */
do_settime(get_cmos_time(), 0, NOW());
- local_irq_enable();
+ init_platform_timer();
+
+ init_percpu_time();
return 0;
}
setup_irq(0, &irq0);
}
-/* force_hpet_broadcast: if true, force using hpet_broadcast to fix lapic stop
- issue for deep C state with pit disabled */
-static int force_hpet_broadcast;
-boolean_param("hpetbroadcast", force_hpet_broadcast);
-
/* keep pit enabled for pit_broadcast working while cpuidle enabled */
static int disable_pit_irq(void)
{
- if ( using_pit || !cpu_has_apic || (xen_cpuidle && !force_hpet_broadcast) )
+ if ( using_pit || !cpu_has_apic )
return 0;
/*
* If we do not rely on PIT CH0 then we can use HPET for one-shot timer
* emulation when entering deep C states.
* XXX dom0 may rely on RTC interrupt delivery, so only enable
- * hpet_broadcast if force_hpet_broadcast.
+ * hpet_broadcast if FSB mode available or if force_hpet_broadcast.
*/
- if ( xen_cpuidle && force_hpet_broadcast )
+ if ( xen_cpuidle )
{
hpet_broadcast_init();
if ( !hpet_broadcast_is_available() )
cmos_utc_offset = -get_cmos_time();
cmos_utc_offset += (wc_sec + (wc_nsec + NOW()) / 1000000000ULL);
kill_timer(&calibration_timer);
+
+ /* Sync platform timer stamps. */
+ platform_time_calibration();
}
/* Better to cancel calibration timer for accuracy. */
{
/*u64 tmp = */init_pit_and_calibrate_tsc();
- disable_pit_irq();
-
/* Disable this while calibrate_tsc_ap() also is skipped. */
/*set_time_scale(&this_cpu(cpu_time).tsc_scale, tmp);*/
resume_platform_timer();
+ disable_pit_irq();
+
init_percpu_time();
do_settime(get_cmos_time() + cmos_utc_offset, 0, NOW());
- if ( !is_idle_vcpu(current) )
- update_vcpu_system_time(current);
+ update_vcpu_system_time(current);
return 0;
}
{
/* Modify Feature Information. */
__clear_bit(X86_FEATURE_VME, &d);
- __clear_bit(X86_FEATURE_PSE, &d);
+ if ( !cpu_has_apic )
+ __clear_bit(X86_FEATURE_APIC, &d);
+ if ( !opt_allow_hugepage )
+ __clear_bit(X86_FEATURE_PSE, &d);
__clear_bit(X86_FEATURE_PGE, &d);
- __clear_bit(X86_FEATURE_MCE, &d);
- __clear_bit(X86_FEATURE_MCA, &d);
__clear_bit(X86_FEATURE_PSE36, &d);
}
switch ( (uint32_t)regs->eax )
__clear_bit(X86_FEATURE_XTPR % 32, &c);
__clear_bit(X86_FEATURE_PDCM % 32, &c);
__clear_bit(X86_FEATURE_DCA % 32, &c);
+ __clear_bit(X86_FEATURE_XSAVE % 32, &c);
+ if ( !cpu_has_apic )
+ __clear_bit(X86_FEATURE_X2APIC % 32, &c);
+ __set_bit(X86_FEATURE_HYPERVISOR % 32, &c);
break;
case 0x80000001:
/* Modify Feature Information. */
__clear_bit(X86_FEATURE_RDTSCP % 32, &d);
__clear_bit(X86_FEATURE_SVME % 32, &c);
+ if ( !cpu_has_apic )
+ __clear_bit(X86_FEATURE_EXTAPICSPACE % 32, &c);
__clear_bit(X86_FEATURE_OSVW % 32, &c);
__clear_bit(X86_FEATURE_IBS % 32, &c);
__clear_bit(X86_FEATURE_SKINIT % 32, &c);
#endif
static int __spurious_page_fault(
- unsigned long addr, struct cpu_user_regs *regs)
+ unsigned long addr, unsigned int error_code)
{
unsigned long mfn, cr3 = read_cr3();
#if CONFIG_PAGING_LEVELS >= 4
return 0;
/* Reserved bit violations are never spurious faults. */
- if ( regs->error_code & PFEC_reserved_bit )
+ if ( error_code & PFEC_reserved_bit )
return 0;
required_flags = _PAGE_PRESENT;
- if ( regs->error_code & PFEC_write_access )
+ if ( error_code & PFEC_write_access )
required_flags |= _PAGE_RW;
- if ( regs->error_code & PFEC_user_mode )
+ if ( error_code & PFEC_user_mode )
required_flags |= _PAGE_USER;
disallowed_flags = 0;
- if ( regs->error_code & PFEC_insn_fetch )
+ if ( error_code & PFEC_insn_fetch )
disallowed_flags |= _PAGE_NX;
mfn = cr3 >> PAGE_SHIFT;
dprintk(XENLOG_WARNING, "Spurious fault in domain %u:%u "
"at addr %lx, e/c %04x\n",
current->domain->domain_id, current->vcpu_id,
- addr, regs->error_code);
+ addr, error_code);
#if CONFIG_PAGING_LEVELS >= 4
dprintk(XENLOG_WARNING, " l4e = %"PRIpte"\n", l4e_get_intpte(l4e));
#endif
#endif
dprintk(XENLOG_WARNING, " l2e = %"PRIpte"\n", l2e_get_intpte(l2e));
dprintk(XENLOG_WARNING, " l1e = %"PRIpte"\n", l1e_get_intpte(l1e));
-#ifndef NDEBUG
- show_registers(regs);
-#endif
return 1;
}
static int spurious_page_fault(
- unsigned long addr, struct cpu_user_regs *regs)
+ unsigned long addr, unsigned int error_code)
{
unsigned long flags;
int is_spurious;
* page tables from becoming invalid under our feet during the walk.
*/
local_irq_save(flags);
- is_spurious = __spurious_page_fault(addr, regs);
+ is_spurious = __spurious_page_fault(addr, error_code);
local_irq_restore(flags);
return is_spurious;
if ( in_irq() || !(regs->eflags & X86_EFLAGS_IF) )
return 0;
+ /* Faults from external-mode guests are handled by shadow/hap */
+ if ( paging_mode_external(d) && guest_mode(regs) )
+ {
+ int ret = paging_fault(addr, regs);
+ if ( ret == EXCRET_fault_fixed )
+ trace_trap_two_addr(TRC_PV_PAGING_FIXUP, regs->eip, addr);
+ return ret;
+ }
+
if ( unlikely(IN_HYPERVISOR_RANGE(addr)) )
{
- if ( paging_mode_external(d) && guest_mode(regs) )
- {
- int ret = paging_fault(addr, regs);
- if ( ret == EXCRET_fault_fixed )
- trace_trap_two_addr(TRC_PV_PAGING_FIXUP, regs->eip, addr);
- return ret;
- }
if ( !(regs->error_code & PFEC_reserved_bit) &&
(addr >= GDT_LDT_VIRT_START) && (addr < GDT_LDT_VIRT_END) )
return handle_gdt_ldt_mapping_fault(
ptwr_do_page_fault(v, addr, regs) )
return EXCRET_fault_fixed;
- if ( paging_mode_enabled(d) )
+ /* For non-external shadowed guests, we fix up both their own
+ * pagefaults and Xen's, since they share the pagetables. */
+ if ( paging_mode_enabled(d) && !paging_mode_external(d) )
{
int ret = paging_fault(addr, regs);
if ( ret == EXCRET_fault_fixed )
asmlinkage void do_page_fault(struct cpu_user_regs *regs)
{
unsigned long addr, fixup;
+ unsigned int error_code;
addr = read_cr2();
+ /* fixup_page_fault() might change regs->error_code, so cache it here. */
+ error_code = regs->error_code;
+
DEBUGGER_trap_entry(TRAP_page_fault, regs);
perfc_incr(page_faults);
if ( unlikely(!guest_mode(regs)) )
{
- if ( spurious_page_fault(addr, regs) )
+ if ( spurious_page_fault(addr, error_code) )
return;
if ( likely((fixup = search_exception_table(regs->eip)) != 0) )
panic("FATAL PAGE FAULT\n"
"[error_code=%04x]\n"
"Faulting linear address: %p\n",
- regs->error_code, _p(addr));
+ error_code, _p(addr));
}
if ( unlikely(current->domain->arch.suppress_spurious_page_faults
- && spurious_page_fault(addr, regs)) )
+ && spurious_page_fault(addr, error_code)) )
return;
propagate_page_fault(addr, regs->error_code);
# define read_sreg(regs, sr) read_segment_register(sr)
#endif
+static int is_cpufreq_controller(struct domain *d)
+{
+ return ((cpufreq_controller == FREQCTL_dom0_kernel) &&
+ (d->domain_id == 0));
+}
+
+/* Intel vMCE MSR virtualization */
+extern int intel_mce_wrmsr(u32 msr, u32 lo, u32 hi);
+extern int intel_mce_rdmsr(u32 msr, u32 *lo, u32 *hi);
+
static int emulate_privileged_op(struct cpu_user_regs *regs)
{
struct vcpu *v = current;
case 4: /* Read CR4 */
/*
* Guests can read CR4 to see what features Xen has enabled. We
- * therefore lie about PGE & PSE as they are unavailable to guests.
+ * therefore lie about PGE as it is unavailable to guests.
+ * Also disallow PSE if hugepages are not enabled.
*/
- *reg = read_cr4() & ~(X86_CR4_PGE|X86_CR4_PSE);
+ *reg = read_cr4() & ~X86_CR4_PGE;
+ if ( !opt_allow_hugepage )
+ *reg &= ~X86_CR4_PSE;
break;
default:
case MSR_K8_PSTATE7:
if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
goto fail;
- if ( cpufreq_controller != FREQCTL_dom0_kernel )
+ if ( !is_cpufreq_controller(v->domain) )
break;
if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
goto fail;
if ( wrmsr_safe(MSR_FAM10H_MMIO_CONF_BASE, eax, edx) != 0 )
goto fail;
break;
+ case MSR_IA32_MPERF:
+ case MSR_IA32_APERF:
case MSR_IA32_PERF_CTL:
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
goto fail;
- if ( cpufreq_controller != FREQCTL_dom0_kernel )
+ if ( !is_cpufreq_controller(v->domain) )
break;
if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
goto fail;
case MSR_IA32_THERM_CONTROL:
if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL )
goto fail;
+ if ( (v->domain->domain_id != 0) || !v->domain->is_pinned )
+ break;
if ( wrmsr_safe(regs->ecx, eax, edx) != 0 )
goto fail;
break;
default:
if ( wrmsr_hypervisor_regs(regs->ecx, eax, edx) )
break;
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+ {
+ int rc = intel_mce_wrmsr(regs->ecx, eax, edx);
+ if ( rc == -1 )
+ goto fail;
+ if ( rc == 0 )
+ break;
+ }
+
if ( (rdmsr_safe(regs->ecx, l, h) != 0) ||
(eax != l) || (edx != h) )
invalid:
case MSR_K8_PSTATE7:
if ( boot_cpu_data.x86_vendor != X86_VENDOR_AMD )
goto fail;
- if ( cpufreq_controller != FREQCTL_dom0_kernel )
+ if ( !is_cpufreq_controller(v->domain) )
{
regs->eax = regs->edx = 0;
break;
MSR_IA32_MISC_ENABLE_XTPR_DISABLE;
break;
case MSR_EFER:
- case MSR_IA32_THERM_CONTROL:
case MSR_AMD_PATCHLEVEL:
default:
if ( rdmsr_hypervisor_regs(regs->ecx, &l, &h) )
_p(regs->ecx));*/
if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) )
goto fail;
+
+ if ( boot_cpu_data.x86_vendor == X86_VENDOR_INTEL )
+ {
+ int rc = intel_mce_rdmsr(regs->ecx, &eax, &edx);
+ if ( rc == -1 )
+ goto fail;
+ if ( rc == 0 )
+ break;
+ }
+
break;
}
break;
__set_intr_gate(n, 0, addr);
}
-void set_tss_desc(unsigned int n, void *addr)
+void load_TR(void)
{
+ struct tss_struct *tss = &init_tss[smp_processor_id()];
+ struct desc_ptr old_gdt, tss_gdt = {
+ .base = (long)(this_cpu(gdt_table) - FIRST_RESERVED_GDT_ENTRY),
+ .limit = LAST_RESERVED_GDT_BYTE
+ };
+
_set_tssldt_desc(
- per_cpu(gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
- (unsigned long)addr,
+ this_cpu(gdt_table) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
+ (unsigned long)tss,
offsetof(struct tss_struct, __cacheline_filler) - 1,
9);
#ifdef CONFIG_COMPAT
_set_tssldt_desc(
- per_cpu(compat_gdt_table, n) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
- (unsigned long)addr,
+ this_cpu(compat_gdt_table) + TSS_ENTRY - FIRST_RESERVED_GDT_ENTRY,
+ (unsigned long)tss,
offsetof(struct tss_struct, __cacheline_filler) - 1,
11);
#endif
+
+ /* Switch to non-compat GDT (which has B bit clear) to execute LTR. */
+ asm volatile (
+ "sgdt %1; lgdt %2; ltr %%ax; lgdt %1"
+ : : "a" (TSS_ENTRY << 3), "m" (old_gdt), "m" (tss_gdt) : "memory" );
}
void __devinit percpu_traps_init(void)
t->vector = TRAP_nmi;
t->flags = 0;
- t->cs = !IS_COMPAT(d) ? FLAT_KERNEL_CS : FLAT_COMPAT_KERNEL_CS;
+ t->cs = (is_pv_32on64_domain(d) ?
+ FLAT_COMPAT_KERNEL_CS : FLAT_KERNEL_CS);
t->address = address;
TI_SET_IF(t, 1);
BLANK();
OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
- OFFSET(VCPU_vmx_emul, struct vcpu, arch.hvm_vmx.vmxemul);
+ OFFSET(VCPU_vmx_realmode, struct vcpu, arch.hvm_vmx.vmx_realmode);
+ OFFSET(VCPU_vmx_emulate, struct vcpu, arch.hvm_vmx.vmx_emulate);
+ OFFSET(VCPU_vm86_seg_mask, struct vcpu, arch.hvm_vmx.vm86_segment_mask);
OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]);
BLANK();
void *map_domain_page(unsigned long mfn)
{
unsigned long va;
- unsigned int idx, i;
+ unsigned int idx, i, flags;
struct vcpu *v;
struct mapcache_domain *dcache;
struct mapcache_vcpu *vcache;
goto out;
}
- spin_lock(&dcache->lock);
+ spin_lock_irqsave(&dcache->lock, flags);
/* Has some other CPU caused a wrap? We must flush if so. */
if ( unlikely(dcache->epoch != vcache->shadow_epoch) )
set_bit(idx, dcache->inuse);
dcache->cursor = idx + 1;
- spin_unlock(&dcache->lock);
+ spin_unlock_irqrestore(&dcache->lock, flags);
l1e_write(&dcache->l1tab[idx], l1e_from_pfn(mfn, __PAGE_HYPERVISOR));
return (void *)va;
}
-void unmap_domain_page(void *va)
+void unmap_domain_page(const void *va)
{
unsigned int idx;
struct vcpu *v;
return (void *)va;
}
-void unmap_domain_page_global(void *va)
+void unmap_domain_page_global(const void *va)
{
unsigned long __va = (unsigned long)va;
l2_pgentry_t *pl2e;
.long do_sysctl /* 35 */
.long do_domctl
.long do_kexec_op
+ .long do_tmem_op
.rept __HYPERVISOR_arch_0-((.-hypercall_table)/4)
.long do_ni_hypercall
.endr
.byte 1 /* do_sysctl */ /* 35 */
.byte 1 /* do_domctl */
.byte 2 /* do_kexec_op */
+ .byte 1 /* do_tmem_op */
.rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
* - Magnus Damm <magnus@valinux.co.jp>
*/
-#ifndef CONFIG_COMPAT
-
#include <xen/types.h>
#include <xen/kernel.h>
#include <asm/page.h>
(unsigned long)range->start;
return 0;
}
-#endif
/*
* Local variables:
void *alloc_xen_pagetable(void)
{
extern int early_boot;
- extern unsigned long xenheap_phys_start;
+ extern unsigned long xenheap_initial_phys_start;
unsigned long mfn;
if ( !early_boot )
return v;
}
- mfn = xenheap_phys_start >> PAGE_SHIFT;
- xenheap_phys_start += PAGE_SIZE;
+ mfn = xenheap_initial_phys_start >> PAGE_SHIFT;
+ xenheap_initial_phys_start += PAGE_SIZE;
return mfn_to_virt(mfn);
}
__PAGE_HYPERVISOR));
}
-unsigned long clone_idle_pagetable(struct vcpu *v)
-{
- unsigned int i;
- struct domain *d = v->domain;
- l3_pgentry_t *l3_table = v->arch.pae_l3_cache.table[0];
- l2_pgentry_t *l2_table = alloc_xenheap_page();
-
- if ( !l2_table )
- return 0;
-
- memcpy(l3_table, idle_pg_table, L3_PAGETABLE_ENTRIES * sizeof(*l3_table));
- l3_table[l3_table_offset(PERDOMAIN_VIRT_START)] =
- l3e_from_page(virt_to_page(l2_table), _PAGE_PRESENT);
-
- copy_page(l2_table, idle_pg_table_l2 +
- l3_table_offset(PERDOMAIN_VIRT_START) * L2_PAGETABLE_ENTRIES);
- for ( i = 0; i < PDPT_L2_ENTRIES; ++i )
- l2_table[l2_table_offset(PERDOMAIN_VIRT_START) + i] =
- l2e_from_page(virt_to_page(d->arch.mm_perdomain_pt) + i,
- __PAGE_HYPERVISOR);
-
- return __pa(l3_table);
-}
-
void __init zap_low_mappings(l2_pgentry_t *dom0_l2)
{
int i;
unsigned long m2p_start_mfn;
unsigned int i, j;
- /*
- * We are rather picky about the layout of 'struct page_info'. The
- * count_info and domain fields must be adjacent, as we perform atomic
- * 64-bit operations on them. Also, just for sanity, we assert the size
- * of the structure here.
- */
- BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) !=
- (offsetof(struct page_info, count_info) + sizeof(u32)));
- BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
BUILD_BUG_ON(sizeof(struct page_info) != 24);
/* M2P table is mappable read-only by privileged domains. */
*(.fixup)
*(.gnu.warning)
} :text =0x9090
- .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
_etext = .; /* End of text section */
*(.exit.text)
*(.exit.data)
*(.exitcall.exit)
+ *(.eh_frame)
}
/* Stabs debugging sections. */
obj-$(CONFIG_COMPAT) += physdev.o
obj-$(CONFIG_COMPAT) += platform_hypercall.o
obj-$(CONFIG_COMPAT) += cpu_idle.o
-
-ifeq ($(CONFIG_COMPAT),y)
-# extra dependencies
-compat.o: ../compat.c
-domctl.o: ../domctl.c
-mm.o: compat/mm.c
-physdev.o: ../physdev.c
-platform_hypercall.o: ../platform_hypercall.c
-sysctl.o: ../sysctl.c
-traps.o: compat/traps.c
-cpu_idle.o: ../acpi/cpu_idle.c
-endif
+obj-$(CONFIG_COMPAT) += cpufreq.o
DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs));
BLANK();
+ OFFSET(irq_caps_offset, struct domain, irq_caps);
+ OFFSET(next_in_list_offset, struct domain, next_in_list);
OFFSET(VCPU_processor, struct vcpu, processor);
OFFSET(VCPU_domain, struct vcpu, domain);
OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
BLANK();
OFFSET(VCPU_vmx_launched, struct vcpu, arch.hvm_vmx.launched);
- OFFSET(VCPU_vmx_emul, struct vcpu, arch.hvm_vmx.vmxemul);
+ OFFSET(VCPU_vmx_realmode, struct vcpu, arch.hvm_vmx.vmx_realmode);
+ OFFSET(VCPU_vmx_emulate, struct vcpu, arch.hvm_vmx.vmx_emulate);
+ OFFSET(VCPU_vm86_seg_mask, struct vcpu, arch.hvm_vmx.vm86_segment_mask);
OFFSET(VCPU_hvm_guest_cr2, struct vcpu, arch.hvm_vcpu.guest_cr[2]);
BLANK();
call compat_create_bounce_frame
jmp compat_test_all_events
2: movl $TRAP_gp_fault,UREGS_entry_vector(%rsp)
+ subl $2,UREGS_rip(%rsp)
movq VCPU_gp_fault_addr(%rbx),%rax
movzwl VCPU_gp_fault_sel(%rbx),%esi
movb $(TBF_EXCEPTION|TBF_EXCEPTION_ERRCODE|TBF_INTERRUPT),%cl
.quad do_sysctl /* 35 */
.quad do_domctl
.quad compat_kexec_op
+ .quad do_tmem_op
.rept __HYPERVISOR_arch_0-((.-compat_hypercall_table)/8)
.quad compat_ni_hypercall
.endr
.byte 1 /* do_sysctl */ /* 35 */
.byte 1 /* do_domctl */
.byte 2 /* compat_kexec_op */
+ .byte 1 /* do_tmem_op */
.rept __HYPERVISOR_arch_0-(.-compat_hypercall_args_table)
.byte 0 /* compat_ni_hypercall */
.endr
break;
}
- case XENMEM_remove_from_physmap:
- {
- struct compat_remove_from_physmap cmp;
- struct xen_remove_from_physmap *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
-
- if ( copy_from_guest(&cmp, arg, 1) )
- return -EFAULT;
-
- XLAT_remove_from_physmap(nat, &cmp);
- rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
-
- break;
- }
-
case XENMEM_set_memory_map:
{
struct compat_foreign_memory_map cmp;
break;
}
+ case XENMEM_set_pod_target:
+ case XENMEM_get_pod_target:
+ {
+ struct compat_pod_target cmp;
+ struct xen_pod_target *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
+
+ if ( copy_from_guest(&cmp, arg, 1) )
+ return -EFAULT;
+
+ XLAT_pod_target(nat, &cmp);
+
+ rc = arch_memory_op(op, guest_handle_from_ptr(nat, void));
+ if ( rc < 0 )
+ break;
+
+ XLAT_pod_target(&cmp, nat);
+
+ if ( copy_to_guest(arg, &cmp, 1) )
+ rc = -EFAULT;
+
+ break;
+ }
+
case XENMEM_machphys_mapping:
{
struct domain *d = current->domain;
case MMUEXT_PIN_L4_TABLE:
case MMUEXT_UNPIN_TABLE:
case MMUEXT_NEW_BASEPTR:
+ case MMUEXT_CLEAR_PAGE:
+ case MMUEXT_COPY_PAGE:
arg1 = XLAT_mmuext_op_arg1_mfn;
break;
default:
case MMUEXT_INVLPG_MULTI:
arg2 = XLAT_mmuext_op_arg2_vcpumask;
break;
+ case MMUEXT_COPY_PAGE:
+ arg2 = XLAT_mmuext_op_arg2_src_mfn;
+ break;
default:
arg2 = -1;
break;
xlat_page_current = xlat_page_start; \
} while (0)
-static void *xlat_malloc(unsigned long *xlat_page_current, size_t size)
+void *xlat_malloc(unsigned long *xlat_page_current, size_t size)
{
void *ret;
--- /dev/null
+/******************************************************************************
+ * cpufreq.c -- adapt 32b compat guest to 64b hypervisor.
+ *
+ * Copyright (C) 2008, Liu Jinsong <jinsong.liu@intel.com>
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or (at
+ * your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ */
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/xmalloc.h>
+#include <xen/guest_access.h>
+#include <compat/platform.h>
+
+DEFINE_XEN_GUEST_HANDLE(compat_processor_px_t);
+
+#define xlat_page_start ((unsigned long)COMPAT_ARG_XLAT_VIRT_BASE)
+
+#define xlat_malloc_init(xlat_page_current) do { \
+ xlat_page_current = xlat_page_start; \
+} while (0)
+
+extern void *xlat_malloc(unsigned long *xlat_page_current, size_t size);
+
+#define xlat_malloc_array(_p, _t, _c) ((_t *) xlat_malloc(&_p, sizeof(_t) * _c))
+
+extern int
+set_px_pminfo(uint32_t cpu, struct xen_processor_performance *perf);
+
+int
+compat_set_px_pminfo(uint32_t cpu, struct compat_processor_performance *perf)
+{
+ struct xen_processor_performance *xen_perf;
+ unsigned long xlat_page_current;
+
+ xlat_malloc_init(xlat_page_current);
+
+ xen_perf = xlat_malloc_array(xlat_page_current,
+ struct xen_processor_performance, 1);
+ if ( unlikely(xen_perf == NULL) )
+ return -EFAULT;
+
+#define XLAT_processor_performance_HNDL_states(_d_, _s_) do { \
+ XEN_GUEST_HANDLE(compat_processor_px_t) states; \
+ if ( unlikely(!compat_handle_okay((_s_)->states, (_s_)->state_count)) ) \
+ return -EFAULT; \
+ guest_from_compat_handle(states, (_s_)->states); \
+ (_d_)->states = guest_handle_cast(states, xen_processor_px_t); \
+} while (0)
+
+ XLAT_processor_performance(xen_perf, perf);
+#undef XLAT_processor_performance_HNDL_states
+
+ return set_px_pminfo(cpu, xen_perf);
+}
.quad do_sysctl /* 35 */
.quad do_domctl
.quad do_kexec_op
+ .quad do_tmem_op
.rept __HYPERVISOR_arch_0-((.-hypercall_table)/8)
.quad do_ni_hypercall
.endr
.byte 1 /* do_sysctl */ /* 35 */
.byte 1 /* do_domctl */
.byte 2 /* do_kexec */
- .byte 1 /* do_xsm_op */
+ .byte 1 /* do_tmem_op */
.rept __HYPERVISOR_arch_0-(.-hypercall_args_table)
.byte 0 /* do_ni_hypercall */
.endr
* - Magnus Damm <magnus@valinux.co.jp>
*/
-#ifndef CONFIG_COMPAT
-
#include <xen/types.h>
+#include <xen/kernel.h>
#include <asm/page.h>
#include <public/kexec.h>
int machine_kexec_get_xen(xen_kexec_range_t *range)
{
- range->start = xenheap_phys_start;
- range->size = (unsigned long)xenheap_phys_end -
- (unsigned long)range->start;
+ range->start = virt_to_maddr(_start);
+ range->size = virt_to_maddr(_end) - (unsigned long)range->start;
return 0;
}
-#endif
/*
* Local variables:
#include <xen/lib.h>
#include <xen/init.h>
#include <xen/mm.h>
-#include <xen/numa.h>
#include <xen/sched.h>
#include <xen/guest_access.h>
#include <asm/current.h>
#include <asm/fixmap.h>
#include <asm/hypercall.h>
#include <asm/msr.h>
+#include <asm/numa.h>
#include <public/memory.h>
#ifdef CONFIG_COMPAT
void __init paging_init(void)
{
unsigned long i, mpt_size, va;
+ unsigned int memflags;
l3_pgentry_t *l3_ro_mpt;
l2_pgentry_t *l2_ro_mpt = NULL;
struct page_info *l1_pg, *l2_pg, *l3_pg;
mpt_size &= ~((1UL << L2_PAGETABLE_SHIFT) - 1UL);
for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
{
- if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
+ BUILD_BUG_ON(RO_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
+ va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
+ memflags = MEMF_node(phys_to_nid(i <<
+ (L2_PAGETABLE_SHIFT - 3 + PAGE_SHIFT)));
+
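+        /* Map this slot of the M2P with a 1GB superpage when the CPU supports it,
+         * we are at an L3 boundary, at least 1GB of table remains to be mapped,
+         * and a contiguous 1GB allocation succeeds. */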
+ if ( cpu_has_page1gb &&
+ !((unsigned long)l2_ro_mpt & ~PAGE_MASK) &&
+ (mpt_size >> L3_PAGETABLE_SHIFT) > (i >> PAGETABLE_ORDER) &&
+ (l1_pg = alloc_domheap_pages(NULL, 2 * PAGETABLE_ORDER,
+ memflags)) != NULL )
+ {
+ map_pages_to_xen(
+ RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
+ page_to_mfn(l1_pg),
+ 1UL << (2 * PAGETABLE_ORDER),
+ PAGE_HYPERVISOR);
+ memset((void *)(RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT)),
+ 0x77, 1UL << L3_PAGETABLE_SHIFT);
+
+ ASSERT(!l2_table_offset(va));
+ /* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
+ l3e_write(&l3_ro_mpt[l3_table_offset(va)],
+ l3e_from_page(l1_pg,
+ /*_PAGE_GLOBAL|*/_PAGE_PSE|_PAGE_USER|_PAGE_PRESENT));
+ i += (1UL << PAGETABLE_ORDER) - 1;
+ continue;
+ }
+
+ if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
+ memflags)) == NULL )
goto nomem;
map_pages_to_xen(
RDWR_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
1UL << L2_PAGETABLE_SHIFT);
if ( !((unsigned long)l2_ro_mpt & ~PAGE_MASK) )
{
- if ( (l2_pg = alloc_domheap_page(NULL, 0)) == NULL )
+ if ( (l2_pg = alloc_domheap_page(NULL, memflags)) == NULL )
goto nomem;
- va = RO_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT);
l2_ro_mpt = page_to_virt(l2_pg);
clear_page(l2_ro_mpt);
l3e_write(&l3_ro_mpt[l3_table_offset(va)],
l3e_from_page(l2_pg, __PAGE_HYPERVISOR | _PAGE_USER));
- l2_ro_mpt += l2_table_offset(va);
+ ASSERT(!l2_table_offset(va));
}
/* NB. Cannot be GLOBAL as shadow_mode_translate reuses this area. */
l2e_write(l2_ro_mpt, l2e_from_page(
m2p_compat_vstart = MACH2PHYS_COMPAT_VIRT_END - mpt_size;
for ( i = 0; i < (mpt_size >> L2_PAGETABLE_SHIFT); i++ )
{
- if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER, 0)) == NULL )
+ memflags = MEMF_node(phys_to_nid(i <<
+ (L2_PAGETABLE_SHIFT - 2 + PAGE_SHIFT)));
+ if ( (l1_pg = alloc_domheap_pages(NULL, PAGETABLE_ORDER,
+ memflags)) == NULL )
goto nomem;
map_pages_to_xen(
RDWR_COMPAT_MPT_VIRT_START + (i << L2_PAGETABLE_SHIFT),
__PAGE_HYPERVISOR));
}
-unsigned long clone_idle_pagetable(struct vcpu *v)
-{
- struct domain *d = v->domain;
- struct page_info *page = alloc_domheap_page(NULL,
- MEMF_node(vcpu_to_node(v)));
- l4_pgentry_t *l4_table = page_to_virt(page);
-
- if ( !page )
- return 0;
-
- copy_page(l4_table, idle_pg_table);
- l4_table[l4_table_offset(PERDOMAIN_VIRT_START)] =
- l4e_from_page(virt_to_page(d->arch.mm_perdomain_l3),
- __PAGE_HYPERVISOR);
-
- return __pa(l4_table);
-}
-
void __init zap_low_mappings(void)
{
BUG_ON(num_online_cpus() != 1);
void __init subarch_init_memory(void)
{
- unsigned long i, v, m2p_start_mfn;
+ unsigned long i, n, v, m2p_start_mfn;
l3_pgentry_t l3e;
l2_pgentry_t l2e;
- /*
- * We are rather picky about the layout of 'struct page_info'. The
- * count_info and domain fields must be adjacent, as we perform atomic
- * 64-bit operations on them.
- */
- BUILD_BUG_ON(offsetof(struct page_info, u.inuse._domain) !=
- (offsetof(struct page_info, count_info) + sizeof(u32)));
- BUILD_BUG_ON((offsetof(struct page_info, count_info) & 7) != 0);
- BUILD_BUG_ON(sizeof(struct page_info) !=
- (32 + BITS_TO_LONGS(NR_CPUS)*sizeof(long)));
-
+ BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
+ BUILD_BUG_ON(RDWR_MPT_VIRT_END & ((1UL << L3_PAGETABLE_SHIFT) - 1));
/* M2P table is mappable read-only by privileged domains. */
for ( v = RDWR_MPT_VIRT_START;
v != RDWR_MPT_VIRT_END;
- v += 1 << L2_PAGETABLE_SHIFT )
+ v += n << PAGE_SHIFT )
{
+ n = L2_PAGETABLE_ENTRIES * L1_PAGETABLE_ENTRIES;
l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
l3_table_offset(v)];
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
continue;
- l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
- if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- continue;
- m2p_start_mfn = l2e_get_pfn(l2e);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
+ {
+ n = L1_PAGETABLE_ENTRIES;
+ l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ continue;
+ m2p_start_mfn = l2e_get_pfn(l2e);
+ }
+ else
+ {
+ m2p_start_mfn = l3e_get_pfn(l3e);
+ }
- for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
+ for ( i = 0; i < n; i++ )
{
struct page_info *page = mfn_to_page(m2p_start_mfn + i);
share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
if ( copy_from_guest(&xmml, arg, 1) )
return -EFAULT;
+ BUILD_BUG_ON(RDWR_MPT_VIRT_START & ((1UL << L3_PAGETABLE_SHIFT) - 1));
+ BUILD_BUG_ON(RDWR_MPT_VIRT_END & ((1UL << L3_PAGETABLE_SHIFT) - 1));
for ( i = 0, v = RDWR_MPT_VIRT_START;
(i != xmml.max_extents) && (v != RDWR_MPT_VIRT_END);
- i++, v += 1 << 21 )
+ i++, v += 1UL << L2_PAGETABLE_SHIFT )
{
l3e = l4e_to_l3e(idle_pg_table[l4_table_offset(v)])[
l3_table_offset(v)];
if ( !(l3e_get_flags(l3e) & _PAGE_PRESENT) )
break;
- l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
- if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
- break;
- mfn = l2e_get_pfn(l2e) + l1_table_offset(v);
+ if ( !(l3e_get_flags(l3e) & _PAGE_PSE) )
+ {
+ l2e = l3e_to_l2e(l3e)[l2_table_offset(v)];
+ if ( !(l2e_get_flags(l2e) & _PAGE_PRESENT) )
+ break;
+ mfn = l2e_get_pfn(l2e);
+ }
+ else
+ {
+ mfn = l3e_get_pfn(l3e)
+ + (l2_table_offset(v) << PAGETABLE_ORDER);
+ }
+ ASSERT(!l1_table_offset(v));
if ( copy_to_guest_offset(xmml.extent_start, i, &mfn, 1) )
return -EFAULT;
}
#define physdev_eoi compat_physdev_eoi
#define physdev_eoi_t physdev_eoi_compat_t
+#define physdev_pirq_eoi_gmfn compat_physdev_pirq_eoi_gmfn
+#define physdev_pirq_eoi_gmfn_t physdev_pirq_eoi_gmfn_compat_t
+
#define physdev_set_iobitmap compat_physdev_set_iobitmap
#define physdev_set_iobitmap_t physdev_set_iobitmap_compat_t
#define xen_platform_op_t compat_platform_op_t
#define do_platform_op(x) compat_platform_op(_##x)
-#define xenpf_copy_px_states compat_xenpf_copy_px_states
-
#define xen_processor_px compat_processor_px
#define xen_processor_px_t compat_processor_px_t
#define xen_processor_performance compat_processor_performance
#define xen_processor_performance_t compat_processor_performance_t
#define xenpf_set_processor_pminfo compat_pf_set_processor_pminfo
+#define set_px_pminfo compat_set_px_pminfo
+
#define xen_processor_power compat_processor_power
#define xen_processor_power_t compat_processor_power_t
#define set_cx_pminfo compat_set_cx_pminfo
#include <xen/nmi.h>
#include <asm/current.h>
#include <asm/flushtlb.h>
+#include <asm/traps.h>
+#include <asm/event.h>
#include <asm/msr.h>
#include <asm/page.h>
#include <asm/shared.h>
struct cpu_user_regs *regs = guest_cpu_user_regs();
struct iret_context iret_saved;
struct vcpu *v = current;
+ struct domain *d = v->domain;
+ struct bank_entry *entry;
+ int cpu = smp_processor_id();
if ( unlikely(copy_from_user(&iret_saved, (void *)regs->rsp,
sizeof(iret_saved))) )
&& !cpus_equal(v->cpu_affinity_tmp, v->cpu_affinity))
vcpu_set_affinity(v, &v->cpu_affinity_tmp);
+    /* Currently we only inject vMCE into Dom0. */
+    if (v->trap_priority >= VCPU_TRAP_NMI) {
+        printk(KERN_DEBUG "MCE: Return from vMCE# trap!\n");
+ if (d->domain_id == 0 && v->vcpu_id == 0) {
+ if ( !d->arch.vmca_msrs.nr_injection ) {
+ printk(KERN_WARNING "MCE: Ret from vMCE#, nr_injection is 0\n");
+ goto end;
+ }
+
+ d->arch.vmca_msrs.nr_injection--;
+ if (!list_empty(&d->arch.vmca_msrs.impact_header)) {
+ entry = list_entry(d->arch.vmca_msrs.impact_header.next,
+ struct bank_entry, list);
+ printk(KERN_DEBUG "MCE: Delete last injection Node\n");
+ list_del(&entry->list);
+ }
+ else
+ printk(KERN_DEBUG "MCE: Not found last injection "
+ "Node, something Wrong!\n");
+
+            /* Further injections pending: kick Dom0's VCPU0 to deliver the next one. */
+ if ( d->arch.vmca_msrs.nr_injection > 0) {
+ if ( d->arch.vmca_msrs.nr_injection > 0 &&
+ guest_has_trap_callback(d, v->vcpu_id,
+ TRAP_machine_check) &&
+ !test_and_set_bool(dom0->vcpu[0]->mce_pending)) {
+ cpumask_t affinity;
+
+ dom0->vcpu[0]->cpu_affinity_tmp =
+ dom0->vcpu[0]->cpu_affinity;
+ cpus_clear(affinity);
+ cpu_set(cpu, affinity);
+ printk(KERN_DEBUG "MCE: CPU%d set affinity, old %d\n", cpu,
+ dom0->vcpu[0]->processor);
+ vcpu_set_affinity(dom0->vcpu[0], &affinity);
+ vcpu_kick(dom0->vcpu[0]);
+ }
+ }
+ }
+ } /* end of outer-if */
+
+end:
/* Restore previous trap priority */
v->trap_priority = v->old_trap_priority;
*(.fixup)
*(.gnu.warning)
} :text = 0x9090
- .text.lock : { *(.text.lock) } :text /* out-of-line lock text */
_etext = .; /* End of text section */
*(.exit.text)
*(.exit.data)
*(.exitcall.exit)
+ *(.eh_frame)
}
/* Stabs debugging sections. */
#define DstImplicit (0<<1) /* Destination operand is implicit in the opcode. */
#define DstBitBase (1<<1) /* Memory operand, bit string. */
#define DstReg (2<<1) /* Register operand. */
+#define DstEax DstReg /* Register EAX (aka DstReg with no ModRM) */
#define DstMem (3<<1) /* Memory operand. */
#define DstMask (3<<1)
/* Source operand type. */
/* 0x00 - 0x07 */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps, ImplicitOps,
/* 0x08 - 0x0F */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, 0,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps, 0,
/* 0x10 - 0x17 */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps, ImplicitOps,
/* 0x18 - 0x1F */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, ImplicitOps, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, ImplicitOps, ImplicitOps,
/* 0x20 - 0x27 */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
/* 0x28 - 0x2F */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
/* 0x30 - 0x37 */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
/* 0x38 - 0x3F */
ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
ByteOp|DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
- ByteOp|DstReg|SrcImm, DstReg|SrcImm, 0, ImplicitOps,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm, 0, ImplicitOps,
/* 0x40 - 0x4F */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
ByteOp|ImplicitOps, ImplicitOps,
/* 0xA8 - 0xAF */
- ByteOp|DstReg|SrcImm, DstReg|SrcImm,
+ ByteOp|DstEax|SrcImm, DstEax|SrcImm,
ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
ByteOp|ImplicitOps|Mov, ImplicitOps|Mov,
ByteOp|ImplicitOps, ImplicitOps,
DstReg|SrcMem|ModRM, DstReg|SrcMem|ModRM,
ByteOp|DstReg|SrcMem|ModRM|Mov, DstReg|SrcMem16|ModRM|Mov,
/* 0xC0 - 0xC7 */
- ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM, 0, 0,
+ ByteOp|DstMem|SrcReg|ModRM, DstMem|SrcReg|ModRM,
+ 0, DstMem|SrcReg|ModRM|Mov,
0, 0, 0, ImplicitOps|ModRM,
/* 0xC8 - 0xCF */
ImplicitOps, ImplicitOps, ImplicitOps, ImplicitOps,
})
/* Clip maximum repetitions so that the index register only just wraps. */
-#define truncate_ea_and_reps(ea, reps, bytes_per_rep) ({ \
- unsigned long __todo = (ctxt->regs->eflags & EF_DF) ? (ea) : ~(ea); \
- __todo = truncate_word(__todo, ad_bytes); \
- __todo = (__todo / (bytes_per_rep)) + 1; \
- (reps) = (__todo < (reps)) ? __todo : (reps); \
- truncate_word((ea), ad_bytes); \
+#define truncate_ea_and_reps(ea, reps, bytes_per_rep) ({ \
+ unsigned long __todo = (ctxt->regs->eflags & EFLG_DF) ? (ea) : ~(ea); \
+ __todo = truncate_word(__todo, ad_bytes); \
+ __todo = (__todo / (bytes_per_rep)) + 1; \
+ (reps) = (__todo < (reps)) ? __todo : (reps); \
+ truncate_word((ea), ad_bytes); \
})
/* Compatibility function: read guest memory, zero-extend result to a ulong. */
switch ( b )
{
- case 0x04 ... 0x05: /* add imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x00 ... 0x03: add: /* add */
+ case 0x00 ... 0x05: add: /* add */
emulate_2op_SrcV("add", src, dst, _regs.eflags);
break;
- case 0x0c ... 0x0d: /* or imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x08 ... 0x0b: or: /* or */
+ case 0x08 ... 0x0d: or: /* or */
emulate_2op_SrcV("or", src, dst, _regs.eflags);
break;
- case 0x14 ... 0x15: /* adc imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x10 ... 0x13: adc: /* adc */
+ case 0x10 ... 0x15: adc: /* adc */
emulate_2op_SrcV("adc", src, dst, _regs.eflags);
break;
- case 0x1c ... 0x1d: /* sbb imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x18 ... 0x1b: sbb: /* sbb */
+ case 0x18 ... 0x1d: sbb: /* sbb */
emulate_2op_SrcV("sbb", src, dst, _regs.eflags);
break;
- case 0x24 ... 0x25: /* and imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x20 ... 0x23: and: /* and */
+ case 0x20 ... 0x25: and: /* and */
emulate_2op_SrcV("and", src, dst, _regs.eflags);
break;
- case 0x2c ... 0x2d: /* sub imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x28 ... 0x2b: sub: /* sub */
+ case 0x28 ... 0x2d: sub: /* sub */
emulate_2op_SrcV("sub", src, dst, _regs.eflags);
break;
- case 0x34 ... 0x35: /* xor imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x30 ... 0x33: xor: /* xor */
+ case 0x30 ... 0x35: xor: /* xor */
emulate_2op_SrcV("xor", src, dst, _regs.eflags);
break;
- case 0x3c ... 0x3d: /* cmp imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
- case 0x38 ... 0x3b: cmp: /* cmp */
+ case 0x38 ... 0x3d: cmp: /* cmp */
emulate_2op_SrcV("cmp", src, dst, _regs.eflags);
dst.type = OP_NONE;
break;
break;
case 0xa8 ... 0xa9: /* test imm,%%eax */
- dst.reg = (unsigned long *)&_regs.eax;
- dst.val = _regs.eax;
case 0x84 ... 0x85: test: /* test */
emulate_2op_SrcV("test", src, dst, _regs.eflags);
dst.type = OP_NONE;
}
goto add;
+ case 0xc3: /* movnti */
+ /* Ignore the non-temporal hint for now. */
+ generate_exception_if(dst.bytes <= 2, EXC_UD, -1);
+ dst.val = src.val;
+ break;
+
case 0xc7: /* Grp9 (cmpxchg8b/cmpxchg16b) */ {
unsigned long old[2], exp[2], new[2];
unsigned int i;
uint16_t l: 1; /* 9; Bit 53 */
uint16_t db: 1; /* 10; Bit 54 */
uint16_t g: 1; /* 11; Bit 55 */
+ uint16_t pad: 4;
} fields;
} __attribute__ ((packed)) segment_attributes_t;
obj-y += schedule.o
obj-y += shutdown.o
obj-y += softirq.o
+obj-y += spinlock.o
obj-y += stop_machine.o
obj-y += string.o
obj-y += symbols.o
subdir-$(ia64) += hvm
subdir-y += libelf
-
-# Object file contains changeset and compiler information.
-version.o: $(BASEDIR)/include/xen/compile.h
-
-ifeq ($(CONFIG_COMPAT),y)
-# extra dependencies
-grant_table.o: compat/grant_table.c
-schedule.o: compat/schedule.c
-xenoprof.o: compat/xenoprof.c
-endif
obj-y += memory.o
obj-y += multicall.o
obj-y += xlat.o
-
-# extra dependencies
-kernel.o: ../kernel.c
-multicall.o: ../multicall.c
XEN_GUEST_HANDLE(void) hnd;
struct xen_memory_reservation *rsrv;
struct xen_memory_exchange *xchg;
- struct xen_translate_gpfn_list *xlat;
} nat;
union {
struct compat_memory_reservation rsrv;
struct compat_memory_exchange xchg;
- struct compat_translate_gpfn_list xlat;
} cmp;
set_xen_guest_handle(nat.hnd, (void *)COMPAT_ARG_XLAT_VIRT_BASE);
nat.hnd = compat;
break;
- case XENMEM_translate_gpfn_list:
- if ( copy_from_guest(&cmp.xlat, compat, 1) )
- return -EFAULT;
-
- /* Is size too large for us to encode a continuation? */
- if ( cmp.xlat.nr_gpfns > (UINT_MAX >> MEMOP_EXTENT_SHIFT) )
- return -EINVAL;
-
- if ( !compat_handle_okay(cmp.xlat.gpfn_list, cmp.xlat.nr_gpfns) ||
- !compat_handle_okay(cmp.xlat.mfn_list, cmp.xlat.nr_gpfns) )
- return -EFAULT;
-
- end_extent = start_extent + (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.xlat)) /
- sizeof(*space);
- if ( end_extent > cmp.xlat.nr_gpfns )
- end_extent = cmp.xlat.nr_gpfns;
-
- space = (xen_pfn_t *)(nat.xlat + 1);
- /* Code below depends upon .gpfn_list preceding .mfn_list. */
- BUILD_BUG_ON(offsetof(xen_translate_gpfn_list_t, gpfn_list) > offsetof(xen_translate_gpfn_list_t, mfn_list));
-#define XLAT_translate_gpfn_list_HNDL_gpfn_list(_d_, _s_) \
- do \
- { \
- set_xen_guest_handle((_d_)->gpfn_list, space - start_extent); \
- for ( i = start_extent; i < end_extent; ++i ) \
- { \
- compat_pfn_t pfn; \
- if ( __copy_from_compat_offset(&pfn, (_s_)->gpfn_list, i, 1) ) \
- return -EFAULT; \
- *space++ = pfn; \
- } \
- } while (0)
-#define XLAT_translate_gpfn_list_HNDL_mfn_list(_d_, _s_) \
- (_d_)->mfn_list = (_d_)->gpfn_list
- XLAT_translate_gpfn_list(nat.xlat, &cmp.xlat);
-#undef XLAT_translate_gpfn_list_HNDL_mfn_list
-#undef XLAT_translate_gpfn_list_HNDL_gpfn_list
-
- if ( end_extent < cmp.xlat.nr_gpfns )
- {
- nat.xlat->nr_gpfns = end_extent;
- ++split;
- }
-
- break;
-
default:
return compat_arch_memory_op(cmd, compat);
}
case XENMEM_maximum_gpfn:
break;
- case XENMEM_translate_gpfn_list:
- if ( split < 0 )
- end_extent = cmd >> MEMOP_EXTENT_SHIFT;
- else
- BUG_ON(rc);
-
- for ( ; start_extent < end_extent; ++start_extent )
- {
- compat_pfn_t pfn = nat.xlat->mfn_list.p[start_extent];
-
- BUG_ON(pfn != nat.xlat->mfn_list.p[start_extent]);
- if ( __copy_to_compat_offset(cmp.xlat.mfn_list, start_extent, &pfn, 1) )
- {
- if ( split < 0 )
- /* Cannot cancel the continuation... */
- domain_crash(current->domain);
- return -EFAULT;
- }
- }
- break;
-
default:
domain_crash(current->domain);
split = 0;
#include <xen/percpu.h>
#include <xen/multicall.h>
#include <xen/rcupdate.h>
+#include <acpi/cpufreq/cpufreq.h>
#include <asm/debugger.h>
#include <public/sched.h>
#include <public/vcpu.h>
#include <xsm/xsm.h>
+#include <xen/trace.h>
/* Linux config option: propagated to domain0 */
/* xen_processor_pmbits: xen control Cx, Px, ... */
-unsigned int xen_processor_pmbits = 0;
+unsigned int xen_processor_pmbits = XEN_PROCESSOR_PM_PX;
/* opt_dom0_vcpus_pin: If true, dom0 VCPUs are pinned. */
static unsigned int opt_dom0_vcpus_pin;
boolean_param("dom0_vcpus_pin", opt_dom0_vcpus_pin);
-enum cpufreq_controller cpufreq_controller;
+/* Use xen as the default cpufreq controller. */
+enum cpufreq_controller cpufreq_controller = FREQCTL_xen;
+
static void __init setup_cpufreq_option(char *str)
{
+ char *arg;
+
if ( !strcmp(str, "dom0-kernel") )
{
xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
cpufreq_controller = FREQCTL_dom0_kernel;
opt_dom0_vcpus_pin = 1;
+ return;
}
- else if ( !strcmp(str, "xen") )
+
+ if ( !strcmp(str, "none") )
{
- xen_processor_pmbits |= XEN_PROCESSOR_PM_PX;
- cpufreq_controller = FREQCTL_xen;
+ xen_processor_pmbits &= ~XEN_PROCESSOR_PM_PX;
+ cpufreq_controller = FREQCTL_none;
+ return;
}
+
+ if ( (arg = strpbrk(str, ",:")) != NULL )
+ *arg++ = '\0';
+
+ if ( !strcmp(str, "xen") )
+ if ( arg && *arg )
+ cpufreq_cmdline_parse(arg);
}
custom_param("cpufreq", setup_cpufreq_option);
return current->domain->domain_id;
}
-static struct domain *alloc_domain_struct(void)
-{
- return xmalloc(struct domain);
-}
-
-static void free_domain_struct(struct domain *d)
-{
- xfree(d);
-}
-
static void __domain_finalise_shutdown(struct domain *d)
{
struct vcpu *v;
v->domain = d;
v->vcpu_id = vcpu_id;
- v->runstate.state = is_idle_vcpu(v) ? RUNSTATE_running : RUNSTATE_offline;
- v->runstate.state_entry_time = NOW();
-
spin_lock_init(&v->virq_lock);
- if ( !is_idle_domain(d) )
+ if ( is_idle_domain(d) )
+ {
+ v->runstate.state = RUNSTATE_running;
+ }
+ else
{
+ v->runstate.state = RUNSTATE_offline;
+ v->runstate.state_entry_time = NOW();
set_bit(_VPF_down, &v->pause_flags);
v->vcpu_info = (void *)&shared_info(d, vcpu_info[vcpu_id]);
}
spin_lock_init(&d->page_alloc_lock);
spin_lock_init(&d->shutdown_lock);
spin_lock_init(&d->hypercall_deadlock_mutex);
- INIT_LIST_HEAD(&d->page_list);
- INIT_LIST_HEAD(&d->xenpage_list);
+ INIT_PAGE_LIST_HEAD(&d->page_list);
+ INIT_PAGE_LIST_HEAD(&d->xenpage_list);
if ( domcr_flags & DOMCRF_hvm )
d->is_hvm = 1;
for_each_vcpu ( d, v )
{
- if ( v->defer_shutdown )
+ if ( reason == SHUTDOWN_crash )
+ v->defer_shutdown = 0;
+ else if ( v->defer_shutdown )
continue;
vcpu_pause_nosync(v);
v->paused_for_shutdown = 1;
sched_destroy_vcpu(v);
}
- rangeset_domain_destroy(d);
-
grant_table_destroy(d);
arch_domain_destroy(d);
+ rangeset_domain_destroy(d);
+
sched_destroy_domain(d);
+ /* Free page used by xen oprofile buffer. */
+ free_xenoprof_pages(d);
+
for ( i = MAX_VIRT_CPUS-1; i >= 0; i-- )
if ( (v = d->vcpu[i]) != NULL )
free_vcpu_struct(v);
return;
/* Delete from task list and task hashtable. */
+ TRACE_1D(TRC_SCHED_DOM_REM, d->domain_id);
spin_lock(&domlist_update_lock);
pd = &domain_list;
while ( *pd != d )
if ( (c.nat = xmalloc(struct vcpu_guest_context)) == NULL )
goto svc_out;
- if ( !IS_COMPAT(v->domain) )
- ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
#ifdef CONFIG_COMPAT
+ if ( !is_pv_32on64_vcpu(v) )
+ ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
else
ret = copy_from_guest(c.cmp,
guest_handle_cast(op->u.vcpucontext.ctxt,
void), 1);
+#else
+ ret = copy_from_guest(c.nat, op->u.vcpucontext.ctxt, 1);
#endif
ret = ret ? -EFAULT : 0;
ret = -EINVAL;
if ( supervisor_mode_kernel ||
(op->u.createdomain.flags &
- ~(XEN_DOMCTL_CDF_hvm_guest | XEN_DOMCTL_CDF_hap)) )
+ ~(XEN_DOMCTL_CDF_hvm_guest | XEN_DOMCTL_CDF_hap |
+ XEN_DOMCTL_CDF_s3_integrity)) )
break;
dom = op->domain;
domcr_flags |= DOMCRF_hvm;
if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_hap )
domcr_flags |= DOMCRF_hap;
+ if ( op->u.createdomain.flags & XEN_DOMCTL_CDF_s3_integrity )
+ domcr_flags |= DOMCRF_s3_integrity;
ret = -ENOMEM;
d = domain_create(dom, domcr_flags, op->u.createdomain.ssidref);
cpu = (i == 0) ?
default_vcpu0_location() :
- (d->vcpu[i-1]->processor + 1) % num_online_cpus();
+ cycle_cpu(d->vcpu[i-1]->processor, cpu_online_map);
if ( alloc_vcpu(d, i, cpu) == NULL )
goto maxvcpu_out;
if ( v != current )
vcpu_unpause(v);
- if ( !IS_COMPAT(v->domain) )
- ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
#ifdef CONFIG_COMPAT
+ if ( !is_pv_32on64_vcpu(v) )
+ ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
else
ret = copy_to_guest(guest_handle_cast(op->u.vcpucontext.ctxt,
void), c.cmp, 1);
+#else
+ ret = copy_to_guest(op->u.vcpucontext.ctxt, c.nat, 1);
#endif
if ( copy_to_guest(u_domctl, op, 1) || ret )
if ( v->virq_to_evtchn[chn1->u.virq] != port1 )
continue;
v->virq_to_evtchn[chn1->u.virq] = 0;
- spin_barrier(&v->virq_lock);
+ spin_barrier_irq(&v->virq_lock);
}
break;
return 1;
if ( !test_bit (port, &shared_info(d, evtchn_mask)) &&
- !test_and_set_bit(port / BITS_PER_GUEST_LONG(d),
+ !test_and_set_bit(port / BITS_PER_EVTCHN_WORD(d),
&vcpu_info(v, evtchn_pending_sel)) )
{
vcpu_mark_events_pending(v);
}
-static long evtchn_unmask(evtchn_unmask_t *unmask)
+int evtchn_unmask(unsigned int port)
{
struct domain *d = current->domain;
- int port = unmask->port;
struct vcpu *v;
spin_lock(&d->event_lock);
*/
if ( test_and_clear_bit(port, &shared_info(d, evtchn_mask)) &&
test_bit (port, &shared_info(d, evtchn_pending)) &&
- !test_and_set_bit (port / BITS_PER_GUEST_LONG(d),
+ !test_and_set_bit (port / BITS_PER_EVTCHN_WORD(d),
&vcpu_info(v, evtchn_pending_sel)) )
{
vcpu_mark_events_pending(v);
struct evtchn_unmask unmask;
if ( copy_from_guest(&unmask, arg, 1) != 0 )
return -EFAULT;
- rc = evtchn_unmask(&unmask);
+ rc = evtchn_unmask(unmask.port);
break;
}
__gnttab_map_grant_ref(
struct gnttab_map_grant_ref *op)
{
- struct domain *ld, *rd;
+ struct domain *ld, *rd, *owner;
struct vcpu *led;
int handle;
unsigned long frame = 0, nr_gets = 0;
if ( !act->pin )
{
act->domid = scombo.shorts.domid;
+ act->gfn = sha->frame;
act->frame = gmfn_to_mfn(rd, sha->frame);
}
}
spin_unlock(&rd->grant_table->lock);
- if ( is_iomem_page(frame) )
+ if ( !mfn_valid(frame) ||
+ (owner = page_get_owner_and_reference(mfn_to_page(frame))) == dom_io )
{
+ /* The reference was only needed to confirm dom_io ownership. */
+ if ( mfn_valid(frame) )
+ put_page(mfn_to_page(frame));
+
if ( !iomem_access_permitted(rd, frame, frame) )
{
gdprintk(XENLOG_WARNING,
if ( rc != GNTST_okay )
goto undo_out;
}
- else
+ else if ( owner == rd )
{
- if ( unlikely(!mfn_valid(frame)) ||
- unlikely(!(gnttab_host_mapping_get_page_type(op, ld, rd) ?
- get_page_and_type(mfn_to_page(frame), rd,
- PGT_writable_page) :
- get_page(mfn_to_page(frame), rd))) )
- {
- if ( !rd->is_dying )
- gdprintk(XENLOG_WARNING, "Could not pin grant frame %lx\n",
- frame);
- rc = GNTST_general_error;
- goto undo_out;
- }
+ if ( gnttab_host_mapping_get_page_type(op, ld, rd) &&
+ !get_page_type(mfn_to_page(frame), PGT_writable_page) )
+ goto could_not_pin;
nr_gets++;
if ( op->flags & GNTMAP_host_map )
}
}
}
+ else
+ {
+ could_not_pin:
+ if ( !rd->is_dying )
+ gdprintk(XENLOG_WARNING, "Could not pin grant frame %lx\n",
+ frame);
+ if ( owner != NULL )
+ put_page(mfn_to_page(frame));
+ rc = GNTST_general_error;
+ goto undo_out;
+ }
if ( need_iommu(ld) &&
!(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
/* Okay, add the page to 'e'. */
if ( unlikely(e->tot_pages++ == 0) )
get_knownalive_domain(e);
- list_add_tail(&page->list, &e->page_list);
+ page_list_add_tail(page, &e->page_list);
page_set_owner(page, e);
spin_unlock(&e->page_alloc_lock);
if ( !act->pin )
{
act->domid = scombo.shorts.domid;
+ act->gfn = sha->frame;
act->frame = gmfn_to_mfn(rd, sha->frame);
}
}
#include <xen/version.h>
#include <public/version.h>
#include <xen/sched.h>
+#include <xen/guest_access.h>
#include <asm/hvm/support.h>
return sz;
}
+/* Extract a single instance of a save record, by marshalling all
+ * records of that type and copying out the one we need. */
+int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance,
+ XEN_GUEST_HANDLE_64(uint8) handle)
+{
+ int rv = 0;
+ size_t sz = 0;
+ struct vcpu *v;
+ hvm_domain_context_t ctxt = { 0, };
+
+ if ( d->is_dying
+ || typecode > HVM_SAVE_CODE_MAX
+ || hvm_sr_handlers[typecode].size < sizeof(struct hvm_save_descriptor)
+ || hvm_sr_handlers[typecode].save == NULL )
+ return -EINVAL;
+
+ if ( hvm_sr_handlers[typecode].kind == HVMSR_PER_VCPU )
+ for_each_vcpu(d, v)
+ sz += hvm_sr_handlers[typecode].size;
+ else
+ sz = hvm_sr_handlers[typecode].size;
+
+ if ( (instance + 1) * hvm_sr_handlers[typecode].size > sz )
+ return -EINVAL;
+
+ ctxt.size = sz;
+ ctxt.data = xmalloc_bytes(sz);
+ if ( !ctxt.data )
+ return -ENOMEM;
+
+ if ( hvm_sr_handlers[typecode].save(d, &ctxt) != 0 )
+ {
+ gdprintk(XENLOG_ERR,
+ "HVM save: failed to save type %"PRIu16"\n", typecode);
+ rv = -EFAULT;
+ }
+ else if ( copy_to_guest(handle,
+ ctxt.data
+ + (instance * hvm_sr_handlers[typecode].size)
+ + sizeof (struct hvm_save_descriptor),
+ hvm_sr_handlers[typecode].size
+ - sizeof (struct hvm_save_descriptor)) )
+ rv = -EFAULT;
+
+ xfree(ctxt.data);
+ return rv;
+}
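+
+/* A minimal usage sketch (hypothetical caller; `typecode' and `hnd' stand in
+ * for a real save type code and guest buffer handle): fetch the record for
+ * instance 1 of that type from domain d:
+ *
+ * int rc = hvm_save_one(d, typecode, 1, hnd);
+ * if ( rc )
+ * gdprintk(XENLOG_ERR, "hvm_save_one failed: %d\n", rc);
+ */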
int hvm_save(struct domain *d, hvm_domain_context_t *h)
{
--- /dev/null
+#define DEBG(x)
+#define DEBG1(x)
+/* inflate.c -- Not copyrighted 1992 by Mark Adler
+ version c10p1, 10 January 1993 */
+
+/*
+ * Adapted for booting Linux by Hannu Savolainen 1993
+ * based on gzip-1.0.3
+ *
+ * Nicolas Pitre <nico@cam.org>, 1999/04/14 :
+ * Small modifications so that all variables reside either in the rodata or
+ * bss segments, by marking constant variables with 'const' and initializing
+ * all the others at run-time only. This allows the kernel uncompressor to run
+ * directly from Flash or ROM memory on embedded systems.
+ */
+
+/*
+ Inflate deflated (PKZIP's method 8 compressed) data. The compression
+ method searches for as much of the current string of bytes (up to a
+ length of 258) in the previous 32 K bytes. If it doesn't find any
+ matches (of at least length 3), it codes the next byte. Otherwise, it
+ codes the length of the matched string and its distance backwards from
+ the current position. There is a single Huffman code that codes both
+ single bytes (called "literals") and match lengths. A second Huffman
+ code codes the distance information, which follows a length code. Each
+ length or distance code actually represents a base value and a number
+ of "extra" (sometimes zero) bits to get to add to the base value. At
+ the end of each deflated block is a special end-of-block (EOB) literal/
+ length code. The decoding process is basically: get a literal/length
+ code; if EOB then done; if a literal, emit the decoded byte; if a
+ length then get the distance and emit the referred-to bytes from the
+ sliding window of previously emitted data.
+
+ There are (currently) three kinds of inflate blocks: stored, fixed, and
+ dynamic. The compressor deals with some chunk of data at a time, and
+ decides which method to use on a chunk-by-chunk basis. A chunk might
+ typically be 32 K or 64 K. If the chunk is incompressible, then the
+ "stored" method is used. In this case, the bytes are simply stored as
+ is, eight bits per byte, with none of the above coding. The bytes are
+ preceded by a count, since there is no longer an EOB code.
+
+ If the data is compressible, then either the fixed or dynamic methods
+ are used. In the dynamic method, the compressed data is preceded by
+ an encoding of the literal/length and distance Huffman codes that are
+ to be used to decode this block. The representation is itself Huffman
+ coded, and so is preceded by a description of that code. These code
+ descriptions take up a little space, and so for small blocks, there is
+ a predefined set of codes, called the fixed codes. The fixed method is
+ used if the block codes up smaller that way (usually for quite small
+ chunks), otherwise the dynamic method is used. In the latter case, the
+ codes are customized to the probabilities in the current block, and so
+ can code it much better than the pre-determined fixed codes.
+
+ The Huffman codes themselves are decoded using a multi-level table
+ lookup, in order to maximize the speed of decoding plus the speed of
+ building the decoding tables. See the comments below that precede the
+ lbits and dbits tuning parameters.
+ */
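+
+/* A minimal sketch of the copy step described above, assuming a flat output
+ * buffer `out' with `pos' bytes already emitted. The copy is byte by byte
+ * because a match may overlap its own output (distance < length); the real
+ * decoder below does the same against the 32 K circular window `slide',
+ * wrapping and flushing as needed:
+ *
+ * static void copy_match(uch *out, unsigned *pos, unsigned len, unsigned dist)
+ * {
+ * while (len--) {
+ * out[*pos] = out[*pos - dist];
+ * (*pos)++;
+ * }
+ * }
+ */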
+
+
+/*
+ Notes beyond the 1.93a appnote.txt:
+
+ 1. Distance pointers never point before the beginning of the output
+ stream.
+ 2. Distance pointers can point back across blocks, up to 32k away.
+ 3. There is an implied maximum of 7 bits for the bit length table and
+ 15 bits for the actual data.
+ 4. If only one code exists, then it is encoded using one bit. (Zero
+ would be more efficient, but perhaps a little confusing.) If two
+ codes exist, they are coded using one bit each (0 and 1).
+ 5. There is no way of sending zero distance codes--a dummy must be
+ sent if there are none. (History: a pre 2.0 version of PKZIP would
+ store blocks with no distance codes, but this was discovered to be
+ too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
+ zero distance codes, which is sent as one code of zero bits in
+ length.
+ 6. There are up to 286 literal/length codes. Code 256 represents the
+ end-of-block. Note however that the static length tree defines
+ 288 codes just to fill out the Huffman codes. Codes 286 and 287
+ cannot be used though, since there is no length base or extra bits
+ defined for them. Similarly, there are up to 30 distance codes.
+ However, static trees define 32 codes (all 5 bits) to fill out the
+ Huffman codes, but the last two had better not show up in the data.
+ 7. Unzip can check dynamic Huffman blocks for complete code sets.
+ The exception is that a single code would not be complete (see #4).
+ 8. The five bits following the block type is really the number of
+ literal codes sent minus 257.
+ 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
+ (1+6+6). Therefore, to output three times the length, you output
+ three codes (1+1+1), whereas to output four times the same length,
+ you only need two codes (1+3). Hmm.
+ 10. In the tree reconstruction algorithm, Code = Code + Increment
+ only if BitLength(i) is not zero. (Pretty obvious.)
+ 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
+ 12. Note: length code 284 can represent 227-258, but length code 285
+ really is 258. The last length deserves its own, short code
+ since it gets used a lot in very redundant files. The length
+ 258 is special since 258 - 3 (the min match length) is 255.
+ 13. The literal/length and distance code bit lengths are read as a
+ single stream of lengths. It is possible (and advantageous) for
+ a repeat code (16, 17, or 18) to go across the boundary between
+ the two sets of lengths.
+ */
+
+#ifdef RCSID
+static char rcsid[] = "#Id: inflate.c,v 0.14 1993/06/10 13:27:04 jloup Exp #";
+#endif
+
+#ifndef STATIC
+
+#if defined(STDC_HEADERS) || defined(HAVE_STDLIB_H)
+# include <sys/types.h>
+# include <stdlib.h>
+#endif
+
+#include "gzip.h"
+#define STATIC
+#endif /* !STATIC */
+
+#ifndef INIT
+#define INIT
+#endif
+
+#define slide window
+
+/* Huffman code lookup table entry--this entry is four bytes for machines
+ that have 16-bit pointers (e.g. PC's in the small or medium model).
+ Valid extra bits are 0..13. e == 15 is EOB (end of block), e == 16
+ means that v is a literal, 16 < e < 32 means that v is a pointer to
+ the next table, which codes e - 16 bits, and lastly e == 99 indicates
+ an unused code. If a code with e == 99 is looked up, this implies an
+ error in the data. */
+struct huft {
+ uch e; /* number of extra bits or operation */
+ uch b; /* number of bits in this code or subcode */
+ union {
+ ush n; /* literal, length base, or distance base */
+ struct huft *t; /* pointer to next level of table */
+ } v;
+};
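+
+/* For instance, a first-level entry for a literal byte 'A' whose Huffman code
+ * is 7 bits long would look like { e = 16, b = 7, v.n = 'A' }, while an entry
+ * chaining to a 3-bit second-level table would have e == 16 + 3 == 19 and
+ * v.t pointing at that table (a sketch of the encoding described above). */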
+
+
+/* Function prototypes */
+STATIC int INIT huft_build OF((unsigned *, unsigned, unsigned,
+ const ush *, const ush *, struct huft **, int *));
+STATIC int INIT huft_free OF((struct huft *));
+STATIC int INIT inflate_codes OF((struct huft *, struct huft *, int, int));
+STATIC int INIT inflate_stored OF((void));
+STATIC int INIT inflate_fixed OF((void));
+STATIC int INIT inflate_dynamic OF((void));
+STATIC int INIT inflate_block OF((int *));
+STATIC int INIT inflate OF((void));
+
+
+/* The inflate algorithm uses a sliding 32 K byte window on the uncompressed
+ stream to find repeated byte strings. This is implemented here as a
+ circular buffer. The index is updated simply by incrementing and then
+ ANDing with 0x7fff (32K-1). */
+/* It is left to other modules to supply the 32 K area. It is assumed
+ to be usable as if it were declared "uch slide[32768];" or as just
+ "uch *slide;" and then malloc'ed in the latter case. The definition
+ must be in unzip.h, included above. */
+/* unsigned wp; current position in slide */
+#define wp outcnt
+#define flush_output(w) (wp=(w),flush_window())
+
+/* Tables for deflate from PKZIP's appnote.txt. */
+static const unsigned border[] = { /* Order of the bit length code lengths */
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+static const ush cplens[] = { /* Copy lengths for literal codes 257..285 */
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+/* note: see note #13 above about the 258 in this list. */
+static const ush cplext[] = { /* Extra bits for literal codes 257..285 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+ 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 99, 99}; /* 99==invalid */
+static const ush cpdist[] = { /* Copy offsets for distance codes 0..29 */
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+ 8193, 12289, 16385, 24577};
+static const ush cpdext[] = { /* Extra bits for distance codes */
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+ 12, 12, 13, 13};
+
+
+
+/* Macros for inflate() bit peeking and grabbing.
+ The usage is:
+
+ NEEDBITS(j)
+ x = b & mask_bits[j];
+ DUMPBITS(j)
+
+ where NEEDBITS makes sure that b has at least j bits in it, and
+ DUMPBITS removes the bits from b. The macros use the variable k
+ for the number of bits in b. Normally, b and k are register
+ variables for speed, and are initialized at the beginning of a
+ routine that uses these macros from a global bit buffer and count.
+
+ If we assume that EOB will be the longest code, then we will never
+ ask for bits with NEEDBITS that are beyond the end of the stream.
+ So, NEEDBITS should not read any more bytes than are needed to
+ meet the request. Then no bytes need to be "returned" to the buffer
+ at the end of the last block.
+
+ However, this assumption is not true for fixed blocks--the EOB code
+ is 7 bits, but the other literal/length codes can be 8 or 9 bits.
+ (The EOB code is shorter than other codes because fixed blocks are
+ generally short. So, while a block always has an EOB, many other
+ literal/length codes have a significantly lower probability of
+ showing up at all.) However, by making the first table have a
+ lookup of seven bits, the EOB code will be found in that first
+ lookup, and so will not require that too many bits be pulled from
+ the stream.
+ */
+
+STATIC ulg bb; /* bit buffer */
+STATIC unsigned bk; /* bits in bit buffer */
+
+STATIC const ush mask_bits[] = {
+ 0x0000,
+ 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
+ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
+};
+
+#define NEXTBYTE() ({ int v = get_byte(); if (v < 0) goto underrun; (uch)v; })
+#define NEEDBITS(n) {while(k<(n)){b|=((ulg)NEXTBYTE())<<k;k+=8;}}
+#define DUMPBITS(n) {b>>=(n);k-=(n);}
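+
+/* For example, pulling a 5-bit field out of the stream (as the dynamic-block
+ * header parser below does) looks like:
+ *
+ * NEEDBITS(5)
+ * nl = 257 + ((unsigned)b & 0x1f);
+ * DUMPBITS(5)
+ *
+ * where b and k are the routine's local copies of the bit buffer and count.
+ */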
+
+#ifndef NO_INFLATE_MALLOC
+/* A trivial malloc implementation, adapted from
+ * malloc by Hannu Savolainen 1993 and Matthias Urlichs 1994
+ */
+
+static unsigned long malloc_ptr;
+static int malloc_count;
+
+static void *malloc(int size)
+{
+ void *p;
+
+ if (size < 0)
+ error("Malloc error");
+ if (!malloc_ptr)
+ malloc_ptr = free_mem_ptr;
+
+ malloc_ptr = (malloc_ptr + 3) & ~3; /* Align */
+
+ p = (void *)malloc_ptr;
+ malloc_ptr += size;
+
+ if (free_mem_end_ptr && malloc_ptr >= free_mem_end_ptr)
+ error("Out of memory");
+
+ malloc_count++;
+ return p;
+}
+
+static void free(void *where)
+{
+ malloc_count--;
+ if (!malloc_count)
+ malloc_ptr = free_mem_ptr;
+}
+#else
+#define malloc(a) kmalloc(a, GFP_KERNEL)
+#define free(a) kfree(a)
+#endif
+
+/*
+ Huffman code decoding is performed using a multi-level table lookup.
+ The fastest way to decode is to simply build a lookup table whose
+ size is determined by the longest code. However, the time it takes
+ to build this table can also be a factor if the data being decoded
+ is not very long. The most common codes are necessarily the
+ shortest codes, so those codes dominate the decoding time, and hence
+ the speed. The idea is you can have a shorter table that decodes the
+ shorter, more probable codes, and then point to subsidiary tables for
+ the longer codes. The time it costs to decode the longer codes is
+ then traded against the time it takes to make longer tables.
+
+ The results of this trade-off are in the variables lbits and dbits
+ below. lbits is the number of bits the first level table for literal/
+ length codes can decode in one step, and dbits is the same thing for
+ the distance codes. Subsequent tables are also less than or equal to
+ those sizes. These values may be adjusted either when all of the
+ codes are shorter than that, in which case the longest code length in
+ bits is used, or when the shortest code is *longer* than the requested
+ table size, in which case the length of the shortest code in bits is
+ used.
+
+ There are two different values for the two tables, since they code a
+ different number of possibilities each. The literal/length table
+ codes 286 possible values, or in a flat code, a little over eight
+ bits. The distance table codes 30 possible values, or a little less
+ than five bits, flat. The optimum values for speed end up being
+ about one bit more than those, so lbits is 8+1 and dbits is 5+1.
+ The optimum values may differ though from machine to machine, and
+ possibly even between compilers. Your mileage may vary.
+ */
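+
+/* To put numbers on the trade-off: one flat table for the maximum 15-bit code
+ * length would need 1 << 15 = 32768 entries, whereas with lbits == 9 the
+ * first-level table holds only 1 << 9 = 512 entries and the rarely used
+ * longer codes pay for small second-level tables on demand. */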
+
+
+STATIC const int lbits = 9; /* bits in base literal/length lookup table */
+STATIC const int dbits = 6; /* bits in base distance lookup table */
+
+
+/* If BMAX needs to be larger than 16, then h and x[] should be ulg. */
+#define BMAX 16 /* maximum bit length of any code (16 for explode) */
+#define N_MAX 288 /* maximum number of codes in any set */
+
+
+STATIC unsigned hufts; /* track memory usage */
+
+
+STATIC int INIT huft_build(
+ unsigned *b, /* code lengths in bits (all assumed <= BMAX) */
+ unsigned n, /* number of codes (assumed <= N_MAX) */
+ unsigned s, /* number of simple-valued codes (0..s-1) */
+ const ush *d, /* list of base values for non-simple codes */
+ const ush *e, /* list of extra bits for non-simple codes */
+ struct huft **t, /* result: starting table */
+ int *m /* maximum lookup bits, returns actual */
+ )
+/* Given a list of code lengths and a maximum table size, make a set of
+ tables to decode that set of codes. Return zero on success, one if
+ the given code set is incomplete (the tables are still built in this
+ case), two if the input is invalid (all zero length codes or an
+ oversubscribed set of lengths), and three if not enough memory. */
+{
+ unsigned a; /* counter for codes of length k */
+ unsigned f; /* i repeats in table every f entries */
+ int g; /* maximum code length */
+ int h; /* table level */
+ register unsigned i; /* counter, current code */
+ register unsigned j; /* counter */
+ register int k; /* number of bits in current code */
+ int l; /* bits per table (returned in m) */
+ register unsigned *p; /* pointer into c[], b[], or v[] */
+ register struct huft *q; /* points to current table */
+ struct huft r; /* table entry for structure assignment */
+ register int w; /* bits before this table == (l * h) */
+ unsigned *xp; /* pointer into x */
+ int y; /* number of dummy codes added */
+ unsigned z; /* number of entries in current table */
+ struct {
+ unsigned c[BMAX+1]; /* bit length count table */
+ struct huft *u[BMAX]; /* table stack */
+ unsigned v[N_MAX]; /* values in order of bit length */
+ unsigned x[BMAX+1]; /* bit offsets, then code stack */
+ } *stk;
+ unsigned *c, *v, *x;
+ struct huft **u;
+ int ret;
+
+ DEBG("huft1 ");
+
+ stk = malloc(sizeof(*stk));
+ if (stk == NULL)
+ return 3; /* out of memory */
+
+ c = stk->c;
+ v = stk->v;
+ x = stk->x;
+ u = stk->u;
+
+ /* Generate counts for each bit length */
+ memzero(stk->c, sizeof(stk->c));
+ p = b; i = n;
+ do {
+ Tracecv(*p, (stderr, (n-i >= ' ' && n-i <= '~' ? "%c %d\n" : "0x%x %d\n"),
+ n-i, *p));
+ c[*p]++; /* assume all entries <= BMAX */
+ p++; /* Can't combine with above line (Solaris bug) */
+ } while (--i);
+ if (c[0] == n) /* null input--all zero length codes */
+ {
+ *t = (struct huft *)NULL;
+ *m = 0;
+ ret = 2;
+ goto out;
+ }
+
+ DEBG("huft2 ");
+
+ /* Find minimum and maximum length, bound *m by those */
+ l = *m;
+ for (j = 1; j <= BMAX; j++)
+ if (c[j])
+ break;
+ k = j; /* minimum code length */
+ if ((unsigned)l < j)
+ l = j;
+ for (i = BMAX; i; i--)
+ if (c[i])
+ break;
+ g = i; /* maximum code length */
+ if ((unsigned)l > i)
+ l = i;
+ *m = l;
+
+ DEBG("huft3 ");
+
+ /* Adjust last length count to fill out codes, if needed */
+ for (y = 1 << j; j < i; j++, y <<= 1)
+ if ((y -= c[j]) < 0) {
+ ret = 2; /* bad input: more codes than bits */
+ goto out;
+ }
+ if ((y -= c[i]) < 0) {
+ ret = 2;
+ goto out;
+ }
+ c[i] += y;
+
+ DEBG("huft4 ");
+
+ /* Generate starting offsets into the value table for each length */
+ x[1] = j = 0;
+ p = c + 1; xp = x + 2;
+ while (--i) { /* note that i == g from above */
+ *xp++ = (j += *p++);
+ }
+
+ DEBG("huft5 ");
+
+ /* Make a table of values in order of bit lengths */
+ p = b; i = 0;
+ do {
+ if ((j = *p++) != 0)
+ v[x[j]++] = i;
+ } while (++i < n);
+ n = x[g]; /* set n to length of v */
+
+ DEBG("h6 ");
+
+ /* Generate the Huffman codes and for each, make the table entries */
+ x[0] = i = 0; /* first Huffman code is zero */
+ p = v; /* grab values in bit order */
+ h = -1; /* no tables yet--level -1 */
+ w = -l; /* bits decoded == (l * h) */
+ u[0] = (struct huft *)NULL; /* just to keep compilers happy */
+ q = (struct huft *)NULL; /* ditto */
+ z = 0; /* ditto */
+ DEBG("h6a ");
+
+ /* go through the bit lengths (k already is bits in shortest code) */
+ for (; k <= g; k++)
+ {
+ DEBG("h6b ");
+ a = c[k];
+ while (a--)
+ {
+ DEBG("h6b1 ");
+ /* here i is the Huffman code of length k bits for value *p */
+ /* make tables up to required level */
+ while (k > w + l)
+ {
+ DEBG1("1 ");
+ h++;
+ w += l; /* previous table always l bits */
+
+ /* compute minimum size table less than or equal to l bits */
+ z = (z = g - w) > (unsigned)l ? l : z; /* upper limit on table size */
+ if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */
+ { /* too few codes for k-w bit table */
+ DEBG1("2 ");
+ f -= a + 1; /* deduct codes from patterns left */
+ xp = c + k;
+ if (j < z)
+ while (++j < z) /* try smaller tables up to z bits */
+ {
+ if ((f <<= 1) <= *++xp)
+ break; /* enough codes to use up j bits */
+ f -= *xp; /* else deduct codes from patterns */
+ }
+ }
+ DEBG1("3 ");
+ z = 1 << j; /* table entries for j-bit table */
+
+ /* allocate and link in new table */
+ if ((q = (struct huft *)malloc((z + 1)*sizeof(struct huft))) ==
+ (struct huft *)NULL)
+ {
+ if (h)
+ huft_free(u[0]);
+ ret = 3; /* not enough memory */
+ goto out;
+ }
+ DEBG1("4 ");
+ hufts += z + 1; /* track memory usage */
+ *t = q + 1; /* link to list for huft_free() */
+ *(t = &(q->v.t)) = (struct huft *)NULL;
+ u[h] = ++q; /* table starts after link */
+
+ DEBG1("5 ");
+ /* connect to last table, if there is one */
+ if (h)
+ {
+ x[h] = i; /* save pattern for backing up */
+ r.b = (uch)l; /* bits to dump before this table */
+ r.e = (uch)(16 + j); /* bits in this table */
+ r.v.t = q; /* pointer to this table */
+ j = i >> (w - l); /* (get around Turbo C bug) */
+ u[h-1][j] = r; /* connect to last table */
+ }
+ DEBG1("6 ");
+ }
+ DEBG("h6c ");
+
+ /* set up table entry in r */
+ r.b = (uch)(k - w);
+ if (p >= v + n)
+ r.e = 99; /* out of values--invalid code */
+ else if (*p < s)
+ {
+ r.e = (uch)(*p < 256 ? 16 : 15); /* 256 is end-of-block code */
+ r.v.n = (ush)(*p); /* simple code is just the value */
+ p++; /* one compiler does not like *p++ */
+ }
+ else
+ {
+ r.e = (uch)e[*p - s]; /* non-simple--look up in lists */
+ r.v.n = d[*p++ - s];
+ }
+ DEBG("h6d ");
+
+ /* fill code-like entries with r */
+ f = 1 << (k - w);
+ for (j = i >> w; j < z; j += f)
+ q[j] = r;
+
+ /* backwards increment the k-bit code i */
+ for (j = 1 << (k - 1); i & j; j >>= 1)
+ i ^= j;
+ i ^= j;
+
+ /* backup over finished tables */
+ while ((i & ((1 << w) - 1)) != x[h])
+ {
+ h--; /* don't need to update q */
+ w -= l;
+ }
+ DEBG("h6e ");
+ }
+ DEBG("h6f ");
+ }
+
+ DEBG("huft7 ");
+
+ /* Return true (1) if we were given an incomplete table */
+ ret = y != 0 && g != 1;
+
+ out:
+ free(stk);
+ return ret;
+}
+
+
+
+STATIC int INIT huft_free(
+ struct huft *t /* table to free */
+ )
+/* Free the malloc'ed tables built by huft_build(), which makes a linked
+ list of the tables it made, with the links in a dummy first entry of
+ each table. */
+{
+ register struct huft *p, *q;
+
+
+ /* Go through linked list, freeing from the malloced (t[-1]) address. */
+ p = t;
+ while (p != (struct huft *)NULL)
+ {
+ q = (--p)->v.t;
+ free((char*)p);
+ p = q;
+ }
+ return 0;
+}
+
+
+STATIC int INIT inflate_codes(
+ struct huft *tl, /* literal/length decoder tables */
+ struct huft *td, /* distance decoder tables */
+ int bl, /* number of bits decoded by tl[] */
+ int bd /* number of bits decoded by td[] */
+ )
+/* inflate (decompress) the codes in a deflated (compressed) block.
+ Return an error code or zero if it all goes ok. */
+{
+ register unsigned e; /* table entry flag/number of extra bits */
+ unsigned n, d; /* length and index for copy */
+ unsigned w; /* current window position */
+ struct huft *t; /* pointer to table entry */
+ unsigned ml, md; /* masks for bl and bd bits */
+ register ulg b; /* bit buffer */
+ register unsigned k; /* number of bits in bit buffer */
+
+
+ /* make local copies of globals */
+ b = bb; /* initialize bit buffer */
+ k = bk;
+ w = wp; /* initialize window position */
+
+ /* inflate the coded data */
+ ml = mask_bits[bl]; /* precompute masks for speed */
+ md = mask_bits[bd];
+ for (;;) /* do until end of block */
+ {
+ NEEDBITS((unsigned)bl)
+ if ((e = (t = tl + ((unsigned)b & ml))->e) > 16)
+ do {
+ if (e == 99)
+ return 1;
+ DUMPBITS(t->b)
+ e -= 16;
+ NEEDBITS(e)
+ } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16);
+ DUMPBITS(t->b)
+ if (e == 16) /* then it's a literal */
+ {
+ slide[w++] = (uch)t->v.n;
+ Tracevv((stderr, "%c", slide[w-1]));
+ if (w == WSIZE)
+ {
+ flush_output(w);
+ w = 0;
+ }
+ }
+ else /* it's an EOB or a length */
+ {
+ /* exit if end of block */
+ if (e == 15)
+ break;
+
+ /* get length of block to copy */
+ NEEDBITS(e)
+ n = t->v.n + ((unsigned)b & mask_bits[e]);
+ DUMPBITS(e);
+
+ /* decode distance of block to copy */
+ NEEDBITS((unsigned)bd)
+ if ((e = (t = td + ((unsigned)b & md))->e) > 16)
+ do {
+ if (e == 99)
+ return 1;
+ DUMPBITS(t->b)
+ e -= 16;
+ NEEDBITS(e)
+ } while ((e = (t = t->v.t + ((unsigned)b & mask_bits[e]))->e) > 16);
+ DUMPBITS(t->b)
+ NEEDBITS(e)
+ d = w - t->v.n - ((unsigned)b & mask_bits[e]);
+ DUMPBITS(e)
+ Tracevv((stderr,"\\[%d,%d]", w-d, n));
+
+ /* do the copy */
+ do {
+ n -= (e = (e = WSIZE - ((d &= WSIZE-1) > w ? d : w)) > n ? n : e);
+#if !defined(NOMEMCPY) && !defined(DEBUG)
+ if (w - d >= e) /* (this test assumes unsigned comparison) */
+ {
+ memcpy(slide + w, slide + d, e);
+ w += e;
+ d += e;
+ }
+ else /* do it slow to avoid memcpy() overlap */
+#endif /* !NOMEMCPY */
+ do {
+ slide[w++] = slide[d++];
+ Tracevv((stderr, "%c", slide[w-1]));
+ } while (--e);
+ if (w == WSIZE)
+ {
+ flush_output(w);
+ w = 0;
+ }
+ } while (n);
+ }
+ }
+
+
+ /* restore the globals from the locals */
+ wp = w; /* restore global window pointer */
+ bb = b; /* restore global bit buffer */
+ bk = k;
+
+ /* done */
+ return 0;
+
+ underrun:
+ return 4; /* Input underrun */
+}
+
+
+
+STATIC int INIT inflate_stored(void)
+/* "decompress" an inflated type 0 (stored) block. */
+{
+ unsigned n; /* number of bytes in block */
+ unsigned w; /* current window position */
+ register ulg b; /* bit buffer */
+ register unsigned k; /* number of bits in bit buffer */
+
+ DEBG("<stor");
+
+ /* make local copies of globals */
+ b = bb; /* initialize bit buffer */
+ k = bk;
+ w = wp; /* initialize window position */
+
+
+ /* go to byte boundary */
+ n = k & 7;
+ DUMPBITS(n);
+
+
+ /* get the length and its complement */
+ NEEDBITS(16)
+ n = ((unsigned)b & 0xffff);
+ DUMPBITS(16)
+ NEEDBITS(16)
+ if (n != (unsigned)((~b) & 0xffff))
+ return 1; /* error in compressed data */
+ DUMPBITS(16)
+
+
+ /* read and output the compressed data */
+ while (n--)
+ {
+ NEEDBITS(8)
+ slide[w++] = (uch)b;
+ if (w == WSIZE)
+ {
+ flush_output(w);
+ w = 0;
+ }
+ DUMPBITS(8)
+ }
+
+
+ /* restore the globals from the locals */
+ wp = w; /* restore global window pointer */
+ bb = b; /* restore global bit buffer */
+ bk = k;
+
+ DEBG(">");
+ return 0;
+
+ underrun:
+ return 4; /* Input underrun */
+}
+
+
+/*
+ * We use `noinline' here to prevent gcc-3.5 from using too much stack space
+ */
+STATIC int noinline INIT inflate_fixed(void)
+/* decompress an inflated type 1 (fixed Huffman codes) block. We should
+ either replace this with a custom decoder, or at least precompute the
+ Huffman tables. */
+{
+ int i; /* temporary variable */
+ struct huft *tl; /* literal/length code table */
+ struct huft *td; /* distance code table */
+ int bl; /* lookup bits for tl */
+ int bd; /* lookup bits for td */
+ unsigned *l; /* length list for huft_build */
+
+ DEBG("<fix");
+
+ l = malloc(sizeof(*l) * 288);
+ if (l == NULL)
+ return 3; /* out of memory */
+
+ /* set up literal table */
+ for (i = 0; i < 144; i++)
+ l[i] = 8;
+ for (; i < 256; i++)
+ l[i] = 9;
+ for (; i < 280; i++)
+ l[i] = 7;
+ for (; i < 288; i++) /* make a complete, but wrong code set */
+ l[i] = 8;
+ bl = 7;
+ if ((i = huft_build(l, 288, 257, cplens, cplext, &tl, &bl)) != 0) {
+ free(l);
+ return i;
+ }
+
+ /* set up distance table */
+ for (i = 0; i < 30; i++) /* make an incomplete code set */
+ l[i] = 5;
+ bd = 5;
+ if ((i = huft_build(l, 30, 0, cpdist, cpdext, &td, &bd)) > 1)
+ {
+ huft_free(tl);
+ free(l);
+
+ DEBG(">");
+ return i;
+ }
+
+
+ /* decompress until an end-of-block code */
+ if (inflate_codes(tl, td, bl, bd)) {
+ free(l);
+ return 1;
+ }
+
+ /* free the decoding tables, return */
+ free(l);
+ huft_free(tl);
+ huft_free(td);
+ return 0;
+}
+
+
+/*
+ * We use `noinline' here to prevent gcc-3.5 from using too much stack space
+ */
+STATIC int noinline INIT inflate_dynamic(void)
+/* decompress an inflated type 2 (dynamic Huffman codes) block. */
+{
+ int i; /* temporary variables */
+ unsigned j;
+ unsigned l; /* last length */
+ unsigned m; /* mask for bit lengths table */
+ unsigned n; /* number of lengths to get */
+ struct huft *tl; /* literal/length code table */
+ struct huft *td; /* distance code table */
+ int bl; /* lookup bits for tl */
+ int bd; /* lookup bits for td */
+ unsigned nb; /* number of bit length codes */
+ unsigned nl; /* number of literal/length codes */
+ unsigned nd; /* number of distance codes */
+ unsigned *ll; /* literal/length and distance code lengths */
+ register ulg b; /* bit buffer */
+ register unsigned k; /* number of bits in bit buffer */
+ int ret;
+
+ DEBG("<dyn");
+
+#ifdef PKZIP_BUG_WORKAROUND
+ ll = malloc(sizeof(*ll) * (288+32)); /* literal/length and distance code lengths */
+#else
+ ll = malloc(sizeof(*ll) * (286+30)); /* literal/length and distance code lengths */
+#endif
+
+ if (ll == NULL)
+ return 1;
+
+ /* make local bit buffer */
+ b = bb;
+ k = bk;
+
+
+ /* read in table lengths */
+ NEEDBITS(5)
+ nl = 257 + ((unsigned)b & 0x1f); /* number of literal/length codes */
+ DUMPBITS(5)
+ NEEDBITS(5)
+ nd = 1 + ((unsigned)b & 0x1f); /* number of distance codes */
+ DUMPBITS(5)
+ NEEDBITS(4)
+ nb = 4 + ((unsigned)b & 0xf); /* number of bit length codes */
+ DUMPBITS(4)
+#ifdef PKZIP_BUG_WORKAROUND
+ if (nl > 288 || nd > 32)
+#else
+ if (nl > 286 || nd > 30)
+#endif
+ {
+ ret = 1; /* bad lengths */
+ goto out;
+ }
+
+ DEBG("dyn1 ");
+
+ /* read in bit-length-code lengths */
+ for (j = 0; j < nb; j++)
+ {
+ NEEDBITS(3)
+ ll[border[j]] = (unsigned)b & 7;
+ DUMPBITS(3)
+ }
+ for (; j < 19; j++)
+ ll[border[j]] = 0;
+
+ DEBG("dyn2 ");
+
+ /* build decoding table for trees--single level, 7 bit lookup */
+ bl = 7;
+ if ((i = huft_build(ll, 19, 19, NULL, NULL, &tl, &bl)) != 0)
+ {
+ if (i == 1)
+ huft_free(tl);
+ ret = i; /* incomplete code set */
+ goto out;
+ }
+
+ DEBG("dyn3 ");
+
+ /* read in literal and distance code lengths */
+ n = nl + nd;
+ m = mask_bits[bl];
+ i = l = 0;
+ while ((unsigned)i < n)
+ {
+ NEEDBITS((unsigned)bl)
+ j = (td = tl + ((unsigned)b & m))->b;
+ DUMPBITS(j)
+ j = td->v.n;
+ if (j < 16) /* length of code in bits (0..15) */
+ ll[i++] = l = j; /* save last length in l */
+ else if (j == 16) /* repeat last length 3 to 6 times */
+ {
+ NEEDBITS(2)
+ j = 3 + ((unsigned)b & 3);
+ DUMPBITS(2)
+ if ((unsigned)i + j > n) {
+ ret = 1;
+ goto out;
+ }
+ while (j--)
+ ll[i++] = l;
+ }
+ else if (j == 17) /* 3 to 10 zero length codes */
+ {
+ NEEDBITS(3)
+ j = 3 + ((unsigned)b & 7);
+ DUMPBITS(3)
+ if ((unsigned)i + j > n) {
+ ret = 1;
+ goto out;
+ }
+ while (j--)
+ ll[i++] = 0;
+ l = 0;
+ }
+ else /* j == 18: 11 to 138 zero length codes */
+ {
+ NEEDBITS(7)
+ j = 11 + ((unsigned)b & 0x7f);
+ DUMPBITS(7)
+ if ((unsigned)i + j > n) {
+ ret = 1;
+ goto out;
+ }
+ while (j--)
+ ll[i++] = 0;
+ l = 0;
+ }
+ }
+
+ DEBG("dyn4 ");
+
+ /* free decoding table for trees */
+ huft_free(tl);
+
+ DEBG("dyn5 ");
+
+ /* restore the global bit buffer */
+ bb = b;
+ bk = k;
+
+ DEBG("dyn5a ");
+
+ /* build the decoding tables for literal/length and distance codes */
+ bl = lbits;
+ if ((i = huft_build(ll, nl, 257, cplens, cplext, &tl, &bl)) != 0)
+ {
+ DEBG("dyn5b ");
+ if (i == 1) {
+ error("incomplete literal tree");
+ huft_free(tl);
+ }
+ ret = i; /* incomplete code set */
+ goto out;
+ }
+ DEBG("dyn5c ");
+ bd = dbits;
+ if ((i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &td, &bd)) != 0)
+ {
+ DEBG("dyn5d ");
+ if (i == 1) {
+ error("incomplete distance tree");
+#ifdef PKZIP_BUG_WORKAROUND
+ i = 0;
+ }
+#else
+ huft_free(td);
+ }
+ huft_free(tl);
+ ret = i; /* incomplete code set */
+ goto out;
+#endif
+ }
+
+ DEBG("dyn6 ");
+
+ /* decompress until an end-of-block code */
+ if (inflate_codes(tl, td, bl, bd)) {
+ ret = 1;
+ goto out;
+ }
+
+ DEBG("dyn7 ");
+
+ /* free the decoding tables, return */
+ huft_free(tl);
+ huft_free(td);
+
+ DEBG(">");
+ ret = 0;
+ out:
+ free(ll);
+ return ret;
+
+ underrun:
+ ret = 4; /* Input underrun */
+ goto out;
+}
+
+
+
+STATIC int INIT inflate_block(
+ int *e /* last block flag */
+ )
+/* decompress an inflated block */
+{
+ unsigned t; /* block type */
+ register ulg b; /* bit buffer */
+ register unsigned k; /* number of bits in bit buffer */
+
+ DEBG("<blk");
+
+ /* make local bit buffer */
+ b = bb;
+ k = bk;
+
+
+ /* read in last block bit */
+ NEEDBITS(1)
+ *e = (int)b & 1;
+ DUMPBITS(1)
+
+
+ /* read in block type */
+ NEEDBITS(2)
+ t = (unsigned)b & 3;
+ DUMPBITS(2)
+
+
+ /* restore the global bit buffer */
+ bb = b;
+ bk = k;
+
+ /* inflate that block type */
+ if (t == 2)
+ return inflate_dynamic();
+ if (t == 0)
+ return inflate_stored();
+ if (t == 1)
+ return inflate_fixed();
+
+ DEBG(">");
+
+ /* bad block type */
+ return 2;
+
+ underrun:
+ return 4; /* Input underrun */
+}
+
+
+
+STATIC int INIT inflate(void)
+/* decompress an inflated entry */
+{
+ int e; /* last block flag */
+ int r; /* result code */
+ unsigned h; /* maximum struct huft's malloc'ed */
+
+ /* initialize window, bit buffer */
+ wp = 0;
+ bk = 0;
+ bb = 0;
+
+
+ /* decompress until the last block */
+ h = 0;
+ do {
+ hufts = 0;
+#ifdef ARCH_HAS_DECOMP_WDOG
+ arch_decomp_wdog();
+#endif
+ r = inflate_block(&e);
+ if (r)
+ return r;
+ if (hufts > h)
+ h = hufts;
+ } while (!e);
+
+ /* Undo too much lookahead. The next read will be byte aligned so we
+ * can discard unused bits in the last meaningful byte.
+ */
+ while (bk >= 8) {
+ bk -= 8;
+ inptr--;
+ }
+
+ /* flush out slide */
+ flush_output(wp);
+
+
+ /* return success */
+#ifdef DEBUG
+ fprintf(stderr, "<%u> ", h);
+#endif /* DEBUG */
+ return 0;
+}
+
+/**********************************************************************
+ *
+ * The following are support routines for inflate.c
+ *
+ **********************************************************************/
+
+static ulg crc_32_tab[256];
+static ulg crc; /* initialized in makecrc() so it'll reside in bss */
+#define CRC_VALUE (crc ^ 0xffffffffUL)
+
+/*
+ * Code to compute the CRC-32 table. Borrowed from
+ * gzip-1.0.3/makecrc.c.
+ */
+
+static void INIT
+makecrc(void)
+{
+/* Not copyrighted 1990 Mark Adler */
+
+ unsigned long c; /* crc shift register */
+ unsigned long e; /* polynomial exclusive-or pattern */
+ int i; /* counter for all possible eight bit values */
+ int k; /* byte being shifted into crc apparatus */
+
+ /* terms of polynomial defining this crc (except x^32): */
+ static const int p[] = {0,1,2,4,5,7,8,10,11,12,16,22,23,26};
+
+ /* Make exclusive-or pattern from polynomial */
+ e = 0;
+ for (i = 0; i < sizeof(p)/sizeof(int); i++)
+ e |= 1L << (31 - p[i]);
+
+ crc_32_tab[0] = 0;
+
+ for (i = 1; i < 256; i++)
+ {
+ c = 0;
+ for (k = i | 256; k != 1; k >>= 1)
+ {
+ c = c & 1 ? (c >> 1) ^ e : c >> 1;
+ if (k & 1)
+ c ^= e;
+ }
+ crc_32_tab[i] = c;
+ }
+
+ /* this is initialized here so this code could reside in ROM */
+ crc = (ulg)0xffffffffUL; /* shift register contents */
+}
+
+/* gzip flag byte */
+#define ASCII_FLAG 0x01 /* bit 0 set: file probably ASCII text */
+#define CONTINUATION 0x02 /* bit 1 set: continuation of multi-part gzip file */
+#define EXTRA_FIELD 0x04 /* bit 2 set: extra field present */
+#define ORIG_NAME 0x08 /* bit 3 set: original file name present */
+#define COMMENT 0x10 /* bit 4 set: file comment present */
+#define ENCRYPTED 0x20 /* bit 5 set: file is encrypted */
+#define RESERVED 0xC0 /* bit 6,7: reserved */
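+
+/* A sketch of the gzip member layout handled below (per RFC 1952, ignoring
+ * the optional fields selected by the flag bits above):
+ *
+ * bytes 0-1 magic (0x1f, 0x8b)
+ * byte 2 compression method (8 == deflate)
+ * byte 3 flag byte
+ * bytes 4-7 mtime byte 8 XFL byte 9 OS
+ * ... deflate stream ...
+ * last 8 bytes CRC-32 and uncompressed length, both little-endian
+ */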
+
+/*
+ * Do the uncompression!
+ */
+static int INIT gunzip(void)
+{
+ uch flags;
+ unsigned char magic[2]; /* magic header */
+ char method;
+ ulg orig_crc = 0; /* original crc */
+ ulg orig_len = 0; /* original uncompressed length */
+ int res;
+
+ magic[0] = NEXTBYTE();
+ magic[1] = NEXTBYTE();
+ method = NEXTBYTE();
+
+ if (magic[0] != 037 ||
+ ((magic[1] != 0213) && (magic[1] != 0236))) {
+ error("bad gzip magic numbers");
+ return -1;
+ }
+
+ /* We only support method #8, DEFLATED */
+ if (method != 8) {
+ error("internal error, invalid method");
+ return -1;
+ }
+
+ flags = (uch)get_byte();
+ if ((flags & ENCRYPTED) != 0) {
+ error("Input is encrypted");
+ return -1;
+ }
+ if ((flags & CONTINUATION) != 0) {
+ error("Multi part input");
+ return -1;
+ }
+ if ((flags & RESERVED) != 0) {
+ error("Input has invalid flags");
+ return -1;
+ }
+ NEXTBYTE(); /* Get timestamp */
+ NEXTBYTE();
+ NEXTBYTE();
+ NEXTBYTE();
+
+ (void)NEXTBYTE(); /* Ignore extra flags for the moment */
+ (void)NEXTBYTE(); /* Ignore OS type for the moment */
+
+ if ((flags & EXTRA_FIELD) != 0) {
+ unsigned len = (unsigned)NEXTBYTE();
+ len |= ((unsigned)NEXTBYTE())<<8;
+ while (len--) (void)NEXTBYTE();
+ }
+
+ /* Get original file name if it was truncated */
+ if ((flags & ORIG_NAME) != 0) {
+ /* Discard the old name */
+ while (NEXTBYTE() != 0) /* null */ ;
+ }
+
+ /* Discard file comment if any */
+ if ((flags & COMMENT) != 0) {
+ while (NEXTBYTE() != 0) /* null */ ;
+ }
+
+ /* Decompress */
+ if ((res = inflate())) {
+ switch (res) {
+ case 0:
+ break;
+ case 1:
+ error("invalid compressed format (err=1)");
+ break;
+ case 2:
+ error("invalid compressed format (err=2)");
+ break;
+ case 3:
+ error("out of memory");
+ break;
+ case 4:
+ error("out of input data");
+ break;
+ default:
+ error("invalid compressed format (other)");
+ }
+ return -1;
+ }
+
+ /* Get the crc and original length */
+ /* crc32 (see algorithm.doc)
+ * uncompressed input size modulo 2^32
+ */
+ orig_crc = (ulg) NEXTBYTE();
+ orig_crc |= (ulg) NEXTBYTE() << 8;
+ orig_crc |= (ulg) NEXTBYTE() << 16;
+ orig_crc |= (ulg) NEXTBYTE() << 24;
+
+ orig_len = (ulg) NEXTBYTE();
+ orig_len |= (ulg) NEXTBYTE() << 8;
+ orig_len |= (ulg) NEXTBYTE() << 16;
+ orig_len |= (ulg) NEXTBYTE() << 24;
+
+ /* Validate decompression */
+ if (orig_crc != CRC_VALUE) {
+ error("crc error");
+ return -1;
+ }
+ if (orig_len != bytes_out) {
+ error("length error");
+ return -1;
+ }
+ return 0;
+
+ underrun: /* NEXTBYTE() goto's here if needed */
+ error("out of input data");
+ return -1;
+}
fi.submap |= 1U << XENFEAT_supervisor_mode_kernel;
#ifdef CONFIG_X86
if ( !is_hvm_vcpu(current) )
- fi.submap |= 1U << XENFEAT_mmu_pt_update_preserve_ad;
+ fi.submap |= (1U << XENFEAT_mmu_pt_update_preserve_ad) |
+ (1U << XENFEAT_highmem_assist) |
+ (1U << XENFEAT_gnttab_map_avail_bits);
#endif
break;
default:
VMCOREINFO_SYMBOL(frame_table);
VMCOREINFO_SYMBOL(alloc_bitmap);
VMCOREINFO_SYMBOL(max_page);
- VMCOREINFO_SYMBOL(xenheap_phys_end);
VMCOREINFO_STRUCT_SIZE(page_info);
VMCOREINFO_STRUCT_SIZE(domain);
{
printk("General information for domain %u:\n", d->domain_id);
cpuset_print(tmpstr, sizeof(tmpstr), d->domain_dirty_cpumask);
- printk(" refcnt=%d nr_pages=%d xenheap_pages=%d "
- "dirty_cpus=%s\n",
- atomic_read(&d->refcnt),
- d->tot_pages, d->xenheap_pages, tmpstr);
+ printk(" refcnt=%d dying=%d nr_pages=%d xenheap_pages=%d "
+ "dirty_cpus=%s max_pages=%u\n",
+ atomic_read(&d->refcnt), d->is_dying,
+ d->tot_pages, d->xenheap_pages, tmpstr, d->max_pages);
printk(" handle=%02x%02x%02x%02x-%02x%02x-%02x%02x-"
"%02x%02x-%02x%02x%02x%02x%02x%02x vm_assist=%08lx\n",
d->handle[ 0], d->handle[ 1], d->handle[ 2], d->handle[ 3],
test_bit(v->virq_to_evtchn[VIRQ_DEBUG],
&shared_info(d, evtchn_mask)),
test_bit(v->virq_to_evtchn[VIRQ_DEBUG] /
- BITS_PER_GUEST_LONG(d),
+ BITS_PER_EVTCHN_WORD(d),
&vcpu_info(v, evtchn_pending_sel)));
send_guest_vcpu_virq(v, VIRQ_DEBUG);
}
[XEN_ELFNOTE_ENTRY] = { "ENTRY", 0},
[XEN_ELFNOTE_HYPERCALL_PAGE] = { "HYPERCALL_PAGE", 0},
[XEN_ELFNOTE_VIRT_BASE] = { "VIRT_BASE", 0},
+ [XEN_ELFNOTE_INIT_P2M] = { "INIT_P2M", 0},
[XEN_ELFNOTE_PADDR_OFFSET] = { "PADDR_OFFSET", 0},
[XEN_ELFNOTE_HV_START_LOW] = { "HV_START_LOW", 0},
[XEN_ELFNOTE_XEN_VERSION] = { "XEN_VERSION", 1},
case XEN_ELFNOTE_ENTRY:
parms->virt_entry = val;
break;
+ case XEN_ELFNOTE_INIT_P2M:
+ parms->p2m_base = val;
+ break;
case XEN_ELFNOTE_PADDR_OFFSET:
parms->elf_paddr_offset = val;
break;
elf_msg(elf, " virt_kstart = 0x%" PRIx64 "\n", parms->virt_kstart);
elf_msg(elf, " virt_kend = 0x%" PRIx64 "\n", parms->virt_kend);
elf_msg(elf, " virt_entry = 0x%" PRIx64 "\n", parms->virt_entry);
+ elf_msg(elf, " p2m_base = 0x%" PRIx64 "\n", parms->p2m_base);
if ( (parms->virt_kstart > parms->virt_kend) ||
(parms->virt_entry < parms->virt_kstart) ||
return -1;
}
+ if ( (parms->p2m_base != UNSET_ADDR) &&
+ (parms->p2m_base >= parms->virt_kstart) &&
+ (parms->p2m_base < parms->virt_kend) )
+ {
+ elf_err(elf, "%s: ERROR: P->M table base is out of bounds.\n",
+ __FUNCTION__);
+ return -1;
+ }
+
return 0;
}
parms->virt_entry = UNSET_ADDR;
parms->virt_hypercall = UNSET_ADDR;
parms->virt_hv_start_low = UNSET_ADDR;
+ parms->p2m_base = UNSET_ADDR;
parms->elf_paddr_offset = UNSET_ADDR;
/* Find and parse elf notes. */
#include <xen/types.h>
#include <xen/string.h>
#include <xen/lib.h>
+#include <xen/libelf.h>
#include <asm/byteorder.h>
#include <public/elfnote.h>
-#include <public/libelf.h>
#define elf_msg(elf, fmt, args ... ) \
if (elf->verbose) printk(fmt, ## args )
#error Unsupported OS
#endif
#include <xen/elfnote.h>
-#include <xen/libelf.h>
+#include <xen/libelf/libelf.h>
#include "xenctrl.h"
#include "xc_private.h"
if ( unlikely(__copy_from_guest_offset(&gpfn, a->extent_list, i, 1)) )
goto out;
- page = alloc_domheap_pages(d, a->extent_order, a->memflags);
- if ( unlikely(page == NULL) )
+ if ( a->memflags & MEMF_populate_on_demand )
{
- gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
- "id=%d memflags=%x (%ld of %d)\n",
- a->extent_order, d->domain_id, a->memflags,
- i, a->nr_extents);
- goto out;
+ if ( guest_physmap_mark_populate_on_demand(d, gpfn,
+ a->extent_order) < 0 )
+ goto out;
}
+ else
+ {
+ page = alloc_domheap_pages(d, a->extent_order, a->memflags);
+ if ( unlikely(page == NULL) )
+ {
+ gdprintk(XENLOG_INFO, "Could not allocate order=%d extent: "
+ "id=%d memflags=%x (%ld of %d)\n",
+ a->extent_order, d->domain_id, a->memflags,
+ i, a->nr_extents);
+ goto out;
+ }
- mfn = page_to_mfn(page);
- guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
+ mfn = page_to_mfn(page);
+ guest_physmap_add_page(d, gpfn, mfn, a->extent_order);
- if ( !paging_mode_translate(d) )
- {
- for ( j = 0; j < (1 << a->extent_order); j++ )
- set_gpfn_from_mfn(mfn + j, gpfn + j);
+ if ( !paging_mode_translate(d) )
+ {
+ for ( j = 0; j < (1 << a->extent_order); j++ )
+ set_gpfn_from_mfn(mfn + j, gpfn + j);
- /* Inform the domain of the new page's machine address. */
- if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
- goto out;
+ /* Inform the domain of the new page's machine address. */
+ if ( unlikely(__copy_to_guest_offset(a->extent_list, i, &mfn, 1)) )
+ goto out;
+ }
}
}
- out:
+out:
a->nr_done = i;
}
if ( unlikely(__copy_from_guest_offset(&gmfn, a->extent_list, i, 1)) )
goto out;
+ /* See if populate-on-demand wants to handle this */
+ if ( is_hvm_domain(a->domain)
+ && p2m_pod_decrease_reservation(a->domain, gmfn, a->extent_order) )
+ continue;
+
for ( j = 0; j < (1 << a->extent_order); j++ )
if ( !guest_remove_page(a->domain, gmfn + j) )
goto out;
a->nr_done = i;
}
-static long translate_gpfn_list(
- XEN_GUEST_HANDLE(xen_translate_gpfn_list_t) uop, unsigned long *progress)
-{
- struct xen_translate_gpfn_list op;
- unsigned long i;
- xen_pfn_t gpfn;
- xen_pfn_t mfn;
- struct domain *d;
- int rc;
-
- if ( copy_from_guest(&op, uop, 1) )
- return -EFAULT;
-
- /* Is size too large for us to encode a continuation? */
- if ( op.nr_gpfns > (ULONG_MAX >> MEMOP_EXTENT_SHIFT) )
- return -EINVAL;
-
- if ( !guest_handle_subrange_okay(op.gpfn_list, *progress, op.nr_gpfns-1) ||
- !guest_handle_subrange_okay(op.mfn_list, *progress, op.nr_gpfns-1) )
- return -EFAULT;
-
- rc = rcu_lock_target_domain_by_id(op.domid, &d);
- if ( rc )
- return rc;
-
- if ( !paging_mode_translate(d) )
- {
- rcu_unlock_domain(d);
- return -EINVAL;
- }
-
- for ( i = *progress; i < op.nr_gpfns; i++ )
- {
- if ( hypercall_preempt_check() )
- {
- rcu_unlock_domain(d);
- *progress = i;
- return -EAGAIN;
- }
-
- if ( unlikely(__copy_from_guest_offset(&gpfn, op.gpfn_list, i, 1)) )
- {
- rcu_unlock_domain(d);
- return -EFAULT;
- }
-
- mfn = gmfn_to_mfn(d, gpfn);
-
- rc = xsm_translate_gpfn_list(current->domain, mfn);
- if ( rc )
- {
- rcu_unlock_domain(d);
- return rc;
- }
-
- if ( unlikely(__copy_to_guest_offset(op.mfn_list, i, &mfn, 1)) )
- {
- rcu_unlock_domain(d);
- return -EFAULT;
- }
- }
-
- rcu_unlock_domain(d);
- return 0;
-}
-
static long memory_exchange(XEN_GUEST_HANDLE(xen_memory_exchange_t) arg)
{
struct xen_memory_exchange exch;
- LIST_HEAD(in_chunk_list);
- LIST_HEAD(out_chunk_list);
+ PAGE_LIST_HEAD(in_chunk_list);
+ PAGE_LIST_HEAD(out_chunk_list);
unsigned long in_chunk_order, out_chunk_order;
xen_pfn_t gpfn, gmfn, mfn;
unsigned long i, j, k;
goto fail;
}
- list_add(&page->list, &in_chunk_list);
+ page_list_add(page, &in_chunk_list);
}
}
goto fail;
}
- list_add(&page->list, &out_chunk_list);
+ page_list_add(page, &out_chunk_list);
}
/*
*/
/* Destroy final reference to each input page. */
- while ( !list_empty(&in_chunk_list) )
+ while ( (page = page_list_remove_head(&in_chunk_list)) )
{
- page = list_entry(in_chunk_list.next, struct page_info, list);
- list_del(&page->list);
if ( !test_and_clear_bit(_PGC_allocated, &page->count_info) )
BUG();
mfn = page_to_mfn(page);
/* Assign each output page to the domain. */
j = 0;
- while ( !list_empty(&out_chunk_list) )
+ while ( (page = page_list_remove_head(&out_chunk_list)) )
{
- page = list_entry(out_chunk_list.next, struct page_info, list);
- list_del(&page->list);
if ( assign_pages(d, page, exch.out.extent_order,
MEMF_no_refcount) )
BUG();
*/
fail:
/* Reassign any input pages we managed to steal. */
- while ( !list_empty(&in_chunk_list) )
- {
- page = list_entry(in_chunk_list.next, struct page_info, list);
- list_del(&page->list);
+ while ( (page = page_list_remove_head(&in_chunk_list)) )
if ( assign_pages(d, page, 0, MEMF_no_refcount) )
BUG();
- }
/* Free any output pages we managed to allocate. */
- while ( !list_empty(&out_chunk_list) )
- {
- page = list_entry(out_chunk_list.next, struct page_info, list);
- list_del(&page->list);
+ while ( (page = page_list_remove_head(&out_chunk_list)) )
free_domheap_pages(page, exch.out.extent_order);
- }
exch.nr_exchanged = i << in_chunk_order;
struct domain *d;
int rc, op;
unsigned int address_bits;
- unsigned long start_extent, progress;
+ unsigned long start_extent;
struct xen_memory_reservation reservation;
struct memop_args args;
domid_t domid;
args.memflags |= MEMF_node(XENMEMF_get_node(reservation.mem_flags));
+ if ( op == XENMEM_populate_physmap
+ && (reservation.mem_flags & XENMEMF_populate_on_demand) )
+ args.memflags |= MEMF_populate_on_demand;
+
if ( likely(reservation.domid == DOMID_SELF) )
{
d = rcu_lock_current_domain();
break;
- case XENMEM_translate_gpfn_list:
- progress = cmd >> MEMOP_EXTENT_SHIFT;
- rc = translate_gpfn_list(
- guest_handle_cast(arg, xen_translate_gpfn_list_t),
- &progress);
- if ( rc == -EAGAIN )
- return hypercall_create_continuation(
- __HYPERVISOR_memory_op, "lh",
- op | (progress << MEMOP_EXTENT_SHIFT), arg);
- break;
-
default:
rc = arch_memory_op(op, arg);
break;
return rc;
}
+/* Temporary placeholder. */
+int do_tmem_op(void *tmem_op)
+{
+ static bool_t warned;
+
+ if ( !test_and_set_bool(warned) )
+ printk("tmem: not implemented\n");
+
+ return -ENOSYS;
+}
+
/*
* Local variables:
* mode: C
#include <xen/perfc.h>
#include <xen/numa.h>
#include <xen/nodemask.h>
+#include <public/sysctl.h>
#include <asm/page.h>
#include <asm/numa.h>
#include <asm/flushtlb.h>
#endif
static DEFINE_SPINLOCK(page_scrub_lock);
-LIST_HEAD(page_scrub_list);
+PAGE_LIST_HEAD(page_scrub_list);
static unsigned long scrub_pages;
+/* Offlined page list, protected by heap_lock. */
+PAGE_LIST_HEAD(page_offlined_list);
+/* Broken page list, protected by heap_lock. */
+PAGE_LIST_HEAD(page_broken_list);
+
/*********************
* ALLOCATION BITMAP
* One bit per page of memory. Bit set => page is allocated.
#define MEMZONE_XEN 0
#define NR_ZONES (PADDR_BITS - PAGE_SHIFT)
-#define pfn_dom_zone_type(_pfn) (fls(_pfn) - 1)
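+/*
+ * bits_to_zone() maps an address width (in bits) to a heap zone index;
+ * page_to_zone() maps a page to its zone (MEMZONE_XEN for Xen heap pages,
+ * otherwise derived from the position of the highest set bit of its MFN).
+ */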
+#define bits_to_zone(b) (((b) < (PAGE_SHIFT + 1)) ? 0 : ((b) - PAGE_SHIFT - 1))
+#define page_to_zone(pg) (is_xen_heap_page(pg) ? MEMZONE_XEN : \
+ (fls(page_to_mfn(pg)) - 1))
-typedef struct list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
+typedef struct page_list_head heap_by_zone_and_order_t[NR_ZONES][MAX_ORDER+1];
static heap_by_zone_and_order_t *_heap[MAX_NUMNODES];
#define heap(node, zone, order) ((*_heap[node])[zone][order])
static DEFINE_SPINLOCK(heap_lock);
-static void init_node_heap(int node)
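+/*
+ * Initialise the heap metadata for @node.  If the metadata has to be carved
+ * out of the page range [mfn, mfn+nr) itself, return the number of pages
+ * consumed so the caller can skip over them; otherwise return 0.
+ */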
+static unsigned long init_node_heap(int node, unsigned long mfn,
+ unsigned long nr)
{
/* First node to be discovered has its heap metadata statically alloced. */
static heap_by_zone_and_order_t _heap_static;
static unsigned long avail_static[NR_ZONES];
static int first_node_initialised;
-
+ unsigned long needed = (sizeof(**_heap) +
+ sizeof(**avail) * NR_ZONES +
+ PAGE_SIZE - 1) >> PAGE_SHIFT;
int i, j;
if ( !first_node_initialised )
_heap[node] = &_heap_static;
avail[node] = avail_static;
first_node_initialised = 1;
+ needed = 0;
+ }
+#ifdef DIRECTMAP_VIRT_END
+ else if ( nr >= needed &&
+ (mfn + needed) <= (virt_to_mfn(DIRECTMAP_VIRT_END - 1) + 1) )
+ {
+ _heap[node] = mfn_to_virt(mfn);
+ avail[node] = mfn_to_virt(mfn + needed) - sizeof(**avail) * NR_ZONES;
+ }
+#endif
+ else if ( get_order_from_bytes(sizeof(**_heap)) ==
+ get_order_from_pages(needed) )
+ {
+ _heap[node] = alloc_xenheap_pages(get_order_from_pages(needed), 0);
+ BUG_ON(!_heap[node]);
+ avail[node] = (void *)_heap[node] + (needed << PAGE_SHIFT) -
+ sizeof(**avail) * NR_ZONES;
+ needed = 0;
}
else
{
_heap[node] = xmalloc(heap_by_zone_and_order_t);
avail[node] = xmalloc_array(unsigned long, NR_ZONES);
BUG_ON(!_heap[node] || !avail[node]);
+ needed = 0;
}
memset(avail[node], 0, NR_ZONES * sizeof(long));
for ( i = 0; i < NR_ZONES; i++ )
for ( j = 0; j <= MAX_ORDER; j++ )
- INIT_LIST_HEAD(&(*_heap[node])[i][j]);
+ INIT_PAGE_LIST_HEAD(&(*_heap[node])[i][j]);
+
+ return needed;
}
/* Allocate 2^@order contiguous pages. */
/* Find smallest order which can satisfy the request. */
for ( j = order; j <= MAX_ORDER; j++ )
- if ( !list_empty(&heap(node, zone, j)) )
+ if ( (pg = page_list_remove_head(&heap(node, zone, j))) )
goto found;
} while ( zone-- > zone_lo ); /* careful: unsigned zone may wrap */
return NULL;
found:
- pg = list_entry(heap(node, zone, j).next, struct page_info, list);
- list_del(&pg->list);
-
/* We may have to halve the chunk a number of times. */
while ( j != order )
{
PFN_ORDER(pg) = --j;
- list_add_tail(&pg->list, &heap(node, zone, j));
+ page_list_add_tail(pg, &heap(node, zone, j));
pg += 1 << j;
}
/* Reference count must continuously be zero for free pages. */
BUG_ON(pg[i].count_info != 0);
- /* Add in any extra CPUs that need flushing because of this page. */
- cpus_andnot(extra_cpus_mask, pg[i].u.free.cpumask, mask);
- tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
- cpus_or(mask, mask, extra_cpus_mask);
+ if ( pg[i].u.free.need_tlbflush )
+ {
+ /* Add in extra CPUs that need flushing because of this page. */
+ cpus_andnot(extra_cpus_mask, cpu_online_map, mask);
+ tlbflush_filter(extra_cpus_mask, pg[i].tlbflush_timestamp);
+ cpus_or(mask, mask, extra_cpus_mask);
+ }
/* Initialise fields which have other uses for free pages. */
pg[i].u.inuse.type_info = 0;
return pg;
}
+/* Remove any offlined page in the buddy pointed to by head. */
+static int reserve_offlined_page(struct page_info *head)
+{
+ unsigned int node = phys_to_nid(page_to_maddr(head));
+ int zone = page_to_zone(head), i, head_order = PFN_ORDER(head), count = 0;
+ struct page_info *cur_head;
+ int cur_order;
+
+ ASSERT(spin_is_locked(&heap_lock));
+
+ cur_head = head;
+
+ page_list_del(head, &heap(node, zone, head_order));
+
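+    /*
+     * First pass: hand the offlined-free parts of the buddy back to the heap
+     * as smaller power-of-two chunks, skipping over offlined pages (which
+     * are dealt with in the second pass below).
+     */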
+ while ( cur_head < (head + (1 << head_order)) )
+ {
+ struct page_info *pg;
+ int next_order;
+
+ if ( test_bit(_PGC_offlined, &cur_head->count_info) )
+ {
+ cur_head++;
+ continue;
+ }
+
+ next_order = cur_order = 0;
+
+ while ( cur_order < head_order )
+ {
+ next_order = cur_order + 1;
+
+            if ( (cur_head + (1 << next_order)) >= (head + (1 << head_order)) )
+ goto merge;
+
+ for ( i = (1 << cur_order), pg = cur_head + (1 << cur_order );
+ i < (1 << next_order);
+ i++, pg++ )
+ if ( test_bit(_PGC_offlined, &pg->count_info) )
+ break;
+ if ( i == ( 1 << next_order) )
+ {
+ cur_order = next_order;
+ continue;
+ }
+ else
+ {
+ merge:
+ /* We don't consider merging outside the head_order. */
+ page_list_add_tail(cur_head, &heap(node, zone, cur_order));
+ PFN_ORDER(cur_head) = cur_order;
+ cur_head += (1 << cur_order);
+ break;
+ }
+ }
+ }
+
+ for ( cur_head = head; cur_head < head + ( 1UL << head_order); cur_head++ )
+ {
+ if ( !test_bit(_PGC_offlined, &cur_head->count_info) )
+ continue;
+
+ avail[node][zone]--;
+
+ map_alloc(page_to_mfn(cur_head), 1);
+
+ page_list_add_tail(cur_head,
+ test_bit(_PGC_broken, &cur_head->count_info) ?
+ &page_broken_list : &page_offlined_list);
+
+ count++;
+ }
+
+ return count;
+}
+
/* Free 2^@order set of pages. */
static void free_heap_pages(
- unsigned int zone, struct page_info *pg, unsigned int order)
+ struct page_info *pg, unsigned int order)
{
unsigned long mask;
- unsigned int i, node = phys_to_nid(page_to_maddr(pg));
- struct domain *d;
+ unsigned int i, node = phys_to_nid(page_to_maddr(pg)), tainted = 0;
+ unsigned int zone = page_to_zone(pg);
- ASSERT(zone < NR_ZONES);
ASSERT(order <= MAX_ORDER);
ASSERT(node >= 0);
ASSERT(node < num_online_nodes());
* in its pseudophysical address space).
* In all the above cases there can be no guest mappings of this page.
*/
- pg[i].count_info = 0;
-
- if ( (d = page_get_owner(&pg[i])) != NULL )
- {
- pg[i].tlbflush_timestamp = tlbflush_current_time();
- pg[i].u.free.cpumask = d->domain_dirty_cpumask;
- }
- else
+ ASSERT(!(pg[i].count_info & PGC_offlined));
+ pg[i].count_info &= PGC_offlining | PGC_broken;
+ if ( pg[i].count_info & PGC_offlining )
{
- cpus_clear(pg[i].u.free.cpumask);
+ pg[i].count_info &= ~PGC_offlining;
+ pg[i].count_info |= PGC_offlined;
+ tainted = 1;
}
+
+ /* If a page has no owner it will need no safety TLB flush. */
+ pg[i].u.free.need_tlbflush = (page_get_owner(&pg[i]) != NULL);
+ if ( pg[i].u.free.need_tlbflush )
+ pg[i].tlbflush_timestamp = tlbflush_current_time();
}
spin_lock(&heap_lock);
if ( allocated_in_map(page_to_mfn(pg)-mask) ||
(PFN_ORDER(pg-mask) != order) )
break;
- list_del(&(pg-mask)->list);
pg -= mask;
+ page_list_del(pg, &heap(node, zone, order));
}
else
{
if ( allocated_in_map(page_to_mfn(pg)+mask) ||
(PFN_ORDER(pg+mask) != order) )
break;
- list_del(&(pg+mask)->list);
+ page_list_del(pg + mask, &heap(node, zone, order));
}
-
+
order++;
/* After merging, pg should remain in the same node. */
}
PFN_ORDER(pg) = order;
- list_add_tail(&pg->list, &heap(node, zone, order));
+ page_list_add_tail(pg, &heap(node, zone, order));
+
+ if ( tainted )
+ reserve_offlined_page(pg);
spin_unlock(&heap_lock);
}
+
+/*
+ * A page can be in one of the following states:
+ *   free and online; free and offlined; free and offlined and broken;
+ *   assigned and online; assigned and offlining; assigned and offlining and broken.
+ *
+ * Rules for page offlining:
+ *   Once a page is broken, it can never be assigned again.
+ *   A page is marked offlined only once it is free.
+ *
+ * mark_page_offline() returns the original count_info.
+ *
+ */
+static unsigned long mark_page_offline(struct page_info *pg, int broken)
+{
+ unsigned long nx, x, y = pg->count_info;
+
+ ASSERT(page_is_ram_type(page_to_mfn(pg), RAM_TYPE_CONVENTIONAL));
+ ASSERT(spin_is_locked(&heap_lock));
+
+ do {
+ nx = x = y;
+
+ if ( ((x & PGC_offlined_broken) == PGC_offlined_broken) )
+ return y;
+
+ if ( x & PGC_offlined )
+ {
+ /* PGC_offlined means it is a free page. */
+ if ( broken && !(nx & PGC_broken) )
+ nx |= PGC_broken;
+ else
+ return y;
+ }
+ else
+ {
+            /* The page is neither offlined nor a reserved page. */
+ nx |= (allocated_in_map(page_to_mfn(pg)) ?
+ PGC_offlining : PGC_offlined);
+ }
+
+ if ( broken )
+ nx |= PGC_broken;
+ } while ( (y = cmpxchg(&pg->count_info, x, nx)) != x );
+
+ return y;
+}
+
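+/*
+ * Locate the free buddy on the heap that contains @pg and carve the offlined
+ * page(s) out of it via reserve_offlined_page().
+ */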
+static int reserve_heap_page(struct page_info *pg)
+{
+ struct page_info *head = NULL;
+ unsigned int i, node = phys_to_nid(page_to_maddr(pg));
+ unsigned int zone = page_to_zone(pg);
+
+ for ( i = 0; i <= MAX_ORDER; i++ )
+ {
+ struct page_info *tmp;
+
+ if ( page_list_empty(&heap(node, zone, i)) )
+ continue;
+
+ page_list_for_each_safe ( head, tmp, &heap(node, zone, i) )
+ {
+ if ( (head <= pg) &&
+ (head + (1UL << i) > pg) )
+ return reserve_offlined_page(head);
+ }
+ }
+
+ return -EINVAL;
+
+}
+
+int offline_page(unsigned long mfn, int broken, uint32_t *status)
+{
+ unsigned long old_info = 0;
+ struct domain *owner;
+ int ret = 0;
+ struct page_info *pg;
+
+ if ( mfn > max_page )
+ {
+ dprintk(XENLOG_WARNING,
+ "try to offline page out of range %lx\n", mfn);
+ return -EINVAL;
+ }
+
+ *status = 0;
+ pg = mfn_to_page(mfn);
+
+#if defined(__x86_64__)
+    /* Xen's txt mfns on x86_64 are reserved in the e820 map. */
+ if ( is_xen_fixed_mfn(mfn) )
+#elif defined(__i386__)
+ if ( is_xen_heap_mfn(mfn) )
+#endif
+ {
+ *status = PG_OFFLINE_XENPAGE | PG_OFFLINE_FAILED |
+ (DOMID_XEN << PG_OFFLINE_OWNER_SHIFT);
+ return -EPERM;
+ }
+
+ /*
+     * N.B. Xen's txt range on x86_64 is marked reserved and handled already.
+     * The kexec range is also reserved.
+ */
+ if ( !page_is_ram_type(mfn, RAM_TYPE_CONVENTIONAL) )
+ {
+ *status = PG_OFFLINE_FAILED | PG_OFFLINE_NOT_CONV_RAM;
+ return -EINVAL;
+ }
+
+ spin_lock(&heap_lock);
+
+ old_info = mark_page_offline(pg, broken);
+
+ if ( !allocated_in_map(mfn) )
+ {
+        /* Free pages are reserved directly. */
+ reserve_heap_page(pg);
+ *status = PG_OFFLINE_OFFLINED;
+ }
+ else if ( test_bit(_PGC_offlined, &pg->count_info) )
+ {
+ *status = PG_OFFLINE_OFFLINED;
+ }
+ else if ( (owner = page_get_owner_and_reference(pg)) )
+ {
+ *status = PG_OFFLINE_OWNED | PG_OFFLINE_PENDING |
+ (owner->domain_id << PG_OFFLINE_OWNER_SHIFT);
+ /* Release the reference since it will not be allocated anymore */
+ put_page(pg);
+ }
+    else if ( old_info & PGC_xen_heap )
+ {
+ *status = PG_OFFLINE_XENPAGE | PG_OFFLINE_PENDING |
+ (DOMID_XEN << PG_OFFLINE_OWNER_SHIFT);
+ }
+ else
+ {
+ /*
+         * assign_pages() does not hold heap_lock, so there is a small window
+         * in which the owner may still get set.  Note that the owner can only
+         * change from NULL to non-NULL (never the reverse) while the page is
+         * being offlined.  There is no such window when called from the #MC
+         * handler, since all CPUs are then in softirq context; when called
+         * from user space (e.g. CE handling), tools can simply retry later.
+ */
+ *status = PG_OFFLINE_ANONYMOUS | PG_OFFLINE_FAILED |
+ (DOMID_INVALID << PG_OFFLINE_OWNER_SHIFT );
+ }
+
+ if ( broken )
+ *status |= PG_OFFLINE_BROKEN;
+
+ spin_unlock(&heap_lock);
+
+ return ret;
+}
+
+/*
+ * Online a page.
+ * The caller should ensure mfn <= max_page; if not, expand_pages()
+ * should be called before online_page().
+ */
+unsigned int online_page(unsigned long mfn, uint32_t *status)
+{
+ struct page_info *pg;
+ int ret = 0, free = 0;
+
+ if ( mfn > max_page )
+ {
+ dprintk(XENLOG_WARNING, "call expand_pages() first\n");
+ return -EINVAL;
+ }
+
+ pg = mfn_to_page(mfn);
+
+ *status = 0;
+
+ spin_lock(&heap_lock);
+
+ if ( unlikely(is_page_broken(pg)) )
+ {
+ ret = -EINVAL;
+        *status = PG_ONLINE_FAILED | PG_ONLINE_BROKEN;
+ }
+ else if ( pg->count_info & PGC_offlined )
+ {
+ clear_bit(_PGC_offlined, &pg->count_info);
+ page_list_del(pg, &page_offlined_list);
+ *status = PG_ONLINE_ONLINED;
+ free = 1;
+ }
+ else if ( pg->count_info & PGC_offlining )
+ {
+ clear_bit(_PGC_offlining, &pg->count_info);
+ *status = PG_ONLINE_ONLINED;
+ }
+ spin_unlock(&heap_lock);
+
+ if ( free )
+ free_heap_pages(pg, 0);
+
+ return ret;
+}
+
+int query_page_offline(unsigned long mfn, uint32_t *status)
+{
+ struct page_info *pg;
+
+ if ( (mfn > max_page) || !page_is_ram_type(mfn, RAM_TYPE_CONVENTIONAL) )
+ {
+ dprintk(XENLOG_WARNING, "call expand_pages() first\n");
+ return -EINVAL;
+ }
+
+ *status = 0;
+ spin_lock(&heap_lock);
+
+ pg = mfn_to_page(mfn);
+
+    if ( pg->count_info & PGC_offlining )
+        *status |= PG_OFFLINE_STATUS_OFFLINE_PENDING;
+    if ( pg->count_info & PGC_broken )
+        *status |= PG_OFFLINE_STATUS_BROKEN;
+    if ( pg->count_info & PGC_offlined )
+        *status |= PG_OFFLINE_STATUS_OFFLINED;
+
+ spin_unlock(&heap_lock);
+
+ return 0;
+}
+
/*
* Hand the specified arbitrary page range to the specified heap zone
* checking the node_id of the previous page. If they differ and the
* latter is not on a MAX_ORDER boundary, then we reserve the page by
* not freeing it to the buddy allocator.
*/
-#define MAX_ORDER_ALIGNED (1UL << (MAX_ORDER))
static void init_heap_pages(
- unsigned int zone, struct page_info *pg, unsigned long nr_pages)
+ struct page_info *pg, unsigned long nr_pages)
{
unsigned int nid_curr, nid_prev;
unsigned long i;
- ASSERT(zone < NR_ZONES);
-
- if ( likely(page_to_mfn(pg) != 0) )
- nid_prev = phys_to_nid(page_to_maddr(pg-1));
- else
- nid_prev = phys_to_nid(page_to_maddr(pg));
+ nid_prev = phys_to_nid(page_to_maddr(pg-1));
- for ( i = 0; i < nr_pages; i++ )
+ for ( i = 0; i < nr_pages; nid_prev = nid_curr, i++ )
{
nid_curr = phys_to_nid(page_to_maddr(pg+i));
if ( unlikely(!avail[nid_curr]) )
- init_node_heap(nid_curr);
+ {
+ unsigned long n;
+
+ n = init_node_heap(nid_curr, page_to_mfn(pg+i), nr_pages - i);
+ if ( n )
+ {
+ BUG_ON(i + n > nr_pages);
+ i += n - 1;
+ continue;
+ }
+ }
/*
- * free pages of the same node, or if they differ, but are on a
- * MAX_ORDER alignement boundary (which already get reserved)
+ * Free pages of the same node, or if they differ, but are on a
+ * MAX_ORDER alignment boundary (which already get reserved).
*/
- if ( (nid_curr == nid_prev) || (page_to_maddr(pg+i) &
- MAX_ORDER_ALIGNED) )
- free_heap_pages(zone, pg+i, 0);
- else
- printk("Reserving non-aligned node boundary @ mfn %lu\n",
- page_to_mfn(pg+i));
-
- nid_prev = nid_curr;
+ if ( (nid_curr == nid_prev) ||
+ !(page_to_mfn(pg+i) & ((1UL << MAX_ORDER) - 1)) )
+ free_heap_pages(pg+i, 0);
+ else
+ printk("Reserving non-aligned node boundary @ mfn %#lx\n",
+ page_to_mfn(pg+i));
}
}
#define avail_for_domheap(mfn) !(allocated_in_map(mfn) || is_xen_heap_mfn(mfn))
void __init end_boot_allocator(void)
{
- unsigned long i;
+ unsigned long i, nr = 0;
int curr_free, next_free;
/* Pages that are free now go to the domain sub-allocator. */
if ( next_free )
map_alloc(i+1, 1); /* prevent merging in free_heap_pages() */
if ( curr_free )
- init_heap_pages(pfn_dom_zone_type(i), mfn_to_page(i), 1);
+ ++nr;
+ else if ( nr )
+ {
+ init_heap_pages(mfn_to_page(i - nr), nr);
+ nr = 0;
+ }
}
+ if ( nr )
+ init_heap_pages(mfn_to_page(i - nr), nr);
if ( !dma_bitsize && (num_online_nodes() > 1) )
{
* XEN-HEAP SUB-ALLOCATOR
*/
+#if !defined(__x86_64__) && !defined(__ia64__)
+
void init_xenheap_pages(paddr_t ps, paddr_t pe)
{
ps = round_pgup(ps);
if ( !is_xen_heap_mfn(paddr_to_pfn(pe)) )
pe -= PAGE_SIZE;
- init_heap_pages(MEMZONE_XEN, maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
+ init_heap_pages(maddr_to_page(ps), (pe - ps) >> PAGE_SHIFT);
}
-void *alloc_xenheap_pages(unsigned int order)
+void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
{
struct page_info *pg;
pg = alloc_heap_pages(
MEMZONE_XEN, MEMZONE_XEN, cpu_to_node(smp_processor_id()), order);
if ( unlikely(pg == NULL) )
- goto no_memory;
+ return NULL;
memguard_unguard_range(page_to_virt(pg), 1 << (order + PAGE_SHIFT));
return page_to_virt(pg);
-
- no_memory:
- printk("Cannot handle page request order %d!\n", order);
- return NULL;
}
memguard_guard_range(v, 1 << (order + PAGE_SHIFT));
- free_heap_pages(MEMZONE_XEN, virt_to_page(v), order);
+ free_heap_pages(virt_to_page(v), order);
}
+#else
+
+void init_xenheap_pages(paddr_t ps, paddr_t pe)
+{
+ init_domheap_pages(ps, pe);
+}
+
+void *alloc_xenheap_pages(unsigned int order, unsigned int memflags)
+{
+ struct page_info *pg;
+ unsigned int i;
+
+ ASSERT(!in_irq());
+
+ pg = alloc_domheap_pages(NULL, order, memflags);
+ if ( unlikely(pg == NULL) )
+ return NULL;
+
+ for ( i = 0; i < (1u << order); i++ )
+ pg[i].count_info |= PGC_xen_heap;
+
+ return page_to_virt(pg);
+}
+
+void free_xenheap_pages(void *v, unsigned int order)
+{
+ struct page_info *pg;
+ unsigned int i;
+
+ ASSERT(!in_irq());
+
+ if ( v == NULL )
+ return;
+
+ pg = virt_to_page(v);
+
+ for ( i = 0; i < (1u << order); i++ )
+ pg[i].count_info &= ~PGC_xen_heap;
+
+ free_heap_pages(pg, order);
+}
+
+#endif
+
/*************************
void init_domheap_pages(paddr_t ps, paddr_t pe)
{
- unsigned long s_tot, e_tot;
- unsigned int zone;
+ unsigned long smfn, emfn;
ASSERT(!in_irq());
- s_tot = round_pgup(ps) >> PAGE_SHIFT;
- e_tot = round_pgdown(pe) >> PAGE_SHIFT;
-
- zone = fls(s_tot);
- BUG_ON(zone <= MEMZONE_XEN + 1);
- for ( --zone; s_tot < e_tot; ++zone )
- {
- unsigned long end = e_tot;
+ smfn = round_pgup(ps) >> PAGE_SHIFT;
+ emfn = round_pgdown(pe) >> PAGE_SHIFT;
- BUILD_BUG_ON(NR_ZONES > BITS_PER_LONG);
- if ( zone < BITS_PER_LONG - 1 && end > 1UL << (zone + 1) )
- end = 1UL << (zone + 1);
- init_heap_pages(zone, mfn_to_page(s_tot), end - s_tot);
- s_tot = end;
- }
+ init_heap_pages(mfn_to_page(smfn), emfn - smfn);
}
page_set_owner(&pg[i], d);
wmb(); /* Domain pointer must be visible before updating refcnt. */
pg[i].count_info = PGC_allocated | 1;
- list_add_tail(&pg[i].list, &d->page_list);
+ page_list_add_tail(&pg[i], &d->page_list);
}
spin_unlock(&d->page_alloc_lock);
{
struct page_info *pg = NULL;
unsigned int bits = memflags >> _MEMF_bits, zone_hi = NR_ZONES - 1;
- unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1);
+ unsigned int node = (uint8_t)((memflags >> _MEMF_node) - 1), dma_zone;
ASSERT(!in_irq());
node = domain_to_node(d);
bits = domain_clamp_alloc_bitsize(d, bits ? : (BITS_PER_LONG+PAGE_SHIFT));
- if ( bits <= (PAGE_SHIFT + 1) )
+ if ( (zone_hi = min_t(unsigned int, bits_to_zone(bits), zone_hi)) == 0 )
return NULL;
- bits -= PAGE_SHIFT + 1;
- if ( bits < zone_hi )
- zone_hi = bits;
-
- if ( (dma_bitsize > PAGE_SHIFT) &&
- ((zone_hi + PAGE_SHIFT) >= dma_bitsize) )
- pg = alloc_heap_pages(dma_bitsize - PAGE_SHIFT, zone_hi, node, order);
+ if ( dma_bitsize && ((dma_zone = bits_to_zone(dma_bitsize)) < zone_hi) )
+ pg = alloc_heap_pages(dma_zone + 1, zone_hi, node, order);
if ( (pg == NULL) &&
((pg = alloc_heap_pages(MEMZONE_XEN + 1, zone_hi,
if ( (d != NULL) && assign_pages(d, pg, order, memflags) )
{
- free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
+ free_heap_pages(pg, order);
return NULL;
}
spin_lock_recursive(&d->page_alloc_lock);
for ( i = 0; i < (1 << order); i++ )
- list_del(&pg[i].list);
+ page_list_del2(&pg[i], &d->xenpage_list, &d->arch.relmem_list);
d->xenheap_pages -= 1 << order;
drop_dom_ref = (d->xenheap_pages == 0);
for ( i = 0; i < (1 << order); i++ )
{
BUG_ON((pg[i].u.inuse.type_info & PGT_count_mask) != 0);
- list_del(&pg[i].list);
+ page_list_del2(&pg[i], &d->page_list, &d->arch.relmem_list);
}
d->tot_pages -= 1 << order;
if ( likely(!d->is_dying) )
{
- free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
+ free_heap_pages(pg, order);
}
else
{
{
page_set_owner(&pg[i], NULL);
spin_lock(&page_scrub_lock);
- list_add(&pg[i].list, &page_scrub_list);
+ page_list_add(&pg[i], &page_scrub_list);
scrub_pages++;
spin_unlock(&page_scrub_lock);
}
else
{
/* Freeing anonymous domain-heap pages. */
- free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, order);
+ free_heap_pages(pg, order);
drop_dom_ref = 0;
}
{
int zone_lo, zone_hi;
- zone_lo = min_width ? (min_width - (PAGE_SHIFT + 1)) : (MEMZONE_XEN + 1);
- zone_lo = max_t(int, MEMZONE_XEN + 1, zone_lo);
- zone_lo = min_t(int, NR_ZONES - 1, zone_lo);
+ zone_lo = min_width ? bits_to_zone(min_width) : (MEMZONE_XEN + 1);
+ zone_lo = max_t(int, MEMZONE_XEN + 1, min_t(int, NR_ZONES - 1, zone_lo));
- zone_hi = max_width ? (max_width - (PAGE_SHIFT + 1)) : (NR_ZONES - 1);
- zone_hi = max_t(int, MEMZONE_XEN + 1, zone_hi);
- zone_hi = min_t(int, NR_ZONES - 1, zone_hi);
+ zone_hi = max_width ? bits_to_zone(max_width) : (NR_ZONES - 1);
+ zone_hi = max_t(int, MEMZONE_XEN + 1, min_t(int, NR_ZONES - 1, zone_hi));
return avail_heap_pages(zone_lo, zone_hi, node);
}
static void page_scrub_softirq(void)
{
- struct list_head *ent;
+ PAGE_LIST_HEAD(list);
struct page_info *pg;
void *p;
int i;
do {
spin_lock(&page_scrub_lock);
- if ( unlikely((ent = page_scrub_list.next) == &page_scrub_list) )
- {
- spin_unlock(&page_scrub_lock);
- goto out;
- }
-
/* Peel up to 16 pages from the list. */
for ( i = 0; i < 16; i++ )
{
- if ( ent->next == &page_scrub_list )
+ if ( !(pg = page_list_remove_head(&page_scrub_list)) )
break;
- ent = ent->next;
+ page_list_add_tail(pg, &list);
}
- /* Remove peeled pages from the list. */
- ent->next->prev = &page_scrub_list;
- page_scrub_list.next = ent->next;
- scrub_pages -= (i+1);
+ if ( unlikely(i == 0) )
+ {
+ spin_unlock(&page_scrub_lock);
+ goto out;
+ }
+
+ scrub_pages -= i;
spin_unlock(&page_scrub_lock);
- /* Working backwards, scrub each page in turn. */
- while ( ent != &page_scrub_list )
- {
- pg = list_entry(ent, struct page_info, list);
- ent = ent->prev;
+ /* Scrub each page in turn. */
+ while ( (pg = page_list_remove_head(&list)) ) {
p = map_domain_page(page_to_mfn(pg));
scrub_page(p);
unmap_domain_page(p);
- free_heap_pages(pfn_dom_zone_type(page_to_mfn(pg)), pg, 0);
+ free_heap_pages(pg, 0);
}
} while ( (NOW() - start) < MILLISECS(1) );
/*
* CSCHED_STATS
*
- * Manage very basic counters and stats.
+ * Manage very basic per-vCPU counters and stats.
*
* Useful for debugging live systems. The stats are displayed
* with runq dumps ('r' on the Xen console).
*/
+#ifdef PERF_COUNTERS
#define CSCHED_STATS
+#endif
/*
/*
* Stats
*/
-#ifdef CSCHED_STATS
-
-#define CSCHED_STAT(_X) (csched_priv.stats._X)
-#define CSCHED_STAT_DEFINE(_X) uint32_t _X;
-#define CSCHED_STAT_PRINTK(_X) \
- do \
- { \
- printk("\t%-30s = %u\n", #_X, CSCHED_STAT(_X)); \
- } while ( 0 );
-
-/*
- * Try and keep often cranked stats on top so they'll fit on one
- * cache line.
- */
-#define CSCHED_STATS_EXPAND_SCHED(_MACRO) \
- _MACRO(schedule) \
- _MACRO(acct_run) \
- _MACRO(acct_no_work) \
- _MACRO(acct_balance) \
- _MACRO(acct_reorder) \
- _MACRO(acct_min_credit) \
- _MACRO(acct_vcpu_active) \
- _MACRO(acct_vcpu_idle) \
- _MACRO(vcpu_sleep) \
- _MACRO(vcpu_wake_running) \
- _MACRO(vcpu_wake_onrunq) \
- _MACRO(vcpu_wake_runnable) \
- _MACRO(vcpu_wake_not_runnable) \
- _MACRO(vcpu_park) \
- _MACRO(vcpu_unpark) \
- _MACRO(tickle_local_idler) \
- _MACRO(tickle_local_over) \
- _MACRO(tickle_local_under) \
- _MACRO(tickle_local_other) \
- _MACRO(tickle_idlers_none) \
- _MACRO(tickle_idlers_some) \
- _MACRO(load_balance_idle) \
- _MACRO(load_balance_over) \
- _MACRO(load_balance_other) \
- _MACRO(steal_trylock_failed) \
- _MACRO(steal_peer_idle) \
- _MACRO(migrate_queued) \
- _MACRO(migrate_running) \
- _MACRO(dom_init) \
- _MACRO(dom_destroy) \
- _MACRO(vcpu_init) \
- _MACRO(vcpu_destroy)
-
-#ifndef NDEBUG
-#define CSCHED_STATS_EXPAND_CHECKS(_MACRO) \
- _MACRO(vcpu_check)
-#else
-#define CSCHED_STATS_EXPAND_CHECKS(_MACRO)
-#endif
-
-#define CSCHED_STATS_EXPAND(_MACRO) \
- CSCHED_STATS_EXPAND_CHECKS(_MACRO) \
- CSCHED_STATS_EXPAND_SCHED(_MACRO)
-
-#define CSCHED_STATS_RESET() \
- do \
- { \
- memset(&csched_priv.stats, 0, sizeof(csched_priv.stats)); \
- } while ( 0 )
-
-#define CSCHED_STATS_DEFINE() \
- struct \
- { \
- CSCHED_STATS_EXPAND(CSCHED_STAT_DEFINE) \
- } stats;
-
-#define CSCHED_STATS_PRINTK() \
- do \
- { \
- printk("stats:\n"); \
- CSCHED_STATS_EXPAND(CSCHED_STAT_PRINTK) \
- } while ( 0 )
+#define CSCHED_STAT_CRANK(_X) (perfc_incr(_X))
-#define CSCHED_STAT_CRANK(_X) (CSCHED_STAT(_X)++)
+#ifdef CSCHED_STATS
#define CSCHED_VCPU_STATS_RESET(_V) \
do \
#else /* CSCHED_STATS */
-#define CSCHED_STATS_RESET() do {} while ( 0 )
-#define CSCHED_STATS_DEFINE()
-#define CSCHED_STATS_PRINTK() do {} while ( 0 )
-#define CSCHED_STAT_CRANK(_X) do {} while ( 0 )
#define CSCHED_VCPU_STATS_RESET(_V) do {} while ( 0 )
#define CSCHED_VCPU_STAT_CRANK(_V, _X) do {} while ( 0 )
#define CSCHED_VCPU_STAT_SET(_V, _X, _Y) do {} while ( 0 )
uint32_t credit;
int credit_balance;
uint32_t runq_sort;
- CSCHED_STATS_DEFINE()
};
static void csched_tick(void *_cpu);
-static inline int
-__cycle_cpu(int cpu, const cpumask_t *mask)
-{
- int nxt = next_cpu(cpu, *mask);
- if (nxt == NR_CPUS)
- nxt = first_cpu(*mask);
- return nxt;
-}
-
static inline int
__vcpu_on_runq(struct csched_vcpu *svc)
{
#define CSCHED_VCPU_CHECK(_vc)
#endif
+/*
+ * Delay, in microseconds, between migrations of a VCPU between PCPUs.
+ * This prevents rapid fluttering of a VCPU between CPUs, and reduces
+ * implicit overheads such as cache re-warming. 1ms (1000) has been measured
+ * as a good value.
+ */
+static unsigned int vcpu_migration_delay;
+integer_param("vcpu_migration_delay", vcpu_migration_delay);
+
+static inline int
+__csched_vcpu_is_cache_hot(struct vcpu *v)
+{
+ int hot = ((NOW() - v->last_run_time) <
+ ((uint64_t)vcpu_migration_delay * 1000u));
+
+ if ( hot )
+ CSCHED_STAT_CRANK(vcpu_hot);
+
+ return hot;
+}
+
static inline int
__csched_vcpu_is_migrateable(struct vcpu *vc, int dest_cpu)
{
/*
- * Don't pick up work that's in the peer's scheduling tail. Also only pick
- * up work that's allowed to run on our CPU.
+     * Don't pick up work that's in the peer's scheduling tail or still
+     * cache-hot on its current PCPU. Only pick up work that's allowed to
+     * run on our CPU.
*/
- return !vc->is_running && cpu_isset(dest_cpu, vc->cpu_affinity);
+ return !vc->is_running &&
+ !__csched_vcpu_is_cache_hot(vc) &&
+ cpu_isset(dest_cpu, vc->cpu_affinity);
}
static int
cpus_and(cpus, cpu_online_map, vc->cpu_affinity);
cpu = cpu_isset(vc->processor, cpus)
? vc->processor
- : __cycle_cpu(vc->processor, &cpus);
+ : cycle_cpu(vc->processor, cpus);
ASSERT( !cpus_empty(cpus) && cpu_isset(cpu, cpus) );
/*
{
cpumask_t cpu_idlers;
cpumask_t nxt_idlers;
- int nxt;
+ int nxt, weight_cpu, weight_nxt;
- nxt = __cycle_cpu(cpu, &cpus);
+ nxt = cycle_cpu(cpu, cpus);
if ( cpu_isset(cpu, cpu_core_map[nxt]) )
{
cpus_and(nxt_idlers, idlers, cpu_core_map[nxt]);
}
- if ( cpus_weight(cpu_idlers) < cpus_weight(nxt_idlers) )
+ weight_cpu = cpus_weight(cpu_idlers);
+ weight_nxt = cpus_weight(nxt_idlers);
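+    /*
+     * By default, prefer the core with more idle siblings (spread work
+     * out); with sched_smt_power_savings, prefer the core with fewer idle
+     * siblings (consolidate work).  Ties leave the current choice unchanged.
+     */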
+ if ( ( (weight_cpu < weight_nxt) ^ sched_smt_power_savings )
+ && (weight_cpu != weight_nxt) )
{
cpu = nxt;
cpu_clear(cpu, cpus);
while ( !cpus_empty(workers) )
{
- peer_cpu = __cycle_cpu(peer_cpu, &workers);
+ peer_cpu = cycle_cpu(peer_cpu, workers);
cpu_clear(peer_cpu, workers);
/*
"\tmsecs per tick = %dms\n"
"\tcredits per tick = %d\n"
"\tticks per tslice = %d\n"
- "\tticks per acct = %d\n",
+ "\tticks per acct = %d\n"
+ "\tmigration delay = %uus\n",
csched_priv.ncpus,
csched_priv.master,
csched_priv.credit,
CSCHED_MSECS_PER_TICK,
CSCHED_CREDITS_PER_TICK,
CSCHED_TICKS_PER_TSLICE,
- CSCHED_TICKS_PER_ACCT);
+ CSCHED_TICKS_PER_ACCT,
+ vcpu_migration_delay);
cpumask_scnprintf(idlers_buf, sizeof(idlers_buf), csched_priv.idlers);
printk("idlers: %s\n", idlers_buf);
- CSCHED_STATS_PRINTK();
-
printk("active vcpus:\n");
loop = 0;
list_for_each( iter_sdom, &csched_priv.active_sdom )
csched_priv.credit = 0U;
csched_priv.credit_balance = 0;
csched_priv.runq_sort = 0U;
- CSCHED_STATS_RESET();
}
/* Tickers cannot be kicked until SMP subsystem is alive. */
static char opt_sched[10] = "credit";
string_param("sched", opt_sched);
+/*
+ * If sched_smt_power_savings is set, the scheduler gives preference to a
+ * partially idle package over a fully idle package when picking the pCPU
+ * on which to schedule a vCPU.
+ */
+int sched_smt_power_savings = 0;
+boolean_param("sched_smt_power_savings", sched_smt_power_savings);
+
#define TIME_SLOP (s32)MICROSECS(50) /* allow time to slip a bit */
/* Various timer handlers. */
__trace_var(event, 1/*tsc*/, sizeof(d), (unsigned char *)&d);
}
+static inline void trace_continue_running(struct vcpu *v)
+{
+ struct { uint32_t vcpu:16, domain:16; } d;
+
+ if ( likely(!tb_init_done) )
+ return;
+
+ d.vcpu = v->vcpu_id;
+ d.domain = v->domain->domain_id;
+
+ __trace_var(TRC_SCHED_CONTINUE_RUNNING, 1/*tsc*/, sizeof(d),
+ (unsigned char *)&d);
+}
+
static inline void vcpu_runstate_change(
struct vcpu *v, int new_state, s_time_t new_entry_time)
{
+ s_time_t delta;
+
ASSERT(v->runstate.state != new_state);
ASSERT(spin_is_locked(&per_cpu(schedule_data,v->processor).schedule_lock));
trace_runstate_change(v, new_state);
- v->runstate.time[v->runstate.state] +=
- new_entry_time - v->runstate.state_entry_time;
- v->runstate.state_entry_time = new_entry_time;
+ delta = new_entry_time - v->runstate.state_entry_time;
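+    /* Ignore an apparently backwards step in time (e.g. cross-CPU skew). */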
+ if ( delta > 0 )
+ {
+ v->runstate.time[v->runstate.state] += delta;
+ v->runstate.state_entry_time = new_entry_time;
+ }
+
v->runstate.state = new_state;
}
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate)
{
- if ( likely(v == current) )
- {
- /* Fast lock-free path. */
- memcpy(runstate, &v->runstate, sizeof(*runstate));
- ASSERT(runstate->state == RUNSTATE_running);
- runstate->time[RUNSTATE_running] += NOW() - runstate->state_entry_time;
- }
- else
- {
+ s_time_t delta;
+
+ if ( unlikely(v != current) )
vcpu_schedule_lock_irq(v);
- memcpy(runstate, &v->runstate, sizeof(*runstate));
- runstate->time[runstate->state] += NOW() - runstate->state_entry_time;
+
+ memcpy(runstate, &v->runstate, sizeof(*runstate));
+ delta = NOW() - runstate->state_entry_time;
+ if ( delta > 0 )
+ runstate->time[runstate->state] += delta;
+
+ if ( unlikely(v != current) )
vcpu_schedule_unlock_irq(v);
- }
+}
+
+uint64_t get_cpu_idle_time(unsigned int cpu)
+{
+ struct vcpu_runstate_info state;
+ struct vcpu *v;
+
+ if ( (v = idle_vcpu[cpu]) == NULL )
+ return 0;
+
+ vcpu_runstate_get(v, &state);
+ return state.time[RUNSTATE_running];
}
int sched_init_vcpu(struct vcpu *v, unsigned int processor)
if ( unlikely(prev == next) )
{
spin_unlock_irq(&sd->schedule_lock);
+ trace_continue_running(next);
return continue_running(prev);
}
(test_bit(_VPF_blocked, &prev->pause_flags) ? RUNSTATE_blocked :
(vcpu_runnable(prev) ? RUNSTATE_runnable : RUNSTATE_offline)),
now);
+ prev->last_run_time = now;
ASSERT(next->runstate.state != RUNSTATE_running);
vcpu_runstate_change(next, RUNSTATE_running, now);
printk("Scheduler: %s (%s)\n", ops.name, ops.opt_name);
SCHED_OP(dump_settings);
+ printk("sched_smt_power_savings: %s\n",
+ sched_smt_power_savings? "enabled":"disabled");
printk("NOW=0x%08X%08X\n", (u32)(now>>32), (u32)now);
for_each_online_cpu ( i )
--- /dev/null
+#include <xen/config.h>
+#include <xen/irq.h>
+#include <xen/smp.h>
+#include <xen/spinlock.h>
+
+#ifndef NDEBUG
+
+static atomic_t spin_debug __read_mostly = ATOMIC_INIT(0);
+
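+/*
+ * A lock is expected to be taken either always with IRQs enabled or always
+ * with IRQs disabled.  The first acquisition records the mode in use; any
+ * later acquisition in the opposite mode triggers a BUG, since such mixing
+ * risks deadlock against an interrupt handler.
+ */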
+static void check_lock(struct lock_debug *debug)
+{
+ int irq_safe = !local_irq_is_enabled();
+
+ if ( unlikely(atomic_read(&spin_debug) <= 0) )
+ return;
+
+ /* A few places take liberties with this. */
+ /* BUG_ON(in_irq() && !irq_safe); */
+
+ if ( unlikely(debug->irq_safe != irq_safe) )
+ {
+ int seen = cmpxchg(&debug->irq_safe, -1, irq_safe);
+ BUG_ON(seen == !irq_safe);
+ }
+}
+
+void spin_debug_enable(void)
+{
+ atomic_inc(&spin_debug);
+}
+
+void spin_debug_disable(void)
+{
+ atomic_dec(&spin_debug);
+}
+
+#else /* defined(NDEBUG) */
+
+#define check_lock(l) ((void)0)
+
+#endif
+
+void _spin_lock(spinlock_t *lock)
+{
+ check_lock(&lock->debug);
+ _raw_spin_lock(&lock->raw);
+}
+
+void _spin_lock_irq(spinlock_t *lock)
+{
+ ASSERT(local_irq_is_enabled());
+ local_irq_disable();
+ check_lock(&lock->debug);
+ _raw_spin_lock(&lock->raw);
+}
+
+unsigned long _spin_lock_irqsave(spinlock_t *lock)
+{
+ unsigned long flags;
+ local_irq_save(flags);
+ check_lock(&lock->debug);
+ _raw_spin_lock(&lock->raw);
+ return flags;
+}
+
+void _spin_unlock(spinlock_t *lock)
+{
+ _raw_spin_unlock(&lock->raw);
+}
+
+void _spin_unlock_irq(spinlock_t *lock)
+{
+ _raw_spin_unlock(&lock->raw);
+ local_irq_enable();
+}
+
+void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags)
+{
+ _raw_spin_unlock(&lock->raw);
+ local_irq_restore(flags);
+}
+
+int _spin_is_locked(spinlock_t *lock)
+{
+ check_lock(&lock->debug);
+ return _raw_spin_is_locked(&lock->raw);
+}
+
+int _spin_trylock(spinlock_t *lock)
+{
+ check_lock(&lock->debug);
+ return _raw_spin_trylock(&lock->raw);
+}
+
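+/* Spin until the lock is observed to be free, without acquiring it. */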
+void _spin_barrier(spinlock_t *lock)
+{
+ check_lock(&lock->debug);
+ do { mb(); } while ( _raw_spin_is_locked(&lock->raw) );
+ mb();
+}
+
+void _spin_barrier_irq(spinlock_t *lock)
+{
+ unsigned long flags;
+ local_irq_save(flags);
+ _spin_barrier(lock);
+ local_irq_restore(flags);
+}
+
+void _spin_lock_recursive(spinlock_t *lock)
+{
+ int cpu = smp_processor_id();
+
+ /* Don't allow overflow of recurse_cpu field. */
+ BUILD_BUG_ON(NR_CPUS > 0xfffu);
+
+ check_lock(&lock->debug);
+
+ if ( likely(lock->recurse_cpu != cpu) )
+ {
+ spin_lock(lock);
+ lock->recurse_cpu = cpu;
+ }
+
+ /* We support only fairly shallow recursion, else the counter overflows. */
+ ASSERT(lock->recurse_cnt < 0xfu);
+ lock->recurse_cnt++;
+}
+
+void _spin_unlock_recursive(spinlock_t *lock)
+{
+ if ( likely(--lock->recurse_cnt == 0) )
+ {
+ lock->recurse_cpu = 0xfffu;
+ spin_unlock(lock);
+ }
+}
+
+void _read_lock(rwlock_t *lock)
+{
+ check_lock(&lock->debug);
+ _raw_read_lock(&lock->raw);
+}
+
+void _read_lock_irq(rwlock_t *lock)
+{
+ ASSERT(local_irq_is_enabled());
+ local_irq_disable();
+ check_lock(&lock->debug);
+ _raw_read_lock(&lock->raw);
+}
+
+unsigned long _read_lock_irqsave(rwlock_t *lock)
+{
+ unsigned long flags;
+ local_irq_save(flags);
+ check_lock(&lock->debug);
+ _raw_read_lock(&lock->raw);
+ return flags;
+}
+
+void _read_unlock(rwlock_t *lock)
+{
+ _raw_read_unlock(&lock->raw);
+}
+
+void _read_unlock_irq(rwlock_t *lock)
+{
+ _raw_read_unlock(&lock->raw);
+ local_irq_enable();
+}
+
+void _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+{
+ _raw_read_unlock(&lock->raw);
+ local_irq_restore(flags);
+}
+
+void _write_lock(rwlock_t *lock)
+{
+ check_lock(&lock->debug);
+ _raw_write_lock(&lock->raw);
+}
+
+void _write_lock_irq(rwlock_t *lock)
+{
+ ASSERT(local_irq_is_enabled());
+ local_irq_disable();
+ check_lock(&lock->debug);
+ _raw_write_lock(&lock->raw);
+}
+
+unsigned long _write_lock_irqsave(rwlock_t *lock)
+{
+ unsigned long flags;
+ local_irq_save(flags);
+ check_lock(&lock->debug);
+ _raw_write_lock(&lock->raw);
+ return flags;
+}
+
+void _write_unlock(rwlock_t *lock)
+{
+ _raw_write_unlock(&lock->raw);
+}
+
+void _write_unlock_irq(rwlock_t *lock)
+{
+ _raw_write_unlock(&lock->raw);
+ local_irq_enable();
+}
+
+void _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags)
+{
+ _raw_write_unlock(&lock->raw);
+ local_irq_restore(flags);
+}
+
+int _rw_is_locked(rwlock_t *lock)
+{
+ check_lock(&lock->debug);
+ return _raw_rw_is_locked(&lock->raw);
+}
#include <xsm/xsm.h>
extern int do_get_pm_info(struct xen_sysctl_get_pmstat *op);
+extern int do_pm_op(struct xen_sysctl_pm_op *op);
extern long arch_do_sysctl(
struct xen_sysctl *op, XEN_GUEST_HANDLE(xen_sysctl_t) u_sysctl);
{
uint32_t i, nr_cpus;
struct xen_sysctl_cpuinfo cpuinfo;
- struct vcpu *v;
nr_cpus = min_t(uint32_t, op->u.getcpuinfo.max_cpus, NR_CPUS);
for ( i = 0; i < nr_cpus; i++ )
{
- /* Assume no holes in idle-vcpu map. */
- if ( (v = idle_vcpu[i]) == NULL )
- break;
-
- cpuinfo.idletime = v->runstate.time[RUNSTATE_running];
- if ( v->is_running )
- cpuinfo.idletime += NOW() - v->runstate.state_entry_time;
+ cpuinfo.idletime = get_cpu_idle_time(i);
ret = -EFAULT;
if ( copy_to_guest_offset(op->u.getcpuinfo.info, i, &cpuinfo, 1) )
}
break;
+ case XEN_SYSCTL_pm_op:
+ {
+ ret = do_pm_op(&op->u.pm_op);
+ if ( ret && (ret != -EAGAIN) )
+ break;
+
+ if ( copy_to_guest(u_sysctl, op, 1) )
+ {
+ ret = -EFAULT;
+ break;
+ }
+ }
+ break;
+
+ case XEN_SYSCTL_page_offline_op:
+ {
+ uint32_t *status, *ptr;
+ unsigned long pfn;
+
+ ptr = status = xmalloc_bytes( sizeof(uint32_t) *
+ (op->u.page_offline.end -
+ op->u.page_offline.start + 1));
+ if ( !status )
+ {
+ dprintk(XENLOG_WARNING, "Out of memory for page offline op\n");
+ ret = -ENOMEM;
+ break;
+ }
+
+ memset(status, PG_OFFLINE_INVALID, sizeof(uint32_t) *
+ (op->u.page_offline.end - op->u.page_offline.start + 1));
+
+ for ( pfn = op->u.page_offline.start;
+ pfn <= op->u.page_offline.end;
+ pfn ++ )
+ {
+ switch ( op->u.page_offline.cmd )
+ {
+            /* Should we revert here on failure, or leave that to the caller? */
+ case sysctl_page_offline:
+ ret = offline_page(pfn, 0, ptr++);
+ break;
+ case sysctl_page_online:
+ ret = online_page(pfn, ptr++);
+ break;
+ case sysctl_query_page_offline:
+ ret = query_page_offline(pfn, ptr++);
+ break;
+ default:
+ gdprintk(XENLOG_WARNING, "invalid page offline op %x\n",
+ op->u.page_offline.cmd);
+ ret = -EINVAL;
+ break;
+ }
+
+            if ( ret )
+ break;
+ }
+
+        if ( copy_to_guest(
+            op->u.page_offline.status, status,
+            op->u.page_offline.end - op->u.page_offline.start + 1) )
+            ret = -EFAULT;
+
+ xfree(status);
+ }
+ break;
+
default:
ret = arch_do_sysctl(op, u_sysctl);
break;
* We pull handlers off the timer list this far in future,
* rather than reprogramming the time hardware.
*/
-#define TIMER_SLOP (50*1000) /* ns */
+static unsigned int timer_slop __read_mostly = 50000; /* 50 us */
+integer_param("timer_slop", timer_slop);
struct timers {
spinlock_t lock;
+ bool_t overflow;
struct timer **heap;
struct timer *list;
struct timer *running;
/* Add new entry @t to @heap. Return TRUE if new top of heap. */
-static int add_to_heap(struct timer ***pheap, struct timer *t)
+static int add_to_heap(struct timer **heap, struct timer *t)
{
- struct timer **heap = *pheap;
int sz = GET_HEAP_SIZE(heap);
- /* Copy the heap if it is full. */
+ /* Fail if the heap is full. */
if ( unlikely(sz == GET_HEAP_LIMIT(heap)) )
- {
- /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
- int old_limit = GET_HEAP_LIMIT(heap);
- int new_limit = ((old_limit + 1) << 4) - 1;
- if ( in_irq() )
- goto out;
- heap = xmalloc_array(struct timer *, new_limit + 1);
- if ( heap == NULL )
- goto out;
- memcpy(heap, *pheap, (old_limit + 1) * sizeof(*heap));
- SET_HEAP_LIMIT(heap, new_limit);
- if ( old_limit != 0 )
- xfree(*pheap);
- *pheap = heap;
- }
+ return 0;
SET_HEAP_SIZE(heap, ++sz);
heap[sz] = t;
t->heap_offset = sz;
up_heap(heap, sz);
- out:
+
return (t->heap_offset == 1);
}
/* Try to add to heap. t->heap_offset indicates whether we succeed. */
t->heap_offset = 0;
t->status = TIMER_STATUS_in_heap;
- rc = add_to_heap(&timers->heap, t);
+ rc = add_to_heap(timers->heap, t);
if ( t->heap_offset != 0 )
return rc;
/* Fall back to adding to the slower linked list. */
+ timers->overflow = 1;
t->status = TIMER_STATUS_in_list;
return add_to_list(&timers->list, t);
}
__stop_timer(timer);
timer->expires = expires;
+ timer->expires_end = expires + timer_slop;
if ( likely(timer->status != TIMER_STATUS_killed) )
__add_timer(timer);
}
+static void execute_timer(struct timers *ts, struct timer *t)
+{
+ void (*fn)(void *) = t->function;
+ void *data = t->data;
+
+ ts->running = t;
+ spin_unlock_irq(&ts->lock);
+ (*fn)(data);
+ spin_lock_irq(&ts->lock);
+ ts->running = NULL;
+}
+
+
static void timer_softirq_action(void)
{
struct timer *t, **heap, *next;
struct timers *ts;
- s_time_t now, deadline;
- void (*fn)(void *);
- void *data;
+ s_time_t now;
ts = &this_cpu(timers);
+ heap = ts->heap;
+
+ /* If we overflowed the heap, try to allocate a larger heap. */
+ if ( unlikely(ts->overflow) )
+ {
+ /* old_limit == (2^n)-1; new_limit == (2^(n+4))-1 */
+ int old_limit = GET_HEAP_LIMIT(heap);
+ int new_limit = ((old_limit + 1) << 4) - 1;
+ struct timer **newheap = xmalloc_array(struct timer *, new_limit + 1);
+ if ( newheap != NULL )
+ {
+ spin_lock_irq(&ts->lock);
+ memcpy(newheap, heap, (old_limit + 1) * sizeof(*heap));
+ SET_HEAP_LIMIT(newheap, new_limit);
+ ts->heap = newheap;
+ spin_unlock_irq(&ts->lock);
+ if ( old_limit != 0 )
+ xfree(heap);
+ heap = newheap;
+ }
+ }
spin_lock_irq(&ts->lock);
- /* Try to move timers from overflow linked list to more efficient heap. */
+ now = NOW();
+
+ /* Execute ready heap timers. */
+ while ( (GET_HEAP_SIZE(heap) != 0) &&
+ ((t = heap[1])->expires < now) )
+ {
+ remove_from_heap(heap, t);
+ t->status = TIMER_STATUS_inactive;
+ execute_timer(ts, t);
+ }
+
+ /* Execute ready list timers. */
+ while ( ((t = ts->list) != NULL) && (t->expires < now) )
+ {
+ ts->list = t->list_next;
+ t->status = TIMER_STATUS_inactive;
+ execute_timer(ts, t);
+ }
+
+ /* Try to move timers from linked list to more efficient heap. */
next = ts->list;
ts->list = NULL;
while ( unlikely((t = next) != NULL) )
t->status = TIMER_STATUS_inactive;
add_entry(ts, t);
}
-
- heap = ts->heap;
- now = NOW();
- while ( (GET_HEAP_SIZE(heap) != 0) &&
- ((t = heap[1])->expires < (now + TIMER_SLOP)) )
+ ts->overflow = (ts->list != NULL);
+ if ( unlikely(ts->overflow) )
{
- remove_entry(ts, t);
-
- ts->running = t;
-
- fn = t->function;
- data = t->data;
-
- spin_unlock_irq(&ts->lock);
- (*fn)(data);
- spin_lock_irq(&ts->lock);
-
- /* Heap may have grown while the lock was released. */
- heap = ts->heap;
+ /* Find earliest deadline at head of list or top of heap. */
+ this_cpu(timer_deadline) = ts->list->expires;
+ if ( (GET_HEAP_SIZE(heap) != 0) &&
+ ((t = heap[1])->expires < this_cpu(timer_deadline)) )
+ this_cpu(timer_deadline) = t->expires;
}
-
- deadline = GET_HEAP_SIZE(heap) ? heap[1]->expires : 0;
-
- while ( unlikely((t = ts->list) != NULL) )
+ else
{
- if ( t->expires >= (now + TIMER_SLOP) )
+ /*
+         * Find the earliest deadline that encompasses the largest number of timers
+ * on the heap. To do this we take timers from the heap while their
+ * valid deadline ranges continue to intersect.
+ */
+ s_time_t start = 0, end = STIME_MAX;
+ struct timer **list_tail = &ts->list;
+
+ while ( (GET_HEAP_SIZE(heap) != 0) &&
+ ((t = heap[1])->expires <= end) )
{
- if ( (deadline == 0) || (deadline > t->expires) )
- deadline = t->expires;
- break;
- }
-
- ts->list = t->list_next;
- t->status = TIMER_STATUS_inactive;
+ remove_entry(ts, t);
- ts->running = t;
+ t->status = TIMER_STATUS_in_list;
+ t->list_next = NULL;
+ *list_tail = t;
+ list_tail = &t->list_next;
- fn = t->function;
- data = t->data;
+ start = t->expires;
+ if ( end > t->expires_end )
+ end = t->expires_end;
+ }
- spin_unlock_irq(&ts->lock);
- (*fn)(data);
- spin_lock_irq(&ts->lock);
+ this_cpu(timer_deadline) = start;
}
- ts->running = NULL;
-
- this_cpu(timer_deadline) = deadline;
- if ( !reprogram_timer(deadline) )
+ if ( !reprogram_timer(this_cpu(timer_deadline)) )
raise_softirq(TIMER_SOFTIRQ);
spin_unlock_irq(&ts->lock);
timer_softirq_action();
}
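+/*
+ * Round firsttick up to the next multiple of period; returns firsttick
+ * unchanged if it is already a multiple, or if period is zero.
+ */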
+s_time_t align_timer(s_time_t firsttick, uint64_t period)
+{
+ if ( !period )
+ return firsttick;
+
+ return firsttick + (period - 1) - ((firsttick - 1) % period);
+}
static void dump_timerq(unsigned char key)
{
for ( j = 1; j <= GET_HEAP_SIZE(ts->heap); j++ )
{
t = ts->heap[j];
- printk (" %d : %p ex=0x%08X%08X %p\n",
- j, t, (u32)(t->expires>>32), (u32)t->expires, t->data);
+ printk (" %d : %p ex=0x%08X%08X %p %p\n",
+ j, t, (u32)(t->expires>>32), (u32)t->expires,
+ t->data, t->function);
}
for ( t = ts->list, j = 0; t != NULL; t = t->list_next, j++ )
- printk (" L%d : %p ex=0x%08X%08X %p\n",
- j, t, (u32)(t->expires>>32), (u32)t->expires, t->data);
+ printk (" L%d : %p ex=0x%08X%08X %p %p\n",
+ j, t, (u32)(t->expires>>32), (u32)t->expires,
+ t->data, t->function);
spin_unlock_irqrestore(&ts->lock, flags);
printk("\n");
}
#define xen_t_buf t_buf
CHECK_t_buf;
#undef xen_t_buf
-#define TB_COMPAT IS_COMPAT(dom0)
#else
#define compat_t_rec t_rec
-#define TB_COMPAT 0
#endif
/* opt_tbuf_size: trace buffer size (in pages) */
order = get_order_from_pages(nr_pages);
data_size = (opt_tbuf_size * PAGE_SIZE - sizeof(struct t_buf));
- if ( (rawbuf = alloc_xenheap_pages(order)) == NULL )
+ if ( (rawbuf = alloc_xenheap_pages(order, 0)) == NULL )
{
printk("Xen trace buffers: memory allocation failed\n");
opt_tbuf_size = 0;
return -EFAULT;
*page = maddr_to_page(maddr);
- if ( get_page(*page, current->domain) == 0 )
+ if ( !get_page(*page, current->domain) )
{
- if ( page_get_owner(*page) != current->domain )
- {
- /*
- * This page might be a page granted by another domain, or
- * this page is freed with decrease reservation hypercall at
- * the same time.
- */
- gdprintk(XENLOG_WARNING,
- "bad page is passed. paddr 0x%lx maddr 0x%lx\n",
- paddr, maddr);
- return -EFAULT;
- }
-
- /* Try again. */
- cpu_relax();
- return -EAGAIN;
+ /*
+ * This page might be a page granted by another domain, or this page
+ * is freed with decrease reservation hypercall at the same time.
+ */
+ gdprintk(XENLOG_WARNING,
+ "bad page is passed. paddr 0x%lx maddr 0x%lx\n",
+ paddr, maddr);
+ return -EFAULT;
}
return 0;
return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_ACTIVE));
}
-static int is_passive(struct domain *d)
+int is_passive(struct domain *d)
{
struct xenoprof *x = d->xenoprof;
return ((x != NULL) && (x->domain_type == XENOPROF_DOMAIN_PASSIVE));
{
int i;
- /* Check if previous page owner has released the page. */
- for ( i = 0; i < npages; i++ )
- {
- struct page_info *page = mfn_to_page(mfn + i);
- if ( (page->count_info & (PGC_allocated|PGC_count_mask)) != 0 )
- {
- gdprintk(XENLOG_INFO, "mfn 0x%lx page->count_info 0x%x\n",
- mfn + i, page->count_info);
- return -EBUSY;
- }
- page_set_owner(page, NULL);
- }
-
- for ( i = 0; i < npages; i++ )
- share_xen_page_with_guest(mfn_to_page(mfn + i), d, XENSHARE_writable);
-
- return 0;
+ /* Check if previous page owner has released the page. */
+ for ( i = 0; i < npages; i++ )
+ {
+ struct page_info *page = mfn_to_page(mfn + i);
+ if ( (page->count_info & (PGC_allocated|PGC_count_mask)) != 0 )
+ {
+ gdprintk(XENLOG_INFO, "mfn 0x%lx page->count_info 0x%lx\n",
+ mfn + i, (unsigned long)page->count_info);
+ return -EBUSY;
+ }
+ page_set_owner(page, NULL);
+ }
+
+ for ( i = 0; i < npages; i++ )
+ share_xen_page_with_guest(mfn_to_page(mfn + i), d, XENSHARE_writable);
+
+ return 0;
}
static void
bufsize = sizeof(struct xenoprof_buf);
i = sizeof(struct event_log);
#ifdef CONFIG_COMPAT
- d->xenoprof->is_compat = IS_COMPAT(is_passive ? dom0 : d);
+ d->xenoprof->is_compat = is_pv_32on64_domain(is_passive ? dom0 : d);
if ( XENOPROF_COMPAT(d->xenoprof) )
{
bufsize = sizeof(struct compat_oprof_buf);
bufsize += (max_samples - 1) * i;
npages = (nvcpu * bufsize - 1) / PAGE_SIZE + 1;
- d->xenoprof->rawbuf = alloc_xenheap_pages(get_order_from_pages(npages));
+ d->xenoprof->rawbuf = alloc_xenheap_pages(get_order_from_pages(npages), 0);
if ( d->xenoprof->rawbuf == NULL )
{
xfree(d->xenoprof);
{
case XENOPROF_init:
ret = xenoprof_op_init(arg);
+ if ( !ret )
+ xenoprof_state = XENOPROF_INITIALIZED;
break;
case XENOPROF_get_buffer:
break;
case XENOPROF_reset_active_list:
- {
reset_active_list();
ret = 0;
break;
- }
+
case XENOPROF_reset_passive_list:
- {
reset_passive_list();
ret = 0;
break;
- }
+
case XENOPROF_set_active:
{
domid_t domid;
- if ( xenoprof_state != XENOPROF_IDLE )
+ if ( xenoprof_state != XENOPROF_INITIALIZED )
{
ret = -EPERM;
break;
ret = add_active_list(domid);
break;
}
+
case XENOPROF_set_passive:
- {
- if ( xenoprof_state != XENOPROF_IDLE )
+ if ( xenoprof_state != XENOPROF_INITIALIZED )
{
ret = -EPERM;
break;
}
ret = add_passive_list(arg);
break;
- }
+
case XENOPROF_reserve_counters:
- if ( xenoprof_state != XENOPROF_IDLE )
+ if ( xenoprof_state != XENOPROF_INITIALIZED )
{
ret = -EPERM;
break;
ret = -EPERM;
break;
}
-
ret = xenoprof_arch_counter(arg);
break;
case XENOPROF_enable_virq:
{
int i;
+
if ( current->domain == xenoprof_primary_profiler )
{
+ if ( xenoprof_state != XENOPROF_READY )
+ {
+ ret = -EPERM;
+ break;
+ }
xenoprof_arch_enable_virq();
xenoprof_reset_stat();
for ( i = 0; i < pdomains; i++ )
if ( (xenoprof_state == XENOPROF_COUNTERS_RESERVED) ||
(xenoprof_state == XENOPROF_READY) )
{
- xenoprof_state = XENOPROF_IDLE;
+ xenoprof_state = XENOPROF_INITIALIZED;
xenoprof_arch_release_counters();
xenoprof_arch_disable_virq();
reset_passive_list();
case XENOPROF_shutdown:
ret = -EPERM;
- if ( xenoprof_state == XENOPROF_IDLE )
+ if ( xenoprof_state == XENOPROF_INITIALIZED )
{
activated = 0;
adomains=0;
pool_bytes = ROUNDUP_SIZE(sizeof(*pool));
pool_order = get_order_from_bytes(pool_bytes);
- pool = (void *)alloc_xenheap_pages(pool_order);
+ pool = (void *)alloc_xenheap_pages(pool_order, 0);
if ( pool == NULL )
return NULL;
memset(pool, 0, pool_bytes);
static void *xmalloc_pool_get(unsigned long size)
{
ASSERT(size == PAGE_SIZE);
- return alloc_xenheap_pages(0);
+ return alloc_xenheap_page();
}
static void xmalloc_pool_put(void *p)
{
- free_xenheap_pages(p,0);
+ free_xenheap_page(p);
}
static void *xmalloc_whole_pages(unsigned long size)
struct bhdr *b;
unsigned int pageorder = get_order_from_bytes(size + BHDR_OVERHEAD);
- b = alloc_xenheap_pages(pageorder);
+ b = alloc_xenheap_pages(pageorder, 0);
if ( b == NULL )
return NULL;
--- /dev/null
+obj-y += rijndael.o
+obj-y += vmac.o
--- /dev/null
+/* $OpenBSD: rijndael.c,v 1.19 2008/06/09 07:49:45 djm Exp $ */
+
+/**
+ * rijndael-alg-fst.c
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* start for Xen */
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <crypto/rijndael.h>
+/* end for Xen */
+
+#undef FULL_UNROLL
+
+/*
+Te0[x] = S [x].[02, 01, 01, 03];
+Te1[x] = S [x].[03, 02, 01, 01];
+Te2[x] = S [x].[01, 03, 02, 01];
+Te3[x] = S [x].[01, 01, 03, 02];
+Te4[x] = S [x].[01, 01, 01, 01];
+
+Td0[x] = Si[x].[0e, 09, 0d, 0b];
+Td1[x] = Si[x].[0b, 0e, 09, 0d];
+Td2[x] = Si[x].[0d, 0b, 0e, 09];
+Td3[x] = Si[x].[09, 0d, 0b, 0e];
+Td4[x] = Si[x].[01, 01, 01, 01];
+*/
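+
+/*
+ * Worked example (byte products taken in GF(2^8)): S[0x00] = 0x63, so
+ * Te0[0x00] packs {02x63, 01x63, 01x63, 03x63} = 0xc66363a5, the first
+ * entry of the table below, and Te4[0x00] is simply 0x63636363.
+ */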
+
+static const u32 Te0[256] = {
+ 0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
+ 0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
+ 0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
+ 0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
+ 0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
+ 0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
+ 0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
+ 0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
+ 0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
+ 0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
+ 0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
+ 0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
+ 0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
+ 0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
+ 0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
+ 0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
+ 0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
+ 0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
+ 0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
+ 0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
+ 0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
+ 0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
+ 0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
+ 0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
+ 0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
+ 0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
+ 0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
+ 0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
+ 0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
+ 0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
+ 0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
+ 0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
+ 0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
+ 0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
+ 0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
+ 0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
+ 0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
+ 0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
+ 0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
+ 0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
+ 0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
+ 0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
+ 0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
+ 0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
+ 0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
+ 0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
+ 0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
+ 0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
+ 0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
+ 0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
+ 0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
+ 0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
+ 0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
+ 0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
+ 0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
+ 0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
+ 0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
+ 0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
+ 0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
+ 0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
+ 0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
+ 0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
+ 0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
+ 0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU,
+};
+static const u32 Te1[256] = {
+ 0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
+ 0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
+ 0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
+ 0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
+ 0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
+ 0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
+ 0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
+ 0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
+ 0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
+ 0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
+ 0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
+ 0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
+ 0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
+ 0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
+ 0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
+ 0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
+ 0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
+ 0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
+ 0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
+ 0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
+ 0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
+ 0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
+ 0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
+ 0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
+ 0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
+ 0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
+ 0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
+ 0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
+ 0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
+ 0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
+ 0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
+ 0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
+ 0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
+ 0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
+ 0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
+ 0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
+ 0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
+ 0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
+ 0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
+ 0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
+ 0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
+ 0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
+ 0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
+ 0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
+ 0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
+ 0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
+ 0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
+ 0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
+ 0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
+ 0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
+ 0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
+ 0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
+ 0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
+ 0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
+ 0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
+ 0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
+ 0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
+ 0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
+ 0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
+ 0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
+ 0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
+ 0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
+ 0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
+ 0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U,
+};
+static const u32 Te2[256] = {
+ 0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
+ 0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
+ 0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
+ 0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
+ 0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
+ 0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
+ 0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
+ 0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
+ 0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
+ 0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
+ 0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
+ 0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
+ 0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
+ 0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
+ 0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
+ 0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
+ 0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
+ 0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
+ 0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
+ 0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
+ 0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
+ 0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
+ 0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
+ 0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
+ 0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
+ 0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
+ 0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
+ 0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
+ 0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
+ 0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
+ 0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
+ 0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
+ 0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
+ 0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
+ 0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
+ 0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
+ 0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
+ 0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
+ 0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
+ 0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
+ 0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
+ 0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
+ 0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
+ 0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
+ 0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
+ 0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
+ 0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
+ 0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
+ 0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
+ 0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
+ 0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
+ 0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
+ 0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
+ 0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
+ 0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
+ 0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
+ 0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
+ 0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
+ 0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
+ 0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
+ 0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
+ 0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
+ 0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
+ 0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U,
+};
+static const u32 Te3[256] = {
+ 0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
+ 0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
+ 0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
+ 0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
+ 0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
+ 0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
+ 0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
+ 0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
+ 0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
+ 0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
+ 0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
+ 0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
+ 0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
+ 0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
+ 0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
+ 0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
+ 0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
+ 0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
+ 0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
+ 0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
+ 0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
+ 0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
+ 0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
+ 0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
+ 0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
+ 0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
+ 0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
+ 0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
+ 0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
+ 0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
+ 0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
+ 0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
+ 0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
+ 0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
+ 0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
+ 0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
+ 0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
+ 0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
+ 0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
+ 0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
+ 0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
+ 0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
+ 0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
+ 0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
+ 0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
+ 0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
+ 0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
+ 0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
+ 0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
+ 0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
+ 0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
+ 0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
+ 0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
+ 0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
+ 0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
+ 0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
+ 0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
+ 0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
+ 0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
+ 0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
+ 0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
+ 0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
+ 0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
+ 0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU,
+};
+static const u32 Te4[256] = {
+ 0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
+ 0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
+ 0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
+ 0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
+ 0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
+ 0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
+ 0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
+ 0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
+ 0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
+ 0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
+ 0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
+ 0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
+ 0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
+ 0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
+ 0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
+ 0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
+ 0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
+ 0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
+ 0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
+ 0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
+ 0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
+ 0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
+ 0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
+ 0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
+ 0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
+ 0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
+ 0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
+ 0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
+ 0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
+ 0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
+ 0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
+ 0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
+ 0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
+ 0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
+ 0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
+ 0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
+ 0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
+ 0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
+ 0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
+ 0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
+ 0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
+ 0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
+ 0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
+ 0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
+ 0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
+ 0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
+ 0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
+ 0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
+ 0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
+ 0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
+ 0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
+ 0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
+ 0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
+ 0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
+ 0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
+ 0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
+ 0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
+ 0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
+ 0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
+ 0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
+ 0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
+ 0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
+ 0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
+ 0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U,
+};
+static const u32 Td0[256] = {
+ 0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
+ 0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
+ 0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
+ 0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
+ 0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
+ 0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
+ 0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
+ 0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
+ 0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
+ 0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
+ 0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
+ 0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
+ 0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
+ 0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
+ 0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
+ 0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
+ 0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
+ 0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
+ 0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
+ 0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
+ 0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
+ 0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
+ 0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
+ 0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
+ 0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
+ 0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
+ 0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
+ 0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
+ 0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
+ 0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
+ 0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
+ 0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
+ 0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
+ 0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
+ 0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
+ 0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
+ 0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
+ 0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
+ 0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
+ 0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
+ 0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
+ 0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
+ 0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
+ 0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
+ 0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
+ 0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
+ 0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
+ 0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
+ 0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
+ 0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
+ 0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
+ 0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
+ 0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
+ 0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
+ 0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
+ 0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
+ 0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
+ 0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
+ 0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
+ 0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
+ 0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
+ 0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
+ 0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
+ 0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U,
+};
+static const u32 Td1[256] = {
+ 0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
+ 0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
+ 0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
+ 0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
+ 0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
+ 0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
+ 0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
+ 0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
+ 0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
+ 0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
+ 0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
+ 0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
+ 0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
+ 0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
+ 0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
+ 0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
+ 0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
+ 0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
+ 0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
+ 0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
+ 0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
+ 0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
+ 0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
+ 0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
+ 0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
+ 0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
+ 0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
+ 0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
+ 0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
+ 0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
+ 0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
+ 0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
+ 0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
+ 0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
+ 0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
+ 0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
+ 0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
+ 0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
+ 0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
+ 0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
+ 0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
+ 0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
+ 0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
+ 0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
+ 0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
+ 0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
+ 0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
+ 0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
+ 0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
+ 0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
+ 0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
+ 0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
+ 0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
+ 0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
+ 0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
+ 0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
+ 0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
+ 0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
+ 0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
+ 0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
+ 0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
+ 0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
+ 0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
+ 0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U,
+};
+static const u32 Td2[256] = {
+ 0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
+ 0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
+ 0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
+ 0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
+ 0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
+ 0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
+ 0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
+ 0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
+ 0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
+ 0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
+ 0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
+ 0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
+ 0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
+ 0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
+ 0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
+ 0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
+ 0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
+ 0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
+ 0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
+ 0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
+ 0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
+ 0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
+ 0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
+ 0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
+ 0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
+ 0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
+ 0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
+ 0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
+ 0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
+ 0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
+ 0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
+ 0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
+ 0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
+ 0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
+ 0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
+ 0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
+ 0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
+ 0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
+ 0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
+ 0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
+ 0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
+ 0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
+ 0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
+ 0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
+ 0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
+ 0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
+ 0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
+ 0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
+ 0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
+ 0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
+ 0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
+ 0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
+ 0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
+ 0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
+ 0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
+ 0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
+ 0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
+ 0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
+ 0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
+ 0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
+ 0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
+ 0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
+ 0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
+ 0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U,
+};
+static const u32 Td3[256] = {
+ 0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
+ 0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
+ 0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
+ 0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
+ 0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
+ 0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
+ 0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
+ 0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
+ 0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
+ 0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
+ 0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
+ 0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
+ 0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
+ 0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
+ 0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
+ 0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
+ 0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
+ 0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
+ 0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
+ 0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
+ 0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
+ 0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
+ 0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
+ 0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
+ 0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
+ 0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
+ 0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
+ 0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
+ 0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
+ 0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
+ 0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
+ 0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
+ 0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
+ 0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
+ 0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
+ 0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
+ 0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
+ 0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
+ 0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
+ 0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
+ 0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
+ 0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
+ 0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
+ 0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
+ 0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
+ 0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
+ 0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
+ 0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
+ 0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
+ 0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
+ 0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
+ 0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
+ 0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
+ 0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
+ 0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
+ 0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
+ 0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
+ 0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
+ 0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
+ 0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
+ 0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
+ 0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
+ 0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
+ 0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U,
+};
+static const u32 Td4[256] = {
+ 0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
+ 0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
+ 0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
+ 0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
+ 0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
+ 0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
+ 0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
+ 0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
+ 0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
+ 0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
+ 0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
+ 0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
+ 0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
+ 0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
+ 0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
+ 0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
+ 0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
+ 0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
+ 0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
+ 0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
+ 0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
+ 0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
+ 0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
+ 0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
+ 0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
+ 0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
+ 0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
+ 0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
+ 0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
+ 0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
+ 0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
+ 0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
+ 0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
+ 0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
+ 0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
+ 0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
+ 0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
+ 0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
+ 0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
+ 0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
+ 0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
+ 0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
+ 0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
+ 0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
+ 0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
+ 0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
+ 0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
+ 0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
+ 0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
+ 0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
+ 0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
+ 0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
+ 0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
+ 0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
+ 0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
+ 0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
+ 0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
+ 0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
+ 0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
+ 0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
+ 0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
+ 0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
+ 0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
+ 0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU,
+};
+static const u32 rcon[] = {
+ 0x01000000, 0x02000000, 0x04000000, 0x08000000,
+ 0x10000000, 0x20000000, 0x40000000, 0x80000000,
+ 0x1B000000, 0x36000000, /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
+};
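+
+/*
+ * rcon[i] holds x^i in GF(2^8) (modulo the AES polynomial x^8+x^4+x^3+x+1)
+ * in its top byte: doubling 0x80 overflows and reduces to 0x1b, and
+ * doubling 0x1b yields 0x36, matching the last two entries above.
+ */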
+
+#define GETU32(pt) (((u32)(pt)[0] << 24) ^ ((u32)(pt)[1] << 16) ^ ((u32)(pt)[2] << 8) ^ ((u32)(pt)[3]))
+#define PUTU32(ct, st) { (ct)[0] = (u8)((st) >> 24); (ct)[1] = (u8)((st) >> 16); (ct)[2] = (u8)((st) >> 8); (ct)[3] = (u8)(st); }
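+
+/*
+ * GETU32/PUTU32 convert between byte arrays and big-endian 32-bit words;
+ * for example, GETU32 applied to the bytes {0x01, 0x02, 0x03, 0x04}
+ * evaluates to 0x01020304, and PUTU32 performs the inverse store.
+ */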
+
+/**
+ * Expand the cipher key into the encryption key schedule.
+ *
+ * @return the number of rounds for the given cipher key size.
+ */
+int
+rijndaelKeySetupEnc(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits)
+{
+ int i = 0;
+ u32 temp;
+
+ rk[0] = GETU32(cipherKey );
+ rk[1] = GETU32(cipherKey + 4);
+ rk[2] = GETU32(cipherKey + 8);
+ rk[3] = GETU32(cipherKey + 12);
+ if (keyBits == 128) {
+ for (;;) {
+ temp = rk[3];
+ rk[4] = rk[0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[5] = rk[1] ^ rk[4];
+ rk[6] = rk[2] ^ rk[5];
+ rk[7] = rk[3] ^ rk[6];
+ if (++i == 10) {
+ return 10;
+ }
+ rk += 4;
+ }
+ }
+ rk[4] = GETU32(cipherKey + 16);
+ rk[5] = GETU32(cipherKey + 20);
+ if (keyBits == 192) {
+ for (;;) {
+ temp = rk[ 5];
+ rk[ 6] = rk[ 0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 7] = rk[ 1] ^ rk[ 6];
+ rk[ 8] = rk[ 2] ^ rk[ 7];
+ rk[ 9] = rk[ 3] ^ rk[ 8];
+ if (++i == 8) {
+ return 12;
+ }
+ rk[10] = rk[ 4] ^ rk[ 9];
+ rk[11] = rk[ 5] ^ rk[10];
+ rk += 6;
+ }
+ }
+ rk[6] = GETU32(cipherKey + 24);
+ rk[7] = GETU32(cipherKey + 28);
+ if (keyBits == 256) {
+ for (;;) {
+ temp = rk[ 7];
+ rk[ 8] = rk[ 0] ^
+ (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp ) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp >> 24) ] & 0x000000ff) ^
+ rcon[i];
+ rk[ 9] = rk[ 1] ^ rk[ 8];
+ rk[10] = rk[ 2] ^ rk[ 9];
+ rk[11] = rk[ 3] ^ rk[10];
+ if (++i == 7) {
+ return 14;
+ }
+ temp = rk[11];
+ rk[12] = rk[ 4] ^
+ (Te4[(temp >> 24) ] & 0xff000000) ^
+ (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(temp ) & 0xff] & 0x000000ff);
+ rk[13] = rk[ 5] ^ rk[12];
+ rk[14] = rk[ 6] ^ rk[13];
+ rk[15] = rk[ 7] ^ rk[14];
+ rk += 8;
+ }
+ }
+ return 0;
+}
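+
+/*
+ * The returns above fix the schedule sizes: a 128-bit key gives 10 rounds
+ * and 4x(10+1) = 44 round-key words, 192 bits give 12 rounds (52 words),
+ * 256 bits give 14 rounds (60 words), and any other keyBits value falls
+ * through to the final "return 0".
+ */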
+
+/**
+ * Expand the cipher key into the decryption key schedule.
+ *
+ * @return the number of rounds for the given cipher key size.
+ */
+int
+rijndaelKeySetupDec(u32 rk[/*4*(Nr + 1)*/], const u8 cipherKey[], int keyBits)
+{
+ int Nr, i, j;
+ u32 temp;
+
+ /* expand the cipher key: */
+ Nr = rijndaelKeySetupEnc(rk, cipherKey, keyBits);
+
+ /* invert the order of the round keys: */
+ for (i = 0, j = 4*Nr; i < j; i += 4, j -= 4) {
+ temp = rk[i ]; rk[i ] = rk[j ]; rk[j ] = temp;
+ temp = rk[i + 1]; rk[i + 1] = rk[j + 1]; rk[j + 1] = temp;
+ temp = rk[i + 2]; rk[i + 2] = rk[j + 2]; rk[j + 2] = temp;
+ temp = rk[i + 3]; rk[i + 3] = rk[j + 3]; rk[j + 3] = temp;
+ }
+ /* apply the inverse MixColumn transform to all round keys but the first and the last: */
+ for (i = 1; i < Nr; i++) {
+ rk += 4;
+ rk[0] =
+ Td0[Te4[(rk[0] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[0] ) & 0xff] & 0xff];
+ rk[1] =
+ Td0[Te4[(rk[1] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[1] ) & 0xff] & 0xff];
+ rk[2] =
+ Td0[Te4[(rk[2] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[2] ) & 0xff] & 0xff];
+ rk[3] =
+ Td0[Te4[(rk[3] >> 24) ] & 0xff] ^
+ Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
+ Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
+ Td3[Te4[(rk[3] ) & 0xff] & 0xff];
+ }
+ return Nr;
+}
+
+void
+rijndaelEncrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 pt[16],
+ u8 ct[16])
+{
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(pt ) ^ rk[0];
+ s1 = GETU32(pt + 4) ^ rk[1];
+ s2 = GETU32(pt + 8) ^ rk[2];
+ s3 = GETU32(pt + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[ 4];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[ 5];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[ 6];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[ 8];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[ 9];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[10];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[12];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[13];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[14];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[16];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[17];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[18];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[20];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[21];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[22];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[24];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[25];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[26];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[28];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[29];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[30];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[32];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[33];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[34];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[36];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[37];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[38];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[39];
+ if (Nr > 10) {
+ /* round 10: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[40];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[41];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[42];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[44];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[45];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[46];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[47];
+ if (Nr > 12) {
+ /* round 12: */
+ s0 = Te0[t0 >> 24] ^ Te1[(t1 >> 16) & 0xff] ^ Te2[(t2 >> 8) & 0xff] ^ Te3[t3 & 0xff] ^ rk[48];
+ s1 = Te0[t1 >> 24] ^ Te1[(t2 >> 16) & 0xff] ^ Te2[(t3 >> 8) & 0xff] ^ Te3[t0 & 0xff] ^ rk[49];
+ s2 = Te0[t2 >> 24] ^ Te1[(t3 >> 16) & 0xff] ^ Te2[(t0 >> 8) & 0xff] ^ Te3[t1 & 0xff] ^ rk[50];
+ s3 = Te0[t3 >> 24] ^ Te1[(t0 >> 16) & 0xff] ^ Te2[(t1 >> 8) & 0xff] ^ Te3[t2 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Te0[s0 >> 24] ^ Te1[(s1 >> 16) & 0xff] ^ Te2[(s2 >> 8) & 0xff] ^ Te3[s3 & 0xff] ^ rk[52];
+ t1 = Te0[s1 >> 24] ^ Te1[(s2 >> 16) & 0xff] ^ Te2[(s3 >> 8) & 0xff] ^ Te3[s0 & 0xff] ^ rk[53];
+ t2 = Te0[s2 >> 24] ^ Te1[(s3 >> 16) & 0xff] ^ Te2[(s0 >> 8) & 0xff] ^ Te3[s1 & 0xff] ^ rk[54];
+ t3 = Te0[s3 >> 24] ^ Te1[(s0 >> 16) & 0xff] ^ Te2[(s1 >> 8) & 0xff] ^ Te3[s2 & 0xff] ^ rk[55];
+ }
+ }
+ rk += Nr << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = Nr >> 1;
+ for (;;) {
+ t0 =
+ Te0[(s0 >> 24) ] ^
+ Te1[(s1 >> 16) & 0xff] ^
+ Te2[(s2 >> 8) & 0xff] ^
+ Te3[(s3 ) & 0xff] ^
+ rk[4];
+ t1 =
+ Te0[(s1 >> 24) ] ^
+ Te1[(s2 >> 16) & 0xff] ^
+ Te2[(s3 >> 8) & 0xff] ^
+ Te3[(s0 ) & 0xff] ^
+ rk[5];
+ t2 =
+ Te0[(s2 >> 24) ] ^
+ Te1[(s3 >> 16) & 0xff] ^
+ Te2[(s0 >> 8) & 0xff] ^
+ Te3[(s1 ) & 0xff] ^
+ rk[6];
+ t3 =
+ Te0[(s3 >> 24) ] ^
+ Te1[(s0 >> 16) & 0xff] ^
+ Te2[(s1 >> 8) & 0xff] ^
+ Te3[(s2 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ Te0[(t0 >> 24) ] ^
+ Te1[(t1 >> 16) & 0xff] ^
+ Te2[(t2 >> 8) & 0xff] ^
+ Te3[(t3 ) & 0xff] ^
+ rk[0];
+ s1 =
+ Te0[(t1 >> 24) ] ^
+ Te1[(t2 >> 16) & 0xff] ^
+ Te2[(t3 >> 8) & 0xff] ^
+ Te3[(t0 ) & 0xff] ^
+ rk[1];
+ s2 =
+ Te0[(t2 >> 24) ] ^
+ Te1[(t3 >> 16) & 0xff] ^
+ Te2[(t0 >> 8) & 0xff] ^
+ Te3[(t1 ) & 0xff] ^
+ rk[2];
+ s3 =
+ Te0[(t3 >> 24) ] ^
+ Te1[(t0 >> 16) & 0xff] ^
+ Te2[(t1 >> 8) & 0xff] ^
+ Te3[(t2 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (Te4[(t0 >> 24) ] & 0xff000000) ^
+ (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(ct , s0);
+ s1 =
+ (Te4[(t1 >> 24) ] & 0xff000000) ^
+ (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(ct + 4, s1);
+ s2 =
+ (Te4[(t2 >> 24) ] & 0xff000000) ^
+ (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(ct + 8, s2);
+ s3 =
+ (Te4[(t3 >> 24) ] & 0xff000000) ^
+ (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Te4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Te4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(ct + 12, s3);
+}
+
+static void
+rijndaelDecrypt(const u32 rk[/*4*(Nr + 1)*/], int Nr, const u8 ct[16],
+ u8 pt[16])
+{
+ u32 s0, s1, s2, s3, t0, t1, t2, t3;
+#ifndef FULL_UNROLL
+ int r;
+#endif /* ?FULL_UNROLL */
+
+ /*
+ * map byte array block to cipher state
+ * and add initial round key:
+ */
+ s0 = GETU32(ct ) ^ rk[0];
+ s1 = GETU32(ct + 4) ^ rk[1];
+ s2 = GETU32(ct + 8) ^ rk[2];
+ s3 = GETU32(ct + 12) ^ rk[3];
+#ifdef FULL_UNROLL
+ /* round 1: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[ 4];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[ 5];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[ 6];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[ 7];
+ /* round 2: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[ 8];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[ 9];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[10];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[11];
+ /* round 3: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[12];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[13];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[14];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[15];
+ /* round 4: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[16];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[17];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[18];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[19];
+ /* round 5: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[20];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[21];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[22];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[23];
+ /* round 6: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[24];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[25];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[26];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[27];
+ /* round 7: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[28];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[29];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[30];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[31];
+ /* round 8: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[32];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[33];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[34];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[35];
+ /* round 9: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[36];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[37];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[38];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[39];
+ if (Nr > 10) {
+ /* round 10: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[40];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[41];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[42];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[43];
+ /* round 11: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[44];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[45];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[46];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[47];
+ if (Nr > 12) {
+ /* round 12: */
+ s0 = Td0[t0 >> 24] ^ Td1[(t3 >> 16) & 0xff] ^ Td2[(t2 >> 8) & 0xff] ^ Td3[t1 & 0xff] ^ rk[48];
+ s1 = Td0[t1 >> 24] ^ Td1[(t0 >> 16) & 0xff] ^ Td2[(t3 >> 8) & 0xff] ^ Td3[t2 & 0xff] ^ rk[49];
+ s2 = Td0[t2 >> 24] ^ Td1[(t1 >> 16) & 0xff] ^ Td2[(t0 >> 8) & 0xff] ^ Td3[t3 & 0xff] ^ rk[50];
+ s3 = Td0[t3 >> 24] ^ Td1[(t2 >> 16) & 0xff] ^ Td2[(t1 >> 8) & 0xff] ^ Td3[t0 & 0xff] ^ rk[51];
+ /* round 13: */
+ t0 = Td0[s0 >> 24] ^ Td1[(s3 >> 16) & 0xff] ^ Td2[(s2 >> 8) & 0xff] ^ Td3[s1 & 0xff] ^ rk[52];
+ t1 = Td0[s1 >> 24] ^ Td1[(s0 >> 16) & 0xff] ^ Td2[(s3 >> 8) & 0xff] ^ Td3[s2 & 0xff] ^ rk[53];
+ t2 = Td0[s2 >> 24] ^ Td1[(s1 >> 16) & 0xff] ^ Td2[(s0 >> 8) & 0xff] ^ Td3[s3 & 0xff] ^ rk[54];
+ t3 = Td0[s3 >> 24] ^ Td1[(s2 >> 16) & 0xff] ^ Td2[(s1 >> 8) & 0xff] ^ Td3[s0 & 0xff] ^ rk[55];
+ }
+ }
+ rk += Nr << 2;
+#else /* !FULL_UNROLL */
+ /*
+ * Nr - 1 full rounds:
+ */
+ r = Nr >> 1;
+ for (;;) {
+ t0 =
+ Td0[(s0 >> 24) ] ^
+ Td1[(s3 >> 16) & 0xff] ^
+ Td2[(s2 >> 8) & 0xff] ^
+ Td3[(s1 ) & 0xff] ^
+ rk[4];
+ t1 =
+ Td0[(s1 >> 24) ] ^
+ Td1[(s0 >> 16) & 0xff] ^
+ Td2[(s3 >> 8) & 0xff] ^
+ Td3[(s2 ) & 0xff] ^
+ rk[5];
+ t2 =
+ Td0[(s2 >> 24) ] ^
+ Td1[(s1 >> 16) & 0xff] ^
+ Td2[(s0 >> 8) & 0xff] ^
+ Td3[(s3 ) & 0xff] ^
+ rk[6];
+ t3 =
+ Td0[(s3 >> 24) ] ^
+ Td1[(s2 >> 16) & 0xff] ^
+ Td2[(s1 >> 8) & 0xff] ^
+ Td3[(s0 ) & 0xff] ^
+ rk[7];
+
+ rk += 8;
+ if (--r == 0) {
+ break;
+ }
+
+ s0 =
+ Td0[(t0 >> 24) ] ^
+ Td1[(t3 >> 16) & 0xff] ^
+ Td2[(t2 >> 8) & 0xff] ^
+ Td3[(t1 ) & 0xff] ^
+ rk[0];
+ s1 =
+ Td0[(t1 >> 24) ] ^
+ Td1[(t0 >> 16) & 0xff] ^
+ Td2[(t3 >> 8) & 0xff] ^
+ Td3[(t2 ) & 0xff] ^
+ rk[1];
+ s2 =
+ Td0[(t2 >> 24) ] ^
+ Td1[(t1 >> 16) & 0xff] ^
+ Td2[(t0 >> 8) & 0xff] ^
+ Td3[(t3 ) & 0xff] ^
+ rk[2];
+ s3 =
+ Td0[(t3 >> 24) ] ^
+ Td1[(t2 >> 16) & 0xff] ^
+ Td2[(t1 >> 8) & 0xff] ^
+ Td3[(t0 ) & 0xff] ^
+ rk[3];
+ }
+#endif /* ?FULL_UNROLL */
+ /*
+ * apply last round and
+ * map cipher state to byte array block:
+ */
+ s0 =
+ (Td4[(t0 >> 24) ] & 0xff000000) ^
+ (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t1 ) & 0xff] & 0x000000ff) ^
+ rk[0];
+ PUTU32(pt , s0);
+ s1 =
+ (Td4[(t1 >> 24) ] & 0xff000000) ^
+ (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t3 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t2 ) & 0xff] & 0x000000ff) ^
+ rk[1];
+ PUTU32(pt + 4, s1);
+ s2 =
+ (Td4[(t2 >> 24) ] & 0xff000000) ^
+ (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t3 ) & 0xff] & 0x000000ff) ^
+ rk[2];
+ PUTU32(pt + 8, s2);
+ s3 =
+ (Td4[(t3 >> 24) ] & 0xff000000) ^
+ (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
+ (Td4[(t1 >> 8) & 0xff] & 0x0000ff00) ^
+ (Td4[(t0 ) & 0xff] & 0x000000ff) ^
+ rk[3];
+ PUTU32(pt + 12, s3);
+}
+
+/* setup key context for encryption only */
+int
+rijndael_set_key_enc_only(rijndael_ctx *ctx, const u_char *key, int bits)
+{
+ int rounds;
+
+ rounds = rijndaelKeySetupEnc(ctx->ek, key, bits);
+ if (rounds == 0)
+ return -1;
+
+ ctx->Nr = rounds;
+ ctx->enc_only = 1;
+
+ return 0;
+}
+
+/* setup key context for both encryption and decryption */
+int
+rijndael_set_key(rijndael_ctx *ctx, const u_char *key, int bits)
+{
+ int rounds;
+
+ rounds = rijndaelKeySetupEnc(ctx->ek, key, bits);
+ if (rounds == 0)
+ return -1;
+ if (rijndaelKeySetupDec(ctx->dk, key, bits) != rounds)
+ return -1;
+
+ ctx->Nr = rounds;
+ ctx->enc_only = 0;
+
+ return 0;
+}
+
+void
+rijndael_decrypt(rijndael_ctx *ctx, const u_char *src, u_char *dst)
+{
+ rijndaelDecrypt(ctx->dk, ctx->Nr, src, dst);
+}
+
+void
+rijndael_encrypt(rijndael_ctx *ctx, const u_char *src, u_char *dst)
+{
+ rijndaelEncrypt(ctx->ek, ctx->Nr, src, dst);
+}
--- /dev/null
+/* --------------------------------------------------------------------------
+ * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
+ * This implementation is hereby placed in the public domain.
+ * The authors offer no warranty. Use at your own risk.
+ * Please send bug reports to the authors.
+ * Last modified: 17 APR 08, 1700 PDT
+ * ----------------------------------------------------------------------- */
+
+/* start for Xen */
+#include <xen/config.h>
+#include <xen/init.h>
+#include <xen/types.h>
+#include <xen/lib.h>
+#include <crypto/vmac.h>
+#define UINT64_C(x) x##ULL
+/* end for Xen */
+
+/* Enable code tuned for 64-bit registers; otherwise tuned for 32-bit */
+#ifndef VMAC_ARCH_64
+#define VMAC_ARCH_64 (__x86_64__ || __ppc64__ || _M_X64)
+#endif
+
+/* Enable code tuned for Intel SSE2 instruction set */
+#if ((__SSE2__ || (_M_IX86_FP >= 2)) && ( ! VMAC_ARCH_64))
+#define VMAC_USE_SSE2 1
+#include <emmintrin.h>
+#endif
+
+/* Native word reads. Update (or define via compiler) if incorrect */
+#ifndef VMAC_ARCH_BIG_ENDIAN /* Assume big-endian unless on the list */
+#define VMAC_ARCH_BIG_ENDIAN \
+ (!(__x86_64__ || __i386__ || _M_IX86 || \
+ _M_X64 || __ARMEL__ || __MIPSEL__))
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* Constants and masks */
+
+const uint64_t p64 = UINT64_C(0xfffffffffffffeff); /* 2^64 - 257 prime */
+const uint64_t m62 = UINT64_C(0x3fffffffffffffff); /* 62-bit mask */
+const uint64_t m63 = UINT64_C(0x7fffffffffffffff); /* 63-bit mask */
+const uint64_t m64 = UINT64_C(0xffffffffffffffff); /* 64-bit mask */
+const uint64_t mpoly = UINT64_C(0x1fffffff1fffffff); /* Poly key mask */
+
+/* ----------------------------------------------------------------------- *
+ * The following routines are used in this implementation. They are
+ * written via macros to simulate zero-overhead call-by-reference.
+ * All have default implementations for when they are not defined in an
+ * architecture-specific manner.
+ *
+ * MUL64: 64x64->128-bit multiplication
+ * PMUL64: assumes top bits cleared on inputs
+ * ADD128: 128x128->128-bit addition
+ * GET_REVERSED_64: load and byte-reverse 64-bit word
+ * ----------------------------------------------------------------------- */
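+
+/*
+ * Illustrative sketch (not part of the original source) of the intended
+ * call-by-reference style, assuming 64-bit values a, b, c and a pointer p:
+ *
+ *     uint64_t hi, lo;
+ *     MUL64(hi, lo, a, b);         -- (hi:lo) = full 128-bit product a*b
+ *     ADD128(hi, lo, 0, c);        -- (hi:lo) += c, carrying into hi
+ *     lo = GET_REVERSED_64(p);     -- byte-reversed 64-bit load from p
+ */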
+
+/* ----------------------------------------------------------------------- */
+#if (__GNUC__ && (__x86_64__ || __amd64__))
+/* ----------------------------------------------------------------------- */
+
+#define ADD128(rh,rl,ih,il) \
+ asm ("addq %3, %1 \n\t" \
+ "adcq %2, %0" \
+ : "+r"(rh),"+r"(rl) \
+ : "r"(ih),"r"(il) : "cc");
+
+#define MUL64(rh,rl,i1,i2) \
+ asm ("mulq %3" : "=a"(rl), "=d"(rh) : "a"(i1), "r"(i2) : "cc")
+
+#define PMUL64 MUL64
+
+#define GET_REVERSED_64(p) \
+ ({uint64_t x; \
+ asm ("bswapq %0" : "=r" (x) : "0"(*(uint64_t *)(p))); x;})
+
+/* ----------------------------------------------------------------------- */
+#elif (__GNUC__ && __i386__)
+/* ----------------------------------------------------------------------- */
+
+#define GET_REVERSED_64(p) \
+ ({ uint64_t x; \
+ uint32_t *tp = (uint32_t *)(p); \
+ asm ("bswap %%edx\n\t" \
+ "bswap %%eax" \
+ : "=A"(x) \
+ : "a"(tp[1]), "d"(tp[0])); \
+ x; })
+
+/* ----------------------------------------------------------------------- */
+#elif (__GNUC__ && __ppc64__)
+/* ----------------------------------------------------------------------- */
+
+#define ADD128(rh,rl,ih,il) \
+ asm volatile ( "addc %1, %1, %3 \n\t" \
+ "adde %0, %0, %2" \
+ : "+r"(rh),"+r"(rl) \
+ : "r"(ih),"r"(il));
+
+#define MUL64(rh,rl,i1,i2) \
+{ uint64_t _i1 = (i1), _i2 = (i2); \
+ rl = _i1 * _i2; \
+ asm volatile ("mulhdu %0, %1, %2" : "=r" (rh) : "r" (_i1), "r" (_i2));\
+}
+
+#define PMUL64 MUL64
+
+#define GET_REVERSED_64(p) \
+ ({ uint32_t hi, lo, *_p = (uint32_t *)(p); \
+ asm volatile ("lwbrx %0, %1, %2" : "=r"(lo) : "b%"(0), "r"(_p) ); \
+ asm volatile ("lwbrx %0, %1, %2" : "=r"(hi) : "b%"(4), "r"(_p) ); \
+ ((uint64_t)hi << 32) | (uint64_t)lo; } )
+
+/* ----------------------------------------------------------------------- */
+#elif (__GNUC__ && (__ppc__ || __PPC__))
+/* ----------------------------------------------------------------------- */
+
+#define GET_REVERSED_64(p) \
+ ({ uint32_t hi, lo, *_p = (uint32_t *)(p); \
+ asm volatile ("lwbrx %0, %1, %2" : "=r"(lo) : "b%"(0), "r"(_p) ); \
+ asm volatile ("lwbrx %0, %1, %2" : "=r"(hi) : "b%"(4), "r"(_p) ); \
+ ((uint64_t)hi << 32) | (uint64_t)lo; } )
+
+/* ----------------------------------------------------------------------- */
+#elif (__GNUC__ && (__ARMEL__ || __ARM__))
+/* ----------------------------------------------------------------------- */
+
+#define bswap32(v) \
+({ uint32_t tmp,out; \
+ asm volatile( \
+ "eor %1, %2, %2, ror #16\n" \
+ "bic %1, %1, #0x00ff0000\n" \
+ "mov %0, %2, ror #8\n" \
+ "eor %0, %0, %1, lsr #8" \
+ : "=r" (out), "=&r" (tmp) \
+ : "r" (v)); \
+ out;})
+
+/* ----------------------------------------------------------------------- */
+#elif _MSC_VER
+/* ----------------------------------------------------------------------- */
+
+#include <intrin.h>
+
+#if (_M_IA64 || _M_X64) && \
+ (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000)
+#define MUL64(rh,rl,i1,i2) (rl) = _umul128(i1,i2,&(rh));
+#pragma intrinsic(_umul128)
+#define PMUL64 MUL64
+#endif
+
+/* MSVC uses add, adc in this version */
+#define ADD128(rh,rl,ih,il) \
+ { uint64_t _il = (il); \
+ (rl) += (_il); \
+ (rh) += (ih) + ((rl) < (_il)); \
+ }
+
+#if _MSC_VER >= 1300
+#define GET_REVERSED_64(p) _byteswap_uint64(*(uint64_t *)(p))
+#pragma intrinsic(_byteswap_uint64)
+#endif
+
+#if _MSC_VER >= 1400 && \
+ (!defined(__INTEL_COMPILER) || __INTEL_COMPILER >= 1000)
+#define MUL32(i1,i2) (__emulu((uint32_t)(i1),(uint32_t)(i2)))
+#pragma intrinsic(__emulu)
+#endif
+
+/* ----------------------------------------------------------------------- */
+#endif
+/* ----------------------------------------------------------------------- */
+
+#if __GNUC__
+#define ALIGN(n) __attribute__ ((aligned(n)))
+#define NOINLINE __attribute__ ((noinline))
+#define FASTCALL
+#elif _MSC_VER
+#define ALIGN(n) __declspec(align(n))
+#define NOINLINE __declspec(noinline)
+#define FASTCALL __fastcall
+#else
+#define ALIGN(n)
+#define NOINLINE
+#define FASTCALL
+#endif
+
+/* ----------------------------------------------------------------------- */
+/* Default implementations, if not defined above */
+/* ----------------------------------------------------------------------- */
+
+#ifndef ADD128
+#define ADD128(rh,rl,ih,il) \
+ { uint64_t _il = (il); \
+ (rl) += (_il); \
+ if ((rl) < (_il)) (rh)++; \
+ (rh) += (ih); \
+ }
+#endif
+
+#ifndef MUL32
+#define MUL32(i1,i2) ((uint64_t)(uint32_t)(i1)*(uint32_t)(i2))
+#endif
+
+#ifndef PMUL64 /* rh may not be same as i1 or i2 */
+#define PMUL64(rh,rl,i1,i2) /* Assumes m doesn't overflow */ \
+ { uint64_t _i1 = (i1), _i2 = (i2); \
+ uint64_t m = MUL32(_i1,_i2>>32) + MUL32(_i1>>32,_i2); \
+ rh = MUL32(_i1>>32,_i2>>32); \
+ rl = MUL32(_i1,_i2); \
+ ADD128(rh,rl,(m >> 32),(m << 32)); \
+ }
+#endif
+
+#ifndef MUL64
+#define MUL64(rh,rl,i1,i2) \
+ { uint64_t _i1 = (i1), _i2 = (i2); \
+ uint64_t m1= MUL32(_i1,_i2>>32); \
+ uint64_t m2= MUL32(_i1>>32,_i2); \
+ rh = MUL32(_i1>>32,_i2>>32); \
+ rl = MUL32(_i1,_i2); \
+ ADD128(rh,rl,(m1 >> 32),(m1 << 32)); \
+ ADD128(rh,rl,(m2 >> 32),(m2 << 32)); \
+ }
+#endif
+
+#ifndef GET_REVERSED_64
+#ifndef bswap64
+#ifndef bswap32
+#define bswap32(x) \
+ ({ uint32_t bsx = (x); \
+ ((((bsx) & 0xff000000u) >> 24) | (((bsx) & 0x00ff0000u) >> 8) | \
+ (((bsx) & 0x0000ff00u) << 8) | (((bsx) & 0x000000ffu) << 24)); })
+#endif
+#define bswap64(x) \
+ ({ union { uint64_t ll; uint32_t l[2]; } w, r; \
+ w.ll = (x); \
+ r.l[0] = bswap32 (w.l[1]); \
+ r.l[1] = bswap32 (w.l[0]); \
+ r.ll; })
+#endif
+#define GET_REVERSED_64(p) bswap64(*(uint64_t *)(p))
+#endif
+
+/* ----------------------------------------------------------------------- */
+
+#if (VMAC_PREFER_BIG_ENDIAN)
+# define get64PE get64BE
+#else
+# define get64PE get64LE
+#endif
+
+#if (VMAC_ARCH_BIG_ENDIAN)
+# define get64BE(ptr) (*(uint64_t *)(ptr))
+# define get64LE(ptr) GET_REVERSED_64(ptr)
+#else /* assume little-endian */
+# define get64BE(ptr) GET_REVERSED_64(ptr)
+# define get64LE(ptr) (*(uint64_t *)(ptr))
+#endif
+
+
+/* --------------------------------------------------------------------- *
+ * For highest performance the L1 NH and L2 polynomial hashes should be
+ * carefully implemented to take advantage of one's target architecture.
+ * Here these two hash functions are defined multiple times: once for
+ * 64-bit architectures, once for 32-bit SSE2 architectures, and once
+ * for the remaining (32-bit) architectures.
+ * For each, nh_16 *must* be defined (works on multiples of 16 bytes).
+ * Optionally, nh_vmac_nhbytes can be defined (for multiples of
+ * VMAC_NHBYTES), and nh_16_2 and nh_vmac_nhbytes_2 (versions that do two
+ * NH computations at once).
+ * --------------------------------------------------------------------- */
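+
+/*
+ * A minimal reference sketch (in terms of the portable macros above) of what
+ * nh_16 computes for nw 64-bit message words m[] and key words k[]:
+ *
+ *     (rh:rl) = sum over even i < nw of
+ *               (m[i] + k[i]) * (m[i+1] + k[i+1])
+ *
+ * where the additions wrap modulo 2^64 and the products are accumulated as a
+ * 128-bit sum. The architecture-specific versions below evaluate the same
+ * sum; only the scheduling of the multiplies differs.
+ */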
+
+/* ----------------------------------------------------------------------- */
+#if VMAC_ARCH_64
+/* ----------------------------------------------------------------------- */
+
+#define nh_16(mp, kp, nw, rh, rl) \
+{ int i; uint64_t th, tl; \
+ rh = rl = 0; \
+ for (i = 0; i < nw; i+= 2) { \
+ MUL64(th,tl,get64PE((mp)+i )+(kp)[i ],get64PE((mp)+i+1)+(kp)[i+1]);\
+ ADD128(rh,rl,th,tl); \
+ } \
+}
+#define nh_16_2(mp, kp, nw, rh, rl, rh1, rl1) \
+{ int i; uint64_t th, tl; \
+ rh1 = rl1 = rh = rl = 0; \
+ for (i = 0; i < nw; i+= 2) { \
+ MUL64(th,tl,get64PE((mp)+i )+(kp)[i ],get64PE((mp)+i+1)+(kp)[i+1]);\
+ ADD128(rh,rl,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i )+(kp)[i+2],get64PE((mp)+i+1)+(kp)[i+3]);\
+ ADD128(rh1,rl1,th,tl); \
+ } \
+}
+
+#if (VMAC_NHBYTES >= 64) /* These versions do 64-bytes of message at a time */
+#define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \
+{ int i; uint64_t th, tl; \
+ rh = rl = 0; \
+ for (i = 0; i < nw; i+= 8) { \
+ MUL64(th,tl,get64PE((mp)+i )+(kp)[i ],get64PE((mp)+i+1)+(kp)[i+1]);\
+ ADD128(rh,rl,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i+2)+(kp)[i+2],get64PE((mp)+i+3)+(kp)[i+3]);\
+ ADD128(rh,rl,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i+4)+(kp)[i+4],get64PE((mp)+i+5)+(kp)[i+5]);\
+ ADD128(rh,rl,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i+6)+(kp)[i+6],get64PE((mp)+i+7)+(kp)[i+7]);\
+ ADD128(rh,rl,th,tl); \
+ } \
+}
+#define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh1, rl1) \
+{ int i; uint64_t th, tl; \
+ rh1 = rl1 = rh = rl = 0; \
+ for (i = 0; i < nw; i+= 8) { \
+ MUL64(th,tl,get64PE((mp)+i )+(kp)[i ],get64PE((mp)+i+1)+(kp)[i+1]);\
+ ADD128(rh,rl,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i )+(kp)[i+2],get64PE((mp)+i+1)+(kp)[i+3]);\
+ ADD128(rh1,rl1,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i+2)+(kp)[i+2],get64PE((mp)+i+3)+(kp)[i+3]);\
+ ADD128(rh,rl,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i+2)+(kp)[i+4],get64PE((mp)+i+3)+(kp)[i+5]);\
+ ADD128(rh1,rl1,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i+4)+(kp)[i+4],get64PE((mp)+i+5)+(kp)[i+5]);\
+ ADD128(rh,rl,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i+4)+(kp)[i+6],get64PE((mp)+i+5)+(kp)[i+7]);\
+ ADD128(rh1,rl1,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i+6)+(kp)[i+6],get64PE((mp)+i+7)+(kp)[i+7]);\
+ ADD128(rh,rl,th,tl); \
+ MUL64(th,tl,get64PE((mp)+i+6)+(kp)[i+8],get64PE((mp)+i+7)+(kp)[i+9]);\
+ ADD128(rh1,rl1,th,tl); \
+ } \
+}
+#endif
+
+#define poly_step(ah, al, kh, kl, mh, ml) \
+{ uint64_t t1h, t1l, t2h, t2l, t3h, t3l, z=0; \
+ /* compute ab*cd, put bd into result registers */ \
+ PMUL64(t3h,t3l,al,kh); \
+ PMUL64(t2h,t2l,ah,kl); \
+ PMUL64(t1h,t1l,ah,2*kh); \
+ PMUL64(ah,al,al,kl); \
+ /* add 2 * ac to result */ \
+ ADD128(ah,al,t1h,t1l); \
+ /* add together ad + bc */ \
+ ADD128(t2h,t2l,t3h,t3l); \
+ /* now (ah,al), (t2l,2*t2h) need summing */ \
+ /* first add the high registers, carrying into t2h */ \
+ ADD128(t2h,ah,z,t2l); \
+ /* double t2h and add top bit of ah */ \
+ t2h = 2 * t2h + (ah >> 63); \
+ ah &= m63; \
+ /* now add the low registers */ \
+ ADD128(ah,al,mh,ml); \
+ ADD128(ah,al,z,t2h); \
+}
+
+/* ----------------------------------------------------------------------- */
+#elif VMAC_USE_SSE2
+/* ----------------------------------------------------------------------- */
+
+// macros from Crypto++ for sharing inline assembly code between MSVC and GNU C
+#if defined(__GNUC__)
+ // define these in two steps to allow arguments to be expanded
+ #define GNU_AS2(x, y) #x ", " #y ";"
+ #define GNU_AS3(x, y, z) #x ", " #y ", " #z ";"
+ #define GNU_ASL(x) "\n" #x ":"
+ #define GNU_ASJ(x, y, z) #x " " #y #z ";"
+ #define AS2(x, y) GNU_AS2(x, y)
+ #define AS3(x, y, z) GNU_AS3(x, y, z)
+ #define ASS(x, y, a, b, c, d) #x ", " #y ", " #a "*64+" #b "*16+" #c "*4+" #d ";"
+ #define ASL(x) GNU_ASL(x)
+ #define ASJ(x, y, z) GNU_ASJ(x, y, z)
+#else
+ #define AS2(x, y) __asm {x, y}
+ #define AS3(x, y, z) __asm {x, y, z}
+ #define ASS(x, y, a, b, c, d) __asm {x, y, _MM_SHUFFLE(a, b, c, d)}
+ #define ASL(x) __asm {label##x:}
+ #define ASJ(x, y, z) __asm {x label##y}
+#endif
+
+static void NOINLINE nh_16_func(const uint64_t *mp, const uint64_t *kp, size_t nw, uint64_t *rh, uint64_t *rl)
+{
+ // This assembly version, using MMX registers, is just as fast as the
+ // intrinsics version (which uses XMM registers) on the Intel Core 2,
+ // but is much faster on the Pentium 4. In order to schedule multiplies
+ // as early as possible, the loop interleaves operations for the current
+    // block and the next block.  To mask out the high 32 bits, we use "movd"
+    // to move the lower 32 bits to the stack and then back.  Surprisingly,
+ // this is faster than any other method.
+#ifdef __GNUC__
+ __asm__ __volatile__
+ (
+ ".intel_syntax noprefix;"
+#else
+ AS2( mov esi, mp)
+ AS2( mov edi, kp)
+ AS2( mov ecx, nw)
+ AS2( mov eax, rl)
+ AS2( mov edx, rh)
+#endif
+ AS2( sub esp, 12)
+ AS2( movq mm6, [esi])
+ AS2( paddq mm6, [edi])
+ AS2( movq mm5, [esi+8])
+ AS2( paddq mm5, [edi+8])
+ AS2( add esi, 16)
+ AS2( add edi, 16)
+ AS2( movq mm4, mm6)
+ ASS( pshufw mm2, mm6, 1, 0, 3, 2)
+ AS2( pmuludq mm6, mm5)
+ ASS( pshufw mm3, mm5, 1, 0, 3, 2)
+ AS2( pmuludq mm5, mm2)
+ AS2( pmuludq mm2, mm3)
+ AS2( pmuludq mm3, mm4)
+ AS2( pxor mm7, mm7)
+ AS2( movd [esp], mm6)
+ AS2( psrlq mm6, 32)
+ AS2( movd [esp+4], mm5)
+ AS2( psrlq mm5, 32)
+ AS2( sub ecx, 2)
+ ASJ( jz, 1, f)
+ ASL(0)
+ AS2( movq mm0, [esi])
+ AS2( paddq mm0, [edi])
+ AS2( movq mm1, [esi+8])
+ AS2( paddq mm1, [edi+8])
+ AS2( add esi, 16)
+ AS2( add edi, 16)
+ AS2( movq mm4, mm0)
+ AS2( paddq mm5, mm2)
+ ASS( pshufw mm2, mm0, 1, 0, 3, 2)
+ AS2( pmuludq mm0, mm1)
+ AS2( movd [esp+8], mm3)
+ AS2( psrlq mm3, 32)
+ AS2( paddq mm5, mm3)
+ ASS( pshufw mm3, mm1, 1, 0, 3, 2)
+ AS2( pmuludq mm1, mm2)
+ AS2( pmuludq mm2, mm3)
+ AS2( pmuludq mm3, mm4)
+ AS2( movd mm4, [esp])
+ AS2( paddq mm7, mm4)
+ AS2( movd mm4, [esp+4])
+ AS2( paddq mm6, mm4)
+ AS2( movd mm4, [esp+8])
+ AS2( paddq mm6, mm4)
+ AS2( movd [esp], mm0)
+ AS2( psrlq mm0, 32)
+ AS2( paddq mm6, mm0)
+ AS2( movd [esp+4], mm1)
+ AS2( psrlq mm1, 32)
+ AS2( paddq mm5, mm1)
+ AS2( sub ecx, 2)
+ ASJ( jnz, 0, b)
+ ASL(1)
+ AS2( paddq mm5, mm2)
+ AS2( movd [esp+8], mm3)
+ AS2( psrlq mm3, 32)
+ AS2( paddq mm5, mm3)
+ AS2( movd mm4, [esp])
+ AS2( paddq mm7, mm4)
+ AS2( movd mm4, [esp+4])
+ AS2( paddq mm6, mm4)
+ AS2( movd mm4, [esp+8])
+ AS2( paddq mm6, mm4)
+
+ ASS( pshufw mm0, mm7, 3, 2, 1, 0)
+ AS2( psrlq mm7, 32)
+ AS2( paddq mm6, mm7)
+ AS2( punpckldq mm0, mm6)
+ AS2( psrlq mm6, 32)
+ AS2( paddq mm5, mm6)
+ AS2( movq [eax], mm0)
+ AS2( movq [edx], mm5)
+ AS2( add esp, 12)
+#ifdef __GNUC__
+ ".att_syntax prefix;"
+ :
+ : "S" (mp), "D" (kp), "c" (nw), "a" (rl), "d" (rh)
+ : "memory", "cc"
+ );
+#endif
+}
+#define nh_16(mp, kp, nw, rh, rl) nh_16_func(mp, kp, nw, &(rh), &(rl));
+
+static void poly_step_func(uint64_t *ahi, uint64_t *alo, const uint64_t *kh,
+ const uint64_t *kl, const uint64_t *mh, const uint64_t *ml)
+{
+ // This code tries to schedule the multiplies as early as possible to overcome
+ // the long latencies on the Pentium 4. It also minimizes "movq" instructions
+ // which are very expensive on the P4.
+
+#define a0 [eax+0]
+#define a1 [eax+4]
+#define a2 [ebx+0]
+#define a3 [ebx+4]
+#define k0 [ecx+0]
+#define k1 [ecx+4]
+#define k2 [edx+0]
+#define k3 [edx+4]
+
+#ifdef __GNUC__
+ uint32_t temp;
+ __asm__ __volatile__
+ (
+ "mov %%ebx, %0;"
+ "mov %1, %%ebx;"
+ ".intel_syntax noprefix;"
+#else
+ AS2( mov ebx, ahi)
+ AS2( mov edx, kh)
+ AS2( mov eax, alo)
+ AS2( mov ecx, kl)
+ AS2( mov esi, mh)
+ AS2( mov edi, ml)
+#endif
+
+ AS2( movd mm0, a3)
+ AS2( movq mm4, mm0)
+ AS2( pmuludq mm0, k3) // a3*k3
+ AS2( movd mm1, a0)
+ AS2( pmuludq mm1, k2) // a0*k2
+ AS2( movd mm2, a1)
+ AS2( movd mm6, k1)
+ AS2( pmuludq mm2, mm6) // a1*k1
+ AS2( movd mm3, a2)
+ AS2( movq mm5, mm3)
+ AS2( movd mm7, k0)
+ AS2( pmuludq mm3, mm7) // a2*k0
+ AS2( pmuludq mm4, mm7) // a3*k0
+ AS2( pmuludq mm5, mm6) // a2*k1
+ AS2( psllq mm0, 1)
+ AS2( paddq mm0, [esi])
+ AS2( paddq mm0, mm1)
+ AS2( movd mm1, a1)
+ AS2( paddq mm4, mm5)
+ AS2( movq mm5, mm1)
+ AS2( pmuludq mm1, k2) // a1*k2
+ AS2( paddq mm0, mm2)
+ AS2( movd mm2, a0)
+ AS2( paddq mm0, mm3)
+ AS2( movq mm3, mm2)
+ AS2( pmuludq mm2, k3) // a0*k3
+ AS2( pmuludq mm3, mm7) // a0*k0
+ AS2( movd esi, mm0)
+ AS2( psrlq mm0, 32)
+ AS2( pmuludq mm7, mm5) // a1*k0
+ AS2( pmuludq mm5, k3) // a1*k3
+ AS2( paddq mm0, mm1)
+ AS2( movd mm1, a2)
+ AS2( pmuludq mm1, k2) // a2*k2
+ AS2( paddq mm0, mm2)
+ AS2( paddq mm0, mm4)
+ AS2( movq mm4, mm0)
+ AS2( movd mm2, a3)
+ AS2( pmuludq mm2, mm6) // a3*k1
+ AS2( pmuludq mm6, a0) // a0*k1
+ AS2( psrlq mm0, 31)
+ AS2( paddq mm0, mm3)
+ AS2( movd mm3, [edi])
+ AS2( paddq mm0, mm3)
+ AS2( movd mm3, a2)
+ AS2( pmuludq mm3, k3) // a2*k3
+ AS2( paddq mm5, mm1)
+ AS2( movd mm1, a3)
+ AS2( pmuludq mm1, k2) // a3*k2
+ AS2( paddq mm5, mm2)
+ AS2( movd mm2, [edi+4])
+ AS2( psllq mm5, 1)
+ AS2( paddq mm0, mm5)
+ AS2( movq mm5, mm0)
+ AS2( psllq mm4, 33)
+ AS2( psrlq mm0, 32)
+ AS2( paddq mm6, mm7)
+ AS2( movd mm7, esi)
+ AS2( paddq mm0, mm6)
+ AS2( paddq mm0, mm2)
+ AS2( paddq mm3, mm1)
+ AS2( psllq mm3, 1)
+ AS2( paddq mm0, mm3)
+ AS2( psrlq mm4, 1)
+ AS2( punpckldq mm5, mm0)
+ AS2( psrlq mm0, 32)
+ AS2( por mm4, mm7)
+ AS2( paddq mm0, mm4)
+ AS2( movq a0, mm5)
+ AS2( movq a2, mm0)
+#ifdef __GNUC__
+ ".att_syntax prefix;"
+ "mov %0, %%ebx;"
+ : "=m" (temp)
+ : "m" (ahi), "D" (ml), "d" (kh), "a" (alo), "S" (mh), "c" (kl)
+ : "memory", "cc"
+ );
+#endif
+
+
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef k0
+#undef k1
+#undef k2
+#undef k3
+}
+
+#define poly_step(ah, al, kh, kl, mh, ml) \
+ poly_step_func(&(ah), &(al), &(kh), &(kl), &(mh), &(ml))
+
+/* ----------------------------------------------------------------------- */
+#else /* not VMAC_ARCH_64 and not SSE2 */
+/* ----------------------------------------------------------------------- */
+
+#ifndef nh_16
+#define nh_16(mp, kp, nw, rh, rl) \
+{ uint64_t t1,t2,m1,m2,t; \
+ int i; \
+ rh = rl = t = 0; \
+ for (i = 0; i < nw; i+=2) { \
+ t1 = get64PE(mp+i) + kp[i]; \
+ t2 = get64PE(mp+i+1) + kp[i+1]; \
+ m2 = MUL32(t1 >> 32, t2); \
+ m1 = MUL32(t1, t2 >> 32); \
+ ADD128(rh,rl,MUL32(t1 >> 32,t2 >> 32),MUL32(t1,t2)); \
+ rh += (uint64_t)(uint32_t)(m1 >> 32) + (uint32_t)(m2 >> 32); \
+ t += (uint64_t)(uint32_t)m1 + (uint32_t)m2; \
+ } \
+ ADD128(rh,rl,(t >> 32),(t << 32)); \
+}
+#endif
+
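+/*
+ * Portable fallback: performs the same polynomial accumulator update as the
+ * 64-bit poly_step() macro above, built from 32x32->64-bit multiplies (MUL32)
+ * instead of 64x64->128-bit ones.
+ */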
+static void poly_step_func(uint64_t *ahi, uint64_t *alo, const uint64_t *kh,
+ const uint64_t *kl, const uint64_t *mh, const uint64_t *ml)
+{
+
+#if VMAC_ARCH_BIG_ENDIAN
+#define INDEX_HIGH 0
+#define INDEX_LOW 1
+#else
+#define INDEX_HIGH 1
+#define INDEX_LOW 0
+#endif
+
+#define a0 *(((uint32_t*)alo)+INDEX_LOW)
+#define a1 *(((uint32_t*)alo)+INDEX_HIGH)
+#define a2 *(((uint32_t*)ahi)+INDEX_LOW)
+#define a3 *(((uint32_t*)ahi)+INDEX_HIGH)
+#define k0 *(((uint32_t*)kl)+INDEX_LOW)
+#define k1 *(((uint32_t*)kl)+INDEX_HIGH)
+#define k2 *(((uint32_t*)kh)+INDEX_LOW)
+#define k3 *(((uint32_t*)kh)+INDEX_HIGH)
+
+ uint64_t p, q, t;
+ uint32_t t2;
+
+ p = MUL32(a3, k3);
+ p += p;
+ p += *(uint64_t *)mh;
+ p += MUL32(a0, k2);
+ p += MUL32(a1, k1);
+ p += MUL32(a2, k0);
+ t = (uint32_t)(p);
+ p >>= 32;
+ p += MUL32(a0, k3);
+ p += MUL32(a1, k2);
+ p += MUL32(a2, k1);
+ p += MUL32(a3, k0);
+ t |= ((uint64_t)((uint32_t)p & 0x7fffffff)) << 32;
+ p >>= 31;
+ p += (uint64_t)(((uint32_t*)ml)[INDEX_LOW]);
+ p += MUL32(a0, k0);
+ q = MUL32(a1, k3);
+ q += MUL32(a2, k2);
+ q += MUL32(a3, k1);
+ q += q;
+ p += q;
+ t2 = (uint32_t)(p);
+ p >>= 32;
+ p += (uint64_t)(((uint32_t*)ml)[INDEX_HIGH]);
+ p += MUL32(a0, k1);
+ p += MUL32(a1, k0);
+ q = MUL32(a2, k3);
+ q += MUL32(a3, k2);
+ q += q;
+ p += q;
+ *(uint64_t *)(alo) = (p << 32) | t2;
+ p >>= 32;
+ *(uint64_t *)(ahi) = p + t;
+
+#undef a0
+#undef a1
+#undef a2
+#undef a3
+#undef k0
+#undef k1
+#undef k2
+#undef k3
+}
+
+#define poly_step(ah, al, kh, kl, mh, ml) \
+ poly_step_func(&(ah), &(al), &(kh), &(kl), &(mh), &(ml))
+
+/* ----------------------------------------------------------------------- */
+#endif /* end of specialized NH and poly definitions */
+/* ----------------------------------------------------------------------- */
+
+/* At least nh_16 is defined. Define the others as needed here. */
+#ifndef nh_16_2
+#define nh_16_2(mp, kp, nw, rh, rl, rh2, rl2) \
+ nh_16(mp, kp, nw, rh, rl); \
+ nh_16(mp, ((kp)+2), nw, rh2, rl2);
+#endif
+#ifndef nh_vmac_nhbytes
+#define nh_vmac_nhbytes(mp, kp, nw, rh, rl) \
+ nh_16(mp, kp, nw, rh, rl)
+#endif
+#ifndef nh_vmac_nhbytes_2
+#define nh_vmac_nhbytes_2(mp, kp, nw, rh, rl, rh2, rl2) \
+ nh_vmac_nhbytes(mp, kp, nw, rh, rl); \
+ nh_vmac_nhbytes(mp, ((kp)+2), nw, rh2, rl2);
+#endif
+
+/* ----------------------------------------------------------------------- */
+
+void vhash_abort(vmac_ctx_t *ctx)
+{
+ ctx->polytmp[0] = ctx->polykey[0] ;
+ ctx->polytmp[1] = ctx->polykey[1] ;
+ #if (VMAC_TAG_LEN == 128)
+ ctx->polytmp[2] = ctx->polykey[2] ;
+ ctx->polytmp[3] = ctx->polykey[3] ;
+ #endif
+ ctx->first_block_processed = 0;
+}
+
+/* ----------------------------------------------------------------------- */
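+/*
+ * L3 stage of VHASH: fold the residual length (in bits) into the 128-bit
+ * accumulator (p1:p2), reduce it, and combine it with the key words (k1,k2)
+ * into a single 64-bit result.
+ */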
+static uint64_t l3hash(uint64_t p1, uint64_t p2,
+ uint64_t k1, uint64_t k2, uint64_t len)
+{
+ uint64_t rh, rl, t, z=0;
+
+ /* fully reduce (p1,p2)+(len,0) mod p127 */
+ t = p1 >> 63;
+ p1 &= m63;
+ ADD128(p1, p2, len, t);
+ /* At this point, (p1,p2) is at most 2^127+(len<<64) */
+ t = (p1 > m63) + ((p1 == m63) && (p2 == m64));
+ ADD128(p1, p2, z, t);
+ p1 &= m63;
+
+ /* compute (p1,p2)/(2^64-2^32) and (p1,p2)%(2^64-2^32) */
+ t = p1 + (p2 >> 32);
+ t += (t >> 32);
+ t += (uint32_t)t > 0xfffffffeu;
+ p1 += (t >> 32);
+ p2 += (p1 << 32);
+
+ /* compute (p1+k1)%p64 and (p2+k2)%p64 */
+ p1 += k1;
+ p1 += (0 - (p1 < k1)) & 257;
+ p2 += k2;
+ p2 += (0 - (p2 < k2)) & 257;
+
+ /* compute (p1+k1)*(p2+k2)%p64 */
+ MUL64(rh, rl, p1, p2);
+ t = rh >> 56;
+ ADD128(t, rl, z, rh);
+ rh <<= 8;
+ ADD128(t, rl, z, rh);
+ t += t << 8;
+ rl += t;
+ rl += (0 - (rl < t)) & 257;
+ rl += (0 - (rl > p64-1)) & 257;
+ return rl;
+}
+
+/* ----------------------------------------------------------------------- */
+
+void vhash_update(unsigned char *m,
+                  unsigned int mbytes, /* Positive multiple of VMAC_NHBYTES */
+ vmac_ctx_t *ctx)
+{
+ uint64_t rh, rl, *mptr;
+ const uint64_t *kptr = (uint64_t *)ctx->nhkey;
+ int i;
+ uint64_t ch, cl;
+ uint64_t pkh = ctx->polykey[0];
+ uint64_t pkl = ctx->polykey[1];
+ #if (VMAC_TAG_LEN == 128)
+ uint64_t ch2, cl2, rh2, rl2;
+ uint64_t pkh2 = ctx->polykey[2];
+ uint64_t pkl2 = ctx->polykey[3];
+ #endif
+
+ mptr = (uint64_t *)m;
+ i = mbytes / VMAC_NHBYTES; /* Must be non-zero */
+
+ ch = ctx->polytmp[0];
+ cl = ctx->polytmp[1];
+ #if (VMAC_TAG_LEN == 128)
+ ch2 = ctx->polytmp[2];
+ cl2 = ctx->polytmp[3];
+ #endif
+
+ if ( ! ctx->first_block_processed) {
+ ctx->first_block_processed = 1;
+ #if (VMAC_TAG_LEN == 64)
+ nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl);
+ #else
+ nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,rh,rl,rh2,rl2);
+ rh2 &= m62;
+ ADD128(ch2,cl2,rh2,rl2);
+ #endif
+ rh &= m62;
+ ADD128(ch,cl,rh,rl);
+ mptr += (VMAC_NHBYTES/sizeof(uint64_t));
+ i--;
+ }
+
+ while (i--) {
+ #if (VMAC_TAG_LEN == 64)
+ nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl);
+ #else
+ nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,rh,rl,rh2,rl2);
+ rh2 &= m62;
+ poly_step(ch2,cl2,pkh2,pkl2,rh2,rl2);
+ #endif
+ rh &= m62;
+ poly_step(ch,cl,pkh,pkl,rh,rl);
+ mptr += (VMAC_NHBYTES/sizeof(uint64_t));
+ }
+
+ ctx->polytmp[0] = ch;
+ ctx->polytmp[1] = cl;
+ #if (VMAC_TAG_LEN == 128)
+ ctx->polytmp[2] = ch2;
+ ctx->polytmp[3] = cl2;
+ #endif
+ #if VMAC_USE_SSE2
+ _mm_empty(); /* SSE2 version of poly_step uses mmx instructions */
+ #endif
+}
+
+/* ----------------------------------------------------------------------- */
+
+uint64_t xvhash(unsigned char m[],
+ unsigned int mbytes,
+ uint64_t *tagl,
+ vmac_ctx_t *ctx)
+{
+ uint64_t ch, cl, rh, rl, *mptr;
+ #if (VMAC_TAG_LEN == 128)
+ uint64_t ch2, cl2, rh2, rl2;
+ #endif
+ const uint64_t *kptr = (uint64_t *)ctx->nhkey;
+ int i, remaining;
+
+ remaining = mbytes % VMAC_NHBYTES;
+ i = mbytes-remaining;
+ mptr = (uint64_t *)(m+i);
+ if (i) vhash_update(m,i,ctx);
+
+ ch = ctx->polytmp[0];
+ cl = ctx->polytmp[1];
+ #if (VMAC_TAG_LEN == 128)
+ ch2 = ctx->polytmp[2];
+ cl2 = ctx->polytmp[3];
+ #endif
+
+ if (remaining) {
+ #if (VMAC_TAG_LEN == 128)
+ nh_16_2(mptr,kptr,2*((remaining+15)/16),rh,rl,rh2,rl2);
+ rh2 &= m62;
+ #else
+ nh_16(mptr,kptr,2*((remaining+15)/16),rh,rl);
+ #endif
+ rh &= m62;
+ if (i) {
+ poly_step(ch,cl,ctx->polykey[0],ctx->polykey[1],rh,rl);
+ #if (VMAC_TAG_LEN == 128)
+ poly_step(ch2,cl2,ctx->polykey[2],ctx->polykey[3],rh2,rl2);
+ #endif
+ } else {
+ ADD128(ch,cl,rh,rl);
+ #if (VMAC_TAG_LEN == 128)
+ ADD128(ch2,cl2,rh2,rl2);
+ #endif
+ }
+ }
+
+ #if VMAC_USE_SSE2
+ _mm_empty(); /* SSE2 version of poly_step uses mmx instructions */
+ #endif
+ vhash_abort(ctx);
+ remaining *= 8;
+#if (VMAC_TAG_LEN == 128)
+ *tagl = l3hash(ch2, cl2, ctx->l3key[2], ctx->l3key[3],remaining);
+#endif
+ return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1],remaining);
+}
+
+uint64_t vhash(unsigned char m[],
+ unsigned int mbytes,
+ uint64_t *tagl,
+ vmac_ctx_t *ctx)
+{
+ uint64_t rh, rl, *mptr;
+ const uint64_t *kptr = (uint64_t *)ctx->nhkey;
+ int i, remaining;
+ uint64_t ch, cl;
+ uint64_t pkh = ctx->polykey[0];
+ uint64_t pkl = ctx->polykey[1];
+ #if (VMAC_TAG_LEN == 128)
+ uint64_t ch2, cl2, rh2, rl2;
+ uint64_t pkh2 = ctx->polykey[2];
+ uint64_t pkl2 = ctx->polykey[3];
+ #endif
+
+ mptr = (uint64_t *)m;
+ i = mbytes / VMAC_NHBYTES;
+ remaining = mbytes % VMAC_NHBYTES;
+
+ if (ctx->first_block_processed)
+ {
+ ch = ctx->polytmp[0];
+ cl = ctx->polytmp[1];
+ #if (VMAC_TAG_LEN == 128)
+ ch2 = ctx->polytmp[2];
+ cl2 = ctx->polytmp[3];
+ #endif
+ }
+ else if (i)
+ {
+ #if (VMAC_TAG_LEN == 64)
+ nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,ch,cl);
+ #else
+ nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,ch,cl,ch2,cl2);
+ ch2 &= m62;
+ ADD128(ch2,cl2,pkh2,pkl2);
+ #endif
+ ch &= m62;
+ ADD128(ch,cl,pkh,pkl);
+ mptr += (VMAC_NHBYTES/sizeof(uint64_t));
+ i--;
+ }
+ else if (remaining)
+ {
+ #if (VMAC_TAG_LEN == 64)
+ nh_16(mptr,kptr,2*((remaining+15)/16),ch,cl);
+ #else
+ nh_16_2(mptr,kptr,2*((remaining+15)/16),ch,cl,ch2,cl2);
+ ch2 &= m62;
+ ADD128(ch2,cl2,pkh2,pkl2);
+ #endif
+ ch &= m62;
+ ADD128(ch,cl,pkh,pkl);
+ mptr += (VMAC_NHBYTES/sizeof(uint64_t));
+ goto do_l3;
+ }
+ else /* Empty String */
+ {
+ ch = pkh; cl = pkl;
+ #if (VMAC_TAG_LEN == 128)
+ ch2 = pkh2; cl2 = pkl2;
+ #endif
+ goto do_l3;
+ }
+
+ while (i--) {
+ #if (VMAC_TAG_LEN == 64)
+ nh_vmac_nhbytes(mptr,kptr,VMAC_NHBYTES/8,rh,rl);
+ #else
+ nh_vmac_nhbytes_2(mptr,kptr,VMAC_NHBYTES/8,rh,rl,rh2,rl2);
+ rh2 &= m62;
+ poly_step(ch2,cl2,pkh2,pkl2,rh2,rl2);
+ #endif
+ rh &= m62;
+ poly_step(ch,cl,pkh,pkl,rh,rl);
+ mptr += (VMAC_NHBYTES/sizeof(uint64_t));
+ }
+ if (remaining) {
+ #if (VMAC_TAG_LEN == 64)
+ nh_16(mptr,kptr,2*((remaining+15)/16),rh,rl);
+ #else
+ nh_16_2(mptr,kptr,2*((remaining+15)/16),rh,rl,rh2,rl2);
+ rh2 &= m62;
+ poly_step(ch2,cl2,pkh2,pkl2,rh2,rl2);
+ #endif
+ rh &= m62;
+ poly_step(ch,cl,pkh,pkl,rh,rl);
+ }
+
+do_l3:
+ #if VMAC_USE_SSE2
+ _mm_empty(); /* SSE2 version of poly_step uses mmx instructions */
+ #endif
+ vhash_abort(ctx);
+ remaining *= 8;
+#if (VMAC_TAG_LEN == 128)
+ *tagl = l3hash(ch2, cl2, ctx->l3key[2], ctx->l3key[3],remaining);
+#endif
+ return l3hash(ch, cl, ctx->l3key[0], ctx->l3key[1],remaining);
+}
+
+/* ----------------------------------------------------------------------- */
+
+uint64_t vmac(unsigned char m[],
+ unsigned int mbytes,
+ unsigned char n[16],
+ uint64_t *tagl,
+ vmac_ctx_t *ctx)
+{
+#if (VMAC_TAG_LEN == 64)
+ uint64_t *in_n, *out_p;
+ uint64_t p, h;
+ int i;
+
+ #if VMAC_CACHE_NONCES
+ in_n = ctx->cached_nonce;
+ out_p = ctx->cached_aes;
+ #else
+ uint64_t tmp[2];
+ in_n = out_p = tmp;
+ #endif
+
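+    /*
+     * The low bit of the nonce selects which 64-bit half of the AES output
+     * is used as the pad; the block is encrypted with that bit cleared and,
+     * when nonce caching is enabled, kept for reuse by the sibling nonce.
+     */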
+ i = n[15] & 1;
+ #if VMAC_CACHE_NONCES
+ if ((*(uint64_t *)(n+8) != in_n[1]) ||
+ (*(uint64_t *)(n ) != in_n[0])) {
+ #endif
+
+ in_n[0] = *(uint64_t *)(n );
+ in_n[1] = *(uint64_t *)(n+8);
+ ((unsigned char *)in_n)[15] &= 0xFE;
+ aes_encryption(in_n, out_p, &ctx->cipher_key);
+
+ #if VMAC_CACHE_NONCES
+ ((unsigned char *)in_n)[15] |= (unsigned char)(1-i);
+ }
+ #endif
+ p = get64BE(out_p + i);
+ h = vhash(m, mbytes, (uint64_t *)0, ctx);
+ return p + h;
+#else
+ uint64_t tmp[2];
+ uint64_t th,tl;
+ aes_encryption(n, (unsigned char *)tmp, &ctx->cipher_key);
+ th = vhash(m, mbytes, &tl, ctx);
+ th += get64BE(tmp);
+ *tagl = tl + get64BE(tmp+1);
+ return th;
+#endif
+}
+
+/* ----------------------------------------------------------------------- */
+
+void vmac_set_key(unsigned char user_key[], vmac_ctx_t *ctx)
+{
+ uint64_t in[2] = {0}, out[2];
+ unsigned i;
+ aes_key_setup(user_key, &ctx->cipher_key);
+
+ /* Fill nh key */
+ ((unsigned char *)in)[0] = 0x80;
+ for (i = 0; i < sizeof(ctx->nhkey)/8; i+=2) {
+ aes_encryption((unsigned char *)in, (unsigned char *)out,
+ &ctx->cipher_key);
+ ctx->nhkey[i ] = get64BE(out);
+ ctx->nhkey[i+1] = get64BE(out+1);
+ ((unsigned char *)in)[15] += 1;
+ }
+
+ /* Fill poly key */
+ ((unsigned char *)in)[0] = 0xC0;
+ in[1] = 0;
+ for (i = 0; i < sizeof(ctx->polykey)/8; i+=2) {
+ aes_encryption((unsigned char *)in, (unsigned char *)out,
+ &ctx->cipher_key);
+ ctx->polytmp[i ] = ctx->polykey[i ] = get64BE(out) & mpoly;
+ ctx->polytmp[i+1] = ctx->polykey[i+1] = get64BE(out+1) & mpoly;
+ ((unsigned char *)in)[15] += 1;
+ }
+
+ /* Fill ip key */
+ ((unsigned char *)in)[0] = 0xE0;
+ in[1] = 0;
+ for (i = 0; i < sizeof(ctx->l3key)/8; i+=2) {
+ do {
+ aes_encryption((unsigned char *)in, (unsigned char *)out,
+ &ctx->cipher_key);
+ ctx->l3key[i ] = get64BE(out);
+ ctx->l3key[i+1] = get64BE(out+1);
+ ((unsigned char *)in)[15] += 1;
+ } while (ctx->l3key[i] >= p64 || ctx->l3key[i+1] >= p64);
+ }
+
+ /* Invalidate nonce/aes cache and reset other elements */
+ #if (VMAC_TAG_LEN == 64) && (VMAC_CACHE_NONCES)
+ ctx->cached_nonce[0] = (uint64_t)-1; /* Ensure illegal nonce */
+ ctx->cached_nonce[1] = (uint64_t)0; /* Ensure illegal nonce */
+ #endif
+ ctx->first_block_processed = 0;
+}
+
+/* ----------------------------------------------------------------------- */
+
+
+#if VMAC_RUN_TESTS
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+#include <string.h>
+
+unsigned prime(void) /* Wake variable speed cpu, get rough speed estimate */
+{
+ volatile uint64_t i;
+ volatile uint64_t j=1;
+ unsigned cnt=0;
+ volatile clock_t ticks = clock();
+ do {
+ for (i = 0; i < 500000; i++) {
+ uint64_t x = get64PE(&j);
+ j = x * x + (uint64_t)ticks;
+ }
+ cnt++;
+ } while (clock() - ticks < (CLOCKS_PER_SEC/2));
+ return cnt; /* cnt is millions of iterations per second */
+}
+
+int main(void)
+{
+ ALIGN(16) vmac_ctx_t ctx, ctx_aio, ctx_inc1, ctx_inc2;
+ uint64_t res, tagl;
+ void *p;
+ unsigned char *m;
+ ALIGN(4) unsigned char key[] = "abcdefghijklmnop";
+ ALIGN(4) unsigned char nonce[] = "\0\0\0\0\0\0\0\0bcdefghi";
+ unsigned int vector_lengths[] = {0,3,48,300,3000000};
+ #if (VMAC_TAG_LEN == 64)
+ ALIGN(4) char *should_be[] = {"2576BE1C56D8B81B","2D376CF5B1813CE5",
+ "E8421F61D573D298","4492DF6C5CAC1BBE",
+ "09BA597DD7601113"};
+ #else
+ ALIGN(4) char *should_be[] = {"472766C70F74ED23481D6D7DE4E80DAC",
+ "4EE815A06A1D71EDD36FC75D51188A42",
+ "09F2C80C8E1007A0C12FAE19FE4504AE",
+ "66438817154850C61D8A412164803BCB",
+ "2B6B02288FFC461B75485DE893C629DC"};
+ #endif
+ unsigned speed_lengths[] = {16, 32, 64, 128, 256, 512, 1024, 2048, 4096};
+ unsigned i, j, *speed_iters;
+ clock_t ticks;
+ double cpb;
+ const unsigned int buf_len = 3 * (1 << 20);
+
+ j = prime();
+ i = sizeof(speed_lengths)/sizeof(speed_lengths[0]);
+ speed_iters = (unsigned *)malloc(i*sizeof(speed_iters[0]));
+ speed_iters[i-1] = j * (1 << 12);
+ while (--i) speed_iters[i-1] = (unsigned)(1.3 * speed_iters[i]);
+
+ /* Initialize context and message buffer, all 16-byte aligned */
+ p = malloc(buf_len + 32);
+ m = (unsigned char *)(((size_t)p + 16) & ~((size_t)15));
+ memset(m, 0, buf_len + 16);
+ vmac_set_key(key, &ctx);
+
+ /* Test incremental and all-in-one interfaces for correctness */
+ vmac_set_key(key, &ctx_aio);
+ vmac_set_key(key, &ctx_inc1);
+ vmac_set_key(key, &ctx_inc2);
+
+
+ /*
+ for (i = 0; i <= 512; i++) {
+ vhash_update(m,(i/VMAC_NHBYTES)*VMAC_NHBYTES,&ctx_inc1);
+ tagh = vmac(m+(i/VMAC_NHBYTES)*VMAC_NHBYTES, i%VMAC_NHBYTES,
+ nonce, &tagl, &ctx);
+ vhash_update(m,(i/VMAC_NHBYTES)*VMAC_NHBYTES,&ctx_inc1);
+ for (j = 0; j < vector_lengths[i]; j++)
+ m[j] = (unsigned char)('a'+j%3);
+
+ }
+ */
+
+ /* Generate vectors */
+ for (i = 0; i < sizeof(vector_lengths)/sizeof(unsigned int); i++) {
+ for (j = 0; j < vector_lengths[i]; j++)
+ m[j] = (unsigned char)('a'+j%3);
+ res = vmac(m, vector_lengths[i], nonce, &tagl, &ctx);
+ #if (VMAC_TAG_LEN == 64)
+ printf("\'abc\' * %7u: %016llX Should be: %s\n",
+ vector_lengths[i]/3,res,should_be[i]);
+ #else
+ printf("\'abc\' * %7u: %016llX%016llX\nShould be : %s\n",
+ vector_lengths[i]/3,res,tagl,should_be[i]);
+ #endif
+ }
+
+ /* Speed test */
+ for (i = 0; i < sizeof(speed_lengths)/sizeof(unsigned int); i++) {
+ ticks = clock();
+ for (j = 0; j < speed_iters[i]; j++) {
+ #if HASH_ONLY
+ res = vhash(m, speed_lengths[i], &tagl, &ctx);
+ #else
+ res = vmac(m, speed_lengths[i], nonce, &tagl, &ctx);
+ nonce[7]++;
+ #endif
+ }
+ ticks = clock() - ticks;
+ cpb = ((ticks*VMAC_HZ)/
+ ((double)CLOCKS_PER_SEC*speed_lengths[i]*speed_iters[i]));
+ printf("%4u bytes, %2.2f cpb\n", speed_lengths[i], cpb);
+ }
+ return 1;
+}
+
+#endif
subdir-y += char
subdir-y += cpufreq
subdir-y += pci
-subdir-$(x86) += passthrough
+subdir-y += passthrough
subdir-$(HAS_ACPI) += acpi
subdir-$(HAS_VGA) += video
extern int pmstat_get_cx_stat(uint32_t cpuid, struct pm_cx_stat *stat);
extern int pmstat_reset_cx_stat(uint32_t cpuid);
+extern struct list_head cpufreq_governor_list;
+
+/*
+ * Get PM statistics
+ */
int do_get_pm_info(struct xen_sysctl_get_pmstat *op)
{
int ret = 0;
const struct processor_pminfo *pmpt;
- if ( (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
+ if ( !op || (op->cpuid >= NR_CPUS) || !cpu_online(op->cpuid) )
return -EINVAL;
pmpt = processor_pminfo[op->cpuid];
case PMSTAT_get_pxstat:
{
- uint64_t now, ct;
- uint64_t total_idle_ns;
- uint64_t tmp_idle_ns;
+ uint32_t ct;
struct pm_px *pxpt = cpufreq_statistic_data[op->cpuid];
+ spinlock_t *cpufreq_statistic_lock =
+ &per_cpu(cpufreq_statistic_lock, op->cpuid);
- if ( !pxpt )
- return -ENODATA;
+ spin_lock(cpufreq_statistic_lock);
- total_idle_ns = get_cpu_idle_time(op->cpuid);
- tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+ if ( !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt )
+ {
+ spin_unlock(cpufreq_statistic_lock);
+ return -ENODATA;
+ }
- now = NOW();
pxpt->u.usable = pmpt->perf.state_count - pmpt->perf.platform_limit;
- pxpt->u.pt[pxpt->u.cur].residency += now - pxpt->prev_state_wall;
- pxpt->u.pt[pxpt->u.cur].residency -= tmp_idle_ns;
- pxpt->prev_state_wall = now;
- pxpt->prev_idle_wall = total_idle_ns;
+
+ cpufreq_residency_update(op->cpuid, pxpt->u.cur);
ct = pmpt->perf.state_count;
if ( copy_to_guest(op->u.getpx.trans_pt, pxpt->u.trans_pt, ct*ct) )
{
+ spin_unlock(cpufreq_statistic_lock);
ret = -EFAULT;
break;
}
if ( copy_to_guest(op->u.getpx.pt, pxpt->u.pt, ct) )
{
+ spin_unlock(cpufreq_statistic_lock);
ret = -EFAULT;
break;
}
op->u.getpx.last = pxpt->u.last;
op->u.getpx.cur = pxpt->u.cur;
+ spin_unlock(cpufreq_statistic_lock);
+
break;
}
return ret;
}
+
+/*
+ * 1. Get PM parameter
+ * 2. Provide user PM control
+ */
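+/* Build a space-separated list of the registered governors' names into the
+ * supplied buffer. */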
+static int read_scaling_available_governors(char *scaling_available_governors,
+ unsigned int size)
+{
+ unsigned int i = 0;
+ struct cpufreq_governor *t;
+
+ if ( !scaling_available_governors )
+ return -EINVAL;
+
+ list_for_each_entry(t, &cpufreq_governor_list, governor_list)
+ {
+ i += scnprintf(&scaling_available_governors[i],
+ CPUFREQ_NAME_LEN, "%s ", t->name);
+ if ( i > size )
+ return -EINVAL;
+ }
+ scaling_available_governors[i-1] = '\0';
+
+ return 0;
+}
+
+static int get_cpufreq_para(struct xen_sysctl_pm_op *op)
+{
+ uint32_t ret = 0;
+ const struct processor_pminfo *pmpt;
+ struct cpufreq_policy *policy;
+ uint32_t gov_num = 0;
+ uint32_t *affected_cpus;
+ uint32_t *scaling_available_frequencies;
+ char *scaling_available_governors;
+ struct list_head *pos;
+ uint32_t cpu, i, j = 0;
+
+ if ( !op || !cpu_online(op->cpuid) )
+ return -EINVAL;
+ pmpt = processor_pminfo[op->cpuid];
+ policy = cpufreq_cpu_policy[op->cpuid];
+
+ if ( !pmpt || !pmpt->perf.states ||
+ !policy || !policy->governor )
+ return -EINVAL;
+
+ list_for_each(pos, &cpufreq_governor_list)
+ gov_num++;
+
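+    /*
+     * If the caller's buffers were sized for different cpu/freq/governor
+     * counts, report the current counts back and let it reallocate and
+     * retry (-EAGAIN).
+     */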
+ if ( (op->get_para.cpu_num != cpus_weight(policy->cpus)) ||
+ (op->get_para.freq_num != pmpt->perf.state_count) ||
+ (op->get_para.gov_num != gov_num) )
+ {
+ op->get_para.cpu_num = cpus_weight(policy->cpus);
+ op->get_para.freq_num = pmpt->perf.state_count;
+ op->get_para.gov_num = gov_num;
+ return -EAGAIN;
+ }
+
+ if ( !(affected_cpus = xmalloc_array(uint32_t, op->get_para.cpu_num)) )
+ return -ENOMEM;
+ memset(affected_cpus, 0, op->get_para.cpu_num * sizeof(uint32_t));
+ for_each_cpu_mask(cpu, policy->cpus)
+ affected_cpus[j++] = cpu;
+ ret = copy_to_guest(op->get_para.affected_cpus,
+ affected_cpus, op->get_para.cpu_num);
+ xfree(affected_cpus);
+ if ( ret )
+ return ret;
+
+ if ( !(scaling_available_frequencies =
+ xmalloc_array(uint32_t, op->get_para.freq_num)) )
+ return -ENOMEM;
+ memset(scaling_available_frequencies, 0,
+ op->get_para.freq_num * sizeof(uint32_t));
+ for ( i = 0; i < op->get_para.freq_num; i++ )
+ scaling_available_frequencies[i] =
+ pmpt->perf.states[i].core_frequency * 1000;
+ ret = copy_to_guest(op->get_para.scaling_available_frequencies,
+ scaling_available_frequencies, op->get_para.freq_num);
+ xfree(scaling_available_frequencies);
+ if ( ret )
+ return ret;
+
+ if ( !(scaling_available_governors =
+ xmalloc_array(char, gov_num * CPUFREQ_NAME_LEN)) )
+ return -ENOMEM;
+ memset(scaling_available_governors, 0,
+ gov_num * CPUFREQ_NAME_LEN * sizeof(char));
+ if ( (ret = read_scaling_available_governors(scaling_available_governors,
+ gov_num * CPUFREQ_NAME_LEN * sizeof(char))) )
+ {
+ xfree(scaling_available_governors);
+ return ret;
+ }
+ ret = copy_to_guest(op->get_para.scaling_available_governors,
+ scaling_available_governors, gov_num * CPUFREQ_NAME_LEN);
+ xfree(scaling_available_governors);
+ if ( ret )
+ return ret;
+
+ op->get_para.cpuinfo_cur_freq =
+ cpufreq_driver->get ? cpufreq_driver->get(op->cpuid) : policy->cur;
+ op->get_para.cpuinfo_max_freq = policy->cpuinfo.max_freq;
+ op->get_para.cpuinfo_min_freq = policy->cpuinfo.min_freq;
+ op->get_para.scaling_cur_freq = policy->cur;
+ op->get_para.scaling_max_freq = policy->max;
+ op->get_para.scaling_min_freq = policy->min;
+
+ if ( cpufreq_driver->name )
+ strlcpy(op->get_para.scaling_driver,
+ cpufreq_driver->name, CPUFREQ_NAME_LEN);
+ else
+ strlcpy(op->get_para.scaling_driver, "Unknown", CPUFREQ_NAME_LEN);
+
+ if ( policy->governor->name )
+ strlcpy(op->get_para.scaling_governor,
+ policy->governor->name, CPUFREQ_NAME_LEN);
+ else
+ strlcpy(op->get_para.scaling_governor, "Unknown", CPUFREQ_NAME_LEN);
+
+ /* governor specific para */
+ if ( !strnicmp(op->get_para.scaling_governor,
+ "userspace", CPUFREQ_NAME_LEN) )
+ {
+ op->get_para.u.userspace.scaling_setspeed = policy->cur;
+ }
+
+ if ( !strnicmp(op->get_para.scaling_governor,
+ "ondemand", CPUFREQ_NAME_LEN) )
+ {
+ ret = get_cpufreq_ondemand_para(
+ &op->get_para.u.ondemand.sampling_rate_max,
+ &op->get_para.u.ondemand.sampling_rate_min,
+ &op->get_para.u.ondemand.sampling_rate,
+ &op->get_para.u.ondemand.up_threshold);
+ }
+
+ return ret;
+}
+
+static int set_cpufreq_gov(struct xen_sysctl_pm_op *op)
+{
+ struct cpufreq_policy new_policy, *old_policy;
+
+ if ( !op || !cpu_online(op->cpuid) )
+ return -EINVAL;
+
+ old_policy = cpufreq_cpu_policy[op->cpuid];
+ if ( !old_policy )
+ return -EINVAL;
+
+ memcpy(&new_policy, old_policy, sizeof(struct cpufreq_policy));
+
+ new_policy.governor = __find_governor(op->set_gov.scaling_governor);
+ if (new_policy.governor == NULL)
+ return -EINVAL;
+
+ return __cpufreq_set_policy(old_policy, &new_policy);
+}
+
+static int set_cpufreq_para(struct xen_sysctl_pm_op *op)
+{
+ int ret = 0;
+ struct cpufreq_policy *policy;
+
+ if ( !op || !cpu_online(op->cpuid) )
+ return -EINVAL;
+ policy = cpufreq_cpu_policy[op->cpuid];
+
+ if ( !policy || !policy->governor )
+ return -EINVAL;
+
+ switch(op->set_para.ctrl_type)
+ {
+ case SCALING_MAX_FREQ:
+ {
+ struct cpufreq_policy new_policy;
+
+ memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+ new_policy.max = op->set_para.ctrl_value;
+ ret = __cpufreq_set_policy(policy, &new_policy);
+
+ break;
+ }
+
+ case SCALING_MIN_FREQ:
+ {
+ struct cpufreq_policy new_policy;
+
+ memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+ new_policy.min = op->set_para.ctrl_value;
+ ret = __cpufreq_set_policy(policy, &new_policy);
+
+ break;
+ }
+
+ case SCALING_SETSPEED:
+ {
+ unsigned int freq =op->set_para.ctrl_value;
+
+ if ( !strnicmp(policy->governor->name,
+ "userspace", CPUFREQ_NAME_LEN) )
+ ret = write_userspace_scaling_setspeed(op->cpuid, freq);
+ else
+ ret = -EINVAL;
+
+ break;
+ }
+
+ case SAMPLING_RATE:
+ {
+ unsigned int sampling_rate = op->set_para.ctrl_value;
+
+ if ( !strnicmp(policy->governor->name,
+ "ondemand", CPUFREQ_NAME_LEN) )
+ ret = write_ondemand_sampling_rate(sampling_rate);
+ else
+ ret = -EINVAL;
+
+ break;
+ }
+
+ case UP_THRESHOLD:
+ {
+ unsigned int up_threshold = op->set_para.ctrl_value;
+
+ if ( !strnicmp(policy->governor->name,
+ "ondemand", CPUFREQ_NAME_LEN) )
+ ret = write_ondemand_up_threshold(up_threshold);
+ else
+ ret = -EINVAL;
+
+ break;
+ }
+
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+static int get_cpufreq_avgfreq(struct xen_sysctl_pm_op *op)
+{
+ if ( !op || !cpu_online(op->cpuid) )
+ return -EINVAL;
+
+ op->get_avgfreq = cpufreq_driver_getavg(op->cpuid, USR_GETAVG);
+
+ return 0;
+}
+
+static int get_cputopo (struct xen_sysctl_pm_op *op)
+{
+ uint32_t i, nr_cpus;
+ XEN_GUEST_HANDLE_64(uint32) cpu_to_core_arr;
+ XEN_GUEST_HANDLE_64(uint32) cpu_to_socket_arr;
+ int arr_size, ret=0;
+
+ cpu_to_core_arr = op->get_topo.cpu_to_core;
+ cpu_to_socket_arr = op->get_topo.cpu_to_socket;
+ arr_size= min_t(uint32_t, op->get_topo.max_cpus, NR_CPUS);
+
+ if ( guest_handle_is_null( cpu_to_core_arr ) ||
+ guest_handle_is_null( cpu_to_socket_arr) )
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+
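+    /* Track the highest online CPU index; (nr_cpus + 1) is reported back. */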
+ nr_cpus = 0;
+ for ( i = 0; i < arr_size; i++ )
+ {
+ uint32_t core, socket;
+ if ( cpu_online(i) )
+ {
+ core = cpu_to_core(i);
+ socket = cpu_to_socket(i);
+ nr_cpus = i;
+ }
+ else
+ {
+ core = socket = INVALID_TOPOLOGY_ID;
+ }
+
+ if ( copy_to_guest_offset(cpu_to_core_arr, i, &core, 1) ||
+ copy_to_guest_offset(cpu_to_socket_arr, i, &socket, 1))
+ {
+ ret = -EFAULT;
+ goto out;
+ }
+ }
+
+ op->get_topo.nr_cpus = nr_cpus + 1;
+out:
+ return ret;
+}
+
+int do_pm_op(struct xen_sysctl_pm_op *op)
+{
+ int ret = 0;
+ const struct processor_pminfo *pmpt;
+
+ if ( !op || !cpu_online(op->cpuid) )
+ return -EINVAL;
+ pmpt = processor_pminfo[op->cpuid];
+
+ switch ( op->cmd & PM_PARA_CATEGORY_MASK )
+ {
+ case CPUFREQ_PARA:
+ if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
+ return -ENODEV;
+ if ( !pmpt || !(pmpt->perf.init & XEN_PX_INIT) )
+ return -EINVAL;
+ break;
+ }
+
+ switch ( op->cmd )
+ {
+ case GET_CPUFREQ_PARA:
+ {
+ ret = get_cpufreq_para(op);
+ break;
+ }
+
+ case SET_CPUFREQ_GOV:
+ {
+ ret = set_cpufreq_gov(op);
+ break;
+ }
+
+ case SET_CPUFREQ_PARA:
+ {
+ ret = set_cpufreq_para(op);
+ break;
+ }
+
+ case GET_CPUFREQ_AVGFREQ:
+ {
+ ret = get_cpufreq_avgfreq(op);
+ break;
+ }
+
+ case XEN_SYSCTL_pm_op_get_cputopo:
+ {
+ ret = get_cputopo(op);
+ break;
+ }
+
+ case XEN_SYSCTL_pm_op_set_sched_opt_smt:
+ {
+ uint32_t saved_value;
+
+ saved_value = sched_smt_power_savings;
+ sched_smt_power_savings = !!op->set_sched_opt_smt;
+ op->set_sched_opt_smt = saved_value;
+
+ break;
+ }
+
+ default:
+        printk("undefined sub-hypercall @ do_pm_op\n");
+ ret = -ENOSYS;
+ break;
+ }
+
+ return ret;
+}
* on a device on bus 0. */
switch (rr->space_id) {
case ACPI_ADR_SPACE_PCI_CONFIG:
- printk("Resetting with ACPI PCI RESET_REG.");
+ printk("Resetting with ACPI PCI RESET_REG.\n");
/* Write the value that resets us. */
pci_conf_write8(0,
(rr->address >> 32) & 31,
break;
case ACPI_ADR_SPACE_SYSTEM_MEMORY:
case ACPI_ADR_SPACE_SYSTEM_IO:
- printk("ACPI MEMORY or I/O RESET_REG.");
+ printk("Resetting with ACPI MEMORY or I/O RESET_REG.\n");
acpi_hw_low_level_write(8, reset_value, rr);
break;
}
__serial_rx(c, regs);
}
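+
+/* Defer the VIRQ_CON_RING notification to a tasklet rather than raising it
+ * directly from the console output paths. */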
+static void notify_dom0_con_ring(unsigned long unused)
+{
+ send_guest_global_virq(dom0, VIRQ_CON_RING);
+}
+static DECLARE_TASKLET(notify_dom0_con_ring_tasklet, notify_dom0_con_ring, 0);
+
static long guest_console_write(XEN_GUEST_HANDLE(char) buffer, int count)
{
char kbuf[128], *kptr;
{
for ( kptr = kbuf; *kptr != '\0'; kptr++ )
putchar_console_ring(*kptr);
- send_guest_global_virq(dom0, VIRQ_CON_RING);
+ tasklet_schedule(¬ify_dom0_con_ring_tasklet);
}
spin_unlock_irq(&console_lock);
* *****************************************************
*/
+static bool_t console_locks_busted;
+
static void __putstr(const char *str)
{
int c;
sercon_puts(str);
vga_puts(str);
- while ( (c = *str++) != '\0' )
- putchar_console_ring(c);
-
- send_guest_global_virq(dom0, VIRQ_CON_RING);
+ if ( !console_locks_busted )
+ {
+ while ( (c = *str++) != '\0' )
+ putchar_console_ring(c);
+ tasklet_schedule(¬ify_dom0_con_ring_tasklet);
+ }
}
static int printk_prefix_check(char *p, char **pp)
{
spin_lock_init(&console_lock);
serial_force_unlock(sercon_handle);
+ console_locks_busted = 1;
console_start_sync();
}
return 0;
order = get_order_from_bytes(bytes);
- debugtrace_buf = alloc_xenheap_pages(order);
+ debugtrace_buf = alloc_xenheap_pages(order, 0);
ASSERT(debugtrace_buf != NULL);
memset(debugtrace_buf, '\0', bytes);
console_start_sync();
printk("\n****************************************\n");
printk("Panic on CPU %d:\n", smp_processor_id());
- printk(buf);
+ printk("%s", buf);
printk("****************************************\n\n");
if ( opt_noreboot )
printk("Manual reset required ('noreboot' specified)\n");
#include <asm/io.h>
/*
- * Configure serial port with a string <baud>,DPS,<io-base>,<irq>.
+ * Configure serial port with a string:
+ * <baud>[/<clock_hz>][,DPS[,<io-base>[,<irq>]]].
* The tail of the string can be omitted if platform defaults are sufficient.
* If the baud rate is pre-configured, perhaps by a bootloader, then 'auto'
- * can be specified in place of a numeric baud rate.
+ * can be specified in place of a numeric baud rate. Polled mode is specified
+ * by requesting irq 0.
*/
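+
+/*
+ * Illustrative example (values chosen only for illustration):
+ *   com1=115200,8n1,0x3f8,4
+ * i.e. 115200 baud, 8 data bits, no parity, 1 stop bit, I/O port 0x3f8, IRQ 4.
+ */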
static char opt_com1[30] = "", opt_com2[30] = "";
string_param("com1", opt_com1);
string_param("com2", opt_com2);
static struct ns16550 {
- int baud, data_bits, parity, stop_bits, irq;
+ int baud, clock_hz, data_bits, parity, stop_bits, irq;
unsigned long io_base; /* I/O port or memory-mapped I/O address. */
char *remapped_io_base; /* Remapped virtual address of mmap I/O. */
/* UART with IRQ line: interrupt-driven I/O. */
if ( uart->baud != BAUD_AUTO )
{
/* Baud rate specified: program it into the divisor latch. */
- divisor = UART_CLOCK_HZ / (uart->baud * 16);
+ divisor = uart->clock_hz / (uart->baud << 4);
ns_write_reg(uart, DLL, (char)divisor);
ns_write_reg(uart, DLM, (char)(divisor >> 8));
}
/* Baud rate already set: read it out from the divisor latch. */
divisor = ns_read_reg(uart, DLL);
divisor |= ns_read_reg(uart, DLM) << 8;
- uart->baud = UART_CLOCK_HZ / (divisor * 16);
+ uart->baud = uart->clock_hz / (divisor << 4);
}
ns_write_reg(uart, LCR, lcr);
{
unsigned char status, scratch, scratch2, scratch3;
+ /*
+ * We can't poke MMIO UARTs until they get I/O remapped later. Assume that
+ * if we're getting MMIO UARTs, the arch code knows what it's doing.
+ */
+ if ( uart->io_base >= 0x10000 )
+ return 1;
+
/*
* Do a simple existence test first; if we fail this,
* there's no point trying anything else.
else if ( (baud = simple_strtoul(conf, &conf, 10)) != 0 )
uart->baud = baud;
+ if ( *conf == '/')
+ {
+ conf++;
+ uart->clock_hz = simple_strtoul(conf, &conf, 0) << 4;
+ }
+
if ( *conf != ',' )
goto config_parsed;
conf++;
uart->baud = (defaults->baud ? :
console_has((index == 0) ? "com1" : "com2")
? BAUD_AUTO : 0);
+ uart->clock_hz = UART_CLOCK_HZ;
uart->data_bits = defaults->data_bits;
uart->parity = parse_parity_char(defaults->parity);
uart->stop_bits = defaults->stop_bits;
while ( !spin_trylock(&port->tx_lock) )
{
if ( !port->driver->tx_empty(port) )
- return;
+ goto out;
cpu_relax();
}
}
}
- spin_unlock_irqrestore(&port->tx_lock, flags);
+ spin_unlock(&port->tx_lock);
+
+ out:
+ local_irq_restore(flags);
}
static void __serial_putc(struct serial_port *port, char c)
int i, irq;
for ( i = 0; i < ARRAY_SIZE(com); i++ )
if ( (irq = serial_irq(i)) >= 0 )
- free_irq(irq);
+ release_irq(irq);
}
void serial_resume(void)
BUG_ON(!port->driver->tx_empty);
if ( port->txbuf == NULL )
port->txbuf = alloc_xenheap_pages(
- get_order_from_bytes(serial_txbufsz));
+ get_order_from_bytes(serial_txbufsz), 0);
}
/*
obj-y += cpufreq.o
obj-y += cpufreq_ondemand.o
+obj-y += cpufreq_misc_governors.o
obj-y += utility.o
#include <xen/errno.h>
#include <xen/delay.h>
#include <xen/cpumask.h>
+#include <xen/list.h>
#include <xen/sched.h>
+#include <xen/string.h>
#include <xen/timer.h>
#include <xen/xmalloc.h>
+#include <xen/guest_access.h>
#include <xen/domain.h>
#include <asm/bug.h>
#include <asm/io.h>
#include <acpi/acpi.h>
#include <acpi/cpufreq/cpufreq.h>
-/* TODO: change to link list later as domain number may be sparse */
-static cpumask_t cpufreq_dom_map[NR_CPUS];
+static unsigned int usr_max_freq, usr_min_freq;
+static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy);
+
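+/*
+ * Px coordination domains are kept on a linked list rather than a fixed
+ * per-domain array, since domain numbers may be sparse.
+ */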
+struct cpufreq_dom {
+ unsigned int dom;
+ cpumask_t map;
+ struct list_head node;
+};
+static LIST_HEAD(cpufreq_dom_list_head);
+
+struct cpufreq_governor *cpufreq_opt_governor;
+LIST_HEAD(cpufreq_governor_list);
+
+struct cpufreq_governor *__find_governor(const char *governor)
+{
+ struct cpufreq_governor *t;
+
+ if (!governor)
+ return NULL;
+
+ list_for_each_entry(t, &cpufreq_governor_list, governor_list)
+ if (!strnicmp(governor, t->name, CPUFREQ_NAME_LEN))
+ return t;
+
+ return NULL;
+}
+
+int cpufreq_register_governor(struct cpufreq_governor *governor)
+{
+ if (!governor)
+ return -EINVAL;
+
+ if (__find_governor(governor->name) != NULL)
+ return -EEXIST;
+
+ list_add(&governor->governor_list, &cpufreq_governor_list);
+ return 0;
+}
+
+int cpufreq_unregister_governor(struct cpufreq_governor *governor)
+{
+ int cpu = smp_processor_id();
+ struct cpufreq_policy *policy = cpufreq_cpu_policy[cpu];
+
+ if (!governor || !policy)
+ return -EINVAL;
+
+    /* error if unregistering the current cpufreq governor */
+ if (governor == policy->governor)
+ return -EBUSY;
+
+ if (__find_governor(governor->name) == NULL)
+ return -ENOENT;
+
+ list_del(&governor->governor_list);
+ return 0;
+}
int cpufreq_limit_change(unsigned int cpu)
{
{
int ret = 0;
unsigned int firstcpu;
- unsigned int dom;
+ unsigned int dom, domexist = 0;
unsigned int j;
+ struct list_head *pos;
+ struct cpufreq_dom *cpufreq_dom = NULL;
struct cpufreq_policy new_policy;
struct cpufreq_policy *policy;
struct processor_performance *perf = &processor_pminfo[cpu]->perf;
/* to protect the case when Px was not controlled by xen */
- if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
- return 0;
-
- if (!cpu_online(cpu) || cpufreq_cpu_policy[cpu])
+ if (!processor_pminfo[cpu] ||
+ !(perf->init & XEN_PX_INIT) ||
+ !cpu_online(cpu))
return -EINVAL;
+ if (cpufreq_cpu_policy[cpu])
+ return 0;
+
ret = cpufreq_statistic_init(cpu);
if (ret)
return ret;
dom = perf->domain_info.domain;
- if (cpus_weight(cpufreq_dom_map[dom])) {
+
+ list_for_each(pos, &cpufreq_dom_list_head) {
+ cpufreq_dom = list_entry(pos, struct cpufreq_dom, node);
+ if (dom == cpufreq_dom->dom) {
+ domexist = 1;
+ break;
+ }
+ }
+
+ if (domexist) {
/* share policy with the first cpu since on same boat */
- firstcpu = first_cpu(cpufreq_dom_map[dom]);
+ firstcpu = first_cpu(cpufreq_dom->map);
policy = cpufreq_cpu_policy[firstcpu];
cpufreq_cpu_policy[cpu] = policy;
- cpu_set(cpu, cpufreq_dom_map[dom]);
+ cpu_set(cpu, cpufreq_dom->map);
cpu_set(cpu, policy->cpus);
+ /* domain coordination sanity check */
+ if ((perf->domain_info.coord_type !=
+ processor_pminfo[firstcpu]->perf.domain_info.coord_type) ||
+ (perf->domain_info.num_processors !=
+ processor_pminfo[firstcpu]->perf.domain_info.num_processors)) {
+ ret = -EINVAL;
+ goto err2;
+ }
+
printk(KERN_EMERG"adding CPU %u\n", cpu);
} else {
+ cpufreq_dom = xmalloc(struct cpufreq_dom);
+ if (!cpufreq_dom) {
+ cpufreq_statistic_exit(cpu);
+ return -ENOMEM;
+ }
+ memset(cpufreq_dom, 0, sizeof(struct cpufreq_dom));
+ cpufreq_dom->dom = dom;
+ cpu_set(cpu, cpufreq_dom->map);
+ list_add(&cpufreq_dom->node, &cpufreq_dom_list_head);
+
/* for the first cpu, setup policy and do init work */
policy = xmalloc(struct cpufreq_policy);
if (!policy) {
+ list_del(&cpufreq_dom->node);
+ xfree(cpufreq_dom);
cpufreq_statistic_exit(cpu);
return -ENOMEM;
}
memset(policy, 0, sizeof(struct cpufreq_policy));
-
- cpufreq_cpu_policy[cpu] = policy;
- cpu_set(cpu, cpufreq_dom_map[dom]);
+ policy->cpu = cpu;
cpu_set(cpu, policy->cpus);
+ cpufreq_cpu_policy[cpu] = policy;
- policy->cpu = cpu;
ret = cpufreq_driver->init(policy);
if (ret)
goto err1;
* After get full cpumap of the coordination domain,
* we can safely start gov here.
*/
- if (cpus_weight(cpufreq_dom_map[dom]) ==
+ if (cpus_weight(cpufreq_dom->map) ==
perf->domain_info.num_processors) {
memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
policy->governor = NULL;
+
+ cpufreq_cmdline_common_para(&new_policy);
+
ret = __cpufreq_set_policy(policy, &new_policy);
- if (ret)
- goto err2;
+ if (ret) {
+ if (new_policy.governor == CPUFREQ_DEFAULT_GOVERNOR)
+                /* if the default governor fails, cpufreq is in real trouble */
+ goto err2;
+ else {
+                /* the governor requested on the command line failed; */
+                /* give the default governor one more chance */
+ memcpy(&new_policy, policy, sizeof(struct cpufreq_policy));
+ new_policy.governor = CPUFREQ_DEFAULT_GOVERNOR;
+ ret = __cpufreq_set_policy(policy, &new_policy);
+ if (ret)
+ goto err2;
+ }
+ }
}
return 0;
err2:
cpufreq_driver->exit(policy);
err1:
- for_each_cpu_mask(j, cpufreq_dom_map[dom]) {
+ for_each_cpu_mask(j, cpufreq_dom->map) {
cpufreq_cpu_policy[j] = NULL;
cpufreq_statistic_exit(j);
}
- cpus_clear(cpufreq_dom_map[dom]);
+ list_del(&cpufreq_dom->node);
+ xfree(cpufreq_dom);
xfree(policy);
return ret;
}
int cpufreq_del_cpu(unsigned int cpu)
{
- unsigned int dom;
+ unsigned int dom, domexist = 0;
+ struct list_head *pos;
+ struct cpufreq_dom *cpufreq_dom = NULL;
struct cpufreq_policy *policy;
struct processor_performance *perf = &processor_pminfo[cpu]->perf;
/* to protect the case when Px was not controlled by xen */
- if (!processor_pminfo[cpu] || !(perf->init & XEN_PX_INIT))
- return 0;
-
- if (!cpu_online(cpu) || !cpufreq_cpu_policy[cpu])
+ if (!processor_pminfo[cpu] ||
+ !(perf->init & XEN_PX_INIT) ||
+ !cpu_online(cpu))
return -EINVAL;
+ if (!cpufreq_cpu_policy[cpu])
+ return 0;
+
dom = perf->domain_info.domain;
policy = cpufreq_cpu_policy[cpu];
- printk(KERN_EMERG"deleting CPU %u\n", cpu);
+ list_for_each(pos, &cpufreq_dom_list_head) {
+ cpufreq_dom = list_entry(pos, struct cpufreq_dom, node);
+ if (dom == cpufreq_dom->dom) {
+ domexist = 1;
+ break;
+ }
+ }
+
+ if (!domexist)
+ return -EINVAL;
/* for the first cpu of the domain, stop gov */
- if (cpus_weight(cpufreq_dom_map[dom]) ==
+ if (cpus_weight(cpufreq_dom->map) ==
perf->domain_info.num_processors)
__cpufreq_governor(policy, CPUFREQ_GOV_STOP);
cpufreq_cpu_policy[cpu] = NULL;
cpu_clear(cpu, policy->cpus);
- cpu_clear(cpu, cpufreq_dom_map[dom]);
+ cpu_clear(cpu, cpufreq_dom->map);
cpufreq_statistic_exit(cpu);
/* for the last cpu of the domain, clean room */
/* It's safe here to free freq_table, drv_data and policy */
- if (!cpus_weight(cpufreq_dom_map[dom])) {
+ if (!cpus_weight(cpufreq_dom->map)) {
cpufreq_driver->exit(policy);
+ list_del(&cpufreq_dom->node);
+ xfree(cpufreq_dom);
xfree(policy);
}
+ printk(KERN_EMERG"deleting CPU %u\n", cpu);
return 0;
}
+static void print_PCT(struct xen_pct_register *ptr)
+{
+ printk(KERN_INFO "\t_PCT: descriptor=%d, length=%d, space_id=%d, "
+ "bit_width=%d, bit_offset=%d, reserved=%d, address=%"PRId64"\n",
+ ptr->descriptor, ptr->length, ptr->space_id, ptr->bit_width,
+ ptr->bit_offset, ptr->reserved, ptr->address);
+}
+
static void print_PSS(struct xen_processor_px *ptr, int count)
{
int i;
- printk(KERN_INFO "\t_PSS:\n");
+ printk(KERN_INFO "\t_PSS: state_count=%d\n", count);
for (i=0; i<count; i++){
printk(KERN_INFO "\tState%d: %"PRId64"MHz %"PRId64"mW %"PRId64"us "
"%"PRId64"us 0x%"PRIx64" 0x%"PRIx64"\n",
ptr->num_processors);
}
+static void print_PPC(unsigned int platform_limit)
+{
+ printk(KERN_INFO "\t_PPC: %d\n", platform_limit);
+}
+
int set_px_pminfo(uint32_t acpi_id, struct xen_processor_performance *dom0_px_info)
{
int ret=0, cpuid;
struct processor_pminfo *pmpt;
struct processor_performance *pxpt;
- if ( !(xen_processor_pmbits & XEN_PROCESSOR_PM_PX) )
- {
- ret = -ENOSYS;
- goto out;
- }
-
cpuid = get_cpu_id(acpi_id);
- if ( cpuid < 0 )
+ if ( cpuid < 0 || !dom0_px_info)
{
ret = -EINVAL;
goto out;
if ( dom0_px_info->flags & XEN_PX_PCT )
{
+ /* space_id check */
+ if (dom0_px_info->control_register.space_id !=
+ dom0_px_info->status_register.space_id)
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+
+#ifdef CONFIG_IA64
+ /* for IA64, currently it only supports FFH */
+ if (dom0_px_info->control_register.space_id !=
+ ACPI_ADR_SPACE_FIXED_HARDWARE)
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+#endif
+
memcpy ((void *)&pxpt->control_register,
(void *)&dom0_px_info->control_register,
sizeof(struct xen_pct_register));
memcpy ((void *)&pxpt->status_register,
(void *)&dom0_px_info->status_register,
sizeof(struct xen_pct_register));
+ print_PCT(&pxpt->control_register);
+ print_PCT(&pxpt->status_register);
}
+
if ( dom0_px_info->flags & XEN_PX_PSS )
{
- if ( !(pxpt->states = xmalloc_array(struct xen_processor_px,
- dom0_px_info->state_count)) )
+ /* capability check */
+ if (dom0_px_info->state_count <= 1)
{
- ret = -ENOMEM;
+ ret = -EINVAL;
goto out;
}
- if ( xenpf_copy_px_states(pxpt, dom0_px_info) )
+
+ if ( !(pxpt->states = xmalloc_array(struct xen_processor_px,
+ dom0_px_info->state_count)) )
{
- xfree(pxpt->states);
- ret = -EFAULT;
+ ret = -ENOMEM;
goto out;
}
+ copy_from_guest(pxpt->states, dom0_px_info->states,
+ dom0_px_info->state_count);
pxpt->state_count = dom0_px_info->state_count;
print_PSS(pxpt->states,pxpt->state_count);
}
+
if ( dom0_px_info->flags & XEN_PX_PSD )
{
+#ifdef CONFIG_X86
+ /* for x86, check the domain coordination type */
+ /* for IA64, _PSD is optional for the current IA64 cpufreq algorithm */
+ if (dom0_px_info->shared_type != CPUFREQ_SHARED_TYPE_ALL &&
+ dom0_px_info->shared_type != CPUFREQ_SHARED_TYPE_ANY &&
+ dom0_px_info->shared_type != CPUFREQ_SHARED_TYPE_HW)
+ {
+ ret = -EINVAL;
+ goto out;
+ }
+#endif
+
pxpt->shared_type = dom0_px_info->shared_type;
memcpy ((void *)&pxpt->domain_info,
(void *)&dom0_px_info->domain_info,
sizeof(struct xen_psd_package));
print_PSD(&pxpt->domain_info);
}
+
if ( dom0_px_info->flags & XEN_PX_PPC )
{
pxpt->platform_limit = dom0_px_info->platform_limit;
+ print_PPC(pxpt->platform_limit);
if ( pxpt->init == XEN_PX_INIT )
{
-
ret = cpufreq_limit_change(cpuid);
goto out;
}
return ret;
}
+static void cpufreq_cmdline_common_para(struct cpufreq_policy *new_policy)
+{
+ if (usr_max_freq)
+ new_policy->max = usr_max_freq;
+ if (usr_min_freq)
+ new_policy->min = usr_min_freq;
+}
+
+static int __init cpufreq_handle_common_option(const char *name, const char *val)
+{
+ if (!strcmp(name, "maxfreq") && val) {
+ usr_max_freq = simple_strtoul(val, NULL, 0);
+ return 1;
+ }
+
+ if (!strcmp(name, "minfreq") && val) {
+ usr_min_freq = simple_strtoul(val, NULL, 0);
+ return 1;
+ }
+
+ return 0;
+}
+
+void __init cpufreq_cmdline_parse(char *str)
+{
+ static struct cpufreq_governor *__initdata cpufreq_governors[] =
+ {
+ &cpufreq_gov_userspace,
+ &cpufreq_gov_dbs,
+ &cpufreq_gov_performance,
+ &cpufreq_gov_powersave
+ };
+ unsigned int gov_index = 0;
+
+ do {
+ char *val, *end = strchr(str, ',');
+ unsigned int i;
+
+ if (end)
+ *end++ = '\0';
+ val = strchr(str, '=');
+ if (val)
+ *val++ = '\0';
+
+ if (!cpufreq_opt_governor) {
+ if (!val) {
+ for (i = 0; i < ARRAY_SIZE(cpufreq_governors); ++i) {
+ if (!strcmp(str, cpufreq_governors[i]->name)) {
+ cpufreq_opt_governor = cpufreq_governors[i];
+ gov_index = i;
+ str = NULL;
+ break;
+ }
+ }
+ } else {
+ cpufreq_opt_governor = CPUFREQ_DEFAULT_GOVERNOR;
+ }
+ }
+
+ if (str && !cpufreq_handle_common_option(str, val) &&
+ cpufreq_governors[gov_index]->handle_option)
+ cpufreq_governors[gov_index]->handle_option(str, val);
+
+ str = end;
+ } while (str);
+}
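+/*
+ * Illustrative example of the parsing above: given a string such as
+ * "ondemand,rate=50000,up_threshold=80,maxfreq=2000000", the first bare
+ * token selects the governor, "maxfreq"/"minfreq" are consumed by
+ * cpufreq_handle_common_option(), and any remaining name=value pairs are
+ * passed to the selected governor's handle_option() hook.
+ */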
--- /dev/null
+/*
+ * xen/drivers/cpufreq/cpufreq_misc_gov.c
+ *
+ * Copyright (C) 2001 Russell King
+ * (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de>
+ *
+ * Nov 2008 Liu Jinsong <jinsong.liu@intel.com>
+ * Porting cpufreq_userspace.c, cpufreq_performance.c, and
+ * cpufreq_powersave.c from Linux 2.6.23 to the Xen hypervisor
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ */
+
+#include <xen/init.h>
+#include <xen/sched.h>
+#include <acpi/cpufreq/cpufreq.h>
+
+/*
+ * cpufreq userspace governor
+ */
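+/* Per-cpu frequency requested via the "speed" option or setspeed writes;
+ * 0 means "not set yet", in which case GOV_START latches policy->cur. */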
+static unsigned int cpu_set_freq[NR_CPUS];
+
+static int cpufreq_governor_userspace(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ int ret = 0;
+ unsigned int cpu;
+
+ if (unlikely(!policy) ||
+ unlikely(!cpu_online(cpu = policy->cpu)))
+ return -EINVAL;
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ if (!cpu_set_freq[cpu])
+ cpu_set_freq[cpu] = policy->cur;
+ break;
+ case CPUFREQ_GOV_STOP:
+ cpu_set_freq[cpu] = 0;
+ break;
+ case CPUFREQ_GOV_LIMITS:
+ if (policy->max < cpu_set_freq[cpu])
+ ret = __cpufreq_driver_target(policy, policy->max,
+ CPUFREQ_RELATION_H);
+ else if (policy->min > cpu_set_freq[cpu])
+ ret = __cpufreq_driver_target(policy, policy->min,
+ CPUFREQ_RELATION_L);
+ else
+ ret = __cpufreq_driver_target(policy, cpu_set_freq[cpu],
+ CPUFREQ_RELATION_L);
+
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
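+/* Record the frequency requested for 'cpu' and retarget it immediately,
+ * clamped to the policy's [min, max] range. */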
+int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq)
+{
+ struct cpufreq_policy *policy = cpufreq_cpu_policy[cpu];
+
+ if (!cpu_online(cpu) || !policy)
+ return -EINVAL;
+
+ cpu_set_freq[cpu] = freq;
+
+ if (freq < policy->min)
+ freq = policy->min;
+ if (freq > policy->max)
+ freq = policy->max;
+
+ return __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L);
+}
+
+static void __init
+cpufreq_userspace_handle_option(const char *name, const char *val)
+{
+ if (!strcmp(name, "speed") && val) {
+ unsigned int usr_cmdline_freq;
+ unsigned int cpu;
+
+ usr_cmdline_freq = simple_strtoul(val, NULL, 0);
+ for (cpu = 0; cpu < NR_CPUS; cpu++)
+ cpu_set_freq[cpu] = usr_cmdline_freq;
+ }
+}
+
+struct cpufreq_governor cpufreq_gov_userspace = {
+ .name = "userspace",
+ .governor = cpufreq_governor_userspace,
+ .handle_option = cpufreq_userspace_handle_option
+};
+
+static int __init cpufreq_gov_userspace_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_userspace);
+}
+__initcall(cpufreq_gov_userspace_init);
+
+static void __exit cpufreq_gov_userspace_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_userspace);
+}
+__exitcall(cpufreq_gov_userspace_exit);
+
+
+/*
+ * cpufreq performance governor
+ */
+static int cpufreq_governor_performance(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ int ret = 0;
+
+ if (!policy)
+ return -EINVAL;
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ case CPUFREQ_GOV_STOP:
+ break;
+ case CPUFREQ_GOV_LIMITS:
+ ret = __cpufreq_driver_target(policy, policy->max,
+ CPUFREQ_RELATION_H);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+struct cpufreq_governor cpufreq_gov_performance = {
+ .name = "performance",
+ .governor = cpufreq_governor_performance,
+};
+
+static int __init cpufreq_gov_performance_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_performance);
+}
+__initcall(cpufreq_gov_performance_init);
+
+static void __exit cpufreq_gov_performance_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_performance);
+}
+__exitcall(cpufreq_gov_performance_exit);
+
+
+/*
+ * cpufreq powersave governor
+ */
+static int cpufreq_governor_powersave(struct cpufreq_policy *policy,
+ unsigned int event)
+{
+ int ret = 0;
+
+ if (!policy)
+ return -EINVAL;
+
+ switch (event) {
+ case CPUFREQ_GOV_START:
+ case CPUFREQ_GOV_STOP:
+ break;
+ case CPUFREQ_GOV_LIMITS:
+ ret = __cpufreq_driver_target(policy, policy->min,
+ CPUFREQ_RELATION_L);
+ break;
+ default:
+ ret = -EINVAL;
+ break;
+ }
+
+ return ret;
+}
+
+struct cpufreq_governor cpufreq_gov_powersave = {
+ .name = "powersave",
+ .governor = cpufreq_governor_powersave,
+};
+
+static int __init cpufreq_gov_powersave_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_powersave);
+}
+__initcall(cpufreq_gov_powersave_init);
+
+static void __exit cpufreq_gov_powersave_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_powersave);
+}
+__exitcall(cpufreq_gov_powersave_exit);
#include <acpi/cpufreq/cpufreq.h>
#define DEF_FREQUENCY_UP_THRESHOLD (80)
+#define MIN_FREQUENCY_UP_THRESHOLD (11)
+#define MAX_FREQUENCY_UP_THRESHOLD (100)
#define MIN_DBS_INTERVAL (MICROSECS(100))
-#define MIN_SAMPLING_MILLISECS (20)
-#define MIN_STAT_SAMPLING_RATE \
+#define MIN_SAMPLING_RATE_RATIO (2)
+#define MIN_SAMPLING_MILLISECS (MIN_SAMPLING_RATE_RATIO * 10)
+#define MIN_STAT_SAMPLING_RATE \
(MIN_SAMPLING_MILLISECS * MILLISECS(1))
+#define MIN_SAMPLING_RATE \
+ (def_sampling_rate / MIN_SAMPLING_RATE_RATIO)
+#define MAX_SAMPLING_RATE (500 * def_sampling_rate)
#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000)
#define TRANSITION_LATENCY_LIMIT (10 * 1000 )
static uint64_t def_sampling_rate;
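+/* Sampling rate requested via the "rate=" command line option (converted
+ * to ns by cpufreq_dbs_handle_option()); 0 when the option was not given. */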
+static uint64_t usr_sampling_rate;
/* Sampling types */
enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE};
static struct dbs_tuners {
uint64_t sampling_rate;
unsigned int up_threshold;
- unsigned int ignore_nice;
unsigned int powersave_bias;
} dbs_tuners_ins = {
+ .sampling_rate = 0,
.up_threshold = DEF_FREQUENCY_UP_THRESHOLD,
- .ignore_nice = 0,
.powersave_bias = 0,
};
static struct timer dbs_timer[NR_CPUS];
-uint64_t get_cpu_idle_time(unsigned int cpu)
+int write_ondemand_sampling_rate(unsigned int sampling_rate)
{
- uint64_t idle_ns;
- struct vcpu *v;
+ if ( (sampling_rate > MAX_SAMPLING_RATE / MICROSECS(1)) ||
+ (sampling_rate < MIN_SAMPLING_RATE / MICROSECS(1)) )
+ return -EINVAL;
- if ((v = idle_vcpu[cpu]) == NULL)
- return 0;
+ dbs_tuners_ins.sampling_rate = sampling_rate * MICROSECS(1);
+ return 0;
+}
- idle_ns = v->runstate.time[RUNSTATE_running];
- if (v->is_running)
- idle_ns += NOW() - v->runstate.state_entry_time;
+int write_ondemand_up_threshold(unsigned int up_threshold)
+{
+ if ( (up_threshold > MAX_FREQUENCY_UP_THRESHOLD) ||
+ (up_threshold < MIN_FREQUENCY_UP_THRESHOLD) )
+ return -EINVAL;
- return idle_ns;
+ dbs_tuners_ins.up_threshold = up_threshold;
+ return 0;
+}
+
+int get_cpufreq_ondemand_para(uint32_t *sampling_rate_max,
+ uint32_t *sampling_rate_min,
+ uint32_t *sampling_rate,
+ uint32_t *up_threshold)
+{
+ if (!sampling_rate_max || !sampling_rate_min ||
+ !sampling_rate || !up_threshold)
+ return -EINVAL;
+
+ *sampling_rate_max = MAX_SAMPLING_RATE/MICROSECS(1);
+ *sampling_rate_min = MIN_SAMPLING_RATE/MICROSECS(1);
+ *sampling_rate = dbs_tuners_ins.sampling_rate / MICROSECS(1);
+ *up_threshold = dbs_tuners_ins.up_threshold;
+
+ return 0;
}
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
if (load < (dbs_tuners_ins.up_threshold - 10)) {
unsigned int freq_next, freq_cur;
- freq_cur = __cpufreq_driver_getavg(policy);
- if (!freq_cur)
- freq_cur = policy->cur;
+ freq_cur = cpufreq_driver_getavg(policy->cpu, GOV_GETAVG);
freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
* Start the timerschedule work, when this governor
* is used for first time
*/
- if (dbs_enable == 1) {
+ if ((dbs_enable == 1) && !dbs_tuners_ins.sampling_rate) {
def_sampling_rate = policy->cpuinfo.transition_latency *
DEF_SAMPLING_RATE_LATENCY_MULTIPLIER;
if (def_sampling_rate < MIN_STAT_SAMPLING_RATE)
def_sampling_rate = MIN_STAT_SAMPLING_RATE;
- dbs_tuners_ins.sampling_rate = def_sampling_rate;
+ if (!usr_sampling_rate)
+ dbs_tuners_ins.sampling_rate = def_sampling_rate;
+ else if (usr_sampling_rate < MIN_SAMPLING_RATE) {
+ printk(KERN_WARNING "cpufreq/ondemand: "
+ "specified sampling rate too low, using %"PRIu64"\n",
+ MIN_SAMPLING_RATE);
+ dbs_tuners_ins.sampling_rate = MIN_SAMPLING_RATE;
+ } else if (usr_sampling_rate > MAX_SAMPLING_RATE) {
+ printk(KERN_WARNING "cpufreq/ondemand: "
+ "specified sampling rate too high, using %"PRIu64"\n",
+ MAX_SAMPLING_RATE);
+ dbs_tuners_ins.sampling_rate = MAX_SAMPLING_RATE;
+ } else
+ dbs_tuners_ins.sampling_rate = usr_sampling_rate;
}
dbs_timer_init(this_dbs_info);
return 0;
}
+static void __init cpufreq_dbs_handle_option(const char *name, const char *val)
+{
+ if ( !strcmp(name, "rate") && val )
+ {
+ usr_sampling_rate = simple_strtoull(val, NULL, 0) * MICROSECS(1);
+ }
+ else if ( !strcmp(name, "up_threshold") && val )
+ {
+ unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+ if ( tmp < MIN_FREQUENCY_UP_THRESHOLD )
+ {
+ printk(XENLOG_WARNING "cpufreq/ondemand: "
+ "specified threshold too low, using %d\n",
+ MIN_FREQUENCY_UP_THRESHOLD);
+ tmp = MIN_FREQUENCY_UP_THRESHOLD;
+ }
+ else if ( tmp > MAX_FREQUENCY_UP_THRESHOLD )
+ {
+ printk(XENLOG_WARNING "cpufreq/ondemand: "
+ "specified threshold too high, using %d\n",
+ MAX_FREQUENCY_UP_THRESHOLD);
+ tmp = MAX_FREQUENCY_UP_THRESHOLD;
+ }
+ dbs_tuners_ins.up_threshold = tmp;
+ }
+ else if ( !strcmp(name, "bias") && val )
+ {
+ unsigned long tmp = simple_strtoul(val, NULL, 0);
+
+ if ( tmp > 1000 )
+ {
+ printk(XENLOG_WARNING "cpufreq/ondemand: "
+ "specified bias too high, using 1000\n");
+ tmp = 1000;
+ }
+ dbs_tuners_ins.powersave_bias = tmp;
+ }
+}
+
struct cpufreq_governor cpufreq_gov_dbs = {
.name = "ondemand",
.governor = cpufreq_governor_dbs,
+ .handle_option = cpufreq_dbs_handle_option
};
+
+static int __init cpufreq_gov_dbs_init(void)
+{
+ return cpufreq_register_governor(&cpufreq_gov_dbs);
+}
+__initcall(cpufreq_gov_dbs_init);
+
+static void __exit cpufreq_gov_dbs_exit(void)
+{
+ cpufreq_unregister_governor(&cpufreq_gov_dbs);
+}
+__exitcall(cpufreq_gov_dbs_exit);
struct processor_pminfo *__read_mostly processor_pminfo[NR_CPUS];
struct cpufreq_policy *__read_mostly cpufreq_cpu_policy[NR_CPUS];
+DEFINE_PER_CPU(spinlock_t, cpufreq_statistic_lock) = SPIN_LOCK_UNLOCKED;
+
/*********************************************************************
* Px STATISTIC INFO *
*********************************************************************/
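+/*
+ * Charge the busy (non-idle) wall-clock time elapsed since the last update
+ * to the residency counter of the given Px state.
+ */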
+void cpufreq_residency_update(unsigned int cpu, uint8_t state)
+{
+ uint64_t now, total_idle_ns;
+ int64_t delta;
+ struct pm_px *pxpt = cpufreq_statistic_data[cpu];
+
+ total_idle_ns = get_cpu_idle_time(cpu);
+ now = NOW();
+
+ delta = (now - pxpt->prev_state_wall) -
+ (total_idle_ns - pxpt->prev_idle_wall);
+
+ if ( likely(delta >= 0) )
+ pxpt->u.pt[state].residency += delta;
+
+ pxpt->prev_state_wall = now;
+ pxpt->prev_idle_wall = total_idle_ns;
+}
+
void cpufreq_statistic_update(unsigned int cpu, uint8_t from, uint8_t to)
{
- uint64_t now;
struct pm_px *pxpt = cpufreq_statistic_data[cpu];
struct processor_pminfo *pmpt = processor_pminfo[cpu];
- uint64_t total_idle_ns;
- uint64_t tmp_idle_ns;
+ spinlock_t *cpufreq_statistic_lock =
+ &per_cpu(cpufreq_statistic_lock, cpu);
- if ( !pxpt || !pmpt )
- return;
+ spin_lock(cpufreq_statistic_lock);
- now = NOW();
- total_idle_ns = get_cpu_idle_time(cpu);
- tmp_idle_ns = total_idle_ns - pxpt->prev_idle_wall;
+ if ( !pxpt || !pmpt ) {
+ spin_unlock(cpufreq_statistic_lock);
+ return;
+ }
pxpt->u.last = from;
pxpt->u.cur = to;
pxpt->u.pt[to].count++;
- pxpt->u.pt[from].residency += now - pxpt->prev_state_wall;
- pxpt->u.pt[from].residency -= tmp_idle_ns;
+
+ cpufreq_residency_update(cpu, from);
(*(pxpt->u.trans_pt + from * pmpt->perf.state_count + to))++;
- pxpt->prev_state_wall = now;
- pxpt->prev_idle_wall = total_idle_ns;
+ spin_unlock(cpufreq_statistic_lock);
}
int cpufreq_statistic_init(unsigned int cpuid)
uint32_t i, count;
struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
-
- count = pmpt->perf.state_count;
+ spinlock_t *cpufreq_statistic_lock =
+ &per_cpu(cpufreq_statistic_lock, cpuid);
if ( !pmpt )
return -EINVAL;
- if ( !pxpt )
- {
- pxpt = xmalloc(struct pm_px);
- if ( !pxpt )
- return -ENOMEM;
- memset(pxpt, 0, sizeof(*pxpt));
- cpufreq_statistic_data[cpuid] = pxpt;
+ spin_lock(cpufreq_statistic_lock);
+
+ if ( pxpt ) {
+ spin_unlock(cpufreq_statistic_lock);
+ return 0;
+ }
+
+ count = pmpt->perf.state_count;
+
+ pxpt = xmalloc(struct pm_px);
+ if ( !pxpt ) {
+ spin_unlock(cpufreq_statistic_lock);
+ return -ENOMEM;
}
+ memset(pxpt, 0, sizeof(*pxpt));
+ cpufreq_statistic_data[cpuid] = pxpt;
pxpt->u.trans_pt = xmalloc_array(uint64_t, count * count);
- if (!pxpt->u.trans_pt)
+ if (!pxpt->u.trans_pt) {
+ xfree(pxpt);
+ spin_unlock(cpufreq_statistic_lock);
return -ENOMEM;
+ }
pxpt->u.pt = xmalloc_array(struct pm_px_val, count);
if (!pxpt->u.pt) {
xfree(pxpt->u.trans_pt);
+ xfree(pxpt);
+ spin_unlock(cpufreq_statistic_lock);
return -ENOMEM;
}
pxpt->prev_state_wall = NOW();
pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+ spin_unlock(cpufreq_statistic_lock);
+
return 0;
}
void cpufreq_statistic_exit(unsigned int cpuid)
{
struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
+ spinlock_t *cpufreq_statistic_lock =
+ &per_cpu(cpufreq_statistic_lock, cpuid);
+
+ spin_lock(cpufreq_statistic_lock);
- if (!pxpt)
+ if (!pxpt) {
+ spin_unlock(cpufreq_statistic_lock);
return;
+ }
+
xfree(pxpt->u.trans_pt);
xfree(pxpt->u.pt);
- memset(pxpt, 0, sizeof(struct pm_px));
+ xfree(pxpt);
+ cpufreq_statistic_data[cpuid] = NULL;
+
+ spin_unlock(cpufreq_statistic_lock);
}
void cpufreq_statistic_reset(unsigned int cpuid)
uint32_t i, j, count;
struct pm_px *pxpt = cpufreq_statistic_data[cpuid];
const struct processor_pminfo *pmpt = processor_pminfo[cpuid];
+ spinlock_t *cpufreq_statistic_lock =
+ &per_cpu(cpufreq_statistic_lock, cpuid);
+
+ spin_lock(cpufreq_statistic_lock);
- if ( !pxpt || !pmpt )
+ if ( !pmpt || !pxpt || !pxpt->u.pt || !pxpt->u.trans_pt ) {
+ spin_unlock(cpufreq_statistic_lock);
return;
+ }
count = pmpt->perf.state_count;
pxpt->prev_state_wall = NOW();
pxpt->prev_idle_wall = get_cpu_idle_time(cpuid);
+
+ spin_unlock(cpufreq_statistic_lock);
}
return retval;
}
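+/*
+ * Return the average frequency reported by the driver's getavg() hook when
+ * it is available and positive; otherwise fall back to the requested
+ * frequency in policy->cur (0 if the cpu is offline or has no policy).
+ */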
-int __cpufreq_driver_getavg(struct cpufreq_policy *policy)
+int cpufreq_driver_getavg(unsigned int cpu, unsigned int flag)
{
- int ret = 0;
+ struct cpufreq_policy *policy;
+ int freq_avg;
- if (!policy)
- return -EINVAL;
+ policy = cpufreq_cpu_policy[cpu];
+ if (!cpu_online(cpu) || !policy)
+ return 0;
- if (cpu_online(policy->cpu) && cpufreq_driver->getavg)
- ret = cpufreq_driver->getavg(policy->cpu);
+ if (cpufreq_driver->getavg)
+ {
+ freq_avg = cpufreq_driver->getavg(cpu, flag);
+ if (freq_avg > 0)
+ return freq_avg;
+ }
- return ret;
+ return policy->cur;
}
/* start new governor */
data->governor = policy->governor;
if (__cpufreq_governor(data, CPUFREQ_GOV_START)) {
+ printk(KERN_WARNING "Fail change to %s governor\n",
+ data->governor->name);
+
/* new governor failed, so re-start old one */
if (old_gov) {
data->governor = old_gov;
__cpufreq_governor(data, CPUFREQ_GOV_START);
+ printk(KERN_WARNING "Still stay at %s governor\n",
+ data->governor->name);
}
return -EINVAL;
}
subdir-$(x86) += vtd
+subdir-$(ia64) += vtd
subdir-$(x86) += amd
obj-y += iommu.o
struct list_head amd_iommu_head;
struct table_struct device_table;
-extern void *int_remap_table;
-extern spinlock_t int_remap_table_lock;
-
static int __init map_iommu_mmio_region(struct amd_iommu *iommu)
{
unsigned long mfn;
{
u32 entry;
- entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
- set_field_in_reg_u32(iommu->ht_tunnel_support ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_ENABLED, entry,
+ entry = readl(iommu->mmio_base + IOMMU_CONTROL_MMIO_OFFSET);
+
+ if ( enable )
+ {
+ set_field_in_reg_u32(iommu->ht_tunnel_support ? IOMMU_CONTROL_ENABLED :
+ IOMMU_CONTROL_DISABLED, entry,
IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_MASK,
IOMMU_CONTROL_HT_TUNNEL_TRANSLATION_SHIFT, &entry);
+ set_field_in_reg_u32(iommu->isochronous ? IOMMU_CONTROL_ENABLED :
+ IOMMU_CONTROL_DISABLED, entry,
+ IOMMU_CONTROL_ISOCHRONOUS_MASK,
+ IOMMU_CONTROL_ISOCHRONOUS_SHIFT, &entry);
+ set_field_in_reg_u32(iommu->coherent ? IOMMU_CONTROL_ENABLED :
+ IOMMU_CONTROL_DISABLED, entry,
+ IOMMU_CONTROL_COHERENT_MASK,
+ IOMMU_CONTROL_COHERENT_SHIFT, &entry);
+ set_field_in_reg_u32(iommu->res_pass_pw ? IOMMU_CONTROL_ENABLED :
+ IOMMU_CONTROL_DISABLED, entry,
+ IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_MASK,
+ IOMMU_CONTROL_RESP_PASS_POSTED_WRITE_SHIFT, &entry);
+ /* do not set PassPW bit */
+ set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
+ IOMMU_CONTROL_PASS_POSTED_WRITE_MASK,
+ IOMMU_CONTROL_PASS_POSTED_WRITE_SHIFT, &entry);
+ }
set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_ENABLED, entry,
+ IOMMU_CONTROL_DISABLED, entry,
IOMMU_CONTROL_TRANSLATION_ENABLE_MASK,
IOMMU_CONTROL_TRANSLATION_ENABLE_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
entry = readl(iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_ENABLED, entry,
+ IOMMU_CONTROL_DISABLED, entry,
IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_MASK,
IOMMU_CONTROL_COMMAND_BUFFER_ENABLE_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+
+ /* reset head and tail pointers */
+ writel(0x0, iommu->mmio_base + IOMMU_CMD_BUFFER_HEAD_OFFSET);
+ writel(0x0, iommu->mmio_base + IOMMU_CMD_BUFFER_TAIL_OFFSET);
}
static void __init register_iommu_exclusion_range(struct amd_iommu *iommu)
IOMMU_CONTROL_EVENT_LOG_INT_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
- set_field_in_reg_u32(enable ? IOMMU_CONTROL_ENABLED :
- IOMMU_CONTROL_DISABLED, entry,
+ set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, entry,
IOMMU_CONTROL_COMP_WAIT_INT_MASK,
IOMMU_CONTROL_COMP_WAIT_INT_SHIFT, &entry);
writel(entry, iommu->mmio_base+IOMMU_CONTROL_MMIO_OFFSET);
+
+ /* reset head and tail pointers */
+ writel(0x0, iommu->mmio_base + IOMMU_EVENT_LOG_HEAD_OFFSET);
+ writel(0x0, iommu->mmio_base + IOMMU_EVENT_LOG_TAIL_OFFSET);
}
static int amd_iommu_read_event_log(struct amd_iommu *iommu, u32 event[])
u32 code;
u64 *addr;
char * event_str[] = {"ILLEGAL_DEV_TABLE_ENTRY",
- "IO_PAGE_FALT",
- "DEV_TABLE_HW_ERROR",
- "PAGE_TABLE_HW_ERROR",
- "ILLEGAL_COMMAND_ERROR",
- "COMMAND_HW_ERROR",
- "IOTLB_INV_TIMEOUT",
- "INVALID_DEV_REQUEST"};
-
- code = get_field_from_reg_u32(entry[1],
- IOMMU_EVENT_CODE_MASK,
- IOMMU_EVENT_CODE_SHIFT);
-
- if ( (code > IOMMU_EVENT_INVALID_DEV_REQUEST)
- || (code < IOMMU_EVENT_ILLEGAL_DEV_TABLE_ENTRY) )
+ "IO_PAGE_FALT",
+ "DEV_TABLE_HW_ERROR",
+ "PAGE_TABLE_HW_ERROR",
+ "ILLEGAL_COMMAND_ERROR",
+ "COMMAND_HW_ERROR",
+ "IOTLB_INV_TIMEOUT",
+ "INVALID_DEV_REQUEST"};
+
+ code = get_field_from_reg_u32(entry[1], IOMMU_EVENT_CODE_MASK,
+ IOMMU_EVENT_CODE_SHIFT);
+
+ if ( (code > IOMMU_EVENT_INVALID_DEV_REQUEST) ||
+ (code < IOMMU_EVENT_ILLEGAL_DEV_TABLE_ENTRY) )
{
amd_iov_error("Invalid event log entry!\n");
return;
static void amd_iommu_page_fault(int vector, void *dev_id,
struct cpu_user_regs *regs)
{
- u32 event[4];
+ u32 event[4];
+ u32 entry;
unsigned long flags;
int ret = 0;
struct amd_iommu *iommu = dev_id;
spin_lock_irqsave(&iommu->lock, flags);
ret = amd_iommu_read_event_log(iommu, event);
+ /* reset interrupt status bit */
+ entry = readl(iommu->mmio_base + IOMMU_STATUS_MMIO_OFFSET);
+ set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+ IOMMU_STATUS_EVENT_LOG_INT_MASK,
+ IOMMU_STATUS_EVENT_LOG_INT_SHIFT, &entry);
+ writel(entry, iommu->mmio_base+IOMMU_STATUS_MMIO_OFFSET);
spin_unlock_irqrestore(&iommu->lock, flags);
if ( ret != 0 )
{
int vector, ret;
- vector = assign_irq_vector(AUTO_ASSIGN);
- vector_to_iommu[vector] = iommu;
-
- /* make irq == vector */
- irq_vector[vector] = vector;
- vector_irq[vector] = vector;
-
- if ( !vector )
+ vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
+ if ( vector <= 0 )
{
- amd_iov_error("no vectors\n");
+ gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
return 0;
}
irq_desc[vector].handler = &iommu_msi_type;
- ret = request_irq(vector, amd_iommu_page_fault, 0, "amd_iommu", iommu);
+ vector_to_iommu[vector] = iommu;
+ ret = request_irq_vector(vector, amd_iommu_page_fault, 0,
+ "amd_iommu", iommu);
if ( ret )
{
+ irq_desc[vector].handler = &no_irq_type;
+ vector_to_iommu[vector] = NULL;
+ free_irq_vector(vector);
amd_iov_error("can't request irq\n");
return 0;
}
+ /* Make sure that vector is never re-used. */
+ vector_irq[vector] = NEVER_ASSIGN_IRQ;
+ iommu->vector = vector;
return vector;
}
static void __init deallocate_iommu_table_struct(
struct table_struct *table)
{
+ int order = 0;
if ( table->buffer )
{
- free_xenheap_pages(table->buffer,
- get_order_from_bytes(table->alloc_size));
+ order = get_order_from_bytes(table->alloc_size);
+ __free_amd_iommu_tables(table->buffer, order);
table->buffer = NULL;
}
}
static int __init allocate_iommu_table_struct(struct table_struct *table,
const char *name)
{
- table->buffer = (void *) alloc_xenheap_pages(
- get_order_from_bytes(table->alloc_size));
-
- if ( !table->buffer )
+ int order = 0;
+ if ( table->buffer == NULL )
{
- amd_iov_error("Error allocating %s\n", name);
- return -ENOMEM;
- }
+ order = get_order_from_bytes(table->alloc_size);
+ table->buffer = __alloc_amd_iommu_tables(order);
- memset(table->buffer, 0, table->alloc_size);
+ if ( table->buffer == NULL )
+ {
+ amd_iov_error("Error allocating %s\n", name);
+ return -ENOMEM;
+ }
+ memset(table->buffer, 0, PAGE_SIZE * (1UL << order));
+ }
return 0;
}
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
-DEFINE_SPINLOCK(int_remap_table_lock);
+#define INTREMAP_TABLE_ORDER 1
+static DEFINE_SPINLOCK(int_remap_table_lock);
void *int_remap_table = NULL;
static u8 *get_intremap_entry(u8 vector, u8 dm)
int __init amd_iommu_setup_intremap_table(void)
{
- unsigned long flags;
-
- spin_lock_irqsave(&int_remap_table_lock, flags);
if ( int_remap_table == NULL )
- int_remap_table = (void *)alloc_xenheap_pages(1);
- if ( !int_remap_table )
{
- spin_unlock_irqrestore(&int_remap_table_lock, flags);
- return -ENOMEM;
+ int_remap_table = __alloc_amd_iommu_tables(INTREMAP_TABLE_ORDER);
+ if ( int_remap_table == NULL )
+ return -ENOMEM;
+ memset(int_remap_table, 0, PAGE_SIZE * (1UL << INTREMAP_TABLE_ORDER));
}
- memset((u8*)int_remap_table, 0, PAGE_SIZE*2);
- spin_unlock_irqrestore(&int_remap_table_lock, flags);
return 0;
}
int __init deallocate_intremap_table(void)
{
- unsigned long flags;
-
- spin_lock_irqsave(&int_remap_table_lock, flags);
if ( int_remap_table )
{
- free_xenheap_pages(int_remap_table, 1);
+ __free_amd_iommu_tables(int_remap_table, INTREMAP_TABLE_ORDER);
int_remap_table = NULL;
}
- spin_unlock_irqrestore(&int_remap_table_lock, flags);
return 0;
}
}
}
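+/*
+ * IOMMU page tables are now allocated from the domain heap, so the level-1
+ * table is mapped with map_domain_page() for the duration of the update.
+ */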
-static void clear_page_table_entry_present(u32 *pte)
+static void clear_iommu_l1e_present(u64 l2e, unsigned long gfn)
{
- set_field_in_reg_u32(IOMMU_CONTROL_DISABLED, pte[0],
- IOMMU_PTE_PRESENT_MASK,
- IOMMU_PTE_PRESENT_SHIFT, &pte[0]);
+ u32 *l1e;
+ int offset;
+ void *l1_table;
+
+ l1_table = map_domain_page(l2e >> PAGE_SHIFT);
+
+ offset = gfn & (~PTE_PER_TABLE_MASK);
+ l1e = (u32*)(l1_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
+
+ /* clear l1 entry */
+ l1e[0] = l1e[1] = 0;
+
+ unmap_domain_page(l1_table);
}
-static void set_page_table_entry_present(u32 *pte, u64 page_addr,
- int iw, int ir)
+static void set_iommu_l1e_present(u64 l2e, unsigned long gfn,
+ u64 maddr, int iw, int ir)
{
u64 addr_lo, addr_hi;
u32 entry;
+ void *l1_table;
+ int offset;
+ u32 *l1e;
+
+ l1_table = map_domain_page(l2e >> PAGE_SHIFT);
- addr_lo = page_addr & DMA_32BIT_MASK;
- addr_hi = page_addr >> 32;
+ offset = gfn & (~PTE_PER_TABLE_MASK);
+ l1e = (u32*)((u8*)l1_table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE));
+
+ addr_lo = maddr & DMA_32BIT_MASK;
+ addr_hi = maddr >> 32;
set_field_in_reg_u32((u32)addr_hi, 0,
IOMMU_PTE_ADDR_HIGH_MASK,
IOMMU_CONTROL_DISABLED, entry,
IOMMU_PTE_IO_READ_PERMISSION_MASK,
IOMMU_PTE_IO_READ_PERMISSION_SHIFT, &entry);
- pte[1] = entry;
+ l1e[1] = entry;
set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, 0,
IOMMU_PTE_ADDR_LOW_MASK,
set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
IOMMU_PTE_PRESENT_MASK,
IOMMU_PTE_PRESENT_SHIFT, &entry);
- pte[0] = entry;
-}
+ l1e[0] = entry;
+ unmap_domain_page(l1_table);
+}
static void amd_iommu_set_page_directory_entry(u32 *pde,
u64 next_ptr, u8 next_level)
dte[0] = entry;
}
-void *amd_iommu_get_vptr_from_page_table_entry(u32 *entry)
+u64 amd_iommu_get_next_table_from_pte(u32 *entry)
{
u64 addr_lo, addr_hi, ptr;
IOMMU_DEV_TABLE_PAGE_TABLE_PTR_HIGH_SHIFT);
ptr = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
- return ptr ? maddr_to_virt((unsigned long)ptr) : NULL;
+ return ptr;
}
static int amd_iommu_is_pte_present(u32 *entry)
IOMMU_DEV_TABLE_TRANSLATION_VALID_SHIFT));
}
-static void *get_pte_from_page_tables(void *table, int level,
- unsigned long io_pfn)
+static u64 iommu_l2e_from_pfn(struct page_info *table, int level,
+ unsigned long io_pfn)
{
unsigned long offset;
void *pde = NULL;
+ void *table_vaddr;
+ u64 next_table_maddr = 0;
- BUG_ON(table == NULL);
+ BUG_ON( table == NULL || level == 0 );
- while ( level > 0 )
+ while ( level > 1 )
{
offset = io_pfn >> ((PTE_PER_TABLE_SHIFT *
(level - IOMMU_PAGING_MODE_LEVEL_1)));
offset &= ~PTE_PER_TABLE_MASK;
- pde = table + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE);
- if ( level == 1 )
- break;
- if ( !pde )
- return NULL;
+ table_vaddr = map_domain_page(page_to_mfn(table));
+ pde = table_vaddr + (offset * IOMMU_PAGE_TABLE_ENTRY_SIZE);
+ next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
+
if ( !amd_iommu_is_pte_present(pde) )
{
- void *next_table = alloc_xenheap_page();
- if ( next_table == NULL )
- return NULL;
- memset(next_table, 0, PAGE_SIZE);
- if ( *(u64 *)pde == 0 )
+ if ( next_table_maddr == 0 )
{
- unsigned long next_ptr = (u64)virt_to_maddr(next_table);
+ table = alloc_amd_iommu_pgtable();
+ if ( table == NULL )
+ return 0;
+ next_table_maddr = page_to_maddr(table);
amd_iommu_set_page_directory_entry(
- (u32 *)pde, next_ptr, level - 1);
- }
- else
- {
- free_xenheap_page(next_table);
+ (u32 *)pde, next_table_maddr, level - 1);
}
+ else /* should never reach here */
+ return 0;
}
- table = amd_iommu_get_vptr_from_page_table_entry(pde);
+
+ unmap_domain_page(table_vaddr);
+ table = maddr_to_page(next_table_maddr);
level--;
}
- return pde;
+ return next_table_maddr;
}
int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn)
{
- void *pte;
- unsigned long flags;
- u64 maddr;
+ u64 iommu_l2e;
struct hvm_iommu *hd = domain_hvm_iommu(d);
int iw = IOMMU_IO_WRITE_ENABLED;
int ir = IOMMU_IO_READ_ENABLED;
BUG_ON( !hd->root_table );
- spin_lock_irqsave(&hd->mapping_lock, flags);
+ spin_lock(&hd->mapping_lock);
if ( is_hvm_domain(d) && !hd->p2m_synchronized )
goto out;
- maddr = (u64)mfn << PAGE_SHIFT;
- pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
- if ( pte == NULL )
+ iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
+ if ( iommu_l2e == 0 )
{
+ spin_unlock(&hd->mapping_lock);
amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
return -EFAULT;
}
+ set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
- set_page_table_entry_present((u32 *)pte, maddr, iw, ir);
out:
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ spin_unlock(&hd->mapping_lock);
return 0;
}
int amd_iommu_unmap_page(struct domain *d, unsigned long gfn)
{
- void *pte;
+ u64 iommu_l2e;
unsigned long flags;
- u64 io_addr = gfn;
- int requestor_id;
struct amd_iommu *iommu;
struct hvm_iommu *hd = domain_hvm_iommu(d);
BUG_ON( !hd->root_table );
- spin_lock_irqsave(&hd->mapping_lock, flags);
+ spin_lock(&hd->mapping_lock);
if ( is_hvm_domain(d) && !hd->p2m_synchronized )
{
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ spin_unlock(&hd->mapping_lock);
return 0;
}
- requestor_id = hd->domain_id;
- io_addr = (u64)gfn << PAGE_SHIFT;
+ iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
- pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
- if ( pte == NULL )
+ if ( iommu_l2e == 0 )
{
+ spin_unlock(&hd->mapping_lock);
amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
return -EFAULT;
}
/* mark PTE as 'page not present' */
- clear_page_table_entry_present((u32 *)pte);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ clear_iommu_l1e_present(iommu_l2e, gfn);
+ spin_unlock(&hd->mapping_lock);
/* send INVALIDATE_IOMMU_PAGES command */
for_each_amd_iommu ( iommu )
{
spin_lock_irqsave(&iommu->lock, flags);
- invalidate_iommu_page(iommu, io_addr, requestor_id);
+ invalidate_iommu_page(iommu, (u64)gfn << PAGE_SHIFT, hd->domain_id);
flush_command_buffer(iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
}
unsigned long phys_addr,
unsigned long size, int iw, int ir)
{
- unsigned long flags, npages, i;
- void *pte;
+ u64 iommu_l2e;
+ unsigned long npages, i;
struct hvm_iommu *hd = domain_hvm_iommu(domain);
npages = region_to_pages(phys_addr, size);
- spin_lock_irqsave(&hd->mapping_lock, flags);
+ spin_lock(&hd->mapping_lock);
for ( i = 0; i < npages; ++i )
{
- pte = get_pte_from_page_tables(
+ iommu_l2e = iommu_l2e_from_pfn(
hd->root_table, hd->paging_mode, phys_addr >> PAGE_SHIFT);
- if ( pte == NULL )
+
+ if ( iommu_l2e == 0 )
{
- amd_iov_error(
- "Invalid IO pagetable entry phys_addr = %lx\n", phys_addr);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ spin_unlock(&hd->mapping_lock);
+ amd_iov_error("Invalid IO pagetable entry phys_addr = %lx\n",
+ phys_addr);
return -EFAULT;
}
- set_page_table_entry_present((u32 *)pte,
- phys_addr, iw, ir);
+
+ set_iommu_l1e_present(iommu_l2e,
+ (phys_addr >> PAGE_SHIFT), phys_addr, iw, ir);
+
phys_addr += PAGE_SIZE;
}
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ spin_unlock(&hd->mapping_lock);
return 0;
}
int amd_iommu_sync_p2m(struct domain *d)
{
- unsigned long mfn, gfn, flags;
- void *pte;
- u64 maddr;
- struct list_head *entry;
+ unsigned long mfn, gfn;
+ u64 iommu_l2e;
struct page_info *page;
struct hvm_iommu *hd;
int iw = IOMMU_IO_WRITE_ENABLED;
hd = domain_hvm_iommu(d);
- spin_lock_irqsave(&hd->mapping_lock, flags);
+ spin_lock(&hd->mapping_lock);
if ( hd->p2m_synchronized )
goto out;
- for ( entry = d->page_list.next; entry != &d->page_list;
- entry = entry->next )
+ spin_lock(&d->page_alloc_lock);
+
+ page_list_for_each ( page, &d->page_list )
{
- page = list_entry(entry, struct page_info, list);
mfn = page_to_mfn(page);
gfn = get_gpfn_from_mfn(mfn);
if ( gfn == INVALID_M2P_ENTRY )
continue;
- maddr = (u64)mfn << PAGE_SHIFT;
- pte = get_pte_from_page_tables(hd->root_table, hd->paging_mode, gfn);
- if ( pte == NULL )
+ iommu_l2e = iommu_l2e_from_pfn(hd->root_table, hd->paging_mode, gfn);
+
+ if ( iommu_l2e == 0 )
{
+ spin_unlock(&d->page_alloc_lock);
+ spin_unlock(&hd->mapping_lock);
amd_iov_error("Invalid IO pagetable entry gfn = %lx\n", gfn);
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
return -EFAULT;
}
- set_page_table_entry_present((u32 *)pte, maddr, iw, ir);
+
+ set_iommu_l1e_present(iommu_l2e, gfn, (u64)mfn << PAGE_SHIFT, iw, ir);
}
+ spin_unlock(&d->page_alloc_lock);
+
hd->p2m_synchronized = 1;
out:
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
+ spin_unlock(&hd->mapping_lock);
return 0;
}
+
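+/*
+ * Flush every IOTLB entry belonging to the domain: the S (size) flag
+ * together with the all-ones page address asks the IOMMU to invalidate the
+ * domain's whole address space in a single INVALIDATE_IOMMU_PAGES command.
+ */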
+void invalidate_all_iommu_pages(struct domain *d)
+{
+ u32 cmd[4], entry;
+ unsigned long flags;
+ struct amd_iommu *iommu;
+ int domain_id = d->domain_id;
+ u64 addr_lo = 0x7FFFFFFFFFFFF000ULL & DMA_32BIT_MASK;
+ u64 addr_hi = 0x7FFFFFFFFFFFF000ULL >> 32;
+
+ set_field_in_reg_u32(domain_id, 0,
+ IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_MASK,
+ IOMMU_INV_IOMMU_PAGES_DOMAIN_ID_SHIFT, &entry);
+ set_field_in_reg_u32(IOMMU_CMD_INVALIDATE_IOMMU_PAGES, entry,
+ IOMMU_CMD_OPCODE_MASK, IOMMU_CMD_OPCODE_SHIFT,
+ &entry);
+ cmd[1] = entry;
+
+ set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, 0,
+ IOMMU_INV_IOMMU_PAGES_S_FLAG_MASK,
+ IOMMU_INV_IOMMU_PAGES_S_FLAG_SHIFT, &entry);
+ set_field_in_reg_u32(IOMMU_CONTROL_ENABLED, entry,
+ IOMMU_INV_IOMMU_PAGES_PDE_FLAG_MASK,
+ IOMMU_INV_IOMMU_PAGES_PDE_FLAG_SHIFT, &entry);
+ set_field_in_reg_u32((u32)addr_lo >> PAGE_SHIFT, entry,
+ IOMMU_INV_IOMMU_PAGES_ADDR_LOW_MASK,
+ IOMMU_INV_IOMMU_PAGES_ADDR_LOW_SHIFT, &entry);
+ cmd[2] = entry;
+
+ set_field_in_reg_u32((u32)addr_hi, 0,
+ IOMMU_INV_IOMMU_PAGES_ADDR_HIGH_MASK,
+ IOMMU_INV_IOMMU_PAGES_ADDR_HIGH_SHIFT, &entry);
+ cmd[3] = entry;
+
+ cmd[0] = 0;
+
+ for_each_amd_iommu ( iommu )
+ {
+ spin_lock_irqsave(&iommu->lock, flags);
+ send_iommu_command(iommu, cmd);
+ flush_command_buffer(iommu);
+ spin_unlock_irqrestore(&iommu->lock, flags);
+ }
+}
#include <xen/pci_regs.h>
#include <asm/amd-iommu.h>
#include <asm/hvm/svm/amd-iommu-proto.h>
-#include <asm/mm.h>
extern unsigned short ivrs_bdf_entries;
extern struct ivrs_mappings *ivrs_mappings;
extern void *int_remap_table;
-static void deallocate_domain_page_tables(struct hvm_iommu *hd)
-{
- if ( hd->root_table )
- free_xenheap_page(hd->root_table);
-}
-
-static void deallocate_domain_resources(struct hvm_iommu *hd)
-{
- deallocate_domain_page_tables(hd);
-}
-
int __init amd_iommu_init(void)
{
struct amd_iommu *iommu;
struct domain *domain, struct amd_iommu *iommu, int bdf)
{
void *dte;
- u64 root_ptr;
- u64 intremap_ptr;
unsigned long flags;
int req_id;
u8 sys_mgt, dev_ex;
BUG_ON( !hd->root_table || !hd->paging_mode || !int_remap_table );
- root_ptr = (u64)virt_to_maddr(hd->root_table);
/* get device-table entry */
req_id = ivrs_mappings[bdf].dte_requestor_id;
- dte = iommu->dev_table.buffer +
- (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+ dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
- intremap_ptr = (u64)virt_to_maddr(int_remap_table);
+ spin_lock_irqsave(&iommu->lock, flags);
if ( !amd_iommu_is_dte_page_translation_valid((u32 *)dte) )
{
- spin_lock_irqsave(&iommu->lock, flags);
-
/* bind DTE to domain page-tables */
sys_mgt = ivrs_mappings[req_id].dte_sys_mgt_enable;
dev_ex = ivrs_mappings[req_id].dte_allow_exclusion;
- amd_iommu_set_dev_table_entry((u32 *)dte, root_ptr, intremap_ptr,
+
+ amd_iommu_set_dev_table_entry((u32 *)dte,
+ page_to_maddr(hd->root_table),
+ virt_to_maddr(int_remap_table),
hd->domain_id, sys_mgt, dev_ex,
hd->paging_mode);
invalidate_interrupt_table(iommu, req_id);
flush_command_buffer(iommu);
amd_iov_info("Enable DTE:0x%x, "
- "root_ptr:%"PRIx64", domain_id:%d, paging_mode:%d\n",
- req_id, root_ptr, hd->domain_id, hd->paging_mode);
-
- spin_unlock_irqrestore(&iommu->lock, flags);
+ "root_table:%"PRIx64", interrupt_table:%"PRIx64", "
+ "domain_id:%d, paging_mode:%d\n",
+ req_id, (u64)page_to_maddr(hd->root_table),
+ (u64)virt_to_maddr(int_remap_table), hd->domain_id,
+ hd->paging_mode);
}
+
+ spin_unlock_irqrestore(&iommu->lock, flags);
+
}
static void amd_iommu_setup_dom0_devices(struct domain *d)
u32 l;
int bdf;
- write_lock(&pcidevs_lock);
+ spin_lock(&pcidevs_lock);
for ( bus = 0; bus < 256; bus++ )
{
for ( dev = 0; dev < 32; dev++ )
}
}
}
- write_unlock(&pcidevs_lock);
+ spin_unlock(&pcidevs_lock);
}
int amd_iov_detect(void)
static int allocate_domain_resources(struct hvm_iommu *hd)
{
/* allocate root table */
- unsigned long flags;
-
- spin_lock_irqsave(&hd->mapping_lock, flags);
+ spin_lock(&hd->mapping_lock);
if ( !hd->root_table )
{
- hd->root_table = (void *)alloc_xenheap_page();
+ hd->root_table = alloc_amd_iommu_pgtable();
if ( !hd->root_table )
- goto error_out;
- memset((u8*)hd->root_table, 0, PAGE_SIZE);
+ {
+ spin_unlock(&hd->mapping_lock);
+ return -ENOMEM;
+ }
}
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
-
+ spin_unlock(&hd->mapping_lock);
return 0;
-
- error_out:
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
- return -ENOMEM;
}
static int get_paging_mode(unsigned long entries)
/* allocate page directory */
if ( allocate_domain_resources(hd) != 0 )
{
- deallocate_domain_resources(hd);
+ if ( hd->root_table )
+ free_domheap_page(hd->root_table);
return -ENOMEM;
}
int req_id;
req_id = ivrs_mappings[bdf].dte_requestor_id;
- dte = iommu->dev_table.buffer +
- (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+ dte = iommu->dev_table.buffer + (req_id * IOMMU_DEV_TABLE_ENTRY_SIZE);
+ spin_lock_irqsave(&iommu->lock, flags);
if ( amd_iommu_is_dte_page_translation_valid((u32 *)dte) )
{
- spin_lock_irqsave(&iommu->lock, flags);
memset (dte, 0, IOMMU_DEV_TABLE_ENTRY_SIZE);
invalidate_dev_table_entry(iommu, req_id);
flush_command_buffer(iommu);
" domain_id:%d, paging_mode:%d\n",
req_id, domain_hvm_iommu(domain)->domain_id,
domain_hvm_iommu(domain)->paging_mode);
- spin_unlock_irqrestore(&iommu->lock, flags);
}
+ spin_unlock_irqrestore(&iommu->lock, flags);
}
static int reassign_device( struct domain *source, struct domain *target,
struct amd_iommu *iommu;
int bdf;
- pdev = pci_lock_domain_pdev(source, bus, devfn);
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ pdev = pci_get_pdev_by_domain(source, bus, devfn);
if ( !pdev )
- return -ENODEV;
+ return -ENODEV;
bdf = (bus << 8) | devfn;
/* supported device? */
iommu = (bdf < ivrs_bdf_entries) ?
- find_iommu_for_device(bus, pdev->devfn) : NULL;
+ find_iommu_for_device(bus, pdev->devfn) : NULL;
if ( !iommu )
{
- spin_unlock(&pdev->lock);
- amd_iov_error("Fail to find iommu."
- " %x:%x.%x cannot be assigned to domain %d\n",
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
- return -ENODEV;
+ amd_iov_error("Fail to find iommu."
+ " %x:%x.%x cannot be assigned to domain %d\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id);
+ return -ENODEV;
}
amd_iommu_disable_domain_device(source, iommu, bdf);
- write_lock(&pcidevs_lock);
list_move(&pdev->domain_list, &target->arch.pdev_list);
- write_unlock(&pcidevs_lock);
pdev->domain = target;
amd_iommu_setup_domain_device(target, iommu, bdf);
bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
source->domain_id, target->domain_id);
- spin_unlock(&pdev->lock);
return 0;
}
return reassign_device(dom0, d, bus, devfn);
}
-static void deallocate_next_page_table(void *table, unsigned long index,
- int level)
+static void deallocate_next_page_table(struct page_info* pg, int level)
{
- unsigned long next_index;
- void *next_table, *pde;
- int next_level;
+ void *table_vaddr, *pde;
+ u64 next_table_maddr;
+ int index;
- pde = table + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
- next_table = amd_iommu_get_vptr_from_page_table_entry((u32 *)pde);
+ table_vaddr = map_domain_page(page_to_mfn(pg));
- if ( next_table )
+ if ( level > 1 )
{
- next_level = level - 1;
- if ( next_level > 1 )
+ for ( index = 0; index < PTE_PER_TABLE_SIZE; index++ )
{
- next_index = 0;
- do
+ pde = table_vaddr + (index * IOMMU_PAGE_TABLE_ENTRY_SIZE);
+ next_table_maddr = amd_iommu_get_next_table_from_pte(pde);
+ if ( next_table_maddr != 0 )
{
- deallocate_next_page_table(next_table,
- next_index, next_level);
- next_index++;
- } while (next_index < PTE_PER_TABLE_SIZE);
+ deallocate_next_page_table(
+ maddr_to_page(next_table_maddr), level - 1);
+ }
}
-
- free_xenheap_page(next_table);
}
+
+ unmap_domain_page(table_vaddr);
+ free_amd_iommu_pgtable(pg);
}
static void deallocate_iommu_page_tables(struct domain *d)
{
- unsigned long index;
struct hvm_iommu *hd = domain_hvm_iommu(d);
- if ( hd ->root_table )
+ spin_lock(&hd->mapping_lock);
+ if ( hd->root_table )
{
- index = 0;
-
- do
- {
- deallocate_next_page_table(hd->root_table,
- index, hd->paging_mode);
- index++;
- } while ( index < PTE_PER_TABLE_SIZE );
-
- free_xenheap_page(hd ->root_table);
+ deallocate_next_page_table(hd->root_table, hd->paging_mode);
+ hd->root_table = NULL;
}
-
- hd ->root_table = NULL;
+ spin_unlock(&hd->mapping_lock);
}
+
static void amd_iommu_domain_destroy(struct domain *d)
{
deallocate_iommu_page_tables(d);
+ invalidate_all_iommu_pages(d);
}
static int amd_iommu_return_device(
#include <xen/event.h>
#include <xen/iommu.h>
+#include <asm/hvm/irq.h>
+#include <asm/hvm/iommu.h>
+#include <xen/hvm/irq.h>
+
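+/*
+ * Only legacy (non-MSI, non-translated) passthrough interrupts need the
+ * fall-back EOI timer, since the guest has to deassert the line itself.
+ */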
+static int pt_irq_need_timer(uint32_t flags)
+{
+ return !(flags & (HVM_IRQ_DPCI_GUEST_MSI | HVM_IRQ_DPCI_TRANSLATE));
+}
static void pt_irq_time_out(void *data)
{
int vector;
struct hvm_irq_dpci *dpci = NULL;
struct dev_intx_gsi_link *digl;
+ struct hvm_girq_dpci_mapping *girq;
uint32_t device, intx;
+ DECLARE_BITMAP(machine_gsi_map, NR_IRQS);
+
+ bitmap_zero(machine_gsi_map, NR_IRQS);
spin_lock(&irq_map->dom->event_lock);
list_for_each_entry ( digl, &irq_map->digl_list, list )
{
guest_gsi = digl->gsi;
- machine_gsi = dpci->girq[guest_gsi].machine_gsi;
+ list_for_each_entry ( girq, &dpci->girq[guest_gsi], list )
+ {
+ machine_gsi = girq->machine_gsi;
+ set_bit(machine_gsi, machine_gsi_map);
+ }
device = digl->device;
intx = digl->intx;
hvm_pci_intx_deassert(irq_map->dom, device, intx);
}
- clear_bit(machine_gsi, dpci->dirq_mask);
- vector = domain_irq_to_vector(irq_map->dom, machine_gsi);
- dpci->mirq[machine_gsi].pending = 0;
+ for ( machine_gsi = find_first_bit(machine_gsi_map, NR_IRQS);
+ machine_gsi < NR_IRQS;
+ machine_gsi = find_next_bit(machine_gsi_map, NR_IRQS,
+ machine_gsi + 1) )
+ {
+ clear_bit(machine_gsi, dpci->dirq_mask);
+ vector = domain_irq_to_vector(irq_map->dom, machine_gsi);
+ dpci->mirq[machine_gsi].pending = 0;
+ }
+
spin_unlock(&irq_map->dom->event_lock);
- pirq_guest_eoi(irq_map->dom, machine_gsi);
+
+ for ( machine_gsi = find_first_bit(machine_gsi_map, NR_IRQS);
+ machine_gsi < NR_IRQS;
+ machine_gsi = find_next_bit(machine_gsi_map, NR_IRQS,
+ machine_gsi + 1) )
+ {
+ pirq_guest_eoi(irq_map->dom, machine_gsi);
+ }
}
int pt_irq_create_bind_vtd(
uint32_t machine_gsi, guest_gsi;
uint32_t device, intx, link;
struct dev_intx_gsi_link *digl;
- int pirq = pt_irq_bind->machine_irq;
+ struct hvm_girq_dpci_mapping *girq;
+ int rc, pirq = pt_irq_bind->machine_irq;
- if ( pirq < 0 || pirq >= NR_PIRQS )
+ if ( pirq < 0 || pirq >= NR_IRQS )
return -EINVAL;
spin_lock(&d->event_lock);
}
memset(hvm_irq_dpci, 0, sizeof(*hvm_irq_dpci));
for ( int i = 0; i < NR_IRQS; i++ )
+ {
INIT_LIST_HEAD(&hvm_irq_dpci->mirq[i].digl_list);
- }
+ INIT_LIST_HEAD(&hvm_irq_dpci->girq[i]);
+ }
- if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
- {
- xfree(hvm_irq_dpci);
- spin_unlock(&d->event_lock);
- return -EINVAL;
+ if ( domain_set_irq_dpci(d, hvm_irq_dpci) == 0 )
+ {
+ spin_unlock(&d->event_lock);
+ xfree(hvm_irq_dpci);
+ return -EINVAL;
+ }
}
if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI )
if ( !test_and_set_bit(pirq, hvm_irq_dpci->mapping))
{
- set_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags);
+ hvm_irq_dpci->mirq[pirq].flags = HVM_IRQ_DPCI_MACH_MSI |
+ HVM_IRQ_DPCI_GUEST_MSI;
hvm_irq_dpci->mirq[pirq].gmsi.gvec = pt_irq_bind->u.msi.gvec;
hvm_irq_dpci->mirq[pirq].gmsi.gflags = pt_irq_bind->u.msi.gflags;
hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] = pirq;
/* bind after hvm_irq_dpci is setup to avoid race with irq handler*/
- pirq_guest_bind(d->vcpu[0], pirq, 0);
+ rc = pirq_guest_bind(d->vcpu[0], pirq, 0);
+ if ( rc == 0 && pt_irq_bind->u.msi.gtable )
+ {
+ rc = msixtbl_pt_register(d, pirq, pt_irq_bind->u.msi.gtable);
+ if ( unlikely(rc) )
+ pirq_guest_unbind(d, pirq);
+ }
+ if ( unlikely(rc) )
+ {
+ hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] = 0;
+ hvm_irq_dpci->mirq[pirq].gmsi.gflags = 0;
+ hvm_irq_dpci->mirq[pirq].gmsi.gvec = 0;
+ hvm_irq_dpci->mirq[pirq].flags = 0;
+ clear_bit(pirq, hvm_irq_dpci->mapping);
+ spin_unlock(&d->event_lock);
+ return rc;
+ }
}
else if (hvm_irq_dpci->mirq[pirq].gmsi.gvec != pt_irq_bind->u.msi.gvec
||hvm_irq_dpci->msi_gvec_pirq[pt_irq_bind->u.msi.gvec] != pirq)
return -ENOMEM;
}
+ girq = xmalloc(struct hvm_girq_dpci_mapping);
+ if ( !girq )
+ {
+ xfree(digl);
+ spin_unlock(&d->event_lock);
+ return -ENOMEM;
+ }
+
digl->device = device;
digl->intx = intx;
digl->gsi = guest_gsi;
list_add_tail(&digl->list,
&hvm_irq_dpci->mirq[machine_gsi].digl_list);
- hvm_irq_dpci->girq[guest_gsi].valid = 1;
- hvm_irq_dpci->girq[guest_gsi].device = device;
- hvm_irq_dpci->girq[guest_gsi].intx = intx;
- hvm_irq_dpci->girq[guest_gsi].machine_gsi = machine_gsi;
+ girq->device = device;
+ girq->intx = intx;
+ girq->machine_gsi = machine_gsi;
+ list_add_tail(&girq->list, &hvm_irq_dpci->girq[guest_gsi]);
/* Bind the same mirq once in the same domain */
if ( !test_and_set_bit(machine_gsi, hvm_irq_dpci->mapping))
{
+ unsigned int vector = domain_irq_to_vector(d, machine_gsi);
+ unsigned int share;
+
hvm_irq_dpci->mirq[machine_gsi].dom = d;
+ if ( pt_irq_bind->irq_type == PT_IRQ_TYPE_MSI_TRANSLATE )
+ {
+ hvm_irq_dpci->mirq[machine_gsi].flags = HVM_IRQ_DPCI_MACH_MSI |
+ HVM_IRQ_DPCI_GUEST_PCI |
+ HVM_IRQ_DPCI_TRANSLATE;
+ share = 0;
+ }
+ else /* PT_IRQ_TYPE_PCI */
+ {
+ hvm_irq_dpci->mirq[machine_gsi].flags = HVM_IRQ_DPCI_MACH_PCI |
+ HVM_IRQ_DPCI_GUEST_PCI;
+ share = BIND_PIRQ__WILL_SHARE;
+ }
/* Init timer before binding */
- init_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)],
- pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0);
+ if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
+ init_timer(&hvm_irq_dpci->hvm_timer[vector],
+ pt_irq_time_out, &hvm_irq_dpci->mirq[machine_gsi], 0);
/* Deal with gsi for legacy devices */
- pirq_guest_bind(d->vcpu[0], machine_gsi, BIND_PIRQ__WILL_SHARE);
+ rc = pirq_guest_bind(d->vcpu[0], machine_gsi, share);
+ if ( unlikely(rc) )
+ {
+ if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
+ kill_timer(&hvm_irq_dpci->hvm_timer[vector]);
+ hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
+ clear_bit(machine_gsi, hvm_irq_dpci->mapping);
+ list_del(&girq->list);
+ xfree(girq);
+ list_del(&digl->list);
+ hvm_irq_dpci->link_cnt[link]--;
+ spin_unlock(&d->event_lock);
+ xfree(digl);
+ return rc;
+ }
}
gdprintk(XENLOG_INFO VTDPREFIX,
uint32_t device, intx, link;
struct list_head *digl_list, *tmp;
struct dev_intx_gsi_link *digl;
+ struct hvm_girq_dpci_mapping *girq;
machine_gsi = pt_irq_bind->machine_irq;
device = pt_irq_bind->u.pci.device;
}
hvm_irq_dpci->link_cnt[link]--;
- memset(&hvm_irq_dpci->girq[guest_gsi], 0,
- sizeof(struct hvm_girq_dpci_mapping));
+
+ list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
+ {
+ if ( girq->machine_gsi == machine_gsi )
+ {
+ list_del(&girq->list);
+ xfree(girq);
+ break;
+ }
+ }
/* clear the mirq info */
if ( test_bit(machine_gsi, hvm_irq_dpci->mapping))
if ( list_empty(&hvm_irq_dpci->mirq[machine_gsi].digl_list) )
{
pirq_guest_unbind(d, machine_gsi);
- kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)]);
+ msixtbl_pt_unregister(d, machine_gsi);
+ if ( pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
+ kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)]);
hvm_irq_dpci->mirq[machine_gsi].dom = NULL;
hvm_irq_dpci->mirq[machine_gsi].flags = 0;
clear_bit(machine_gsi, hvm_irq_dpci->mapping);
* PIC) and we need to detect that.
*/
set_bit(mirq, dpci->dirq_mask);
- if ( !test_bit(_HVM_IRQ_DPCI_MSI, &dpci->mirq[mirq].flags) )
+ if ( pt_irq_need_timer(dpci->mirq[mirq].flags) )
set_timer(&dpci->hvm_timer[domain_irq_to_vector(d, mirq)],
NOW() + PT_IRQ_TIME_OUT);
vcpu_kick(d->vcpu[0]);
return 1;
}
-void hvm_dpci_msi_eoi(struct domain *d, int vector)
+#ifdef SUPPORT_MSI_REMAPPING
+/* called with d->event_lock held */
+static void __msi_pirq_eoi(struct domain *d, int pirq)
{
struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
irq_desc_t *desc;
+
+ if ( ( pirq >= 0 ) && ( pirq < NR_IRQS ) &&
+ test_bit(pirq, hvm_irq_dpci->mapping) &&
+ ( hvm_irq_dpci->mirq[pirq].flags & HVM_IRQ_DPCI_MACH_MSI) )
+ {
+ BUG_ON(!local_irq_is_enabled());
+ desc = domain_spin_lock_irq_desc(d, pirq, NULL);
+ if ( !desc )
+ return;
+
+ desc->status &= ~IRQ_INPROGRESS;
+ spin_unlock_irq(&desc->lock);
+
+ pirq_guest_eoi(d, pirq);
+ }
+}
+
+void hvm_dpci_msi_eoi(struct domain *d, int vector)
+{
+ struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
int pirq;
if ( !iommu_enabled || (hvm_irq_dpci == NULL) )
return;
spin_lock(&d->event_lock);
+
pirq = hvm_irq_dpci->msi_gvec_pirq[vector];
+ __msi_pirq_eoi(d, pirq);
- if ( ( pirq >= 0 ) && (pirq < NR_PIRQS) &&
- test_bit(pirq, hvm_irq_dpci->mapping) &&
- (test_bit(_HVM_IRQ_DPCI_MSI, &hvm_irq_dpci->mirq[pirq].flags)))
- {
- BUG_ON(!local_irq_is_enabled());
- desc = domain_spin_lock_irq_desc(d, pirq, NULL);
- if (!desc)
- {
+ spin_unlock(&d->event_lock);
+}
+
+extern int vmsi_deliver(struct domain *d, int pirq);
+static int hvm_pci_msi_assert(struct domain *d, int pirq)
+{
+ return vmsi_deliver(d, pirq);
+}
+#endif
+
+void hvm_dirq_assist(struct vcpu *v)
+{
+ unsigned int irq;
+ uint32_t device, intx;
+ struct domain *d = v->domain;
+ struct hvm_irq_dpci *hvm_irq_dpci = d->arch.hvm_domain.irq.dpci;
+ struct dev_intx_gsi_link *digl;
+
+ if ( !iommu_enabled || (v->vcpu_id != 0) || (hvm_irq_dpci == NULL) )
+ return;
+
+ for ( irq = find_first_bit(hvm_irq_dpci->dirq_mask, NR_IRQS);
+ irq < NR_IRQS;
+ irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
+ {
+ if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
+ continue;
+
+ spin_lock(&d->event_lock);
+#ifdef SUPPORT_MSI_REMAPPING
+ if ( hvm_irq_dpci->mirq[irq].flags & HVM_IRQ_DPCI_GUEST_MSI )
+ {
+ hvm_pci_msi_assert(d, irq);
spin_unlock(&d->event_lock);
- return;
- }
+ continue;
+ }
+#endif
+ if ( pt_irq_need_timer(hvm_irq_dpci->mirq[irq].flags) )
+ stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)]);
- desc->status &= ~IRQ_INPROGRESS;
- spin_unlock_irq(&desc->lock);
+ list_for_each_entry ( digl, &hvm_irq_dpci->mirq[irq].digl_list, list )
+ {
+ device = digl->device;
+ intx = digl->intx;
+ hvm_pci_intx_assert(d, device, intx);
+ hvm_irq_dpci->mirq[irq].pending++;
- pirq_guest_eoi(d, pirq);
- }
+#ifdef SUPPORT_MSI_REMAPPING
+ if ( hvm_irq_dpci->mirq[irq].flags & HVM_IRQ_DPCI_TRANSLATE )
+ {
+ /* for translated MSI to INTx interrupt, eoi as early as possible */
+ __msi_pirq_eoi(d, irq);
+ }
+#endif
+ }
- spin_unlock(&d->event_lock);
+ /*
+ * Set a timer to see if the guest can finish the interrupt or not. For
+ * example, the guest OS may unmask the PIC during boot, before the
+ * guest driver is loaded. hvm_pci_intx_assert() may succeed, but the
+ * guest will never deal with the irq, then the physical interrupt line
+ * will never be deasserted.
+ */
+ if ( pt_irq_need_timer(hvm_irq_dpci->mirq[irq].flags) )
+ set_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, irq)],
+ NOW() + PT_IRQ_TIME_OUT);
+ spin_unlock(&d->event_lock);
+ }
+}
+
+static void __hvm_dpci_eoi(struct domain *d,
+ struct hvm_irq_dpci *hvm_irq_dpci,
+ struct hvm_girq_dpci_mapping *girq,
+ union vioapic_redir_entry *ent)
+{
+ uint32_t device, intx, machine_gsi;
+
+ device = girq->device;
+ intx = girq->intx;
+ hvm_pci_intx_deassert(d, device, intx);
+
+ machine_gsi = girq->machine_gsi;
+
+ /*
+ * No need to get vector lock for timer
+ * since interrupt is still not EOIed
+ */
+ if ( --hvm_irq_dpci->mirq[machine_gsi].pending ||
+ ( ent && ent->fields.mask ) ||
+ ! pt_irq_need_timer(hvm_irq_dpci->mirq[machine_gsi].flags) )
+ return;
+
+ stop_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, machine_gsi)]);
+ pirq_guest_eoi(d, machine_gsi);
}
void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
union vioapic_redir_entry *ent)
{
- struct hvm_irq_dpci *hvm_irq_dpci = NULL;
- uint32_t device, intx, machine_gsi;
+ struct hvm_irq_dpci *hvm_irq_dpci;
+ struct hvm_girq_dpci_mapping *girq;
- if ( !iommu_enabled)
+ if ( !iommu_enabled )
return;
if ( guest_gsi < NR_ISAIRQS )
spin_lock(&d->event_lock);
hvm_irq_dpci = domain_get_irq_dpci(d);
- if((hvm_irq_dpci == NULL) ||
- (guest_gsi >= NR_ISAIRQS &&
- !hvm_irq_dpci->girq[guest_gsi].valid) )
- {
- spin_unlock(&d->event_lock);
- return;
- }
+ if ( !hvm_irq_dpci )
+ goto unlock;
- device = hvm_irq_dpci->girq[guest_gsi].device;
- intx = hvm_irq_dpci->girq[guest_gsi].intx;
- hvm_pci_intx_deassert(d, device, intx);
+ list_for_each_entry ( girq, &hvm_irq_dpci->girq[guest_gsi], list )
+ __hvm_dpci_eoi(d, hvm_irq_dpci, girq, ent);
- machine_gsi = hvm_irq_dpci->girq[guest_gsi].machine_gsi;
- if ( --hvm_irq_dpci->mirq[machine_gsi].pending == 0 )
- {
- if ( (ent == NULL) || !ent->fields.mask )
- {
- /*
- * No need to get vector lock for timer
- * since interrupt is still not EOIed
- */
- stop_timer(&hvm_irq_dpci->hvm_timer[
- domain_irq_to_vector(d, machine_gsi)]);
- pirq_guest_eoi(d, machine_gsi);
- }
- }
+unlock:
spin_unlock(&d->event_lock);
}
#include <xen/paging.h>
#include <xen/guest_access.h>
-extern struct iommu_ops intel_iommu_ops;
-extern struct iommu_ops amd_iommu_ops;
static void parse_iommu_param(char *s);
static int iommu_populate_page_table(struct domain *d);
int intel_vtd_setup(void);
* pv Enable IOMMU for PV domains
* no-pv Disable IOMMU for PV domains (default)
* force|required Don't boot unless IOMMU is enabled
- * passthrough Bypass VT-d translation for Dom0
+ * passthrough Enable VT-d DMA passthrough (no DMA
+ * translation for Dom0)
+ * no-snoop Disable VT-d Snoop Control
+ * no-qinval Disable VT-d Queued Invalidation
+ * no-intremap Disable VT-d Interrupt Remapping
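+ *
+ * Options are comma-separated, e.g. "iommu=force,passthrough,no-intremap"
+ * (an illustrative combination, not a recommendation).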
*/
custom_param("iommu", parse_iommu_param);
int iommu_enabled = 0;
int iommu_pv_enabled = 0;
int force_iommu = 0;
int iommu_passthrough = 0;
+int iommu_snoop = 0;
+int iommu_qinval = 0;
+int iommu_intremap = 0;
static void __init parse_iommu_param(char *s)
{
char *ss;
iommu_enabled = 1;
+ iommu_snoop = 1;
+ iommu_qinval = 1;
+ iommu_intremap = 1;
do {
ss = strchr(s, ',');
force_iommu = 1;
else if ( !strcmp(s, "passthrough") )
iommu_passthrough = 1;
+ else if ( !strcmp(s, "no-snoop") )
+ iommu_snoop = 0;
+ else if ( !strcmp(s, "no-qinval") )
+ iommu_qinval = 0;
+ else if ( !strcmp(s, "no-intremap") )
+ iommu_intremap = 0;
s = ss + 1;
} while ( ss );
int iommu_add_device(struct pci_dev *pdev)
{
struct hvm_iommu *hd;
+
if ( !pdev->domain )
return -EINVAL;
+ ASSERT(spin_is_locked(&pcidevs_lock));
+
hd = domain_hvm_iommu(pdev->domain);
if ( !iommu_enabled || !hd->platform_ops )
return 0;
int assign_device(struct domain *d, u8 bus, u8 devfn)
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
- int rc;
+ int rc = 0;
if ( !iommu_enabled || !hd->platform_ops )
return 0;
+ spin_lock(&pcidevs_lock);
if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) )
- return rc;
+ goto done;
if ( has_arch_pdevs(d) && !is_hvm_domain(d) && !need_iommu(d) )
{
d->need_iommu = 1;
- return iommu_populate_page_table(d);
+ rc = iommu_populate_page_table(d);
+ goto done;
}
- return 0;
+done:
+ spin_unlock(&pcidevs_lock);
+ return rc;
}
static int iommu_populate_page_table(struct domain *d)
spin_lock(&d->page_alloc_lock);
- list_for_each_entry ( page, &d->page_list, list )
+ page_list_for_each ( page, &d->page_list )
{
if ( (page->u.inuse.type_info & PGT_type_mask) == PGT_writable_page )
{
return hd->platform_ops->unmap_page(d, gfn);
}
-void deassign_device(struct domain *d, u8 bus, u8 devfn)
+/* caller should hold the pcidevs_lock */
+int deassign_device(struct domain *d, u8 bus, u8 devfn)
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
+ struct pci_dev *pdev = NULL;
if ( !iommu_enabled || !hd->platform_ops )
- return;
+ return -EINVAL;
+
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ pdev = pci_get_pdev(bus, devfn);
+ if (!pdev)
+ return -ENODEV;
+
+ if (pdev->domain != d)
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "IOMMU: deassign a device not owned\n");
+ return -EINVAL;
+ }
hd->platform_ops->reassign_device(d, dom0, bus, devfn);
d->need_iommu = 0;
hd->platform_ops->teardown(d);
}
+
+ return 0;
}
static int iommu_setup(void)
group_id = ops->get_device_group_id(bus, devfn);
- read_lock(&pcidevs_lock);
+ spin_lock(&pcidevs_lock);
for_each_pdev( d, pdev )
{
if ( (pdev->bus == bus) && (pdev->devfn == devfn) )
bdf |= (pdev->devfn & 0xff) << 8;
if ( unlikely(copy_to_guest_offset(buf, i, &bdf, 1)) )
{
- read_unlock(&pcidevs_lock);
+ spin_unlock(&pcidevs_lock);
return -1;
}
i++;
}
}
- read_unlock(&pcidevs_lock);
+ spin_unlock(&pcidevs_lock);
return i;
}
#include <xen/list.h>
#include <xen/prefetch.h>
#include <xen/iommu.h>
+#include <asm/hvm/iommu.h>
+#include <asm/hvm/irq.h>
#include <xen/delay.h>
#include <xen/keyhandler.h>
LIST_HEAD(alldevs_list);
-rwlock_t pcidevs_lock = RW_LOCK_UNLOCKED;
+spinlock_t pcidevs_lock = SPIN_LOCK_UNLOCKED;
struct pci_dev *alloc_pdev(u8 bus, u8 devfn)
{
pdev = xmalloc(struct pci_dev);
if ( !pdev )
return NULL;
+ memset(pdev, 0, sizeof(struct pci_dev));
*((u8*) &pdev->bus) = bus;
*((u8*) &pdev->devfn) = devfn;
pdev->domain = NULL;
- spin_lock_init(&pdev->lock);
INIT_LIST_HEAD(&pdev->msi_list);
list_add(&pdev->alldevs_list, &alldevs_list);
+ spin_lock_init(&pdev->msix_table_lock);
return pdev;
}
xfree(pdev);
}
-struct pci_dev *pci_lock_pdev(int bus, int devfn)
+struct pci_dev *pci_get_pdev(int bus, int devfn)
{
- struct pci_dev *pdev;
+ struct pci_dev *pdev = NULL;
+
+ ASSERT(spin_is_locked(&pcidevs_lock));
- read_lock(&pcidevs_lock);
list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
if ( (pdev->bus == bus || bus == -1) &&
(pdev->devfn == devfn || devfn == -1) )
- {
- spin_lock(&pdev->lock);
- read_unlock(&pcidevs_lock);
- return pdev;
- }
- read_unlock(&pcidevs_lock);
+ {
+ return pdev;
+ }
return NULL;
}
-struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn)
+struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn)
{
- struct pci_dev *pdev;
+ struct pci_dev *pdev = NULL;
- read_lock(&pcidevs_lock);
- list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list )
- {
- spin_lock(&pdev->lock);
- if ( (pdev->bus == bus || bus == -1) &&
- (pdev->devfn == devfn || devfn == -1) &&
- (pdev->domain == d) )
- {
- read_unlock(&pcidevs_lock);
- return pdev;
- }
- spin_unlock(&pdev->lock);
- }
- read_unlock(&pcidevs_lock);
+ ASSERT(spin_is_locked(&pcidevs_lock));
+
+ list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
+ if ( (pdev->bus == bus || bus == -1) &&
+ (pdev->devfn == devfn || devfn == -1) &&
+ (pdev->domain == d) )
+ {
+ return pdev;
+ }
return NULL;
}
struct pci_dev *pdev;
int ret = -ENOMEM;
- write_lock(&pcidevs_lock);
+ spin_lock(&pcidevs_lock);
pdev = alloc_pdev(bus, devfn);
if ( !pdev )
goto out;
ret = 0;
- spin_lock(&pdev->lock);
if ( !pdev->domain )
{
pdev->domain = dom0;
ret = iommu_add_device(pdev);
if ( ret )
- {
- spin_unlock(&pdev->lock);
goto out;
- }
+
list_add(&pdev->domain_list, &dom0->arch.pdev_list);
}
- spin_unlock(&pdev->lock);
- printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
- PCI_SLOT(devfn), PCI_FUNC(devfn));
out:
- write_unlock(&pcidevs_lock);
+ spin_unlock(&pcidevs_lock);
+ printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus,
+ PCI_SLOT(devfn), PCI_FUNC(devfn));
return ret;
}
struct pci_dev *pdev;
int ret = -ENODEV;;
- write_lock(&pcidevs_lock);
+ spin_lock(&pcidevs_lock);
list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
if ( pdev->bus == bus && pdev->devfn == devfn )
{
- spin_lock(&pdev->lock);
ret = iommu_remove_device(pdev);
if ( pdev->domain )
list_del(&pdev->domain_list);
break;
}
- write_unlock(&pcidevs_lock);
+ spin_unlock(&pcidevs_lock);
+ return ret;
+}
+
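+/*
+ * Register a PCI Extended Function or Virtual Function and hand it to dom0
+ * by default, mirroring pci_add_device() above.
+ */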
+int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info)
+{
+ int ret;
+ char *pdev_type;
+ struct pci_dev *pdev;
+
+ if (info->is_extfn)
+ pdev_type = "Extended Function";
+ else if (info->is_virtfn)
+ pdev_type = "Virtual Function";
+ else
+ return -EINVAL;
+
+ ret = -ENOMEM;
+ spin_lock(&pcidevs_lock);
+ pdev = alloc_pdev(bus, devfn);
+ if ( !pdev )
+ goto out;
+
+ pdev->info = *info;
+
+ ret = 0;
+ if ( !pdev->domain )
+ {
+ pdev->domain = dom0;
+ ret = iommu_add_device(pdev);
+ if ( ret )
+ goto out;
+
+ list_add(&pdev->domain_list, &dom0->arch.pdev_list);
+ }
+
+out:
+ spin_unlock(&pcidevs_lock);
+ printk(XENLOG_DEBUG "PCI add %s %02x:%02x.%x\n", pdev_type,
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+
return ret;
}
hvm_irq_dpci = domain_get_irq_dpci(d);
if ( hvm_irq_dpci != NULL )
{
- for ( i = find_first_bit(hvm_irq_dpci->mapping, NR_PIRQS);
- i < NR_PIRQS;
- i = find_next_bit(hvm_irq_dpci->mapping, NR_PIRQS, i + 1) )
+ for ( i = find_first_bit(hvm_irq_dpci->mapping, NR_IRQS);
+ i < NR_IRQS;
+ i = find_next_bit(hvm_irq_dpci->mapping, NR_IRQS, i + 1) )
{
pirq_guest_unbind(d, i);
kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(i)]);
struct pci_dev *pdev;
u8 bus, devfn;
+ spin_lock(&pcidevs_lock);
pci_clean_dpci_irqs(d);
- while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) )
+ while ( (pdev = pci_get_pdev_by_domain(d, -1, -1)) )
{
pci_cleanup_msi(pdev);
bus = pdev->bus; devfn = pdev->devfn;
- spin_unlock(&pdev->lock);
deassign_device(d, bus, devfn);
}
+ spin_unlock(&pcidevs_lock);
}
+#ifdef SUPPORT_MSI_REMAPPING
static void dump_pci_devices(unsigned char ch)
{
struct pci_dev *pdev;
struct msi_desc *msi;
printk("==== PCI devices ====\n");
- read_lock(&pcidevs_lock);
+ spin_lock(&pcidevs_lock);
list_for_each_entry ( pdev, &alldevs_list, alldevs_list )
{
- spin_lock(&pdev->lock);
printk("%02x:%02x.%x - dom %-3d - MSIs < ",
pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn),
pdev->domain ? pdev->domain->domain_id : -1);
list_for_each_entry ( msi, &pdev->msi_list, list )
printk("%d ", msi->vector);
printk(">\n");
- spin_unlock(&pdev->lock);
}
- read_unlock(&pcidevs_lock);
+ spin_unlock(&pcidevs_lock);
}
static int __init setup_dump_pcidevs(void)
return 0;
}
__initcall(setup_dump_pcidevs);
-
+#endif
/*
subdir-$(x86) += x86
+subdir-$(ia64) += ia64
obj-y += iommu.o
obj-y += dmar.o
#include <xen/init.h>
#include <xen/bitmap.h>
+#include <xen/errno.h>
#include <xen/kernel.h>
#include <xen/acpi.h>
#include <xen/mm.h>
#include <xen/pci_regs.h>
#include <asm/string.h>
#include "dmar.h"
+#include "iommu.h"
int vtd_enabled = 1;
return 0;
}
-struct acpi_drhd_unit * acpi_find_matched_drhd_unit(u8 bus, u8 devfn)
+struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *pdev)
{
+ u8 bus, devfn;
struct acpi_drhd_unit *drhd;
struct acpi_drhd_unit *found = NULL, *include_all = NULL;
int i;
+ if (pdev->info.is_extfn) {
+ bus = pdev->bus;
+ devfn = 0;
+ } else if (pdev->info.is_virtfn) {
+ bus = pdev->info.physfn.bus;
+ devfn = PCI_SLOT(pdev->info.physfn.devfn) ? 0 : pdev->info.physfn.devfn;
+ } else {
+ bus = pdev->bus;
+ devfn = pdev->devfn;
+ }
+
list_for_each_entry ( drhd, &acpi_drhd_units, list )
{
for (i = 0; i < drhd->scope.devices_cnt; i++)
return found ? found : include_all;
}
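+/*
+ * Find the ATSR unit whose device scope covers this bus/devfn; fall back to a
+ * bus match or an all-ports entry, mirroring acpi_find_matched_drhd_unit().
+ */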
+struct acpi_atsr_unit * acpi_find_matched_atsr_unit(u8 bus, u8 devfn)
+{
+ struct acpi_atsr_unit *atsr;
+ struct acpi_atsr_unit *found = NULL, *include_all = NULL;
+ int i;
+
+ list_for_each_entry ( atsr, &acpi_atsr_units, list )
+ {
+ for (i = 0; i < atsr->scope.devices_cnt; i++)
+ if ( atsr->scope.devices[i] == PCI_BDF2(bus, devfn) )
+ return atsr;
+
+ if ( test_bit(bus, atsr->scope.buses) )
+ found = atsr;
+
+ if ( atsr->all_ports )
+ include_all = atsr;
+ }
+
+ return found ? found : include_all;
+}
+
/*
* Count number of devices in device scope. Do not include PCI sub
* hierarchies.
switch ( acpi_scope->dev_type )
{
case ACPI_DEV_P2PBRIDGE:
- {
sec_bus = pci_conf_read8(
bus, path->dev, path->fn, PCI_SECONDARY_BUS);
sub_bus = pci_conf_read8(
dmar_scope_add_buses(scope, sec_bus, sub_bus);
break;
- }
case ACPI_DEV_MSI_HPET:
dprintk(XENLOG_INFO VTDPREFIX, "found MSI HPET: bdf = %x:%x.%x\n",
break;
case ACPI_DEV_IOAPIC:
- {
dprintk(XENLOG_INFO VTDPREFIX, "found IOAPIC: bdf = %x:%x.%x\n",
bus, path->dev, path->fn);
scope->devices[didx++] = PCI_BDF(bus, path->dev, path->fn);
break;
}
- }
start += acpi_scope->length;
}
if ( rmrr->base_address >= rmrr->end_address )
{
- dprintk(XENLOG_ERR VTDPREFIX, "RMRR is incorrect.\n");
+ dprintk(XENLOG_ERR VTDPREFIX,
+ "RMRR error: base_addr %"PRIx64" end_address %"PRIx64"\n",
+ rmrr->base_address, rmrr->end_address);
return -EFAULT;
}
+#ifdef CONFIG_X86
+ /*
+ * This check is here simply to detect when RMRR values are not properly
+ * represented in the system memory map and to inform the user.
+ */
+ if ( (!page_is_ram_type(paddr_to_pfn(rmrr->base_address), RAM_TYPE_RESERVED))||
+ (!page_is_ram_type(paddr_to_pfn(rmrr->end_address) - 1, RAM_TYPE_RESERVED)) )
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "RMRR address range not in reserved memory base = %"PRIx64" end = %"PRIx64"; " \
+ "iommu_inclusive_mapping=1 parameter may be needed.\n",
+ rmrr->base_address, rmrr->end_address);
+ }
+#endif
+
rmrru = xmalloc(struct acpi_rmrr_unit);
if ( !rmrru )
return -ENOMEM;
return ret;
}
+#ifdef CONFIG_X86
+#include <asm/tboot.h>
+/* ACPI tables may not be DMA protected by tboot, so use DMAR copy */
+/* SINIT saved in SinitMleData in TXT heap (which is DMA protected) */
+#define parse_dmar_table(h) tboot_parse_dmar_table(h)
+#else
+#define parse_dmar_table(h) acpi_table_parse(ACPI_SIG_DMAR, h)
+#endif
+
int acpi_dmar_init(void)
{
int rc;
if ( !iommu_enabled )
goto fail;
- rc = acpi_table_parse(ACPI_SIG_DMAR, acpi_parse_dmar);
+ rc = parse_dmar_table(acpi_parse_dmar);
if ( rc )
goto fail;
if ( list_empty(&acpi_drhd_units) )
goto fail;
- printk("Intel VT-d has been enabled\n");
+ printk("Intel VT-d DMAR tables have been parsed.\n");
return 0;
for (idx = 0; (bdf = rmrr->scope.devices[idx]) && \
idx < rmrr->scope.devices_cnt; idx++)
-struct acpi_drhd_unit * acpi_find_matched_drhd_unit(u8 bus, u8 devfn);
+struct acpi_drhd_unit * acpi_find_matched_drhd_unit(struct pci_dev *pdev);
+struct acpi_atsr_unit * acpi_find_matched_atsr_unit(u8 bus, u8 devfn);
void dmar_scope_add_buses(struct dmar_scope *scope, u16 sec, u16 sub);
void dmar_scope_remove_buses(struct dmar_scope *scope, u16 sec, u16 sub);
void print_vtd_entries(struct iommu *iommu, int bus, int devfn, u64 gmfn);
void dump_iommu_info(unsigned char key);
-int qinval_setup(struct iommu *iommu);
-int intremap_setup(struct iommu *iommu);
+int enable_qinval(struct iommu *iommu);
+void disable_qinval(struct iommu *iommu);
+int enable_intremap(struct iommu *iommu);
+void disable_intremap(struct iommu *iommu);
int queue_invalidate_context(struct iommu *iommu,
u16 did, u16 source_id, u8 function_mask, u8 granu);
int queue_invalidate_iotlb(struct iommu *iommu,
--- /dev/null
+obj-y += vtd.o
--- /dev/null
+/*
+ * Copyright (c) 2008, Intel Corporation.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ *
+ * Copyright (C) Allen Kay <allen.m.kay@intel.com>
+ * Copyright (C) Weidong Han <weidong.han@intel.com>
+ */
+
+#include <xen/sched.h>
+#include <xen/domain_page.h>
+#include <xen/iommu.h>
+#include <xen/numa.h>
+#include <asm/xensystem.h>
+#include <asm/sal.h>
+#include "../iommu.h"
+#include "../dmar.h"
+#include "../vtd.h"
+
+
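+/* vector_irq maps each vector back to its IRQ; entries start out unassigned. */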
+int vector_irq[NR_VECTORS] __read_mostly = {
+ [0 ... NR_VECTORS - 1] = FREE_TO_ASSIGN_IRQ
+};
+/* irq_vector is indexed by the sum of all RTEs in all I/O APICs. */
+u8 irq_vector[NR_IRQS] __read_mostly;
+
+void *map_vtd_domain_page(u64 maddr)
+{
+ return (void *)((u64)map_domain_page(maddr >> PAGE_SHIFT) |
+ (maddr & (PAGE_SIZE - PAGE_SIZE_4K)));
+}
+
+void unmap_vtd_domain_page(void *va)
+{
+ unmap_domain_page(va);
+}
+
+/* Allocate page table, return its machine address */
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
+{
+ struct page_info *pg;
+ u64 *vaddr;
+
+ pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
+ d ? MEMF_node(domain_to_node(d)) : 0);
+ vaddr = map_domain_page(page_to_mfn(pg));
+ if ( !vaddr )
+ return 0;
+ memset(vaddr, 0, PAGE_SIZE * npages);
+
+ iommu_flush_cache_page(vaddr, npages);
+ unmap_domain_page(vaddr);
+
+ return page_to_maddr(pg);
+}
+
+void free_pgtable_maddr(u64 maddr)
+{
+ if ( maddr != 0 )
+ free_domheap_page(maddr_to_page(maddr));
+}
+
+unsigned int get_cache_line_size(void)
+{
+ return L1_CACHE_BYTES;
+}
+
+void cacheline_flush(char * addr)
+{
+ ia64_fc(addr);
+ ia64_sync_i();
+ ia64_srlz_i();
+}
+
+void flush_all_cache()
+{
+ ia64_sal_cache_flush(3);
+}
+
+void * map_to_nocache_virt(int nr_iommus, u64 maddr)
+{
+ return (void *) ( maddr + __IA64_UNCACHED_OFFSET);
+}
+
+struct hvm_irq_dpci *domain_get_irq_dpci(struct domain *domain)
+{
+ if ( !domain )
+ return NULL;
+
+ return domain->arch.hvm_domain.irq.dpci;
+}
+
+int domain_set_irq_dpci(struct domain *domain, struct hvm_irq_dpci *dpci)
+{
+ if ( !domain || !dpci )
+ return 0;
+
+ domain->arch.hvm_domain.irq.dpci = dpci;
+ return 1;
+}
+
+void hvm_dpci_isairq_eoi(struct domain *d, unsigned int isairq)
+{
+ /* dummy */
+}
+
+static int do_dom0_iommu_mapping(unsigned long start, unsigned long end,
+ void *arg)
+{
+ unsigned long tmp, pfn, j, page_addr = start;
+ struct domain *d = (struct domain *)arg;
+
+ extern int xen_in_range(paddr_t start, paddr_t end);
+ /* Set up 1:1 page table for dom0 for all Ram except Xen bits.*/
+
+ while (page_addr < end)
+ {
+ if (xen_in_range(page_addr, page_addr + PAGE_SIZE))
+ {
+ page_addr += PAGE_SIZE; /* skip Xen's own range but keep advancing */
+ continue;
+ }
+
+ pfn = page_addr >> PAGE_SHIFT;
+ tmp = 1 << (PAGE_SHIFT - PAGE_SHIFT_4K);
+ for ( j = 0; j < tmp; j++ )
+ iommu_map_page(d, (pfn*tmp+j), (pfn*tmp+j));
+
+ page_addr += PAGE_SIZE;
+ }
+ return 0;
+}
+
+void iommu_set_dom0_mapping(struct domain *d)
+{
+ if (dom0)
+ BUG_ON(d != dom0);
+ efi_memmap_walk(do_dom0_iommu_mapping, d);
+}
#include <xen/irq.h>
#include <xen/sched.h>
#include <xen/iommu.h>
+#include <asm/hvm/iommu.h>
#include <xen/time.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include "vtd.h"
#include "extern.h"
+#ifndef dest_SMI
+#define dest_SMI -1
+#endif
+
+/* The maximum number of IOAPIC (or IOSAPIC) pins. Typical values are 24 or
+ * 48 on x86 and Itanium platforms. Here we use a bigger number, 256, which
+ * should be big enough. Actually IREMAP_ENTRY_NR is also 256 now.
+ */
+#define MAX_IOAPIC_PIN_NUM 256
+
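+/* Per-pin cache of allocated interrupt remapping table indexes (-1 = unused). */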
+static int ioapic_pin_to_intremap_index[MAX_IOAPIC_PIN_NUM] =
+ { [0 ... MAX_IOAPIC_PIN_NUM-1] = -1 };
+
u16 apicid_to_bdf(int apic_id)
{
struct acpi_drhd_unit *drhd = ioapic_to_drhd(apic_id);
}
static int ioapic_rte_to_remap_entry(struct iommu *iommu,
- int apic_id, struct IO_xAPIC_route_entry *old_rte,
+ int apic_id, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
unsigned int rte_upper, unsigned int value)
{
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
- if ( remap_rte->format == 0 )
+ if ( ioapic_pin_to_intremap_index[ioapic_pin] < 0 )
{
ir_ctrl->iremap_index++;
index = ir_ctrl->iremap_index;
+ ioapic_pin_to_intremap_index[ioapic_pin] = index;
}
else
- index = (remap_rte->index_15 << 15) | remap_rte->index_0_14;
+ index = ioapic_pin_to_intremap_index[ioapic_pin];
if ( index > IREMAP_ENTRY_NR - 1 )
{
memcpy(&new_ire, iremap_entry, sizeof(struct iremap_entry));
if ( rte_upper )
+ {
+#if defined(__i386__) || defined(__x86_64__)
new_ire.lo.dst = (value >> 24) << 8;
+#else /* __ia64__ */
+ new_ire.lo.dst = value >> 16;
+#endif
+ }
else
{
*(((u32 *)&new_rte) + 0) = value;
struct IO_xAPIC_route_entry old_rte = { 0 };
struct IO_APIC_route_remap_entry *remap_rte;
int rte_upper = (reg & 1) ? 1 : 0;
- struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
+ struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
if ( !iommu || !ir_ctrl || ir_ctrl->iremap_maddr == 0 ||
remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
- if ( remap_rte->format == 0 )
+ if ( (remap_rte->format == 0) || (old_rte.delivery_mode == dest_SMI) )
{
*IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
return *(IO_APIC_BASE(apic)+4);
void io_apic_write_remap_rte(
unsigned int apic, unsigned int reg, unsigned int value)
{
+ unsigned int ioapic_pin = (reg - 0x10) / 2;
struct IO_xAPIC_route_entry old_rte = { 0 };
struct IO_APIC_route_remap_entry *remap_rte;
unsigned int rte_upper = (reg & 1) ? 1 : 0;
- struct iommu *iommu = ioapic_to_iommu(mp_ioapics[apic].mpc_apicid);
+ struct iommu *iommu = ioapic_to_iommu(IO_APIC_ID(apic));
struct ir_ctrl *ir_ctrl = iommu_ir_ctrl(iommu);
int saved_mask;
remap_rte = (struct IO_APIC_route_remap_entry *) &old_rte;
+ if ( old_rte.delivery_mode == dest_SMI )
+ {
+ /* Some BIOSes do not zero out reserved fields in IOAPIC
+ * RTEs. clear_IO_APIC() zeroes out all RTEs except those
+ * with the MSI delivery type. This is a problem when the host
+ * OS converts the SMI delivery type to some other type but
+ * leaves the reserved field uninitialized. This can cause an
+ * out-of-bounds access to the interrupt remapping table if the
+ * "format" field is 1 and the "index" field is larger than
+ * the maximum index of the interrupt remapping table.
+ */
+ if ( remap_rte->format == 1 )
+ {
+ remap_rte->format = 0;
+ *IO_APIC_BASE(apic) = reg;
+ *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+0);
+ *IO_APIC_BASE(apic) = reg + 1;
+ *(IO_APIC_BASE(apic)+4) = *(((u32 *)&old_rte)+1);
+ }
+
+ *IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
+ *(IO_APIC_BASE(apic)+4) = value;
+ return;
+ }
+
/* mask the interrupt while we change the intremap table */
saved_mask = remap_rte->mask;
remap_rte->mask = 1;
*(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
remap_rte->mask = saved_mask;
- if ( ioapic_rte_to_remap_entry(iommu, mp_ioapics[apic].mpc_apicid,
+ ASSERT(ioapic_pin < MAX_IOAPIC_PIN_NUM);
+ if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic), ioapic_pin,
&old_rte, rte_upper, value) )
{
*IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
struct iommu *iommu = NULL;
struct ir_ctrl *ir_ctrl;
- drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
+ drhd = acpi_find_matched_drhd_unit(pdev);
iommu = drhd->iommu;
ir_ctrl = iommu_ir_ctrl(iommu);
struct iommu *iommu = NULL;
struct ir_ctrl *ir_ctrl;
- drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
+ drhd = acpi_find_matched_drhd_unit(pdev);
iommu = drhd->iommu;
ir_ctrl = iommu_ir_ctrl(iommu);
}
#endif
-int intremap_setup(struct iommu *iommu)
+int enable_intremap(struct iommu *iommu)
{
struct ir_ctrl *ir_ctrl;
s_time_t start_time;
- if ( !ecap_intr_remap(iommu->ecap) )
- return -ENODEV;
+ ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
ir_ctrl = iommu_ir_ctrl(iommu);
if ( ir_ctrl->iremap_maddr == 0 )
{
- ir_ctrl->iremap_maddr = alloc_pgtable_maddr();
+ ir_ctrl->iremap_maddr = alloc_pgtable_maddr(NULL, 1);
if ( ir_ctrl->iremap_maddr == 0 )
{
dprintk(XENLOG_WARNING VTDPREFIX,
"Cannot allocate memory for ir_ctrl->iremap_maddr\n");
- return -ENODEV;
+ return -ENOMEM;
}
ir_ctrl->iremap_index = -1;
}
#if defined(ENABLED_EXTENDED_INTERRUPT_SUPPORT)
/* set extended interrupt mode bit */
ir_ctrl->iremap_maddr |=
- ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIMI_SHIFT) : 0;
+ ecap_ext_intr(iommu->ecap) ? (1 << IRTA_REG_EIME_SHIFT) : 0;
#endif
- /* size field = 256 entries per 4K page = 8 - 1 */
- ir_ctrl->iremap_maddr |= 7;
+ /* set size of the interrupt remapping table */
+ ir_ctrl->iremap_maddr |= IRTA_REG_TABLE_SIZE;
dmar_writeq(iommu->reg, DMAR_IRTA_REG, ir_ctrl->iremap_maddr);
/* set SIRTP */
while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_SIRTPS) )
{
if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "Cannot set SIRTP field for interrupt remapping\n");
- return -ENODEV;
- }
+ panic("Cannot set SIRTP field for interrupt remapping\n");
cpu_relax();
}
while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_CFIS) )
{
if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "Cannot set CFI field for interrupt remapping\n");
- return -ENODEV;
- }
+ panic("Cannot set CFI field for interrupt remapping\n");
cpu_relax();
}
start_time = NOW();
while ( !(dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_IRES) )
{
- if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
- {
- dprintk(XENLOG_ERR VTDPREFIX,
- "Cannot set IRE field for interrupt remapping\n");
- return -ENODEV;
- }
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+ panic("Cannot set IRE field for interrupt remapping\n");
cpu_relax();
}
return 0;
}
+
+void disable_intremap(struct iommu *iommu)
+{
+ s_time_t start_time;
+
+ ASSERT(ecap_intr_remap(iommu->ecap) && iommu_intremap);
+
+ iommu->gcmd &= ~(DMA_GCMD_SIRTP | DMA_GCMD_CFI | DMA_GCMD_IRE);
+ dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+ start_time = NOW();
+ while ( dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_IRES )
+ {
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+ panic("Cannot clear IRE field for interrupt remapping\n");
+ cpu_relax();
+ }
+}
#include <xen/xmalloc.h>
#include <xen/domain_page.h>
#include <xen/iommu.h>
+#include <asm/hvm/iommu.h>
#include <xen/numa.h>
#include <xen/time.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include <xen/keyhandler.h>
+#include <asm/msi.h>
#include "iommu.h"
#include "dmar.h"
#include "extern.h"
static spinlock_t domid_bitmap_lock; /* protect domain id bitmap */
static int domid_bitmap_size; /* domain id bitmap size in bits */
static unsigned long *domid_bitmap; /* iommu domain id bitmap */
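+/* Force write-buffer flushes even when the RWBF capability bit is clear. */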
+static bool_t rwbf_quirk;
static void setup_dom0_devices(struct domain *d);
static void setup_dom0_rmrr(struct domain *d);
static void context_set_domain_id(struct context_entry *context,
struct domain *d)
{
- unsigned long flags;
domid_t iommu_domid = domain_iommu_domid(d);
if ( iommu_domid == 0 )
{
- spin_lock_irqsave(&domid_bitmap_lock, flags);
+ spin_lock(&domid_bitmap_lock);
iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
set_bit(iommu_domid, domid_bitmap);
- spin_unlock_irqrestore(&domid_bitmap_lock, flags);
+ spin_unlock(&domid_bitmap_lock);
d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
}
__iommu_flush_cache(addr, 8);
}
-void iommu_flush_cache_page(void *addr)
+void iommu_flush_cache_page(void *addr, unsigned long npages)
{
- __iommu_flush_cache(addr, PAGE_SIZE_4K);
+ __iommu_flush_cache(addr, PAGE_SIZE_4K * npages);
}
int nr_iommus;
static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus)
{
struct root_entry *root, *root_entries;
- unsigned long flags;
u64 maddr;
- spin_lock_irqsave(&iommu->lock, flags);
+ ASSERT(spin_is_locked(&iommu->lock));
root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
root = &root_entries[bus];
if ( !root_present(*root) )
{
- maddr = alloc_pgtable_maddr();
+ maddr = alloc_pgtable_maddr(NULL, 1);
if ( maddr == 0 )
{
unmap_vtd_domain_page(root_entries);
- spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
set_root_value(*root, maddr);
}
maddr = (u64) get_context_addr(*root);
unmap_vtd_domain_page(root_entries);
- spin_unlock_irqrestore(&iommu->lock, flags);
return maddr;
}
-static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn)
-{
- struct root_entry *root, *root_entries;
- struct context_entry *context;
- u64 context_maddr;
- int ret;
- unsigned long flags;
-
- spin_lock_irqsave(&iommu->lock, flags);
- root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr);
- root = &root_entries[bus];
- if ( !root_present(*root) )
- {
- ret = 0;
- goto out;
- }
- context_maddr = get_context_addr(*root);
- context = (struct context_entry *)map_vtd_domain_page(context_maddr);
- ret = context_present(context[devfn]);
- unmap_vtd_domain_page(context);
- out:
- unmap_vtd_domain_page(root_entries);
- spin_unlock_irqrestore(&iommu->lock, flags);
- return ret;
-}
-
static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc)
{
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct dma_pte *parent, *pte = NULL;
int level = agaw_to_level(hd->agaw);
int offset;
- unsigned long flags;
u64 pte_maddr = 0, maddr;
u64 *vaddr = NULL;
addr &= (((u64)1) << addr_width) - 1;
- spin_lock_irqsave(&hd->mapping_lock, flags);
+ ASSERT(spin_is_locked(&hd->mapping_lock));
if ( hd->pgd_maddr == 0 )
- if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr()) == 0) )
+ if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain, 1)) == 0) )
goto out;
parent = (struct dma_pte *)map_vtd_domain_page(hd->pgd_maddr);
{
if ( !alloc )
break;
- maddr = alloc_pgtable_maddr();
+ maddr = alloc_pgtable_maddr(domain, 1);
+ if ( !maddr )
+ break;
dma_set_pte_addr(*pte, maddr);
vaddr = map_vtd_domain_page(maddr);
- if ( !vaddr )
- break;
/*
* high level table always sets r/w, last level
else
{
vaddr = map_vtd_domain_page(pte->val);
- if ( !vaddr )
- break;
}
if ( level == 2 )
unmap_vtd_domain_page(parent);
out:
- spin_unlock_irqrestore(&hd->mapping_lock, flags);
return pte_maddr;
}
unsigned long flag;
s_time_t start_time;
- if ( !cap_rwbf(iommu->cap) )
+ if ( !rwbf_quirk && !cap_rwbf(iommu->cap) )
return;
val = iommu->gcmd | DMA_GCMD_WBF;
if ( DMA_TLB_IAIG(val) == 0 )
dprintk(XENLOG_ERR VTDPREFIX, "IOMMU: flush IOTLB failed\n");
- if ( DMA_TLB_IAIG(val) != DMA_TLB_IIRG(type) )
- dprintk(XENLOG_INFO VTDPREFIX,
- "IOMMU: tlb flush request %x, actual %x\n",
- (u32)DMA_TLB_IIRG(type), (u32)DMA_TLB_IAIG(val));
/* flush iotlb entry will implicitly flush write buffer */
return 0;
}
struct dma_pte *page = NULL, *pte = NULL;
u64 pg_maddr;
+ spin_lock(&hd->mapping_lock);
/* get last level pte */
pg_maddr = addr_to_dma_page_maddr(domain, addr, 0);
if ( pg_maddr == 0 )
+ {
+ spin_unlock(&hd->mapping_lock);
return;
+ }
+
page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
pte = page + address_level_offset(addr, 1);
if ( !dma_pte_present(*pte) )
{
+ spin_unlock(&hd->mapping_lock);
unmap_vtd_domain_page(page);
return;
}
dma_clear_pte(*pte);
+ spin_unlock(&hd->mapping_lock);
iommu_flush_cache_entry(pte);
+ /* No need for pcidevs_lock here since we flush on device assign/deassign. */
for_each_drhd_unit ( drhd )
{
iommu = drhd->iommu;
unmap_vtd_domain_page(page);
}
-/* clear last level pte, a tlb flush should be followed */
-static void dma_pte_clear_range(struct domain *domain, u64 start, u64 end)
-{
- struct hvm_iommu *hd = domain_hvm_iommu(domain);
- int addr_width = agaw_to_width(hd->agaw);
-
- start &= (((u64)1) << addr_width) - 1;
- end &= (((u64)1) << addr_width) - 1;
- /* in case it's partial page */
- start = PAGE_ALIGN_4K(start);
- end &= PAGE_MASK_4K;
-
- /* we don't need lock here, nobody else touches the iova range */
- while ( start < end )
- {
- dma_pte_clear_one(domain, start);
- start += PAGE_SIZE_4K;
- }
-}
-
static void iommu_free_pagetable(u64 pt_maddr, int level)
{
int i;
unsigned long flags;
s_time_t start_time;
- spin_lock_irqsave(&iommu->register_lock, flags);
+ spin_lock(&iommu->lock);
if ( iommu->root_maddr == 0 )
- iommu->root_maddr = alloc_pgtable_maddr();
+ iommu->root_maddr = alloc_pgtable_maddr(NULL, 1);
if ( iommu->root_maddr == 0 )
{
- spin_unlock_irqrestore(&iommu->register_lock, flags);
+ spin_unlock(&iommu->lock);
return -ENOMEM;
}
+ spin_unlock(&iommu->lock);
+ spin_lock_irqsave(&iommu->register_lock, flags);
dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr);
cmd = iommu->gcmd | DMA_GCMD_SRTP;
dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd);
return 0;
}
-static int iommu_enable_translation(struct iommu *iommu)
+static void iommu_enable_translation(struct iommu *iommu)
{
u32 sts;
unsigned long flags;
/* Disable PMRs when VT-d engine takes effect per spec definition */
disable_pmr(iommu);
spin_unlock_irqrestore(&iommu->register_lock, flags);
- return 0;
}
-int iommu_disable_translation(struct iommu *iommu)
+static void iommu_disable_translation(struct iommu *iommu)
{
u32 sts;
unsigned long flags;
cpu_relax();
}
spin_unlock_irqrestore(&iommu->register_lock, flags);
- return 0;
}
static struct iommu *vector_to_iommu[NR_VECTORS];
if ( fault_status & DMA_FSTS_PFO )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Fault Overflow\n");
- else if ( fault_status & DMA_FSTS_PPF )
+ if ( fault_status & DMA_FSTS_PPF )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Primary Pending Fault\n");
- else if ( fault_status & DMA_FSTS_AFO )
+ if ( fault_status & DMA_FSTS_AFO )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Advanced Fault Overflow\n");
- else if ( fault_status & DMA_FSTS_APF )
+ if ( fault_status & DMA_FSTS_APF )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Advanced Pending Fault\n");
- else if ( fault_status & DMA_FSTS_IQE )
+ if ( fault_status & DMA_FSTS_IQE )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Invalidation Queue Error\n");
- else if ( fault_status & DMA_FSTS_ICE )
+ if ( fault_status & DMA_FSTS_ICE )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Invalidation Completion Error\n");
- else if ( fault_status & DMA_FSTS_ITE )
+ if ( fault_status & DMA_FSTS_ITE )
dprintk(XENLOG_ERR VTDPREFIX,
"iommu_fault_status: Invalidation Time-out Error\n");
}
dprintk(XENLOG_WARNING VTDPREFIX,
"iommu_page_fault: iommu->reg = %p\n", iommu->reg);
- spin_lock_irqsave(&iommu->register_lock, flags);
fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG);
- spin_unlock_irqrestore(&iommu->register_lock, flags);
iommu_fault_status(fault_status);
/* FIXME: ignore advanced fault log */
if ( !(fault_status & DMA_FSTS_PPF) )
- return;
+ goto clear_overflow;
+
fault_index = dma_fsts_fault_record_index(fault_status);
reg = cap_fault_reg_offset(iommu->cap);
- for ( ; ; )
+ while (1)
{
u8 fault_reason;
u16 source_id;
if ( fault_index > cap_num_fault_regs(iommu->cap) )
fault_index = 0;
}
-
+clear_overflow:
/* clear primary fault overflow */
+ fault_status = readl(iommu->reg + DMAR_FSTS_REG);
if ( fault_status & DMA_FSTS_PFO )
{
spin_lock_irqsave(&iommu->register_lock, flags);
.set_affinity = dma_msi_set_affinity,
};
-int iommu_set_interrupt(struct iommu *iommu)
+static int iommu_set_interrupt(struct iommu *iommu)
{
int vector, ret;
- vector = assign_irq_vector(AUTO_ASSIGN);
- vector_to_iommu[vector] = iommu;
-
- /* VT-d fault is a MSI, make irq == vector */
- irq_vector[vector] = vector;
- vector_irq[vector] = vector;
-
- if ( !vector )
+ vector = assign_irq_vector(AUTO_ASSIGN_IRQ);
+ if ( vector <= 0 )
{
gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no vectors\n");
return -EINVAL;
}
irq_desc[vector].handler = &dma_msi_type;
- ret = request_irq(vector, iommu_page_fault, 0, "dmar", iommu);
+ vector_to_iommu[vector] = iommu;
+ ret = request_irq_vector(vector, iommu_page_fault, 0, "dmar", iommu);
if ( ret )
+ {
+ irq_desc[vector].handler = &no_irq_type;
+ vector_to_iommu[vector] = NULL;
+ free_irq_vector(vector);
gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: can't request irq\n");
+ return ret;
+ }
+
+ /* Make sure that vector is never re-used. */
+ vector_irq[vector] = NEVER_ASSIGN_IRQ;
+
return vector;
}
iounmap(iommu->reg);
free_intel_iommu(iommu->intel);
- free_irq(iommu->vector);
+ release_irq_vector(iommu->vector);
xfree(iommu);
drhd->iommu = NULL;
{
struct hvm_iommu *hd = domain_hvm_iommu(d);
struct iommu *iommu = NULL;
- u64 i;
struct acpi_drhd_unit *drhd;
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
if ( d->domain_id == 0 )
{
- extern int xen_in_range(paddr_t start, paddr_t end);
- extern int tboot_in_range(paddr_t start, paddr_t end);
-
- /*
- * Set up 1:1 page table for dom0 except the critical segments
- * like Xen and tboot.
- */
- for ( i = 0; i < max_page; i++ )
- {
- if ( xen_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) ||
- tboot_in_range(i << PAGE_SHIFT_4K, (i + 1) << PAGE_SHIFT_4K) )
- continue;
-
- iommu_map_page(d, i, i);
- }
+ /* Set up 1:1 page table for dom0 */
+ iommu_set_dom0_mapping(d);
setup_dom0_devices(d);
setup_dom0_rmrr(d);
for_each_drhd_unit ( drhd )
{
iommu = drhd->iommu;
- if ( iommu_enable_translation(iommu) )
- return -EIO;
+ iommu_enable_translation(iommu);
}
}
{
struct hvm_iommu *hd = domain_hvm_iommu(domain);
struct context_entry *context, *context_entries;
- unsigned long flags;
u64 maddr, pgd_maddr;
+ struct pci_dev *pdev = NULL;
int agaw;
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ spin_lock(&iommu->lock);
maddr = bus_to_context_maddr(iommu, bus);
context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
context = &context_entries[devfn];
if ( context_present(*context) )
{
+ int res = 0;
+
+ pdev = pci_get_pdev(bus, devfn);
+ if (!pdev)
+ res = -ENODEV;
+ else if (pdev->domain != domain)
+ res = -EINVAL;
unmap_vtd_domain_page(context_entries);
- return 0;
+ spin_unlock(&iommu->lock);
+ return res;
}
- spin_lock_irqsave(&iommu->lock, flags);
- if ( iommu_passthrough &&
- ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) )
+ if ( iommu_passthrough && (domain->domain_id == 0) )
{
context_set_translation_type(*context, CONTEXT_TT_PASS_THRU);
agaw = level_to_agaw(iommu->nr_pt_levels);
}
else
{
+ spin_lock(&hd->mapping_lock);
+
/* Ensure we have pagetables allocated down to leaf PTE. */
if ( hd->pgd_maddr == 0 )
{
if ( hd->pgd_maddr == 0 )
{
nomem:
+ spin_unlock(&hd->mapping_lock);
+ spin_unlock(&iommu->lock);
unmap_vtd_domain_page(context_entries);
- spin_unlock_irqrestore(&iommu->lock, flags);
return -ENOMEM;
}
}
context_set_address_root(*context, pgd_maddr);
context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL);
+ spin_unlock(&hd->mapping_lock);
}
/*
context_set_fault_enable(*context);
context_set_present(*context);
iommu_flush_cache_entry(context);
-
- unmap_vtd_domain_page(context_entries);
+ spin_unlock(&iommu->lock);
/* Context entry was previously non-present (with domid 0). */
- iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
- DMA_CCMD_MASK_NOBIT, 1);
- if ( iommu_flush_iotlb_dsi(iommu, 0, 1) )
+ if ( iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT, 1) )
iommu_flush_write_buffer(iommu);
+ else
+ iommu_flush_iotlb_dsi(iommu, 0, 1);
set_bit(iommu->index, &hd->iommu_bitmap);
- spin_unlock_irqrestore(&iommu->lock, flags);
+
+ unmap_vtd_domain_page(context_entries);
return 0;
}
enum {
DEV_TYPE_PCIe_ENDPOINT,
- DEV_TYPE_PCIe_BRIDGE,
- DEV_TYPE_PCI_BRIDGE,
+ DEV_TYPE_PCIe_BRIDGE, // PCIe root port, switch
+ DEV_TYPE_PCI_BRIDGE, // PCIe-to-PCI/PCIx bridge, PCI-to-PCI bridge
DEV_TYPE_PCI,
};
class_device = pci_conf_read16(bus, d, f, PCI_CLASS_DEVICE);
if ( class_device == PCI_CLASS_BRIDGE_PCI )
{
- pos = pci_find_next_cap(bus, devfn, PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP);
+ pos = pci_find_next_cap(bus, devfn,
+ PCI_CAPABILITY_LIST, PCI_CAP_ID_EXP);
if ( !pos )
return DEV_TYPE_PCI_BRIDGE;
creg = pci_conf_read16(bus, d, f, pos + PCI_EXP_FLAGS);
}
#define MAX_BUSES 256
+static DEFINE_SPINLOCK(bus2bridge_lock);
static struct { u8 map, bus, devfn; } bus2bridge[MAX_BUSES];
-static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
+static int _find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
{
int cnt = 0;
*secbus = *bus;
- if ( *bus == 0 )
- /* assume integrated PCI devices in RC have valid requester-id */
- return 1;
-
+ ASSERT(spin_is_locked(&bus2bridge_lock));
if ( !bus2bridge[*bus].map )
return 0;
return 1;
}
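+
+/*
+ * Locking wrapper around _find_pcie_endpoint(): devices on bus 0 are assumed
+ * to present a valid requester id; everything else is looked up in the
+ * bus2bridge map under bus2bridge_lock.
+ */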
+static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus)
+{
+ int ret = 0;
+
+ if ( *bus == 0 )
+ /* assume integrated PCI devices in RC have valid requester-id */
+ return 1;
+
+ spin_lock(&bus2bridge_lock);
+ ret = _find_pcie_endpoint(bus, devfn, secbus);
+ spin_unlock(&bus2bridge_lock);
+
+ return ret;
+}
+
static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn)
{
struct acpi_drhd_unit *drhd;
int ret = 0;
- u16 sec_bus, sub_bus, ob, odf;
+ u16 sec_bus, sub_bus;
u32 type;
- u8 secbus;
+ u8 secbus, secdevfn;
+ struct pci_dev *pdev = pci_get_pdev(bus, devfn);
+
+ BUG_ON(!pdev);
- drhd = acpi_find_matched_drhd_unit(bus, devfn);
+ drhd = acpi_find_matched_drhd_unit(pdev);
if ( !drhd )
return -ENODEV;
+ ASSERT(spin_is_locked(&pcidevs_lock));
+
type = pdev_type(bus, devfn);
switch ( type )
{
case DEV_TYPE_PCIe_BRIDGE:
+ break;
+
case DEV_TYPE_PCI_BRIDGE:
sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
PCI_SECONDARY_BUS);
sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
PCI_SUBORDINATE_BUS);
- /*dmar_scope_add_buses(&drhd->scope, sec_bus, sub_bus);*/
-
- if ( type == DEV_TYPE_PCIe_BRIDGE )
- break;
+ spin_lock(&bus2bridge_lock);
for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ )
{
bus2bridge[sec_bus].map = 1;
bus2bridge[sec_bus].bus = bus;
bus2bridge[sec_bus].devfn = devfn;
}
+ spin_unlock(&bus2bridge_lock);
break;
case DEV_TYPE_PCIe_ENDPOINT:
case DEV_TYPE_PCI:
gdprintk(XENLOG_INFO VTDPREFIX,
- "domain_context_mapping:PCI: bdf = %x:%x.%x\n",
+ "domain_context_mapping:PCI: bdf = %x:%x.%x\n",
bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
- ob = bus; odf = devfn;
- if ( !find_pcie_endpoint(&bus, &devfn, &secbus) )
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ if ( ret )
+ break;
+
+ secbus = bus;
+ secdevfn = devfn;
+ /* dependent devices mapping */
+ while ( bus2bridge[bus].map )
{
- gdprintk(XENLOG_WARNING VTDPREFIX,
- "domain_context_mapping:invalid\n");
- break;
+ secbus = bus;
+ secdevfn = devfn;
+ devfn = bus2bridge[bus].devfn;
+ bus = bus2bridge[bus].bus;
+ ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
+ if ( ret )
+ return ret;
}
- if ( ob != bus || odf != devfn )
- gdprintk(XENLOG_INFO VTDPREFIX,
- "domain_context_mapping:map: "
- "bdf = %x:%x.%x -> %x:%x.%x\n",
- ob, PCI_SLOT(odf), PCI_FUNC(odf),
- bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
-
- ret = domain_context_mapping_one(domain, drhd->iommu, bus, devfn);
- if ( secbus != bus )
+ if ( (secbus != bus) && (secdevfn != 0) )
/*
* The source-id for transactions on non-PCIe buses seem
* to originate from devfn=0 on the secondary bus behind
 * these scenarios is not particularly well documented
* anywhere.
*/
- domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
+ ret = domain_context_mapping_one(domain, drhd->iommu, secbus, 0);
break;
default:
u8 bus, u8 devfn)
{
struct context_entry *context, *context_entries;
- unsigned long flags;
u64 maddr;
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ spin_lock(&iommu->lock);
+
maddr = bus_to_context_maddr(iommu, bus);
context_entries = (struct context_entry *)map_vtd_domain_page(maddr);
context = &context_entries[devfn];
if ( !context_present(*context) )
{
+ spin_unlock(&iommu->lock);
unmap_vtd_domain_page(context_entries);
return 0;
}
- spin_lock_irqsave(&iommu->lock, flags);
context_clear_present(*context);
context_clear_entry(*context);
iommu_flush_cache_entry(context);
- iommu_flush_context_domain(iommu, domain_iommu_domid(domain), 0);
- iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
+
+ if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
+ (((u16)bus) << 8) | devfn,
+ DMA_CCMD_MASK_NOBIT, 0) )
+ iommu_flush_write_buffer(iommu);
+ else
+ iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0);
+
+ spin_unlock(&iommu->lock);
unmap_vtd_domain_page(context_entries);
- spin_unlock_irqrestore(&iommu->lock, flags);
return 0;
}
static int domain_context_unmap(struct domain *domain, u8 bus, u8 devfn)
{
struct acpi_drhd_unit *drhd;
- u16 sec_bus, sub_bus;
int ret = 0;
u32 type;
- u8 secbus;
+ u8 secbus, secdevfn;
+ struct pci_dev *pdev = pci_get_pdev(bus, devfn);
+
+ BUG_ON(!pdev);
- drhd = acpi_find_matched_drhd_unit(bus, devfn);
+ drhd = acpi_find_matched_drhd_unit(pdev);
if ( !drhd )
return -ENODEV;
{
case DEV_TYPE_PCIe_BRIDGE:
case DEV_TYPE_PCI_BRIDGE:
- sec_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- PCI_SECONDARY_BUS);
- sub_bus = pci_conf_read8(bus, PCI_SLOT(devfn), PCI_FUNC(devfn),
- PCI_SUBORDINATE_BUS);
- /*dmar_scope_remove_buses(&drhd->scope, sec_bus, sub_bus);*/
- if ( DEV_TYPE_PCI_BRIDGE )
- ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
break;
case DEV_TYPE_PCIe_ENDPOINT:
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap:PCIe: bdf = %x:%x.%x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
break;
case DEV_TYPE_PCI:
- if ( find_pcie_endpoint(&bus, &devfn, &secbus) )
+ gdprintk(XENLOG_INFO VTDPREFIX,
+ "domain_context_unmap:PCI: bdf = %x:%x.%x\n",
+ bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
+ ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
+ if ( ret )
+ break;
+
+ secbus = bus;
+ secdevfn = devfn;
+ /* dependent devices unmapping */
+ while ( bus2bridge[bus].map )
+ {
+ secbus = bus;
+ secdevfn = devfn;
+ devfn = bus2bridge[bus].devfn;
+ bus = bus2bridge[bus].bus;
ret = domain_context_unmap_one(domain, drhd->iommu, bus, devfn);
- if ( bus != secbus )
- domain_context_unmap_one(domain, drhd->iommu, secbus, 0);
+ if ( ret )
+ return ret;
+ }
+
+ if ( (secbus != bus) && (secdevfn != 0) )
+ ret = domain_context_unmap_one(domain, drhd->iommu, secbus, 0);
break;
default:
struct iommu *pdev_iommu;
int ret, found = 0;
- if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) )
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ pdev = pci_get_pdev_by_domain(source, bus, devfn);
+
+ if (!pdev)
return -ENODEV;
- drhd = acpi_find_matched_drhd_unit(bus, devfn);
+ drhd = acpi_find_matched_drhd_unit(pdev);
pdev_iommu = drhd->iommu;
domain_context_unmap(source, bus, devfn);
if ( ret )
return ret;
- write_lock(&pcidevs_lock);
list_move(&pdev->domain_list, &target->arch.pdev_list);
- write_unlock(&pcidevs_lock);
pdev->domain = target;
- spin_unlock(&pdev->lock);
-
- read_lock(&pcidevs_lock);
for_each_pdev ( source, pdev )
{
- drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn);
+ drhd = acpi_find_matched_drhd_unit(pdev);
if ( drhd->iommu == pdev_iommu )
{
found = 1;
break;
}
}
- read_unlock(&pcidevs_lock);
if ( !found )
clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
if ( list_empty(&acpi_drhd_units) )
return;
+ spin_lock(&hd->mapping_lock);
iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
hd->pgd_maddr = 0;
- iommu_domid_release(d);
-}
-
-static int domain_context_mapped(u8 bus, u8 devfn)
-{
- struct acpi_drhd_unit *drhd;
-
- for_each_drhd_unit ( drhd )
- if ( device_context_mapped(drhd->iommu, bus, devfn) )
- return 1;
+ spin_unlock(&hd->mapping_lock);
- return 0;
+ iommu_domid_release(d);
}
int intel_iommu_map_page(
iommu = drhd->iommu;
/* do nothing if dom0 and iommu supports pass thru */
- if ( iommu_passthrough &&
- ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
+ if ( iommu_passthrough && (d->domain_id == 0) )
return 0;
+ spin_lock(&hd->mapping_lock);
+
pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1);
if ( pg_maddr == 0 )
+ {
+ spin_unlock(&hd->mapping_lock);
return -ENOMEM;
+ }
page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
pte = page + (gfn & LEVEL_MASK);
pte_present = dma_pte_present(*pte);
dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K);
dma_set_pte_prot(*pte, DMA_PTE_READ | DMA_PTE_WRITE);
+
+ /* Set the SNP on leaf page table if Snoop Control available */
+ if ( iommu_snoop )
+ dma_set_pte_snp(*pte);
+
iommu_flush_cache_entry(pte);
+ spin_unlock(&hd->mapping_lock);
unmap_vtd_domain_page(page);
+ /*
+ * No need for pcidevs_lock here because we flush
+ * when assigning/deassigning a device.
+ */
for_each_drhd_unit ( drhd )
{
iommu = drhd->iommu;
iommu = drhd->iommu;
/* do nothing if dom0 and iommu supports pass thru */
- if ( iommu_passthrough &&
- ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) )
+ if ( iommu_passthrough && (d->domain_id == 0) )
return 0;
dma_pte_clear_one(d, (paddr_t)gfn << PAGE_SHIFT_4K);
return 0;
}
-int iommu_page_mapping(struct domain *domain, paddr_t iova,
- paddr_t hpa, size_t size, int prot)
-{
- struct hvm_iommu *hd = domain_hvm_iommu(domain);
- struct acpi_drhd_unit *drhd;
- struct iommu *iommu;
- u64 start_pfn, end_pfn;
- struct dma_pte *page = NULL, *pte = NULL;
- int index;
- u64 pg_maddr;
-
- if ( (prot & (DMA_PTE_READ|DMA_PTE_WRITE)) == 0 )
- return -EINVAL;
-
- iova = (iova >> PAGE_SHIFT_4K) << PAGE_SHIFT_4K;
- start_pfn = hpa >> PAGE_SHIFT_4K;
- end_pfn = (PAGE_ALIGN_4K(hpa + size)) >> PAGE_SHIFT_4K;
- index = 0;
- while ( start_pfn < end_pfn )
- {
- pg_maddr = addr_to_dma_page_maddr(domain, iova + PAGE_SIZE_4K*index, 1);
- if ( pg_maddr == 0 )
- return -ENOMEM;
- page = (struct dma_pte *)map_vtd_domain_page(pg_maddr);
- pte = page + (start_pfn & LEVEL_MASK);
- dma_set_pte_addr(*pte, (paddr_t)start_pfn << PAGE_SHIFT_4K);
- dma_set_pte_prot(*pte, prot);
- iommu_flush_cache_entry(pte);
- unmap_vtd_domain_page(page);
- start_pfn++;
- index++;
- }
-
- if ( index > 0 )
- {
- for_each_drhd_unit ( drhd )
- {
- iommu = drhd->iommu;
- if ( test_bit(iommu->index, &hd->iommu_bitmap) )
- if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
- iova, index, 1))
- iommu_flush_write_buffer(iommu);
- }
- }
-
- return 0;
-}
-
-int iommu_page_unmapping(struct domain *domain, paddr_t addr, size_t size)
-{
- dma_pte_clear_range(domain, addr, addr + size);
-
- return 0;
-}
-
static int iommu_prepare_rmrr_dev(struct domain *d,
struct acpi_rmrr_unit *rmrr,
u8 bus, u8 devfn)
{
- u64 size;
- int ret;
-
- /* page table init */
- size = rmrr->end_address - rmrr->base_address + 1;
- ret = iommu_page_mapping(d, rmrr->base_address,
- rmrr->base_address, size,
- DMA_PTE_READ|DMA_PTE_WRITE);
- if ( ret )
- return ret;
+ int ret = 0;
+ u64 base, end;
+ unsigned long base_pfn, end_pfn;
+
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ ASSERT(rmrr->base_address < rmrr->end_address);
+
+ base = rmrr->base_address & PAGE_MASK_4K;
+ base_pfn = base >> PAGE_SHIFT_4K;
+ end = PAGE_ALIGN_4K(rmrr->end_address);
+ end_pfn = end >> PAGE_SHIFT_4K;
+
+ while ( base_pfn < end_pfn )
+ {
+ intel_iommu_map_page(d, base_pfn, base_pfn);
+ base_pfn++;
+ }
- if ( domain_context_mapped(bus, devfn) == 0 )
- ret = domain_context_mapping(d, bus, devfn);
+ ret = domain_context_mapping(d, bus, devfn);
return ret;
}
u16 bdf;
int ret, i;
+ ASSERT(spin_is_locked(&pcidevs_lock));
+
if ( !pdev->domain )
return -EINVAL;
hd = domain_hvm_iommu(d);
- write_lock(&pcidevs_lock);
+ spin_lock(&pcidevs_lock);
for ( bus = 0; bus < 256; bus++ )
{
for ( dev = 0; dev < 32; dev++ )
}
}
}
- write_unlock(&pcidevs_lock);
+ spin_unlock(&pcidevs_lock);
}
void clear_fault_bits(struct iommu *iommu)
}
vector = iommu_set_interrupt(iommu);
+ if ( vector < 0 )
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: interrupt setup failed\n");
+ return vector;
+ }
dma_msi_data_init(iommu, vector);
dma_msi_addr_init(iommu, cpu_physical_id(first_cpu(cpu_online_map)));
iommu->vector = vector;
flush->iotlb = flush_iotlb_reg;
}
- for_each_drhd_unit ( drhd )
+ if ( iommu_qinval )
{
- iommu = drhd->iommu;
- if ( qinval_setup(iommu) != 0 )
- dprintk(XENLOG_INFO VTDPREFIX,
- "Queued Invalidation hardware not found\n");
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( enable_qinval(iommu) != 0 )
+ {
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "Failed to enable Queued Invalidation!\n");
+ break;
+ }
+ }
}
- for_each_drhd_unit ( drhd )
+ if ( iommu_intremap )
{
- iommu = drhd->iommu;
- if ( intremap_setup(iommu) != 0 )
- dprintk(XENLOG_INFO VTDPREFIX,
- "Interrupt Remapping hardware not found\n");
+ for_each_drhd_unit ( drhd )
+ {
+ iommu = drhd->iommu;
+ if ( enable_intremap(iommu) != 0 )
+ {
+ dprintk(XENLOG_INFO VTDPREFIX,
+ "Failed to enable Interrupt Remapping!\n");
+ break;
+ }
+ }
}
return 0;
u16 bdf;
int ret, i;
+ spin_lock(&pcidevs_lock);
for_each_rmrr_device ( rmrr, bdf, i )
{
ret = iommu_prepare_rmrr_dev(d, rmrr, PCI_BUS(bdf), PCI_DEVFN2(bdf));
gdprintk(XENLOG_ERR VTDPREFIX,
"IOMMU: mapping reserved region failed\n");
}
+ spin_unlock(&pcidevs_lock);
+}
+
+static void platform_quirks(void)
+{
+ u32 id;
+
+ /* Mobile 4 Series Chipset neglects to set RWBF capability. */
+ id = pci_conf_read32(0, 0, 0, 0);
+ if ( id == 0x2a408086 )
+ {
+ dprintk(XENLOG_INFO VTDPREFIX, "DMAR: Forcing write-buffer flush\n");
+ rwbf_quirk = 1;
+ }
}
int intel_vtd_setup(void)
if ( !vtd_enabled )
return -ENODEV;
+ platform_quirks();
+
spin_lock_init(&domid_bitmap_lock);
clflush_size = get_cache_line_size();
+ /* We enable the following features only if they are supported by all VT-d
+ * engines: Snoop Control, DMA passthrough, Queued Invalidation and
+ * Interrupt Remapping.
+ */
for_each_drhd_unit ( drhd )
+ {
if ( iommu_alloc(drhd) != 0 )
goto error;
+ iommu = drhd->iommu;
+
+ if ( iommu_snoop && !ecap_snp_ctl(iommu->ecap) )
+ iommu_snoop = 0;
+
+ if ( iommu_passthrough && !ecap_pass_thru(iommu->ecap) )
+ iommu_passthrough = 0;
+
+ if ( iommu_qinval && !ecap_queued_inval(iommu->ecap) )
+ iommu_qinval = 0;
+
+ if ( iommu_intremap && !ecap_intr_remap(iommu->ecap) )
+ iommu_intremap = 0;
+ }
+
+ if ( !iommu_qinval && iommu_intremap )
+ {
+ iommu_intremap = 0;
+ gdprintk(XENLOG_WARNING VTDPREFIX, "Interrupt Remapping disabled "
+ "since Queued Invalidation isn't supported or enabled.\n");
+ }
+
+#define P(p,s) printk("Intel VT-d %s %ssupported.\n", s, (p)? "" : "not ")
+ P(iommu_snoop, "Snoop Control");
+ P(iommu_passthrough, "DMA Passthrough");
+ P(iommu_qinval, "Queued Invalidation");
+ P(iommu_intremap, "Interrupt Remapping");
+#undef P
+
/* Allocate IO page directory page for the domain. */
drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
iommu = drhd->iommu;
for_each_drhd_unit ( drhd )
iommu_free(drhd);
vtd_enabled = 0;
+ iommu_snoop = 0;
+ iommu_passthrough = 0;
+ iommu_qinval = 0;
+ iommu_intremap = 0;
return -ENOMEM;
}
{
struct pci_dev *pdev;
- if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) )
+ spin_lock(&pcidevs_lock);
+ pdev = pci_get_pdev_by_domain(dom0, bus, devfn);
+ if (!pdev)
{
- spin_unlock(&pdev->lock);
- return 0;
+ spin_unlock(&pcidevs_lock);
+ return -1;
}
- return 1;
+ spin_unlock(&pcidevs_lock);
+ return 0;
}
int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn)
{
struct acpi_rmrr_unit *rmrr;
int ret = 0, i;
+ struct pci_dev *pdev;
u16 bdf;
if ( list_empty(&acpi_drhd_units) )
return -ENODEV;
+ ASSERT(spin_is_locked(&pcidevs_lock));
+ pdev = pci_get_pdev(bus, devfn);
+ if (!pdev)
+ return -ENODEV;
+
+ if (pdev->domain != dom0)
+ {
+ gdprintk(XENLOG_ERR VTDPREFIX,
+ "IOMMU: assign a assigned device\n");
+ return -EBUSY;
+ }
+
ret = reassign_device_ownership(dom0, d, bus, devfn);
if ( ret )
- return ret;
+ goto done;
/* Setup rmrr identity mapping */
for_each_rmrr_device( rmrr, bdf, i )
* ignore USB RMRR temporarily.
*/
if ( is_usb_device(bus, devfn) )
- return 0;
+ {
+ ret = 0;
+ goto done;
+ }
ret = iommu_prepare_rmrr_dev(d, rmrr, bus, devfn);
if ( ret )
gdprintk(XENLOG_ERR VTDPREFIX,
"IOMMU: mapping reserved region failed\n");
- return ret;
+ goto done;
}
}
+done:
return ret;
}
}
static u32 iommu_state[MAX_IOMMUS][MAX_IOMMU_REGS];
-int iommu_suspend(void)
+void iommu_suspend(void)
{
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
u32 i;
if ( !vtd_enabled )
- return 0;
+ return;
iommu_flush_all();
(u32) dmar_readl(iommu->reg, DMAR_FEADDR_REG);
iommu_state[i][DMAR_FEUADDR_REG] =
(u32) dmar_readl(iommu->reg, DMAR_FEUADDR_REG);
- }
- return 0;
+ iommu_disable_translation(iommu);
+
+ if ( iommu_intremap )
+ disable_intremap(iommu);
+
+ if ( iommu_qinval )
+ disable_qinval(iommu);
+ }
}
-int iommu_resume(void)
+void iommu_resume(void)
{
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
u32 i;
if ( !vtd_enabled )
- return 0;
+ return;
- iommu_flush_all();
+ /* It is not clear whether the flush operation is required by the IOMMU
+ * specification. Note that the BIOS also executes during S3 resume and the
+ * IOMMU may be touched again, so do the flush for safety.
+ */
+ flush_all_cache();
if ( init_vtd_hw() != 0 && force_iommu )
panic("IOMMU setup failed, crash Xen for security purpose!\n");
dmar_writel(iommu->reg, DMAR_FEUADDR_REG,
(u32) iommu_state[i][DMAR_FEUADDR_REG]);
- if ( iommu_enable_translation(iommu) )
- return -EIO;
+ iommu_enable_translation(iommu);
}
-
- return 0;
}
struct iommu_ops intel_iommu_ops = {
#define ecap_ext_intr(e) ((e >> 4) & 0x1)
#define ecap_cache_hints(e) ((e >> 5) & 0x1)
#define ecap_pass_thru(e) ((e >> 6) & 0x1)
+#define ecap_snp_ctl(e) ((e >> 7) & 0x1)
/* IOTLB_REG */
#define DMA_TLB_FLUSH_GRANU_OFFSET 60
};
#define DMA_PTE_READ (1)
#define DMA_PTE_WRITE (2)
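+/* Snoop behaviour bit in leaf PTEs; set only when Snoop Control is available. */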
+#define DMA_PTE_SNP (1 << 11)
#define dma_clear_pte(p) do {(p).val = 0;} while(0)
#define dma_set_pte_readable(p) do {(p).val |= DMA_PTE_READ;} while(0)
#define dma_set_pte_writable(p) do {(p).val |= DMA_PTE_WRITE;} while(0)
#define dma_set_pte_superpage(p) do {(p).val |= (1 << 7);} while(0)
+#define dma_set_pte_snp(p) do {(p).val |= DMA_PTE_SNP;} while(0)
#define dma_set_pte_prot(p, prot) \
do {(p).val = ((p).val & ~3) | ((prot) & 3); } while (0)
#define dma_pte_addr(p) ((p).val & PAGE_MASK_4K)
/* queue invalidation entry */
struct qinval_entry {
union {
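+ /* Raw view of the 128-bit descriptor. */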
+ struct {
+ u64 lo;
+ u64 hi;
+ }val;
struct {
struct {
u64 type : 4,
u32 udata;
};
-#define QINVAL_ENTRY_NR (PAGE_SIZE_4K/sizeof(struct qinval_entry))
+#define NUM_QINVAL_PAGES 1
+#define IQA_REG_QS 0 // derived from NUM_QINVAL_PAGES per VT-d spec.
+#define QINVAL_ENTRY_NR (PAGE_SIZE_4K*NUM_QINVAL_PAGES/sizeof(struct qinval_entry))
#define qinval_present(v) ((v).lo & 1)
#define qinval_fault_disable(v) (((v).lo >> 1) & 1)
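For reference, a sketch of how the queue sizing above works out, assuming the two-u64 descriptor layout added to struct qinval_entry earlier in this hunk:

/*
 * Each qinval_entry is two u64 words (16 bytes), so with one 4K page:
 *   QINVAL_ENTRY_NR = 4096 * 1 / 16 = 256 descriptors.
 * The VT-d IQA register encodes the queue size as 2^QS pages, so
 * NUM_QINVAL_PAGES == 1 corresponds to QS == 0, which is why IQA_REG_QS
 * is OR-ed into qinval_maddr before it is written to DMAR_IQA_REG.
 */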
u64 val;
val = dmar_readq(iommu->reg, DMAR_IQA_REG);
- printk("DMAR_IAQ_REG = %"PRIx64"\n", val);
+ printk("DMAR_IQA_REG = %"PRIx64"\n", val);
val = dmar_readq(iommu->reg, DMAR_IQH_REG);
- printk("DMAR_IAH_REG = %"PRIx64"\n", val);
+ printk("DMAR_IQH_REG = %"PRIx64"\n", val);
val = dmar_readq(iommu->reg, DMAR_IQT_REG);
- printk("DMAR_IAT_REG = %"PRIx64"\n", val);
+ printk("DMAR_IQT_REG = %"PRIx64"\n", val);
}
static int qinval_next_index(struct iommu *iommu)
qinval_entry->q.dev_iotlb_inv_dsc.lo.res_3 = 0;
qinval_entry->q.dev_iotlb_inv_dsc.hi.size = size;
- qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr;
+ qinval_entry->q.dev_iotlb_inv_dsc.hi.res_1 = 0;
+ qinval_entry->q.dev_iotlb_inv_dsc.hi.addr = addr >> PAGE_SHIFT_4K;
unmap_vtd_domain_page(qinval_entries);
spin_unlock_irqrestore(&qi_ctrl->qinval_lock, flags);
return 0;
}
-int queue_invalidate_device_iotlb(struct iommu *iommu,
+int qinval_device_iotlb(struct iommu *iommu,
u32 max_invs_pend, u16 sid, u16 size, u64 addr)
{
int ret = -1;
return ret;
}
-u64 iec_cap;
int __iommu_flush_iec(struct iommu *iommu, u8 granu, u8 im, u16 iidx)
{
int ret;
* reading vt-d architecture register will ensure
* draining happens in implementation independent way.
*/
- iec_cap = dmar_readq(iommu->reg, DMAR_CAP_REG);
+ (void)dmar_readq(iommu->reg, DMAR_CAP_REG);
return ret;
}
return ret;
}
-int qinval_setup(struct iommu *iommu)
+int enable_qinval(struct iommu *iommu)
{
s_time_t start_time;
struct qi_ctrl *qi_ctrl;
qi_ctrl = iommu_qi_ctrl(iommu);
flush = iommu_get_flush(iommu);
- if ( !ecap_queued_inval(iommu->ecap) )
- return -ENODEV;
+ ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
if ( qi_ctrl->qinval_maddr == 0 )
{
- qi_ctrl->qinval_maddr = alloc_pgtable_maddr();
+ qi_ctrl->qinval_maddr = alloc_pgtable_maddr(NULL, NUM_QINVAL_PAGES);
if ( qi_ctrl->qinval_maddr == 0 )
- panic("Cannot allocate memory for qi_ctrl->qinval_maddr\n");
+ {
+ dprintk(XENLOG_WARNING VTDPREFIX,
+ "Cannot allocate memory for qi_ctrl->qinval_maddr\n");
+ return -ENOMEM;
+ }
flush->context = flush_context_qi;
flush->iotlb = flush_iotlb_qi;
}
* registers are automatically reset to 0 with write
* to IQA register.
*/
+ qi_ctrl->qinval_maddr |= IQA_REG_QS;
dmar_writeq(iommu->reg, DMAR_IQA_REG, qi_ctrl->qinval_maddr);
+ dmar_writeq(iommu->reg, DMAR_IQT_REG, 0);
+
/* enable queued invalidation hardware */
iommu->gcmd |= DMA_GCMD_QIE;
dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
return 0;
}
+
+void disable_qinval(struct iommu *iommu)
+{
+ s_time_t start_time;
+
+ ASSERT(ecap_queued_inval(iommu->ecap) && iommu_qinval);
+
+ iommu->gcmd &= ~DMA_GCMD_QIE;
+ dmar_writel(iommu->reg, DMAR_GCMD_REG, iommu->gcmd);
+
+ /* Make sure the hardware has completed the disable */
+ start_time = NOW();
+ while ( dmar_readl(iommu->reg, DMAR_GSTS_REG) & DMA_GSTS_QIES )
+ {
+ if ( NOW() > (start_time + DMAR_OPERATION_TIMEOUT) )
+ panic("Cannot clear QIE field for queue invalidation\n");
+ cpu_relax();
+ }
+}
void dump_iommu_info(unsigned char key)
{
+#if defined(__i386__) || defined(__x86_64__)
struct acpi_drhd_unit *drhd;
struct iommu *iommu;
int i;
}
}
}
+#else
+ printk("%s: not implemnted on IA64 for now.\n", __func__);
+ /* ia64: TODO */
+#endif
}
/*
void cacheline_flush(char *);
void flush_all_cache(void);
void *map_to_nocache_virt(int nr_iommus, u64 maddr);
-u64 alloc_pgtable_maddr(void);
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages);
void free_pgtable_maddr(u64 maddr);
void *map_vtd_domain_page(u64 maddr);
void unmap_vtd_domain_page(void *va);
void iommu_flush_cache_entry(void *addr);
-void iommu_flush_cache_page(void *addr);
+void iommu_flush_cache_page(void *addr, unsigned long npages);
#endif // _VTD_H_
#include <xen/domain_page.h>
#include <asm/paging.h>
#include <xen/iommu.h>
+#include <xen/numa.h>
#include "../iommu.h"
#include "../dmar.h"
#include "../vtd.h"
+/*
+ * iommu_inclusive_mapping: when set, all memory below 4GB is included in dom0
+ * 1:1 iommu mappings except xen and unusable regions.
+ */
+static int iommu_inclusive_mapping;
+boolean_param("iommu_inclusive_mapping", iommu_inclusive_mapping);
+
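A usage note (editorial, not part of the patch): boolean_param() exposes this as a Xen boot option, so the inclusive mapping would be requested with something like "iommu_inclusive_mapping=1" on the hypervisor command line; left unset, only conventional RAM plus the RMRR regions are mapped 1:1 for dom0.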
void *map_vtd_domain_page(u64 maddr)
{
return map_domain_page(maddr >> PAGE_SHIFT_4K);
}
/* Allocate page table, return its machine address */
-u64 alloc_pgtable_maddr(void)
+u64 alloc_pgtable_maddr(struct domain *d, unsigned long npages)
{
struct page_info *pg;
u64 *vaddr;
+ unsigned long mfn;
- pg = alloc_domheap_page(NULL, 0);
- vaddr = map_domain_page(page_to_mfn(pg));
- if ( !vaddr )
+ pg = alloc_domheap_pages(NULL, get_order_from_pages(npages),
+ d ? MEMF_node(domain_to_node(d)) : 0);
+ if ( !pg )
return 0;
- memset(vaddr, 0, PAGE_SIZE);
+ mfn = page_to_mfn(pg);
+ vaddr = map_domain_page(mfn);
+ memset(vaddr, 0, PAGE_SIZE * npages);
- iommu_flush_cache_page(vaddr);
+ iommu_flush_cache_page(vaddr, npages);
unmap_domain_page(vaddr);
- return page_to_maddr(pg);
+ return (u64)mfn << PAGE_SHIFT_4K;
}
void free_pgtable_maddr(u64 maddr)
return;
}
/* Multiple mirq may be mapped to one isa irq */
- for ( i = find_first_bit(dpci->mapping, NR_PIRQS);
- i < NR_PIRQS;
- i = find_next_bit(dpci->mapping, NR_PIRQS, i + 1) )
+ for ( i = find_first_bit(dpci->mapping, NR_IRQS);
+ i < NR_IRQS;
+ i = find_next_bit(dpci->mapping, NR_IRQS, i + 1) )
{
list_for_each_entry_safe ( digl, tmp,
&dpci->mirq[i].digl_list, list )
}
spin_unlock(&d->event_lock);
}
+
+void iommu_set_dom0_mapping(struct domain *d)
+{
+ u64 i, j, tmp, max_pfn;
+ extern int xen_in_range(paddr_t start, paddr_t end);
+
+ BUG_ON(d->domain_id != 0);
+
+ max_pfn = max_t(u64, max_page, 0x100000000ull >> PAGE_SHIFT);
+
+ for ( i = 0; i < max_pfn; i++ )
+ {
+ /*
+ * Set up 1:1 mapping for dom0. Default to use only conventional RAM
+ * areas and let RMRRs include needed reserved regions. When set, the
+ * inclusive mapping maps in everything below 4GB except unusable
+ * ranges.
+ */
+ if ( !page_is_ram_type(i, RAM_TYPE_CONVENTIONAL) &&
+ (!iommu_inclusive_mapping ||
+ page_is_ram_type(i, RAM_TYPE_UNUSABLE)) )
+ continue;
+
+ /* Exclude Xen bits */
+ if ( xen_in_range(i << PAGE_SHIFT, (i + 1) << PAGE_SHIFT) )
+ continue;
+
+ tmp = 1 << (PAGE_SHIFT - PAGE_SHIFT_4K);
+ for ( j = 0; j < tmp; j++ )
+ iommu_map_page(d, (i*tmp+j), (i*tmp+j));
+ }
+}
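A worked example of the page-splitting arithmetic above (editorial; assumes ia64's 16K pages):

/*
 * With PAGE_SHIFT == 14 and PAGE_SHIFT_4K == 12, tmp == 1 << 2 == 4, so
 * frame i is entered into the VT-d tables as the four consecutive 4K
 * pages i*4 .. i*4+3.  On x86 (PAGE_SHIFT == 12) tmp is 1 and the inner
 * loop collapses to a single iommu_map_page() per frame.
 */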
xfree(text_buf);
}
-void __init vesa_endboot(void)
+void __init vesa_endboot(bool_t keep)
{
- xpos = 0;
- vga_puts = vesa_scroll_puts;
+ if ( keep )
+ {
+ xpos = 0;
+ vga_puts = vesa_scroll_puts;
+ }
+ else
+ {
+ unsigned int i, bpp = (vlfb_info.bits_per_pixel + 7) >> 3;
+ for ( i = 0; i < vlfb_info.height; i++ )
+ memset(lfb + i * vlfb_info.bytes_per_line, 0,
+ vlfb_info.width * bpp);
+ }
}
#if defined(CONFIG_X86)
#ifdef CONFIG_X86_64
void vesa_early_init(void);
-void vesa_endboot(void);
+void vesa_endboot(bool_t keep);
#else
#define vesa_early_init() ((void)0)
-#define vesa_endboot() ((void)0)
+#define vesa_endboot(x) ((void)0)
#endif
void __init vga_init(void)
switch ( vga_console_info.video_type )
{
case XEN_VGATYPE_TEXT_MODE_3:
- if ( memory_is_conventional_ram(0xB8000) ||
+ if ( page_is_ram_type(paddr_to_pfn(0xB8000), RAM_TYPE_CONVENTIONAL) ||
((video = ioremap(0xB8000, 0x8000)) == NULL) )
return;
outw(0x200a, 0x3d4); /* disable cursor */
printk("Xen is %s VGA console.\n",
vgacon_keep ? "keeping" : "relinquishing");
- vesa_endboot();
-
if ( !vgacon_keep )
vga_puts = vga_noop_puts;
+
+ switch ( vga_console_info.video_type )
+ {
+ case XEN_VGATYPE_TEXT_MODE_3:
+ if ( !vgacon_keep )
+ memset(video, 0, columns * lines * 2);
+ break;
+ case XEN_VGATYPE_VESA_LFB:
+ vesa_endboot(vgacon_keep);
+ break;
+ default:
+ BUG();
+ }
}
static void vga_text_puts(const char *s)
mv -f $@.new $@
compat/%.i: compat/%.c Makefile
- $(CPP) $(CFLAGS) $(cppflags-y) -o $@ $<
+ $(CPP) $(filter-out -M% .%.d,$(CFLAGS)) $(cppflags-y) -o $@ $<
compat/%.c: public/%.h xlat.lst Makefile
mkdir -p $(@D)
* published by the Free Software Foundation.
*/
+#ifndef __XEN_CPUFREQ_PM_H__
+#define __XEN_CPUFREQ_PM_H__
+
#include <xen/types.h>
#include <xen/list.h>
#include <xen/cpumask.h>
#include "processor_perf.h"
-#define CPUFREQ_NAME_LEN 16
+DECLARE_PER_CPU(spinlock_t, cpufreq_statistic_lock);
struct cpufreq_governor;
extern int __cpufreq_set_policy(struct cpufreq_policy *data,
struct cpufreq_policy *policy);
+void cpufreq_cmdline_parse(char *);
+
#define CPUFREQ_SHARED_TYPE_NONE (0) /* None */
#define CPUFREQ_SHARED_TYPE_HW (1) /* HW does needed coordination */
#define CPUFREQ_SHARED_TYPE_ALL (2) /* All dependent CPUs should set freq */
char name[CPUFREQ_NAME_LEN];
int (*governor)(struct cpufreq_policy *policy,
unsigned int event);
+ void (*handle_option)(const char *name, const char *value);
+ struct list_head governor_list;
};
+extern struct cpufreq_governor *cpufreq_opt_governor;
extern struct cpufreq_governor cpufreq_gov_dbs;
-#define CPUFREQ_DEFAULT_GOVERNOR &cpufreq_gov_dbs
+extern struct cpufreq_governor cpufreq_gov_userspace;
+extern struct cpufreq_governor cpufreq_gov_performance;
+extern struct cpufreq_governor cpufreq_gov_powersave;
+
+extern int cpufreq_register_governor(struct cpufreq_governor *governor);
+extern int cpufreq_unregister_governor(struct cpufreq_governor *governor);
+extern struct cpufreq_governor *__find_governor(const char *governor);
+#define CPUFREQ_DEFAULT_GOVERNOR &cpufreq_gov_userspace
/* pass a target to the cpufreq driver */
extern int __cpufreq_driver_target(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation);
-extern int __cpufreq_driver_getavg(struct cpufreq_policy *policy);
+
+#define GOV_GETAVG 1
+#define USR_GETAVG 2
+extern int cpufreq_driver_getavg(unsigned int cpu, unsigned int flag);
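An editorial sketch of how the new flag is meant to be used (the call sites named here are assumptions, not part of this hunk): a governor such as ondemand would call cpufreq_driver_getavg(cpu, GOV_GETAVG), while the userspace/sysctl query path would pass USR_GETAVG; the driver receives the same flag through its ->getavg(cpu, flag) hook declared further down.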
static __inline__ int
__cpufreq_governor(struct cpufreq_policy *policy, unsigned int event)
#define CPUFREQ_RELATION_H 1 /* highest frequency below or at target */
struct cpufreq_driver {
+ char name[CPUFREQ_NAME_LEN];
int (*init)(struct cpufreq_policy *policy);
int (*verify)(struct cpufreq_policy *policy);
int (*target)(struct cpufreq_policy *policy,
unsigned int target_freq,
unsigned int relation);
unsigned int (*get)(unsigned int cpu);
- unsigned int (*getavg)(unsigned int cpu);
+ unsigned int (*getavg)(unsigned int cpu, unsigned int flag);
int (*exit)(struct cpufreq_policy *policy);
};
};
int cpufreq_governor_dbs(struct cpufreq_policy *policy, unsigned int event);
+int get_cpufreq_ondemand_para(uint32_t *sampling_rate_max,
+ uint32_t *sampling_rate_min,
+ uint32_t *sampling_rate,
+ uint32_t *up_threshold);
+int write_ondemand_sampling_rate(unsigned int sampling_rate);
+int write_ondemand_up_threshold(unsigned int up_threshold);
+
+int write_userspace_scaling_setspeed(unsigned int cpu, unsigned int freq);
+#endif /* __XEN_CPUFREQ_PM_H__ */
int get_cpu_id(u8);
int powernow_cpufreq_init(void);
+void cpufreq_residency_update(unsigned int, uint8_t);
void cpufreq_statistic_update(unsigned int, uint8_t, uint8_t);
int cpufreq_statistic_init(unsigned int);
void cpufreq_statistic_exit(unsigned int);
int cpufreq_add_cpu(unsigned int);
int cpufreq_del_cpu(unsigned int);
-uint64_t get_cpu_idle_time(unsigned int);
-
struct processor_performance {
uint32_t state;
uint32_t platform_limit;
extern struct pm_px *cpufreq_statistic_data[NR_CPUS];
-int xenpf_copy_px_states(struct processor_performance *pxpt,
- struct xen_processor_performance *dom0_px_info);
-
int cpufreq_cpu_init(unsigned int cpuid);
#endif /* __XEN_PROCESSOR_PM_H__ */
#define BUG() __bug(__FILE__, __LINE__)
#define WARN() __warn(__FILE__, __LINE__)
-#define dump_execution_state() printk("FIXME: implement ia64 dump_execution_state()\n")
+#define dump_execution_state() \
+ do { \
+ printk("FIXME: implement ia64 dump_execution_state()\n"); \
+ dump_stack(); \
+ } while (0)
+
#define vcpu_show_execution_state(v) printk("FIXME: implement ia64 vcpu_show_execution_state()\n")
#endif /* __IA64_BUG_H__ */
struct { unsigned long qp:6, imm20:20, :1, x6:6, :3, i:1, major:4; };
} INST64_B9;
+typedef union U_INST64_I18 {
+ IA64_INST inst;
+ struct { unsigned long qp:6, imm20:20, y:1, x6:6, x3:3, i:1, major:4; };
+} INST64_I18;
+
typedef union U_INST64_I19 {
IA64_INST inst;
struct { unsigned long qp:6, imm20:20, :1, x6:6, x3:3, i:1, major:4; };
INST64_B4 B4; // used in build_hypercall_bundle only
INST64_B8 B8; // rfi, bsw.[01]
INST64_B9 B9; // break.b
+ INST64_I18 I18; // nop.i used in build_fpswa_hypercall_bundle only
INST64_I19 I19; // used in build_hypercall_bundle only
INST64_I26 I26; // mov register to ar (I unit)
INST64_I27 I27; // mov immediate to ar (I unit)
// FIXME?: x86-ism used in xen/mm.h
#define LOCK_PREFIX
-extern unsigned long xenheap_phys_end;
extern unsigned long total_pages;
extern unsigned long xen_pstart;
extern unsigned long xenheap_size;
// FIXME SMP: leave SMP for a later time
///////////////////////////////////////////////////////////////
// xen/include/asm/config.h
-// Natural boundary upon TR size to define xenheap space
-#define XENHEAP_DEFAULT_MB (1 << (KERNEL_TR_PAGE_SHIFT - 20))
-#define XENHEAP_DEFAULT_SIZE (1 << KERNEL_TR_PAGE_SHIFT)
#define ELFSIZE 64
///////////////////////////////////////////////////////////////
* Dan Magenheimer (dan.magenheimer@hp.com)
*/
+#define __IA64_XEN_HYPERCALL_DEFAULT 0x1000
+#define __IA64_XEN_HYPERCALL_DEFAULT_STR "0x1000"
+
/* Portion of guest physical memory space reserved for PAL/SAL/EFI/ACPI
data and code. */
#define FW_BASE_PADDR 0x0000UL
#define FW_HYPERCALL_PAL_CALL_INDEX 0x80UL
#define FW_HYPERCALL_PAL_CALL_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_PAL_CALL_INDEX)
#define FW_HYPERCALL_PAL_CALL 0x1000UL
+#define FW_HYPERCALL_PAL_CALL_ASM 0x1000
/*
* SAL consists of a table of descriptors, one of which (type=0)
/*
* This is a hypercall number for FPSWA.
- * FPSWA hypercall uses 2 bundles for a pseudo-entry-point and a hypercall-patch.
+ * FPSWA hypercall uses one bundle for a pseudo-entry-point
+ * and 14 bundles for a hypercall-patch.
+ *
+ * 0x500 was used before, but that implementation is broken.
+ * To keep the hypercall ABI, 0x500 is obsoleted and 0x501 is allocated
+ * for the fpswa hypercall.
*/
#define FW_HYPERCALL_FPSWA_ENTRY_INDEX 0x90UL
#define FW_HYPERCALL_FPSWA_PATCH_INDEX 0x91UL
#define FW_HYPERCALL_FPSWA_ENTRY_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_FPSWA_ENTRY_INDEX)
#define FW_HYPERCALL_FPSWA_PATCH_PADDR FW_HYPERCALL_PADDR(FW_HYPERCALL_FPSWA_PATCH_INDEX)
-#define FW_HYPERCALL_FPSWA 0x500UL
+#define FW_HYPERCALL_FPSWA_BASE 0x500UL
+#define FW_HYPERCALL_FPSWA_BROKEN 0x500UL
+#define FW_HYPERCALL_FPSWA 0x501UL
+#define FW_HYPERCALL_FPSWA_STR "0x501"
/* Set the shared_info base virtual address. */
#define FW_HYPERCALL_SET_SHARED_INFO_VA 0x600UL
/* Additional OEM SAL. */
#define SAL_XEN_SAL_RETURN 0x02000000
-#ifdef __XEN__
+#if defined(__XEN__) && !defined(__ASSEMBLY__)
#include <linux/efi.h>
extern struct ia64_pal_retval xen_pal_emulator(u64, u64, u64, u64);
extern struct sal_ret_values sal_emulator (long index, unsigned long in1, unsigned long in2, unsigned long in3, unsigned long in4, unsigned long in5, unsigned long in6, unsigned long in7);
#define printk(fmt, args ...) xc_dom_printf(fmt, ## args)
#define BUG_ON(p) assert(!(p))
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))
+#define BUILD_BUG_ON(condition) ((void)sizeof(struct { int:-!!(condition); }))
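A short illustration of why the new BUILD_BUG_ON form works (editorial): the anonymous bit-field width is -!!(condition), i.e. 0 when the condition is false and -1 when it is true, and a negative width is a compile-time error. Unlike the old char-array form, a non-constant condition is always rejected instead of silently becoming a variable-length array.

/* Compiles: the condition is false, so the bit-field width is 0. */
BUILD_BUG_ON(1 == 2);
/* Would fail to compile: the condition is true, width would be -1. */
/* BUILD_BUG_ON(1 == 1); */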
//for sort in linux/sort.h.
#define sort(base, num, size, cmp, swap) qsort((base), (num), (size), (cmp))
#include <asm/vmx_platform.h>
#include <xen/list.h>
#include <xen/cpumask.h>
+#include <xen/mm.h>
#include <asm/fpswa.h>
#include <xen/rangeset.h>
extern void panic_domain(struct pt_regs *, const char *, ...)
__attribute__ ((noreturn, format (printf, 2, 3)));
+#define has_arch_pdevs(d) (!list_empty(&(d)->arch.pdev_list))
+
struct mm_struct {
volatile pgd_t * pgd;
// atomic_t mm_users; /* How many users with user space? */
unsigned char rid_bits; /* number of virtual rid bits (default: 18) */
int breakimm; /* The imm value for hypercalls. */
+ struct list_head pdev_list;
struct virtual_platform_def vmx_platform;
#define hvm_domain vmx_platform /* platform defs are not vmx specific */
/* Address of SAL emulator data */
struct xen_sal_data *sal_data;
+ /* Shared page for notifying that explicit PIRQ EOI is required. */
+ unsigned long *pirq_eoi_map;
+ unsigned long pirq_eoi_map_mfn;
+
/* Address of efi_runtime_services_t (placed in domain memory) */
void *efi_runtime;
/* Address of fpswa_interface_t (placed in domain memory) */
/* Continuable mm_teardown() */
unsigned long mm_teardown_offset;
/* Continuable domain_relinquish_resources() */
- struct list_head relmem_list;
+ struct page_list_head relmem_list;
};
#define INT_ENABLE_OFFSET(v) \
(sizeof(vcpu_info_t) * (v)->vcpu_id + \
char irq_new_condition; // vpsr.i/vtpr change, check for pending VHPI
char hypercall_continuation;
- fpswa_ret_t fpswa_ret; /* save return values of FPSWA emulation */
+ fpswa_ret_t fpswa_ret; /* save return values of FPSWA emulation */
struct timer hlt_timer;
struct arch_vmx_struct arch_vmx; /* Virtual Machine Extensions */
unsigned long iim, unsigned long itir, unsigned long arg5,
unsigned long arg6, unsigned long arg7, unsigned long stack);
+void
+ia64_lazy_load_fpu(struct vcpu *vcpu);
+
+int construct_dom0(
+ struct domain *d,
+ unsigned long image_start, unsigned long image_len,
+ unsigned long initrd_start, unsigned long initrd_len,
+ char *cmdline);
+
#endif /* __ASM_DOMAIN_H__ */
/*
#define __ARCH_IRQ_STAT 1
#define HARDIRQ_BITS 14
#include <linux/hardirq.h>
+#include <xen/sched.h>
#define local_softirq_pending() (local_cpu_data->softirq_pending)
--- /dev/null
+#ifndef __ASM_IA64_HVM_IOMMU_H__
+#define __ASM_IA64_HVM_IOMMU_H__
+
+#include <asm/hvm/irq.h>
+#include <public/event_channel.h>
+#include <public/arch-ia64/hvm/save.h>
+#include <asm/hw_irq.h>
+#include <asm/iosapic.h>
+
+struct iommu_ops;
+extern struct iommu_ops intel_iommu_ops;
+extern int intel_vtd_setup(void);
+
+#define iommu_get_ops() (&intel_iommu_ops)
+#define iommu_hardware_setup() (intel_vtd_setup())
+
+static inline int domain_irq_to_vector(struct domain *d, int irq)
+{
+ return irq;
+}
+
+static inline void ack_APIC_irq(void)
+{
+ /* TODO */
+}
+
+static inline void pci_cleanup_msi(struct pci_dev *pdev)
+{
+ /* TODO */
+}
+
+extern int assign_irq_vector (int irq);
+
+#endif /* __ASM_IA64_HVM_IOMMU_H__ */
--- /dev/null
+/******************************************************************************
+ * irq.h
+ *
+ * Interrupt distribution and delivery logic.
+ *
+ * Copyright (c) 2006, K A Fraser, XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __ASM_IA64_HVM_IRQ_H__
+#define __ASM_IA64_HVM_IRQ_H__
+
+#include <xen/irq.h>
+
+#define VIOAPIC_NUM_PINS 48
+
+#include <xen/hvm/irq.h>
+
+struct hvm_hw_pci_irqs {
+ /*
+ * Virtual interrupt wires for a single PCI bus.
+ * Indexed by: device*4 + INTx#.
+ */
+ union {
+ DECLARE_BITMAP(i, 32*4);
+ uint64_t pad[2];
+ };
+};
+
+struct hvm_irq {
+ /*
+ * Virtual interrupt wires for a single PCI bus.
+ * Indexed by: device*4 + INTx#.
+ */
+ struct hvm_hw_pci_irqs pci_intx;
+
+ /* Virtual interrupt and via-link for paravirtual platform driver. */
+ uint32_t callback_via_asserted;
+ union {
+ enum {
+ HVMIRQ_callback_none,
+ HVMIRQ_callback_gsi,
+ HVMIRQ_callback_pci_intx
+ } callback_via_type;
+ };
+ union {
+ uint32_t gsi;
+ struct { uint8_t dev, intx; } pci;
+ } callback_via;
+
+ /*
+ * Number of wires asserting each GSI.
+ *
+ * GSIs 0-15 are the ISA IRQs. ISA devices map directly into this space
+ * except ISA IRQ 0, which is connected to GSI 2.
+ * PCI links map into this space via the PCI-ISA bridge.
+ *
+ * GSIs 16+ are used only by PCI devices. The mapping from PCI device to
+ * GSI is as follows: ((device*4 + device/8 + INTx#) & 31) + 16
+ */
+ u8 gsi_assert_count[VIOAPIC_NUM_PINS];
+
+ /*
+ * GSIs map onto PIC/IO-APIC in the usual way:
+ * 0-7: Master 8259 PIC, IO-APIC pins 0-7
+ * 8-15: Slave 8259 PIC, IO-APIC pins 8-15
+ * 16+ : IO-APIC pins 16+
+ */
+
+ /* Last VCPU that was delivered a LowestPrio interrupt. */
+ u8 round_robin_prev_vcpu;
+
+ struct hvm_irq_dpci *dpci;
+};
+
+#define hvm_pci_intx_gsi(dev, intx) \
+ (((((dev)<<2) + ((dev)>>3) + (intx)) & 31) + 16)
+#define hvm_pci_intx_link(dev, intx) \
+ (((dev) + (intx)) & 3)
+
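A worked example of the two macros above (editorial): for PCI device 2, pin INTA# (intx == 0), hvm_pci_intx_gsi(2, 0) == (((2<<2) + (2>>3) + 0) & 31) + 16 == 24 and hvm_pci_intx_link(2, 0) == (2 + 0) & 3 == 2, i.e. the wire shows up on GSI 24 and is routed through PCI link 2, matching the formula in the comment block above.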
+#define IA64_INVALID_VECTOR ((unsigned int)((int)-1))
+static inline unsigned int irq_to_vector(int irq)
+{
+ int acpi_gsi_to_irq (u32 gsi, unsigned int *irq);
+ unsigned int vector;
+
+ if ( acpi_gsi_to_irq(irq, &vector) < 0)
+ return 0;
+
+ return vector;
+}
+
+extern u8 irq_vector[NR_IRQS];
+extern int vector_irq[NR_VECTORS];
+
+#endif /* __ASM_IA64_HVM_IRQ_H__ */
#ifndef __IA64_IOCAP_H__
#define __IA64_IOCAP_H__
-extern int ioports_permit_access(struct domain *d,
+extern int ioports_permit_access(struct domain *d, unsigned int gs,
unsigned int s, unsigned int e);
extern int ioports_deny_access(struct domain *d,
unsigned int s, unsigned int e);
gcc_intrin.h -> linux/include/asm-ia64/gcc_intrin.h
ia64regs.h -> linux/include/asm-ia64/ia64regs.h
io.h -> linux/include/asm-ia64/io.h
+hw_irq.h -> linux/include/asm-ia64/hw_irq.h
kregs.h -> linux/include/asm-ia64/kregs.h
mca_asm.h -> linux/include/asm-ia64/mca_asm.h
meminit.h -> linux/include/asm-ia64/meminit.h
#include <asm/numa.h>
#ifdef XEN
#include <xen/nodemask.h>
+extern int acpi_dmar_init(void);
#endif
#define COMPILER_DEPENDENT_INT64 long
--- /dev/null
+#ifndef _ASM_IA64_HW_IRQ_H
+#define _ASM_IA64_HW_IRQ_H
+
+/*
+ * Copyright (C) 2001-2003 Hewlett-Packard Co
+ * David Mosberger-Tang <davidm@hpl.hp.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/sched.h>
+#include <linux/types.h>
+#include <linux/profile.h>
+
+#include <asm/machvec.h>
+#include <asm/ptrace.h>
+#include <asm/smp.h>
+
+typedef u8 ia64_vector;
+
+/*
+ * 0 special
+ *
+ * 1,3-14 are reserved from firmware
+ *
+ * 16-255 (vectored external interrupts) are available
+ *
+ * 15 spurious interrupt (see IVR)
+ *
+ * 16 lowest priority, 255 highest priority
+ *
+ * 15 classes of 16 interrupts each.
+ */
+#define IA64_MIN_VECTORED_IRQ 16
+#define IA64_MAX_VECTORED_IRQ 255
+#define IA64_NUM_VECTORS 256
+
+#define AUTO_ASSIGN_IRQ (-1)
+
+#define IA64_SPURIOUS_INT_VECTOR 0x0f
+
+/*
+ * Vectors 0x10-0x1f are used for low priority interrupts, e.g. CMCI.
+ */
+#define IA64_CPEP_VECTOR 0x1c /* corrected platform error polling vector */
+#define IA64_CMCP_VECTOR 0x1d /* corrected machine-check polling vector */
+#define IA64_CPE_VECTOR 0x1e /* corrected platform error interrupt vector */
+#define IA64_CMC_VECTOR 0x1f /* corrected machine-check interrupt vector */
+/*
+ * Vectors 0x20-0x2f are reserved for legacy ISA IRQs.
+ */
+#define IA64_FIRST_DEVICE_VECTOR 0x30
+#define IA64_LAST_DEVICE_VECTOR 0xe7
+#define IA64_NUM_DEVICE_VECTORS (IA64_LAST_DEVICE_VECTOR - IA64_FIRST_DEVICE_VECTOR + 1)
+
+#define IA64_MCA_RENDEZ_VECTOR 0xe8 /* MCA rendez interrupt */
+#define IA64_PERFMON_VECTOR 0xee /* performance monitor interrupt vector */
+#define IA64_TIMER_VECTOR 0xef /* use highest-prio group 15 interrupt for timer */
+#define IA64_MCA_WAKEUP_VECTOR 0xf0 /* MCA wakeup (must be >MCA_RENDEZ_VECTOR) */
+#define IA64_IPI_RESCHEDULE 0xfd /* SMP reschedule */
+#define IA64_IPI_VECTOR 0xfe /* inter-processor interrupt vector */
+
+/* Used for encoding redirected irqs */
+
+#define IA64_IRQ_REDIRECTED (1 << 31)
+
+/* IA64 inter-cpu interrupt related definitions */
+
+#define IA64_IPI_DEFAULT_BASE_ADDR 0xfee00000
+
+/* Delivery modes for inter-cpu interrupts */
+enum {
+ IA64_IPI_DM_INT = 0x0, /* pend an external interrupt */
+ IA64_IPI_DM_PMI = 0x2, /* pend a PMI */
+ IA64_IPI_DM_NMI = 0x4, /* pend an NMI (vector 2) */
+ IA64_IPI_DM_INIT = 0x5, /* pend an INIT interrupt */
+ IA64_IPI_DM_EXTINT = 0x7, /* pend an 8259-compatible interrupt. */
+};
+
+extern __u8 isa_irq_to_vector_map[16];
+#define isa_irq_to_vector(x) isa_irq_to_vector_map[(x)]
+
+extern struct hw_interrupt_type irq_type_ia64_lsapic; /* CPU-internal interrupt controller */
+
+extern int assign_irq_vector (int irq); /* allocate a free vector */
+extern void free_irq_vector (int vector);
+extern void ia64_send_ipi (int cpu, int vector, int delivery_mode, int redirect);
+extern void register_percpu_irq (ia64_vector vec, struct irqaction *action);
+#ifdef XEN
+extern int xen_do_IRQ(ia64_vector vector);
+extern int setup_vector(unsigned int vec, struct irqaction *action);
+#endif
+
+static inline void
+hw_resend_irq (struct hw_interrupt_type *h, unsigned int vector)
+{
+ platform_send_ipi(smp_processor_id(), vector, IA64_IPI_DM_INT, 0);
+}
+
+/*
+ * Default implementations for the irq-descriptor API:
+ */
+
+extern irq_desc_t irq_desc[NR_IRQS];
+
+#ifndef CONFIG_IA64_GENERIC
+static inline unsigned int
+__ia64_local_vector_to_irq (ia64_vector vec)
+{
+ return (unsigned int) vec;
+}
+#endif
+
+/*
+ * Next follows the irq descriptor interface. On IA-64, each CPU supports 256 interrupt
+ * vectors. On smaller systems, there is a one-to-one correspondence between interrupt
+ * vectors and the Linux irq numbers. However, larger systems may have multiple interrupt
+ * domains meaning that the translation from vector number to irq number depends on the
+ * interrupt domain that a CPU belongs to. This API abstracts such platform-dependent
+ * differences and provides a uniform means to translate between vector and irq numbers
+ * and to obtain the irq descriptor for a given irq number.
+ */
+
+/* Return a pointer to the irq descriptor for IRQ. */
+static inline irq_desc_t *
+irq_descp (int irq)
+{
+ return irq_desc + irq;
+}
+
+/*
+ * Convert the local IA-64 vector to the corresponding irq number. This translation is
+ * done in the context of the interrupt domain that the currently executing CPU belongs
+ * to.
+ */
+static inline unsigned int
+local_vector_to_irq (ia64_vector vec)
+{
+ return platform_local_vector_to_irq(vec);
+}
+
+#endif /* _ASM_IA64_HW_IRQ_H */
static inline unsigned int iosapic_read(char __iomem *iosapic, unsigned int reg)
{
+#ifdef XEN
+ if(iommu_enabled && (reg >= 10)){
+ int apic = find_iosapic_by_addr((unsigned long)iosapic);
+ return io_apic_read_remap_rte(apic, reg);
+ }
+#endif
writel(reg, iosapic + IOSAPIC_REG_SELECT);
return readl(iosapic + IOSAPIC_WINDOW);
}
static inline void iosapic_write(char __iomem *iosapic, unsigned int reg, u32 val)
{
+#ifdef XEN
+ if (iommu_enabled && (reg >= 10)){
+ int apic = find_iosapic_by_addr((unsigned long)iosapic);
+ iommu_update_ire_from_apic(apic, reg, val);
+ return;
+ }
+#endif
writel(reg, iosapic + IOSAPIC_REG_SELECT);
writel(val, iosapic + IOSAPIC_WINDOW);
}
extern unsigned long ia64_xen_vector[];
#endif /* XEN */
+#define IO_APIC_BASE(idx) ((unsigned int *)iosapic_lists[idx].addr)
+#define IO_APIC_ID(idx) (iosapic_lists[idx].id)
+
# endif /* !__ASSEMBLY__ */
#endif /* __ASM_IA64_IOSAPIC_H */
#define virt_addr_valid(kaddr) mfn_valid(__pa(kaddr) >> PAGE_SHIFT)
+#ifndef XEN
#ifdef CONFIG_VIRTUAL_MEM_MAP
extern int ia64_mfn_valid (unsigned long pfn);
#else
#define page_to_maddr(page) (page_to_mfn(page) << PAGE_SHIFT)
#define virt_to_page(kaddr) mfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
+#endif
typedef union ia64_va {
struct {
#include <asm/system.h>
#include <asm/types.h>
#ifdef XEN
+#include <asm/xenpage.h>
#ifndef __ASSEMBLY__
#include <xen/sched.h> /* needed for mm_struct (via asm/domain.h) */
#endif
#endif
+#ifndef XEN
#define IA64_MAX_PHYS_BITS 50 /* max. number of physical address bits (architected) */
+#endif
/*
* First, define the various bits in a PTE. Note that the PTE format
#define local_cpu_data (&__ia64_per_cpu_var(cpu_info))
#define cpu_data(cpu) (&per_cpu(cpu_info, cpu))
+#ifdef CONFIG_SMP
+#define cpu_to_core(cpu) (cpu_data(cpu)->core_id)
+#define cpu_to_socket(cpu) (cpu_data(cpu)->socket_id)
+#else
+#define cpu_to_core(cpu) 0
+#define cpu_to_socket(cpu) 0
+#endif
+
extern void identify_cpu (struct cpuinfo_ia64 *);
extern void print_cpu_info (struct cpuinfo_ia64 *);
extern int smp_call_function_single (int cpuid, void (*func) (void *info), void *info,
int retry, int wait);
extern void smp_send_reschedule (int cpu);
+#ifdef XEN
+extern void lock_ipi_calllock(unsigned long *flags);
+extern void unlock_ipi_calllock(unsigned long flags);
+#else
extern void lock_ipi_calllock(void);
extern void unlock_ipi_calllock(void);
+#endif
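A minimal usage sketch of the Xen variant declared above (editorial; the surrounding call is illustrative):

/* The Xen build now saves and restores the interrupt state explicitly. */
unsigned long flags;
lock_ipi_calllock(&flags);
/* ... set up and send the cross-CPU call ... */
unlock_ipi_calllock(flags);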
extern void identify_siblings (struct cpuinfo_ia64 *);
#else
#ifdef DEBUG_SPINLOCK
void *locker;
#endif
-#ifdef XEN
- unsigned char recurse_cpu;
- unsigned char recurse_cnt;
-#endif
-} spinlock_t;
+} raw_spinlock_t;
#ifdef XEN
#ifdef DEBUG_SPINLOCK
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 0, NULL, -1, 0 }
+#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 0, NULL }
#else
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 0, -1, 0 }
+#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 0 }
#endif
-static inline void spin_lock_init(spinlock_t *lock)
-{
- *lock = ((spinlock_t)SPIN_LOCK_UNLOCKED);
-}
#else
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 0 }
-#define spin_lock_init(x) ((x)->lock = 0)
+#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 0 }
#endif
#ifdef ASM_SUPPORTED
#define IA64_SPINLOCK_CLOBBERS "ar.ccv", "ar.pfs", "p14", "p15", "r27", "r28", "r29", "r30", "b6", "memory"
static inline void
-_raw_spin_lock_flags (spinlock_t *lock, unsigned long flags)
+_raw_spin_lock_flags (raw_spinlock_t *lock, unsigned long flags)
{
register volatile unsigned int *ptr asm ("r31") = &lock->lock;
} while (0)
#endif /* !ASM_SUPPORTED */
-#define spin_is_locked(x) ((x)->lock != 0)
-#define _raw_spin_unlock(x) do { barrier(); ((spinlock_t *) x)->lock = 0; } while (0)
+#define _raw_spin_is_locked(x) ((x)->lock != 0)
+#define _raw_spin_unlock(x) do { barrier(); (x)->lock = 0; } while (0)
#define _raw_spin_trylock(x) (cmpxchg_acq(&(x)->lock, 0, 1) == 0)
-#define spin_unlock_wait(x) do { barrier(); } while ((x)->lock)
typedef struct {
volatile unsigned int read_counter : 31;
#ifdef CONFIG_PREEMPT
unsigned int break_lock;
#endif
-} rwlock_t;
-#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { 0, 0 }
-
-#define rwlock_init(x) do { *(x) = (rwlock_t) RW_LOCK_UNLOCKED; } while(0)
-#define read_can_lock(rw) (*(volatile int *)(rw) >= 0)
-#define write_can_lock(rw) (*(volatile int *)(rw) == 0)
+} raw_rwlock_t;
+#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { 0, 0 }
#define _raw_read_lock(rw) \
do { \
- rwlock_t *__read_lock_ptr = (rw); \
+ raw_rwlock_t *__read_lock_ptr = (rw); \
\
while (unlikely(ia64_fetchadd(1, (int *) __read_lock_ptr, acq) < 0)) { \
ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \
#define _raw_read_unlock(rw) \
do { \
- rwlock_t *__read_lock_ptr = (rw); \
+ raw_rwlock_t *__read_lock_ptr = (rw); \
ia64_fetchadd(-1, (int *) __read_lock_ptr, rel); \
} while (0)
clear_bit(31, (x)); \
})
-#ifdef XEN
-#include <asm/xenspinlock.h>
-#endif
+#define _raw_rw_is_locked(x) (*(int *)(x) != 0)
+
#endif /* _ASM_IA64_SPINLOCK_H */
#include <asm/page.h>
#include <asm/system.h>
+#ifdef XEN
+#include <asm/pgtable.h>
+#endif
#define EFI_SUCCESS 0
#define EFI_LOAD_ERROR ( 1 | (1UL << (BITS_PER_LONG-1)))
};
extern irqreturn_t no_action(int cpl, void *dev_id, struct pt_regs *regs);
-extern int request_irq(unsigned int,
+extern int request_irq_vector(unsigned int,
irqreturn_t (*handler)(int, void *, struct pt_regs *),
unsigned long, const char *, void *);
-extern void free_irq(unsigned int, void *);
+extern void release_irq_vector(unsigned int, void *);
#endif
* 7:3 = slot
* 2:0 = function
*/
+
+#ifndef XEN
#define PCI_DEVFN(slot,func) ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn) ((devfn) & 0x07)
+#endif
/* Ioctls for /proc/bus/pci/X/Y nodes. */
#define PCIIOC_BASE ('P' << 24 | 'C' << 16 | 'I' << 8)
/*
* The pci_dev structure is used to describe PCI devices.
*/
+#ifdef XEN
+struct sn_pci_dev {
+#else
struct pci_dev {
+#endif
struct list_head global_list; /* node in list of all PCI devices */
struct list_head bus_list; /* node in per-bus list */
struct pci_bus *bus; /* bus this device is on */
struct bin_attribute *res_attr[DEVICE_COUNT_RESOURCE]; /* sysfs file for resources */
};
+#ifndef XEN
#define pci_dev_g(n) list_entry(n, struct pci_dev, global_list)
#define pci_dev_b(n) list_entry(n, struct pci_dev, bus_list)
#define to_pci_dev(n) container_of(n, struct pci_dev, dev)
{
hlist_del(&cap->next);
}
+#endif
/*
* For PCI devices, the region numbers are assigned this way:
struct pci_bus *parent; /* parent bus this bridge is on */
struct list_head children; /* list of child buses */
struct list_head devices; /* list of devices on this bus */
+#ifdef XEN
+ struct sn_pci_dev *self; /* bridge device as seen by parent */
+#else
struct pci_dev *self; /* bridge device as seen by parent */
+#endif
struct resource *resource[PCI_BUS_NUM_RESOURCES];
/* address space routed to this bus */
};
/* ---------------------------------------------------------------- */
-
+#ifndef XEN
struct module;
struct pci_driver {
struct list_head node;
#endif /* CONFIG_PCI */
/* Include architecture-dependent settings and functions */
+#endif
#include <asm/pci.h>
+#ifndef XEN
/* these helpers provide future and backwards compatibility
* for accessing popular PCI BAR info */
#define pci_resource_start(dev,bar) ((dev)->resource[(bar)].start)
void pci_fixup_device(enum pci_fixup_pass pass, struct pci_dev *dev);
extern int pci_pci_problems;
+#endif
#define PCIPCI_FAIL 1 /* No PCI PCI DMA */
#define PCIPCI_TRITON 2
#define PCIPCI_NATOMA 4
fpswa.h -> linux/include/asm-ia64/fpswa.h
fpu.h -> linux/include/asm-ia64/fpu.h
hdreg.h -> linux/include/asm-ia64/hdreg.h
-hw_irq.h -> linux/include/asm-ia64/hw_irq.h
intrinsics.h -> linux/include/asm-ia64/intrinsics.h
ioctl.h -> linux/include/asm-ia64/ioctl.h
irq.h -> linux/include/asm-ia64/irq.h
* 02/29/00 D.Mosberger moved most things into hw_irq.h
*/
+#define NR_VECTORS 256
#define NR_IRQS 256
-#define NR_IRQ_VECTORS NR_IRQS
static __inline__ int
irq_canonicalize (int irq)
#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */
#define PCI_PM_CTRL 4 /* PM control and status register */
#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
-#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */
+#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */
#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
#include <xen/list.h>
#include <xen/spinlock.h>
#include <xen/perfc.h>
-#include <xen/sched.h>
#include <asm/processor.h>
#include <asm/atomic.h>
#define PRtype_info "016lx"
+#ifdef CONFIG_IA64_SHRINK_PAGE_LIST
+/*
+ * See include/xen/mm.h.
+ * To compress page_list_entry, every physical address must be
+ * representable within (32 + PAGE_SHIFT) bits.
+ * Note that this is lower than IA64_MAX_PHYS_BITS = 50.
+ */
+#undef page_list_entry
+struct page_list_entry
+{
+ u32 next, prev;
+};
+#endif
+
+#ifdef CONFIG_IA64_PICKLE_DOMAIN
+typedef u32 __ia64_domain_t;
+#else
+typedef unsigned long __ia64_domain_t;
+#endif
+
struct page_info
{
/* Each frame can be threaded onto a doubly-linked list. */
- struct list_head list;
+ struct page_list_entry list;
/* Reference count and various PGC_xxx flags and fields. */
- u32 count_info;
+ unsigned long count_info;
/* Context-dependent fields follow... */
union {
/* Page is in use: ((count_info & PGC_count_mask) != 0). */
struct {
- /* Owner of this page (NULL if page is anonymous). */
- u32 _domain; /* pickled format */
/* Type reference count and various PGT_xxx flags and fields. */
unsigned long type_info;
- } __attribute__ ((packed)) inuse;
+ /* Owner of this page (NULL if page is anonymous). */
+ __ia64_domain_t _domain; /* pickled format */
+ } inuse;
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
struct {
/* Order-size of the free chunk this page is the head of. */
u32 order;
- /* Mask of possibly-tainted TLBs. */
- cpumask_t cpumask;
- } __attribute__ ((packed)) free;
+ /* Do TLBs need flushing for safety before next page use? */
+ bool_t need_tlbflush;
+ } free;
} u;
/* Timestamp from 'TLB clock', used to reduce need for safety flushes. */
u32 tlbflush_timestamp;
+};
-#if 0
-// following added for Linux compiling
- page_flags_t flags;
- atomic_t _count;
- struct list_head lru; // is this the same as above "list"?
+#ifndef page_list_entry
+static inline void
+page_list_splice_init(struct page_list_head *list, struct page_list_head *head)
+{
+ if ( !page_list_empty(list) )
+ {
+ if ( head->next )
+ head->tail->list.next = page_to_mfn(list->next);
+ else
+ head->next = list->next;
+ head->tail = list->tail;
+ INIT_PAGE_LIST_HEAD(list);
+ }
+}
+#else
+# define page_list_splice_init list_splice_init
#endif
-};
#define set_page_count(p,v) atomic_set(&(p)->_count, v - 1)
* Only a small set of flags is defined so far on IA-64.
* IA-64 should eventually use the same definitions as x86_64.
*/
+#define PG_shift(idx) (BITS_PER_LONG - (idx))
+#define PG_mask(x, idx) (x ## UL << PG_shift(idx))
+
/* The following page types are MUTUALLY EXCLUSIVE. */
-#define PGT_none (0UL<<29) /* no special uses of this page */
-#define PGT_l1_page_table (1UL<<29) /* using this page as an L1 page table? */
-#define PGT_l2_page_table (2UL<<29) /* using this page as an L2 page table? */
-#define PGT_l3_page_table (3UL<<29) /* using this page as an L3 page table? */
-#define PGT_l4_page_table (4UL<<29) /* using this page as an L4 page table? */
+#define PGT_none PG_mask(0, 3) /* no special uses of this page */
+#define PGT_l1_page_table PG_mask(1, 3) /* using as an L1 page table? */
+#define PGT_l2_page_table PG_mask(2, 3) /* using as an L2 page table? */
+#define PGT_l3_page_table PG_mask(3, 3) /* using as an L3 page table? */
+#define PGT_l4_page_table PG_mask(4, 3) /* using as an L4 page table? */
/* Value 5 reserved. See asm-x86/mm.h */
/* Value 6 reserved. See asm-x86/mm.h */
-#define PGT_writable_page (7UL<<29) /* has writable mappings of this page? */
-#define PGT_type_mask (7UL<<29) /* Bits 29-31. */
+#define PGT_writable_page PG_mask(7, 3) /* has writable mappings? */
+#define PGT_type_mask PG_mask(7, 3) /* Bits 61-63. */
- /* Has this page been validated for use as its current type? */
-#define _PGT_validated 28
-#define PGT_validated (1UL<<_PGT_validated)
/* Owning guest has pinned this page to its current type? */
-#define _PGT_pinned 27
-#define PGT_pinned (1UL<<_PGT_pinned)
+#define _PGT_pinned PG_shift(4)
+#define PGT_pinned PG_mask(1, 4)
+ /* Has this page been validated for use as its current type? */
+#define _PGT_validated PG_shift(5)
+#define PGT_validated PG_mask(1, 5)
- /* 16-bit count of uses of this frame as its current type. */
-#define PGT_count_mask ((1UL<<16)-1)
+ /* Count of uses of this frame as its current type. */
+#define PGT_count_width PG_shift(7)
+#define PGT_count_mask ((1UL<<PGT_count_width)-1)
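A worked example of the new flag layout (editorial; assumes BITS_PER_LONG == 64 on ia64):

/*
 * PG_shift(3) == 61, so PGT_type_mask  == 7UL << 61  (bits 61-63),
 * PG_shift(4) == 60, so PGT_pinned     == 1UL << 60,
 * PG_shift(5) == 59, so PGT_validated  == 1UL << 59,
 * PG_shift(7) == 57, so PGT_count_mask == (1UL << 57) - 1.
 * The flags therefore live in the top bits of type_info and everything
 * below them is the type reference count.
 */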
/* Cleared when the owning guest 'frees' this page. */
-#define _PGC_allocated 31
-#define PGC_allocated (1UL<<_PGC_allocated)
- /* Bit 30 reserved. See asm-x86/mm.h */
- /* Bit 29 reserved. See asm-x86/mm.h */
- /* 29-bit count of references to this frame. */
-#define PGC_count_mask ((1UL<<29)-1)
-
-#define is_xen_heap_mfn(mfn) (((mfn) < paddr_to_pfn(xenheap_phys_end)) \
- && ((mfn) >= paddr_to_pfn(xen_pstart)))
-#define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page))
-
-extern void* xen_pickle_offset;
-#define __pickle(a) ((unsigned long)a - (unsigned long)xen_pickle_offset)
-#define __unpickle(a) (void *)(a + xen_pickle_offset)
-
-static inline struct domain *unpickle_domptr(u64 _d)
-{ return (_d == 0) ? NULL : __unpickle(_d); }
-static inline u32 pickle_domptr(struct domain *_d)
-{ return (_d == NULL) ? 0 : (u32)__pickle(_d); }
-
-#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
-#define page_set_owner(_p, _d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+#define _PGC_allocated PG_shift(1)
+#define PGC_allocated PG_mask(1, 1)
+ /* Page is Xen heap? */
+# define _PGC_xen_heap PG_shift(2)
+# define PGC_xen_heap PG_mask(1, 2)
+ /* bit PG_shift(3) reserved. See asm-x86/mm.h */
+ /* PG_mask(7, 6) reserved. See asm-x86/mm.h */
+
+ /* Page is broken? */
+#define _PGC_broken PG_shift(7)
+#define PGC_broken PG_mask(1, 7)
+ /* Page is offline pending ? */
+#define _PGC_offlining PG_shift(8)
+#define PGC_offlining PG_mask(1, 8)
+ /* Page is offlined */
+#define _PGC_offlined PG_shift(9)
+#define PGC_offlined PG_mask(1, 9)
+#define PGC_offlined_broken (PGC_offlined | PGC_broken)
+
+#define is_page_offlining(page) ((page)->count_info & PGC_offlining)
+#define is_page_offlined(page) ((page)->count_info & PGC_offlined)
+#define is_page_broken(page) ((page)->count_info & PGC_broken)
+#define is_page_online(page) (!is_page_offlined(page))
+
+ /* Count of references to this frame. */
+#define PGC_count_width PG_shift(9)
+#define PGC_count_mask ((1UL<<PGC_count_width)-1)
+
+extern unsigned long xen_fixed_mfn_start;
+extern unsigned long xen_fixed_mfn_end;
+#define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
+#define is_xen_heap_mfn(mfn) (mfn_valid(mfn) && \
+ is_xen_heap_page(mfn_to_page(mfn)))
+#define is_xen_fixed_mfn(mfn) \
+ (xen_fixed_mfn_start <= (mfn) && (mfn) <= xen_fixed_mfn_end)
+
+#ifdef CONFIG_IA64_PICKLE_DOMAIN
+#define page_get_owner(_p) \
+ ((struct domain *)((_p)->v.inuse._domain ? \
+ mfn_to_virt((_p)->v.inuse._domain) : NULL))
+#define page_set_owner(_p,_d) \
+ ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
+#else
+#define page_get_owner(_p) ((struct domain *)(_p)->u.inuse._domain)
+#define page_set_owner(_p, _d) ((_p)->u.inuse._domain = (unsigned long)(_d))
+#endif
#define XENSHARE_writable 0
#define XENSHARE_readonly 1
static inline void put_page(struct page_info *page)
{
- u32 nx, x, y = page->count_info;
+ unsigned long nx, x, y = page->count_info;
do {
- x = y;
- nx = x - 1;
+ x = y;
+ nx = x - 1;
}
while (unlikely((y = cmpxchg_rel(&page->count_info, x, nx)) != x));
if (unlikely((nx & PGC_count_mask) == 0))
- free_domheap_page(page);
+ free_domheap_page(page);
+}
+
+static inline struct domain *page_get_owner_and_reference(
+ struct page_info *page)
+{
+ unsigned long x, y = page->count_info;
+
+ do {
+ x = y;
+ /*
+ * Count == 0: Page is not allocated, so we cannot take a reference.
+ * Count == -1: Reference count would wrap, which is invalid.
+ * Count == -2: Remaining unused ref is reserved for get_page_light().
+ */
+ /*
+ * On ia64 get_page_light() is not defined, so the Count == -2 case
+ * does not strictly need handling here; it is kept only for
+ * consistency with x86.
+ */
+ if ( unlikely(((x + 2) & PGC_count_mask) <= 2) )
+ return NULL;
+ y = cmpxchg_acq(&page->count_info, x, x + 1);
+ } while (unlikely(y != x));
+
+ return page_get_owner(page);
}
/* count_info and ownership are checked atomically. */
static inline int get_page(struct page_info *page,
struct domain *domain)
{
- u64 x, nx, y = *((u64*)&page->count_info);
- u32 _domain = pickle_domptr(domain);
+ struct domain *owner = page_get_owner_and_reference(page);
- do {
- x = y;
- nx = x + 1;
- if (unlikely((x & PGC_count_mask) == 0) || /* Not allocated? */
- unlikely((nx & PGC_count_mask) == 0) || /* Count overflow? */
- unlikely((x >> 32) != _domain)) { /* Wrong owner? */
-
- gdprintk(XENLOG_INFO, "Error pfn %lx: rd=%p, od=%p, caf=%016lx, taf=%"
- PRtype_info "\n", page_to_mfn(page), domain,
- unpickle_domptr(x >> 32), x, page->u.inuse.type_info);
- return 0;
- }
- }
- while(unlikely((y = cmpxchg_acq((u64*)&page->count_info, x, nx)) != x));
- return 1;
+ if (likely(owner == domain))
+ return 1;
+
+ if (owner != NULL)
+ put_page(page);
+
+ /* if (!domain->is_dying) */ /* XXX: header inclusion hell */
+ gdprintk(XENLOG_INFO,
+ "Error pfn %lx: rd=%p, od=%p, caf=%016lx, taf=%" PRtype_info "\n",
+ page_to_mfn(page), domain,
+ owner, page->count_info, page->u.inuse.type_info);
+ return 0;
}
int is_iomem_page(unsigned long mfn);
extern void put_page_type(struct page_info *page);
-extern int get_page_type(struct page_info *page, u32 type);
+extern int get_page_type(struct page_info *page, unsigned long type);
static inline void put_page_and_type(struct page_info *page)
{
static inline int get_page_and_type(struct page_info *page,
struct domain *domain,
- u32 type)
+ unsigned long type)
{
int rc = get_page(page, domain);
extern int __assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr, unsigned long flags);
extern void assign_domain_page(struct domain *d, unsigned long mpaddr, unsigned long physaddr);
extern void assign_domain_io_page(struct domain *d, unsigned long mpaddr, unsigned long flags);
+extern int deassign_domain_mmio_page(struct domain *d, unsigned long mpaddr,
+ unsigned long phys_addr, unsigned long size);
struct p2m_entry;
extern unsigned long lookup_domain_mpa(struct domain *d, unsigned long mpaddr, struct p2m_entry* entry);
extern void *domain_mpa_to_imva(struct domain *d, unsigned long mpaddr);
#define dom0vp_get_memmap(domid, buffer) (-ENOSYS)
#endif
+int
+p2m_pod_decrease_reservation(struct domain *d,
+ xen_pfn_t gpfn, unsigned int order);
+int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
+ unsigned int order);
+
extern volatile unsigned long *mpt_table;
extern unsigned long gmfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
extern u64 translate_domain_pte(u64 pteval, u64 address, u64 itir__,
--- /dev/null
+#ifndef __ASM_MSI_H
+#define __ASM_MSI_H
+
+/*
+ * MSI Defined Data Structures
+ */
+#define MSI_ADDRESS_HEADER 0xfee
+#define MSI_ADDRESS_HEADER_SHIFT 12
+#define MSI_ADDRESS_HEADER_MASK 0xfff000
+#define MSI_ADDRESS_DEST_ID_MASK 0xfff0000f
+#define MSI_TARGET_CPU_MASK 0xff
+#define MSI_TARGET_CPU_SHIFT 4
+#define MSI_DELIVERY_MODE 0
+#define MSI_LEVEL_MODE 1 /* Edge always assert */
+#define MSI_TRIGGER_MODE 0 /* MSI is edge sensitive */
+#define MSI_PHYSICAL_MODE 0
+#define MSI_LOGICAL_MODE 1
+#define MSI_REDIRECTION_HINT_MODE 0
+
+#endif /* __ASM_MSI_H */
unsigned int limit;
unsigned int num_entries;
unsigned int num_free;
- struct list_head page_list;
+ struct page_list_head page_list;
/* XXX hash table size */
spinlock_t hash_lock;
#ifndef __FLUSHTLB_H__
#define __FLUSHTLB_H__
-#include <xen/sched.h>
+struct vcpu;
+struct domain;
/* TLB flushes can be either local (current vcpu only) or domain wide (on
all vcpus).
spinlock_t lock;
struct vcpu * lowest_vcpu;
uint64_t base_address;
- union viosapic_rte redirtbl[VIOSAPIC_NUM_PINS];
+ union vioapic_redir_entry redirtbl[VIOSAPIC_NUM_PINS];
};
void viosapic_init(struct domain *d);
unsigned long viosapic_read(struct vcpu *v, unsigned long addr,
unsigned long length);
+void hvm_dpci_eoi(struct domain *d, unsigned int guest_gsi,
+ union vioapic_redir_entry *ent);
#endif /* __ASM_IA64_VMX_VIOSAPIC_H__ */
#define _ASM_IA64_VT_H
#include <public/hvm/ioreq.h>
+#include <asm/ia64_int.h>
+
#define vmx_user_mode(regs) (((struct ia64_psr *)&(regs)->cr_ipsr)->vm == 1)
#define VCPU_LID(v) (((u64)(v)->vcpu_id)<<24)
extern int vmx_setup_platform(struct domain *d);
extern void vmx_do_resume(struct vcpu *v);
extern void vmx_io_assist(struct vcpu *v);
-extern int ia64_hypercall (struct pt_regs *regs);
+extern IA64FAULT ia64_hypercall (struct pt_regs *regs);
extern unsigned long __gpfn_to_mfn_foreign(struct domain *d, unsigned long gpfn);
extern void set_privileged_operation_isr (struct vcpu *vcpu,int inst);
extern void set_rsv_reg_field_isr (struct vcpu *vcpu);
extern void vmx_send_assist_req(struct vcpu *v);
extern void deliver_pal_init(struct vcpu *vcpu);
extern void vmx_pend_pal_init(struct domain *d);
+extern void vmx_lazy_load_fpu(struct vcpu *vcpu);
static inline vcpu_iodata_t *get_vio(struct vcpu *v)
{
#include <public/xen.h>
#include <public/hvm/params.h>
+#include <asm/hvm/irq.h>
#include <asm/viosapic.h>
#include <asm/hvm/vacpi.h>
+#include <xen/hvm/iommu.h>
struct vmx_ioreq_page {
spinlock_t lock;
/* One IOSAPIC now... */
struct viosapic viosapic;
struct vacpi vacpi;
+ /* Pass-through VT-d */
+ struct hvm_irq irq;
+ struct hvm_iommu hvm_iommu;
} vir_plat_t;
static inline int __fls(uint32_t word)
#ifndef _ASM_IA64_XENPAGE_H
#define _ASM_IA64_XENPAGE_H
+/* moved from xen/include/asm-ia64/linux-xen/asm/pgtable.h to compile */
+#define IA64_MAX_PHYS_BITS 50 /* max. number of physical address bits (architected) */
+
#ifndef __ASSEMBLY__
-#undef mfn_valid
-#undef page_to_mfn
-#undef mfn_to_page
#ifdef CONFIG_VIRTUAL_FRAME_TABLE
-#undef ia64_mfn_valid
extern int ia64_mfn_valid (unsigned long pfn);
# define mfn_valid(_pfn) (((_pfn) < max_page) && ia64_mfn_valid(_pfn))
#else
#include <asm/xensystem.h>
-static inline unsigned long __virt_to_maddr(unsigned long va)
-{
- if (va - KERNEL_START < xenheap_size)
- return xen_pstart + (va - KERNEL_START);
- else
- return (va & ((1UL << 60) - 1));
-}
+/*
+ * macro: avoid header inclusion hell
+ * static inline unsigned long __virt_to_maddr(unsigned long va)
+ */
+/*
+ * Because the most significant 8 bits of the VA are used by Xen,
+ * and Xen uses cached/uncached identity mappings,
+ * IA64_MAX_PHYS_BITS can't be larger than 56.
+ */
+#define __virt_to_maddr(va) \
+ ({ \
+ unsigned long __va__ = (va); \
+ (__va__ - KERNEL_START < KERNEL_TR_PAGE_SIZE) ? \
+ xen_pstart + (__va__ - KERNEL_START) : \
+ (__va__ & ((1UL << IA64_MAX_PHYS_BITS) - 1)); \
+ })
#define virt_to_maddr(va) (__virt_to_maddr((unsigned long)va))
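A worked example of the two cases handled by the macro above (editorial): a VA inside the kernel TR mapping (va - KERNEL_START < KERNEL_TR_PAGE_SIZE) is translated relative to xen_pstart, i.e. into the physically contiguous Xen image, while any other VA is treated as an identity (cached or uncached) mapping, so only its low IA64_MAX_PHYS_BITS (50) address bits are kept and the region bits are masked off.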
-#undef page_to_maddr
-#undef virt_to_page
#define page_to_maddr(page) (page_to_mfn(page) << PAGE_SHIFT)
#define virt_to_page(kaddr) (mfn_to_page(virt_to_maddr(kaddr) >> PAGE_SHIFT))
return (paddr << 1) >> 1;
}
-#undef __pa
-#undef __va
#define __pa(x) (virt_to_maddr(x))
#define __va(x) ({xen_va _v; _v.l = (long) (x); _v.f.reg = -1; _v.p;})
extern int acpi_dmar_init(void);
+/* Incremented whenever we transition through S3. Value is 1 during boot. */
+extern uint32_t system_reset_counter;
+
#endif /*__X86_ASM_ACPI_H*/
#define APIC_LVTTHMR 0x330
#define APIC_LVTPC 0x340
#define APIC_LVT0 0x350
+#define APIC_CMCI 0x2F0
+
#define APIC_LVT_TIMER_BASE_MASK (0x3<<18)
#define GET_APIC_TIMER_BASE(x) (((x)>>18)&0x3)
#define SET_APIC_TIMER_BASE(x) (((x)<<18))
#define CONFIG_X86_IO_APIC 1
#define CONFIG_X86_PM_TIMER 1
#define CONFIG_HPET_TIMER 1
-#define CONFIG_X86_MCE_P4THERMAL 1
+#define CONFIG_X86_MCE_THERMAL 1
#define CONFIG_NUMA 1
#define CONFIG_DISCONTIGMEM 1
#define CONFIG_NUMA_EMU 1
#define CONFIG_HOTPLUG 1
#define CONFIG_HOTPLUG_CPU 1
-/*
- * Avoid deep recursion when tearing down pagetables during domain destruction,
- * causing dom0 to become unresponsive and Xen to miss time-critical softirq
- * deadlines. This will ultimately be replaced by built-in preemptibility of
- * get_page_type().
- */
-#define DOMAIN_DESTRUCT_AVOID_RECURSION 1
-
#define HZ 100
#define OPT_CONSOLE_STR "vga"
#define NR_CPUS 32
#endif
+#ifdef MAX_PHYS_IRQS
+#define NR_IRQS MAX_PHYS_IRQS
+#else
+#define NR_IRQS 256
+#endif
+
#if defined(__i386__) && (NR_CPUS > 32)
#error "Maximum of 32 physical processors supported by Xen on x86_32"
#endif
#define asmlinkage
-#define XENHEAP_DEFAULT_MB (16)
-
#define PML4_ENTRY_BITS 39
#ifndef __ASSEMBLY__
#define PML4_ENTRY_BYTES (1UL << PML4_ENTRY_BITS)
#define RO_MPT_VIRT_END FRAMETABLE_VIRT_START
#define RO_MPT_VIRT_START (RO_MPT_VIRT_END - (MACHPHYS_MBYTES<<20))
-#define XENHEAP_DEFAULT_MB (DIRECTMAP_MBYTES)
#define DIRECTMAP_PHYS_END (DIRECTMAP_MBYTES<<20)
/* Maximum linear address accessible via guest memory segments. */
#endif /* __i386__ */
#ifndef __ASSEMBLY__
-extern unsigned long xen_phys_start, xenheap_phys_start, xenheap_phys_end;
+extern unsigned long xen_phys_start;
+#if defined(__i386__)
+extern unsigned long xenheap_phys_end;
+#endif
#endif
/* GDT/LDT shadow mapping area. The first per-domain-mapping sub-area. */
#define X86_FEATURE_P3 (3*32+ 6) /* P3 */
#define X86_FEATURE_P4 (3*32+ 7) /* P4 */
#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */
+#define X86_FEATURE_NOSTOP_TSC (3*32+ 9) /* TSC does not stop in C states */
/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
#define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */
#define X86_FEATURE_SSE4_2 (4*32+20) /* Streaming SIMD Extensions 4.2 */
#define X86_FEATURE_X2APIC (4*32+21) /* Extended xAPIC */
#define X86_FEATURE_POPCNT (4*32+23) /* POPCNT instruction */
+#define X86_FEATURE_XSAVE (4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
+#define X86_FEATURE_HYPERVISOR (4*32+31) /* Running under some hypervisor */
/* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
#define X86_FEATURE_XSTORE (5*32+ 2) /* on-CPU RNG present (xstore insn) */
#ifndef __ASSEMBLY__
-#define load_TR(n) __asm__ __volatile__ ("ltr %%ax" : : "a" (TSS_ENTRY<<3) )
-
#if defined(__x86_64__)
#define GUEST_KERNEL_RPL(d) (is_pv_32bit_domain(d) ? 1 : 3)
#elif defined(__i386__)
#endif
extern void set_intr_gate(unsigned int irq, void * addr);
-extern void set_tss_desc(unsigned int n, void *addr);
+extern void load_TR(void);
#endif /* !__ASSEMBLY__ */
#include <asm/hvm/vcpu.h>
#include <asm/hvm/domain.h>
#include <asm/e820.h>
-#include <asm/pirq.h>
#define has_32bit_shinfo(d) ((d)->arch.has_32bit_shinfo)
#define is_pv_32bit_domain(d) ((d)->arch.is_32bit_pv)
#define is_pv_32on64_domain(d) (0)
#endif
#define is_pv_32on64_vcpu(v) (is_pv_32on64_domain((v)->domain))
-#define IS_COMPAT(d) (is_pv_32on64_domain(d))
struct trap_bounce {
uint32_t error_code;
int locker; /* processor which holds the lock */
const char *locker_function; /* Func that took it */
unsigned int opt_flags; /* runtime tunable optimizations on/off */
- struct list_head pinned_shadows;
+ struct page_list_head pinned_shadows;
/* Memory allocation */
- struct list_head freelists[SHADOW_MAX_ORDER + 1];
- struct list_head p2m_freelist;
+ struct page_list_head freelists[SHADOW_MAX_ORDER + 1];
+ struct page_list_head p2m_freelist;
unsigned int total_pages; /* number of pages allocated */
unsigned int free_pages; /* number of pages on freelists */
unsigned int p2m_pages; /* number of pages allocates to p2m */
pagetable_t unpaged_pagetable;
/* Shadow hashtable */
- struct shadow_page_info **hash_table;
+ struct page_info **hash_table;
int hash_walking; /* Some function is walking the hash table */
/* Fast MMIO path heuristic */
int locker;
const char *locker_function;
- struct list_head freelist;
+ struct page_list_head freelist;
unsigned int total_pages; /* number of pages allocated */
unsigned int free_pages; /* number of pages on freelists */
unsigned int p2m_pages; /* number of pages allocates to p2m */
struct p2m_domain;
+/* Define for GUEST MCA handling */
+#define MAX_NR_BANKS 30
+
+/* This entry is for recording bank nodes for the impacted domain,
+ * put into impact_header list. */
+struct bank_entry {
+ struct list_head list;
+ int32_t cpu;
+ uint16_t bank;
+ uint64_t mci_status;
+ uint64_t mci_addr;
+ uint64_t mci_misc;
+};
+
+struct domain_mca_msrs
+{
+ /* The guest should not change the values below after domain boot. */
+ uint64_t mcg_cap;
+ uint64_t mcg_ctl;
+ uint64_t mcg_status;
+ uint64_t mci_ctl[MAX_NR_BANKS];
+ uint16_t nr_injection;
+ struct list_head impact_header;
+};
+
struct arch_domain
{
l1_pgentry_t *mm_perdomain_pt;
unsigned int hv_compat_vstart;
#endif
+ bool_t s3_integrity;
+
/* I/O-port admin-specified access capabilities. */
struct rangeset *ioport_caps;
uint32_t pci_cf8;
/* NB. protected by d->event_lock and by irq_desc[vector].lock */
int vector_pirq[NR_VECTORS];
- int pirq_vector[NR_PIRQS];
+ s16 pirq_vector[NR_IRQS];
+
+ /* Shared page for notifying that explicit PIRQ EOI is required. */
+ unsigned long *pirq_eoi_map;
+ unsigned long pirq_eoi_map_mfn;
/* Pseudophysical e820 map (XENMEM_memory_map). */
struct e820entry e820[3];
RELMEM_l2,
RELMEM_done,
} relmem;
- struct list_head relmem_list;
+ struct page_list_head relmem_list;
cpuid_input_t cpuids[MAX_CPUID_INPUT];
+
+ /* For Guest vMCA handling */
+ struct domain_mca_msrs vmca_msrs;
} __cacheline_aligned;
#define has_arch_pdevs(d) (!list_empty(&(d)->arch.pdev_list))
/* Current LDT details. */
unsigned long shadow_ldt_mapcnt;
+ spinlock_t shadow_ldt_lock;
struct paging_vcpu paging;
unsigned int *ecx,
unsigned int *edx);
+int construct_dom0(
+ struct domain *d,
+ unsigned long image_base,
+ unsigned long image_start, unsigned long image_len,
+ unsigned long initrd_start, unsigned long initrd_len,
+ char *cmdline);
+
#endif /* __ASM_DOMAIN_H__ */
/*
};
extern int reserve_e820_ram(struct e820map *e820, uint64_t s, uint64_t e);
+extern int e820_change_range_type(
+ struct e820map *e820, uint64_t s, uint64_t e,
+ uint32_t orig_type, uint32_t new_type);
extern unsigned long init_e820(const char *, struct e820entry *, int *);
extern struct e820map e820;
#include <xen/shared.h>
-static inline void vcpu_kick(struct vcpu *v)
-{
- /*
- * NB1. 'pause_flags' and 'processor' must be checked /after/ update of
- * pending flag. These values may fluctuate (after all, we hold no
- * locks) but the key insight is that each change will cause
- * evtchn_upcall_pending to be polled.
- *
- * NB2. We save the running flag across the unblock to avoid a needless
- * IPI for domains that we IPI'd to unblock.
- */
- int running = v->is_running;
- vcpu_unblock(v);
- if ( running )
- smp_send_event_check_cpu(v->processor);
-}
-
-static inline void vcpu_mark_events_pending(struct vcpu *v)
-{
- int already_pending = test_and_set_bit(
- 0, (unsigned long *)&vcpu_info(v, evtchn_upcall_pending));
-
- if ( already_pending )
- return;
-
- if ( is_hvm_vcpu(v) )
- hvm_assert_evtchn_irq(v);
- else
- vcpu_kick(v);
-}
+void vcpu_kick(struct vcpu *v);
+void vcpu_mark_events_pending(struct vcpu *v);
int hvm_local_events_need_delivery(struct vcpu *v);
static inline int local_events_need_delivery(void)
* from the end of virtual memory backwards.
*/
enum fixed_addresses {
+ FIX_RESERVED, /* Index 0 is reserved since fix_to_virt(0) > FIXADDR_TOP. */
#ifdef __i386__
FIX_PAE_HIGHMEM_0,
FIX_PAE_HIGHMEM_END = FIX_PAE_HIGHMEM_0 + NR_CPUS-1,
FIX_IOMMU_MMIO_END = FIX_IOMMU_MMIO_BASE_0 + IOMMU_PAGES -1,
FIX_TBOOT_SHARED_BASE,
FIX_MSIX_IO_RESERV_BASE,
- FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + MAX_MSIX_PAGES -1,
+ FIX_MSIX_IO_RESERV_END = FIX_MSIX_IO_RESERV_BASE + FIX_MSIX_MAX_PAGES -1,
__end_of_fixed_addresses
};
--- /dev/null
+/******************************************************************************
+ * xen/asm-x86/guest_pt.h
+ *
+ * Types and accessors for guest pagetable entries, as distinct from
+ * Xen's pagetable types.
+ *
+ * Users must #define GUEST_PAGING_LEVELS to 2, 3 or 4 before including
+ * this file.
+ *
+ * Parts of this code are Copyright (c) 2006 by XenSource Inc.
+ * Parts of this code are Copyright (c) 2006 by Michael A Fetterman
+ * Parts based on earlier work by Michael A Fetterman, Ian Pratt et al.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ */
+
+#ifndef _XEN_ASM_GUEST_PT_H
+#define _XEN_ASM_GUEST_PT_H
+
+/* Type of the guest's frame numbers */
+TYPE_SAFE(unsigned long,gfn)
+#define PRI_gfn "05lx"
+
+#define VALID_GFN(m) (m != INVALID_GFN)
+
+static inline int
+valid_gfn(gfn_t m)
+{
+ return VALID_GFN(gfn_x(m));
+}
+
+static inline paddr_t
+gfn_to_paddr(gfn_t gfn)
+{
+ return ((paddr_t)gfn_x(gfn)) << PAGE_SHIFT;
+}
+
+/* Override gfn_to_mfn to work with gfn_t */
+#undef gfn_to_mfn
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn_type((d), gfn_x(g), (t), p2m_alloc)
+
+
+/* Types of the guest's page tables and access functions for them */
+
+#if GUEST_PAGING_LEVELS == 2
+
+#define GUEST_L1_PAGETABLE_ENTRIES 1024
+#define GUEST_L2_PAGETABLE_ENTRIES 1024
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 22
+
+typedef uint32_t guest_intpte_t;
+typedef struct { guest_intpte_t l1; } guest_l1e_t;
+typedef struct { guest_intpte_t l2; } guest_l2e_t;
+
+#define PRI_gpte "08x"
+
+static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
+{ return ((paddr_t) gl1e.l1) & (PADDR_MASK & PAGE_MASK); }
+static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
+{ return ((paddr_t) gl2e.l2) & (PADDR_MASK & PAGE_MASK); }
+
+static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
+{ return _gfn(guest_l1e_get_paddr(gl1e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
+{ return _gfn(guest_l2e_get_paddr(gl2e) >> PAGE_SHIFT); }
+
+static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
+{ return gl1e.l1 & 0xfff; }
+static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
+{ return gl2e.l2 & 0xfff; }
+
+static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
+{ return (guest_l1e_t) { (gfn_x(gfn) << PAGE_SHIFT) | flags }; }
+static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
+{ return (guest_l2e_t) { (gfn_x(gfn) << PAGE_SHIFT) | flags }; }
+
+#define guest_l1_table_offset(_va) \
+ (((_va) >> GUEST_L1_PAGETABLE_SHIFT) & (GUEST_L1_PAGETABLE_ENTRIES - 1))
+#define guest_l2_table_offset(_va) \
+ (((_va) >> GUEST_L2_PAGETABLE_SHIFT) & (GUEST_L2_PAGETABLE_ENTRIES - 1))
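+
+/* Worked example (illustrative only): with 2-level guest paging the 32-bit
+ * address 0xC0101000 splits as
+ *   guest_l2_table_offset(0xC0101000) == 0xC0101000 >> 22          == 0x300
+ *   guest_l1_table_offset(0xC0101000) == (0xC0101000 >> 12) & 0x3ff == 0x101
+ * i.e. L2 slot 768 and L1 slot 257. */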
+
+#else /* GUEST_PAGING_LEVELS != 2 */
+
+#if GUEST_PAGING_LEVELS == 3
+#define GUEST_L1_PAGETABLE_ENTRIES 512
+#define GUEST_L2_PAGETABLE_ENTRIES 512
+#define GUEST_L3_PAGETABLE_ENTRIES 4
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 21
+#define GUEST_L3_PAGETABLE_SHIFT 30
+#else /* GUEST_PAGING_LEVELS == 4 */
+#define GUEST_L1_PAGETABLE_ENTRIES 512
+#define GUEST_L2_PAGETABLE_ENTRIES 512
+#define GUEST_L3_PAGETABLE_ENTRIES 512
+#define GUEST_L4_PAGETABLE_ENTRIES 512
+#define GUEST_L1_PAGETABLE_SHIFT 12
+#define GUEST_L2_PAGETABLE_SHIFT 21
+#define GUEST_L3_PAGETABLE_SHIFT 30
+#define GUEST_L4_PAGETABLE_SHIFT 39
+#endif
+
+typedef l1_pgentry_t guest_l1e_t;
+typedef l2_pgentry_t guest_l2e_t;
+typedef l3_pgentry_t guest_l3e_t;
+#if GUEST_PAGING_LEVELS >= 4
+typedef l4_pgentry_t guest_l4e_t;
+#endif
+typedef intpte_t guest_intpte_t;
+
+#define PRI_gpte "016"PRIx64
+
+static inline paddr_t guest_l1e_get_paddr(guest_l1e_t gl1e)
+{ return l1e_get_paddr(gl1e); }
+static inline paddr_t guest_l2e_get_paddr(guest_l2e_t gl2e)
+{ return l2e_get_paddr(gl2e); }
+static inline paddr_t guest_l3e_get_paddr(guest_l3e_t gl3e)
+{ return l3e_get_paddr(gl3e); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline paddr_t guest_l4e_get_paddr(guest_l4e_t gl4e)
+{ return l4e_get_paddr(gl4e); }
+#endif
+
+static inline gfn_t guest_l1e_get_gfn(guest_l1e_t gl1e)
+{ return _gfn(l1e_get_paddr(gl1e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l2e_get_gfn(guest_l2e_t gl2e)
+{ return _gfn(l2e_get_paddr(gl2e) >> PAGE_SHIFT); }
+static inline gfn_t guest_l3e_get_gfn(guest_l3e_t gl3e)
+{ return _gfn(l3e_get_paddr(gl3e) >> PAGE_SHIFT); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline gfn_t guest_l4e_get_gfn(guest_l4e_t gl4e)
+{ return _gfn(l4e_get_paddr(gl4e) >> PAGE_SHIFT); }
+#endif
+
+static inline u32 guest_l1e_get_flags(guest_l1e_t gl1e)
+{ return l1e_get_flags(gl1e); }
+static inline u32 guest_l2e_get_flags(guest_l2e_t gl2e)
+{ return l2e_get_flags(gl2e); }
+static inline u32 guest_l3e_get_flags(guest_l3e_t gl3e)
+{ return l3e_get_flags(gl3e); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline u32 guest_l4e_get_flags(guest_l4e_t gl4e)
+{ return l4e_get_flags(gl4e); }
+#endif
+
+static inline guest_l1e_t guest_l1e_from_gfn(gfn_t gfn, u32 flags)
+{ return l1e_from_pfn(gfn_x(gfn), flags); }
+static inline guest_l2e_t guest_l2e_from_gfn(gfn_t gfn, u32 flags)
+{ return l2e_from_pfn(gfn_x(gfn), flags); }
+static inline guest_l3e_t guest_l3e_from_gfn(gfn_t gfn, u32 flags)
+{ return l3e_from_pfn(gfn_x(gfn), flags); }
+#if GUEST_PAGING_LEVELS >= 4
+static inline guest_l4e_t guest_l4e_from_gfn(gfn_t gfn, u32 flags)
+{ return l4e_from_pfn(gfn_x(gfn), flags); }
+#endif
+
+#define guest_l1_table_offset(a) l1_table_offset(a)
+#define guest_l2_table_offset(a) l2_table_offset(a)
+#define guest_l3_table_offset(a) l3_table_offset(a)
+#define guest_l4_table_offset(a) l4_table_offset(a)
+
+#endif /* GUEST_PAGING_LEVELS != 2 */
+
+
+/* Which pagetable features are supported on this vcpu? */
+
+static inline int
+guest_supports_superpages(struct vcpu *v)
+{
+ /* The _PAGE_PSE bit must be honoured in HVM guests, whenever
+ * CR4.PSE is set or the guest is in PAE or long mode.
+ * It's also used in the dummy PT for vcpus with CR0.PG cleared. */
+ return (is_hvm_vcpu(v) &&
+ (GUEST_PAGING_LEVELS != 2
+ || !hvm_paging_enabled(v)
+ || (v->arch.hvm_vcpu.guest_cr[4] & X86_CR4_PSE)));
+}
+
+static inline int
+guest_supports_nx(struct vcpu *v)
+{
+ if ( GUEST_PAGING_LEVELS == 2 || !cpu_has_nx )
+ return 0;
+ if ( !is_hvm_vcpu(v) )
+ return cpu_has_nx;
+ return hvm_nx_enabled(v);
+}
+
+
+
+/* Type used for recording a walk through guest pagetables. It is
+ * filled in by the pagetable walk function, and also used as a cache
+ * for later walks. When we encounter a superpage l2e, we fabricate an
+ * l1e for propagation to the shadow (for splintering guest superpages
+ * into many shadow l1 entries). */
+typedef struct guest_pagetable_walk walk_t;
+struct guest_pagetable_walk
+{
+ unsigned long va; /* Address we were looking for */
+#if GUEST_PAGING_LEVELS >= 3
+#if GUEST_PAGING_LEVELS >= 4
+ guest_l4e_t l4e; /* Guest's level 4 entry */
+#endif
+ guest_l3e_t l3e; /* Guest's level 3 entry */
+#endif
+ guest_l2e_t l2e; /* Guest's level 2 entry */
+ guest_l1e_t l1e; /* Guest's level 1 entry (or fabrication) */
+#if GUEST_PAGING_LEVELS >= 4
+ mfn_t l4mfn; /* MFN that the level 4 entry was in */
+ mfn_t l3mfn; /* MFN that the level 3 entry was in */
+#endif
+ mfn_t l2mfn; /* MFN that the level 2 entry was in */
+ mfn_t l1mfn; /* MFN that the level 1 entry was in */
+};
+
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding frame number. */
+static inline gfn_t
+guest_walk_to_gfn(walk_t *gw)
+{
+ if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
+ return _gfn(INVALID_GFN);
+ return guest_l1e_get_gfn(gw->l1e);
+}
+
+/* Given a walk_t, translate the gw->va into the guest's notion of the
+ * corresponding physical address. */
+static inline paddr_t
+guest_walk_to_gpa(walk_t *gw)
+{
+ if ( !(guest_l1e_get_flags(gw->l1e) & _PAGE_PRESENT) )
+ return 0;
+ return guest_l1e_get_paddr(gw->l1e) + (gw->va & ~PAGE_MASK);
+}
+
+/* Walk the guest pagetables, after the manner of a hardware walker.
+ *
+ * Inputs: a vcpu, a virtual address, a walk_t to fill, a
+ * pointer to a pagefault code, the MFN of the guest's
+ * top-level pagetable, and a mapping of the
+ * guest's top-level pagetable.
+ *
+ * We walk the vcpu's guest pagetables, filling the walk_t with what we
+ * see and adding any Accessed and Dirty bits that are needed in the
+ * guest entries. Using the pagefault code, we check the permissions as
+ * we go. For the purposes of reading pagetables we treat all non-RAM
+ * memory as containing zeroes.
+ *
+ * Returns 0 for success, or the set of permission bits that we failed on
+ * if the walk did not complete. */
+
+/* Macro-fu so you can call guest_walk_tables() and get the right one. */
+#define GPT_RENAME2(_n, _l) _n ## _ ## _l ## _levels
+#define GPT_RENAME(_n, _l) GPT_RENAME2(_n, _l)
+#define guest_walk_tables GPT_RENAME(guest_walk_tables, GUEST_PAGING_LEVELS)
+
+extern uint32_t
+guest_walk_tables(struct vcpu *v, unsigned long va, walk_t *gw,
+ uint32_t pfec, mfn_t top_mfn, void *top_map);
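+
+/* Illustrative caller (a sketch only; how v, va, pfec, top_mfn and top_map
+ * are obtained is up to the caller and not part of this interface):
+ *
+ *     walk_t gw;
+ *     uint32_t missing = guest_walk_tables(v, va, &gw, pfec,
+ *                                          top_mfn, top_map);
+ *     if ( missing != 0 )
+ *         return _gfn(INVALID_GFN);   // 'missing' holds the failed PFEC_* bits
+ *     return guest_walk_to_gfn(&gw);  // or guest_walk_to_gpa(&gw)
+ */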
+
+/* Pretty-print the contents of a guest-walk */
+static inline void print_gw(walk_t *gw)
+{
+ gdprintk(XENLOG_INFO, "GUEST WALK TO %#lx:\n", gw->va);
+#if GUEST_PAGING_LEVELS >= 3 /* PAE or 64... */
+#if GUEST_PAGING_LEVELS >= 4 /* 64-bit only... */
+ gdprintk(XENLOG_INFO, " l4mfn=%" PRI_mfn "\n", mfn_x(gw->l4mfn));
+ gdprintk(XENLOG_INFO, " l4e=%" PRI_gpte "\n", gw->l4e.l4);
+ gdprintk(XENLOG_INFO, " l3mfn=%" PRI_mfn "\n", mfn_x(gw->l3mfn));
+#endif /* PAE or 64... */
+ gdprintk(XENLOG_INFO, " l3e=%" PRI_gpte "\n", gw->l3e.l3);
+#endif /* All levels... */
+ gdprintk(XENLOG_INFO, " l2mfn=%" PRI_mfn "\n", mfn_x(gw->l2mfn));
+ gdprintk(XENLOG_INFO, " l2e=%" PRI_gpte "\n", gw->l2e.l2);
+ gdprintk(XENLOG_INFO, " l1mfn=%" PRI_mfn "\n", mfn_x(gw->l1mfn));
+ gdprintk(XENLOG_INFO, " l1e=%" PRI_gpte "\n", gw->l1e.l1);
+}
+
+#endif /* _XEN_ASM_GUEST_PT_H */
#define HPET_T2_CMP 0x148
#define HPET_T2_ROUTE 0x150
+#define HPET_Tn_CFG(n) (HPET_T0_CFG + (n) * 0x20)
+#define HPET_Tn_CMP(n) (HPET_T0_CMP + (n) * 0x20)
+#define HPET_Tn_ROUTE(n) (HPET_T0_ROUTE + (n) * 0x20)
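+
+/* Sanity check (illustrative): HPET_Tn_CMP(2) expands to HPET_T0_CMP + 0x40,
+ * i.e. HPET_T2_CMP (0x148), and HPET_Tn_ROUTE(2) to HPET_T2_ROUTE (0x150). */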
+
#define HPET_ID_VENDOR 0xffff0000
#define HPET_ID_LEGSUP 0x00008000
#define HPET_ID_NUMBER 0x00001f00
#define HPET_TN_PERIODIC_CAP 0x010
#define HPET_TN_SETVAL 0x040
#define HPET_TN_32BIT 0x100
+#define HPET_TN_ROUTE 0x3e00
+#define HPET_TN_FSB 0x4000
+#define HPET_TN_FSB_CAP 0x8000
+#define HPET_TN_ROUTE_SHIFT 9
+
#define hpet_read32(x) \
(*(volatile u32 *)(fix_to_virt(FIX_HPET_BASE) + (x)))
/* Pass-through */
struct hvm_iommu hvm_iommu;
+ /* hypervisor intercepted msix table */
+ struct list_head msixtbl_list;
+ spinlock_t msixtbl_list_lock;
+
struct viridian_domain viridian;
bool_t hap_enabled;
return hvm_funcs.set_info_guest(v);
}
+int hvm_debug_op(struct vcpu *v, int32_t op);
+
#endif /* __ASM_X86_HVM_HVM_H__ */
#ifndef __ASM_X86_HVM_IRQ_H__
#define __ASM_X86_HVM_IRQ_H__
-#include <xen/types.h>
-#include <xen/spinlock.h>
-#include <asm/irq.h>
-#include <asm/pirq.h>
+#include <xen/hvm/irq.h>
#include <asm/hvm/hvm.h>
#include <asm/hvm/vpic.h>
#include <asm/hvm/vioapic.h>
-#include <public/hvm/save.h>
-
-struct dev_intx_gsi_link {
- struct list_head list;
- uint8_t device;
- uint8_t intx;
- uint8_t gsi;
- uint8_t link;
-};
-
-#define _HVM_IRQ_DPCI_MSI 0x1
-
-struct hvm_gmsi_info {
- uint32_t gvec;
- uint32_t gflags;
-};
-
-struct hvm_mirq_dpci_mapping {
- uint32_t flags;
- int pending;
- struct list_head digl_list;
- struct domain *dom;
- struct hvm_gmsi_info gmsi;
-};
-
-struct hvm_girq_dpci_mapping {
- uint8_t valid;
- uint8_t device;
- uint8_t intx;
- uint8_t machine_gsi;
-};
-
-#define NR_ISAIRQS 16
-#define NR_LINK 4
-/* Protected by domain's event_lock */
-struct hvm_irq_dpci {
- /* Machine IRQ to guest device/intx mapping. */
- DECLARE_BITMAP(mapping, NR_PIRQS);
- struct hvm_mirq_dpci_mapping mirq[NR_IRQS];
- /* Guest IRQ to guest device/intx mapping. */
- struct hvm_girq_dpci_mapping girq[NR_IRQS];
- uint8_t msi_gvec_pirq[NR_VECTORS];
- DECLARE_BITMAP(dirq_mask, NR_IRQS);
- /* Record of mapped ISA IRQs */
- DECLARE_BITMAP(isairq_map, NR_ISAIRQS);
- /* Record of mapped Links */
- uint8_t link_cnt[NR_LINK];
- struct timer hvm_timer[NR_IRQS];
-};
struct hvm_irq {
/*
#define hvm_isa_irq_to_gsi(isa_irq) ((isa_irq) ? : 2)
-/* Modify state of a PCI INTx wire. */
-void hvm_pci_intx_assert(
- struct domain *d, unsigned int device, unsigned int intx);
-void hvm_pci_intx_deassert(
- struct domain *d, unsigned int device, unsigned int intx);
-
-/* Modify state of an ISA device's IRQ wire. */
-void hvm_isa_irq_assert(
- struct domain *d, unsigned int isa_irq);
-void hvm_isa_irq_deassert(
- struct domain *d, unsigned int isa_irq);
-
-void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
-
-void hvm_maybe_deassert_evtchn_irq(void);
-void hvm_assert_evtchn_irq(struct vcpu *v);
-void hvm_set_callback_via(struct domain *d, uint64_t via);
-
/* Check/Acknowledge next pending interrupt. */
struct hvm_intack hvm_vcpu_has_pending_irq(struct vcpu *v);
struct hvm_intack hvm_vcpu_ack_pending_irq(struct vcpu *v,
struct hvm_intack intack);
+/*
+ * Currently IA64 Xen doesn't support MSI. So for x86, we define this macro
+ * to control the conditional compilation of some MSI-related functions.
+ * This macro will be removed once IA64 has MSI support.
+ */
+#define SUPPORT_MSI_REMAPPING 1
+
#endif /* __ASM_X86_HVM_IRQ_H__ */
#include <xen/sched.h>
#include <asm/amd-iommu.h>
+#include <xen/domain_page.h>
#define for_each_amd_iommu(amd_iommu) \
list_for_each_entry(amd_iommu, \
/* mapping functions */
int amd_iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
int amd_iommu_unmap_page(struct domain *d, unsigned long gfn);
-void *amd_iommu_get_vptr_from_page_table_entry(u32 *entry);
+u64 amd_iommu_get_next_table_from_pte(u32 *entry);
int amd_iommu_reserve_domain_unity_map(struct domain *domain,
unsigned long phys_addr, unsigned long size, int iw, int ir);
int amd_iommu_sync_p2m(struct domain *d);
+void invalidate_all_iommu_pages(struct domain *d);
/* device table functions */
void amd_iommu_set_dev_table_entry(u32 *dte, u64 root_ptr, u64 intremap_ptr,
u16 domain_id, u8 sys_mgt, u8 dev_ex, u8 paging_mode);
int amd_iommu_is_dte_page_translation_valid(u32 *entry);
-void invalidate_dev_table_entry(struct amd_iommu *iommu,
- u16 devic_id);
+void invalidate_dev_table_entry(struct amd_iommu *iommu, u16 device_id);
/* send cmd to iommu */
int send_iommu_command(struct amd_iommu *iommu, u32 cmd[]);
return (PAGE_ALIGN(addr + size) - (addr & PAGE_MASK)) >> PAGE_SHIFT;
}
+static inline struct page_info* alloc_amd_iommu_pgtable(void)
+{
+ struct page_info *pg;
+ void *vaddr;
+
+ pg = alloc_domheap_page(NULL, 0);
+ if ( pg == NULL )
+ return NULL;
+ vaddr = map_domain_page(page_to_mfn(pg));
+ if ( !vaddr )
+ return NULL;
+ memset(vaddr, 0, PAGE_SIZE);
+ unmap_domain_page(vaddr);
+ return pg;
+}
+
+static inline void free_amd_iommu_pgtable(struct page_info *pg)
+{
+ if ( pg != 0 )
+ free_domheap_page(pg);
+}
+
+static inline void* __alloc_amd_iommu_tables(int order)
+{
+ void *buf;
+ buf = alloc_xenheap_pages(order, 0);
+ return buf;
+}
+
+static inline void __free_amd_iommu_tables(void *table, int order)
+{
+ free_xenheap_pages(table, order);
+}
+
#endif /* _ASM_X86_64_AMD_IOMMU_PROTO_H */
#define DO_TRC_HVM_INJ_EXC DEFAULT_HVM_INJECT
#define DO_TRC_HVM_INJ_VIRQ DEFAULT_HVM_INJECT
#define DO_TRC_HVM_REINJ_VIRQ DEFAULT_HVM_INJECT
+#define DO_TRC_HVM_INTR_WINDOW DEFAULT_HVM_INJECT
#define DO_TRC_HVM_IO_READ DEFAULT_HVM_IO
#define DO_TRC_HVM_IO_WRITE DEFAULT_HVM_IO
#define DO_TRC_HVM_CR_READ DEFAULT_HVM_REGACCESS
bool_t flag_dr_dirty;
bool_t debug_state_latch;
+ bool_t single_step;
union {
struct arch_vmx_struct vmx;
int vlapic_accept_pic_intr(struct vcpu *v);
-struct vlapic *apic_round_robin(
- struct domain *d, uint8_t vector, uint32_t bitmap);
+struct vlapic *apic_lowest_prio(struct domain *d, uint32_t bitmap);
int vlapic_match_logical_addr(struct vlapic *vlapic, uint8_t mda);
unsigned long host_cr0;
+ /* Is the guest in real mode? */
+ uint8_t vmx_realmode;
/* Are we emulating rather than VMENTERing? */
-#define VMXEMUL_REALMODE 1 /* Yes, because CR0.PE == 0 */
-#define VMXEMUL_BAD_CS 2 /* Yes, because CS.RPL != CPL */
-#define VMXEMUL_BAD_SS 4 /* Yes, because SS.RPL != CPL */
- uint8_t vmxemul;
+ uint8_t vmx_emulate;
+ /* Bitmask of segments that we can't safely use in virtual 8086 mode */
+ uint16_t vm86_segment_mask;
+ /* Shadow CS, SS, DS, ES, FS, GS, TR while in virtual 8086 mode */
+ struct segment_register vm86_saved_seg[x86_seg_tr + 1];
+ /* Remember EFLAGS while in virtual 8086 mode */
+ uint32_t vm86_saved_eflags;
};
int vmx_create_vmcs(struct vcpu *v);
#define CPU_BASED_MOV_DR_EXITING 0x00800000
#define CPU_BASED_UNCOND_IO_EXITING 0x01000000
#define CPU_BASED_ACTIVATE_IO_BITMAP 0x02000000
+#define CPU_BASED_MONITOR_TRAP_FLAG 0x08000000
#define CPU_BASED_ACTIVATE_MSR_BITMAP 0x10000000
#define CPU_BASED_MONITOR_EXITING 0x20000000
#define CPU_BASED_PAUSE_EXITING 0x40000000
#define VM_EXIT_IA32E_MODE 0x00000200
#define VM_EXIT_ACK_INTR_ON_EXIT 0x00008000
+#define VM_EXIT_SAVE_GUEST_PAT 0x00040000
+#define VM_EXIT_LOAD_HOST_PAT 0x00080000
extern u32 vmx_vmexit_control;
#define VM_ENTRY_IA32E_MODE 0x00000200
#define VM_ENTRY_SMM 0x00000400
#define VM_ENTRY_DEACT_DUAL_MONITOR 0x00000800
+#define VM_ENTRY_LOAD_GUEST_PAT 0x00004000
extern u32 vmx_vmentry_control;
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
(vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_EPT)
#define cpu_has_vmx_vpid \
(vmx_secondary_exec_control & SECONDARY_EXEC_ENABLE_VPID)
+#define cpu_has_monitor_trap_flag \
+ (vmx_cpu_based_exec_control & CPU_BASED_MONITOR_TRAP_FLAG)
+#define cpu_has_vmx_pat \
+ (vmx_vmentry_control & VM_ENTRY_LOAD_GUEST_PAT)
/* GUEST_INTERRUPTIBILITY_INFO flags. */
#define VMX_INTR_SHADOW_STI 0x00000001
VMCS_LINK_POINTER_HIGH = 0x00002801,
GUEST_IA32_DEBUGCTL = 0x00002802,
GUEST_IA32_DEBUGCTL_HIGH = 0x00002803,
+ GUEST_PAT = 0x00002804,
+ GUEST_PAT_HIGH = 0x00002805,
GUEST_PDPTR0 = 0x0000280a,
GUEST_PDPTR0_HIGH = 0x0000280b,
GUEST_PDPTR1 = 0x0000280c,
GUEST_PDPTR2_HIGH = 0x0000280f,
GUEST_PDPTR3 = 0x00002810,
GUEST_PDPTR3_HIGH = 0x00002811,
+ HOST_PAT = 0x00002c00,
+ HOST_PAT_HIGH = 0x00002c01,
PIN_BASED_VM_EXEC_CONTROL = 0x00004000,
CPU_BASED_VM_EXEC_CONTROL = 0x00004002,
EXCEPTION_BITMAP = 0x00004004,
u64 r : 1,
w : 1,
x : 1,
- emt : 4,
+ emt : 3,
+ igmt : 1,
sp_avail : 1,
avail1 : 4,
mfn : 45,
#define EPTE_SUPER_PAGE_MASK 0x80
#define EPTE_MFN_MASK 0x1fffffffffff000
#define EPTE_AVAIL1_MASK 0xF00
-#define EPTE_EMT_MASK 0x78
+#define EPTE_EMT_MASK 0x38
+#define EPTE_IGMT_MASK 0x40
+#define EPTE_AVAIL1_SHIFT 8
+#define EPTE_EMT_SHIFT 3
+#define EPTE_IGMT_SHIFT 6
void vmx_asm_vmexit_handler(struct cpu_user_regs);
void vmx_asm_do_vmentry(void);
#define EXIT_REASON_INVALID_GUEST_STATE 33
#define EXIT_REASON_MSR_LOADING 34
#define EXIT_REASON_MWAIT_INSTRUCTION 36
+#define EXIT_REASON_MONITOR_TRAP_FLAG 37
#define EXIT_REASON_MONITOR_INSTRUCTION 39
#define EXIT_REASON_PAUSE_INSTRUCTION 40
#define EXIT_REASON_MACHINE_CHECK 41
return rc;
}
-void vmx_inject_hw_exception(struct vcpu *v, int trap, int error_code);
-void vmx_inject_extint(struct vcpu *v, int trap);
-void vmx_inject_nmi(struct vcpu *v);
+void vmx_inject_hw_exception(int trap, int error_code);
+void vmx_inject_extint(int trap);
+void vmx_inject_nmi(void);
void ept_p2m_init(struct domain *d);
#define VPMU_CONTEXT_ALLOCATED 0x1
#define VPMU_CONTEXT_LOADED 0x2
#define VPMU_RUNNING 0x4
-
+#define PASSIVE_DOMAIN_ALLOCATED 0x8
int vpmu_do_wrmsr(struct cpu_user_regs *regs);
int vpmu_do_rdmsr(struct cpu_user_regs *regs);
int vpmu_do_interrupt(struct cpu_user_regs *regs);
#ifndef __ASM_X86_HVM_VPMU_CORE_H_
#define __ASM_X86_HVM_VPMU_CORE_H_
-/* Core 2 Non-architectual Performance Counter MSRs. */
-u32 core2_counters_msr[] = {
- MSR_CORE_PERF_FIXED_CTR0,
- MSR_CORE_PERF_FIXED_CTR1,
- MSR_CORE_PERF_FIXED_CTR2};
-
-/* Core 2 Non-architectual Performance Control MSRs. */
-u32 core2_ctrls_msr[] = {
- MSR_CORE_PERF_FIXED_CTR_CTRL,
- MSR_IA32_PEBS_ENABLE,
- MSR_IA32_DS_AREA};
-
-struct pmumsr core2_counters = {
- 3,
- core2_counters_msr
-};
-
-struct pmumsr core2_ctrls = {
- 3,
- core2_ctrls_msr
-};
-
struct arch_msr_pair {
u64 counter;
u64 control;
#include <asm/hvm/irq.h>
#include <public/hvm/save.h>
-struct HPETState;
-struct HPET_timer_fn_info {
- struct HPETState *hs;
- unsigned int tn;
-};
-
-struct hpet_registers {
- /* Memory-mapped, software visible registers */
- uint64_t capability; /* capabilities */
- uint64_t config; /* configuration */
- uint64_t isr; /* interrupt status reg */
- uint64_t mc64; /* main counter */
- struct { /* timers */
- uint64_t config; /* configuration/cap */
- uint64_t cmp; /* comparator */
- uint64_t fsb; /* FSB route, not supported now */
- } timers[HPET_TIMER_NUM];
-
- /* Hidden register state */
- uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
-};
-
-typedef struct HPETState {
- struct hpet_registers hpet;
- struct vcpu *vcpu;
- uint64_t stime_freq;
- uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
- uint64_t hpet_to_ns_limit; /* max hpet ticks convertable to ns */
- uint64_t mc_offset;
- struct timer timers[HPET_TIMER_NUM];
- struct HPET_timer_fn_info timer_fn_info[HPET_TIMER_NUM];
- spinlock_t lock;
-} HPETState;
-
-
/*
* Abstract layer of periodic time, one short time.
*/
spinlock_t lock;
} PITState;
+struct hpet_registers {
+ /* Memory-mapped, software visible registers */
+ uint64_t capability; /* capabilities */
+ uint64_t config; /* configuration */
+ uint64_t isr; /* interrupt status reg */
+ uint64_t mc64; /* main counter */
+ struct { /* timers */
+ uint64_t config; /* configuration/cap */
+ uint64_t cmp; /* comparator */
+ uint64_t fsb; /* FSB route, not supported now */
+ } timers[HPET_TIMER_NUM];
+
+ /* Hidden register state */
+ uint64_t period[HPET_TIMER_NUM]; /* Last value written to comparator */
+ uint64_t comparator64[HPET_TIMER_NUM]; /* 64 bit running comparator */
+};
+
+typedef struct HPETState {
+ struct hpet_registers hpet;
+ struct vcpu *vcpu;
+ uint64_t stime_freq;
+ uint64_t hpet_to_ns_scale; /* hpet ticks to ns (multiplied by 2^10) */
+ uint64_t hpet_to_ns_limit; /* max hpet ticks convertible to ns */
+ uint64_t mc_offset;
+ struct periodic_time pt[HPET_TIMER_NUM];
+ spinlock_t lock;
+} HPETState;
+
typedef struct RTCState {
/* Hardware state */
struct hvm_hw_rtc hw;
* The given periodic timer structure must be initialised with zero bytes,
* except for the 'source' field which must be initialised with the
* correct PTSRC_ value. The initialised timer structure can then be passed
- * to {create,destroy}_periodic_time() and number of times and in any order.
+ * to {create,destroy}_periodic_time() any number of times and in any order.
* Note that, for a given periodic timer, invocations of these functions MUST
* be serialised.
*/
void create_periodic_time(
- struct vcpu *v, struct periodic_time *pt, uint64_t period,
- uint8_t irq, char one_shot, time_cb *cb, void *data);
+ struct vcpu *v, struct periodic_time *pt, uint64_t delta,
+ uint64_t period, uint8_t irq, time_cb *cb, void *data);
void destroy_periodic_time(struct periodic_time *pt);
int pv_pit_handler(int port, int data, int write);
void pmtimer_deinit(struct domain *d);
void pmtimer_reset(struct domain *d);
-void hpet_migrate_timers(struct vcpu *v);
void hpet_init(struct vcpu *v);
void hpet_deinit(struct domain *d);
void hpet_reset(struct domain *d);
((volatile int *)(__fix_to_virt(FIX_IO_APIC_BASE_0 + idx) \
+ (mp_ioapics[idx].mpc_apicaddr & ~PAGE_MASK)))
+#define IO_APIC_ID(idx) (mp_ioapics[idx].mpc_apicid)
+
/*
* The structure of the IO-APIC:
*/
#endif /*CONFIG_ACPI_BOOT*/
extern int (*ioapic_renumber_irq)(int ioapic, int irq);
-extern int ioapic_suspend(void);
-extern int ioapic_resume(void);
+extern void ioapic_suspend(void);
+extern void ioapic_resume(void);
#else /* !CONFIG_X86_IO_APIC */
#define io_apic_assign_pci_irqs 0
-static inline int ioapic_suspend(void) {return 0};
-static inline int ioapic_resume(void) {return 0};
+static inline void ioapic_suspend(void) {}
+static inline void ioapic_resume(void) {}
#endif
extern int assign_irq_vector(int irq);
#define ioports_access_permitted(d, s, e) \
rangeset_contains_range((d)->arch.ioport_caps, s, e)
-#define cache_flush_permitted(d) \
- (!rangeset_is_empty((d)->iomem_caps))
+#define cache_flush_permitted(d) \
+ (!rangeset_is_empty((d)->iomem_caps) || \
+ !rangeset_is_empty((d)->arch.ioport_caps))
#endif /* __X86_IOCAP_H__ */
#define vector_to_irq(vec) (vector_irq[vec])
extern int vector_irq[NR_VECTORS];
-extern u8 irq_vector[NR_IRQ_VECTORS];
-#define AUTO_ASSIGN -1
-#define NEVER_ASSIGN -2
-#define FREE_TO_ASSIGN -3
+extern u8 irq_vector[NR_IRQS];
#define platform_legacy_irq(irq) ((irq) < 16)
fastcall void pmu_apic_interrupt(void);
fastcall void spurious_interrupt(void);
fastcall void thermal_interrupt(void);
+fastcall void cmci_interrupt(void);
void disable_8259A_irq(unsigned int irq);
void enable_8259A_irq(unsigned int irq);
extern atomic_t irq_err_count;
extern atomic_t irq_mis_count;
-int pirq_acktype(struct domain *d, int irq);
int pirq_shared(struct domain *d , int irq);
int map_domain_pirq(struct domain *d, int pirq, int vector, int type,
int get_free_pirq(struct domain *d, int type, int index);
void free_domain_pirqs(struct domain *d);
-#define domain_irq_to_vector(d, irq) ((d)->arch.pirq_vector[(irq)])
-#define domain_vector_to_irq(d, vec) ((d)->arch.vector_pirq[(vec)])
+#define domain_irq_to_vector(d, irq) ((d)->arch.pirq_vector[irq] ?: \
+ IO_APIC_IRQ(irq) ? 0 : LEGACY_VECTOR(irq))
+#define domain_vector_to_irq(d, vec) ((d)->arch.vector_pirq[vec] ?: \
+ ((vec) < FIRST_LEGACY_VECTOR || \
+ (vec) > LAST_LEGACY_VECTOR) ? \
+ 0 : LEGACY_IRQ_FROM_VECTOR(vec))
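+
+/* Reading the above (illustrative): an IRQ the domain has explicitly mapped
+ * uses its recorded vector; otherwise a legacy (non-IO-APIC) IRQ falls back
+ * to its fixed LEGACY_VECTOR(), and anything else yields 0 (no vector).
+ * Conversely, only vectors in the legacy range translate back implicitly. */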
int pirq_guest_force_unbind(struct domain *d, int irq);
#define THERMAL_APIC_VECTOR 0xfa
#define LOCAL_TIMER_VECTOR 0xf9
#define PMU_APIC_VECTOR 0xf8
-
+#define CMCI_APIC_VECTOR 0xf7
/*
* High-priority dynamically-allocated vectors. For interrupts that
* must be higher priority than any guest-bound interrupt.
*/
#define FIRST_HIPRIORITY_VECTOR 0xf0
-#define LAST_HIPRIORITY_VECTOR 0xf7
+#define LAST_HIPRIORITY_VECTOR 0xf6
/* Legacy PIC uses vectors 0xe0-0xef. */
#define FIRST_LEGACY_VECTOR 0xe0
#define NR_VECTORS 256
-/* Limited by number of trap vectors. */
-#define NR_IRQS NR_VECTORS
-#define NR_IRQ_VECTORS NR_IRQS
-
#endif /* _ASM_IRQ_VECTORS_H */
#define ASM_X86__MICROCODE_H
struct cpu_signature;
+struct ucode_cpu_info;
struct microcode_ops {
- int (*get_matching_microcode)(void *mc, int cpu);
+ int (*microcode_resume_match)(int cpu, struct cpu_signature *nsig);
int (*cpu_request_microcode)(int cpu, const void *buf, size_t size);
- int (*collect_cpu_info)(int cpu_num, struct cpu_signature *csig);
+ int (*collect_cpu_info)(int cpu, struct cpu_signature *csig);
int (*apply_microcode)(int cpu);
};
};
struct equiv_cpu_entry {
- unsigned int installed_cpu;
- unsigned int fixed_errata_mask;
- unsigned int fixed_errata_compare;
- unsigned int equiv_cpu;
-};
+ uint32_t installed_cpu;
+ uint32_t fixed_errata_mask;
+ uint32_t fixed_errata_compare;
+ uint16_t equiv_cpu;
+ uint16_t reserved;
+} __attribute__((packed));
struct microcode_header_amd {
- unsigned int data_code;
- unsigned int patch_id;
- unsigned char mc_patch_data_id[2];
- unsigned char mc_patch_data_len;
- unsigned char init_flag;
- unsigned int mc_patch_data_checksum;
- unsigned int nb_dev_id;
- unsigned int sb_dev_id;
- unsigned char processor_rev_id[2];
- unsigned char nb_rev_id;
- unsigned char sb_rev_id;
- unsigned char bios_api_rev;
- unsigned char reserved1[3];
- unsigned int match_reg[8];
-};
+ uint32_t data_code;
+ uint32_t patch_id;
+ uint8_t mc_patch_data_id[2];
+ uint8_t mc_patch_data_len;
+ uint8_t init_flag;
+ uint32_t mc_patch_data_checksum;
+ uint32_t nb_dev_id;
+ uint32_t sb_dev_id;
+ uint16_t processor_rev_id;
+ uint8_t nb_rev_id;
+ uint8_t sb_rev_id;
+ uint8_t bios_api_rev;
+ uint8_t reserved1[3];
+ uint32_t match_reg[8];
+} __attribute__((packed));
struct microcode_amd {
struct microcode_header_amd hdr;
struct ucode_cpu_info {
struct cpu_signature cpu_sig;
- int valid;
union {
struct microcode_intel *mc_intel;
struct microcode_amd *mc_amd;
- void *valid_mc;
+ void *mc_valid;
} mc;
};
* Per-page-frame information.
*
* Every architecture must ensure the following:
- * 1. 'struct page_info' contains a 'struct list_head list'.
+ * 1. 'struct page_info' contains a 'struct page_list_entry list'.
* 2. Provide a PFN_ORDER() macro for accessing the order of a free page.
*/
-#define PFN_ORDER(_pfn) ((_pfn)->u.free.order)
+#define PFN_ORDER(_pfn) ((_pfn)->v.free.order)
+
+/*
+ * This definition is solely for the use in struct page_info (and
+ * struct page_list_head), intended to allow easy adjustment once x86-64
+ * wants to support more than 16TB.
+ * 'unsigned long' should be used for MFNs everywhere else.
+ */
+#define __mfn_t unsigned int
+#define PRpgmfn "08x"
+
+#undef page_list_entry
+struct page_list_entry
+{
+ __mfn_t next, prev;
+};
struct page_info
{
- /* Each frame can be threaded onto a doubly-linked list. */
- struct list_head list;
+ union {
+ /* Each frame can be threaded onto a doubly-linked list.
+ *
+ * For unused shadow pages, a list of pages of this order; for
+ * pinnable shadows, if pinned, a list of other pinned shadows
+ * (see sh_type_is_pinnable() below for the definition of
+ * "pinnable" shadow types).
+ */
+ struct page_list_entry list;
+ /* For non-pinnable shadows, a higher entry that points at us. */
+ paddr_t up;
+ };
/* Reference count and various PGC_xxx flags and fields. */
- u32 count_info;
+ unsigned long count_info;
/* Context-dependent fields follow... */
union {
/* Page is in use: ((count_info & PGC_count_mask) != 0). */
struct {
- /* Owner of this page (NULL if page is anonymous). */
- u32 _domain; /* pickled format */
/* Type reference count and various PGT_xxx flags and fields. */
unsigned long type_info;
- } __attribute__ ((packed)) inuse;
+ } inuse;
+
+ /* Page is in use as a shadow: count_info == 0. */
+ struct {
+ unsigned long type:5; /* What kind of shadow is this? */
+ unsigned long pinned:1; /* Is the shadow pinned? */
+ unsigned long count:26; /* Reference count */
+ } sh;
/* Page is on a free list: ((count_info & PGC_count_mask) == 0). */
struct {
- /* Order-size of the free chunk this page is the head of. */
- u32 order;
- /* Mask of possibly-tainted TLBs. */
- cpumask_t cpumask;
- } __attribute__ ((packed)) free;
+ /* Do TLBs need flushing for safety before next page use? */
+ bool_t need_tlbflush;
+ } free;
} u;
-#if defined(__x86_64__)
- spinlock_t lock;
-#endif
+ union {
+
+ /* Page is in use, but not as a shadow. */
+ struct {
+ /* Owner of this page (NULL if page is anonymous). */
+ u32 _domain; /* pickled format */
+ } inuse;
+
+ /* Page is in use as a shadow. */
+ struct {
+ /* GMFN of guest page we're a shadow of. */
+ __mfn_t back;
+ } sh;
+
+ /* Page is on a free list (including shadow code free lists). */
+ struct {
+ /* Order-size of the free chunk this page is the head of. */
+ unsigned int order;
+ } free;
+
+ } v;
union {
/*
/*
* When PGT_partial is true then this field is valid and indicates
* that PTEs in the range [0, @nr_validated_ptes) have been validated.
- * If @partial_pte is true then PTE at @nr_validated_ptes+1 has been
- * partially validated.
+ * An extra page reference must be acquired (or not dropped) whenever
+ * PGT_partial gets set, and it must be dropped when the flag gets
+ * cleared. This is so that a get() leaving a page in partially
+ * validated state (where the caller would drop the reference acquired
+ * due to the getting of the type [apparently] failing [-EAGAIN])
+ * would not accidentally result in a page left with zero general
+ * reference count, but non-zero type reference count (possible when
+ * the partial get() is followed immediately by domain destruction).
+ * Likewise, the ownership of the single type reference for partially
+ * (in-)validated pages is tied to this flag, i.e. the instance
+ * setting the flag must not drop that reference, whereas the instance
+ * clearing it will have to.
+ *
+ * If @partial_pte is positive then PTE at @nr_validated_ptes+1 has
+ * been partially validated. This implies that the general reference
+ * to the page (acquired from get_page_from_lNe()) would be dropped
+ * (again due to the apparent failure) and hence must be re-acquired
+ * when resuming the validation, but must not be dropped when picking
+ * up the page for invalidation.
+ *
+ * If @partial_pte is negative then PTE at @nr_validated_ptes+1 has
+ * been partially invalidated. This is basically the opposite case of
+ * above, i.e. the general reference to the page was not dropped in
+ * put_page_from_lNe() (due to the apparent failure), and hence it
+ * must be dropped when the put operation is resumed (and completes),
+ * but it must not be acquired if picking up the page for validation.
*/
struct {
u16 nr_validated_ptes;
- bool_t partial_pte;
+ s8 partial_pte;
};
/*
* tracked for TLB-flush avoidance when a guest runs in shadow mode.
*/
u32 shadow_flags;
+
+ /* When in use as a shadow, next shadow in this hash chain. */
+ __mfn_t next_shadow;
};
};
+#undef __mfn_t
+
+#define PG_shift(idx) (BITS_PER_LONG - (idx))
+#define PG_mask(x, idx) (x ## UL << PG_shift(idx))
+
/* The following page types are MUTUALLY EXCLUSIVE. */
-#define PGT_none (0U<<29) /* no special uses of this page */
-#define PGT_l1_page_table (1U<<29) /* using this page as an L1 page table? */
-#define PGT_l2_page_table (2U<<29) /* using this page as an L2 page table? */
-#define PGT_l3_page_table (3U<<29) /* using this page as an L3 page table? */
-#define PGT_l4_page_table (4U<<29) /* using this page as an L4 page table? */
-#define PGT_seg_desc_page (5U<<29) /* using this page in a GDT/LDT? */
-#define PGT_writable_page (7U<<29) /* has writable mappings of this page? */
-#define PGT_type_mask (7U<<29) /* Bits 29-31. */
+#define PGT_none PG_mask(0, 3) /* no special uses of this page */
+#define PGT_l1_page_table PG_mask(1, 3) /* using as an L1 page table? */
+#define PGT_l2_page_table PG_mask(2, 3) /* using as an L2 page table? */
+#define PGT_l3_page_table PG_mask(3, 3) /* using as an L3 page table? */
+#define PGT_l4_page_table PG_mask(4, 3) /* using as an L4 page table? */
+#define PGT_seg_desc_page PG_mask(5, 3) /* using this page in a GDT/LDT? */
+#define PGT_writable_page PG_mask(7, 3) /* has writable mappings? */
+#define PGT_type_mask PG_mask(7, 3) /* Bits 29-31 or 61-63. */
/* Owning guest has pinned this page to its current type? */
-#define _PGT_pinned 28
-#define PGT_pinned (1U<<_PGT_pinned)
+#define _PGT_pinned PG_shift(4)
+#define PGT_pinned PG_mask(1, 4)
/* Has this page been validated for use as its current type? */
-#define _PGT_validated 27
-#define PGT_validated (1U<<_PGT_validated)
+#define _PGT_validated PG_shift(5)
+#define PGT_validated PG_mask(1, 5)
/* PAE only: is this an L2 page directory containing Xen-private mappings? */
-#define _PGT_pae_xen_l2 26
-#define PGT_pae_xen_l2 (1U<<_PGT_pae_xen_l2)
+#define _PGT_pae_xen_l2 PG_shift(6)
+#define PGT_pae_xen_l2 PG_mask(1, 6)
/* Has this page been *partially* validated for use as its current type? */
-#define _PGT_partial 25
-#define PGT_partial (1U<<_PGT_partial)
+#define _PGT_partial PG_shift(7)
+#define PGT_partial PG_mask(1, 7)
+ /* Page is locked? */
+#define _PGT_locked PG_shift(8)
+#define PGT_locked PG_mask(1, 8)
- /* 25-bit count of uses of this frame as its current type. */
-#define PGT_count_mask ((1U<<25)-1)
+ /* Count of uses of this frame as its current type. */
+#define PGT_count_width PG_shift(8)
+#define PGT_count_mask ((1UL<<PGT_count_width)-1)
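+
+/* Illustrative expansion: with BITS_PER_LONG == 32, PG_shift(3) == 29, so
+ * PGT_type_mask is 7UL << 29 (the same bits as the old constants) and
+ * PGT_count_mask is (1UL << 24) - 1; with BITS_PER_LONG == 64 the flags move
+ * to the top of the word: PGT_type_mask == 7UL << 61, PGT_count_mask ==
+ * (1UL << 56) - 1. */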
/* Cleared when the owning guest 'frees' this page. */
-#define _PGC_allocated 31
-#define PGC_allocated (1U<<_PGC_allocated)
-#if defined(__i386__)
- /* Page is locked? */
-# define _PGC_locked 30
-# define PGC_locked (1U<<_PGC_out_of_sync)
-#endif
+#define _PGC_allocated PG_shift(1)
+#define PGC_allocated PG_mask(1, 1)
+ /* Page is Xen heap? */
+#define _PGC_xen_heap PG_shift(2)
+#define PGC_xen_heap PG_mask(1, 2)
/* Set when is using a page as a page table */
-#define _PGC_page_table 29
-#define PGC_page_table (1U<<_PGC_page_table)
+#define _PGC_page_table PG_shift(3)
+#define PGC_page_table PG_mask(1, 3)
/* 3-bit PAT/PCD/PWT cache-attribute hint. */
-#define PGC_cacheattr_base 26
-#define PGC_cacheattr_mask (7U<<PGC_cacheattr_base)
- /* 26-bit count of references to this frame. */
-#define PGC_count_mask ((1U<<26)-1)
+#define PGC_cacheattr_base PG_shift(6)
+#define PGC_cacheattr_mask PG_mask(7, 6)
+ /* Page is broken? */
+#define _PGC_broken PG_shift(7)
+#define PGC_broken PG_mask(1, 7)
+ /* Page is offline pending ? */
+#define _PGC_offlining PG_shift(8)
+#define PGC_offlining PG_mask(1, 8)
+ /* Page is offlined */
+#define _PGC_offlined PG_shift(9)
+#define PGC_offlined PG_mask(1, 9)
+#define PGC_offlined_broken (PGC_offlined | PGC_broken)
+
+ /* Count of references to this frame. */
+#define PGC_count_width PG_shift(9)
+#define PGC_count_mask ((1UL<<PGC_count_width)-1)
+
+#define is_page_offlining(page) ((page)->count_info & PGC_offlining)
+#define is_page_offlined(page) ((page)->count_info & PGC_offlined)
+#define is_page_broken(page) ((page)->count_info & PGC_broken)
+#define is_page_online(page) (!is_page_offlined(page))
+#if defined(__i386__)
#define is_xen_heap_page(page) is_xen_heap_mfn(page_to_mfn(page))
#define is_xen_heap_mfn(mfn) ({ \
unsigned long _mfn = (mfn); \
- ((_mfn >= paddr_to_pfn(xenheap_phys_start)) && \
- (_mfn < paddr_to_pfn(xenheap_phys_end))); \
+ (_mfn < paddr_to_pfn(xenheap_phys_end)); \
})
+#else
+extern unsigned long allocator_bitmap_end;
+#define is_xen_heap_page(page) ((page)->count_info & PGC_xen_heap)
+#define is_xen_heap_mfn(mfn) \
+ (__mfn_valid(mfn) && is_xen_heap_page(__mfn_to_page(mfn)))
+#define is_xen_fixed_mfn(mfn) \
+ ( (mfn << PAGE_SHIFT) >= __pa(&_start) && \
+ (mfn << PAGE_SHIFT) <= allocator_bitmap_end )
+#endif
#if defined(__i386__)
-#define pickle_domptr(_d) ((u32)(unsigned long)(_d))
-static inline struct domain *unpickle_domptr(u32 _domain)
-{ return (_domain & 1) ? NULL : (void *)_domain; }
#define PRtype_info "08lx" /* should only be used for printk's */
#elif defined(__x86_64__)
-static inline struct domain *unpickle_domptr(u32 _domain)
-{ return ((_domain == 0) || (_domain & 1)) ? NULL : __va(_domain); }
-static inline u32 pickle_domptr(struct domain *domain)
-{ return (domain == NULL) ? 0 : (u32)__pa(domain); }
#define PRtype_info "016lx"/* should only be used for printk's */
#endif
/* OOS fixup entries */
#define SHADOW_OOS_FIXUPS 2
-#define page_get_owner(_p) (unpickle_domptr((_p)->u.inuse._domain))
-#define page_set_owner(_p,_d) ((_p)->u.inuse._domain = pickle_domptr(_d))
+#define page_get_owner(_p) \
+ ((struct domain *)((_p)->v.inuse._domain ? \
+ mfn_to_virt((_p)->v.inuse._domain) : NULL))
+#define page_set_owner(_p,_d) \
+ ((_p)->v.inuse._domain = (_d) ? virt_to_mfn(_d) : 0)
#define maddr_get_owner(ma) (page_get_owner(maddr_to_page((ma))))
#define vaddr_get_owner(va) (page_get_owner(virt_to_page((va))))
int is_iomem_page(unsigned long mfn);
+struct domain *page_get_owner_and_reference(struct page_info *page);
void put_page(struct page_info *page);
int get_page(struct page_info *page, struct domain *domain);
void put_page_type(struct page_info *page);
int check_descriptor(const struct domain *, struct desc_struct *d);
+extern int opt_allow_hugepage;
/******************************************************************************
* With shadow pagetables, the different kinds of address start
/* MAX fixed pages reserved for mapping MSIX tables. */
#if defined(__x86_64__)
-#define MAX_MSIX_PAGES 512
+#define FIX_MSIX_MAX_PAGES 512
#else
-#define MAX_MSIX_PAGES 32
+#define FIX_MSIX_MAX_PAGES 32
#endif
struct msi_info {
u32 data; /* 16 bits of msi message data */
};
+struct msi_desc;
/* Helper functions */
-extern void mask_msi_irq(unsigned int irq);
-extern void unmask_msi_irq(unsigned int irq);
-extern void set_msi_irq_affinity(unsigned int irq, cpumask_t mask);
-extern int pci_enable_msi(struct msi_info *msi);
-extern void pci_disable_msi(int vector);
+extern void mask_msi_vector(unsigned int vector);
+extern void unmask_msi_vector(unsigned int vector);
+extern void set_msi_affinity(unsigned int vector, cpumask_t mask);
+extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc);
+extern void pci_disable_msi(struct msi_desc *desc);
extern void pci_cleanup_msi(struct pci_dev *pdev);
+extern int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc);
+extern void teardown_msi_vector(int vector);
+extern int msi_free_vector(struct msi_desc *entry);
+extern int pci_restore_msi_state(struct pci_dev *pdev);
+
+extern unsigned int pci_msix_get_table_len(struct pci_dev *pdev);
struct msi_desc {
struct {
struct list_head list;
- void __iomem *mask_base;
+ void __iomem *mask_base; /* va for the entry in mask table */
struct pci_dev *dev;
int vector;
int remap_index; /* index in interrupt remapping table */
};
+int msi_maskable_irq(const struct msi_desc *);
+
/*
* Assume the maximum number of hot plug slots supported by the system is about
* ten. The worst case is that each of these slots is hot-added with a device,
#define MSR_IA32_MC0_STATUS 0x00000401
#define MSR_IA32_MC0_ADDR 0x00000402
#define MSR_IA32_MC0_MISC 0x00000403
+#define MSR_IA32_MC0_CTL2 0x00000280
+#define CMCI_EN (1UL<<30)
+#define CMCI_THRESHOLD_MASK 0x7FFF
#define MSR_IA32_MC1_CTL 0x00000404
+#define MSR_IA32_MC1_CTL2 0x00000281
#define MSR_IA32_MC1_STATUS 0x00000405
#define MSR_IA32_MC1_ADDR 0x00000406
#define MSR_IA32_MC1_MISC 0x00000407
#define MSR_IA32_MC2_CTL 0x00000408
+#define MSR_IA32_MC2_CTL2 0x00000282
#define MSR_IA32_MC2_STATUS 0x00000409
#define MSR_IA32_MC2_ADDR 0x0000040A
#define MSR_IA32_MC2_MISC 0x0000040B
+#define MSR_IA32_MC3_CTL2 0x00000283
#define MSR_IA32_MC3_CTL 0x0000040C
#define MSR_IA32_MC3_STATUS 0x0000040D
#define MSR_IA32_MC3_ADDR 0x0000040E
#define MSR_IA32_MC3_MISC 0x0000040F
+#define MSR_IA32_MC4_CTL2 0x00000284
#define MSR_IA32_MC4_CTL 0x00000410
#define MSR_IA32_MC4_STATUS 0x00000411
#define MSR_IA32_MC4_ADDR 0x00000412
#define MSR_IA32_MC4_MISC 0x00000413
+#define MSR_IA32_MC5_CTL2 0x00000285
#define MSR_IA32_MC5_CTL 0x00000414
#define MSR_IA32_MC5_STATUS 0x00000415
#define MSR_IA32_MC5_ADDR 0x00000416
#define MSR_IA32_MC5_MISC 0x00000417
+#define MSR_IA32_MC6_CTL2 0x00000286
+#define MSR_IA32_MC6_CTL 0x00000418
+#define MSR_IA32_MC6_STATUS 0x00000419
+#define MSR_IA32_MC6_ADDR 0x0000041A
+#define MSR_IA32_MC6_MISC 0x0000041B
+
+#define MSR_IA32_MC7_CTL2 0x00000287
+#define MSR_IA32_MC7_CTL 0x0000041C
+#define MSR_IA32_MC7_STATUS 0x0000041D
+#define MSR_IA32_MC7_ADDR 0x0000041E
+#define MSR_IA32_MC7_MISC 0x0000041F
+
+#define MSR_IA32_MC8_CTL2 0x00000288
+#define MSR_IA32_MC8_CTL 0x00000420
+#define MSR_IA32_MC8_STATUS 0x00000421
+#define MSR_IA32_MC8_ADDR 0x00000422
+#define MSR_IA32_MC8_MISC 0x00000423
+
#define MSR_P6_PERFCTR0 0x000000c1
#define MSR_P6_PERFCTR1 0x000000c2
#define MSR_P6_EVNTSEL0 0x00000186
#define MTRR_TYPE_WRBACK 6
#define MTRR_NUM_TYPES 7
#define MEMORY_NUM_TYPES MTRR_NUM_TYPES
+#define NO_HARDCODE_MEM_TYPE MTRR_NUM_TYPES
#define NORMAL_CACHE_MODE 0
#define NO_FILL_CACHE_MODE 2
extern int mtrr_del_page(int reg, unsigned long base, unsigned long size);
extern void mtrr_centaur_report_mcr(int mcr, u32 lo, u32 hi);
extern u32 get_pat_flags(struct vcpu *v, u32 gl1e_flags, paddr_t gpaddr,
- paddr_t spaddr);
-extern uint8_t epte_get_entry_emt(struct domain *d, unsigned long gfn, unsigned long mfn);
-extern void ept_change_entry_emt_with_range(struct domain *d, unsigned long start_gfn,
- unsigned long end_gfn);
+ paddr_t spaddr, uint8_t gmtrr_mtype);
+extern uint8_t epte_get_entry_emt(
+ struct domain *d, unsigned long gfn, unsigned long mfn,
+ uint8_t *igmt, int direct_mmio);
+extern void ept_change_entry_emt_with_range(
+ struct domain *d, unsigned long start_gfn, unsigned long end_gfn);
extern unsigned char pat_type_2_pte_flags(unsigned char pat_type);
#endif /* __ASM_X86_MTRR_H__ */
p2m_ram_ro = 3, /* Read-only; writes are silently dropped */
p2m_mmio_dm = 4, /* Reads and write go to the device model */
p2m_mmio_direct = 5, /* Read/write mapping of genuine MMIO area */
+ p2m_populate_on_demand = 6, /* Place-holder for empty memory */
} p2m_type_t;
+typedef enum {
+ p2m_query = 0, /* Do not populate a PoD entries */
+ p2m_alloc = 1, /* Automatically populate PoD entries */
+ p2m_guest = 2, /* Guest demand-fault; implies alloc */
+} p2m_query_t;
+
/* We use bitmaps and masks to handle groups of types */
#define p2m_to_mask(_t) (1UL << (_t))
#define P2M_RO_TYPES (p2m_to_mask(p2m_ram_logdirty) \
| p2m_to_mask(p2m_ram_ro))
+#define P2M_MAGIC_TYPES (p2m_to_mask(p2m_populate_on_demand))
+
/* Useful predicates */
#define p2m_is_ram(_t) (p2m_to_mask(_t) & P2M_RAM_TYPES)
#define p2m_is_mmio(_t) (p2m_to_mask(_t) & P2M_MMIO_TYPES)
#define p2m_is_readonly(_t) (p2m_to_mask(_t) & P2M_RO_TYPES)
+#define p2m_is_magic(_t) (p2m_to_mask(_t) & P2M_MAGIC_TYPES)
#define p2m_is_valid(_t) (p2m_to_mask(_t) & (P2M_RAM_TYPES | P2M_MMIO_TYPES))
+/* Populate-on-demand */
+#define POPULATE_ON_DEMAND_MFN (1<<9)
+#define POD_PAGE_ORDER 9
+
+
struct p2m_domain {
/* Lock that protects updates to the p2m */
spinlock_t lock;
const char *locker_function; /* Func that took it */
/* Pages used to construct the p2m */
- struct list_head pages;
+ struct page_list_head pages;
/* Functions to call to get or free pages for the p2m */
struct page_info * (*alloc_page )(struct domain *d);
mfn_t mfn, unsigned int page_order,
p2m_type_t p2mt);
mfn_t (*get_entry )(struct domain *d, unsigned long gfn,
- p2m_type_t *p2mt);
+ p2m_type_t *p2mt,
+ p2m_query_t q);
mfn_t (*get_entry_current)(unsigned long gfn,
- p2m_type_t *p2mt);
+ p2m_type_t *p2mt,
+ p2m_query_t q);
void (*change_entry_type_global)(struct domain *d,
p2m_type_t ot,
p2m_type_t nt);
/* Highest guest frame that's ever been mapped in the p2m */
unsigned long max_mapped_pfn;
+
+ /* Populate-on-demand variables
+ * NB on locking. {super,single,count} are
+ * covered by d->page_alloc_lock, since they're almost always used in
+ * conjunction with that functionality. {entry_count} is covered by
+ * the domain p2m lock, since it's almost always used in conjunction
+ * with changing the p2m tables.
+ *
+ * At this point, both locks are held in two places. In both,
+ * the order is [p2m,page_alloc]:
+ * + p2m_pod_decrease_reservation() calls p2m_pod_cache_add(),
+ * which grabs page_alloc
+ * + p2m_pod_demand_populate() grabs both; the p2m lock to avoid
+ * double-demand-populating of pages, the page_alloc lock to
+ * protect moving pages from the PoD cache to the domain page list.
+ * (A small illustrative lock-ordering sketch follows this structure.)
+ */
+ struct {
+ struct page_list_head super, /* List of superpages */
+ single; /* Non-super lists */
+ int count, /* # of pages in cache lists */
+ entry_count; /* # of pages in p2m marked pod */
+ unsigned reclaim_super; /* Last gpfn of a scan */
+ unsigned reclaim_single; /* Last gpfn of a scan */
+ unsigned max_guest; /* gpfn of max guest demand-populate */
+ } pod;
};
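+
+/* Minimal lock-ordering sketch (illustrative only; the surrounding calls are
+ * an assumption, not an interface). Both places that hold the two locks take
+ * them in the order [p2m, page_alloc], as described above:
+ *
+ *     spin_lock(&d->arch.p2m->lock);     // the p2m lock
+ *     spin_lock(&d->page_alloc_lock);    // covers pod.super/single/count
+ *     ... move pages between the PoD cache and d->page_list ...
+ *     spin_unlock(&d->page_alloc_lock);
+ *     spin_unlock(&d->arch.p2m->lock);
+ */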
/* Extract the type from the PTE flags that store it */
return (flags >> 9) & 0x7;
}
-/* Read the current domain's p2m table. */
-static inline mfn_t gfn_to_mfn_current(unsigned long gfn, p2m_type_t *t)
+/* Read the current domain's p2m table. Do not populate PoD pages. */
+static inline mfn_t gfn_to_mfn_type_current(unsigned long gfn, p2m_type_t *t,
+ p2m_query_t q)
{
- return current->domain->arch.p2m->get_entry_current(gfn, t);
+ return current->domain->arch.p2m->get_entry_current(gfn, t, q);
}
-/* Read another domain's P2M table, mapping pages as we go */
+/* Read another domain's P2M table, mapping pages as we go.
+ * Do not populate PoD pages. */
static inline
-mfn_t gfn_to_mfn_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t)
+mfn_t gfn_to_mfn_type_foreign(struct domain *d, unsigned long gfn, p2m_type_t *t,
+ p2m_query_t q)
{
- return d->arch.p2m->get_entry(d, gfn, t);
+ return d->arch.p2m->get_entry(d, gfn, t, q);
}
/* General conversion function from gfn to mfn */
-#define gfn_to_mfn(d, g, t) _gfn_to_mfn((d), (g), (t))
-static inline mfn_t _gfn_to_mfn(struct domain *d,
- unsigned long gfn, p2m_type_t *t)
+static inline mfn_t _gfn_to_mfn_type(struct domain *d,
+ unsigned long gfn, p2m_type_t *t,
+ p2m_query_t q)
{
if ( !paging_mode_translate(d) )
{
return _mfn(gfn);
}
if ( likely(current->domain == d) )
- return gfn_to_mfn_current(gfn, t);
+ return gfn_to_mfn_type_current(gfn, t, q);
else
- return gfn_to_mfn_foreign(d, gfn, t);
+ return gfn_to_mfn_type_foreign(d, gfn, t, q);
}
+#define gfn_to_mfn(d, g, t) _gfn_to_mfn_type((d), (g), (t), p2m_alloc)
+#define gfn_to_mfn_query(d, g, t) _gfn_to_mfn_type((d), (g), (t), p2m_query)
+#define gfn_to_mfn_guest(d, g, t) _gfn_to_mfn_type((d), (g), (t), p2m_guest)
+
+#define gfn_to_mfn_current(g, t) gfn_to_mfn_type_current((g), (t), p2m_alloc)
+#define gfn_to_mfn_foreign(d, g, t) gfn_to_mfn_type_foreign((d), (g), (t), p2m_alloc)
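+
+/* Illustrative usage (hypothetical call sites, for contrast only):
+ *
+ *     mfn = gfn_to_mfn(d, gfn, &t);        // may populate a PoD entry
+ *     mfn = gfn_to_mfn_query(d, gfn, &t);  // never populates: a PoD entry
+ *                                          //  is reported, not filled in
+ *     mfn = gfn_to_mfn_guest(d, gfn, &t);  // guest demand-fault path
+ */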
+
/* Compatibility function exporting the old untyped interface */
static inline unsigned long gmfn_to_mfn(struct domain *d, unsigned long gpfn)
{
void p2m_teardown(struct domain *d);
void p2m_final_teardown(struct domain *d);
+/* Dump PoD information about the domain */
+void p2m_pod_dump_data(struct domain *d);
+
+/* Move all pages from the populate-on-demand cache to the domain page_list
+ * (usually in preparation for domain destruction) */
+void p2m_pod_empty_cache(struct domain *d);
+
+/* Set populate-on-demand cache size so that the total memory allocated to a
+ * domain matches target */
+int p2m_pod_set_mem_target(struct domain *d, unsigned long target);
+
+/* Call when decreasing memory reservation to handle PoD entries properly.
+ * Will return '1' if all entries were handled and nothing more need be done.*/
+int
+p2m_pod_decrease_reservation(struct domain *d,
+ xen_pfn_t gpfn,
+ unsigned int order);
+
/* Add a page to a domain's p2m table */
int guest_physmap_add_entry(struct domain *d, unsigned long gfn,
unsigned long mfn, unsigned int page_order,
p2m_type_t t);
+/* Set a p2m range as populate-on-demand */
+int guest_physmap_mark_populate_on_demand(struct domain *d, unsigned long gfn,
+ unsigned int order);
+
/* Untyped version for RAM only, for compatibility
*
* Return 0 for success
#define clear_page(_p) (cpu_has_xmm2 ? \
clear_page_sse2((void *)(_p)) : \
(void)memset((void *)(_p), 0, PAGE_SIZE))
-#define copy_page(_t,_f) memcpy((void *)(_t), (void *)(_f), PAGE_SIZE)
+void copy_page_sse2(void *, const void *);
+#define copy_page(_t,_f) (cpu_has_xmm2 ? \
+ copy_page_sse2(_t, _f) : \
+ (void)memcpy(_t, _f, PAGE_SIZE))
-#define mfn_valid(mfn) ((mfn) < max_page)
+#define __mfn_valid(mfn) ((mfn) < max_page)
/* Convert between Xen-heap virtual addresses and machine addresses. */
#define __pa(x) (virt_to_maddr(x))
#define __va(x) (maddr_to_virt(x))
/* Convert between Xen-heap virtual addresses and machine frame numbers. */
-#define virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT)
-#define mfn_to_virt(mfn) (maddr_to_virt(mfn << PAGE_SHIFT))
+#define __virt_to_mfn(va) (virt_to_maddr(va) >> PAGE_SHIFT)
+#define __mfn_to_virt(mfn) (maddr_to_virt((paddr_t)(mfn) << PAGE_SHIFT))
/* Convert between machine frame numbers and page-info structures. */
-#define mfn_to_page(mfn) (frame_table + (mfn))
-#define page_to_mfn(pg) ((unsigned long)((pg) - frame_table))
+#define __mfn_to_page(mfn) (frame_table + (mfn))
+#define __page_to_mfn(pg) ((unsigned long)((pg) - frame_table))
/* Convert between machine addresses and page-info structures. */
-#define maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT))
-#define page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
+#define __maddr_to_page(ma) (frame_table + ((ma) >> PAGE_SHIFT))
+#define __page_to_maddr(pg) ((paddr_t)((pg) - frame_table) << PAGE_SHIFT)
/* Convert between Xen-heap virtual addresses and page-info structures. */
-#define virt_to_page(va) (frame_table + (__pa(va) >> PAGE_SHIFT))
-#define page_to_virt(pg) (maddr_to_virt(page_to_maddr(pg)))
+#define __virt_to_page(va) (frame_table + (__pa(va) >> PAGE_SHIFT))
+#define __page_to_virt(pg) (maddr_to_virt(page_to_maddr(pg)))
/* Convert between frame number and address formats. */
-#define pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
-#define paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+#define __pfn_to_paddr(pfn) ((paddr_t)(pfn) << PAGE_SHIFT)
+#define __paddr_to_pfn(pa) ((unsigned long)((pa) >> PAGE_SHIFT))
+
+/*
+ * We define non-underscored wrappers for the above conversion functions.
+ * They can be overridden in individual source files, while the underscored
+ * versions remain intact.
+ */
+#define mfn_valid(mfn) __mfn_valid(mfn)
+#define virt_to_mfn(va) __virt_to_mfn(va)
+#define mfn_to_virt(mfn) __mfn_to_virt(mfn)
+#define mfn_to_page(mfn) __mfn_to_page(mfn)
+#define page_to_mfn(pg) __page_to_mfn(pg)
+#define maddr_to_page(ma) __maddr_to_page(ma)
+#define page_to_maddr(pg) __page_to_maddr(pg)
+#define virt_to_page(va) __virt_to_page(va)
+#define page_to_virt(pg) __page_to_virt(pg)
+#define pfn_to_paddr(pfn) __pfn_to_paddr(pfn)
+#define paddr_to_pfn(pa) __paddr_to_pfn(pa)
#endif /* !defined(__ASSEMBLY__) */
#endif
void paging_init(void);
void setup_idle_pagetable(void);
-unsigned long clone_idle_pagetable(struct vcpu *);
#endif /* !defined(__ASSEMBLY__) */
#define _PAGE_PRESENT 0x001U
#define __PAGE_HYPERVISOR_NOCACHE \
(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_PCD | _PAGE_ACCESSED)
+#define GRANT_PTE_FLAGS \
+ (_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_NX | _PAGE_GNTTAB)
+
#ifndef __ASSEMBLY__
static inline int get_order_from_bytes(paddr_t size)
* Access to the guest pagetables */
/* Get a mapping of a PV guest's l1e for this virtual address. */
-static inline void *
+static inline l1_pgentry_t *
guest_map_l1e(struct vcpu *v, unsigned long addr, unsigned long *gl1mfn)
{
l2_pgentry_t l2e;
!= _PAGE_PRESENT )
return NULL;
*gl1mfn = l2e_get_pfn(l2e);
- return &__linear_l1_table[l1_linear_offset(addr)];
+ return (l1_pgentry_t *)map_domain_page(*gl1mfn) + l1_table_offset(addr);
}
/* Pull down the mapping we got from guest_map_l1e() */
static inline void
guest_unmap_l1e(struct vcpu *v, void *p)
{
- if ( unlikely(paging_mode_translate(v->domain)) )
- unmap_domain_page(p);
+ unmap_domain_page(p);
}
/* Read the guest's l1e that maps this address. */
#ifndef __ASM_PERFC_H__
#define __ASM_PERFC_H__
-#include <asm/mm.h>
static inline void arch_perfc_printall(void)
{
PERFCOUNTER(exception_fixed, "pre-exception fixed")
+PERFCOUNTER(guest_walk, "guest pagetable walks")
/* Shadow counters */
PERFCOUNTER(shadow_alloc, "calls to shadow_alloc")
PERFCOUNTER(shadow_up_pointer, "shadow unshadow by up-pointer")
PERFCOUNTER(shadow_unshadow_bf, "shadow unshadow brute-force")
PERFCOUNTER(shadow_get_page_fail, "shadow_get_page_from_l1e failed")
-PERFCOUNTER(shadow_guest_walk, "shadow walks guest tables")
PERFCOUNTER(shadow_check_gwalk, "shadow checks gwalk")
PERFCOUNTER(shadow_inconsistent_gwalk, "shadow check inconsistent gwalk")
PERFCOUNTER(shadow_rm_write_flush_tlb,
PERFCOUNTER(mshv_wrmsr_tpr, "MS Hv wrmsr tpr")
PERFCOUNTER(mshv_wrmsr_eoi, "MS Hv wrmsr eoi")
+PERFCOUNTER(realmode_emulations, "realmode instructions emulated")
+PERFCOUNTER(realmode_exits, "vmexits from realmode")
+
/*#endif*/ /* __XEN_PERFC_DEFN_H__ */
int x86_power;
__u32 x86_max_cores; /* cpuid returned max cores value */
__u32 booted_cores; /* number of cores as seen by OS */
+ __u32 x86_num_siblings; /* cpuid logical cpus per chip value */
__u32 apicid;
unsigned short x86_clflush_size;
} __cacheline_aligned;
#define current_cpu_data boot_cpu_data
#endif
+extern u64 host_pat;
extern int phys_proc_id[NR_CPUS];
extern int cpu_core_id[NR_CPUS];
extern void identify_cpu(struct cpuinfo_x86 *);
+extern void setup_clear_cpu_cap(unsigned int);
extern void print_cpu_info(struct cpuinfo_x86 *);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void dodgy_tsc(void);
static always_inline void detect_ht(struct cpuinfo_x86 *c) {}
#endif
+#define cpu_to_core(_cpu) (cpu_core_id[_cpu])
+#define cpu_to_socket(_cpu) (phys_proc_id[_cpu])
+
/*
* Generic CPUID function
* clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx
void mcheck_init(struct cpuinfo_x86 *c);
asmlinkage void do_machine_check(struct cpu_user_regs *regs);
+void cpu_mcheck_distribute_cmci(void);
+void cpu_mcheck_disable(void);
int cpuid_hypervisor_leaves(
uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
uint32_t idx, uint32_t eax, uint32_t edx);
int microcode_update(XEN_GUEST_HANDLE(const_void), unsigned long len);
+int microcode_resume_cpu(int cpu);
#endif /* !__ASSEMBLY__ */
extern void smp_alloc_memory(void);
extern int pic_mode;
-extern int smp_num_siblings;
extern cpumask_t cpu_sibling_map[];
extern cpumask_t cpu_core_map[];
#define NMI_MCE_SOFTIRQ (NR_COMMON_SOFTIRQS + 0)
#define TIME_CALIBRATE_SOFTIRQ (NR_COMMON_SOFTIRQS + 1)
+#define VCPU_KICK_SOFTIRQ (NR_COMMON_SOFTIRQS + 2)
-#define NR_ARCH_SOFTIRQS 2
+#define MACHINE_CHECK_SOFTIRQ (NR_COMMON_SOFTIRQS + 3)
+#define NR_ARCH_SOFTIRQS 4
#endif /* __ASM_SOFTIRQ_H__ */
#include <xen/config.h>
#include <xen/lib.h>
#include <asm/atomic.h>
-#include <asm/rwlock.h>
typedef struct {
volatile s16 lock;
- s8 recurse_cpu;
- u8 recurse_cnt;
-} spinlock_t;
+} raw_spinlock_t;
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 1, -1, 0 }
+#define _RAW_SPIN_LOCK_UNLOCKED /*(raw_spinlock_t)*/ { 1 }
-#define spin_lock_init(x) do { *(x) = (spinlock_t) SPIN_LOCK_UNLOCKED; } while(0)
-#define spin_is_locked(x) (*(volatile char *)(&(x)->lock) <= 0)
+#define _raw_spin_is_locked(x) ((x)->lock <= 0)
-static inline void _raw_spin_lock(spinlock_t *lock)
+static always_inline void _raw_spin_lock(raw_spinlock_t *lock)
{
- __asm__ __volatile__ (
- "1: lock; decb %0 \n"
- " js 2f \n"
- ".section .text.lock,\"ax\"\n"
+ asm volatile (
+ "1: lock; decw %0 \n"
+ " jns 3f \n"
"2: rep; nop \n"
- " cmpb $0,%0 \n"
+ " cmpw $0,%0 \n"
" jle 2b \n"
" jmp 1b \n"
- ".previous"
+ "3:"
: "=m" (lock->lock) : : "memory" );
}
-static inline void _raw_spin_unlock(spinlock_t *lock)
+static always_inline void _raw_spin_unlock(raw_spinlock_t *lock)
{
- ASSERT(spin_is_locked(lock));
- __asm__ __volatile__ (
- "movb $1,%0"
+ ASSERT(_raw_spin_is_locked(lock));
+ asm volatile (
+ "movw $1,%0"
: "=m" (lock->lock) : : "memory" );
}
-static inline int _raw_spin_trylock(spinlock_t *lock)
+static always_inline int _raw_spin_trylock(raw_spinlock_t *lock)
{
- char oldval;
- __asm__ __volatile__(
- "xchgb %b0,%1"
- :"=q" (oldval), "=m" (lock->lock)
- :"0" (0) : "memory");
- return oldval > 0;
+ s16 oldval;
+ asm volatile (
+ "xchgw %w0,%1"
+ :"=r" (oldval), "=m" (lock->lock)
+ :"0" (0) : "memory" );
+ return (oldval > 0);
}
-/*
- * spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be
- * reentered recursively on the same CPU. All critical regions that may form
- * part of a recursively-nested set must be protected by these forms. If there
- * are any critical regions that cannot form part of such a set, they can use
- * standard spin_[un]lock().
- */
-#define _raw_spin_lock_recursive(_lock) \
- do { \
- int cpu = smp_processor_id(); \
- if ( likely((_lock)->recurse_cpu != cpu) ) \
- { \
- spin_lock(_lock); \
- (_lock)->recurse_cpu = cpu; \
- } \
- (_lock)->recurse_cnt++; \
- } while ( 0 )
-
-#define _raw_spin_unlock_recursive(_lock) \
- do { \
- if ( likely(--(_lock)->recurse_cnt == 0) ) \
- { \
- (_lock)->recurse_cpu = -1; \
- spin_unlock(_lock); \
- } \
- } while ( 0 )
-
-
typedef struct {
volatile unsigned int lock;
-} rwlock_t;
+} raw_rwlock_t;
-#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { RW_LOCK_BIAS }
+#define RW_LOCK_BIAS 0x01000000
+#define _RAW_RW_LOCK_UNLOCKED /*(raw_rwlock_t)*/ { RW_LOCK_BIAS }
+
+static always_inline void _raw_read_lock(raw_rwlock_t *rw)
+{
+ asm volatile (
+ "1: lock; decl %0 \n"
+ " jns 3f \n"
+ " lock; incl %0 \n"
+ "2: rep; nop \n"
+ " cmpl $1,%0 \n"
+ " js 2b \n"
+ " jmp 1b \n"
+ "3:"
+ : "=m" (rw->lock) : : "memory" );
+}
-#define rwlock_init(x) do { *(x) = (rwlock_t) RW_LOCK_UNLOCKED; } while(0)
+static always_inline void _raw_write_lock(raw_rwlock_t *rw)
+{
+ asm volatile (
+ "1: lock; subl %1,%0 \n"
+ " jz 3f \n"
+ " lock; addl %1,%0 \n"
+ "2: rep; nop \n"
+ " cmpl %1,%0 \n"
+ " jne 2b \n"
+ " jmp 1b \n"
+ "3:"
+ : "=m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory" );
+}
-/*
- * On x86, we implement read-write locks as a 32-bit counter
- * with the high bit (sign) being the "contended" bit.
- */
-static inline void _raw_read_lock(rwlock_t *rw)
+static always_inline void _raw_read_unlock(raw_rwlock_t *rw)
{
- __build_read_lock(rw, "__read_lock_failed");
+ asm volatile (
+ "lock ; incl %0"
+ : "=m" ((rw)->lock) : : "memory" );
}
-static inline void _raw_write_lock(rwlock_t *rw)
+static always_inline void _raw_write_unlock(raw_rwlock_t *rw)
{
- __build_write_lock(rw, "__write_lock_failed");
+ asm volatile (
+ "lock ; addl %1,%0"
+ : "=m" ((rw)->lock) : "i" (RW_LOCK_BIAS) : "memory" );
}
-#define _raw_read_unlock(rw) \
- __asm__ __volatile__ ( \
- "lock ; incl %0" : \
- "=m" ((rw)->lock) : : "memory" )
-#define _raw_write_unlock(rw) \
- __asm__ __volatile__ ( \
- "lock ; addl $" RW_LOCK_BIAS_STR ",%0" : \
- "=m" ((rw)->lock) : : "memory" )
+#define _raw_rw_is_locked(x) ((x)->lock < RW_LOCK_BIAS)
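For reference, the bias scheme the new open-coded rwlock relies on can be modelled in plain, non-atomic C (a sketch only; the real code above uses lock-prefixed decl/subl with spin loops): the counter starts at RW_LOCK_BIAS, each reader takes 1, a writer takes the whole bias, and the lock is free only while the counter equals the bias.

#define MODEL_RW_LOCK_BIAS 0x01000000          /* mirrors RW_LOCK_BIAS, model only */

static int model_counter = MODEL_RW_LOCK_BIAS;

static int model_read_trylock(void)
{
    if ( model_counter <= 0 )                  /* a writer holds the bias */
        return 0;
    model_counter -= 1;                        /* one more reader */
    return 1;
}

static int model_write_trylock(void)
{
    if ( model_counter != MODEL_RW_LOCK_BIAS ) /* readers or a writer present */
        return 0;
    model_counter -= MODEL_RW_LOCK_BIAS;       /* counter drops to zero */
    return 1;
}

static void model_read_unlock(void)  { model_counter += 1; }
static void model_write_unlock(void) { model_counter += MODEL_RW_LOCK_BIAS; }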
#endif /* __ASM_SPINLOCK_H */
#ifndef __ASM_SYSTEM_H
#define __ASM_SYSTEM_H
-#include <xen/config.h>
-#include <xen/types.h>
+#include <xen/lib.h>
#include <asm/bitops.h>
#define read_segment_register(name) \
/* used when interrupts are already enabled or to shutdown the processor */
#define halt() asm volatile ( "hlt" : : : "memory" )
+#define local_save_flags(x) \
+({ \
+ BUILD_BUG_ON(sizeof(x) != sizeof(long)); \
+ asm volatile ( "pushf" __OS " ; pop" __OS " %0" : "=g" (x)); \
+})
+#define local_irq_save(x) \
+({ \
+ local_save_flags(x); \
+ local_irq_disable(); \
+})
+#define local_irq_restore(x) \
+({ \
+ BUILD_BUG_ON(sizeof(x) != sizeof(long)); \
+ asm volatile ( "push" __OS " %0 ; popf" __OS \
+ : : "g" (x) : "memory", "cc" ); \
+})
+
static inline int local_irq_is_enabled(void)
{
unsigned long flags;
- __save_flags(flags);
+ local_save_flags(flags);
return !!(flags & (1<<9)); /* EFLAGS_IF */
}
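A short usage sketch for the consolidated flag macros above (the body of the critical section is a placeholder):

unsigned long flags;                  /* must be a long; the BUILD_BUG_ON enforces it */

local_irq_save(flags);                /* pushf/pop into flags, then disable interrupts */
/* ... code that must run with interrupts off ... */
local_irq_restore(flags);             /* push flags, popf: restores the saved IF state */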
#ifndef __TBOOT_H__
#define __TBOOT_H__
-typedef struct __attribute__ ((__packed__)) {
+#include <xen/acpi.h>
+
+#ifndef __packed
+#define __packed __attribute__ ((packed))
+#endif
+
+typedef struct __packed {
uint32_t data1;
uint16_t data2;
uint16_t data3;
/* used to communicate between tboot and the launched kernel (i.e. Xen) */
-typedef struct __attribute__ ((__packed__)) {
- uint16_t pm1a_cnt;
- uint16_t pm1b_cnt;
- uint16_t pm1a_evt;
- uint16_t pm1b_evt;
+#define TB_KEY_SIZE 64 /* 512 bits */
+
+#define MAX_TB_MAC_REGIONS 32
+typedef struct __packed {
+ uint64_t start; /* must be 64-byte aligned */
+ uint32_t size; /* must be 64-byte granular */
+} tboot_mac_region_t;
+
+/* GAS - Generic Address Structure (ACPI 2.0+) */
+typedef struct __packed {
+ uint8_t space_id;
+ uint8_t bit_width;
+ uint8_t bit_offset;
+ uint8_t access_width;
+ uint64_t address;
+} tboot_acpi_generic_address_t;
+
+typedef struct __packed {
+ tboot_acpi_generic_address_t pm1a_cnt_blk;
+ tboot_acpi_generic_address_t pm1b_cnt_blk;
+ tboot_acpi_generic_address_t pm1a_evt_blk;
+ tboot_acpi_generic_address_t pm1b_evt_blk;
uint16_t pm1a_cnt_val;
uint16_t pm1b_cnt_val;
-} tboot_acpi_sleep_info;
+ uint64_t wakeup_vector;
+ uint32_t vector_width;
+ uint64_t kernel_s3_resume_vector;
+} tboot_acpi_sleep_info_t;
-typedef struct __attribute__ ((__packed__)) {
- /* version 0x01+ fields: */
+typedef struct __packed {
+ /* version 3+ fields: */
uuid_t uuid; /* {663C8DFF-E8B3-4b82-AABF-19EA4D057A08} */
- uint32_t version; /* Version number: 0x01, 0x02, ... */
+ uint32_t version; /* Version number; currently supports 0.4 */
uint32_t log_addr; /* physical addr of tb_log_t log */
- uint32_t shutdown_entry32; /* entry point for tboot shutdown from 32b */
- uint32_t shutdown_entry64; /* entry point for tboot shutdown from 64b */
+ uint32_t shutdown_entry; /* entry point for tboot shutdown */
uint32_t shutdown_type; /* type of shutdown (TB_SHUTDOWN_*) */
- uint32_t s3_tb_wakeup_entry;/* entry point for tboot s3 wake up */
- uint32_t s3_k_wakeup_entry; /* entry point for xen s3 wake up */
- tboot_acpi_sleep_info
+ tboot_acpi_sleep_info_t
acpi_sinfo; /* where kernel put acpi sleep info in Sx */
- uint8_t reserved[52]; /* this pad is for compat with old field */
- /* version 0x02+ fields: */
uint32_t tboot_base; /* starting addr for tboot */
uint32_t tboot_size; /* size of tboot */
+ uint8_t num_mac_regions; /* number of mem regions to MAC on S3 */
+ /* contiguous memory regions to MAC on S3 */
+ tboot_mac_region_t mac_regions[MAX_TB_MAC_REGIONS];
+ /* version 4+ fields: */
+ /* populated by tboot; will be encrypted */
+ uint8_t s3_key[TB_KEY_SIZE];
} tboot_shared_t;
#define TB_SHUTDOWN_REBOOT 0
void tboot_probe(void);
void tboot_shutdown(uint32_t shutdown_type);
int tboot_in_measured_env(void);
+int tboot_protect_mem_regions(void);
+int tboot_parse_dmar_table(acpi_table_handler dmar_handler);
+int tboot_s3_resume(void);
#endif /* __TBOOT_H__ */
void pit_broadcast_exit(void);
int pit_broadcast_is_available(void);
+uint64_t acpi_pm_tick_to_ns(uint64_t ticks);
+uint64_t ns_to_acpi_pm_tick(uint64_t ns);
+
#endif /* __X86_TIME_H__ */
struct cpu_user_regs;
-extern void (*machine_check_vector)(struct cpu_user_regs *regs, long error_code);
+extern void machine_check_vector(struct cpu_user_regs *regs, long error_code);
/**
* guest_has_trap_callback
#define get_pte_flags(x) (((int)((x) >> 32) & ~0xFFF) | ((int)(x) & 0xFFF))
#define put_pte_flags(x) (((intpte_t)((x) & ~0xFFF) << 32) | ((x) & 0xFFF))
-#define GRANT_PTE_FLAGS \
- (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB)
-
/*
* Disallow unused flag bits plus PAT/PSE, PCD, PWT and GLOBAL.
* Permit the NX bit if the hardware supports it.
#define BASE_DISALLOW_MASK (0xFFFFF198U & ~_PAGE_NX)
#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
#define L3_DISALLOW_MASK 0xFFFFF1FEU /* must-be-zero */
#endif /* __X86_32_PAGE_H__ */
#define mb() \
asm volatile ( "lock; addl $0,0(%%esp)" : : : "memory" )
-#define __save_flags(x) \
- asm volatile ( "pushfl ; popl %0" : "=g" (x) : )
-#define __restore_flags(x) \
- asm volatile ( "pushl %0 ; popfl" : : "g" (x) : "memory", "cc" )
-
-#define local_irq_save(x) \
- asm volatile ( "pushfl ; popl %0 ; cli" : "=g" (x) : : "memory" )
-#define local_irq_restore(x) \
- __restore_flags(x)
-
#endif /* __X86_32_SYSTEM_H__ */
ASSERT(va >= XEN_VIRT_START);
ASSERT(va < DIRECTMAP_VIRT_END);
ASSERT((va < XEN_VIRT_END) || (va >= DIRECTMAP_VIRT_START));
- if ( va > DIRECTMAP_VIRT_START )
+ if ( va >= DIRECTMAP_VIRT_START )
return va - DIRECTMAP_VIRT_START;
return va - XEN_VIRT_START + xen_phys_start;
}
#define BASE_DISALLOW_MASK (0xFF800198U & ~_PAGE_NX)
#define L1_DISALLOW_MASK (BASE_DISALLOW_MASK | _PAGE_GNTTAB)
-#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK)
+#define L2_DISALLOW_MASK (BASE_DISALLOW_MASK & ~_PAGE_PSE)
#define L3_DISALLOW_MASK (BASE_DISALLOW_MASK)
#define L4_DISALLOW_MASK (BASE_DISALLOW_MASK)
-#define COMPAT_L3_DISALLOW_MASK 0xFFFFF1FEU
+#define COMPAT_L3_DISALLOW_MASK 0xFFFFF198U
#define PAGE_HYPERVISOR (__PAGE_HYPERVISOR | _PAGE_GLOBAL)
#define PAGE_HYPERVISOR_NOCACHE (__PAGE_HYPERVISOR_NOCACHE | _PAGE_GLOBAL)
-#define GRANT_PTE_FLAGS \
- (_PAGE_PRESENT|_PAGE_ACCESSED|_PAGE_DIRTY|_PAGE_GNTTAB|_PAGE_USER)
-
#define USER_MAPPINGS_ARE_GLOBAL
#ifdef USER_MAPPINGS_ARE_GLOBAL
/*
#define mb() \
asm volatile ( "mfence" : : : "memory" )
-#define __save_flags(x) \
- asm volatile ( "pushfq ; popq %q0" : "=g" (x) : :"memory" )
-#define __restore_flags(x) \
- asm volatile ( "pushq %0 ; popfq" : : "g" (x) : "memory", "cc" )
-
-#define local_irq_save(x) \
- asm volatile ( "pushfq ; popq %0 ; cli" : "=g" (x) : : "memory" )
-#define local_irq_restore(x) \
- __restore_flags(x)
-
#endif /* __X86_64_SYSTEM_H__ */
"xenoprof/x86 with autotranslated mode enabled" \
"isn't supported yet\n"); \
} while (0)
+int passive_domain_do_rdmsr(struct cpu_user_regs *regs);
+int passive_domain_do_wrmsr(struct cpu_user_regs *regs);
+void passive_domain_destroy(struct vcpu *v);
#endif /* __ASM_X86_XENOPROF_H__ */
--- /dev/null
+/* $OpenBSD: rijndael.h,v 1.13 2008/06/09 07:49:45 djm Exp $ */
+
+/**
+ * rijndael-alg-fst.h
+ *
+ * @version 3.0 (December 2000)
+ *
+ * Optimised ANSI C code for the Rijndael cipher (now AES)
+ *
+ * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
+ * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
+ * @author Paulo Barreto <paulo.barreto@terra.com.br>
+ *
+ * This code is hereby placed in the public domain.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
+ * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
+ * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
+ * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+#ifndef __RIJNDAEL_H
+#define __RIJNDAEL_H
+
+#define AES_MAXKEYBITS (256)
+#define AES_MAXKEYBYTES (AES_MAXKEYBITS/8)
+/* for 256-bit keys, fewer for less */
+#define AES_MAXROUNDS 14
+
+//typedef unsigned char u8;
+//typedef unsigned short u16;
+//typedef unsigned int u32;
+
+/* The structure for key information */
+typedef struct {
+ int enc_only; /* context contains only encrypt schedule */
+ int Nr; /* key-length-dependent number of rounds */
+ u32 ek[4*(AES_MAXROUNDS + 1)]; /* encrypt key schedule */
+ u32 dk[4*(AES_MAXROUNDS + 1)]; /* decrypt key schedule */
+} rijndael_ctx;
+
+int rijndael_set_key(rijndael_ctx *, const u_char *, int);
+int rijndael_set_key_enc_only(rijndael_ctx *, const u_char *, int);
+void rijndael_decrypt(rijndael_ctx *, const u_char *, u_char *);
+void rijndael_encrypt(rijndael_ctx *, const u_char *, u_char *);
+
+int rijndaelKeySetupEnc(unsigned int [], const unsigned char [], int);
+int rijndaelKeySetupDec(unsigned int [], const unsigned char [], int);
+void rijndaelEncrypt(const unsigned int [], int, const unsigned char [],
+ unsigned char []);
+
+#endif /* __RIJNDAEL_H */
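A minimal sketch of using the declarations above to round-trip one block (key and data contents are placeholders):

rijndael_ctx ctx;
u_char key[32];                      /* 256-bit key, contents elided */
u_char pt[16], ct[16];               /* one 128-bit block */

rijndael_set_key(&ctx, key, 256);    /* key length is given in bits */
rijndael_encrypt(&ctx, pt, ct);
rijndael_decrypt(&ctx, ct, pt);      /* decrypt recovers the original block */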
--- /dev/null
+#ifndef HEADER_VMAC_H
+#define HEADER_VMAC_H
+
+/* --------------------------------------------------------------------------
+ * VMAC and VHASH Implementation by Ted Krovetz (tdk@acm.org) and Wei Dai.
+ * This implementation is hereby placed in the public domain.
+ * The authors offer no warranty. Use at your own risk.
+ * Please send bug reports to the authors.
+ * Last modified: 17 APR 08, 1700 PDT
+ * ----------------------------------------------------------------------- */
+
+/* --------------------------------------------------------------------------
+ * User definable settings.
+ * ----------------------------------------------------------------------- */
+#define VMAC_TAG_LEN 64 /* Must be 64 or 128 - 64 sufficient for most */
+#define VMAC_KEY_LEN 128 /* Must be 128, 192 or 256 */
+#define VMAC_NHBYTES 128 /* Must be 2^i for any 3 < i < 13. Standard = 128 */
+#define VMAC_PREFER_BIG_ENDIAN 0 /* Prefer non-x86 */
+
+#define VMAC_USE_OPENSSL 0 /* Set to non-zero to use OpenSSL's AES */
+#define VMAC_CACHE_NONCES 1 /* Set to non-zero to cause caching */
+ /* of consecutive nonces on 64-bit tags */
+
+#define VMAC_RUN_TESTS 0 /* Set to non-zero to check vectors and speed */
+#define VMAC_HZ (448e6) /* Set to hz of host machine to get speed */
+#define VMAC_HASH_ONLY 0 /* Set to non-zero to time hash only (not-mac) */
+/* Speeds of cpus I have access to
+#define hz (2400e6) glyme Core 2 "Conroe"
+#define hz (2000e6) jupiter G5
+#define hz (1592e6) titan
+#define hz (2793e6) athena/gaia
+#define hz (1250e6) isis G4
+#define hz (2160e6) imac Core 2 "Merom"
+#define hz (266e6) ppc/arm
+#define hz (400e6) mips
+*/
+
+/* --------------------------------------------------------------------------
+ * This implementation uses uint32_t and uint64_t as names for unsigned 32-
+ * and 64-bit integer types. These are defined in C99 stdint.h. The
+ * following may need adaptation if you are not running a C99 or
+ * Microsoft C environment.
+ * ----------------------------------------------------------------------- */
+#define VMAC_USE_STDINT 1 /* Set to zero if system has no stdint.h */
+
+#if VMAC_USE_STDINT && !_MSC_VER /* Try stdint.h if non-Microsoft */
+#ifdef __cplusplus
+#define __STDC_CONSTANT_MACROS
+#endif
+//#include <stdint.h>
+#elif (_MSC_VER) /* Microsoft C does not have stdint.h */
+typedef unsigned __int32 uint32_t;
+typedef unsigned __int64 uint64_t;
+#define UINT64_C(v) v ## UI64
+#else /* Guess sensibly - may need adaptation */
+typedef unsigned int uint32_t;
+typedef unsigned long long uint64_t;
+#define UINT64_C(v) v ## ULL
+#endif
+
+/* --------------------------------------------------------------------------
+ * This implementation supports two free AES implementations: OpenSSL's and
+ * Paulo Barreto's. To use OpenSSL's, you will need to include the OpenSSL
+ * crypto library (eg, gcc -lcrypto foo.c). For Barreto's, you will need
+ * to compile rijndael-alg-fst.c, last seen at http://www.iaik.tu-graz.ac.at/
+ * research/krypto/AES/old/~rijmen/rijndael/rijndael-fst-3.0.zip and
+ * http://homes.esat.kuleuven.be/~rijmen/rijndael/rijndael-fst-3.0.zip.
+ * To use a different implementation, use these definitions as a model.
+ * ----------------------------------------------------------------------- */
+#if VMAC_USE_OPENSSL
+
+#include <openssl/aes.h>
+typedef AES_KEY aes_int_key;
+
+#define aes_encryption(in,out,int_key) \
+ AES_encrypt((unsigned char *)(in),(unsigned char *)(out),(int_key))
+#define aes_key_setup(key,int_key) \
+ AES_set_encrypt_key((key),VMAC_KEY_LEN,(int_key))
+
+#else
+
+//#include "rijndael-alg-fst.h"
+typedef uint64_t vmac_t;
+#include "rijndael.h"
+typedef u32 aes_int_key[4*(VMAC_KEY_LEN/32+7)];
+
+#define aes_encryption(in,out,int_key) \
+ rijndaelEncrypt((u32 *)(int_key), \
+ ((VMAC_KEY_LEN/32)+6), \
+ (u8 *)(in), (u8 *)(out))
+#define aes_key_setup(user_key,int_key) \
+ rijndaelKeySetupEnc((u32 *)(int_key), \
+ (u8 *)(user_key), \
+ VMAC_KEY_LEN)
+#endif
+
+/* --------------------------------------------------------------------- */
+
+typedef struct {
+ uint64_t nhkey [(VMAC_NHBYTES/8)+2*(VMAC_TAG_LEN/64-1)];
+ uint64_t polykey[2*VMAC_TAG_LEN/64];
+ uint64_t l3key [2*VMAC_TAG_LEN/64];
+ uint64_t polytmp[2*VMAC_TAG_LEN/64];
+ aes_int_key cipher_key;
+ #if (VMAC_TAG_LEN == 64) && (VMAC_CACHE_NONCES)
+ uint64_t cached_nonce[2];
+ uint64_t cached_aes[2];
+ #endif
+ int first_block_processed;
+} vmac_ctx_t;
+
+/* --------------------------------------------------------------------- */
+#ifdef __cplusplus
+extern "C" {
+#endif
+/* --------------------------------------------------------------------------
+ * <<<<< USAGE NOTES >>>>>
+ *
+ * Given msg m (mbytes in length) and nonce buffer n
+ * this function returns a tag as its output. The tag is returned as
+ * a number. When VMAC_TAG_LEN == 64, the 'return'ed integer is the tag,
+ * and *tagl is meaningless. When VMAC_TAG_LEN == 128 the tag is the
+ * number y * 2^64 + *tagl where y is the function's return value.
+ * If you want to consider tags to be strings, then you must do so with
+ * an agreed upon endian orientation for interoperability, and convert
+ * the results appropriately. VHASH hashes m without creating any tag.
+ * Consecutive substrings forming a prefix of a message may be passed
+ * to vhash_update, with vhash or vmac being called with the remainder
+ * to produce the output.
+ *
+ * Requirements:
+ * - On 32-bit architectures with SSE2 instructions, ctx and m MUST
+ * begin on 16-byte memory boundaries.
+ * - m MUST be your message followed by zeroes to the nearest 16-byte
+ * boundary. If m is a length multiple of 16 bytes, then it is already
+ * at a 16-byte boundary and needs no padding. mbytes should be your
+ * message length without any padding.
+ * - The first bit of the nonce buffer n must be 0. An i-byte nonce is made
+ * up of 16-i leading zero bytes followed by the i nonce bytes.
+ * - vhash_update MUST have mbytes be a positive multiple of VMAC_NHBYTES
+ * ----------------------------------------------------------------------- */
+
+#define vmac_update vhash_update
+
+void vhash_update(unsigned char m[],
+ unsigned int mbytes,
+ vmac_ctx_t *ctx);
+
+uint64_t vmac(unsigned char m[],
+ unsigned int mbytes,
+ unsigned char n[16],
+ uint64_t *tagl,
+ vmac_ctx_t *ctx);
+
+uint64_t vhash(unsigned char m[],
+ unsigned int mbytes,
+ uint64_t *tagl,
+ vmac_ctx_t *ctx);
+
+/* --------------------------------------------------------------------------
+ * When passed a VMAC_KEY_LEN bit user_key, this function initializes ctx.
+ * ----------------------------------------------------------------------- */
+
+void vmac_set_key(unsigned char user_key[], vmac_ctx_t *ctx);
+
+/* --------------------------------------------------------------------------
+ * This function aborts current hash and resets ctx, ready for a new message.
+ * ----------------------------------------------------------------------- */
+
+void vhash_abort(vmac_ctx_t *ctx);
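A minimal call-sequence sketch following the usage notes above, assuming VMAC_TAG_LEN == 64 (key, nonce and message contents are placeholders):

static unsigned char key[VMAC_KEY_LEN/8];   /* 128-bit user key, contents elided */
static unsigned char nonce[16];             /* leading bytes zero, nonce in the low bytes */
static vmac_ctx_t ctx;

static uint64_t tag_message(unsigned char *m, unsigned int mbytes)
{
    uint64_t tagl;                          /* only meaningful for 128-bit tags */

    /* m must already be zero-padded to a 16-byte boundary; mbytes excludes
     * that padding, as required by the usage notes above. */
    vmac_set_key(key, &ctx);                /* one-time key schedule setup */
    return vmac(m, mbytes, nonce, &tagl, &ctx);
}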
+
+/* --------------------------------------------------------------------- */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* HEADER_VMAC_H */
unsigned long rrs[8]; // region registers
unsigned long krs[8]; // kernel registers
unsigned long tmp[16]; // temp registers (e.g. for hyperprivops)
+
+ /* itc paravirtualization
+ * vAR.ITC = mAR.ITC + itc_offset
+ * itc_last is the value most recently passed to
+ * the guest OS, in order to prevent it from
+ * going backwards.
+ */
+ unsigned long itc_offset;
+ unsigned long itc_last;
};
};
};
#define VGCF_EXTRA_REGS (1UL << 1) /* Set extra regs. */
#define VGCF_SET_CR_IRR (1UL << 2) /* Set cr_irr[0:3]. */
#define VGCF_online (1UL << 3) /* make this vcpu online */
+#define VGCF_SET_AR_ITC (1UL << 4) /* set pv ar.itc. itc_offset, itc_last */
unsigned long flags; /* VGCF_* flags */
struct vcpu_guest_context_regs regs;
#ifndef __XEN_PUBLIC_HVM_SAVE_IA64_H__
#define __XEN_PUBLIC_HVM_SAVE_IA64_H__
-#include <public/hvm/save.h>
-#include <public/arch-ia64.h>
+#include "../../hvm/save.h"
+#include "../../arch-ia64.h"
/*
* Save/restore header: general info about the save file.
*/
#define VIOSAPIC_NUM_PINS 48
-union viosapic_rte
+/* To share VT-d code which uses vioapic_redir_entry.
+ * Although on ia64 this is really the viosapic, we have to use
+ * vioapic_redir_entry instead of viosapic_redir_entry.
+ */
+union vioapic_redir_entry
{
uint64_t bits;
struct {
uint8_t reserved[3];
uint16_t dest_id;
- };
+ } fields;
};
struct hvm_hw_ia64_viosapic {
uint32_t pad;
uint64_t lowest_vcpu_id;
uint64_t base_address;
- union viosapic_rte redirtbl[VIOSAPIC_NUM_PINS];
+ union vioapic_redir_entry redirtbl[VIOSAPIC_NUM_PINS];
};
DECLARE_HVM_SAVE_TYPE(VIOSAPIC, 6, struct hvm_hw_ia64_viosapic);
* Indexed by: device*4 + INTx#.
*/
union {
- DECLARE_BITMAP(i, 32*4);
+ unsigned long i[16 / sizeof (unsigned long)]; /* DECLARE_BITMAP(i, 32*4); */
uint64_t pad[2];
};
};
* Indexed by ISA IRQ (assumes no ISA-device IRQ sharing).
*/
union {
- DECLARE_BITMAP(i, 16);
+ unsigned long i[1]; /* DECLARE_BITMAP(i, 16); */
uint64_t pad[1];
};
};
/* Hypercall */
#define __HYPERVISOR_mca __HYPERVISOR_arch_0
-#define XEN_MCA_INTERFACE_VERSION 0x03000001
+/*
+ * The xen-unstable repo has interface version 0x03000001; our interface
+ * is incompatible with that and any future minor revisions, so we
+ * choose a different version number range that is numerically less
+ * than that used in xen-unstable.
+ */
+#define XEN_MCA_INTERFACE_VERSION 0x01ecc002
-/* IN: Dom0 calls hypercall from MC event handler. */
-#define XEN_MC_CORRECTABLE 0x0
-/* IN: Dom0/DomU calls hypercall from MC trap handler. */
-#define XEN_MC_TRAP 0x1
-/* XEN_MC_CORRECTABLE and XEN_MC_TRAP are mutually exclusive. */
+/* IN: Dom0 calls hypercall to retrieve nonurgent telemetry */
+#define XEN_MC_NONURGENT 0x0001
+/* IN: Dom0/DomU calls hypercall to retrieve urgent telemetry */
+#define XEN_MC_URGENT 0x0002
+/* IN: Dom0 acknowledges previously-fetched telemetry */
+#define XEN_MC_ACK 0x0004
/* OUT: All is ok */
#define XEN_MC_OK 0x0
#define MC_TYPE_GLOBAL 0
#define MC_TYPE_BANK 1
#define MC_TYPE_EXTENDED 2
+#define MC_TYPE_RECOVERY 3
struct mcinfo_common {
uint16_t type; /* structure type */
#define MC_FLAG_CORRECTABLE (1 << 0)
#define MC_FLAG_UNCORRECTABLE (1 << 1)
-
+#define MC_FLAG_RECOVERABLE (1 << 2)
+#define MC_FLAG_POLLED (1 << 3)
+#define MC_FLAG_RESET (1 << 4)
+#define MC_FLAG_CMCI (1 << 5)
+#define MC_FLAG_MCE (1 << 6)
/* contains global x86 mc information */
struct mcinfo_global {
struct mcinfo_common common;
uint16_t mc_domid;
uint32_t mc_socketid; /* physical socket of the physical core */
uint16_t mc_coreid; /* physical impacted core */
+ uint32_t mc_apicid;
uint16_t mc_core_threadid; /* core thread of physical core */
uint16_t mc_vcpuid; /* virtual cpu scheduled for mc_domid */
uint64_t mc_gstatus; /* global status */
uint64_t mc_addr; /* bank address, only valid
* if addr bit is set in mc_status */
uint64_t mc_misc;
+ uint64_t mc_ctrl2;
+ uint64_t mc_tsc;
};
* multiple times. */
uint32_t mc_msrs; /* Number of msr with valid values. */
- struct mcinfo_msr mc_msr[5];
+ /*
+ * Currently the Intel extended MSRs (32/64) include all the GP registers
+ * plus E(R)DI, E(R)BP, E(R)SP, E(R)FLAGS, E(R)IP and E(R)MISC; only 10
+ * of them are likely to be useful, so expand this array to 10.
+ */
+ struct mcinfo_msr mc_msr[10];
+};
+
+/* Recovery Action flags. Giving recovery result information to DOM0 */
+
+/* Xen takes successful recovery action, the error is recovered */
+#define REC_ACTION_RECOVERED (0x1 << 0)
+/* No action is performed by XEN */
+#define REC_ACTION_NONE (0x1 << 1)
+/* It's possible DOM0 might take action ownership in some cases */
+#define REC_ACTION_NEED_RESET (0x1 << 2)
+
+/* Different recovery action types. If the action is performed successfully,
+ * the REC_ACTION_RECOVERED flag will be returned.
+ */
+
+/* Page Offline Action */
+#define MC_ACTION_PAGE_OFFLINE (0x1 << 0)
+/* CPU offline Action */
+#define MC_ACTION_CPU_OFFLINE (0x1 << 1)
+/* L3 cache disable Action */
+#define MC_ACTION_CACHE_SHRINK (0x1 << 2)
+
+/* The interface below is used between XEN/DOM0 for passing XEN's recovery
+ * action information to DOM0.
+ * Usage scenario: after offlining a broken page, XEN might pass its page
+ * offline recovery action result to DOM0. DOM0 will save the information in
+ * non-volatile memory for further proactive actions, such as offlining the
+ * easily-broken page earlier at the next reboot.
+ */
+struct page_offline_action
+{
+ /* Params for passing the offlined page number to DOM0 */
+ uint64_t mfn;
+ uint64_t status;
+};
+
+struct cpu_offline_action
+{
+ /* Params for passing the identity of the offlined CPU to DOM0 */
+ uint32_t mc_socketid;
+ uint16_t mc_coreid;
+ uint16_t mc_core_threadid;
+};
+
+#define MAX_UNION_SIZE 16
+struct mc_recovery
+{
+ uint16_t mc_bank; /* bank nr */
+ uint8_t action_flags;
+ uint8_t action_types;
+ union {
+ struct page_offline_action page_retire;
+ struct cpu_offline_action cpu_offline;
+ uint8_t pad[MAX_UNION_SIZE];
+ } action_info;
};
+struct mcinfo_recovery
+{
+ struct mcinfo_common common;
+ struct mc_recovery mc_action;
+};
+
+
#define MCINFO_HYPERCALLSIZE 1024
#define MCINFO_MAXSIZE 768
uint8_t mi_data[MCINFO_MAXSIZE - sizeof(uint32_t)];
};
typedef struct mc_info mc_info_t;
-
+DEFINE_XEN_GUEST_HANDLE(mc_info_t);
+
+#define __MC_MSR_ARRAYSIZE 8
+#define __MC_NMSRS 1
+#define MC_NCAPS 7 /* 7 CPU feature flag words */
+#define MC_CAPS_STD_EDX 0 /* cpuid level 0x00000001 (%edx) */
+#define MC_CAPS_AMD_EDX 1 /* cpuid level 0x80000001 (%edx) */
+#define MC_CAPS_TM 2 /* cpuid level 0x80860001 (TransMeta) */
+#define MC_CAPS_LINUX 3 /* Linux-defined */
+#define MC_CAPS_STD_ECX 4 /* cpuid level 0x00000001 (%ecx) */
+#define MC_CAPS_VIA 5 /* cpuid level 0xc0000001 */
+#define MC_CAPS_AMD_ECX 6 /* cpuid level 0x80000001 (%ecx) */
+
+typedef struct mcinfo_logical_cpu {
+ uint32_t mc_cpunr;
+ uint32_t mc_chipid;
+ uint16_t mc_coreid;
+ uint16_t mc_threadid;
+ uint32_t mc_apicid;
+ uint32_t mc_clusterid;
+ uint32_t mc_ncores;
+ uint32_t mc_ncores_active;
+ uint32_t mc_nthreads;
+ int32_t mc_cpuid_level;
+ uint32_t mc_family;
+ uint32_t mc_vendor;
+ uint32_t mc_model;
+ uint32_t mc_step;
+ char mc_vendorid[16];
+ char mc_brandid[64];
+ uint32_t mc_cpu_caps[MC_NCAPS];
+ uint32_t mc_cache_size;
+ uint32_t mc_cache_alignment;
+ int32_t mc_nmsrvals;
+ struct mcinfo_msr mc_msrvalues[__MC_MSR_ARRAYSIZE];
+} xen_mc_logical_cpu_t;
+DEFINE_XEN_GUEST_HANDLE(xen_mc_logical_cpu_t);
/*
#define XEN_MC_fetch 1
struct xen_mc_fetch {
/* IN/OUT variables. */
- uint32_t flags;
-
-/* IN: XEN_MC_CORRECTABLE, XEN_MC_TRAP */
-/* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED, XEN_MC_NODATA, XEN_MC_NOMATCH */
+ uint32_t flags; /* IN: XEN_MC_NONURGENT, XEN_MC_URGENT,
+ XEN_MC_ACK if ack'ing an earlier fetch */
+ /* OUT: XEN_MC_OK, XEN_MC_FETCHFAILED,
+ XEN_MC_NODATA, XEN_MC_NOMATCH */
+ uint64_t fetch_id; /* OUT: id for ack, IN: id we are ack'ing */
/* OUT variables. */
- uint32_t fetch_idx; /* only useful for Dom0 for the notify hypercall */
- struct mc_info mc_info;
+ XEN_GUEST_HANDLE(mc_info_t) data;
};
typedef struct xen_mc_fetch xen_mc_fetch_t;
DEFINE_XEN_GUEST_HANDLE(xen_mc_fetch_t);
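A Dom0-side sketch of the fetch/ack flow implied by the new flags and fetch_id fields; the HYPERVISOR_mca() wrapper and the buf pointer (a caller-supplied struct mc_info buffer) are assumptions here, not defined by this header:

struct xen_mc mc = {
    .cmd               = XEN_MC_fetch,
    .interface_version = XEN_MCA_INTERFACE_VERSION,
};

mc.u.mc_fetch.flags = XEN_MC_URGENT;
set_xen_guest_handle(mc.u.mc_fetch.data, buf);   /* buf: caller-supplied struct mc_info */

if ( HYPERVISOR_mca(&mc) == 0 &&
     !(mc.u.mc_fetch.flags & (XEN_MC_FETCHFAILED | XEN_MC_NODATA)) )
{
    /* Consume the telemetry in *buf, then acknowledge it; fetch_id was
     * filled in by Xen and is echoed back unchanged. */
    mc.u.mc_fetch.flags = XEN_MC_URGENT | XEN_MC_ACK;
    (void)HYPERVISOR_mca(&mc);
}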
uint16_t mc_domid; /* The unprivileged domain to notify. */
uint16_t mc_vcpuid; /* The vcpu in mc_domid to notify.
* Usually echo'd value from the fetch hypercall. */
- uint32_t fetch_idx; /* echo'd value from the fetch hypercall. */
/* IN/OUT variables. */
uint32_t flags;
typedef struct xen_mc_notifydomain xen_mc_notifydomain_t;
DEFINE_XEN_GUEST_HANDLE(xen_mc_notifydomain_t);
+#define XEN_MC_physcpuinfo 3
+struct xen_mc_physcpuinfo {
+ /* IN/OUT */
+ uint32_t ncpus;
+ uint32_t pad0;
+ /* OUT */
+ XEN_GUEST_HANDLE(xen_mc_logical_cpu_t) info;
+};
+
+#define XEN_MC_msrinject 4
+#define MC_MSRINJ_MAXMSRS 8
+struct xen_mc_msrinject {
+ /* IN */
+ unsigned int mcinj_cpunr; /* target processor id */
+ uint32_t mcinj_flags; /* see MC_MSRINJ_F_* below */
+ uint32_t mcinj_count; /* 0 .. count-1 in array are valid */
+ uint32_t mcinj_pad0;
+ struct mcinfo_msr mcinj_msr[MC_MSRINJ_MAXMSRS];
+};
+
+/* Flags for mcinj_flags above; bits 16-31 are reserved */
+#define MC_MSRINJ_F_INTERPOSE 0x1
+
+#define XEN_MC_mceinject 5
+struct xen_mc_mceinject {
+ unsigned int mceinj_cpunr; /* target processor id */
+};
+
+typedef union {
+ struct xen_mc_fetch mc_fetch;
+ struct xen_mc_notifydomain mc_notifydomain;
+ struct xen_mc_physcpuinfo mc_physcpuinfo;
+ struct xen_mc_msrinject mc_msrinject;
+ struct xen_mc_mceinject mc_mceinject;
+} xen_mc_arg_t;
struct xen_mc {
uint32_t cmd;
uint32_t interface_version; /* XEN_MCA_INTERFACE_VERSION */
- union {
- struct xen_mc_fetch mc_fetch;
- struct xen_mc_notifydomain mc_notifydomain;
- uint8_t pad[MCINFO_HYPERCALLSIZE];
- } u;
+ xen_mc_arg_t u;
};
typedef struct xen_mc xen_mc_t;
DEFINE_XEN_GUEST_HANDLE(xen_mc_t);
uint32_t ssidref;
xen_domain_handle_t handle;
/* Is this an HVM guest (as opposed to a PV guest)? */
-#define _XEN_DOMCTL_CDF_hvm_guest 0
-#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest)
+#define _XEN_DOMCTL_CDF_hvm_guest 0
+#define XEN_DOMCTL_CDF_hvm_guest (1U<<_XEN_DOMCTL_CDF_hvm_guest)
/* Use hardware-assisted paging if available? */
-#define _XEN_DOMCTL_CDF_hap 1
-#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap)
+#define _XEN_DOMCTL_CDF_hap 1
+#define XEN_DOMCTL_CDF_hap (1U<<_XEN_DOMCTL_CDF_hap)
+ /* Should domain memory integrity be verified by tboot during Sx? */
+#define _XEN_DOMCTL_CDF_s3_integrity 2
+#define XEN_DOMCTL_CDF_s3_integrity (1U<<_XEN_DOMCTL_CDF_s3_integrity)
uint32_t flags;
};
typedef struct xen_domctl_createdomain xen_domctl_createdomain_t;
PT_IRQ_TYPE_PCI,
PT_IRQ_TYPE_ISA,
PT_IRQ_TYPE_MSI,
+ PT_IRQ_TYPE_MSI_TRANSLATE,
} pt_irq_type_t;
struct xen_domctl_bind_pt_irq {
uint32_t machine_irq;
struct {
uint8_t gvec;
uint32_t gflags;
+ uint64_aligned_t gtable;
} msi;
} u;
};
*/
#define XEN_DOMCTL_suppress_spurious_page_faults 53
+#define XEN_DOMCTL_debug_op 54
+#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF 0
+#define XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON 1
+struct xen_domctl_debug_op {
+ uint32_t op; /* IN */
+ uint32_t vcpu; /* IN */
+};
+typedef struct xen_domctl_debug_op xen_domctl_debug_op_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_debug_op_t);
+
+/*
+ * Request a particular record from the HVM context
+ */
+#define XEN_DOMCTL_gethvmcontext_partial 55
+typedef struct xen_domctl_hvmcontext_partial {
+ uint32_t type; /* IN: Type of record required */
+ uint32_t instance; /* IN: Instance of that type */
+ XEN_GUEST_HANDLE_64(uint8) buffer; /* OUT: buffer to write record into */
+} xen_domctl_hvmcontext_partial_t;
+DEFINE_XEN_GUEST_HANDLE(xen_domctl_hvmcontext_partial_t);
+
+
struct xen_domctl {
uint32_t cmd;
uint32_t interface_version; /* XEN_DOMCTL_INTERFACE_VERSION */
struct xen_domctl_settimeoffset settimeoffset;
struct xen_domctl_real_mode_area real_mode_area;
struct xen_domctl_hvmcontext hvmcontext;
+ struct xen_domctl_hvmcontext_partial hvmcontext_partial;
struct xen_domctl_address_size address_size;
struct xen_domctl_sendtrigger sendtrigger;
struct xen_domctl_get_device_group get_device_group;
struct xen_domctl_set_opt_feature set_opt_feature;
struct xen_domctl_set_target set_target;
struct xen_domctl_subscribe subscribe;
+ struct xen_domctl_debug_op debug_op;
#if defined(__i386__) || defined(__x86_64__)
struct xen_domctl_cpuid cpuid;
#endif
*/
#define XEN_ELFNOTE_SUSPEND_CANCEL 14
+/*
+ * The (non-default) location the initial phys-to-machine map should be
+ * placed at by the hypervisor (Dom0) or the tools (DomU).
+ * The kernel must be prepared for this mapping to be established using
+ * large pages, despite such otherwise not being available to guests.
+ * The kernel must also be able to handle the page table pages used for
+ * this mapping not being accessible through the initial mapping.
+ * (Only x86-64 supports this at present.)
+ */
+#define XEN_ELFNOTE_INIT_P2M 15
+
/*
* The number of the highest elfnote defined.
*/
-#define XEN_ELFNOTE_MAX XEN_ELFNOTE_SUSPEND_CANCEL
+#define XEN_ELFNOTE_MAX XEN_ELFNOTE_INIT_P2M
/*
* System information exported through crash notes.
/* x86: Does this Xen host support the MMU_PT_UPDATE_PRESERVE_AD hypercall? */
#define XENFEAT_mmu_pt_update_preserve_ad 5
+/* x86: Does this Xen host support the MMU_{CLEAR,COPY}_PAGE hypercall? */
+#define XENFEAT_highmem_assist 6
+
+/*
+ * If set, GNTTABOP_map_grant_ref honors flags to be placed into guest kernel
+ * available pte bits.
+ */
+#define XENFEAT_gnttab_map_avail_bits 7
+
#define XENFEAT_NR_SUBMAPS 1
#endif /* __XEN_PUBLIC_FEATURES_H__ */
/*
- * Bitfield values for update_pin_status.flags.
+ * Bitfield values for gnttab_map_grant_ref.flags.
*/
/* Map the grant entry for access by I/O devices. */
#define _GNTMAP_device_map (0)
#define _GNTMAP_contains_pte (4)
#define GNTMAP_contains_pte (1<<_GNTMAP_contains_pte)
+/*
+ * Bits to be placed in guest kernel available PTE bits (architecture
+ * dependent; only supported when XENFEAT_gnttab_map_avail_bits is set).
+ */
+#define _GNTMAP_guest_avail0 (16)
+#define GNTMAP_guest_avail_mask ((uint32_t)~0 << _GNTMAP_guest_avail0)
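A guest-kernel sketch of passing one of its available PTE bits through the new flag space; the xen_feature() check and the host_addr/backend_domid/gref values are placeholders, while GNTMAP_host_map and GNTMAP_readonly come from this same header:

struct gnttab_map_grant_ref map;

map.host_addr = host_addr;                       /* caller-supplied mapping address */
map.dom       = backend_domid;
map.ref       = gref;
map.flags     = GNTMAP_host_map | GNTMAP_readonly;
if ( xen_feature(XENFEAT_gnttab_map_avail_bits) )
    map.flags |= 1u << _GNTMAP_guest_avail0;     /* request guest-available PTE bit 0 */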
+
/*
* Values for error status returns. All errors are -ve.
*/
char signature[8]; /* "HVM INFO" */
uint32_t length;
uint8_t checksum;
+
+ /* Should firmware build ACPI tables? */
uint8_t acpi_enabled;
+
+ /* Should firmware build APIC descriptors (APIC MADT / MP BIOS)? */
uint8_t apic_mode;
+
+ /* How many CPUs does this domain have? */
uint32_t nr_vcpus;
+
+ /*
+ * MEMORY MAP provided by HVM domain builder.
+ * Notes:
+ * 1. page_to_phys(x) = x << 12
+ * 2. If a field is zero, the corresponding range does not exist.
+ */
+ /*
+ * 0x0 to page_to_phys(low_mem_pgend)-1:
+ * RAM below 4GB (except for VGA hole 0xA0000-0xBFFFF)
+ */
+ uint32_t low_mem_pgend;
+ /*
+ * page_to_phys(reserved_mem_pgstart) to 0xFFFFFFFF:
+ * Reserved for special memory mappings
+ */
+ uint32_t reserved_mem_pgstart;
+ /*
+ * 0x100000000 to page_to_phys(high_mem_pgend)-1:
+ * RAM above 4GB
+ */
+ uint32_t high_mem_pgend;
};
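As a small illustration of the new memory-map fields, a consumer (e.g. guest firmware) could derive byte ranges from the page numbers using the page_to_phys() relation given in the comment; t, assumed here, points at the hvm_info_table:

#define hvm_page_to_phys(pgnr) ((uint64_t)(pgnr) << 12)

uint64_t low_ram_end   = hvm_page_to_phys(t->low_mem_pgend);        /* exclusive end, below 4GB */
uint64_t reserved_base = hvm_page_to_phys(t->reserved_mem_pgstart); /* reserved up to 4GB */
uint64_t high_ram_end  = t->high_mem_pgend ?
                         hvm_page_to_phys(t->high_mem_pgend) : 0;   /* 0: no RAM above 4GB */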
#endif /* __XEN_PUBLIC_HVM_HVM_INFO_TABLE_H__ */
/* ACPI S state: currently support S0 and S3 on x86. */
#define HVM_PARAM_ACPI_S_STATE 14
-#define HVM_NR_PARAMS 15
+/* TSS used on Intel when CR0.PE=0. */
+#define HVM_PARAM_VM86_TSS 15
+
+/* Boolean: Enable aligning all periodic vpts to reduce interrupts */
+#define HVM_PARAM_VPT_ALIGN 16
+
+#define HVM_NR_PARAMS 17
#endif /* __XEN_PUBLIC_HVM_PARAMS_H__ */
#define STATE_INITIALISED "init"
#define STATE_READY "ready"
-
+#define STATE_CLOSING "closing"
+#define STATE_CLOSED "closed"
#endif
/* xen_pci_sharedinfo flags */
#define _XEN_PCIF_active (0)
-#define XEN_PCIF_active (1<<_XEN_PCI_active)
+#define XEN_PCIF_active (1<<_XEN_PCIF_active)
+#define _XEN_PCIB_AERHANDLER (1)
+#define XEN_PCIB_AERHANDLER (1<<_XEN_PCIB_AERHANDLER)
+#define _XEN_PCIB_active (2)
+#define XEN_PCIB_active (1<<_XEN_PCIB_active)
/* xen_pci_op commands */
-#define XEN_PCI_OP_conf_read (0)
-#define XEN_PCI_OP_conf_write (1)
-#define XEN_PCI_OP_enable_msi (2)
-#define XEN_PCI_OP_disable_msi (3)
-#define XEN_PCI_OP_enable_msix (4)
-#define XEN_PCI_OP_disable_msix (5)
+#define XEN_PCI_OP_conf_read (0)
+#define XEN_PCI_OP_conf_write (1)
+#define XEN_PCI_OP_enable_msi (2)
+#define XEN_PCI_OP_disable_msi (3)
+#define XEN_PCI_OP_enable_msix (4)
+#define XEN_PCI_OP_disable_msix (5)
+#define XEN_PCI_OP_aer_detected (6)
+#define XEN_PCI_OP_aer_resume (7)
+#define XEN_PCI_OP_aer_mmio (8)
+#define XEN_PCI_OP_aer_slotreset (9)
/* xen_pci_op error numbers */
#define XEN_PCI_ERR_success (0)
struct xen_msix_entry msix_entries[SH_INFO_MAX_VEC];
};
+/* Used for PCIe AER handling. */
+struct xen_pcie_aer_op
+{
+
+ /* IN: what action to perform: XEN_PCI_OP_* */
+ uint32_t cmd;
+ /* IN/OUT: return aer_op result or carry error_detected state as input */
+ int32_t err;
+
+ /* IN: which device to touch */
+ uint32_t domain; /* PCI Domain/Segment*/
+ uint32_t bus;
+ uint32_t devfn;
+};
struct xen_pci_sharedinfo {
/* flags - XEN_PCIF_* */
uint32_t flags;
struct xen_pci_op op;
+ struct xen_pcie_aer_op aer_op;
};
#endif /* __XEN_PCI_COMMON_H__ */
--- /dev/null
+/*
+ * usbif.h
+ *
+ * USB I/O interface for Xen guest OSes.
+ *
+ * Copyright (C) 2009, FUJITSU LABORATORIES LTD.
+ * Author: Noboru Iwamatsu <n_iwamatsu@jp.fujitsu.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_PUBLIC_IO_USBIF_H__
+#define __XEN_PUBLIC_IO_USBIF_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/*
+ * USB pipe in usbif_request
+ *
+ * bits 0-5 are specific bits for virtual USB driver.
+ * bits 7-31 are standard urb pipe.
+ *
+ * - port number(NEW): bits 0-4
+ * (USB_MAXCHILDREN is 31)
+ *
+ * - operation flag(NEW): bit 5
+ * (0 = submit urb,
+ * 1 = unlink urb)
+ *
+ * - direction: bit 7
+ * (0 = Host-to-Device [Out]
+ * 1 = Device-to-Host [In])
+ *
+ * - device address: bits 8-14
+ *
+ * - endpoint: bits 15-18
+ *
+ * - pipe type: bits 30-31
+ * (00 = isochronous, 01 = interrupt,
+ * 10 = control, 11 = bulk)
+ */
+#define usbif_pipeportnum(pipe) ((pipe) & 0x1f)
+#define usbif_setportnum_pipe(pipe,portnum) \
+ ((pipe)|(portnum))
+#define usbif_pipeunlink(pipe) ((pipe) & 0x20)
+#define usbif_setunlink_pipe(pipe) ((pipe)|(0x20))
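A short sketch of composing and decoding the pipe value per the bit layout documented above (urb_pipe, port and cancelling are placeholders supplied by the frontend):

uint32_t pipe = urb_pipe;                       /* standard urb pipe bits (7-31) */

pipe = usbif_setportnum_pipe(pipe, port);       /* virtual root-hub port in bits 0-4 */
if ( cancelling )
    pipe = usbif_setunlink_pipe(pipe);          /* bit 5: unlink rather than submit */

/* The backend recovers the same fields: */
unsigned int portnum = usbif_pipeportnum(pipe);
int          unlink  = (usbif_pipeunlink(pipe) != 0);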
+
+#define USBIF_BACK_MAX_PENDING_REQS (128)
+#define USBIF_MAX_SEGMENTS_PER_REQUEST (10)
+
+struct usbif_request_segment {
+ grant_ref_t gref;
+ uint16_t offset;
+ uint16_t length;
+};
+
+struct usbif_request {
+ uint16_t id; /* request id */
+ uint16_t nr_buffer_segs; /* number of urb->transfer_buffer segments */
+
+ /* basic urb parameter */
+ uint32_t pipe;
+ uint16_t transfer_flags;
+ uint16_t buffer_length;
+ union {
+ uint8_t ctrl[8]; /* setup_packet (Ctrl) */
+
+ struct {
+ uint16_t interval; /* maximum (1024*8) in usb core */
+ uint16_t start_frame; /* start frame */
+ uint16_t number_of_packets; /* number of ISO packets */
+ uint16_t nr_frame_desc_segs; /* number of iso_frame_desc segments */
+ } isoc;
+
+ struct {
+ uint16_t interval; /* maximum (1024*8) in usb core */
+ uint16_t pad[3];
+ } intr;
+
+ struct {
+ uint16_t unlink_id; /* unlink request id */
+ uint16_t pad[3];
+ } unlink;
+
+ } u;
+
+ /* urb data segments */
+ struct usbif_request_segment seg[USBIF_MAX_SEGMENTS_PER_REQUEST];
+};
+typedef struct usbif_request usbif_request_t;
+
+struct usbif_response {
+ uint16_t id; /* request id */
+ uint16_t start_frame; /* start frame (ISO) */
+ int32_t status; /* status (non-ISO) */
+ int32_t actual_length; /* actual transfer length */
+ int32_t error_count; /* number of ISO errors */
+};
+typedef struct usbif_response usbif_response_t;
+
+DEFINE_RING_TYPES(usbif, struct usbif_request, struct usbif_response);
+#define USB_RING_SIZE __RING_SIZE((struct usbif_sring *)0, PAGE_SIZE)
+
+#endif /* __XEN_PUBLIC_IO_USBIF_H__ */
--- /dev/null
+/******************************************************************************
+ * vscsiif.h
+ *
+ * Based on the blkif.h code.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ *
+ * Copyright(c) FUJITSU Limited 2008.
+ */
+
+#ifndef __XEN__PUBLIC_IO_SCSI_H__
+#define __XEN__PUBLIC_IO_SCSI_H__
+
+#include "ring.h"
+#include "../grant_table.h"
+
+/* command between backend and frontend */
+#define VSCSIIF_ACT_SCSI_CDB 1 /* SCSI CDB command */
+#define VSCSIIF_ACT_SCSI_ABORT 2 /* SCSI Device(Lun) Abort*/
+#define VSCSIIF_ACT_SCSI_RESET 3 /* SCSI Device(Lun) Reset*/
+
+
+#define VSCSIIF_BACK_MAX_PENDING_REQS 128
+
+/*
+ * Maximum scatter/gather segments per request.
+ *
+ * Considering the balance between allocating at least 16 "vscsiif_request"
+ * structures on one page (4096 bytes) and the number of scatter-gather
+ * segments needed, we decided to use 26 as a magic number.
+ */
+#define VSCSIIF_SG_TABLESIZE 26
+
+/*
+ * based on Linux kernel 2.6.18
+ */
+#define VSCSIIF_MAX_COMMAND_SIZE 16
+#define VSCSIIF_SENSE_BUFFERSIZE 96
+
+
+struct vscsiif_request {
+ uint16_t rqid; /* private guest value, echoed in resp */
+ uint8_t act; /* command between backend and frontend */
+ uint8_t cmd_len;
+
+ uint8_t cmnd[VSCSIIF_MAX_COMMAND_SIZE];
+ uint16_t timeout_per_command; /* The backend issues the command
+ with twice this timeout value. */
+ uint16_t channel, id, lun;
+ uint16_t padding;
+ uint8_t sc_data_direction; /* for DMA_TO_DEVICE(1)
+ DMA_FROM_DEVICE(2)
+ DMA_NONE(3) requests */
+ uint8_t nr_segments; /* Number of pieces of scatter-gather */
+
+ struct scsiif_request_segment {
+ grant_ref_t gref;
+ uint16_t offset;
+ uint16_t length;
+ } seg[VSCSIIF_SG_TABLESIZE];
+ uint32_t reserved[3];
+};
+typedef struct vscsiif_request vscsiif_request_t;
+
+struct vscsiif_response {
+ uint16_t rqid;
+ uint8_t padding;
+ uint8_t sense_len;
+ uint8_t sense_buffer[VSCSIIF_SENSE_BUFFERSIZE];
+ int32_t rslt;
+ uint32_t residual_len; /* request bufflen -
+ return the value from physical device */
+ uint32_t reserved[36];
+};
+typedef struct vscsiif_response vscsiif_response_t;
+
+DEFINE_RING_TYPES(vscsiif, struct vscsiif_request, struct vscsiif_response);
+
+
+#endif /*__XEN__PUBLIC_IO_SCSI_H__*/
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
unsigned long start;
} xen_kexec_range_t;
-/* vmcoreinfo stuff */
-#define VMCOREINFO_BYTES (4096)
-#define VMCOREINFO_NOTE_NAME "VMCOREINFO_XEN"
-void arch_crash_save_vmcoreinfo(void);
-void vmcoreinfo_append_str(const char *fmt, ...)
- __attribute__ ((format (printf, 1, 2)));
-#define VMCOREINFO_PAGESIZE(value) \
- vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
-#define VMCOREINFO_SYMBOL(name) \
- vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
-#define VMCOREINFO_SYMBOL_ALIAS(alias, name) \
- vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #alias, (unsigned long)&name)
-#define VMCOREINFO_STRUCT_SIZE(name) \
- vmcoreinfo_append_str("SIZE(%s)=%zu\n", #name, sizeof(struct name))
-#define VMCOREINFO_OFFSET(name, field) \
- vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
- (unsigned long)offsetof(struct name, field))
-#define VMCOREINFO_OFFSET_ALIAS(name, field, alias) \
- vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #alias, \
- (unsigned long)offsetof(struct name, field))
-
#endif /* _XEN_PUBLIC_KEXEC_H */
/*
/* NUMA node to allocate from. */
#define XENMEMF_node(x) (((x) + 1) << 8)
#define XENMEMF_get_node(x) ((((x) >> 8) - 1) & 0xffu)
+/* Flag to populate physmap with populate-on-demand entries */
+#define XENMEMF_populate_on_demand (1<<16)
#endif
struct xen_memory_reservation {
/* Source mapping space. */
#define XENMAPSPACE_shared_info 0 /* shared info page */
#define XENMAPSPACE_grant_table 1 /* grant table page */
-#define XENMAPSPACE_mfn 2 /* usual MFN */
+#define XENMAPSPACE_gmfn 2 /* GMFN */
unsigned int space;
/* Index into source mapping space. */
typedef struct xen_add_to_physmap xen_add_to_physmap_t;
DEFINE_XEN_GUEST_HANDLE(xen_add_to_physmap_t);
-/*
- * Unmaps the page appearing at a particular GPFN from the specified guest's
- * pseudophysical address space.
- * arg == addr of xen_remove_from_physmap_t.
- */
-#define XENMEM_remove_from_physmap 15
-struct xen_remove_from_physmap {
- /* Which domain to change the mapping for. */
- domid_t domid;
-
- /* GPFN of the current mapping of the page. */
- xen_pfn_t gpfn;
-};
-typedef struct xen_remove_from_physmap xen_remove_from_physmap_t;
-DEFINE_XEN_GUEST_HANDLE(xen_remove_from_physmap_t);
-
-/*
- * Translates a list of domain-specific GPFNs into MFNs. Returns a -ve error
- * code on failure. This call only works for auto-translated guests.
- */
-#define XENMEM_translate_gpfn_list 8
-struct xen_translate_gpfn_list {
- /* Which domain to translate for? */
- domid_t domid;
-
- /* Length of list. */
- xen_ulong_t nr_gpfns;
-
- /* List of GPFNs to translate. */
- XEN_GUEST_HANDLE(xen_pfn_t) gpfn_list;
-
- /*
- * Output list to contain MFN translations. May be the same as the input
- * list (in which case each input GPFN is overwritten with the output MFN).
- */
- XEN_GUEST_HANDLE(xen_pfn_t) mfn_list;
-};
-typedef struct xen_translate_gpfn_list xen_translate_gpfn_list_t;
-DEFINE_XEN_GUEST_HANDLE(xen_translate_gpfn_list_t);
+/*** REMOVED ***/
+/*#define XENMEM_translate_gpfn_list 8*/
/*
* Returns the pseudo-physical memory map as it was when the domain
typedef struct xen_foreign_memory_map xen_foreign_memory_map_t;
DEFINE_XEN_GUEST_HANDLE(xen_foreign_memory_map_t);
+#define XENMEM_set_pod_target 16
+#define XENMEM_get_pod_target 17
+struct xen_pod_target {
+ /* IN */
+ uint64_t target_pages;
+ /* OUT */
+ uint64_t tot_pages;
+ uint64_t pod_cache_pages;
+ uint64_t pod_entries;
+ /* IN */
+ domid_t domid;
+};
+typedef struct xen_pod_target xen_pod_target_t;
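A Dom0/toolstack-side sketch of driving the new populate-on-demand target op; the HYPERVISOR_memory_op() wrapper name and the guest_domid/target_pages values are assumptions for illustration:

struct xen_pod_target pod = {
    .domid        = guest_domid,        /* placeholder: target domain */
    .target_pages = target_pages,       /* placeholder: desired PoD target */
};

if ( HYPERVISOR_memory_op(XENMEM_set_pod_target, &pod) == 0 )
{
    /* The OUT fields now hold the domain's totals:
     * pod.tot_pages, pod.pod_cache_pages, pod.pod_entries. */
}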
#endif /* __XEN_PUBLIC_MEMORY_H__ */
/*
typedef struct physdev_eoi physdev_eoi_t;
DEFINE_XEN_GUEST_HANDLE(physdev_eoi_t);
+/*
+ * Register a shared page for the hypervisor to indicate whether the guest
+ * must issue PHYSDEVOP_eoi. The semantics of PHYSDEVOP_eoi change slightly
+ * once the guest has used this function, in that the associated event channel
+ * will automatically get unmasked. The page registered is used as a bit
+ * array indexed by Xen's PIRQ value.
+ */
+#define PHYSDEVOP_pirq_eoi_gmfn 17
+struct physdev_pirq_eoi_gmfn {
+ /* IN */
+ xen_pfn_t gmfn;
+};
+typedef struct physdev_pirq_eoi_gmfn physdev_pirq_eoi_gmfn_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_pirq_eoi_gmfn_t);
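A guest-side sketch of the registration and the later EOI check described above; pirq_eoi_bits (a pre-allocated shared page), virt_to_gmfn() and the pirq variable are illustrative names, not part of this interface:

struct physdev_pirq_eoi_gmfn reg = {
    .gmfn = virt_to_gmfn(pirq_eoi_bits)          /* frame of the guest's bitmap page */
};

HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn, &reg);

/* Later, EOI only when Xen has flagged this PIRQ in the shared bit array. */
if ( test_bit(pirq, pirq_eoi_bits) )
{
    struct physdev_eoi eoi = { .irq = pirq };
    HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
}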
+
/*
* Query the status of an IRQ line.
* @arg == pointer to physdev_irq_status_query structure.
typedef struct physdev_manage_pci physdev_manage_pci_t;
DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_t);
+#define PHYSDEVOP_restore_msi 19
+struct physdev_restore_msi {
+ /* IN */
+ uint8_t bus;
+ uint8_t devfn;
+};
+typedef struct physdev_restore_msi physdev_restore_msi_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_restore_msi_t);
+
+#define PHYSDEVOP_manage_pci_add_ext 20
+struct physdev_manage_pci_ext {
+ /* IN */
+ uint8_t bus;
+ uint8_t devfn;
+ unsigned is_extfn;
+ unsigned is_virtfn;
+ struct {
+ uint8_t bus;
+ uint8_t devfn;
+ } physfn;
+};
+
+typedef struct physdev_manage_pci_ext physdev_manage_pci_ext_t;
+DEFINE_XEN_GUEST_HANDLE(physdev_manage_pci_ext_t);
+
/*
* Argument to physdev_op_compat() hypercall. Superseded by new physdev_op()
* hypercall since 0x00030202.
typedef struct xen_sysctl_get_pmstat xen_sysctl_get_pmstat_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_get_pmstat_t);
+/*
+ * Status codes. Must be greater than 0 to avoid confusing
+ * sysctl callers that see 0 as a plain successful return.
+ */
+#define XEN_CPU_HOTPLUG_STATUS_OFFLINE 1
+#define XEN_CPU_HOTPLUG_STATUS_ONLINE 2
+#define XEN_CPU_HOTPLUG_STATUS_NEW 3
+
#define XEN_SYSCTL_cpu_hotplug 11
struct xen_sysctl_cpu_hotplug {
/* IN variables */
uint32_t cpu; /* Physical cpu. */
#define XEN_SYSCTL_CPU_HOTPLUG_ONLINE 0
#define XEN_SYSCTL_CPU_HOTPLUG_OFFLINE 1
+#define XEN_SYSCTL_CPU_HOTPLUG_STATUS 2
uint32_t op; /* hotplug opcode */
};
typedef struct xen_sysctl_cpu_hotplug xen_sysctl_cpu_hotplug_t;
DEFINE_XEN_GUEST_HANDLE(xen_sysctl_cpu_hotplug_t);
+/*
+ * Get/set xen power management, include
+ * 1. cpufreq governors and related parameters
+ */
+#define XEN_SYSCTL_pm_op 12
+struct xen_userspace {
+ uint32_t scaling_setspeed;
+};
+typedef struct xen_userspace xen_userspace_t;
+
+struct xen_ondemand {
+ uint32_t sampling_rate_max;
+ uint32_t sampling_rate_min;
+
+ uint32_t sampling_rate;
+ uint32_t up_threshold;
+};
+typedef struct xen_ondemand xen_ondemand_t;
+
+/*
+ * The cpufreq parameter names in this structure are the
+ * same as the sysfs file names used by native Linux.
+ */
+#define CPUFREQ_NAME_LEN 16
+struct xen_get_cpufreq_para {
+ /* IN/OUT variable */
+ uint32_t cpu_num;
+ uint32_t freq_num;
+ uint32_t gov_num;
+
+ /* for all governors */
+ /* OUT variable */
+ XEN_GUEST_HANDLE_64(uint32) affected_cpus;
+ XEN_GUEST_HANDLE_64(uint32) scaling_available_frequencies;
+ XEN_GUEST_HANDLE_64(char) scaling_available_governors;
+ char scaling_driver[CPUFREQ_NAME_LEN];
+
+ uint32_t cpuinfo_cur_freq;
+ uint32_t cpuinfo_max_freq;
+ uint32_t cpuinfo_min_freq;
+ uint32_t scaling_cur_freq;
+
+ char scaling_governor[CPUFREQ_NAME_LEN];
+ uint32_t scaling_max_freq;
+ uint32_t scaling_min_freq;
+
+ /* for specific governor */
+ union {
+ struct xen_userspace userspace;
+ struct xen_ondemand ondemand;
+ } u;
+};
+
+struct xen_set_cpufreq_gov {
+ char scaling_governor[CPUFREQ_NAME_LEN];
+};
+
+struct xen_set_cpufreq_para {
+ #define SCALING_MAX_FREQ 1
+ #define SCALING_MIN_FREQ 2
+ #define SCALING_SETSPEED 3
+ #define SAMPLING_RATE 4
+ #define UP_THRESHOLD 5
+
+ uint32_t ctrl_type;
+ uint32_t ctrl_value;
+};
+
+/* Get physical CPU topology information. */
+#define INVALID_TOPOLOGY_ID (~0U)
+struct xen_get_cputopo {
+ /* IN: maximum addressable entry in
+ * the caller-provided cpu_to_core/socket.
+ */
+ uint32_t max_cpus;
+ XEN_GUEST_HANDLE_64(uint32) cpu_to_core;
+ XEN_GUEST_HANDLE_64(uint32) cpu_to_socket;
+
+ /* OUT: number of cpus returned
+ * If OUT is greater than IN then the cpu_to_core/socket is truncated!
+ */
+ uint32_t nr_cpus;
+};
+
+struct xen_sysctl_pm_op {
+ #define PM_PARA_CATEGORY_MASK 0xf0
+ #define CPUFREQ_PARA 0x10
+
+ /* cpufreq command type */
+ #define GET_CPUFREQ_PARA (CPUFREQ_PARA | 0x01)
+ #define SET_CPUFREQ_GOV (CPUFREQ_PARA | 0x02)
+ #define SET_CPUFREQ_PARA (CPUFREQ_PARA | 0x03)
+ #define GET_CPUFREQ_AVGFREQ (CPUFREQ_PARA | 0x04)
+
+ /* get CPU topology */
+ #define XEN_SYSCTL_pm_op_get_cputopo 0x20
+
+ /* set/reset scheduler power saving option */
+ #define XEN_SYSCTL_pm_op_set_sched_opt_smt 0x21
+
+ uint32_t cmd;
+ uint32_t cpuid;
+ union {
+ struct xen_get_cpufreq_para get_para;
+ struct xen_set_cpufreq_gov set_gov;
+ struct xen_set_cpufreq_para set_para;
+ uint64_t get_avgfreq;
+ struct xen_get_cputopo get_topo;
+ uint32_t set_sched_opt_smt;
+ };
+};
+
+#define XEN_SYSCTL_page_offline_op 14
+struct xen_sysctl_page_offline_op {
+ /* IN: range of pages to be offlined */
+#define sysctl_page_offline 1
+#define sysctl_page_online 2
+#define sysctl_query_page_offline 3
+ uint32_t cmd;
+ uint32_t start;
+ uint32_t end;
+ /* OUT: result of page offline request */
+ /*
+ * bit 0~15: result flags
+ * bit 16~31: owner
+ */
+ XEN_GUEST_HANDLE(uint32) status;
+};
+
+#define PG_OFFLINE_STATUS_MASK (0xFFUL)
+
+/* The result is invalid, i.e. HV does not handle it */
+#define PG_OFFLINE_INVALID (0x1UL << 0)
+
+#define PG_OFFLINE_OFFLINED (0x1UL << 1)
+#define PG_OFFLINE_PENDING (0x1UL << 2)
+#define PG_OFFLINE_FAILED (0x1UL << 3)
+
+#define PG_ONLINE_FAILED PG_OFFLINE_FAILED
+#define PG_ONLINE_ONLINED PG_OFFLINE_OFFLINED
+
+#define PG_OFFLINE_STATUS_OFFLINED (0x1UL << 1)
+#define PG_OFFLINE_STATUS_ONLINE (0x1UL << 2)
+#define PG_OFFLINE_STATUS_OFFLINE_PENDING (0x1UL << 3)
+#define PG_OFFLINE_STATUS_BROKEN (0x1UL << 4)
+
+#define PG_OFFLINE_MISC_MASK (0xFFUL << 4)
+
+/* only valid when PG_OFFLINE_FAILED */
+#define PG_OFFLINE_XENPAGE (0x1UL << 8)
+#define PG_OFFLINE_DOM0PAGE (0x1UL << 9)
+#define PG_OFFLINE_ANONYMOUS (0x1UL << 10)
+#define PG_OFFLINE_NOT_CONV_RAM (0x1UL << 11)
+#define PG_OFFLINE_OWNED (0x1UL << 12)
+
+#define PG_OFFLINE_BROKEN (0x1UL << 13)
+#define PG_ONLINE_BROKEN PG_OFFLINE_BROKEN
+
+#define PG_OFFLINE_OWNER_SHIFT 16
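+/*
+ * Illustrative decoding of a returned status word (sketch based on the
+ * definitions above): the low bits carry the result flags and the owning
+ * domain sits in the upper half, e.g.
+ *     if ( status & PG_OFFLINE_FAILED ) ...;
+ *     owner = status >> PG_OFFLINE_OWNER_SHIFT;
+ */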
struct xen_sysctl {
uint32_t cmd;
struct xen_sysctl_availheap availheap;
struct xen_sysctl_get_pmstat get_pmstat;
struct xen_sysctl_cpu_hotplug cpu_hotplug;
+ struct xen_sysctl_pm_op pm_op;
+ struct xen_sysctl_page_offline_op page_offline;
uint8_t pad[128];
} u;
};
#define TRC_TRACE_WRAP_BUFFER (TRC_GEN + 2)
#define TRC_TRACE_CPU_CHANGE (TRC_GEN + 3)
-#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1)
+#define TRC_SCHED_RUNSTATE_CHANGE (TRC_SCHED_MIN + 1)
+#define TRC_SCHED_CONTINUE_RUNNING (TRC_SCHED_MIN + 2)
#define TRC_SCHED_DOM_ADD (TRC_SCHED_VERBOSE + 1)
#define TRC_SCHED_DOM_REM (TRC_SCHED_VERBOSE + 2)
#define TRC_SCHED_SLEEP (TRC_SCHED_VERBOSE + 3)
#define TRC_HVM_INVLPG64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x14)
#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15)
#define TRC_HVM_IO_ASSIST (TRC_HVM_HANDLER + 0x16)
+#define TRC_HVM_IO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x16)
#define TRC_HVM_MMIO_ASSIST (TRC_HVM_HANDLER + 0x17)
+#define TRC_HVM_MMIO_ASSIST64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x17)
#define TRC_HVM_CLTS (TRC_HVM_HANDLER + 0x18)
#define TRC_HVM_LMSW (TRC_HVM_HANDLER + 0x19)
#define TRC_HVM_LMSW64 (TRC_HVM_HANDLER + TRC_64_FLAG + 0x19)
+#define TRC_HVM_INTR_WINDOW (TRC_HVM_HANDLER + 0x20)
/* trace subclasses for power management */
#define TRC_PM_FREQ 0x00801000 /* xen cpu freq events */
* cmd: MMUEXT_SET_LDT
* linear_addr: Linear address of LDT base (NB. must be page-aligned).
* nr_ents: Number of entries in LDT.
+ *
+ * cmd: MMUEXT_CLEAR_PAGE
+ * mfn: Machine frame number to be cleared.
+ *
+ * cmd: MMUEXT_COPY_PAGE
+ * mfn: Machine frame number of the destination page.
+ * src_mfn: Machine frame number of the source page.
*/
#define MMUEXT_PIN_L1_TABLE 0
#define MMUEXT_PIN_L2_TABLE 1
#define MMUEXT_FLUSH_CACHE 12
#define MMUEXT_SET_LDT 13
#define MMUEXT_NEW_USER_BASEPTR 15
+#define MMUEXT_CLEAR_PAGE 16
+#define MMUEXT_COPY_PAGE 17
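+/*
+ * Example (sketch, assuming the usual arg1/arg2 member names of
+ * struct mmuext_op): a page-copy request is encoded as
+ *     op.cmd = MMUEXT_COPY_PAGE;
+ *     op.arg1.mfn = destination_mfn;
+ *     op.arg2.src_mfn = source_mfn;
+ * and passed to the mmuext_op hypercall.
+ */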
#ifndef __ASSEMBLY__
struct mmuext_op {
unsigned int cmd;
union {
- /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR */
+ /* [UN]PIN_TABLE, NEW_BASEPTR, NEW_USER_BASEPTR
+ * CLEAR_PAGE, COPY_PAGE */
xen_pfn_t mfn;
/* INVLPG_LOCAL, INVLPG_ALL, SET_LDT */
unsigned long linear_addr;
#else
void *vcpumask;
#endif
+ /* COPY_PAGE */
+ xen_pfn_t src_mfn;
} arg2;
};
typedef struct mmuext_op mmuext_op_t;
*/
#define DOMID_XEN (0x7FF2U)
+/* DOMID_INVALID is used to identify an invalid domid */
+#define DOMID_INVALID (0x7FFFU)
+
/*
* Send an array of these to HYPERVISOR_mmu_update().
* NB. The fields are natural pointer/address size for this architecture.
* a. relocated kernel image
* b. initial ram disk [mod_start, mod_len]
* c. list of allocated page frames [mfn_list, nr_pages]
+ * (unless relocated due to XEN_ELFNOTE_INIT_P2M)
* d. start_info_t structure [register ESI (x86)]
* e. bootstrap page tables [pt_base, CR3 (x86)]
* f. bootstrap stack [register ESP (x86)]
unsigned long mod_start; /* VIRTUAL address of pre-loaded module. */
unsigned long mod_len; /* Size (bytes) of pre-loaded module. */
int8_t cmd_line[MAX_GUEST_CMDLINE];
+ /* The pfn range here covers both page table and p->m table frames. */
+ unsigned long first_p2m_pfn;/* 1st pfn forming initial P->M table. */
+ unsigned long nr_p2m_frames;/* # of pfns forming initial P->M table. */
};
typedef struct start_info start_info_t;
unsigned int acpi_get_processor_id (unsigned int cpu);
char * __acpi_map_table (unsigned long phys_addr, unsigned long size);
-unsigned long acpi_find_rsdp (void);
int acpi_boot_init (void);
int acpi_boot_table_init (void);
int acpi_numa_init (void);
int switch_compat(struct domain *);
int switch_native(struct domain *);
-#define BITS_PER_GUEST_LONG(d) \
- (!IS_COMPAT(d) ? BITS_PER_LONG : COMPAT_BITS_PER_LONG)
-
#else
#define compat_handle_is_null(hnd) 0
-#define BITS_PER_GUEST_LONG(d) BITS_PER_LONG
-
#endif
#endif /* __XEN_COMPAT_H__ */
#define ACPI_PROCESSOR_MAX_POWER 8
#define CPUIDLE_NAME_LEN 16
+#define ACPI_CSTATE_EM_NONE 0
+#define ACPI_CSTATE_EM_SYSIO 1
+#define ACPI_CSTATE_EM_FFH 2
+#define ACPI_CSTATE_EM_HALT 3
+
struct acpi_processor_cx
{
+ u8 idx;
u8 valid;
u8 type;
u32 address;
- u8 space_id;
+ u8 entry_method; /* ACPI_CSTATE_EM_xxx */
u32 latency;
u32 latency_ticks;
u32 power;
*
* int first_cpu(mask) Number lowest set bit, or NR_CPUS
* int next_cpu(cpu, mask) Next cpu past 'cpu', or NR_CPUS
+ * int last_cpu(mask) Number highest set bit, or NR_CPUS
+ * int cycle_cpu(cpu, mask) Next cpu cycling from 'cpu', or NR_CPUS
*
* cpumask_t cpumask_of_cpu(cpu) Return cpumask with bit 'cpu' set
* CPU_MASK_ALL Initializer - all bits set
#define last_cpu(src) __last_cpu(&(src), NR_CPUS)
static inline int __last_cpu(const cpumask_t *srcp, int nbits)
{
- int cpu, pcpu = NR_CPUS;
- for (cpu = first_cpu(*srcp); cpu < NR_CPUS; cpu = next_cpu(cpu, *srcp))
+ int cpu, pcpu = nbits;
+ for (cpu = __first_cpu(srcp, nbits);
+ cpu < nbits;
+ cpu = __next_cpu(cpu, srcp, nbits))
pcpu = cpu;
return pcpu;
}
+#define cycle_cpu(n, src) __cycle_cpu((n), &(src), NR_CPUS)
+static inline int __cycle_cpu(int n, const cpumask_t *srcp, int nbits)
+{
+ int nxt = __next_cpu(n, srcp, nbits);
+ if (nxt == nbits)
+ nxt = __first_cpu(srcp, nbits);
+ return nxt;
+}
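+/*
+ * Example (illustrative only): cycle_cpu() walks a mask round-robin,
+ * wrapping from the last set bit back to the first, e.g.
+ *     next = cycle_cpu(cpu, cpu_online_map);
+ */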
+
#define cpumask_of_cpu(cpu) \
({ \
typeof(_unused_cpumask_arg_) m; \
* Arch-specifics.
*/
+/* Allocate/free a domain structure. */
+struct domain *alloc_domain_struct(void);
+void free_domain_struct(struct domain *d);
+
/* Allocate/free a VCPU structure. */
struct vcpu *alloc_vcpu_struct(void);
void free_vcpu_struct(struct vcpu *v);
* Pass a VA within a page previously mapped in the context of the
* currently-executing VCPU via a call to map_domain_page().
*/
-void unmap_domain_page(void *va);
+void unmap_domain_page(const void *va);
/*
* Similar to the above calls, except the mapping is accessible in all
* mappings can also be unmapped from any context.
*/
void *map_domain_page_global(unsigned long mfn);
-void unmap_domain_page_global(void *va);
+void unmap_domain_page_global(const void *va);
#define DMCACHE_ENTRY_VALID 1U
#define DMCACHE_ENTRY_HELD 2U
}
static inline void
-unmap_domain_page_with_cache(void *va, struct domain_mmap_cache *cache)
+unmap_domain_page_with_cache(const void *va, struct domain_mmap_cache *cache)
{
ASSERT(cache != NULL);
cache->flags &= ~DMCACHE_ENTRY_HELD;
#ifndef __XEN_ELF_H__
#define __XEN_ELF_H__
-#include <public/elfstructs.h>
+#include <xen/elfstructs.h>
#define ELFNOTE_ALIGN(_n_) (((_n_)+3)&~3)
#define ELFNOTE_NAME(_n_) ((char*)(_n_) + sizeof(*(_n_)))
--- /dev/null
+#ifndef __XEN_ELFSTRUCTS_H__
+#define __XEN_ELFSTRUCTS_H__
+/*
+ * Copyright (c) 1995, 1996 Erik Theisen. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote products
+ * derived from this software without specific prior written permission
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+typedef uint8_t Elf_Byte;
+
+typedef uint32_t Elf32_Addr; /* Unsigned program address */
+typedef uint32_t Elf32_Off; /* Unsigned file offset */
+typedef int32_t Elf32_Sword; /* Signed large integer */
+typedef uint32_t Elf32_Word; /* Unsigned large integer */
+typedef uint16_t Elf32_Half; /* Unsigned medium integer */
+
+typedef uint64_t Elf64_Addr;
+typedef uint64_t Elf64_Off;
+typedef int32_t Elf64_Shalf;
+
+typedef int32_t Elf64_Sword;
+typedef uint32_t Elf64_Word;
+
+typedef int64_t Elf64_Sxword;
+typedef uint64_t Elf64_Xword;
+
+typedef uint32_t Elf64_Half;
+typedef uint16_t Elf64_Quarter;
+
+/*
+ * e_ident[] identification indexes
+ * See http://www.caldera.com/developers/gabi/2000-07-17/ch4.eheader.html
+ */
+#define EI_MAG0 0 /* file ID */
+#define EI_MAG1 1 /* file ID */
+#define EI_MAG2 2 /* file ID */
+#define EI_MAG3 3 /* file ID */
+#define EI_CLASS 4 /* file class */
+#define EI_DATA 5 /* data encoding */
+#define EI_VERSION 6 /* ELF header version */
+#define EI_OSABI 7 /* OS/ABI ID */
+#define EI_ABIVERSION 8 /* ABI version */
+#define EI_PAD 9 /* start of pad bytes */
+#define EI_NIDENT 16 /* Size of e_ident[] */
+
+/* e_ident[] magic number */
+#define ELFMAG0 0x7f /* e_ident[EI_MAG0] */
+#define ELFMAG1 'E' /* e_ident[EI_MAG1] */
+#define ELFMAG2 'L' /* e_ident[EI_MAG2] */
+#define ELFMAG3 'F' /* e_ident[EI_MAG3] */
+#define ELFMAG "\177ELF" /* magic */
+#define SELFMAG 4 /* size of magic */
+
+/* e_ident[] file class */
+#define ELFCLASSNONE 0 /* invalid */
+#define ELFCLASS32 1 /* 32-bit objs */
+#define ELFCLASS64 2 /* 64-bit objs */
+#define ELFCLASSNUM 3 /* number of classes */
+
+/* e_ident[] data encoding */
+#define ELFDATANONE 0 /* invalid */
+#define ELFDATA2LSB 1 /* Little-Endian */
+#define ELFDATA2MSB 2 /* Big-Endian */
+#define ELFDATANUM 3 /* number of data encode defines */
+
+/* e_ident[] Operating System/ABI */
+#define ELFOSABI_SYSV 0 /* UNIX System V ABI */
+#define ELFOSABI_HPUX 1 /* HP-UX operating system */
+#define ELFOSABI_NETBSD 2 /* NetBSD */
+#define ELFOSABI_LINUX 3 /* GNU/Linux */
+#define ELFOSABI_HURD 4 /* GNU/Hurd */
+#define ELFOSABI_86OPEN 5 /* 86Open common IA32 ABI */
+#define ELFOSABI_SOLARIS 6 /* Solaris */
+#define ELFOSABI_MONTEREY 7 /* Monterey */
+#define ELFOSABI_IRIX 8 /* IRIX */
+#define ELFOSABI_FREEBSD 9 /* FreeBSD */
+#define ELFOSABI_TRU64 10 /* TRU64 UNIX */
+#define ELFOSABI_MODESTO 11 /* Novell Modesto */
+#define ELFOSABI_OPENBSD 12 /* OpenBSD */
+#define ELFOSABI_ARM 97 /* ARM */
+#define ELFOSABI_STANDALONE 255 /* Standalone (embedded) application */
+
+/* e_ident */
+#define IS_ELF(ehdr) ((ehdr).e_ident[EI_MAG0] == ELFMAG0 && \
+ (ehdr).e_ident[EI_MAG1] == ELFMAG1 && \
+ (ehdr).e_ident[EI_MAG2] == ELFMAG2 && \
+ (ehdr).e_ident[EI_MAG3] == ELFMAG3)
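+/*
+ * Example (sketch): a loader typically checks the magic and class up
+ * front, e.g.
+ *     if ( !IS_ELF(*ehdr) || ehdr->e_ident[EI_CLASS] != ELFCLASS64 )
+ *         reject the image;
+ */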
+
+/* ELF Header */
+typedef struct elfhdr {
+ unsigned char e_ident[EI_NIDENT]; /* ELF Identification */
+ Elf32_Half e_type; /* object file type */
+ Elf32_Half e_machine; /* machine */
+ Elf32_Word e_version; /* object file version */
+ Elf32_Addr e_entry; /* virtual entry point */
+ Elf32_Off e_phoff; /* program header table offset */
+ Elf32_Off e_shoff; /* section header table offset */
+ Elf32_Word e_flags; /* processor-specific flags */
+ Elf32_Half e_ehsize; /* ELF header size */
+ Elf32_Half e_phentsize; /* program header entry size */
+ Elf32_Half e_phnum; /* number of program header entries */
+ Elf32_Half e_shentsize; /* section header entry size */
+ Elf32_Half e_shnum; /* number of section header entries */
+ Elf32_Half e_shstrndx; /* section header table's "section
+ header string table" entry offset */
+} Elf32_Ehdr;
+
+typedef struct {
+ unsigned char e_ident[EI_NIDENT]; /* Id bytes */
+ Elf64_Quarter e_type; /* file type */
+ Elf64_Quarter e_machine; /* machine type */
+ Elf64_Half e_version; /* version number */
+ Elf64_Addr e_entry; /* entry point */
+ Elf64_Off e_phoff; /* Program hdr offset */
+ Elf64_Off e_shoff; /* Section hdr offset */
+ Elf64_Half e_flags; /* Processor flags */
+ Elf64_Quarter e_ehsize; /* sizeof ehdr */
+ Elf64_Quarter e_phentsize; /* Program header entry size */
+ Elf64_Quarter e_phnum; /* Number of program headers */
+ Elf64_Quarter e_shentsize; /* Section header entry size */
+ Elf64_Quarter e_shnum; /* Number of section headers */
+ Elf64_Quarter e_shstrndx; /* String table index */
+} Elf64_Ehdr;
+
+/* e_type */
+#define ET_NONE 0 /* No file type */
+#define ET_REL 1 /* relocatable file */
+#define ET_EXEC 2 /* executable file */
+#define ET_DYN 3 /* shared object file */
+#define ET_CORE 4 /* core file */
+#define ET_NUM 5 /* number of types */
+#define ET_LOPROC 0xff00 /* reserved range for processor */
+#define ET_HIPROC 0xffff /* specific e_type */
+
+/* e_machine */
+#define EM_NONE 0 /* No Machine */
+#define EM_M32 1 /* AT&T WE 32100 */
+#define EM_SPARC 2 /* SPARC */
+#define EM_386 3 /* Intel 80386 */
+#define EM_68K 4 /* Motorola 68000 */
+#define EM_88K 5 /* Motorola 88000 */
+#define EM_486 6 /* Intel 80486 - unused? */
+#define EM_860 7 /* Intel 80860 */
+#define EM_MIPS 8 /* MIPS R3000 Big-Endian only */
+/*
+ * Don't know if EM_MIPS_RS4_BE,
+ * EM_SPARC64, EM_PARISC,
+ * or EM_PPC are ABI compliant
+ */
+#define EM_MIPS_RS4_BE 10 /* MIPS R4000 Big-Endian */
+#define EM_SPARC64 11 /* SPARC v9 64-bit unofficial */
+#define EM_PARISC 15 /* HPPA */
+#define EM_SPARC32PLUS 18 /* Enhanced instruction set SPARC */
+#define EM_PPC 20 /* PowerPC */
+#define EM_PPC64 21 /* PowerPC 64-bit */
+#define EM_ARM 40 /* Advanced RISC Machines ARM */
+#define EM_ALPHA 41 /* DEC ALPHA */
+#define EM_SPARCV9 43 /* SPARC version 9 */
+#define EM_ALPHA_EXP 0x9026 /* DEC ALPHA */
+#define EM_IA_64 50 /* Intel Merced */
+#define EM_X86_64 62 /* AMD x86-64 architecture */
+#define EM_VAX 75 /* DEC VAX */
+
+/* Version */
+#define EV_NONE 0 /* Invalid */
+#define EV_CURRENT 1 /* Current */
+#define EV_NUM 2 /* number of versions */
+
+/* Section Header */
+typedef struct {
+ Elf32_Word sh_name; /* name - index into section header
+ string table section */
+ Elf32_Word sh_type; /* type */
+ Elf32_Word sh_flags; /* flags */
+ Elf32_Addr sh_addr; /* address */
+ Elf32_Off sh_offset; /* file offset */
+ Elf32_Word sh_size; /* section size */
+ Elf32_Word sh_link; /* section header table index link */
+ Elf32_Word sh_info; /* extra information */
+ Elf32_Word sh_addralign; /* address alignment */
+ Elf32_Word sh_entsize; /* section entry size */
+} Elf32_Shdr;
+
+typedef struct {
+ Elf64_Half sh_name; /* section name */
+ Elf64_Half sh_type; /* section type */
+ Elf64_Xword sh_flags; /* section flags */
+ Elf64_Addr sh_addr; /* virtual address */
+ Elf64_Off sh_offset; /* file offset */
+ Elf64_Xword sh_size; /* section size */
+ Elf64_Half sh_link; /* link to another */
+ Elf64_Half sh_info; /* misc info */
+ Elf64_Xword sh_addralign; /* memory alignment */
+ Elf64_Xword sh_entsize; /* table entry size */
+} Elf64_Shdr;
+
+/* Special Section Indexes */
+#define SHN_UNDEF 0 /* undefined */
+#define SHN_LORESERVE 0xff00 /* lower bounds of reserved indexes */
+#define SHN_LOPROC 0xff00 /* reserved range for processor */
+#define SHN_HIPROC 0xff1f /* specific section indexes */
+#define SHN_ABS 0xfff1 /* absolute value */
+#define SHN_COMMON 0xfff2 /* common symbol */
+#define SHN_HIRESERVE 0xffff /* upper bounds of reserved indexes */
+
+/* sh_type */
+#define SHT_NULL 0 /* inactive */
+#define SHT_PROGBITS 1 /* program defined information */
+#define SHT_SYMTAB 2 /* symbol table section */
+#define SHT_STRTAB 3 /* string table section */
+#define SHT_RELA 4 /* relocation section with addends*/
+#define SHT_HASH 5 /* symbol hash table section */
+#define SHT_DYNAMIC 6 /* dynamic section */
+#define SHT_NOTE 7 /* note section */
+#define SHT_NOBITS 8 /* no space section */
+#define SHT_REL 9 /* relocation section without addends */
+#define SHT_SHLIB 10 /* reserved - purpose unknown */
+#define SHT_DYNSYM 11 /* dynamic symbol table section */
+#define SHT_NUM 12 /* number of section types */
+#define SHT_LOPROC 0x70000000 /* reserved range for processor */
+#define SHT_HIPROC 0x7fffffff /* specific section header types */
+#define SHT_LOUSER 0x80000000 /* reserved range for application */
+#define SHT_HIUSER 0xffffffff /* specific indexes */
+
+/* Section names */
+#define ELF_BSS ".bss" /* uninitialized data */
+#define ELF_DATA ".data" /* initialized data */
+#define ELF_DEBUG ".debug" /* debug */
+#define ELF_DYNAMIC ".dynamic" /* dynamic linking information */
+#define ELF_DYNSTR ".dynstr" /* dynamic string table */
+#define ELF_DYNSYM ".dynsym" /* dynamic symbol table */
+#define ELF_FINI ".fini" /* termination code */
+#define ELF_GOT ".got" /* global offset table */
+#define ELF_HASH ".hash" /* symbol hash table */
+#define ELF_INIT ".init" /* initialization code */
+#define ELF_REL_DATA ".rel.data" /* relocation data */
+#define ELF_REL_FINI ".rel.fini" /* relocation termination code */
+#define ELF_REL_INIT ".rel.init" /* relocation initialization code */
+#define ELF_REL_DYN ".rel.dyn" /* relocation dynamic link info */
+#define ELF_REL_RODATA ".rel.rodata" /* relocation read-only data */
+#define ELF_REL_TEXT ".rel.text" /* relocation code */
+#define ELF_RODATA ".rodata" /* read-only data */
+#define ELF_SHSTRTAB ".shstrtab" /* section header string table */
+#define ELF_STRTAB ".strtab" /* string table */
+#define ELF_SYMTAB ".symtab" /* symbol table */
+#define ELF_TEXT ".text" /* code */
+
+
+/* Section Attribute Flags - sh_flags */
+#define SHF_WRITE 0x1 /* Writable */
+#define SHF_ALLOC 0x2 /* occupies memory */
+#define SHF_EXECINSTR 0x4 /* executable */
+#define SHF_MASKPROC 0xf0000000 /* reserved bits for processor */
+ /* specific section attributes */
+
+/* Symbol Table Entry */
+typedef struct elf32_sym {
+ Elf32_Word st_name; /* name - index into string table */
+ Elf32_Addr st_value; /* symbol value */
+ Elf32_Word st_size; /* symbol size */
+ unsigned char st_info; /* type and binding */
+ unsigned char st_other; /* 0 - no defined meaning */
+ Elf32_Half st_shndx; /* section header index */
+} Elf32_Sym;
+
+typedef struct {
+ Elf64_Half st_name; /* Symbol name index in str table */
+ Elf_Byte st_info; /* type / binding attrs */
+ Elf_Byte st_other; /* unused */
+ Elf64_Quarter st_shndx; /* section index of symbol */
+ Elf64_Xword st_value; /* value of symbol */
+ Elf64_Xword st_size; /* size of symbol */
+} Elf64_Sym;
+
+/* Symbol table index */
+#define STN_UNDEF 0 /* undefined */
+
+/* Extract symbol info - st_info */
+#define ELF32_ST_BIND(x) ((x) >> 4)
+#define ELF32_ST_TYPE(x) (((unsigned int) x) & 0xf)
+#define ELF32_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf))
+
+#define ELF64_ST_BIND(x) ((x) >> 4)
+#define ELF64_ST_TYPE(x) (((unsigned int) x) & 0xf)
+#define ELF64_ST_INFO(b,t) (((b) << 4) + ((t) & 0xf))
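+/*
+ * st_info packs the binding in the high nibble and the type in the low
+ * nibble, e.g. ELF32_ST_INFO(STB_GLOBAL, STT_FUNC) == 0x12, and
+ * ELF32_ST_BIND(0x12) == STB_GLOBAL, ELF32_ST_TYPE(0x12) == STT_FUNC.
+ */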
+
+/* Symbol Binding - ELF32_ST_BIND - st_info */
+#define STB_LOCAL 0 /* Local symbol */
+#define STB_GLOBAL 1 /* Global symbol */
+#define STB_WEAK 2 /* like global - lower precedence */
+#define STB_NUM 3 /* number of symbol bindings */
+#define STB_LOPROC 13 /* reserved range for processor */
+#define STB_HIPROC 15 /* specific symbol bindings */
+
+/* Symbol type - ELF32_ST_TYPE - st_info */
+#define STT_NOTYPE 0 /* not specified */
+#define STT_OBJECT 1 /* data object */
+#define STT_FUNC 2 /* function */
+#define STT_SECTION 3 /* section */
+#define STT_FILE 4 /* file */
+#define STT_NUM 5 /* number of symbol types */
+#define STT_LOPROC 13 /* reserved range for processor */
+#define STT_HIPROC 15 /* specific symbol types */
+
+/* Relocation entry with implicit addend */
+typedef struct {
+ Elf32_Addr r_offset; /* offset of relocation */
+ Elf32_Word r_info; /* symbol table index and type */
+} Elf32_Rel;
+
+/* Relocation entry with explicit addend */
+typedef struct {
+ Elf32_Addr r_offset; /* offset of relocation */
+ Elf32_Word r_info; /* symbol table index and type */
+ Elf32_Sword r_addend;
+} Elf32_Rela;
+
+/* Extract relocation info - r_info */
+#define ELF32_R_SYM(i) ((i) >> 8)
+#define ELF32_R_TYPE(i) ((unsigned char) (i))
+#define ELF32_R_INFO(s,t) (((s) << 8) + (unsigned char)(t))
+
+typedef struct {
+ Elf64_Xword r_offset; /* where to do it */
+ Elf64_Xword r_info; /* index & type of relocation */
+} Elf64_Rel;
+
+typedef struct {
+ Elf64_Xword r_offset; /* where to do it */
+ Elf64_Xword r_info; /* index & type of relocation */
+ Elf64_Sxword r_addend; /* adjustment value */
+} Elf64_Rela;
+
+#define ELF64_R_SYM(info) ((info) >> 32)
+#define ELF64_R_TYPE(info) ((info) & 0xFFFFFFFF)
+#define ELF64_R_INFO(s,t) (((s) << 32) + (u_int32_t)(t))
+
+/* Program Header */
+typedef struct {
+ Elf32_Word p_type; /* segment type */
+ Elf32_Off p_offset; /* segment offset */
+ Elf32_Addr p_vaddr; /* virtual address of segment */
+ Elf32_Addr p_paddr; /* physical address - ignored? */
+ Elf32_Word p_filesz; /* number of bytes in file for seg. */
+ Elf32_Word p_memsz; /* number of bytes in mem. for seg. */
+ Elf32_Word p_flags; /* flags */
+ Elf32_Word p_align; /* memory alignment */
+} Elf32_Phdr;
+
+typedef struct {
+ Elf64_Half p_type; /* entry type */
+ Elf64_Half p_flags; /* flags */
+ Elf64_Off p_offset; /* offset */
+ Elf64_Addr p_vaddr; /* virtual address */
+ Elf64_Addr p_paddr; /* physical address */
+ Elf64_Xword p_filesz; /* file size */
+ Elf64_Xword p_memsz; /* memory size */
+ Elf64_Xword p_align; /* memory & file alignment */
+} Elf64_Phdr;
+
+/* Segment types - p_type */
+#define PT_NULL 0 /* unused */
+#define PT_LOAD 1 /* loadable segment */
+#define PT_DYNAMIC 2 /* dynamic linking section */
+#define PT_INTERP 3 /* the RTLD */
+#define PT_NOTE 4 /* auxiliary information */
+#define PT_SHLIB 5 /* reserved - purpose undefined */
+#define PT_PHDR 6 /* program header */
+#define PT_NUM 7 /* Number of segment types */
+#define PT_LOPROC 0x70000000 /* reserved range for processor */
+#define PT_HIPROC 0x7fffffff /* specific segment types */
+
+/* Segment flags - p_flags */
+#define PF_X 0x1 /* Executable */
+#define PF_W 0x2 /* Writable */
+#define PF_R 0x4 /* Readable */
+#define PF_MASKPROC 0xf0000000 /* reserved bits for processor */
+ /* specific segment flags */
+
+/* Dynamic structure */
+typedef struct {
+ Elf32_Sword d_tag; /* controls meaning of d_val */
+ union {
+ Elf32_Word d_val; /* Multiple meanings - see d_tag */
+ Elf32_Addr d_ptr; /* program virtual address */
+ } d_un;
+} Elf32_Dyn;
+
+typedef struct {
+ Elf64_Xword d_tag; /* controls meaning of d_val */
+ union {
+ Elf64_Addr d_ptr;
+ Elf64_Xword d_val;
+ } d_un;
+} Elf64_Dyn;
+
+/* Dynamic Array Tags - d_tag */
+#define DT_NULL 0 /* marks end of _DYNAMIC array */
+#define DT_NEEDED 1 /* string table offset of needed lib */
+#define DT_PLTRELSZ 2 /* size of relocation entries in PLT */
+#define DT_PLTGOT 3 /* address PLT/GOT */
+#define DT_HASH 4 /* address of symbol hash table */
+#define DT_STRTAB 5 /* address of string table */
+#define DT_SYMTAB 6 /* address of symbol table */
+#define DT_RELA 7 /* address of relocation table */
+#define DT_RELASZ 8 /* size of relocation table */
+#define DT_RELAENT 9 /* size of relocation entry */
+#define DT_STRSZ 10 /* size of string table */
+#define DT_SYMENT 11 /* size of symbol table entry */
+#define DT_INIT 12 /* address of initialization func. */
+#define DT_FINI 13 /* address of termination function */
+#define DT_SONAME 14 /* string table offset of shared obj */
+#define DT_RPATH 15 /* string table offset of library
+ search path */
+#define DT_SYMBOLIC 16 /* start sym search in shared obj. */
+#define DT_REL 17 /* address of rel. tbl. w addends */
+#define DT_RELSZ 18 /* size of DT_REL relocation table */
+#define DT_RELENT 19 /* size of DT_REL relocation entry */
+#define DT_PLTREL 20 /* PLT referenced relocation entry */
+#define DT_DEBUG 21 /* debugger */
+#define DT_TEXTREL 22 /* Allow rel. mod. to unwritable seg */
+#define DT_JMPREL 23 /* add. of PLT's relocation entries */
+#define DT_BIND_NOW 24 /* Bind now regardless of env setting */
+#define DT_NUM 25 /* Number used. */
+#define DT_LOPROC 0x70000000 /* reserved range for processor */
+#define DT_HIPROC 0x7fffffff /* specific dynamic array tags */
+
+/* Standard ELF hashing function */
+unsigned int elf_hash(const unsigned char *name);
+
+/*
+ * Note Definitions
+ */
+typedef struct {
+ Elf32_Word namesz;
+ Elf32_Word descsz;
+ Elf32_Word type;
+} Elf32_Note;
+
+typedef struct {
+ Elf64_Half namesz;
+ Elf64_Half descsz;
+ Elf64_Half type;
+} Elf64_Note;
+
+
+#if defined(ELFSIZE)
+#define CONCAT(x,y) __CONCAT(x,y)
+#define ELFNAME(x) CONCAT(elf,CONCAT(ELFSIZE,CONCAT(_,x)))
+#define ELFNAME2(x,y) CONCAT(x,CONCAT(_elf,CONCAT(ELFSIZE,CONCAT(_,y))))
+#define ELFNAMEEND(x) CONCAT(x,CONCAT(_elf,ELFSIZE))
+#define ELFDEFNNAME(x) CONCAT(ELF,CONCAT(ELFSIZE,CONCAT(_,x)))
+#endif
+
+#if defined(ELFSIZE) && (ELFSIZE == 32)
+#define Elf_Ehdr Elf32_Ehdr
+#define Elf_Phdr Elf32_Phdr
+#define Elf_Shdr Elf32_Shdr
+#define Elf_Sym Elf32_Sym
+#define Elf_Rel Elf32_Rel
+#define Elf_RelA Elf32_Rela
+#define Elf_Dyn Elf32_Dyn
+#define Elf_Word Elf32_Word
+#define Elf_Sword Elf32_Sword
+#define Elf_Addr Elf32_Addr
+#define Elf_Off Elf32_Off
+#define Elf_Nhdr Elf32_Nhdr
+#define Elf_Note Elf32_Note
+
+#define ELF_R_SYM ELF32_R_SYM
+#define ELF_R_TYPE ELF32_R_TYPE
+#define ELF_R_INFO ELF32_R_INFO
+#define ELFCLASS ELFCLASS32
+
+#define ELF_ST_BIND ELF32_ST_BIND
+#define ELF_ST_TYPE ELF32_ST_TYPE
+#define ELF_ST_INFO ELF32_ST_INFO
+
+#define AuxInfo Aux32Info
+#elif defined(ELFSIZE) && (ELFSIZE == 64)
+#define Elf_Ehdr Elf64_Ehdr
+#define Elf_Phdr Elf64_Phdr
+#define Elf_Shdr Elf64_Shdr
+#define Elf_Sym Elf64_Sym
+#define Elf_Rel Elf64_Rel
+#define Elf_RelA Elf64_Rela
+#define Elf_Dyn Elf64_Dyn
+#define Elf_Word Elf64_Word
+#define Elf_Sword Elf64_Sword
+#define Elf_Addr Elf64_Addr
+#define Elf_Off Elf64_Off
+#define Elf_Nhdr Elf64_Nhdr
+#define Elf_Note Elf64_Note
+
+#define ELF_R_SYM ELF64_R_SYM
+#define ELF_R_TYPE ELF64_R_TYPE
+#define ELF_R_INFO ELF64_R_INFO
+#define ELFCLASS ELFCLASS64
+
+#define ELF_ST_BIND ELF64_ST_BIND
+#define ELF_ST_TYPE ELF64_ST_TYPE
+#define ELF_ST_INFO ELF64_ST_INFO
+
+#define AuxInfo Aux64Info
+#endif
+
+#endif /* __XEN_ELFSTRUCTS_H__ */
/* Bind a local event-channel port to the specified VCPU. */
long evtchn_bind_vcpu(unsigned int port, unsigned int vcpu_id);
+/* Unmask a local event-channel port. */
+int evtchn_unmask(unsigned int port);
+
/* Allocate/free a Xen-attached event channel port. */
int alloc_unbound_xen_event_channel(
struct vcpu *local_vcpu, domid_t remote_domid);
struct active_grant_entry {
u32 pin; /* Reference count information. */
domid_t domid; /* Domain being granted access. */
+ unsigned long gfn; /* Guest's idea of the frame being granted. */
unsigned long frame; /* Frame being granted. */
};
/* amd iommu support */
int domain_id;
int paging_mode;
- void *root_table;
+ struct page_info *root_table;
bool_t p2m_synchronized;
/* iommu_ops */
--- /dev/null
+/******************************************************************************
+ * irq.h
+ *
+ * Interrupt distribution and delivery logic.
+ *
+ * Copyright (c) 2006, K A Fraser, XenSource Inc.
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#ifndef __XEN_HVM_IRQ_H__
+#define __XEN_HVM_IRQ_H__
+
+#include <xen/types.h>
+#include <xen/spinlock.h>
+#include <asm/irq.h>
+#include <public/hvm/save.h>
+
+struct dev_intx_gsi_link {
+ struct list_head list;
+ uint8_t device;
+ uint8_t intx;
+ uint8_t gsi;
+ uint8_t link;
+};
+
+#define _HVM_IRQ_DPCI_MACH_PCI_SHIFT 0
+#define _HVM_IRQ_DPCI_MACH_MSI_SHIFT 1
+#define _HVM_IRQ_DPCI_GUEST_PCI_SHIFT 4
+#define _HVM_IRQ_DPCI_GUEST_MSI_SHIFT 5
+#define _HVM_IRQ_DPCI_TRANSLATE_SHIFT 15
+#define HVM_IRQ_DPCI_MACH_PCI (1 << _HVM_IRQ_DPCI_MACH_PCI_SHIFT)
+#define HVM_IRQ_DPCI_MACH_MSI (1 << _HVM_IRQ_DPCI_MACH_MSI_SHIFT)
+#define HVM_IRQ_DPCI_GUEST_PCI (1 << _HVM_IRQ_DPCI_GUEST_PCI_SHIFT)
+#define HVM_IRQ_DPCI_GUEST_MSI (1 << _HVM_IRQ_DPCI_GUEST_MSI_SHIFT)
+#define HVM_IRQ_DPCI_TRANSLATE (1 << _HVM_IRQ_DPCI_TRANSLATE_SHIFT)
+
+struct hvm_gmsi_info {
+ uint32_t gvec;
+ uint32_t gflags;
+};
+
+struct hvm_mirq_dpci_mapping {
+ uint32_t flags;
+ int pending;
+ struct list_head digl_list;
+ struct domain *dom;
+ struct hvm_gmsi_info gmsi;
+};
+
+struct hvm_girq_dpci_mapping {
+ struct list_head list;
+ uint8_t device;
+ uint8_t intx;
+ uint8_t machine_gsi;
+};
+
+#define NR_ISAIRQS 16
+#define NR_LINK 4
+
+/* Protected by domain's event_lock */
+struct hvm_irq_dpci {
+ /* Machine IRQ to guest device/intx mapping. */
+ DECLARE_BITMAP(mapping, NR_IRQS);
+ struct hvm_mirq_dpci_mapping mirq[NR_IRQS];
+ /* Guest IRQ to guest device/intx mapping. */
+ struct list_head girq[NR_IRQS];
+ uint8_t msi_gvec_pirq[NR_VECTORS];
+ DECLARE_BITMAP(dirq_mask, NR_IRQS);
+ /* Record of mapped ISA IRQs */
+ DECLARE_BITMAP(isairq_map, NR_ISAIRQS);
+ /* Record of mapped Links */
+ uint8_t link_cnt[NR_LINK];
+ struct timer hvm_timer[NR_IRQS];
+};
+
+/* Modify state of a PCI INTx wire. */
+void hvm_pci_intx_assert(
+ struct domain *d, unsigned int device, unsigned int intx);
+void hvm_pci_intx_deassert(
+ struct domain *d, unsigned int device, unsigned int intx);
+
+/* Modify state of an ISA device's IRQ wire. */
+void hvm_isa_irq_assert(
+ struct domain *d, unsigned int isa_irq);
+void hvm_isa_irq_deassert(
+ struct domain *d, unsigned int isa_irq);
+
+void hvm_set_pci_link_route(struct domain *d, u8 link, u8 isa_irq);
+
+void hvm_maybe_deassert_evtchn_irq(void);
+void hvm_assert_evtchn_irq(struct vcpu *v);
+void hvm_set_callback_via(struct domain *d, uint64_t via);
+
+void hvm_dirq_assist(struct vcpu *v);
+
+#endif /* __XEN_HVM_IRQ_H__ */
/* Entry points for saving and restoring HVM domain state */
size_t hvm_save_size(struct domain *d);
int hvm_save(struct domain *d, hvm_domain_context_t *h);
+int hvm_save_one(struct domain *d, uint16_t typecode, uint16_t instance,
+ XEN_GUEST_HANDLE_64(uint8) handle);
int hvm_load(struct domain *d, hvm_domain_context_t *h);
/* Arch-specific definitions. */
* at what point in the page list to resume. For this purpose I steal the
* high-order bits of the @cmd parameter, which are otherwise unused and zero.
*/
-#define MEMOP_EXTENT_SHIFT 4 /* cmd[:4] == start_extent */
+#define MEMOP_EXTENT_SHIFT 6 /* cmd[:6] == start_extent */
#define MEMOP_CMD_MASK ((1 << MEMOP_EXTENT_SHIFT) - 1)
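+/* i.e. the operation itself is cmd & MEMOP_CMD_MASK and the resume point
+ * (start_extent) is cmd >> MEMOP_EXTENT_SHIFT. */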
extern long
unsigned int cmd,
XEN_GUEST_HANDLE(void) arg);
+extern int
+compat_vcpu_op(
+ int cmd,
+ int vcpuid,
+ XEN_GUEST_HANDLE(void) arg);
+
#endif
#endif /* __XEN_HYPERCALL_H__ */
rangeset_contains_singleton((d)->irq_caps, i)
#define multipage_allocation_permitted(d) \
- (!rangeset_is_empty((d)->iomem_caps))
+ (!rangeset_is_empty((d)->iomem_caps) || \
+ !rangeset_is_empty((d)->arch.ioport_caps))
#endif /* __XEN_IOCAP_H__ */
extern int iommu_pv_enabled;
extern int force_iommu;
extern int iommu_passthrough;
+extern int iommu_snoop;
+extern int iommu_qinval;
+extern int iommu_intremap;
#define domain_hvm_iommu(d) (&d->arch.hvm_domain.hvm_iommu)
void iommu_domain_destroy(struct domain *d);
int device_assigned(u8 bus, u8 devfn);
int assign_device(struct domain *d, u8 bus, u8 devfn);
-void deassign_device(struct domain *d, u8 bus, u8 devfn);
+int deassign_device(struct domain *d, u8 bus, u8 devfn);
int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn,
XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs);
int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
void iommu_update_ire_from_apic(unsigned int apic, unsigned int reg, unsigned int value);
void iommu_update_ire_from_msi(struct msi_desc *msi_desc, struct msi_msg *msg);
-int iommu_suspend(void);
-int iommu_resume(void);
+void iommu_suspend(void);
+void iommu_resume(void);
+
+void iommu_set_dom0_mapping(struct domain *d);
#endif /* _IOMMU_H_ */
#define IRQ_PENDING 4 /* IRQ pending - replay on enable */
#define IRQ_REPLAY 8 /* IRQ has been replayed but not acked yet */
#define IRQ_GUEST 16 /* IRQ is handled by guest OS(es) */
+#define IRQ_GUEST_EOI_PENDING 32 /* IRQ was disabled, pending a guest EOI */
#define IRQ_PER_CPU 256 /* IRQ is per CPU */
+/* Special IRQ numbers. */
+#define AUTO_ASSIGN_IRQ (-1)
+#define NEVER_ASSIGN_IRQ (-2)
+#define FREE_TO_ASSIGN_IRQ (-3)
+
/*
* Interrupt controller descriptor. This is all we need
* to describe about the low-level hardware.
cpumask_t affinity;
} __cacheline_aligned irq_desc_t;
-extern irq_desc_t irq_desc[NR_IRQS];
+extern irq_desc_t irq_desc[NR_VECTORS];
-extern int setup_irq(unsigned int, struct irqaction *);
-extern void free_irq(unsigned int);
-extern int request_irq(unsigned int irq,
+extern int setup_irq_vector(unsigned int, struct irqaction *);
+extern void release_irq_vector(unsigned int);
+extern int request_irq_vector(unsigned int vector,
void (*handler)(int, void *, struct cpu_user_regs *),
unsigned long irqflags, const char * devname, void *dev_id);
+#define setup_irq(irq, action) \
+ setup_irq_vector(irq_to_vector(irq), action)
+
+#define release_irq(irq) \
+ release_irq_vector(irq_to_vector(irq))
+
+#define request_irq(irq, handler, irqflags, devname, devid) \
+ request_irq_vector(irq_to_vector(irq), handler, irqflags, devname, devid)
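+/*
+ * These wrappers keep the legacy IRQ-numbered API: they translate the
+ * IRQ to its vector via irq_to_vector() and call the *_vector()
+ * functions above, e.g. setup_irq(irq, &action) expands to
+ * setup_irq_vector(irq_to_vector(irq), &action).
+ */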
+
extern hw_irq_controller no_irq_type;
extern void no_action(int cpl, void *dev_id, struct cpu_user_regs *regs);
extern irq_desc_t *domain_spin_lock_irq_desc(
struct domain *d, int irq, unsigned long *pflags);
-static inline void set_native_irq_info(int irq, cpumask_t mask)
+static inline void set_native_irq_info(unsigned int vector, cpumask_t mask)
{
- irq_desc[irq].affinity = mask;
+ irq_desc[vector].affinity = mask;
}
+#ifdef irq_to_vector
static inline void set_irq_info(int irq, cpumask_t mask)
{
- set_native_irq_info(irq, mask);
+ set_native_irq_info(irq_to_vector(irq), mask);
}
+#endif
+
#endif /* __XEN_IRQ_H__ */
void machine_crash_shutdown(void);
int machine_kexec_get(xen_kexec_range_t *range);
+/* vmcoreinfo support */
+#define VMCOREINFO_BYTES (4096)
+#define VMCOREINFO_NOTE_NAME "VMCOREINFO_XEN"
+void arch_crash_save_vmcoreinfo(void);
+void vmcoreinfo_append_str(const char *fmt, ...)
+ __attribute__ ((format (printf, 1, 2)));
+#define VMCOREINFO_PAGESIZE(value) \
+ vmcoreinfo_append_str("PAGESIZE=%ld\n", value)
+#define VMCOREINFO_SYMBOL(name) \
+ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name)
+#define VMCOREINFO_SYMBOL_ALIAS(alias, name) \
+ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #alias, (unsigned long)&name)
+#define VMCOREINFO_STRUCT_SIZE(name) \
+ vmcoreinfo_append_str("SIZE(%s)=%zu\n", #name, sizeof(struct name))
+#define VMCOREINFO_OFFSET(name, field) \
+ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \
+ (unsigned long)offsetof(struct name, field))
+#define VMCOREINFO_OFFSET_ALIAS(name, field, alias) \
+ vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #alias, \
+ (unsigned long)offsetof(struct name, field))
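+/*
+ * Illustrative output: each helper appends one "KEY=value" line to the
+ * vmcoreinfo note, e.g. VMCOREINFO_PAGESIZE(PAGE_SIZE) emits a line such
+ * as "PAGESIZE=4096" on x86, and VMCOREINFO_SYMBOL(name) emits
+ * "SYMBOL(name)=<hex address>".
+ */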
+
#endif /* __XEN_KEXEC_H__ */
/*
#define WARN_ON(p) do { if (p) WARN(); } while (0)
/* Force a compilation error if condition is true */
-#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2 * !!(condition)]))
+#define BUILD_BUG_ON(condition) ((void)sizeof(struct { int:-!!(condition); }))
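+/*
+ * The anonymous struct with a negative-width bitfield makes compilation
+ * fail whenever the condition is true (or is not a compile-time
+ * constant); e.g. BUILD_BUG_ON(sizeof(long) < sizeof(void *)); compiles
+ * to nothing when the assertion holds.
+ */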
#ifndef assert_failed
#define assert_failed(p) \
#define TAINT_MACHINE_CHECK (1<<1)
#define TAINT_BAD_PAGE (1<<2)
#define TAINT_SYNC_CONSOLE (1<<3)
+#define TAINT_ERROR_INJECT (1<<4)
extern int tainted;
#define TAINT_STRING_MAX_LEN 20
extern char *print_tainted(char *str);
--- /dev/null
+/******************************************************************************
+ * libelf.h
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
+ * DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef __XEN_LIBELF_H__
+#define __XEN_LIBELF_H__
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__ia64__)
+#define XEN_ELF_LITTLE_ENDIAN
+#else
+#error define architectural endianness
+#endif
+
+#undef ELFSIZE
+#include "elfstructs.h"
+#ifdef __XEN__
+#include <public/elfnote.h>
+#include <public/features.h>
+#else
+#include <xen/elfnote.h>
+#include <xen/features.h>
+#endif
+
+/* ------------------------------------------------------------------------ */
+
+typedef union {
+ Elf32_Ehdr e32;
+ Elf64_Ehdr e64;
+} elf_ehdr;
+
+typedef union {
+ Elf32_Phdr e32;
+ Elf64_Phdr e64;
+} elf_phdr;
+
+typedef union {
+ Elf32_Shdr e32;
+ Elf64_Shdr e64;
+} elf_shdr;
+
+typedef union {
+ Elf32_Sym e32;
+ Elf64_Sym e64;
+} elf_sym;
+
+typedef union {
+ Elf32_Rel e32;
+ Elf64_Rel e64;
+} elf_rel;
+
+typedef union {
+ Elf32_Rela e32;
+ Elf64_Rela e64;
+} elf_rela;
+
+typedef union {
+ Elf32_Note e32;
+ Elf64_Note e64;
+} elf_note;
+
+struct elf_binary {
+ /* elf binary */
+ const char *image;
+ size_t size;
+ char class;
+ char data;
+
+ const elf_ehdr *ehdr;
+ const char *sec_strtab;
+ const elf_shdr *sym_tab;
+ const char *sym_strtab;
+
+ /* loaded to */
+ char *dest;
+ uint64_t pstart;
+ uint64_t pend;
+ uint64_t reloc_offset;
+
+ uint64_t bsd_symtab_pstart;
+ uint64_t bsd_symtab_pend;
+
+#ifndef __XEN__
+ /* misc */
+ FILE *log;
+#endif
+ int verbose;
+};
+
+/* ------------------------------------------------------------------------ */
+/* accessing elf header fields */
+
+#ifdef XEN_ELF_BIG_ENDIAN
+# define NATIVE_ELFDATA ELFDATA2MSB
+#else
+# define NATIVE_ELFDATA ELFDATA2LSB
+#endif
+
+#define elf_32bit(elf) (ELFCLASS32 == (elf)->class)
+#define elf_64bit(elf) (ELFCLASS64 == (elf)->class)
+#define elf_msb(elf) (ELFDATA2MSB == (elf)->data)
+#define elf_lsb(elf) (ELFDATA2LSB == (elf)->data)
+#define elf_swap(elf) (NATIVE_ELFDATA != (elf)->data)
+
+#define elf_uval(elf, str, elem) \
+ ((ELFCLASS64 == (elf)->class) \
+ ? elf_access_unsigned((elf), (str), \
+ offsetof(typeof(*(str)),e64.elem), \
+ sizeof((str)->e64.elem)) \
+ : elf_access_unsigned((elf), (str), \
+ offsetof(typeof(*(str)),e32.elem), \
+ sizeof((str)->e32.elem)))
+
+#define elf_sval(elf, str, elem) \
+ ((ELFCLASS64 == (elf)->class) \
+ ? elf_access_signed((elf), (str), \
+ offsetof(typeof(*(str)),e64.elem), \
+ sizeof((str)->e64.elem)) \
+ : elf_access_signed((elf), (str), \
+ offsetof(typeof(*(str)),e32.elem), \
+ sizeof((str)->e32.elem)))
+
+#define elf_size(elf, str) \
+ ((ELFCLASS64 == (elf)->class) \
+ ? sizeof((str)->e64) : sizeof((str)->e32))
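+/*
+ * Usage sketch: these accessors pick the 32- or 64-bit field and honour
+ * the image's endianness, e.g.
+ *     uint64_t entry = elf_uval(elf, elf->ehdr, e_entry);
+ * reads e_entry from either an Elf32_Ehdr or an Elf64_Ehdr.
+ */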
+
+uint64_t elf_access_unsigned(struct elf_binary *elf, const void *ptr,
+ uint64_t offset, size_t size);
+int64_t elf_access_signed(struct elf_binary *elf, const void *ptr,
+ uint64_t offset, size_t size);
+
+uint64_t elf_round_up(struct elf_binary *elf, uint64_t addr);
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_tools.c */
+
+int elf_shdr_count(struct elf_binary *elf);
+int elf_phdr_count(struct elf_binary *elf);
+
+const elf_shdr *elf_shdr_by_name(struct elf_binary *elf, const char *name);
+const elf_shdr *elf_shdr_by_index(struct elf_binary *elf, int index);
+const elf_phdr *elf_phdr_by_index(struct elf_binary *elf, int index);
+
+const char *elf_section_name(struct elf_binary *elf, const elf_shdr * shdr);
+const void *elf_section_start(struct elf_binary *elf, const elf_shdr * shdr);
+const void *elf_section_end(struct elf_binary *elf, const elf_shdr * shdr);
+
+const void *elf_segment_start(struct elf_binary *elf, const elf_phdr * phdr);
+const void *elf_segment_end(struct elf_binary *elf, const elf_phdr * phdr);
+
+const elf_sym *elf_sym_by_name(struct elf_binary *elf, const char *symbol);
+const elf_sym *elf_sym_by_index(struct elf_binary *elf, int index);
+
+const char *elf_note_name(struct elf_binary *elf, const elf_note * note);
+const void *elf_note_desc(struct elf_binary *elf, const elf_note * note);
+uint64_t elf_note_numeric(struct elf_binary *elf, const elf_note * note);
+const elf_note *elf_note_next(struct elf_binary *elf, const elf_note * note);
+
+int elf_is_elfbinary(const void *image);
+int elf_phdr_is_loadable(struct elf_binary *elf, const elf_phdr * phdr);
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_loader.c */
+
+int elf_init(struct elf_binary *elf, const char *image, size_t size);
+#ifdef __XEN__
+void elf_set_verbose(struct elf_binary *elf);
+#else
+void elf_set_logfile(struct elf_binary *elf, FILE * log, int verbose);
+#endif
+
+void elf_parse_binary(struct elf_binary *elf);
+void elf_load_binary(struct elf_binary *elf);
+
+void *elf_get_ptr(struct elf_binary *elf, unsigned long addr);
+uint64_t elf_lookup_addr(struct elf_binary *elf, const char *symbol);
+
+void elf_parse_bsdsyms(struct elf_binary *elf, uint64_t pstart); /* private */
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_relocate.c */
+
+int elf_reloc(struct elf_binary *elf);
+
+/* ------------------------------------------------------------------------ */
+/* xc_libelf_dominfo.c */
+
+#define UNSET_ADDR ((uint64_t)-1)
+
+enum xen_elfnote_type {
+ XEN_ENT_NONE = 0,
+ XEN_ENT_LONG = 1,
+ XEN_ENT_STR = 2
+};
+
+struct xen_elfnote {
+ enum xen_elfnote_type type;
+ const char *name;
+ union {
+ const char *str;
+ uint64_t num;
+ } data;
+};
+
+struct elf_dom_parms {
+ /* raw */
+ const char *guest_info;
+ const void *elf_note_start;
+ const void *elf_note_end;
+ struct xen_elfnote elf_notes[XEN_ELFNOTE_MAX + 1];
+
+ /* parsed */
+ char guest_os[16];
+ char guest_ver[16];
+ char xen_ver[16];
+ char loader[16];
+ int pae;
+ int bsd_symtab;
+ uint64_t virt_base;
+ uint64_t virt_entry;
+ uint64_t virt_hypercall;
+ uint64_t virt_hv_start_low;
+ uint64_t p2m_base;
+ uint64_t elf_paddr_offset;
+ uint32_t f_supported[XENFEAT_NR_SUBMAPS];
+ uint32_t f_required[XENFEAT_NR_SUBMAPS];
+
+ /* calculated */
+ uint64_t virt_offset;
+ uint64_t virt_kstart;
+ uint64_t virt_kend;
+};
+
+static inline void elf_xen_feature_set(int nr, uint32_t * addr)
+{
+ addr[nr >> 5] |= 1 << (nr & 31);
+}
+static inline int elf_xen_feature_get(int nr, uint32_t * addr)
+{
+ return !!(addr[nr >> 5] & (1 << (nr & 31)));
+}
+
+int elf_xen_parse_features(const char *features,
+ uint32_t *supported,
+ uint32_t *required);
+int elf_xen_parse_note(struct elf_binary *elf,
+ struct elf_dom_parms *parms,
+ const elf_note *note);
+int elf_xen_parse_guest_info(struct elf_binary *elf,
+ struct elf_dom_parms *parms);
+int elf_xen_parse(struct elf_binary *elf,
+ struct elf_dom_parms *parms);
+
+#endif /* __XEN_LIBELF_H__ */
/* Xen suballocator. These functions are interrupt-safe. */
void init_xenheap_pages(paddr_t ps, paddr_t pe);
-void *alloc_xenheap_pages(unsigned int order);
+void *alloc_xenheap_pages(unsigned int order, unsigned int memflags);
void free_xenheap_pages(void *v, unsigned int order);
-#define alloc_xenheap_page() (alloc_xenheap_pages(0))
+#define alloc_xenheap_page() (alloc_xenheap_pages(0,0))
#define free_xenheap_page(v) (free_xenheap_pages(v,0))
/* Domain suballocator. These functions are *not* interrupt-safe.*/
unsigned long avail_domheap_pages(void);
#define alloc_domheap_page(d,f) (alloc_domheap_pages(d,0,f))
#define free_domheap_page(p) (free_domheap_pages(p,0))
+unsigned int online_page(unsigned long mfn, uint32_t *status);
+int offline_page(unsigned long mfn, int broken, uint32_t *status);
+int query_page_offline(unsigned long mfn, uint32_t *status);
void scrub_heap_pages(void);
/* memflags: */
#define _MEMF_no_refcount 0
#define MEMF_no_refcount (1U<<_MEMF_no_refcount)
+#define _MEMF_populate_on_demand 1
+#define MEMF_populate_on_demand (1U<<_MEMF_populate_on_demand)
#define _MEMF_node 8
#define MEMF_node(n) ((((n)+1)&0xff)<<_MEMF_node)
#define _MEMF_bits 24
#define MAX_ORDER 20 /* 2^20 contiguous pages */
#endif
+#define page_list_entry list_head
+
+#include <asm/mm.h>
+
+#ifndef page_list_entry
+struct page_list_head
+{
+ struct page_info *next, *tail;
+};
+/* These must only have instances in struct page_info. */
+# define page_list_entry
+
+# define PAGE_LIST_HEAD_INIT(name) { NULL, NULL }
+# define PAGE_LIST_HEAD(name) \
+ struct page_list_head name = PAGE_LIST_HEAD_INIT(name)
+# define INIT_PAGE_LIST_HEAD(head) ((head)->tail = (head)->next = NULL)
+# define INIT_PAGE_LIST_ENTRY(ent) ((ent)->prev = (ent)->next = ~0)
+
+static inline int
+page_list_empty(const struct page_list_head *head)
+{
+ return !head->next;
+}
+static inline struct page_info *
+page_list_first(const struct page_list_head *head)
+{
+ return head->next;
+}
+static inline struct page_info *
+page_list_next(const struct page_info *page,
+ const struct page_list_head *head)
+{
+ return page != head->tail ? mfn_to_page(page->list.next) : NULL;
+}
+static inline struct page_info *
+page_list_prev(const struct page_info *page,
+ const struct page_list_head *head)
+{
+ return page != head->next ? mfn_to_page(page->list.prev) : NULL;
+}
+static inline void
+page_list_add(struct page_info *page, struct page_list_head *head)
+{
+ if ( head->next )
+ {
+ page->list.next = page_to_mfn(head->next);
+ head->next->list.prev = page_to_mfn(page);
+ }
+ else
+ {
+ head->tail = page;
+ page->list.next = ~0;
+ }
+ page->list.prev = ~0;
+ head->next = page;
+}
+static inline void
+page_list_add_tail(struct page_info *page, struct page_list_head *head)
+{
+ page->list.next = ~0;
+ if ( head->next )
+ {
+ page->list.prev = page_to_mfn(head->tail);
+ head->tail->list.next = page_to_mfn(page);
+ }
+ else
+ {
+ page->list.prev = ~0;
+ head->next = page;
+ }
+ head->tail = page;
+}
+static inline bool_t
+__page_list_del_head(struct page_info *page, struct page_list_head *head,
+ struct page_info *next, struct page_info *prev)
+{
+ if ( head->next == page )
+ {
+ if ( head->tail != page )
+ {
+ next->list.prev = ~0;
+ head->next = next;
+ }
+ else
+ head->tail = head->next = NULL;
+ return 1;
+ }
+
+ if ( head->tail == page )
+ {
+ prev->list.next = ~0;
+ head->tail = prev;
+ return 1;
+ }
+
+ return 0;
+}
+static inline void
+page_list_del(struct page_info *page, struct page_list_head *head)
+{
+ struct page_info *next = mfn_to_page(page->list.next);
+ struct page_info *prev = mfn_to_page(page->list.prev);
+
+ if ( !__page_list_del_head(page, head, next, prev) )
+ {
+ next->list.prev = page->list.prev;
+ prev->list.next = page->list.next;
+ }
+}
+static inline void
+page_list_del2(struct page_info *page, struct page_list_head *head1,
+ struct page_list_head *head2)
+{
+ struct page_info *next = mfn_to_page(page->list.next);
+ struct page_info *prev = mfn_to_page(page->list.prev);
+
+ if ( !__page_list_del_head(page, head1, next, prev) &&
+ !__page_list_del_head(page, head2, next, prev) )
+ {
+ next->list.prev = page->list.prev;
+ prev->list.next = page->list.next;
+ }
+}
+static inline struct page_info *
+page_list_remove_head(struct page_list_head *head)
+{
+ struct page_info *page = head->next;
+
+ if ( page )
+ page_list_del(page, head);
+
+ return page;
+}
+
+#define page_list_for_each(pos, head) \
+ for ( pos = (head)->next; pos; pos = page_list_next(pos, head) )
+#define page_list_for_each_safe(pos, tmp, head) \
+ for ( pos = (head)->next; \
+ pos ? (tmp = page_list_next(pos, head), 1) : 0; \
+ pos = tmp )
+#define page_list_for_each_safe_reverse(pos, tmp, head) \
+ for ( pos = (head)->tail; \
+ pos ? (tmp = page_list_prev(pos, head), 1) : 0; \
+ pos = tmp )
+#else
+# define page_list_head list_head
+# define PAGE_LIST_HEAD_INIT LIST_HEAD_INIT
+# define PAGE_LIST_HEAD LIST_HEAD
+# define INIT_PAGE_LIST_HEAD INIT_LIST_HEAD
+# define INIT_PAGE_LIST_ENTRY INIT_LIST_HEAD
+# define page_list_empty list_empty
+# define page_list_first(hd) list_entry((hd)->next, \
+ struct page_info, list)
+# define page_list_next(pg, hd) list_entry((pg)->list.next, \
+ struct page_info, list)
+# define page_list_add(pg, hd) list_add(&(pg)->list, hd)
+# define page_list_add_tail(pg, hd) list_add_tail(&(pg)->list, hd)
+# define page_list_del(pg, hd) list_del(&(pg)->list)
+# define page_list_del2(pg, hd1, hd2) list_del(&(pg)->list)
+# define page_list_remove_head(hd) (!page_list_empty(hd) ? \
+ ({ \
+ struct page_info *__pg = page_list_first(hd); \
+ list_del(&__pg->list); \
+ __pg; \
+ }) : NULL)
+# define page_list_for_each(pos, head) list_for_each_entry(pos, head, list)
+# define page_list_for_each_safe(pos, tmp, head) \
+ list_for_each_entry_safe(pos, tmp, head, list)
+# define page_list_for_each_safe_reverse(pos, tmp, head) \
+ list_for_each_entry_safe_reverse(pos, tmp, head, list)
+#endif
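+/*
+ * Note: in the compact variant above, the list links hold MFNs
+ * (page_to_mfn()) rather than pointers, with ~0 marking the end of the
+ * list; a typical traversal is, e.g.,
+ *     page_list_for_each ( pg, &d->page_list )
+ *         ...;
+ */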
+
/* Automatic page scrubbing for dead domains. */
-extern struct list_head page_scrub_list;
-#define page_scrub_schedule_work() \
- do { \
- if ( !list_empty(&page_scrub_list) ) \
- raise_softirq(PAGE_SCRUB_SOFTIRQ); \
+extern struct page_list_head page_scrub_list;
+#define page_scrub_schedule_work() \
+ do { \
+ if ( !page_list_empty(&page_scrub_list) ) \
+ raise_softirq(PAGE_SCRUB_SOFTIRQ); \
} while ( 0 )
#define page_scrub_kick() \
do { \
- if ( !list_empty(&page_scrub_list) ) \
+ if ( !page_list_empty(&page_scrub_list) ) \
cpumask_raise_softirq(cpu_online_map, PAGE_SCRUB_SOFTIRQ); \
} while ( 0 )
unsigned long avail_scrub_pages(void);
-#include <asm/mm.h>
-
int guest_remove_page(struct domain *d, unsigned long gmfn);
-/* Returns TRUE if the memory at address @p is ordinary RAM. */
-int memory_is_conventional_ram(paddr_t p);
+#define RAM_TYPE_CONVENTIONAL 0x00000001
+#define RAM_TYPE_RESERVED 0x00000002
+#define RAM_TYPE_UNUSABLE 0x00000004
+#define RAM_TYPE_ACPI 0x00000008
+/* Returns TRUE if the whole page at @mfn is of the requested RAM type(s) above. */
+int page_is_ram_type(unsigned long mfn, unsigned long mem_type);
extern unsigned long *alloc_bitmap; /* for vmcoreinfo */
#define PCI_BDF(b,d,f) ((((b) & 0xff) << 8) | PCI_DEVFN(d,f))
#define PCI_BDF2(b,df) ((((b) & 0xff) << 8) | ((df) & 0xff))
+#define MAX_MSIX_TABLE_ENTRIES 2048
+#define MAX_MSIX_TABLE_PAGES 8
+struct pci_dev_info {
+ unsigned is_extfn;
+ unsigned is_virtfn;
+ struct {
+ u8 bus;
+ u8 devfn;
+ } physfn;
+};
+
struct pci_dev {
struct list_head alldevs_list;
struct list_head domain_list;
+
struct list_head msi_list;
+ int msix_table_refcnt[MAX_MSIX_TABLE_PAGES];
+ int msix_table_idx[MAX_MSIX_TABLE_PAGES];
+ spinlock_t msix_table_lock;
+
struct domain *domain;
const u8 bus;
const u8 devfn;
- spinlock_t lock;
+ struct pci_dev_info info;
};
#define for_each_pdev(domain, pdev) \
list_for_each_entry(pdev, &(domain->arch.pdev_list), domain_list)
/*
- * The pcidevs_lock write-lock must be held when doing alloc_pdev() or
- * free_pdev(). Never de-reference pdev without holding pdev->lock or
- * pcidevs_lock. Always aquire pcidevs_lock before pdev->lock when
- * doing free_pdev().
+ * The pcidevs_lock protects alldevs_list and the device-to-domain
+ * assignments; it also synchronizes access to the MSI capability state
+ * that is not related to interrupt handling (the mask bit register).
*/
-extern rwlock_t pcidevs_lock;
+extern spinlock_t pcidevs_lock;
struct pci_dev *alloc_pdev(u8 bus, u8 devfn);
void free_pdev(struct pci_dev *pdev);
void pci_release_devices(struct domain *d);
int pci_add_device(u8 bus, u8 devfn);
int pci_remove_device(u8 bus, u8 devfn);
+int pci_add_device_ext(u8 bus, u8 devfn, struct pci_dev_info *info);
+struct pci_dev *pci_get_pdev(int bus, int devfn);
+struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn);
uint8_t pci_conf_read8(
unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);
int pci_find_cap_offset(u8 bus, u8 dev, u8 func, u8 cap);
int pci_find_next_cap(u8 bus, unsigned int devfn, u8 pos, int cap);
+int msixtbl_pt_register(struct domain *d, int pirq, uint64_t gtable);
+void msixtbl_pt_unregister(struct domain *d, int pirq);
+
#endif /* __XEN_PCI_H__ */
#define PCI_PM_CAP_PME_D3cold 0x8000 /* PME# from D3 (cold) */
#define PCI_PM_CTRL 4 /* PM control and status register */
#define PCI_PM_CTRL_STATE_MASK 0x0003 /* Current power state (D0 to D3) */
-#define PCI_PM_CTRL_NO_SOFT_RESET 0x0004 /* No reset for D3hot->D0 */
+#define PCI_PM_CTRL_NO_SOFT_RESET 0x0008 /* No reset for D3hot->D0 */
#define PCI_PM_CTRL_PME_ENABLE 0x0100 /* PME pin enable */
#define PCI_PM_CTRL_DATA_SEL_MASK 0x1e00 /* Data select (??) */
#define PCI_PM_CTRL_DATA_SCALE_MASK 0x6000 /* Data scale (??) */
PERFCOUNTER(sched_run, "sched: runs through scheduler")
PERFCOUNTER(sched_ctx, "sched: context switches")
+PERFCOUNTER(vcpu_check, "csched: vcpu_check")
+PERFCOUNTER(schedule, "csched: schedule")
+PERFCOUNTER(acct_run, "csched: acct_run")
+PERFCOUNTER(acct_no_work, "csched: acct_no_work")
+PERFCOUNTER(acct_balance, "csched: acct_balance")
+PERFCOUNTER(acct_reorder, "csched: acct_reorder")
+PERFCOUNTER(acct_min_credit, "csched: acct_min_credit")
+PERFCOUNTER(acct_vcpu_active, "csched: acct_vcpu_active")
+PERFCOUNTER(acct_vcpu_idle, "csched: acct_vcpu_idle")
+PERFCOUNTER(vcpu_sleep, "csched: vcpu_sleep")
+PERFCOUNTER(vcpu_wake_running, "csched: vcpu_wake_running")
+PERFCOUNTER(vcpu_wake_onrunq, "csched: vcpu_wake_onrunq")
+PERFCOUNTER(vcpu_wake_runnable, "csched: vcpu_wake_runnable")
+PERFCOUNTER(vcpu_wake_not_runnable, "csched: vcpu_wake_not_runnable")
+PERFCOUNTER(vcpu_park, "csched: vcpu_park")
+PERFCOUNTER(vcpu_unpark, "csched: vcpu_unpark")
+PERFCOUNTER(tickle_local_idler, "csched: tickle_local_idler")
+PERFCOUNTER(tickle_local_over, "csched: tickle_local_over")
+PERFCOUNTER(tickle_local_under, "csched: tickle_local_under")
+PERFCOUNTER(tickle_local_other, "csched: tickle_local_other")
+PERFCOUNTER(tickle_idlers_none, "csched: tickle_idlers_none")
+PERFCOUNTER(tickle_idlers_some, "csched: tickle_idlers_some")
+PERFCOUNTER(load_balance_idle, "csched: load_balance_idle")
+PERFCOUNTER(load_balance_over, "csched: load_balance_over")
+PERFCOUNTER(load_balance_other, "csched: load_balance_other")
+PERFCOUNTER(steal_trylock_failed, "csched: steal_trylock_failed")
+PERFCOUNTER(steal_peer_idle, "csched: steal_peer_idle")
+PERFCOUNTER(migrate_queued, "csched: migrate_queued")
+PERFCOUNTER(migrate_running, "csched: migrate_running")
+PERFCOUNTER(dom_init, "csched: dom_init")
+PERFCOUNTER(dom_destroy, "csched: dom_destroy")
+PERFCOUNTER(vcpu_init, "csched: vcpu_init")
+PERFCOUNTER(vcpu_destroy, "csched: vcpu_destroy")
+PERFCOUNTER(vcpu_hot, "csched: vcpu_hot")
+
PERFCOUNTER(need_flush_tlb_flush, "PG_need_flush tlb flushes")
/*#endif*/ /* __XEN_PERFC_DEFN_H__ */
#include <xen/xenoprof.h>
#include <xen/rcupdate.h>
#include <xen/irq.h>
+#include <xen/mm.h>
#ifdef CONFIG_COMPAT
#include <compat/vcpu.h>
extern struct domain *dom0;
#ifndef CONFIG_COMPAT
-#define MAX_EVTCHNS(d) NR_EVENT_CHANNELS
+#define BITS_PER_EVTCHN_WORD(d) BITS_PER_LONG
#else
-#define MAX_EVTCHNS(d) (!IS_COMPAT(d) ? \
- NR_EVENT_CHANNELS : \
- sizeof(unsigned int) * sizeof(unsigned int) * 64)
+#define BITS_PER_EVTCHN_WORD(d) (has_32bit_shinfo(d) ? 32 : BITS_PER_LONG)
#endif
+#define MAX_EVTCHNS(d) (BITS_PER_EVTCHN_WORD(d) * BITS_PER_EVTCHN_WORD(d))
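+/* For example: a 64-bit guest gets 64 * 64 = 4096 event channels, while a
+ * guest using the 32-bit shared info layout is limited to 32 * 32 = 1024. */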
#define EVTCHNS_PER_BUCKET 128
#define NR_EVTCHN_BUCKETS (NR_EVENT_CHANNELS / EVTCHNS_PER_BUCKET)
} runstate_guest; /* guest address */
#endif
+ /* Last time the vCPU was scheduled out. */
+ uint64_t last_run_time;
+
/* Has the FPU been initialised? */
bool_t fpu_initialised;
/* Has the FPU been used since it was last saved? */
spinlock_t domain_lock;
spinlock_t page_alloc_lock; /* protects all the following fields */
- struct list_head page_list; /* linked list, of size tot_pages */
- struct list_head xenpage_list; /* linked list, of size xenheap_pages */
+ struct page_list_head page_list; /* linked list, of size tot_pages */
+ struct page_list_head xenpage_list; /* linked list (size xenheap_pages) */
unsigned int tot_pages; /* number of pages currently possessed */
unsigned int max_pages; /* maximum value for tot_pages */
unsigned int xenheap_pages; /* # pages allocated from Xen heap */
struct domain *domain_create(
domid_t domid, unsigned int domcr_flags, ssidref_t ssidref);
/* DOMCRF_hvm: Create an HVM domain, as opposed to a PV domain. */
-#define _DOMCRF_hvm 0
-#define DOMCRF_hvm (1U<<_DOMCRF_hvm)
+#define _DOMCRF_hvm 0
+#define DOMCRF_hvm (1U<<_DOMCRF_hvm)
/* DOMCRF_hap: Create a domain with hardware-assisted paging. */
-#define _DOMCRF_hap 1
-#define DOMCRF_hap (1U<<_DOMCRF_hap)
+#define _DOMCRF_hap 1
+#define DOMCRF_hap (1U<<_DOMCRF_hap)
+/* DOMCRF_s3_integrity: Create a domain with tboot S3 memory integrity
+   protection. */
+#define _DOMCRF_s3_integrity 2
+#define DOMCRF_s3_integrity (1U<<_DOMCRF_s3_integrity)
/* DOMCRF_dummy: Create a dummy domain (not scheduled; not on domain list) */
-#define _DOMCRF_dummy 2
-#define DOMCRF_dummy (1U<<_DOMCRF_dummy)
-
-int construct_dom0(
- struct domain *d,
- unsigned long image_start, unsigned long image_len,
- unsigned long initrd_start, unsigned long initrd_len,
- char *cmdline);
+#define _DOMCRF_dummy 3
+#define DOMCRF_dummy (1U<<_DOMCRF_dummy)
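/*
 * Hypothetical caller (illustration only, using the prototype above): a
 * tboot-protected HVM domain would be created with both flags or'd together:
 *
 *     d = domain_create(domid, DOMCRF_hvm | DOMCRF_s3_integrity, ssidref);
 *
 * DOMCRF_dummy moves to bit 3 so the creation-flag bits stay distinct.
 */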
/*
* rcu_lock_domain_by_id() is more efficient than get_domain_by_id().
void vcpu_unlock_affinity(struct vcpu *v, cpumask_t *affinity);
void vcpu_runstate_get(struct vcpu *v, struct vcpu_runstate_info *runstate);
+uint64_t get_cpu_idle_time(unsigned int cpu);
#define IS_PRIV(_d) ((_d)->is_privileged)
#define IS_PRIV_FOR(_d, _t) (IS_PRIV(_d) || ((_d)->target && (_d)->target == (_t)))
-#ifndef IS_COMPAT
-#define IS_COMPAT(d) 0
-#endif
-
#define VM_ASSIST(_d,_t) (test_bit((_t), &(_d)->vm_assist))
#define is_hvm_domain(d) ((d)->is_hvm)
#define is_hvm_vcpu(v) (is_hvm_domain(v->domain))
#define need_iommu(d) ((d)->need_iommu && !(d)->is_hvm)
+extern int sched_smt_power_savings;
+
extern enum cpufreq_controller {
FREQCTL_none, FREQCTL_dom0_kernel, FREQCTL_xen
} cpufreq_controller;
#include <xen/config.h>
#include <asm/system.h>
+#include <asm/spinlock.h>
-#define spin_lock_irqsave(lock, flags) \
- do { local_irq_save(flags); spin_lock(lock); } while ( 0 )
-#define spin_lock_irq(lock) \
- do { local_irq_disable(); spin_lock(lock); } while ( 0 )
+#ifndef NDEBUG
+struct lock_debug {
+ int irq_safe; /* +1: IRQ-safe; 0: not IRQ-safe; -1: don't know yet */
+};
+#define _LOCK_DEBUG { -1 }
+void spin_debug_enable(void);
+void spin_debug_disable(void);
+#else
+struct lock_debug { };
+#define _LOCK_DEBUG { }
+#define spin_debug_enable() ((void)0)
+#define spin_debug_disable() ((void)0)
+#endif
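/*
 * Minimal sketch (assumed semantics, not the code added by this patch) of how
 * the irq_safe tri-state could be consumed by the out-of-line lock routines:
 * the first acquisition records whether IRQs were disabled, and a later
 * acquisition with the opposite IRQ state trips an assertion, flagging locks
 * that are taken both with and without IRQs disabled.
 */
#ifndef NDEBUG
static void check_lock(struct lock_debug *debug)
{
    int irq_safe = !local_irq_is_enabled();

    if ( debug->irq_safe == -1 )
        debug->irq_safe = irq_safe;          /* first use decides */
    BUG_ON(debug->irq_safe != irq_safe);     /* inconsistent IRQ discipline */
}
#endif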
-#define read_lock_irqsave(lock, flags) \
- do { local_irq_save(flags); read_lock(lock); } while ( 0 )
-#define read_lock_irq(lock) \
- do { local_irq_disable(); read_lock(lock); } while ( 0 )
+typedef struct {
+ raw_spinlock_t raw;
+ u16 recurse_cpu:12;
+ u16 recurse_cnt:4;
+ struct lock_debug debug;
+} spinlock_t;
-#define write_lock_irqsave(lock, flags) \
- do { local_irq_save(flags); write_lock(lock); } while ( 0 )
-#define write_lock_irq(lock) \
- do { local_irq_disable(); write_lock(lock); } while ( 0 )
-#define spin_unlock_irqrestore(lock, flags) \
- do { spin_unlock(lock); local_irq_restore(flags); } while ( 0 )
-#define spin_unlock_irq(lock) \
- do { spin_unlock(lock); local_irq_enable(); } while ( 0 )
+#define SPIN_LOCK_UNLOCKED { _RAW_SPIN_LOCK_UNLOCKED, 0xfffu, 0, _LOCK_DEBUG }
+#define DEFINE_SPINLOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
+#define spin_lock_init(l) (*(l) = (spinlock_t)SPIN_LOCK_UNLOCKED)
-#define read_unlock_irqrestore(lock, flags) \
- do { read_unlock(lock); local_irq_restore(flags); } while ( 0 )
-#define read_unlock_irq(lock) \
- do { read_unlock(lock); local_irq_enable(); } while ( 0 )
+typedef struct {
+ raw_rwlock_t raw;
+ struct lock_debug debug;
+} rwlock_t;
-#define write_unlock_irqrestore(lock, flags) \
- do { write_unlock(lock); local_irq_restore(flags); } while ( 0 )
-#define write_unlock_irq(lock) \
- do { write_unlock(lock); local_irq_enable(); } while ( 0 )
+#define RW_LOCK_UNLOCKED { _RAW_RW_LOCK_UNLOCKED, _LOCK_DEBUG }
+#define DEFINE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED
+#define rwlock_init(l) (*(l) = (rwlock_t)RW_LOCK_UNLOCKED)
-#ifdef CONFIG_SMP
+void _spin_lock(spinlock_t *lock);
+void _spin_lock_irq(spinlock_t *lock);
+unsigned long _spin_lock_irqsave(spinlock_t *lock);
-#include <asm/spinlock.h>
+void _spin_unlock(spinlock_t *lock);
+void _spin_unlock_irq(spinlock_t *lock);
+void _spin_unlock_irqrestore(spinlock_t *lock, unsigned long flags);
-#else
+int _spin_is_locked(spinlock_t *lock);
+int _spin_trylock(spinlock_t *lock);
+void _spin_barrier(spinlock_t *lock);
+void _spin_barrier_irq(spinlock_t *lock);
-#if (__GNUC__ > 2)
-typedef struct { } spinlock_t;
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { }
-#else
-typedef struct { int gcc_is_buggy; } spinlock_t;
-#define SPIN_LOCK_UNLOCKED /*(spinlock_t)*/ { 0 }
-#endif
+void _spin_lock_recursive(spinlock_t *lock);
+void _spin_unlock_recursive(spinlock_t *lock);
-#define spin_lock_init(lock) do { } while(0)
-#define spin_is_locked(lock) (0)
-#define _raw_spin_lock(lock) (void)(lock)
-#define _raw_spin_trylock(lock) ({1; })
-#define _raw_spin_unlock(lock) do { } while(0)
-#define _raw_spin_lock_recursive(lock) do { } while(0)
-#define _raw_spin_unlock_recursive(lock) do { } while(0)
-
-#if (__GNUC__ > 2)
-typedef struct { } rwlock_t;
-#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { }
-#else
-typedef struct { int gcc_is_buggy; } rwlock_t;
-#define RW_LOCK_UNLOCKED /*(rwlock_t)*/ { 0 }
-#endif
+void _read_lock(rwlock_t *lock);
+void _read_lock_irq(rwlock_t *lock);
+unsigned long _read_lock_irqsave(rwlock_t *lock);
-#define rwlock_init(lock) do { } while(0)
-#define _raw_read_lock(lock) (void)(lock) /* Not "unused variable". */
-#define _raw_read_unlock(lock) do { } while(0)
-#define _raw_write_lock(lock) (void)(lock) /* Not "unused variable". */
-#define _raw_write_unlock(lock) do { } while(0)
+void _read_unlock(rwlock_t *lock);
+void _read_unlock_irq(rwlock_t *lock);
+void _read_unlock_irqrestore(rwlock_t *lock, unsigned long flags);
-#endif
+void _write_lock(rwlock_t *lock);
+void _write_lock_irq(rwlock_t *lock);
+unsigned long _write_lock_irqsave(rwlock_t *lock);
+
+void _write_unlock(rwlock_t *lock);
+void _write_unlock_irq(rwlock_t *lock);
+void _write_unlock_irqrestore(rwlock_t *lock, unsigned long flags);
+
+int _rw_is_locked(rwlock_t *lock);
+
+#define spin_lock(l) _spin_lock(l)
+#define spin_lock_irq(l) _spin_lock_irq(l)
+#define spin_lock_irqsave(l, f) ((f) = _spin_lock_irqsave(l))
+
+#define spin_unlock(l) _spin_unlock(l)
+#define spin_unlock_irq(l) _spin_unlock_irq(l)
+#define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f)
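/*
 * Hypothetical call site (illustration only; 'some_lock' is not from this
 * patch): the *_irqsave wrappers now return the saved flags from the
 * out-of-line helper, hence the assignment inside the macro.
 */
    unsigned long flags;

    spin_lock_irqsave(&some_lock, flags);    /* IRQs off, previous state saved */
    /* ... critical region ... */
    spin_unlock_irqrestore(&some_lock, flags);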
-#define spin_lock(_lock) _raw_spin_lock(_lock)
-#define spin_trylock(_lock) _raw_spin_trylock(_lock)
-#define spin_unlock(_lock) _raw_spin_unlock(_lock)
-#define spin_lock_recursive(_lock) _raw_spin_lock_recursive(_lock)
-#define spin_unlock_recursive(_lock) _raw_spin_unlock_recursive(_lock)
-#define read_lock(_lock) _raw_read_lock(_lock)
-#define read_unlock(_lock) _raw_read_unlock(_lock)
-#define write_lock(_lock) _raw_write_lock(_lock)
-#define write_unlock(_lock) _raw_write_unlock(_lock)
+#define spin_is_locked(l) _spin_is_locked(l)
+#define spin_trylock(l) _spin_trylock(l)
/* Ensure a lock is quiescent between two critical operations. */
-static inline void spin_barrier(spinlock_t *lock)
-{
- do { mb(); } while ( spin_is_locked(lock) );
- mb();
-}
-
-#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
-#define DEFINE_RWLOCK(x) rwlock_t x = RW_LOCK_UNLOCKED
+#define spin_barrier(l) _spin_barrier(l)
+#define spin_barrier_irq(l) _spin_barrier_irq(l)
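/*
 * Hypothetical teardown pattern (illustration only; 'table_lock' and 'obj'
 * are not from this patch): once an object can no longer be reached by new
 * lookups, spin_barrier() waits for any CPU still inside a critical region
 * of its lock to drop it, after which the object can be freed.
 */
    spin_lock(&table_lock);
    list_del(&obj->list);                /* no new lookups can reach obj */
    spin_unlock(&table_lock);

    spin_barrier(&obj->lock);            /* drain current holders of obj->lock */
    xfree(obj);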
+
+/*
+ * spin_[un]lock_recursive(): Use these forms when the lock can (safely!) be
+ * reentered recursively on the same CPU. All critical regions that may form
+ * part of a recursively-nested set must be protected by these forms. If there
+ * are any critical regions that cannot form part of such a set, they can use
+ * standard spin_[un]lock().
+ */
+#define spin_lock_recursive(l) _spin_lock_recursive(l)
+#define spin_unlock_recursive(l) _spin_unlock_recursive(l)
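/*
 * Sketch of the assumed recursive-lock semantics (the real bodies are out of
 * line; this is not the patch's implementation): the owning CPU is remembered
 * in recurse_cpu, so a nested acquisition on the same CPU only bumps
 * recurse_cnt instead of deadlocking.
 */
void _spin_lock_recursive(spinlock_t *lock)
{
    unsigned int cpu = smp_processor_id();

    if ( lock->recurse_cpu != cpu )
    {
        spin_lock(lock);
        lock->recurse_cpu = cpu;
    }
    lock->recurse_cnt++;
}

void _spin_unlock_recursive(spinlock_t *lock)
{
    if ( --lock->recurse_cnt == 0 )
    {
        lock->recurse_cpu = 0xfffu;      /* matches the "no owner" initialiser */
        spin_unlock(lock);
    }
}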
+
+#define read_lock(l) _read_lock(l)
+#define read_lock_irq(l) _read_lock_irq(l)
+#define read_lock_irqsave(l, f) ((f) = _read_lock_irqsave(l))
+
+#define read_unlock(l) _read_unlock(l)
+#define read_unlock_irq(l) _read_unlock_irq(l)
+#define read_unlock_irqrestore(l, f) _read_unlock_irqrestore(l, f)
+
+#define write_lock(l) _write_lock(l)
+#define write_lock_irq(l) _write_lock_irq(l)
+#define write_lock_irqsave(l, f) ((f) = _write_lock_irqsave(l))
+
+#define write_unlock(l) _write_unlock(l)
+#define write_unlock_irq(l) _write_unlock_irq(l)
+#define write_unlock_irqrestore(l, f) _write_unlock_irqrestore(l, f)
+
+#define rw_is_locked(l) _rw_is_locked(l)
#endif /* __SPINLOCK_H__ */
#include <asm/time.h>
extern int init_xen_time(void);
-extern void cstate_save_tsc(void);
extern void cstate_restore_tsc(void);
extern unsigned long cpu_khz;
#define SECONDS(_s) ((s_time_t)((_s) * 1000000000ULL))
#define MILLISECS(_ms) ((s_time_t)((_ms) * 1000000ULL))
#define MICROSECS(_us) ((s_time_t)((_us) * 1000ULL))
+#define STIME_MAX ((s_time_t)((uint64_t)~0ull>>1))
extern void update_vcpu_system_time(struct vcpu *v);
extern void update_domain_wallclock_time(struct domain *d);
struct timer {
/* System time expiry value (nanoseconds since boot). */
s_time_t expires;
+ s_time_t expires_end;
/* Position in active-timer data structure. */
union {
/* Timer-heap offset. */
unsigned int heap_offset;
- /* Overflow linked list. */
+ /* Linked list. */
struct timer *list_next;
};
/* Arch-defined function to reprogram timer hardware for new deadline. */
extern int reprogram_timer(s_time_t timeout);
+/* calculate the aligned first tick time for a given periodic timer */
+extern s_time_t align_timer(s_time_t firsttick, uint64_t period);
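/*
 * One plausible definition (sketch only; the real body lives elsewhere in the
 * tree): round the requested first expiry up to the next multiple of the
 * period so that periodic ticks land on aligned boundaries.
 */
s_time_t align_timer(s_time_t firsttick, uint64_t period)
{
    if ( !period )
        return firsttick;                          /* nothing to align to */
    return firsttick + (period - 1) - ((firsttick - 1) % period);
}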
+
#endif /* _TIMER_H_ */
/*
#define XENOPROF_DOMAIN_PASSIVE 2
#define XENOPROF_IDLE 0
-#define XENOPROF_COUNTERS_RESERVED 1
-#define XENOPROF_READY 2
-#define XENOPROF_PROFILING 3
+#define XENOPROF_INITIALIZED 1
+#define XENOPROF_COUNTERS_RESERVED 2
+#define XENOPROF_READY 3
+#define XENOPROF_PROFILING 4
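/*
 * The names suggest (assumption; the transitions are handled outside this
 * header) a strict progression IDLE -> INITIALIZED -> COUNTERS_RESERVED ->
 * READY -> PROFILING, with the new INITIALIZED step separating buffer setup
 * from counter reservation.
 */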
#ifndef CONFIG_COMPAT
typedef struct xenoprof_buf xenoprof_buf_t;
! kexec_image kexec.h
! kexec_range kexec.h
! add_to_physmap memory.h
-! remove_from_physmap memory.h
! foreign_memory_map memory.h
! memory_exchange memory.h
! memory_map memory.h
! memory_reservation memory.h
-! translate_gpfn_list memory.h
+! pod_target memory.h
! sched_poll sched.h
? sched_remote_shutdown sched.h
? sched_shutdown sched.h
! processor_cx platform.h
! processor_flags platform.h
! processor_power platform.h
+! pct_register platform.h
+? processor_px platform.h
+! psd_package platform.h
+! processor_performance platform.h
int (*alloc_security_evtchn) (struct evtchn *chn);
void (*free_security_evtchn) (struct evtchn *chn);
- int (*translate_gpfn_list) (struct domain *d, unsigned long mfn);
int (*memory_adjust_reservation) (struct domain *d1, struct domain *d2);
int (*memory_stat_reservation) (struct domain *d1, struct domain *d2);
int (*memory_pin_page) (struct domain *d, struct page_info *page);
int (*update_va_mapping) (struct domain *d, struct domain *f,
l1_pgentry_t pte);
int (*add_to_physmap) (struct domain *d1, struct domain *d2);
- int (*remove_from_physmap) (struct domain *d1, struct domain *d2);
int (*sendtrigger) (struct domain *d);
int (*test_assign_device) (uint32_t machine_bdf);
int (*assign_device) (struct domain *d, uint32_t machine_bdf);
xsm_call(free_security_evtchn(chn));
}
-static inline int xsm_translate_gpfn_list (struct domain *d, unsigned long mfn)
-{
- return xsm_call(translate_gpfn_list(d, mfn));
-}
-
static inline int xsm_memory_adjust_reservation (struct domain *d1, struct
domain *d2)
{
return xsm_call(add_to_physmap(d1, d2));
}
-static inline int xsm_remove_from_physmap(struct domain *d1, struct domain *d2)
-{
- return xsm_call(remove_from_physmap(d1, d2));
-}
-
static inline int xsm_sendtrigger(struct domain *d)
{
return xsm_call(sendtrigger(d));
if (rc != 3) {
if (rc != EOF) {
/* skip line */
- fgets(str, 500, in);
+ if (fgets(str, 500, in) == NULL)
+     return -1; /* could not skip the rest of the line */
}
return -1;
}
return 0;
}
-static int dummy_translate_gpfn_list (struct domain *d, unsigned long mfn)
-{
- return 0;
-}
-
static int dummy_memory_adjust_reservation (struct domain *d1,
struct domain *d2)
{
return 0;
}
-static int dummy_remove_from_physmap (struct domain *d1, struct domain *d2)
-{
- return 0;
-}
#endif
struct xsm_operations dummy_xsm_ops;
set_to_dummy_if_null(ops, alloc_security_evtchn);
set_to_dummy_if_null(ops, free_security_evtchn);
- set_to_dummy_if_null(ops, translate_gpfn_list);
set_to_dummy_if_null(ops, memory_adjust_reservation);
set_to_dummy_if_null(ops, memory_stat_reservation);
set_to_dummy_if_null(ops, memory_pin_page);
set_to_dummy_if_null(ops, mmu_machphys_update);
set_to_dummy_if_null(ops, update_va_mapping);
set_to_dummy_if_null(ops, add_to_physmap);
- set_to_dummy_if_null(ops, remove_from_physmap);
set_to_dummy_if_null(ops, sendtrigger);
set_to_dummy_if_null(ops, test_assign_device);
set_to_dummy_if_null(ops, assign_device);
return rc;
}
-static int flask_translate_gpfn_list(struct domain *d, unsigned long mfn)
-{
- int rc = 0;
- u32 sid;
- struct domain_security_struct *dsec;
- dsec = d->ssid;
-
- rc = get_mfn_sid(mfn, &sid);
- if ( rc )
- return rc;
-
- return avc_has_perm(dsec->sid, sid, SECCLASS_MMU, MMU__TRANSLATEGP, NULL);
-}
-
static int flask_memory_adjust_reservation(struct domain *d1, struct domain *d2)
{
return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__ADJUST);
perm = HVM__SETHVMC;
break;
case XEN_DOMCTL_gethvmcontext:
+ case XEN_DOMCTL_gethvmcontext_partial:
perm = HVM__GETHVMC;
break;
default:
return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP);
}
-static int flask_remove_from_physmap(struct domain *d1, struct domain *d2)
-{
- return domain_has_perm(d1, d2, SECCLASS_MMU, MMU__PHYSMAP);
-}
-
static int flask_sendtrigger(struct domain *d)
{
return domain_has_perm(current->domain, d, SECCLASS_DOMAIN, DOMAIN__TRIGGER);
.alloc_security_evtchn = flask_alloc_security_evtchn,
.free_security_evtchn = flask_free_security_evtchn,
- .translate_gpfn_list = flask_translate_gpfn_list,
.memory_adjust_reservation = flask_memory_adjust_reservation,
.memory_stat_reservation = flask_memory_stat_reservation,
.memory_pin_page = flask_memory_pin_page,
.mmu_machphys_update = flask_mmu_machphys_update,
.update_va_mapping = flask_update_va_mapping,
.add_to_physmap = flask_add_to_physmap,
- .remove_from_physmap = flask_remove_from_physmap,
.sendtrigger = flask_sendtrigger,
.test_assign_device = flask_test_assign_device,
.assign_device = flask_assign_device,
if ( len != strlen(POLICYDB_STRING) )
{
printk(KERN_ERR "security: policydb string length %d does not "
- "match expected length %Zu\n",
- len, (u32) strlen(POLICYDB_STRING));
+ "match expected length %lu\n",
+ len, strlen(POLICYDB_STRING));
goto bad;
}
policydb_str = xmalloc_array(char, len + 1);