GPL when distributed separately or included in software packages
outside this repository. Instead we specify a much more relaxed
BSD-style license. Affected files include the Xen interface headers
-(xen/include/public/COPYING), and various drivers, support functions
-and header files within the Linux source trees on
-http://xenbits.xensource.com/linux-2.6.X-xen.hg. In all such cases,
-license terms are stated at the top of the file or in a COPYING file
-in the same directory. Note that _any_ file that is modified and then
-distributed within a Linux kernel is still subject to the GNU GPL.
+(xen/include/public/COPYING), MiniOS (extras/mini-os) and various
+drivers, support functions and header files within Xen-aware Linux
+source trees. In all such cases, license terms are stated at the top
+of the file or in a COPYING file in the same directory. Note that
+_any_ file that is modified and then distributed within a Linux kernel
+is still subject to the GNU GPL.
-- Keir Fraser (on behalf of the Xen team)
# the internet. The original download URL is preserved as a comment
# near the place in the Xen Makefiles where the file is used.
-QEMU_REMOTE=http://xenbits.xensource.com/git-http/qemu-xen-unstable.git
+QEMU_TAG=xen-3.4.1-rc7
+QEMU_REMOTE=http://xenbits.xensource.com/git-http/qemu-xen-3.4-testing.git
# Specify which qemu-dm to use. This may be `ioemu' to use the old
# Mercurial in-tree version, or a local directory, or a git URL.
#################################
- __ __ _____ _____
- \ \/ /___ _ __ |___ / |___ /
- \ // _ \ '_ \ |_ \ |_ \
- / \ __/ | | | ___) | ___) |
- /_/\_\___|_| |_| |____(_)____/
+ __ __ _____ _ _
+ \ \/ /___ _ __ |___ /| || |
+ \ // _ \ '_ \ |_ \| || |_
+ / \ __/ | | | ___) |__ _|
+ /_/\_\___|_| |_| |____(_) |_|
#################################
by the original Xen development team to build enterprise products
around Xen.
-The 3.3 release offers excellent performance, hardware support and
+The 3.4 release offers excellent performance, hardware support and
enterprise-grade features such as x86_32-PAE, x86_64, SMP guests and
live relocation of VMs. Ports to Linux 2.6, Linux 2.4, NetBSD, FreeBSD
and Solaris are available from the community.
/boot/grub/menu.lst: edit this file to include an entry like the
following:
- title Xen 3.3 / XenLinux 2.6
- kernel /boot/xen-3.3.gz console=vga
+ title Xen 3.4 / XenLinux 2.6
+ kernel /boot/xen-3.4.gz console=vga
module /boot/vmlinuz-2.6-xen root=<root-dev> ro console=tty0
module /boot/initrd-2.6-xen.img
32MB memory for internal use, which is not available for allocation
to virtual machines.
-3. Reboot your system and select the "Xen 3.3 / XenLinux 2.6" menu
+3. Reboot your system and select the "Xen 3.4 / XenLinux 2.6" menu
option. After booting Xen, Linux will start and your initialisation
scripts should execute in the usual way.
here:
http://lists.xensource.com/archives/html/xen-devel/2008-01/msg00010.html
+Xen 3.4 release (and later)
+---------------------------
+
+This file is no longer updated.
+
Xen 3.3 release
---------------
--- /dev/null
+Copyright (c) 2009 Citrix Systems, Inc. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+1. Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+2. Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGE.
+
/*
* Done by Dietmar Hahn <dietmar.hahn@fujitsu-siemens.com>
* Description: simple ia64 specific time handling
- * mktime() is taken from Linux (see copyright below)
* Parts are taken from FreeBSD.
*
****************************************************************************
- * For the copy of the mktime() from linux.
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License
- * along with this program; if not, write to the Free Software
- * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
- *
- ****************************************************************************
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
static uint64_t processor_frequency;
static uint64_t itm_val;
+static int is_leap_year(int year)
+{
+ if( year % 4 == 0 )
+ {
+ if( year % 100 == 0 )
+ {
+ if( year % 400 == 0 ) return 1;
+ else return 0;
+ }
+ return 1;
+ }
+ return 0;
+}
+
+static int count_leap_years(int epoch, int year)
+{
+ int i, result = 0;
+ for( i = epoch ; i < year ; i++ ) if( is_leap_year(i) ) result++;
+ return result;
+}
+
+static int get_day(int year, int mon, int day) {
+ int result;
+ switch(mon)
+ {
+ case 0: result = 0; break;
+ case 1: result = 31; break; /* 1: 31 */
+ case 2: result = 59; break; /* 2: 31+28 */
+ case 3: result = 90; break; /* 3: 59+31 */
+ case 4: result = 120;break; /* 4: 90+30 */
+ case 5: result = 151;break; /* 5: 120+31 */
+ case 6: result = 181;break; /* 6: 151+30 */
+ case 7: result = 212;break; /* 7: 181+31 */
+ case 8: result = 243;break; /* 8: 212+31 */
+ case 9: result = 273;break; /* 9: 243+30 */
+ case 10:result = 304;break; /* 10:273+31 */
+ case 11:result = 334;break; /* 11:304+30 */
+ default: break;
+ }
+ if( is_leap_year(year) && mon >= 2 ) result++;
+ result += day - 1;
+ return result;
+}
/*
- * mktime() is take from Linux. See copyright above.
* Converts Gregorian date to seconds since 1970-01-01 00:00:00.
* Assumes input in normal date format, i.e. 1980-12-31 23:59:59
* => year=1980, mon=12, day=31, hour=23, min=59, sec=59.
*
- * [For the Julian calendar (which was used in Russia before 1917,
- * Britain & colonies before 1752, anywhere else before 1582,
- * and is still in use by some communities) leave out the
- * -year/100+year/400 terms, and add 10.]
- *
- * This algorithm was first published by Gauss (I think).
- *
* WARNING: this function will overflow on 2106-02-07 06:28:16 on
* machines where long is 32-bit! (However, as time_t is signed, we
* will already get problems at other places on 2038-01-19 03:14:08)
*/
-static unsigned long
-_mktime(const unsigned int year0, const unsigned int mon0,
- const unsigned int day, const unsigned int hour,
- const unsigned int min, const unsigned int sec)
+static unsigned long _mktime(const unsigned int year, const unsigned int mon,
+ const unsigned int day, const unsigned int hour,
+ const unsigned int min, const unsigned int sec)
{
- unsigned int mon = mon0, year = year0;
+ unsigned long result = 0;
- /* 1..12 -> 11,12,1..10 */
- if (0 >= (int) (mon -= 2)) {
- mon += 12; /* Puts Feb last since it has leap day */
- year -= 1;
- }
+ result = sec;
+ result += min * 60;
+ result += hour * 3600;
+ result += get_day(year, mon - 1, day) * 86400;
+ result += (year - 1970) * 31536000;
+ result += count_leap_years(1970, year) * 86400;
- return (
- (
- ((unsigned long)
- (year/4 - year/100 + year/400 + 367*mon/12 + day) +
- year*365 - 719499
- ) * 24 + hour /* now have hours */
- ) * 60 + min /* now have minutes */
- ) * 60 + sec; /* finally seconds */
+ return result;
}
static inline uint64_t
#define ORIG_RAX 120 /* + error_code */
#define EFLAGS 144
-#define REST_SKIP 6*8
-.macro SAVE_REST
- subq $REST_SKIP,%rsp
-# CFI_ADJUST_CFA_OFFSET REST_SKIP
- movq %rbx,5*8(%rsp)
-# CFI_REL_OFFSET rbx,5*8
- movq %rbp,4*8(%rsp)
-# CFI_REL_OFFSET rbp,4*8
- movq %r12,3*8(%rsp)
-# CFI_REL_OFFSET r12,3*8
- movq %r13,2*8(%rsp)
-# CFI_REL_OFFSET r13,2*8
- movq %r14,1*8(%rsp)
-# CFI_REL_OFFSET r14,1*8
- movq %r15,(%rsp)
-# CFI_REL_OFFSET r15,0*8
-.endm
-
-
-.macro RESTORE_REST
- movq (%rsp),%r15
-# CFI_RESTORE r15
- movq 1*8(%rsp),%r14
-# CFI_RESTORE r14
- movq 2*8(%rsp),%r13
-# CFI_RESTORE r13
- movq 3*8(%rsp),%r12
-# CFI_RESTORE r12
- movq 4*8(%rsp),%rbp
-# CFI_RESTORE rbp
- movq 5*8(%rsp),%rbx
-# CFI_RESTORE rbx
- addq $REST_SKIP,%rsp
-# CFI_ADJUST_CFA_OFFSET -(REST_SKIP)
-.endm
-
-
-#define ARG_SKIP 9*8
-.macro RESTORE_ARGS skiprax=0,addskip=0,skiprcx=0,skipr11=0,skipr8910=0,skiprdx=0
- .if \skipr11
- .else
+.macro RESTORE_ALL
movq (%rsp),%r11
-# CFI_RESTORE r11
- .endif
- .if \skipr8910
- .else
movq 1*8(%rsp),%r10
-# CFI_RESTORE r10
movq 2*8(%rsp),%r9
-# CFI_RESTORE r9
movq 3*8(%rsp),%r8
-# CFI_RESTORE r8
- .endif
- .if \skiprax
- .else
movq 4*8(%rsp),%rax
-# CFI_RESTORE rax
- .endif
- .if \skiprcx
- .else
movq 5*8(%rsp),%rcx
-# CFI_RESTORE rcx
- .endif
- .if \skiprdx
- .else
movq 6*8(%rsp),%rdx
-# CFI_RESTORE rdx
- .endif
movq 7*8(%rsp),%rsi
-# CFI_RESTORE rsi
movq 8*8(%rsp),%rdi
-# CFI_RESTORE rdi
- .if ARG_SKIP+\addskip > 0
- addq $ARG_SKIP+\addskip,%rsp
-# CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip)
- .endif
+ addq $9*8+8,%rsp
.endm
.macro HYPERVISOR_IRET flag
-# testb $3,1*8(%rsp) /* Don't need to do that in Mini-os, as */
-# jnz 2f /* there is no userspace? */
testl $NMI_MASK,2*8(%rsp)
jnz 2f
* and the exception handler in %rax.
*/
ENTRY(error_entry)
-# _frame RDI
/* rdi slot contains rax, oldrax contains error code */
cld
subq $14*8,%rsp
-# CFI_ADJUST_CFA_OFFSET (14*8)
movq %rsi,13*8(%rsp)
-# CFI_REL_OFFSET rsi,RSI
movq 14*8(%rsp),%rsi /* load rax from rdi slot */
movq %rdx,12*8(%rsp)
-# CFI_REL_OFFSET rdx,RDX
movq %rcx,11*8(%rsp)
-# CFI_REL_OFFSET rcx,RCX
movq %rsi,10*8(%rsp) /* store rax */
-# CFI_REL_OFFSET rax,RAX
movq %r8, 9*8(%rsp)
-# CFI_REL_OFFSET r8,R8
movq %r9, 8*8(%rsp)
-# CFI_REL_OFFSET r9,R9
movq %r10,7*8(%rsp)
-# CFI_REL_OFFSET r10,R10
movq %r11,6*8(%rsp)
-# CFI_REL_OFFSET r11,R11
movq %rbx,5*8(%rsp)
-# CFI_REL_OFFSET rbx,RBX
movq %rbp,4*8(%rsp)
-# CFI_REL_OFFSET rbp,RBP
movq %r12,3*8(%rsp)
-# CFI_REL_OFFSET r12,R12
movq %r13,2*8(%rsp)
-# CFI_REL_OFFSET r13,R13
movq %r14,1*8(%rsp)
-# CFI_REL_OFFSET r14,R14
movq %r15,(%rsp)
-# CFI_REL_OFFSET r15,R15
-#if 0
- cmpl $__KERNEL_CS,CS(%rsp)
- je error_kernelspace
-#endif
+
error_call_handler:
movq %rdi, RDI(%rsp)
movq %rsp,%rdi
jmp error_exit
.macro zeroentry sym
-# INTR_FRAME
movq (%rsp),%rcx
movq 8(%rsp),%r11
addq $0x10,%rsp /* skip rcx and r11 */
pushq $0 /* push error code/oldrax */
-# CFI_ADJUST_CFA_OFFSET 8
pushq %rax /* push real oldrax to the rdi slot */
-# CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
-# CFI_ENDPROC
.endm
.macro errorentry sym
-# XCPT_FRAME
movq (%rsp),%rcx
movq 8(%rsp),%r11
addq $0x10,%rsp /* rsp points to the error code */
pushq %rax
-# CFI_ADJUST_CFA_OFFSET 8
leaq \sym(%rip),%rax
jmp error_entry
-# CFI_ENDPROC
.endm
#define XEN_GET_VCPU_INFO(reg) movq HYPERVISOR_shared_info,reg
decl %gs:0
jmp error_exit
-# ALIGN
restore_all_enable_events:
XEN_UNBLOCK_EVENTS(%rsi) # %rsi is already set up...
XEN_TEST_PENDING(%rsi)
jnz 14f # process more events if necessary...
XEN_PUT_VCPU_INFO(%rsi)
- RESTORE_ARGS 0,8,0
+ RESTORE_ALL
HYPERVISOR_IRET 0
14: XEN_LOCKED_BLOCK_EVENTS(%rsi)
XEN_PUT_VCPU_INFO(%rsi)
- SAVE_REST
+ subq $6*8,%rsp
+ movq %rbx,5*8(%rsp)
+ movq %rbp,4*8(%rsp)
+ movq %r12,3*8(%rsp)
+ movq %r13,2*8(%rsp)
+ movq %r14,1*8(%rsp)
+ movq %r15,(%rsp)
movq %rsp,%rdi # set the argument again
jmp 11b
ecrit: /**** END OF CRITICAL REGION ****/
retint_kernel:
retint_restore_args:
- movl EFLAGS-REST_SKIP(%rsp), %eax
+ movl EFLAGS-6*8(%rsp), %eax
shr $9, %eax # EAX[0] == IRET_EFLAGS.IF
XEN_GET_VCPU_INFO(%rsi)
andb evtchn_upcall_mask(%rsi),%al
jnz restore_all_enable_events # != 0 => enable event delivery
XEN_PUT_VCPU_INFO(%rsi)
- RESTORE_ARGS 0,8,0
+ RESTORE_ALL
HYPERVISOR_IRET 0
error_exit:
- RESTORE_REST
-/* cli */
+ movq (%rsp),%r15
+ movq 1*8(%rsp),%r14
+ movq 2*8(%rsp),%r13
+ movq 3*8(%rsp),%r12
+ movq 4*8(%rsp),%rbp
+ movq 5*8(%rsp),%rbx
+ addq $6*8,%rsp
XEN_BLOCK_EVENTS(%rsi)
jmp retint_kernel
ENTRY(debug)
-# INTR_FRAME
-# CFI_ADJUST_CFA_OFFSET 8 */
zeroentry do_debug
-# CFI_ENDPROC
ENTRY(int3)
-# INTR_FRAME
-# CFI_ADJUST_CFA_OFFSET 8 */
zeroentry do_int3
-# CFI_ENDPROC
ENTRY(overflow)
zeroentry do_overflow
/* runs on exception stack */
ENTRY(stack_segment)
-# XCPT_FRAME
errorentry do_stack_segment
-# CFI_ENDPROC
ENTRY(general_protection)
xenbus_wait_for_value(path, "5", &dev->events);
err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 6);
+ xenbus_wait_for_value(path, "6", &dev->events);
+
+ err = xenbus_printf(XBT_NIL, nodename, "state", "%u", 1);
+ xenbus_wait_for_value(path, "2", &dev->events);
xenbus_unwatch_path(XBT_NIL, path);
req->nr_segments = n;
req->handle = dev->handle;
req->id = (uintptr_t) aiocbp;
- req->sector_number = aiocbp->aio_offset / dev->info.sector_size;
+ req->sector_number = aiocbp->aio_offset / 512;
for (j = 0; j < n; j++) {
req->seg[j].first_sect = 0;
- req->seg[j].last_sect = PAGE_SIZE / dev->info.sector_size - 1;
+ req->seg[j].last_sect = PAGE_SIZE / 512 - 1;
}
- req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / dev->info.sector_size;
- req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / dev->info.sector_size;
+ req->seg[0].first_sect = ((uintptr_t)aiocbp->aio_buf & ~PAGE_MASK) / 512;
+ req->seg[n-1].last_sect = (((uintptr_t)aiocbp->aio_buf + aiocbp->aio_nbytes - 1) & ~PAGE_MASK) / 512;
for (j = 0; j < n; j++) {
uintptr_t data = start + j * PAGE_SIZE;
if (!write) {
snprintf(path, sizeof(path), "%s/feature-update", nodename);
xenbus_rm(XBT_NIL, path);
- unbind_evtchn(dev->evtchn);
-
free_fbfront(dev);
}
#define INT_MIN (-INT_MAX-1)
#define UINT_MAX 0xffffffff
+#define SHRT_MIN (-0x8000)
#define SHRT_MAX 0x7fff
+#define USHRT_MAX 0xffff
#if defined(__x86_64__) || defined(__ia64__)
# define LONG_MAX 0x7fffffffffffffffL
[ -x "$(which udevinfo)" ] && \
UDEV_VERSION=$(udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/')
+[ -z "$UDEV_VERSION" -a -x /sbin/udevadm ] && \
+ UDEV_VERSION=$(/sbin/udevadm info -V | awk '{print $NF}')
+
if [ -n "$UDEV_VERSION" ] && [ $UDEV_VERSION -ge 059 ]; then
echo " - installing for udev-based system"
rm -rf "$tmp/etc/hotplug"
kill %1
(
[ -n "$vncpid" ] && kill -9 $vncpid
+ rm /etc/xen/stubdoms/$domname-dm
xm destroy $domname-dm
- rm /etc/xen/stubdoms/$domname-dm
) &
# We need to exit immediately so as to let xend do the commands above
exit 0
vbd_dev=`xenstore-read $i/dev`
vbd_front=`xenstore-read $i/frontend`
vbd_devtype=`xenstore-read $vbd_front/device-type`
- if [ $vbd_type = "file" ]
- then
- vbd_type="tap:aio"
- fi
if [ $j -ne 0 ]
then
echo -n "," >> /etc/xen/stubdoms/$domname-dm
DPRINTF("Received a poll for a new vbd\n");
if ( ((blk=blkif->info) != NULL) && (blk->params != NULL) ) {
+ if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0)
+ return -1;
+
if (test_path(blk->params, &ptr, &type, &exist, &use_ioemu) != 0) {
DPRINTF("Error in blktap device string(%s).\n",
blk->params);
blkif->fds[WRITE] = exist->fds[WRITE];
}
+ add_disktype(blkif, type);
+ blkif->major = major;
+ blkif->minor = minor;
+
image = (image_t *)malloc(sizeof(image_t));
blkif->prv = (void *)image;
blkif->ops = &tapdisk_ops;
goto fail;
}
- if (blktap_interface_create(ctlfd, &major, &minor, blkif) < 0)
- return -1;
-
- blkif->major = major;
- blkif->minor = minor;
-
- add_disktype(blkif, type);
-
} else return -1;
return 0;
fail:
+ ioctl(ctlfd, BLKTAP_IOCTL_FREEINTF, minor);
return -EINVAL;
}
#define ROUNDUP(l, s) \
({ \
(uint64_t)( \
- (l + (s - 1)) - ((l + (s - 1)) % s)); \
+ ((l) + ((s) - 1)) - (((l) + ((s) - 1)) % (s))); \
})
#undef IOCB_IDX
/* read the level 1 table */
shift = s->cluster_bits + s->l2_bits;
- s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
+ s->l1_size = ROUNDUP(header->size, 1LL << shift);
s->l1_table_offset = header->l1_table_offset;
/*allocate a 4Kbyte multiple of memory*/
l1_table_size = s->l1_size * sizeof(uint64_t);
if (l1_table_size % 4096 > 0) {
- l1_table_size = ((l1_table_size >> 12) + 1) << 12;
+ l1_table_size = ROUNDUP(l1_table_size, 4096);
}
ret = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
if (ret != 0) goto fail;
lseek(fd, 0, SEEK_SET);
l1_table_block = l1_table_size + s->l1_table_offset;
- l1_table_block = l1_table_block + 512 - (l1_table_block % 512);
+ l1_table_block = ROUNDUP(l1_table_block, 512);
ret = posix_memalign((void **)&buf2, 4096, l1_table_block);
if (ret != 0) goto fail;
- if (read(fd, buf2, l1_table_block) != l1_table_block)
+ if (read(fd, buf2, l1_table_block) < l1_table_size + s->l1_table_offset)
goto fail;
memcpy(s->l1_table, buf2 + s->l1_table_offset, l1_table_size);
memcpy(buf2 + s->l1_table_offset, s->l1_table, l1_table_size);
lseek(fd, 0, SEEK_SET);
- if (write(fd, buf2, l1_table_block) != l1_table_block) {
+ if (write(fd, buf2, l1_table_block) <
+ l1_table_size + s->l1_table_offset) {
DPRINTF("qcow: Failed to write new L1 table\n");
goto fail;
}
header_size = (header_size + 7) & ~7;
if (header_size % 4096 > 0) {
- header_size = ((header_size >> 12) + 1) << 12;
+ header_size = ROUNDUP(header_size, 4096);
}
shift = header.cluster_bits + header.l2_bits;
- l1_size = ((size * 512) + (1LL << shift) - 1) >> shift;
+ l1_size = ROUNDUP(size * 512, 1LL << shift);
header.l1_table_offset = cpu_to_be64(header_size);
DPRINTF("L1 Table offset: %d, size %d\n",
has_or_fail vnconfig
;;
Linux)
- has_or_fail udevinfo
- [ "`udevinfo -V | awk '{print $NF}'`" -ge 59 ] 2>/dev/null || \
+ has /sbin/udevadm && \
+ udevver=`/sbin/udevadm info -V | awk '{print $NF}'`
+ [ -z "$udevver" ] && has_or_fail udevinfo && \
+ udevver=`udevinfo -V | awk '{print $NF}'`
+ [ "$udevver" -ge 59 ] 2>/dev/null || \
has hotplug || \
fail "udev is too old, upgrade to version 59 or later"
;;
has_header X11/keysymdef.h || \
has_header /usr/X11R6/include/X11/keysymdef.h || \
-fail "can't find X11 headers"
+has_header /usr/X11R7/include/X11/keysymdef.h || \
+warning "can't find X11 headers"
fi
}
+warning() {
+ echo
+ echo " *** `basename "$0"` WARNING${*+: $*}"
+}
+
fail() {
echo
echo " *** `basename "$0"` FAILED${*+: $*}"
const char *last_byte = data + sz - 1;
while (data <= last_byte) {
- const char *nl = memchr(data, '\n', sz);
+ const char *nl = memchr(data, '\n', last_byte + 1 - data);
int found_nl = (nl != NULL);
if (!found_nl)
nl = last_byte;
dom->buffer.capacity = 0;
dom->buffer.max_capacity = 0;
dom->event_count = 0;
- dom->next_period = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000) + RATE_LIMIT_PERIOD;
+ dom->next_period = ((long long)ts.tv_sec * 1000) + (ts.tv_nsec / 1000000) + RATE_LIMIT_PERIOD;
dom->next = NULL;
dom->ring_ref = -1;
if (clock_gettime(CLOCK_MONOTONIC, &ts) < 0)
return;
- now = (ts.tv_sec * 1000) + (ts.tv_nsec / 1000000);
+ now = ((long long)ts.tv_sec * 1000) + (ts.tv_nsec / 1000000);
/* Re-calculate any event counter allowances & unblock
domains with new allowance */
{
uint8_t devfn;
uint16_t class, vendor_id, device_id;
+ int rom_size = 0;
- for ( devfn = 0; devfn < 128; devfn++ )
+ for ( devfn = 0; (devfn < 128) && !rom_size; devfn++ )
{
class = pci_readw(devfn, PCI_CLASS_DEVICE);
vendor_id = pci_readw(devfn, PCI_VENDOR_ID);
if ( (vendor_id != 0xffff) &&
(device_id != 0xffff) &&
(class == 0x0200) )
- return scan_option_rom(
+ rom_size = scan_option_rom(
devfn, vendor_id, device_id, etherboot, copy_rom_dest);
}
- return 0;
+ return rom_size;
}
/*
/* Jump to the boot vector */
ASM_START
mov bp, sp
-// push cs
-// push #int18_handler
+ push cs
+ push #int18_handler
;; Build an iret stack frame that will take us to the boot vector.
;; iret pops ip, then cs, then flags, so push them in the opposite order.
pushf
ret
rom_checksum:
- push ax
- push bx
- push cx
+ pusha
+ push ds
+
xor ax, ax
xor bx, bx
xor cx, cx
+ xor dx, dx
+
mov ch, [2]
shl cx, #1
+
+ jnc checksum_loop
+ jz checksum_loop
+ xchg dx, cx
+ dec cx
+
checksum_loop:
add al, [bx]
inc bx
loop checksum_loop
+
+ test dx, dx
+ je checksum_out
+
+ add al, [bx]
+ mov cx, dx
+ mov dx, ds
+ add dh, #0x10
+ mov ds, dx
+ xor dx, dx
+ xor bx, bx
+
+ jmp checksum_loop
+
+checksum_out:
and al, #0xff
- pop cx
- pop bx
- pop ax
+ pop ds
+ popa
ret
xc_evtchn_notify(mount->evth, mount->local_evtchn);
}
-static void terminate_mount_request(struct fs_mount *mount) {
+void terminate_mount_request(struct fs_mount *mount)
+{
int count = 0, i;
FS_DEBUG("terminate_mount_request %s\n", mount->frontend);
}
mount->nr_entries = count;
- while (!xenbus_frontend_state_changed(mount, STATE_CLOSING));
+ /* wait for the frontend to shut down but don't wait more than 3
+ * seconds */
+ i = 0;
+ while (!xenbus_frontend_state_changed(mount, STATE_CLOSING) && i < 3) {
+ sleep(1);
+ i++;
+ }
xenbus_write_backend_state(mount, STATE_CLOSED);
xc_gnttab_munmap(mount->gnth, mount->ring.sring, mount->shared_ring_size);
{
struct fs_mount *mount;
struct fs_export *export;
- struct fsif_sring *sring;
+ struct fsif_sring *sring = NULL;
uint32_t dom_ids[MAX_RING_SIZE];
int i;
}
mount = (struct fs_mount*)malloc(sizeof(struct fs_mount));
+ memset(mount, 0, sizeof(struct fs_mount));
mount->dom_id = frontend_dom_id;
mount->export = export;
mount->mount_id = mount_id++;
- xenbus_read_mount_request(mount, frontend);
+ if (xenbus_read_mount_request(mount, frontend) < 0)
+ goto error;
FS_DEBUG("Frontend found at: %s (gref=%d, evtchn=%d)\n",
mount->frontend, mount->grefs[0], mount->remote_evtchn);
- xenbus_write_backend_node(mount);
+ if (!xenbus_write_backend_node(mount)) {
+ FS_DEBUG("ERROR: failed to write backend node on xenbus\n");
+ goto error;
+ }
mount->evth = -1;
mount->evth = xc_evtchn_open();
- assert(mount->evth != -1);
+ if (mount->evth < 0) {
+ FS_DEBUG("ERROR: Couldn't open evtchn!\n");
+ goto error;
+ }
mount->local_evtchn = -1;
mount->local_evtchn = xc_evtchn_bind_interdomain(mount->evth,
mount->dom_id,
mount->remote_evtchn);
- assert(mount->local_evtchn != -1);
+ if (mount->local_evtchn < 0) {
+ FS_DEBUG("ERROR: Couldn't bind evtchn!\n");
+ goto error;
+ }
mount->gnth = -1;
mount->gnth = xc_gnttab_open();
- assert(mount->gnth != -1);
+ if (mount->gnth < 0) {
+ FS_DEBUG("ERROR: Couldn't open gnttab!\n");
+ goto error;
+ }
for(i=0; i<mount->shared_ring_size; i++)
dom_ids[i] = mount->dom_id;
sring = xc_gnttab_map_grant_refs(mount->gnth,
mount->grefs,
PROT_READ | PROT_WRITE);
+ if (!sring) {
+ FS_DEBUG("ERROR: Couldn't map grant refs!\n");
+ goto error;
+ }
+
BACK_RING_INIT(&mount->ring, sring, mount->shared_ring_size * XC_PAGE_SIZE);
mount->nr_entries = mount->ring.nr_ents;
for (i = 0; i < MAX_FDS; i++)
mount->fds[i] = -1;
LIST_INSERT_HEAD(&mount_requests_head, mount, entries);
- xenbus_watch_frontend_state(mount);
- xenbus_write_backend_state(mount, STATE_READY);
-
+ if (!xenbus_watch_frontend_state(mount)) {
+ FS_DEBUG("ERROR: failed to watch frontend state on xenbus\n");
+ goto error;
+ }
+ if (!xenbus_write_backend_state(mount, STATE_READY)) {
+ FS_DEBUG("ERROR: failed to write backend state to xenbus\n");
+ goto error;
+ }
+
allocate_request_array(mount);
+
+ return;
+
+error:
+ xenbus_write_backend_state(mount, STATE_CLOSED);
+ if (sring)
+ xc_gnttab_munmap(mount->gnth, mount->ring.sring, mount->shared_ring_size);
+ if (mount->gnth > 0)
+ xc_gnttab_close(mount->gnth);
+ if (mount->local_evtchn > 0)
+ xc_evtchn_unbind(mount->evth, mount->local_evtchn);
+ if (mount->evth > 0)
+ xc_evtchn_close(mount->evth);
}
static void await_connections(void)
xenbus_create_request_node();
/* Create & register the default export */
- export = create_export("default", "/exports");
+ export = create_export("default", "/var/lib/xen");
xenbus_register_export(export);
if (socketpair(PF_UNIX,SOCK_STREAM, 0, pipefds) == -1)
LIST_ENTRY(fs_mount) entries;
};
+void terminate_mount_request(struct fs_mount *mount);
/* Handle to XenStore driver */
extern struct xs_handle *xsh;
bool xenbus_create_request_node(void);
int xenbus_register_export(struct fs_export *export);
int xenbus_get_watch_fd(void);
-void xenbus_read_mount_request(struct fs_mount *mount, char *frontend);
-void xenbus_write_backend_node(struct fs_mount *mount);
-void xenbus_write_backend_state(struct fs_mount *mount, const char *state);
+int xenbus_read_mount_request(struct fs_mount *mount, char *frontend);
+bool xenbus_write_backend_node(struct fs_mount *mount);
+bool xenbus_write_backend_state(struct fs_mount *mount, const char *state);
int xenbus_frontend_state_changed(struct fs_mount *mount, const char *oldstate);
-void xenbus_watch_frontend_state(struct fs_mount *mount);
-void xenbus_unwatch_frontend_state(struct fs_mount *mount);
+bool xenbus_watch_frontend_state(struct fs_mount *mount);
+bool xenbus_unwatch_frontend_state(struct fs_mount *mount);
char* xenbus_read_frontend_state(struct fs_mount *mount);
/* File operations, implemented in fs-ops.c */
#include <sys/statvfs.h>
#include <sys/mount.h>
#include <unistd.h>
+#include <ctype.h>
#include "fs-backend.h"
#include "fs-debug.h"
#define BUFFER_SIZE 1024
+static int check_export_path(const char *export_path, const char *path)
+{
+ int i;
+ if (!export_path || !path)
+ return -1;
+ if (strlen(path) < strlen(export_path))
+ return -1;
+ if (strstr(path, "..") != NULL)
+ return -1;
+ for (i = 0; i < strlen(path); i++) {
+ if (!isascii(path[i]))
+ return -1;
+ }
+ if (strncmp(export_path, path, strlen(export_path)))
+ return -1;
+ else
+ return 0;
+}
+
static unsigned short get_request(struct fs_mount *mount, struct fsif_request *req)
{
unsigned short id = get_id_from_freelist(mount->freelist);
static void dispatch_file_open(struct fs_mount *mount, struct fsif_request *req)
{
- char *file_name, full_path[BUFFER_SIZE];
+ char *file_name;
int fd;
RING_IDX rsp_idx;
fsif_response_t *rsp;
req_id = req->id;
FS_DEBUG("File open issued for %s\n", file_name);
- assert(BUFFER_SIZE >
- strlen(file_name) + strlen(mount->export->export_path) + 1);
- snprintf(full_path, sizeof(full_path), "%s/%s",
- mount->export->export_path, file_name);
- assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
- FS_DEBUG("Issuing open for %s\n", full_path);
+ if (check_export_path(mount->export->export_path, file_name) < 0) {
+ FS_DEBUG("Filename check failed\n");
+ fd = -1;
+ goto out;
+ }
fd = get_fd(mount);
if (fd >= 0) {
- int real_fd = open(full_path, O_RDWR);
+ int real_fd = open(file_name, O_RDWR);
if (real_fd < 0)
fd = -1;
else
FS_DEBUG("Got FD: %d for real %d\n", fd, real_fd);
}
}
+out:
+ if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+ FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+ terminate_mount_request(mount);
+ }
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overridden by a response) */
mount->ring.req_cons++;
priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
- assert(aio_read(&priv_req->aiocb) >= 0);
+ if (aio_read(&priv_req->aiocb) < 0) {
+ FS_DEBUG("ERROR: aio_read failed errno=%d\n", errno);
+ xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count);
+ terminate_mount_request(mount);
+ }
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overridden by a response) */
uint16_t req_id;
/* Release the grant */
- assert(xc_gnttab_munmap(mount->gnth,
- priv_req->page,
- priv_req->count) == 0);
+ if (xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count) != 0) {
+ FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+ terminate_mount_request(mount);
+ }
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
- assert(aio_write(&priv_req->aiocb) >= 0);
+ if (aio_write(&priv_req->aiocb) < 0) {
+ FS_DEBUG("ERROR: aio_write failed errno=%d\n", errno);
+ xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count);
+ terminate_mount_request(mount);
+ }
/* We can advance the request consumer index, from here on, the request
uint16_t req_id;
/* Release the grant */
- assert(xc_gnttab_munmap(mount->gnth,
- priv_req->page,
- priv_req->count) == 0);
+ if (xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count) != 0) {
+ FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+ terminate_mount_request(mount);
+ }
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
static void dispatch_remove(struct fs_mount *mount, struct fsif_request *req)
{
- char *file_name, full_path[BUFFER_SIZE];
+ char *file_name;
int ret;
RING_IDX rsp_idx;
fsif_response_t *rsp;
req_id = req->id;
FS_DEBUG("File remove issued for %s\n", file_name);
- assert(BUFFER_SIZE >
- strlen(file_name) + strlen(mount->export->export_path) + 1);
- snprintf(full_path, sizeof(full_path), "%s/%s",
- mount->export->export_path, file_name);
- assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
- FS_DEBUG("Issuing remove for %s\n", full_path);
- ret = remove(full_path);
+ if (check_export_path(mount->export->export_path, file_name) < 0) {
+ FS_DEBUG("Filename check failed\n");
+ ret = -1;
+ } else {
+ ret = remove(file_name);
+ }
FS_DEBUG("Got ret: %d\n", ret);
+ if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+ FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+ terminate_mount_request(mount);
+ }
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overridden by a response) */
mount->ring.req_cons++;
static void dispatch_rename(struct fs_mount *mount, struct fsif_request *req)
{
char *buf, *old_file_name, *new_file_name;
- char old_full_path[BUFFER_SIZE], new_full_path[BUFFER_SIZE];
int ret;
RING_IDX rsp_idx;
fsif_response_t *rsp;
new_file_name = buf + req->u.frename.new_name_offset;
FS_DEBUG("File rename issued for %s -> %s (buf=%s)\n",
old_file_name, new_file_name, buf);
- assert(BUFFER_SIZE >
- strlen(old_file_name) + strlen(mount->export->export_path) + 1);
- assert(BUFFER_SIZE >
- strlen(new_file_name) + strlen(mount->export->export_path) + 1);
- snprintf(old_full_path, sizeof(old_full_path), "%s/%s",
- mount->export->export_path, old_file_name);
- snprintf(new_full_path, sizeof(new_full_path), "%s/%s",
- mount->export->export_path, new_file_name);
- assert(xc_gnttab_munmap(mount->gnth, buf, 1) == 0);
- FS_DEBUG("Issuing rename for %s -> %s\n", old_full_path, new_full_path);
- ret = rename(old_full_path, new_full_path);
+ if (check_export_path(mount->export->export_path, old_file_name) < 0 ||
+ check_export_path(mount->export->export_path, new_file_name) < 0) {
+ FS_DEBUG("Filename check failed\n");
+ ret = -1;
+ } else {
+ ret = rename(old_file_name, new_file_name);
+ }
FS_DEBUG("Got ret: %d\n", ret);
+ if (xc_gnttab_munmap(mount->gnth, buf, 1) != 0) {
+ FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+ terminate_mount_request(mount);
+ }
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overridden by a response) */
mount->ring.req_cons++;
static void dispatch_create(struct fs_mount *mount, struct fsif_request *req)
{
- char *file_name, full_path[BUFFER_SIZE];
+ char *file_name;
int ret;
int8_t directory;
int32_t mode;
PROT_READ);
req_id = req->id;
- FS_DEBUG("File create issued for %s\n", file_name);
- assert(BUFFER_SIZE >
- strlen(file_name) + strlen(mount->export->export_path) + 1);
- snprintf(full_path, sizeof(full_path), "%s/%s",
- mount->export->export_path, file_name);
- assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
+ if (check_export_path(mount->export->export_path, file_name) < 0) {
+ FS_DEBUG("Filename check failed\n");
+ ret = -1;
+ goto out;
+ }
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overridden by a response) */
mount->ring.req_cons++;
if(directory)
{
- FS_DEBUG("Issuing create for directory: %s\n", full_path);
- ret = mkdir(full_path, mode);
+ FS_DEBUG("Issuing create for directory: %s\n", file_name);
+ ret = mkdir(file_name, mode);
}
else
{
- FS_DEBUG("Issuing create for file: %s\n", full_path);
+ FS_DEBUG("Issuing create for file: %s\n", file_name);
ret = get_fd(mount);
if (ret >= 0) {
- int real_fd = creat(full_path, mode);
+ int real_fd = creat(file_name, mode);
if (real_fd < 0)
ret = -1;
else
}
}
}
+out:
+ if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+ FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+ terminate_mount_request(mount);
+ }
FS_DEBUG("Got ret %d (errno=%d)\n", ret, errno);
/* Get a response from the ring */
static void dispatch_list(struct fs_mount *mount, struct fsif_request *req)
{
- char *file_name, *buf, full_path[BUFFER_SIZE];
- uint32_t offset, nr_files, error_code;
+ char *file_name, *buf;
+ uint32_t offset = 0, nr_files = 0, error_code = 0;
uint64_t ret_val;
RING_IDX rsp_idx;
fsif_response_t *rsp;
req_id = req->id;
FS_DEBUG("Dir list issued for %s\n", file_name);
- assert(BUFFER_SIZE >
- strlen(file_name) + strlen(mount->export->export_path) + 1);
- snprintf(full_path, sizeof(full_path), "%s/%s",
- mount->export->export_path, file_name);
+ if (check_export_path(mount->export->export_path, file_name) < 0) {
+ FS_DEBUG("Filename check failed\n");
+ error_code = 1;
+ goto error_out;
+ }
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overridden by a response) */
mount->ring.req_cons++;
ret_val = 0;
nr_files = 0;
- dir = opendir(full_path);
+ dir = opendir(file_name);
if(dir == NULL)
{
error_code = errno;
/* If there was any error with reading the directory, errno will be set */
error_code = errno;
/* Copy file names of the remaining non-NULL dirents into buf */
- assert(NAME_MAX < XC_PAGE_SIZE >> 1);
+ if (NAME_MAX >= XC_PAGE_SIZE >> 1)
+ goto error_out;
while(dirent != NULL &&
(XC_PAGE_SIZE - ((unsigned long)buf & XC_PAGE_MASK) > NAME_MAX))
{
ret_val = ((nr_files << NR_FILES_SHIFT) & NR_FILES_MASK) |
((error_code << ERROR_SHIFT) & ERROR_MASK) |
(dirent != NULL ? HAS_MORE_FLAG : 0);
- assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
-
+ if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+ FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+ terminate_mount_request(mount);
+ }
+
/* Get a response from the ring */
rsp_idx = mount->ring.rsp_prod_pvt++;
FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
static void dispatch_fs_space(struct fs_mount *mount, struct fsif_request *req)
{
- char *file_name, full_path[BUFFER_SIZE];
+ char *file_name;
RING_IDX rsp_idx;
fsif_response_t *rsp;
uint16_t req_id;
req_id = req->id;
FS_DEBUG("Fs space issued for %s\n", file_name);
- assert(BUFFER_SIZE >
- strlen(file_name) + strlen(mount->export->export_path) + 1);
- snprintf(full_path, sizeof(full_path), "%s/%s",
- mount->export->export_path, file_name);
- assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
- FS_DEBUG("Issuing fs space for %s\n", full_path);
- ret = statvfs(full_path, &stat);
+ if (check_export_path(mount->export->export_path, file_name) < 0) {
+ FS_DEBUG("Filename check failed\n");
+ ret = -1;
+ } else {
+ ret = statvfs(file_name, &stat);
+ }
if(ret >= 0)
ret = stat.f_bsize * stat.f_bfree;
+ if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
+ FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
+ terminate_mount_request(mount);
+ }
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overridden by a response) */
mount->ring.req_cons++;
priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
- assert(aio_fsync(O_SYNC, &priv_req->aiocb) >= 0);
+ if (aio_fsync(O_SYNC, &priv_req->aiocb) < 0) {
+ FS_DEBUG("ERROR: aio_fsync failed errno=%d\n", errno);
+ terminate_mount_request(mount);
+ }
-
/* We can advance the request consumer index, from here on, the request
* should not be used (it may be overridden by a response) */
mount->ring.req_cons++;
int res;
assert(xsh != NULL);
res = xs_watch(xsh, WATCH_NODE, "conn-watch");
- assert(res);
+ if (!res) {
+ FS_DEBUG("ERROR: xs_watch %s failed ret=%d errno=%d\n",
+ WATCH_NODE, res, errno);
+ return -1;
+ }
return xs_fileno(xsh);
}
-void xenbus_read_mount_request(struct fs_mount *mount, char *frontend)
+int xenbus_read_mount_request(struct fs_mount *mount, char *frontend)
{
char node[1024];
char *s;
mount->frontend = frontend;
snprintf(node, sizeof(node), "%s/state", frontend);
s = xs_read(xsh, XBT_NULL, node, NULL);
- assert(strcmp(s, STATE_READY) == 0);
+ if (strcmp(s, STATE_READY) != 0) {
+ FS_DEBUG("ERROR: frontend not ready\n");
+ goto error;
+ }
free(s);
snprintf(node, sizeof(node), "%s/ring-size", frontend);
s = xs_read(xsh, XBT_NULL, node, NULL);
mount->shared_ring_size = atoi(s);
- assert(mount->shared_ring_size <= MAX_RING_SIZE);
+ if (mount->shared_ring_size > MAX_RING_SIZE) {
+ FS_DEBUG("ERROR: shared_ring_size (%d) > MAX_RING_SIZE\n", mount->shared_ring_size);
+ goto error;
+ }
free(s);
for(i=0; i<mount->shared_ring_size; i++)
{
s = xs_read(xsh, XBT_NULL, node, NULL);
mount->remote_evtchn = atoi(s);
free(s);
+ return 0;
+
+error:
+ free(s);
+ return -1;
}
/* Small utility function to figure out our domain id */
}
-void xenbus_write_backend_node(struct fs_mount *mount)
+bool xenbus_write_backend_node(struct fs_mount *mount)
{
char node[1024], backend_node[1024];
int self_id;
xs_write(xsh, XBT_NULL, node, backend_node, strlen(backend_node));
snprintf(node, sizeof(node), ROOT_NODE"/%d/state", mount->mount_id);
- xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED));
+ return xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED));
}
-void xenbus_write_backend_state(struct fs_mount *mount, const char *state)
+bool xenbus_write_backend_state(struct fs_mount *mount, const char *state)
{
char node[1024];
int self_id;
assert(xsh != NULL);
self_id = get_self_id();
snprintf(node, sizeof(node), ROOT_NODE"/%d/state", mount->mount_id);
- xs_write(xsh, XBT_NULL, node, state, strlen(state));
+ return xs_write(xsh, XBT_NULL, node, state, strlen(state));
}
-void xenbus_watch_frontend_state(struct fs_mount *mount)
+bool xenbus_watch_frontend_state(struct fs_mount *mount)
{
- int res;
char statepath[1024];
assert(xsh != NULL);
snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
- res = xs_watch(xsh, statepath, "frontend-state");
- assert(res);
+ return xs_watch(xsh, statepath, "frontend-state");
}
-void xenbus_unwatch_frontend_state(struct fs_mount *mount)
+bool xenbus_unwatch_frontend_state(struct fs_mount *mount)
{
- int res;
char statepath[1024];
assert(xsh != NULL);
snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
- res = xs_unwatch(xsh, statepath, "frontend-state");
- assert(res);
+ return xs_unwatch(xsh, statepath, "frontend-state");
}
int xenbus_frontend_state_changed(struct fs_mount *mount, const char *oldstate)
XEN_HOTPLUG_DIR = /etc/hotplug
XEN_HOTPLUG_SCRIPTS = xen-backend.agent
+UDEVVER = 0
+ifeq ($(shell [ -x /sbin/udevadm ] && echo 1),1)
+UDEVVER = $(shell /sbin/udevadm info -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' )
+endif
+ifeq ($(shell [ -x /usr/bin/udevinfo ] && echo 1),1)
+UDEVVER = $(shell /usr/bin/udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/' )
+endif
+
UDEV_RULES_DIR = /etc/udev
UDEV_RULES = xen-backend.rules xend.rules
ifeq ($(findstring $(DI),$(DE)),$(DI))
HOTPLUGS=install-hotplug install-udev
else
-ifeq ($(shell [ -x /usr/bin/udevinfo ] && [ `/usr/bin/udevinfo -V | sed -e 's/^[^0-9]* \([0-9]\{1,\}\)[^0-9]\{0,\}/\1/'` -ge 059 ] && echo 1),1)
+ifeq ($(shell [ $(UDEVVER) -ge 059 ] && echo 1),1)
HOTPLUGS=install-udev
else
HOTPLUGS=install-hotplug
p=$(xenstore_read "$XENBUS_PATH/params")
mode=$(xenstore_read "$XENBUS_PATH/mode")
fi
+ FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
+ FRONTEND_UUID=$(xenstore_read_default \
+ "/local/domain/$FRONTEND_ID/vm" 'unknown')
case $t in
phy)
dev=$(expand_dev $p)
- FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
- FRONTEND_UUID=$(xenstore_read_default \
- "/local/domain/$FRONTEND_ID/vm" 'unknown')
if [ -L "$dev" ]
then
# allowed.
local othervm=$(xenstore_read_default "/local/domain/$otherdom/vm" \
"$FRONTEND_UUID")
-
- [ "$FRONTEND_UUID" = "$othervm" ]
+ local target=$(xenstore_read_default "/local/domain/$FRONTEND_ID/target" \
+ "-1")
+ [ "$FRONTEND_UUID" = "$othervm" -o "$target" = "$otherdom" ]
}
case "$1" in
start)
+ mkdir -p /var/lock/subsys
touch /var/lock/subsys/xend
xend start
await_daemons_up
return
fi
+ claim_lock "iptables"
+
if [ "$ip" != "" ]
then
local addr
# No IP addresses have been specified, so allow anything.
frob_iptable
fi
+
+ release_lock "iptables"
}
diff -u reference.size tmp.size
rm tmp.size
-x86_32.h: mkheader.py $(ROOT)/arch-x86/xen-x86_32.h $(ROOT)/arch-x86/xen.h $(ROOT)/xen.h
+x86_32.h: mkheader.py structs.py $(ROOT)/arch-x86/xen-x86_32.h $(ROOT)/arch-x86/xen.h $(ROOT)/xen.h
python $< $* $@ $(filter %.h,$^)
-x86_64.h: mkheader.py $(ROOT)/arch-x86/xen-x86_64.h $(ROOT)/arch-x86/xen.h $(ROOT)/xen.h
+x86_64.h: mkheader.py structs.py $(ROOT)/arch-x86/xen-x86_64.h $(ROOT)/arch-x86/xen.h $(ROOT)/xen.h
python $< $* $@ $(filter %.h,$^)
-ia64.h: mkheader.py $(ROOT)/arch-ia64.h $(ROOT)/xen.h
+ia64.h: mkheader.py structs.py $(ROOT)/arch-ia64.h $(ROOT)/xen.h
python $< $* $@ $(filter %.h,$^)
-checker.c: mkchecker.py
+checker.c: mkchecker.py structs.py
python $< $@ $(architectures)
# string, hence the sed black magic. This avoids the expensive
# re-generation of .po files on every single build
refresh-pot: $(I18NSRCFILES)
- xgettext --default-domain=$(POPACAKGE) \
+ xgettext --default-domain=$(POPACKAGE) \
--keyword=N_ \
--keyword=_ \
-o $(POTFILE)-tmp \
#============================================================================
import os
-import sha
import stat
import array
import struct
import shutil
import commands
+
+# sha is deprecated as of python 2.6
+try:
+ from hashlib import sha1
+except ImportError:
+ # but hashlib was only added in python 2.5
+ from sha import new as sha1
+
from xml.dom import minidom, Node
from xen.xend.XendLogging import log
from xen.util import xsconstants, bootloader, mkdir
return None
def hash(self):
- """ Calculate a SAH1 hash of the XML policy """
- return sha.sha(self.toxml())
+ """ Calculate a SHA1 hash of the XML policy """
+ return sha1(self.toxml())
def save(self):
### Save the XML policy into a file ###
return (domain, bus, slot, func)
+def extract_the_exact_pci_names(pci_names):
+ result = []
+
+ if isinstance(pci_names, types.StringTypes):
+ pci_names = pci_names.split()
+ elif isinstance(pci_names, types.ListType):
+ pci_names = re.findall(PCI_DEV_REG_EXPRESS_STR, '%s' % pci_names)
+ else:
+ raise PciDeviceParseError('Invalid argument: %s' % pci_names)
+
+ for pci in pci_names:
+ # The length of DDDD:bb:dd.f is 12.
+ if len(pci) != 12:
+ continue
+ if re.match(PCI_DEV_REG_EXPRESS_STR, pci) is None:
+ continue
+ result = result + [pci]
+ return result
+
def find_sysfs_mnt():
try:
return utils.find_sysfs_mount()
sysfs_mnt = find_sysfs_mnt()
pciback_path = sysfs_mnt + SYSFS_PCIBACK_PATH
pci_names = os.popen('ls ' + pciback_path).read()
- pci_list = re.findall(PCI_DEV_REG_EXPRESS_STR, pci_names)
+ pci_list = extract_the_exact_pci_names(pci_names)
dev_list = []
for pci in pci_list:
(dom, b, d, f) = parse_pci_name(pci)
def __init__(self,msg):
self.message = msg
def __str__(self):
- return 'pci: impproper device assignment spcified: ' + \
+ return 'pci: improper device assignment specified: ' + \
self.message
class PciDevice:
sysfs_mnt = find_sysfs_mnt()
self_path = sysfs_mnt + SYSFS_PCI_DEVS_PATH + '/' + self.name
pci_names = os.popen('ls ' + self_path).read()
- dev_list = re.findall(PCI_DEV_REG_EXPRESS_STR, pci_names)
+ dev_list = extract_the_exact_pci_names(pci_names)
list = [self.name]
for pci_str in dev_list:
return [self.name]
dev_list = dev.find_all_devices_behind_the_bridge(ignore_bridge)
- dev_list = re.findall(PCI_DEV_REG_EXPRESS_STR, '%s' % dev_list)
+ dev_list = extract_the_exact_pci_names(dev_list)
return dev_list
def do_secondary_bus_reset(self, target_bus, devs):
def find_all_the_multi_functions(self):
sysfs_mnt = find_sysfs_mnt()
- pci_names = os.popen('ls ' + sysfs_mnt + SYSFS_PCI_DEVS_PATH).read()
- p = self.name
- p = p[0 : p.rfind('.')] + '.[0-7]'
- funcs = re.findall(p, pci_names)
+ parent = PCI_DEV_FORMAT_STR % self.find_parent()
+ pci_names = os.popen('ls ' + sysfs_mnt + SYSFS_PCI_DEVS_PATH + '/' + \
+ parent + '/').read()
+ funcs = extract_the_exact_pci_names(pci_names)
return funcs
def find_coassigned_devices(self):
for TCP and unix-domain sockets (see tcp.py and unix.py).
"""
-BUFFER_SIZE = 1024
+BUFFER_SIZE = 16384
BACKLOG = 5
import inspect
import os
import Queue
-import sets
import string
import sys
import traceback
import time
import xmlrpclib
+# sets is deprecated as of python 2.6, but set is unavailable in 2.3
+try:
+ set
+except NameError:
+ from sets import Set as set
+
import XendDomain, XendDomainInfo, XendNode, XendDmesg
import XendLogging, XendTaskManager, XendAPIStore
def event_register(session, reg_classes):
if session not in event_registrations:
event_registrations[session] = {
- 'classes' : sets.Set(),
+ 'classes' : set(),
'queue' : Queue.Queue(EVENT_QUEUE_LENGTH),
'next-id' : 1
}
if not reg_classes:
reg_classes = classes
- if hasattr(set, 'union_update'):
- event_registrations[session]['classes'].union_update(reg_classes)
+ sessionclasses = event_registrations[session]['classes']
+ if hasattr(sessionclasses, 'union_update'):
+ sessionclasses.union_update(reg_classes)
else:
- event_registrations[session]['classes'].update(reg_classes)
+ sessionclasses.update(reg_classes)
# co-assignment devices hasn't been assigned, or has been assigned to
# domN.
coassignment_list = pci_device.find_coassigned_devices()
+ pci_device.devs_check_driver(coassignment_list)
assigned_pci_device_str_list = self._get_assigned_pci_devices()
for pci_str in coassignment_list:
(domain, bus, dev, func) = parse_pci_name(pci_str)
pci_str_list = pci_str_list + [pci_str]
pci_dev_list = pci_dev_list + [(domain, bus, slot, func)]
+ if len(pci_str_list) != len(set(pci_str_list)):
+ raise VmError('pci: duplicate devices specified in guest config?')
+
for (domain, bus, slot, func) in pci_dev_list:
try:
dev = PciDevice(domain, bus, slot, func)
log.warn(err_msg % dev.name)
else:
funcs = dev.find_all_the_multi_functions()
+ dev.devs_check_driver(funcs)
for f in funcs:
if not f in pci_str_list:
(f_dom, f_bus, f_slot, f_func) = parse_pci_name(f)
# Remove the element 0 which is a bridge
del devs_str[0]
+ dev.devs_check_driver(devs_str)
for s in devs_str:
if not s in pci_str_list:
(s_dom, s_bus, s_slot, s_func) = parse_pci_name(s)
for i in range(10):
# Catch failure of the create process
time.sleep(1)
- (p, rv) = os.waitpid(cpid, os.WNOHANG)
+ try:
+ (p, rv) = os.waitpid(cpid, os.WNOHANG)
+ except OSError:
+                # Domain has started cleanly and then exited;
+                # the child process used to do this has detached
+                print("Domain has already finished")
+ break
if os.WIFEXITED(rv):
if os.WEXITSTATUS(rv) != 0:
sys.exit(os.WEXITSTATUS(rv))
#include "xenstat_priv.h"
-#define SYSFS_VBD_PATH "/sys/devices/xen-backend/"
+#define SYSFS_VBD_PATH "/sys/bus/xen-backend/devices"
struct priv_data {
FILE *procnetdev;
#define setup_xen_features xen_setup_features
#endif
+#ifndef atomic_cmpxchg
+#define atomic_cmpxchg(v, old, new) (cmpxchg(&((v)->counter), (old), (new)))
+#endif
+
#endif
# All other places this is stored (eg. compile.h) should be autogenerated.
export XEN_VERSION = 3
export XEN_SUBVERSION = 4
-export XEN_EXTRAVERSION ?= .0-rc4-pre$(XEN_VENDORVERSION)
+export XEN_EXTRAVERSION ?= .1-rc7$(XEN_VENDORVERSION)
export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION)
-include xen-version
spin_unlock_irqrestore(&irq_descp(vec)->lock, flags);
return 0;
}
+
+/* for vtd interrupt remapping. xen/drivers/vtd/intremap.c */
+int iosapic_get_nr_iosapics(void)
+{
+ int index;
+
+ for (index = NR_IOSAPICS - 1; index >= 0; index--) {
+ if (iosapic_lists[index].addr)
+ break;
+ }
+
+ return index + 1;
+}
+
+int iosapic_get_nr_pins(int index)
+{
+ return iosapic_lists[index].num_rte;
+}
#endif /* XEN */
cpufreq_dbs_timer_suspend();
sched_tick_suspend();
- /*
- * sched_tick_suspend may raise TIMER_SOFTIRQ by __stop_timer,
- * which will break the later assumption of no sofirq pending,
- * so add do_softirq
- */
- if ( softirq_pending(smp_processor_id()) )
- do_softirq();
+ /* sched_tick_suspend() can raise TIMER_SOFTIRQ. Process it now. */
+ process_pending_timers();
/*
* Interrupts must be disabled during bus mastering calculations and
}
static unsigned int latency_factor = 2;
+integer_param("idle_latency_factor", latency_factor);
static void set_cx(
struct acpi_processor_power *acpi_power,
static DEFINE_PER_CPU(struct menu_device, menu_devices);
-static s_time_t get_sleep_length_ns(void)
+static unsigned int get_sleep_length_us(void)
{
- return per_cpu(timer_deadline, smp_processor_id()) - NOW();
+ s_time_t us = (per_cpu(timer_deadline, smp_processor_id()) - NOW()) / 1000;
+ /*
+     * If us < 0 or us > (u32)-1, return a large u32. We choose
+     * (unsigned int)-2000 to avoid wrapping when the exit latency is
+     * added, since the latency should not be larger than 2ms.
+ */
+ return (us >> 32) ? (unsigned int)-2000 : (unsigned int)us;
}
static int menu_select(struct acpi_processor_power *power)
int i;
/* determine the expected residency time */
- data->expected_us = (u32) get_sleep_length_ns() / 1000;
+ data->expected_us = get_sleep_length_us();
/* find the deepest idle state that satisfies our constraints */
for ( i = 2; i < power->count; i++ )
LOAD_GREG(sp)
#if defined(__x86_64__)
+ /* Reload code selector */
+ pushq $(__HYPERVISOR_CS64)
+ leaq 1f(%rip),%rax
+ pushq %rax
+ lretq
+1:
mov REF(saved_cr8), %rax
mov %rax, %cr8
# Add offset for any reference to xen specific symbols
wakeup_32:
+ /* Set up segment registers and initial stack for protected mode */
mov $BOOT_DS, %eax
mov %eax, %ds
mov %eax, %ss
wbinvd
+ /* Enable paging and flush prefetch queue */
mov $0x80050033,%eax /* hi-to-lo: PG,AM,WP,NE,ET,MP,PE */
mov %eax,%cr0
jmp 1f
ljmp $BOOT_CS64, $bootsym_phys(wakeup_64)
.code64
- .align 8
- .word 0,0,0
-lgdt_descr:
- .word LAST_RESERVED_GDT_BYTE
- .quad boot_cpu_gdt_table - FIRST_RESERVED_GDT_BYTE
-
wakeup_64:
- lgdt lgdt_descr(%rip)
- mov $(__HYPERVISOR_DS64), %eax
- mov %eax, %ds
-
- # long jump to return point, with cs reload
- rex64 ljmp *ret_point(%rip)
+ /* Jump to high mappings and the higher-level wakeup code. */
+ movq ret_point(%rip), %rbx
+ jmp *%rbx
- .align 8
ret_point:
.quad __ret_point
- .word __HYPERVISOR_CS64
#else /* !defined(__x86_64__) */
+
lgdt gdt_descr
mov $(__HYPERVISOR_DS), %eax
mov %eax, %ds
ljmp $(__HYPERVISOR_CS), $__ret_point
+
#endif
bogus_saved_magic:
#include "cpu.h"
#include "amd.h"
-int start_svm(struct cpuinfo_x86 *c);
+void start_svm(struct cpuinfo_x86 *c);
/*
* Pre-canned values for overriding the CPUID features
obj-y += irq.o
obj-y += mtrr.o
obj-y += pmtimer.o
+obj-y += quirks.o
obj-y += rtc.o
obj-y += hpet.o
obj-y += vpt.o
#include <xen/event.h>
#include <xen/paging.h>
#include <asm/shadow.h>
+#include <asm/hap.h>
#include <asm/current.h>
#include <asm/e820.h>
#include <asm/io.h>
void hvm_enable(struct hvm_function_table *fns)
{
+ extern int hvm_port80_allowed;
+
BUG_ON(hvm_enabled);
printk("HVM: %s enabled\n", fns->name);
* often used for I/O delays, but the vmexits simply slow things down).
*/
memset(hvm_io_bitmap, ~0, sizeof(hvm_io_bitmap));
- __clear_bit(0x80, hvm_io_bitmap);
+ if ( hvm_port80_allowed )
+ __clear_bit(0x80, hvm_io_bitmap);
__clear_bit(0xed, hvm_io_bitmap);
hvm_funcs = *fns;
vc = &v->arch.guest_context;
/* Need to init this vcpu before loading its contents */
+ rc = 0;
domain_lock(d);
if ( !v->is_initialised )
- if ( (rc = boot_vcpu(d, vcpuid, vc)) != 0 )
- return rc;
+ rc = boot_vcpu(d, vcpuid, vc);
domain_unlock(d);
+ if ( rc != 0 )
+ return rc;
if ( hvm_load_entry(CPU, h, &ctxt) != 0 )
return -EINVAL;
v->fpu_initialised = 1;
/* Auxiliary processors should be woken immediately. */
- if ( test_and_clear_bit(_VPF_down, &v->pause_flags) )
- vcpu_wake(v);
+ v->is_initialised = 1;
+ clear_bit(_VPF_down, &v->pause_flags);
+ vcpu_wake(v);
return 0;
}
vpic_reset(d);
vioapic_reset(d);
pit_reset(d);
- rtc_reset(d);
+ rtc_reset(d);
pmtimer_reset(d);
hpet_reset(d);
goto param_fail2;
rc = -EINVAL;
- if ( !shadow_mode_enabled(d))
- goto param_fail2;
if ( d->vcpu[0] == NULL )
goto param_fail2;
- rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
+ if ( shadow_mode_enabled(d) )
+ rc = shadow_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
+ else
+ rc = hap_track_dirty_vram(d, a.first_pfn, a.nr, a.dirty_bitmap);
param_fail2:
rcu_unlock_domain(d);
#include <asm/hvm/vpt.h>
#include <asm/current.h>
-#define domain_vpit(d) (&(d)->arch.hvm_domain.pl_time.vpit)
-#define vcpu_vpit(vcpu) (domain_vpit((vcpu)->domain))
-#define vpit_domain(pit) (container_of((pit), struct domain, \
- arch.hvm_domain.pl_time.vpit))
-#define vpit_vcpu(pit) (vpit_domain(pit)->vcpu[0])
+#define domain_vpit(x) (&(x)->arch.hvm_domain.pl_time.vpit)
+#define vcpu_vpit(x) (domain_vpit((x)->domain))
+#define vpit_domain(x) (container_of((x), struct domain, \
+ arch.hvm_domain.pl_time.vpit))
+#define vpit_vcpu(x) (vpit_domain(x)->vcpu[0])
#define RW_STATE_LSB 1
#define RW_STATE_MSB 2
--- /dev/null
+/******************************************************************************
+ * x86/hvm/quirks.c
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+ * Place - Suite 330, Boston, MA 02111-1307 USA.
+ */
+
+#include <xen/config.h>
+#include <xen/types.h>
+#include <xen/init.h>
+#include <xen/lib.h>
+#include <xen/dmi.h>
+#include <xen/bitmap.h>
+#include <asm/hvm/support.h>
+
+int hvm_port80_allowed = -1;
+boolean_param("hvm_port80", hvm_port80_allowed);
+
+static int __init dmi_hvm_deny_port80(/*const*/ struct dmi_system_id *id)
+{
+ printk(XENLOG_WARNING "%s: port 0x80 access %s allowed for HVM guests\n",
+ id->ident, hvm_port80_allowed > 0 ? "forcibly" : "not");
+
+ if ( hvm_port80_allowed < 0 )
+ hvm_port80_allowed = 0;
+
+ return 0;
+}
+
+static int __init check_port80(void)
+{
+ /*
+ * Quirk table for systems that misbehave (lock up, etc.) if port
+ * 0x80 is used:
+ */
+ static struct dmi_system_id __initdata hvm_no_port80_dmi_table[] =
+ {
+ {
+ .callback = dmi_hvm_deny_port80,
+ .ident = "Compaq Presario V6000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B7")
+ }
+ },
+ {
+ .callback = dmi_hvm_deny_port80,
+ .ident = "HP Pavilion dv9000z",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B9")
+ }
+ },
+ {
+ .callback = dmi_hvm_deny_port80,
+ .ident = "HP Pavilion dv6000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30B8")
+ }
+ },
+ {
+ .callback = dmi_hvm_deny_port80,
+ .ident = "HP Pavilion tx1000",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30BF")
+ }
+ },
+ {
+ .callback = dmi_hvm_deny_port80,
+ .ident = "Presario F700",
+ .matches = {
+ DMI_MATCH(DMI_BOARD_VENDOR, "Quanta"),
+ DMI_MATCH(DMI_BOARD_NAME, "30D3")
+ }
+ },
+ { }
+ };
+
+ dmi_check_system(hvm_no_port80_dmi_table);
+
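+ /* Unless explicitly allowed, intercept port 0x80 so guests cannot drive
+  * the physical port directly. */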
+ if ( !hvm_port80_allowed )
+ __set_bit(0x80, hvm_io_bitmap);
+
+ return 0;
+}
+__initcall(check_port80);
#include <asm/hvm/support.h>
#include <asm/current.h>
-#define domain_vrtc(d) (&(d)->arch.hvm_domain.pl_time.vrtc)
-#define vcpu_vrtc(vcpu) (domain_vrtc((vcpu)->domain))
-#define vrtc_domain(rtc) (container_of((rtc), struct domain, \
- arch.hvm_domain.pl_time.vrtc))
-#define vrtc_vcpu(rtc) (vrtc_domain(rtc)->vcpu[0])
+#define domain_vrtc(x) (&(x)->arch.hvm_domain.pl_time.vrtc)
+#define vcpu_vrtc(x) (domain_vrtc((x)->domain))
+#define vrtc_domain(x) (container_of((x), struct domain, \
+ arch.hvm_domain.pl_time.vrtc))
+#define vrtc_vcpu(x) (vrtc_domain(x)->vcpu[0])
static void rtc_periodic_cb(struct vcpu *v, void *opaque)
{
u32 next_asid;
u32 max_asid;
u32 erratum170:1;
+ u32 initialised:1;
};
static DEFINE_PER_CPU(struct svm_asid_data, svm_asid_data);
*/
static struct svm_asid_data *svm_asid_core_data(void)
{
- return &get_cpu_var(svm_asid_data);
+ return &this_cpu(svm_asid_data);
}
/*
int nasids;
struct svm_asid_data *data = svm_asid_core_data();
+ /*
+ * If already initialised, we just bump the generation to force a TLB
+ * flush. Resetting the generation could be dangerous, if VCPUs still
+ * exist that reference earlier generations on this CPU.
+ */
+ if ( data->initialised )
+ return svm_asid_inc_generation();
+ data->initialised = 1;
+
/* Find #ASID. */
nasids = cpuid_ebx(0x8000000A);
data->max_asid = nasids - 1;
.invlpg_intercept = svm_invlpg_intercept
};
-int start_svm(struct cpuinfo_x86 *c)
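+/* Per-CPU SVM setup: check the BIOS lock bit, allocate the host save area
+ * and root VMCB (once per CPU), enable EFER.SVME and point MSR_K8_VM_HSAVE_PA
+ * at this CPU's save area. */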
+static int svm_cpu_up(struct cpuinfo_x86 *c)
{
- u32 eax, ecx, edx;
- u32 phys_hsa_lo, phys_hsa_hi;
+ u32 eax, edx, phys_hsa_lo, phys_hsa_hi;
u64 phys_hsa;
int cpu = smp_processor_id();
- /* Xen does not fill x86_capability words except 0. */
- ecx = cpuid_ecx(0x80000001);
- boot_cpu_data.x86_capability[5] = ecx;
-
- if ( !(test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability)) )
- return 0;
-
/* Check whether SVM feature is disabled in BIOS */
rdmsr(MSR_K8_VM_CR, eax, edx);
if ( eax & K8_VMCR_SVME_DISABLE )
{
- printk("AMD SVM Extension is disabled in BIOS.\n");
+ printk("CPU%d: AMD SVM Extension is disabled in BIOS.\n", cpu);
return 0;
}
- if ( ((hsa[cpu] = alloc_host_save_area()) == NULL) ||
- ((root_vmcb[cpu] = alloc_vmcb()) == NULL) )
+ if ( ((hsa[cpu] == NULL) &&
+ ((hsa[cpu] = alloc_host_save_area()) == NULL)) ||
+ ((root_vmcb[cpu] == NULL) &&
+ ((root_vmcb[cpu] = alloc_vmcb()) == NULL)) )
return 0;
write_efer(read_efer() | EFER_SVME);
/* Initialize the HSA for this core. */
- phys_hsa = (u64) virt_to_maddr(hsa[cpu]);
- phys_hsa_lo = (u32) phys_hsa;
- phys_hsa_hi = (u32) (phys_hsa >> 32);
+ phys_hsa = (u64)virt_to_maddr(hsa[cpu]);
+ phys_hsa_lo = (u32)phys_hsa;
+ phys_hsa_hi = (u32)(phys_hsa >> 32);
wrmsr(MSR_K8_VM_HSAVE_PA, phys_hsa_lo, phys_hsa_hi);
/* Initialize core's ASID handling. */
svm_asid_init(c);
- if ( cpu != 0 )
- return 1;
+ return 1;
+}
+
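+/* Called on every CPU during bring-up: the boot CPU probes for SVM and
+ * registers the HVM function table; secondary CPUs only perform the per-CPU
+ * setup above and must not fail once HVM is enabled. */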
+void start_svm(struct cpuinfo_x86 *c)
+{
+ static bool_t bootstrapped;
+
+ if ( test_and_set_bool(bootstrapped) )
+ {
+ if ( hvm_enabled && !svm_cpu_up(c) )
+ {
+ printk("SVM: FATAL: failed to initialise CPU%d!\n",
+ smp_processor_id());
+ BUG();
+ }
+ return;
+ }
+
+ /* Xen does not fill x86_capability words except 0. */
+ boot_cpu_data.x86_capability[5] = cpuid_ecx(0x80000001);
+
+ if ( !test_bit(X86_FEATURE_SVME, &boot_cpu_data.x86_capability) )
+ return;
+
+ if ( !svm_cpu_up(c) )
+ {
+ printk("SVM: failed to initialise.\n");
+ return;
+ }
setup_vmcb_dump();
svm_function_table.hap_supported = cpu_has_svm_npt;
hvm_enable(&svm_function_table);
-
- return 1;
}
static void svm_do_nested_pgfault(paddr_t gpa, struct cpu_user_regs *regs)
#define VIRIDIAN_MSR_EOI 0x40000070
#define VIRIDIAN_MSR_ICR 0x40000071
#define VIRIDIAN_MSR_TPR 0x40000072
+#define VIRIDIAN_MSR_APIC_ASSIST 0x40000073
/* Viridian Hypercall Status Codes. */
#define HV_STATUS_SUCCESS 0x0000
return 0;
leaf -= 0x40000000;
- if ( leaf > 5 )
+ if ( leaf > 6 )
return 0;
*eax = *ebx = *ecx = *edx = 0;
switch ( leaf )
{
case 0:
- *eax = 0x40000005; /* Maximum leaf */
+ *eax = 0x40000006; /* Maximum leaf */
*ebx = 0x7263694d; /* Magic numbers */
*ecx = 0x666F736F;
*edx = 0x76482074;
vlapic_set_reg(vcpu_vlapic(current), APIC_TASKPRI, eax & 0xff);
break;
+ case VIRIDIAN_MSR_APIC_ASSIST:
+ /*
+ * We don't support the APIC assist page, and that fact is reflected in
+ * our CPUID flags. However, Windows 7 build 7000 has a bug which means
+ * that it doesn't recognise that, and tries to use the page anyway. We
+ * therefore have to fake up just enough to keep win7 happy.
+ * Fortunately, that's really easy: just setting the first four bytes
+ * in the page to zero effectively disables the page again, so that's
+ * what we do. Semantically, the first four bytes are supposed to be a
+ * flag saying whether the guest really needs to issue an EOI. Setting
+ * that flag to zero means that it must always issue one, which is what
+ * we want. Once a page has been repurposed as an APIC assist page the
+ * guest isn't allowed to set anything in it, so the flag remains zero
+ * and all is fine. The guest is allowed to clear flags in the page,
+ * but that doesn't cause us any problems.
+ */
+ if ( val & 1 ) /* APIC assist page enabled? */
+ {
+ uint32_t word = 0;
+ paddr_t page_start = val & ~1ul;
+ hvm_copy_to_guest_phys(page_start, &word, sizeof(word));
+ }
+ break;
+
default:
return 0;
}
if ( delivery_mode == APIC_DM_LOWEST )
{
- target = apic_lowest_prio(vlapic_domain(v), lpr_map);
+ target = apic_lowest_prio(vlapic_domain(vlapic), lpr_map);
if ( target != NULL )
rc = vlapic_accept_irq(vlapic_vcpu(target), delivery_mode,
vector, level, trig_mode);
goto out;
entry = msixtbl_find_entry(v, address);
- nr_entry = (address - entry->gtable) % PCI_MSIX_ENTRY_SIZE;
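+ /* nr_entry is the index of the MSI-X table entry being accessed, not a
+  * byte offset. */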
+ nr_entry = (address - entry->gtable) / PCI_MSIX_ENTRY_SIZE;
offset = address & (PCI_MSIX_ENTRY_SIZE - 1);
if ( offset != PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET)
void start_vmx(void)
{
- static int bootstrapped;
+ static bool_t bootstrapped;
vmx_save_host_msrs();
- if ( bootstrapped )
+ if ( test_and_set_bool(bootstrapped) )
{
if ( hvm_enabled && !vmx_cpu_up() )
{
return;
}
- bootstrapped = 1;
-
/* Xen does not fill x86_capability words except 0. */
boot_cpu_data.x86_capability[4] = cpuid_ecx(1);
case EXIT_REASON_MSR_LOADING:
printk("caused by MSR entry %ld loading.\n", exit_qualification);
break;
- case EXIT_REASON_MACHINE_CHECK:
+ case EXIT_REASON_MCE_DURING_VMENTRY:
printk("caused by machine check.\n");
HVMTRACE_0D(MCE);
do_machine_check(regs);
case EXIT_REASON_SIPI:
case EXIT_REASON_PENDING_VIRT_INTR:
case EXIT_REASON_PENDING_VIRT_NMI:
- case EXIT_REASON_MACHINE_CHECK:
+ case EXIT_REASON_MCE_DURING_VMENTRY:
break;
default:
v->arch.hvm_vmx.vmx_emulate = 1;
spin_lock(&vector_lock);
- if ((irq != AUTO_ASSIGN_IRQ) && (IO_APIC_VECTOR(irq) > 0)) {
+ if ((irq != AUTO_ASSIGN_IRQ) && (irq_to_vector(irq) > 0)) {
spin_unlock(&vector_lock);
- return IO_APIC_VECTOR(irq);
+ return irq_to_vector(irq);
}
vector = current_vector;
void machine_kexec(xen_kexec_image_t *image)
{
+ struct desc_ptr gdt_desc = {
+ .base = (unsigned long)(boot_cpu_gdt_table - FIRST_RESERVED_GDT_ENTRY),
+ .limit = LAST_RESERVED_GDT_BYTE
+ };
+
+ /*
+ * compat_machine_kexec() returns to idle pagetables, which requires us
+ * to be running on a static GDT mapping (idle pagetables have no GDT
+ * mappings in their per-domain mapping area).
+ */
+ asm volatile ( "lgdt %0" : : "m" (gdt_desc) );
+
#ifdef CONFIG_COMPAT
if ( is_pv_32on64_domain(dom0) )
{
return (page_get_owner(page) == dom_io);
}
-
int
get_page_from_l1e(
- l1_pgentry_t l1e, struct domain *d)
+ l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner)
{
unsigned long mfn = l1e_get_pfn(l1e);
struct page_info *page = mfn_to_page(mfn);
uint32_t l1f = l1e_get_flags(l1e);
struct vcpu *curr = current;
- struct domain *owner;
+ struct domain *real_pg_owner;
if ( !(l1f & _PAGE_PRESENT) )
return 1;
- if ( unlikely(l1f & l1_disallow_mask(d)) )
+ if ( unlikely(l1f & l1_disallow_mask(l1e_owner)) )
{
- MEM_LOG("Bad L1 flags %x", l1f & l1_disallow_mask(d));
+ MEM_LOG("Bad L1 flags %x", l1f & l1_disallow_mask(l1e_owner));
return 0;
}
if ( !mfn_valid(mfn) ||
- (owner = page_get_owner_and_reference(page)) == dom_io )
+ (real_pg_owner = page_get_owner_and_reference(page)) == dom_io )
{
/* Only needed the reference to confirm dom_io ownership. */
if ( mfn_valid(mfn) )
put_page(page);
/* DOMID_IO reverts to caller for privilege checks. */
- if ( d == dom_io )
- d = curr->domain;
+ if ( pg_owner == dom_io )
+ pg_owner = curr->domain;
- if ( !iomem_access_permitted(d, mfn, mfn) )
+ if ( !iomem_access_permitted(pg_owner, mfn, mfn) )
{
if ( mfn != (PADDR_MASK >> PAGE_SHIFT) ) /* INVALID_MFN? */
MEM_LOG("Non-privileged (%u) attempt to map I/O space %08lx",
- d->domain_id, mfn);
+ pg_owner->domain_id, mfn);
return 0;
}
return 1;
}
- if ( owner == NULL )
+ if ( real_pg_owner == NULL )
goto could_not_pin;
- /*
- * Let privileged domains transfer the right to map their target
- * domain's pages. This is used to allow stub-domain pvfb export to dom0,
- * until pvfb supports granted mappings. At that time this minor hack
- * can go away.
- */
- if ( unlikely(d != owner) && (d != curr->domain) && IS_PRIV_FOR(d, owner) )
- d = owner;
+ if ( unlikely(real_pg_owner != pg_owner) )
+ {
+ /*
+ * Let privileged domains transfer the right to map their target
+ * domain's pages. This is used to allow stub-domain pvfb export to
+ * dom0, until pvfb supports granted mappings. At that time this
+ * minor hack can go away.
+ */
+ if ( (pg_owner == l1e_owner) || !IS_PRIV_FOR(pg_owner, real_pg_owner) )
+ goto could_not_pin;
+ pg_owner = real_pg_owner;
+ }
/* Foreign mappings into guests in shadow external mode don't
* contribute to writeable mapping refcounts. (This allows the
* qemu-dm helper process in dom0 to map the domain's memory without
* messing up the count of "real" writable mappings.) */
if ( (l1f & _PAGE_RW) &&
- !(paging_mode_external(d) && (d != curr->domain)) &&
+ ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) &&
!get_page_type(page, PGT_writable_page) )
goto could_not_pin;
if ( is_xen_heap_page(page) )
{
if ( (l1f & _PAGE_RW) &&
- !(unlikely(paging_mode_external(d) &&
- (d != curr->domain))) )
+ ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) )
put_page_type(page);
put_page(page);
MEM_LOG("Attempt to change cache attributes of Xen heap page");
could_not_pin:
MEM_LOG("Error getting mfn %lx (pfn %lx) from L1 entry %" PRIpte
- " for dom%d",
+ " for l1e_owner=%d, pg_owner=%d",
mfn, get_gpfn_from_mfn(mfn),
- l1e_get_intpte(l1e), d->domain_id);
- if ( owner != NULL )
+ l1e_get_intpte(l1e), l1e_owner->domain_id, pg_owner->domain_id);
+ if ( real_pg_owner != NULL )
put_page(page);
return 0;
}
#define unadjust_guest_l3e(_p, _d) ((void)(_d))
#endif
-void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d)
+void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner)
{
unsigned long pfn = l1e_get_pfn(l1e);
struct page_info *page;
- struct domain *e;
+ struct domain *pg_owner;
struct vcpu *v;
if ( !(l1e_get_flags(l1e) & _PAGE_PRESENT) || is_iomem_page(pfn) )
return;
page = mfn_to_page(pfn);
-
- e = page_get_owner(page);
+ pg_owner = page_get_owner(page);
/*
* Check if this is a mapping that was established via a grant reference.
* Xen. All active grants can safely be cleaned up when the domain dies.)
*/
if ( (l1e_get_flags(l1e) & _PAGE_GNTTAB) &&
- !d->is_shutting_down && !d->is_dying )
+ !l1e_owner->is_shutting_down && !l1e_owner->is_dying )
{
MEM_LOG("Attempt to implicitly unmap a granted PTE %" PRIpte,
l1e_get_intpte(l1e));
- domain_crash(d);
+ domain_crash(l1e_owner);
}
/* Remember we didn't take a type-count of foreign writable mappings
* to paging-external domains */
if ( (l1e_get_flags(l1e) & _PAGE_RW) &&
- !(unlikely((e != d) && paging_mode_external(e))) )
+ ((l1e_owner == pg_owner) || !paging_mode_external(pg_owner)) )
{
put_page_and_type(page);
}
if ( unlikely(((page->u.inuse.type_info & PGT_type_mask) ==
PGT_seg_desc_page)) &&
unlikely(((page->u.inuse.type_info & PGT_count_mask) != 0)) &&
- (d == e) )
+ (l1e_owner == pg_owner) )
{
- for_each_vcpu ( d, v )
+ for_each_vcpu ( pg_owner, v )
invalidate_shadow_ldt(v, 1);
}
put_page(page);
for ( i = 0; i < L1_PAGETABLE_ENTRIES; i++ )
{
if ( is_guest_l1_slot(i) &&
- unlikely(!get_page_from_l1e(pl1e[i], d)) )
+ unlikely(!get_page_from_l1e(pl1e[i], d, d)) )
goto fail;
adjust_guest_l1e(pl1e[i], d);
return rc;
}
- if ( unlikely(!get_page_from_l1e(nl1e, FOREIGNDOM)) )
+ if ( unlikely(!get_page_from_l1e(nl1e, d, FOREIGNDOM)) )
return 0;
adjust_guest_l1e(nl1e, d);
/* Check the new PTE. */
nl1e = l1e_from_intpte(val);
- if ( unlikely(!get_page_from_l1e(nl1e, d)) )
+ if ( unlikely(!get_page_from_l1e(nl1e, d, d)) )
{
if ( is_pv_32bit_domain(d) && (bytes == 4) && (unaligned_addr & 4) &&
!do_cmpxchg && (l1e_get_flags(nl1e) & _PAGE_PRESENT) )
#undef page_to_mfn
#define page_to_mfn(_pg) _mfn(__page_to_mfn(_pg))
+/************************************************/
+/* HAP VRAM TRACKING SUPPORT */
+/************************************************/
+
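+/* VRAM dirty tracking reuses the generic log-dirty machinery, with the
+ * callbacks below restricted to the guest's dirty_vram pfn range. */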
+int hap_enable_vram_tracking(struct domain *d)
+{
+ int i;
+
+ if ( !d->dirty_vram )
+ return -EINVAL;
+
+ /* turn on PG_log_dirty bit in paging mode */
+ hap_lock(d);
+ d->arch.paging.mode |= PG_log_dirty;
+ hap_unlock(d);
+
+ /* set l1e entries of P2M table to be read-only. */
+ for (i = d->dirty_vram->begin_pfn; i < d->dirty_vram->end_pfn; i++)
+ p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty);
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ return 0;
+}
+
+int hap_disable_vram_tracking(struct domain *d)
+{
+ int i;
+
+ if ( !d->dirty_vram )
+ return -EINVAL;
+
+ hap_lock(d);
+ d->arch.paging.mode &= ~PG_log_dirty;
+ hap_unlock(d);
+
+ /* set l1e entries of P2M table back to normal (read-write) mode. */
+ for (i = d->dirty_vram->begin_pfn; i < d->dirty_vram->end_pfn; i++)
+ p2m_change_type(d, i, p2m_ram_logdirty, p2m_ram_rw);
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+ return 0;
+}
+
+void hap_clean_vram_tracking(struct domain *d)
+{
+ int i;
+
+ if ( !d->dirty_vram )
+ return;
+
+ /* set l1e entries of P2M table to be read-only. */
+ for (i = d->dirty_vram->begin_pfn; i < d->dirty_vram->end_pfn; i++)
+ p2m_change_type(d, i, p2m_ram_rw, p2m_ram_logdirty);
+
+ flush_tlb_mask(d->domain_dirty_cpumask);
+}
+
+void hap_vram_tracking_init(struct domain *d)
+{
+ paging_log_dirty_init(d, hap_enable_vram_tracking,
+ hap_disable_vram_tracking,
+ hap_clean_vram_tracking);
+}
+
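+/*
+ * Track VRAM dirtying for a HAP guest: a non-zero @nr (re)enables log-dirty
+ * mode over [begin_pfn, begin_pfn + nr) and copies the dirty bitmap back to
+ * the caller; @nr == 0 tears the tracking down again.
+ */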
+int hap_track_dirty_vram(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+{
+ long rc = 0;
+
+ if ( nr )
+ {
+ if ( paging_mode_log_dirty(d) && d->dirty_vram )
+ {
+ if ( begin_pfn != d->dirty_vram->begin_pfn ||
+ begin_pfn + nr != d->dirty_vram->end_pfn )
+ {
+ paging_log_dirty_disable(d);
+ d->dirty_vram->begin_pfn = begin_pfn;
+ d->dirty_vram->end_pfn = begin_pfn + nr;
+ rc = paging_log_dirty_enable(d);
+ if (rc != 0)
+ goto param_fail;
+ }
+ }
+ else if ( !paging_mode_log_dirty(d) && !d->dirty_vram )
+ {
+ rc = -ENOMEM;
+ if ( (d->dirty_vram = xmalloc(struct sh_dirty_vram)) == NULL )
+ goto param_fail;
+
+ d->dirty_vram->begin_pfn = begin_pfn;
+ d->dirty_vram->end_pfn = begin_pfn + nr;
+ hap_vram_tracking_init(d);
+ rc = paging_log_dirty_enable(d);
+ if (rc != 0)
+ goto param_fail;
+ }
+ else
+ {
+ if ( !paging_mode_log_dirty(d) && d->dirty_vram )
+ rc = -EINVAL;
+ else
+ rc = -ENODATA;
+ goto param_fail;
+ }
+ /* get the bitmap */
+ rc = paging_log_dirty_range(d, begin_pfn, nr, dirty_bitmap);
+ }
+ else
+ {
+ if ( paging_mode_log_dirty(d) && d->dirty_vram ) {
+ rc = paging_log_dirty_disable(d);
+ xfree(d->dirty_vram);
+ d->dirty_vram = NULL;
+ } else
+ rc = 0;
+ }
+
+ return rc;
+
+param_fail:
+ if ( d->dirty_vram )
+ {
+ xfree(d->dirty_vram);
+ d->dirty_vram = NULL;
+ }
+ return rc;
+}
+
/************************************************/
/* HAP LOG DIRTY SUPPORT */
/************************************************/
+
/* hap code to call when log_dirty is enabled. returns 0 if no problem found. */
int hap_enable_log_dirty(struct domain *d)
{
flush_tlb_mask(d->domain_dirty_cpumask);
}
+void hap_logdirty_init(struct domain *d)
+{
+ if ( paging_mode_log_dirty(d) && d->dirty_vram )
+ {
+ paging_log_dirty_disable(d);
+ xfree(d->dirty_vram);
+ d->dirty_vram = NULL;
+ }
+
+ /* Reinitialize logdirty mechanism */
+ paging_log_dirty_init(d, hap_enable_log_dirty,
+ hap_disable_log_dirty,
+ hap_clean_dirty_bitmap);
+}
+
/************************************************/
/* HAP SUPPORT FUNCTIONS */
/************************************************/
{
hap_lock_init(d);
INIT_PAGE_LIST_HEAD(&d->arch.paging.hap.freelist);
-
- /* This domain will use HAP for log-dirty mode */
- paging_log_dirty_init(d, hap_enable_log_dirty, hap_disable_log_dirty,
- hap_clean_dirty_bitmap);
}
/* return 0 for success, -errno for failure */
return rv;
}
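+/*
+ * Copy the log-dirty bitmap for [begin_pfn, begin_pfn + nr) into the caller's
+ * buffer, clearing the hypervisor's record of those pages as it goes.
+ */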
+int paging_log_dirty_range(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap)
+{
+ int rv = 0;
+ unsigned long pages = 0;
+ mfn_t *l4, *l3, *l2;
+ unsigned long *l1;
+ int b1, b2, b3, b4;
+ int i2, i3, i4;
+
+ d->arch.paging.log_dirty.clean_dirty_bitmap(d);
+ log_dirty_lock(d);
+
+ PAGING_DEBUG(LOGDIRTY, "log-dirty-range: dom %u faults=%u dirty=%u\n",
+ d->domain_id,
+ d->arch.paging.log_dirty.fault_count,
+ d->arch.paging.log_dirty.dirty_count);
+
+ if ( !mfn_valid(d->arch.paging.log_dirty.top) )
+ {
+ rv = -EINVAL; /* perhaps should be ENOMEM? */
+ goto out;
+ }
+
+ if ( unlikely(d->arch.paging.log_dirty.failed_allocs) ) {
+ printk("%s: %d failed page allocs while logging dirty pages\n",
+ __FUNCTION__, d->arch.paging.log_dirty.failed_allocs);
+ rv = -ENOMEM;
+ goto out;
+ }
+
+ if ( !d->arch.paging.log_dirty.fault_count &&
+ !d->arch.paging.log_dirty.dirty_count ) {
+ int size = (nr + BITS_PER_LONG - 1) / BITS_PER_LONG;
+ unsigned long zeroes[size];
+ memset(zeroes, 0x00, size * BYTES_PER_LONG);
+ rv = 0;
+ if ( copy_to_guest_offset(dirty_bitmap, 0, (uint8_t *) zeroes,
+ size * BYTES_PER_LONG) != 0 )
+ rv = -EFAULT;
+ goto out;
+ }
+ d->arch.paging.log_dirty.fault_count = 0;
+ d->arch.paging.log_dirty.dirty_count = 0;
+
+ b1 = L1_LOGDIRTY_IDX(begin_pfn);
+ b2 = L2_LOGDIRTY_IDX(begin_pfn);
+ b3 = L3_LOGDIRTY_IDX(begin_pfn);
+ b4 = L4_LOGDIRTY_IDX(begin_pfn);
+ l4 = map_domain_page(mfn_x(d->arch.paging.log_dirty.top));
+
+ for ( i4 = b4;
+ (pages < nr) && (i4 < LOGDIRTY_NODE_ENTRIES);
+ i4++ )
+ {
+ l3 = mfn_valid(l4[i4]) ? map_domain_page(mfn_x(l4[i4])) : NULL;
+ for ( i3 = b3;
+ (pages < nr) && (i3 < LOGDIRTY_NODE_ENTRIES);
+ i3++ )
+ {
+ l2 = ((l3 && mfn_valid(l3[i3])) ?
+ map_domain_page(mfn_x(l3[i3])) : NULL);
+ for ( i2 = b2;
+ (pages < nr) && (i2 < LOGDIRTY_NODE_ENTRIES);
+ i2++ )
+ {
+ static unsigned long zeroes[PAGE_SIZE/BYTES_PER_LONG];
+ unsigned int bytes = PAGE_SIZE;
+ uint8_t *s;
+ l1 = ((l2 && mfn_valid(l2[i2])) ?
+ map_domain_page(mfn_x(l2[i2])) : zeroes);
+
+ s = ((uint8_t*)l1) + (b1 >> 3);
+ bytes -= b1 >> 3;
+
+ if ( likely(((nr - pages + 7) >> 3) < bytes) )
+ bytes = (unsigned int)((nr - pages + 7) >> 3);
+
+ /* begin_pfn is not 32K aligned, hence we have to bit
+ * shift the bitmap */
+ if ( b1 & 0x7 )
+ {
+ int i, j;
+ uint32_t *l = (uint32_t*) s;
+ int bits = b1 & 0x7;
+ int bitmask = (1 << bits) - 1;
+ int size = (bytes + BYTES_PER_LONG - 1) / BYTES_PER_LONG;
+ unsigned long bitmap[size];
+ static unsigned long printed = 0;
+
+ if ( printed != begin_pfn )
+ {
+ dprintk(XENLOG_DEBUG, "%s: begin_pfn %lx is not 32K aligned!\n",
+ __FUNCTION__, begin_pfn);
+ printed = begin_pfn;
+ }
+
+ for ( i = 0; i < size - 1; i++, l++ ) {
+ bitmap[i] = ((*l) >> bits) |
+ (((*((uint8_t*)(l + 1))) & bitmask) << (sizeof(*l) * 8 - bits));
+ }
+ s = (uint8_t*) l;
+ size = BYTES_PER_LONG - ((b1 >> 3) & 0x3);
+ bitmap[i] = 0;
+ for ( j = 0; j < size; j++, s++ )
+ bitmap[i] |= (*s) << (j * 8);
+ bitmap[i] = (bitmap[i] >> bits) | (bitmask << (size * 8 - bits));
+ if ( copy_to_guest_offset(dirty_bitmap, (pages >> 3),
+ (uint8_t*) bitmap, bytes) != 0 )
+ {
+ rv = -EFAULT;
+ goto out;
+ }
+ }
+ else
+ {
+ if ( copy_to_guest_offset(dirty_bitmap, pages >> 3,
+ s, bytes) != 0 )
+ {
+ rv = -EFAULT;
+ goto out;
+ }
+ }
+
+ if ( l1 != zeroes )
+ clear_page(l1);
+ pages += bytes << 3;
+ if ( l1 != zeroes )
+ unmap_domain_page(l1);
+ b1 = b1 & 0x7;
+ }
+ b2 = 0;
+ if ( l2 )
+ unmap_domain_page(l2);
+ }
+ b3 = 0;
+ if ( l3 )
+ unmap_domain_page(l3);
+ }
+ unmap_domain_page(l4);
+
+ log_dirty_unlock(d);
+
+ return rv;
+
+ out:
+ log_dirty_unlock(d);
+ return rv;
+}
/* Note that this function takes three function pointers. Callers must supply
* these functions for log dirty code to call. This function usually is
switch ( sc->op )
{
case XEN_DOMCTL_SHADOW_OP_ENABLE_LOGDIRTY:
+ if ( hap_enabled(d) )
+ hap_logdirty_init(d);
return paging_log_dirty_enable(d);
case XEN_DOMCTL_SHADOW_OP_ENABLE:
if ( sc->mode & XEN_DOMCTL_SHADOW_ENABLE_LOG_DIRTY )
+ {
+ if ( hap_enabled(d) )
+ hap_logdirty_init(d);
return paging_log_dirty_enable(d);
+ }
case XEN_DOMCTL_SHADOW_OP_OFF:
if ( paging_mode_log_dirty(d) )
if ( unlikely(!(gflags & _PAGE_PRESENT)) )
{
+#if !(SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
/* If a guest l1 entry is not present, shadow with the magic
* guest-not-present entry. */
if ( level == 1 )
*sp = sh_l1e_gnp();
else
+#endif /* !OOS */
*sp = shadow_l1e_empty();
goto done;
}
if ( !shadow_mode_refcounts(d) )
return 1;
- res = get_page_from_l1e(sl1e, d);
+ res = get_page_from_l1e(sl1e, d, d);
// If a privileged domain is attempting to install a map of a page it does
// not own, we let it succeed anyway.
(d != owner) &&
IS_PRIV_FOR(d, owner))
{
- res = get_page_from_l1e(sl1e, owner);
+ res = get_page_from_l1e(sl1e, d, owner);
SHADOW_PRINTK("privileged domain %d installs map of mfn %05lx "
"which is owned by domain %d: %s\n",
d->domain_id, mfn_x(mfn), owner->domain_id,
if ( r & SHADOW_SET_ERROR )
return NULL;
-#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
*resync |= 1;
#endif
if ( r & SHADOW_SET_ERROR )
return NULL;
-#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
*resync |= 1;
#endif
(void) shadow_l1_index(sl1mfn, guest_l1_table_offset(gw->va));
}
-#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
/* All pages walked are now pagetables. Safe to resync pages
in case level 4 or 3 shadows were set. */
if ( resync )
else if ( p2mt != p2m_populate_on_demand )
result |= SHADOW_SET_ERROR;
-#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
if ( mfn_valid(sl3mfn) )
shadow_resync_all(v, 0);
#endif
else if ( p2mt != p2m_populate_on_demand )
result |= SHADOW_SET_ERROR;
-#if (SHADOW_OPTIMIZATIONS && SHOPT_OUT_OF_SYNC )
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC )
if ( mfn_valid(sl2mfn) )
shadow_resync_all(v, 0);
#endif
#if (SHADOW_OPTIMIZATIONS & SHOPT_FAST_FAULT_PATH)
if ( (regs->error_code & PFEC_reserved_bit) )
{
+#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
+ /* First, need to check that this isn't an out-of-sync
+ * shadow l1e. If it is, we fall back to the slow path, which
+ * will sync it up again. */
+ {
+ shadow_l2e_t sl2e;
+ mfn_t gl1mfn;
+ if ( (__copy_from_user(&sl2e,
+ (sh_linear_l2_table(v)
+ + shadow_l2_linear_offset(va)),
+ sizeof(sl2e)) != 0)
+ || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
+ || !mfn_valid(gl1mfn = _mfn(mfn_to_page(
+ shadow_l2e_get_mfn(sl2e))->v.sh.back))
+ || unlikely(mfn_is_out_of_sync(gl1mfn)) )
+ {
+ /* Hit the slow path as if there had been no
+ * shadow entry at all, and let it tidy up */
+ ASSERT(regs->error_code & PFEC_page_present);
+ regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
+ goto page_fault_slow_path;
+ }
+ }
+#endif /* SHOPT_OUT_OF_SYNC */
/* The only reasons for reserved bits to be set in shadow entries
* are the two "magic" shadow_l1e entries. */
if ( likely((__copy_from_user(&sl1e,
sizeof(sl1e)) == 0)
&& sh_l1e_is_magic(sl1e)) )
{
-#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
- /* First, need to check that this isn't an out-of-sync
- * shadow l1e. If it is, we fall back to the slow path, which
- * will sync it up again. */
- {
- shadow_l2e_t sl2e;
- mfn_t gl1mfn;
- if ( (__copy_from_user(&sl2e,
- (sh_linear_l2_table(v)
- + shadow_l2_linear_offset(va)),
- sizeof(sl2e)) != 0)
- || !(shadow_l2e_get_flags(sl2e) & _PAGE_PRESENT)
- || !mfn_valid(gl1mfn = _mfn(mfn_to_page(
- shadow_l2e_get_mfn(sl2e))->v.sh.back))
- || unlikely(mfn_is_out_of_sync(gl1mfn)) )
- {
- /* Hit the slow path as if there had been no
- * shadow entry at all, and let it tidy up */
- ASSERT(regs->error_code & PFEC_page_present);
- regs->error_code ^= (PFEC_reserved_bit|PFEC_page_present);
- goto page_fault_slow_path;
- }
- }
-#endif /* SHOPT_OUT_OF_SYNC */
if ( sh_l1e_is_gnp(sl1e) )
{
return rv;
}
-/**************************************************************************/
-/* VRAM dirty tracking support */
-
-struct sh_dirty_vram {
- unsigned long begin_pfn;
- unsigned long end_pfn;
- paddr_t *sl1ma;
- uint8_t *dirty_bitmap;
- s_time_t last_dirty;
-};
-
/**************************************************************************/
/* Shadow-page refcounting. */
{
uint32_t cpu;
uint64_t idletime, now = NOW();
- struct vcpu *v;
struct xenctl_cpumap ctlmap;
cpumask_t cpumap;
XEN_GUEST_HANDLE(uint8) cpumap_bitmap;
for_each_cpu_mask ( cpu, cpumap )
{
- if ( (v = idle_vcpu[cpu]) != NULL )
+ if ( idle_vcpu[cpu] == NULL )
cpu_clear(cpu, cpumap);
idletime = get_cpu_idle_time(cpu);
per_cpu(doublefault_tss, cpu) = alloc_xenheap_page();
memset(per_cpu(doublefault_tss, cpu), 0, PAGE_SIZE);
}
+#else
+ if (!per_cpu(compat_arg_xlat, cpu))
+ setup_compat_arg_xlat(cpu, apicid_to_node[apicid]);
#endif
if (!idt_tables[cpu]) {
/* per-cpu data */
g_tboot_shared->mac_regions[2].start = (uint64_t)__pa(&__per_cpu_start);
g_tboot_shared->mac_regions[2].size =
- g_tboot_shared->mac_regions[2].start +
(((uint64_t)last_cpu(cpu_possible_map) + 1) << PERCPU_SHIFT);
/* bss */
g_tboot_shared->mac_regions[3].start = (uint64_t)__pa(&__bss_start);
case XENMEM_add_to_physmap:
{
struct compat_add_to_physmap cmp;
- struct xen_add_to_physmap *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
+ struct xen_add_to_physmap *nat = COMPAT_ARG_XLAT_VIRT_BASE;
if ( copy_from_guest(&cmp, arg, 1) )
return -EFAULT;
case XENMEM_set_memory_map:
{
struct compat_foreign_memory_map cmp;
- struct xen_foreign_memory_map *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
+ struct xen_foreign_memory_map *nat = COMPAT_ARG_XLAT_VIRT_BASE;
if ( copy_from_guest(&cmp, arg, 1) )
return -EFAULT;
case XENMEM_machine_memory_map:
{
struct compat_memory_map cmp;
- struct xen_memory_map *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
+ struct xen_memory_map *nat = COMPAT_ARG_XLAT_VIRT_BASE;
if ( copy_from_guest(&cmp, arg, 1) )
return -EFAULT;
case XENMEM_get_pod_target:
{
struct compat_pod_target cmp;
- struct xen_pod_target *nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
+ struct xen_pod_target *nat = COMPAT_ARG_XLAT_VIRT_BASE;
if ( copy_from_guest(&cmp, arg, 1) )
return -EFAULT;
if ( unlikely(!guest_handle_okay(cmp_uops, count)) )
return -EFAULT;
- set_xen_guest_handle(nat_ops, (void *)COMPAT_ARG_XLAT_VIRT_BASE);
+ set_xen_guest_handle(nat_ops, COMPAT_ARG_XLAT_VIRT_BASE);
for ( ; count; count -= i )
{
#include <xen/init.h>
#include <xen/mm.h>
#include <xen/sched.h>
+#include <xen/numa.h>
#include <xen/guest_access.h>
#include <asm/current.h>
#include <asm/asm_defns.h>
#include <asm/fixmap.h>
#include <asm/hypercall.h>
#include <asm/msr.h>
-#include <asm/numa.h>
#include <public/memory.h>
#ifdef CONFIG_COMPAT
unsigned int m2p_compat_vstart = __HYPERVISOR_COMPAT_VIRT_START;
#endif
-DEFINE_PER_CPU(char, compat_arg_xlat[COMPAT_ARG_XLAT_SIZE]);
+DEFINE_PER_CPU(void *, compat_arg_xlat);
/* Top-level master (and idle-domain) page directory. */
l4_pgentry_t __attribute__ ((__section__ (".bss.page_aligned")))
0x10, __PAGE_HYPERVISOR);
}
+int __cpuinit setup_compat_arg_xlat(unsigned int cpu, int node)
+{
+ unsigned int order = get_order_from_bytes(COMPAT_ARG_XLAT_SIZE);
+ unsigned long sz = PAGE_SIZE << order;
+ unsigned int memflags = node != NUMA_NO_NODE ? MEMF_node(node) : 0;
+ struct page_info *pg;
+
+ pg = alloc_domheap_pages(NULL, order, memflags);
+ if ( !pg )
+ return -ENOMEM;
+
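+ /* Trim leading pages from the power-of-two allocation until just enough
+  * remains to cover COMPAT_ARG_XLAT_SIZE. */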
+ for ( ; (sz -= PAGE_SIZE) >= COMPAT_ARG_XLAT_SIZE; ++pg )
+ free_domheap_page(pg);
+
+ per_cpu(compat_arg_xlat, cpu) = page_to_virt(pg);
+
+ return 0;
+}
+
void __init subarch_init_memory(void)
{
unsigned long i, n, v, m2p_start_mfn;
share_xen_page_with_privileged_guests(page, XENSHARE_readonly);
}
}
+
+ if ( setup_compat_arg_xlat(smp_processor_id(),
+ apicid_to_node[boot_cpu_physical_apicid]) )
+ panic("Could not setup argument translation area");
}
long subarch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
if ( copy_from_guest(&cmp, arg, 1) )
return -EFAULT;
- nat = (void *)COMPAT_ARG_XLAT_VIRT_BASE;
+ nat = COMPAT_ARG_XLAT_VIRT_BASE;
XLAT_vcpu_set_singleshot_timer(nat, &cmp);
rc = do_vcpu_op(cmd, vcpuid, guest_handle_from_ptr(nat, void));
break;
struct compat_gnttab_copy copy;
} cmp;
- set_xen_guest_handle(nat.uop, (void *)COMPAT_ARG_XLAT_VIRT_BASE);
+ set_xen_guest_handle(nat.uop, COMPAT_ARG_XLAT_VIRT_BASE);
switch ( cmd )
{
case GNTTABOP_setup_table:
struct compat_memory_exchange xchg;
} cmp;
- set_xen_guest_handle(nat.hnd, (void *)COMPAT_ARG_XLAT_VIRT_BASE);
+ set_xen_guest_handle(nat.hnd, COMPAT_ARG_XLAT_VIRT_BASE);
split = 0;
switch ( op )
{
if ( unlikely((h = t->maptrack_head) == (t->maptrack_limit - 1)) )
return -1;
t->maptrack_head = maptrack_entry(t, h).ref;
- t->map_count++;
return h;
}
{
maptrack_entry(t, handle).ref = t->maptrack_head;
t->maptrack_head = handle;
- t->map_count--;
}
static inline int
unsigned long frame = 0, nr_gets = 0;
int rc = GNTST_okay;
u32 old_pin;
+ u32 act_pin;
unsigned int cache_flags;
struct active_grant_entry *act;
struct grant_mapping *mt;
GNTPIN_hstr_inc : GNTPIN_hstw_inc;
frame = act->frame;
+ act_pin = act->pin;
cache_flags = (sha->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
if ( need_iommu(ld) &&
!(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) &&
- (act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
+ (act_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
{
if ( iommu_map_page(ld, mfn_to_gmfn(ld, frame), frame) )
{
static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info)
{
- unsigned int load = 0;
- uint64_t cur_ns, idle_ns, total_ns;
-
+ uint64_t cur_ns, total_ns;
+ uint64_t max_load_freq = 0;
struct cpufreq_policy *policy;
unsigned int j;
return;
/* Get Idle Time */
- idle_ns = UINT_MAX;
for_each_cpu_mask(j, policy->cpus) {
- uint64_t total_idle_ns;
- unsigned int tmp_idle_ns;
+ uint64_t idle_ns, total_idle_ns;
+ uint64_t load, load_freq, freq_avg;
struct cpu_dbs_info_s *j_dbs_info;
j_dbs_info = &per_cpu(cpu_dbs_info, j);
total_idle_ns = get_cpu_idle_time(j);
- tmp_idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
+ idle_ns = total_idle_ns - j_dbs_info->prev_cpu_idle;
j_dbs_info->prev_cpu_idle = total_idle_ns;
- if (tmp_idle_ns < idle_ns)
- idle_ns = tmp_idle_ns;
- }
+ if (unlikely(total_ns < idle_ns))
+ continue;
- if (likely(total_ns > idle_ns))
- load = (100 * (total_ns - idle_ns)) / total_ns;
+ load = 100 * (total_ns - idle_ns) / total_ns;
+
+ freq_avg = cpufreq_driver_getavg(j, GOV_GETAVG);
+
+ load_freq = load * freq_avg;
+ if (load_freq > max_load_freq)
+ max_load_freq = load_freq;
+ }
/* Check for frequency increase */
- if (load > dbs_tuners_ins.up_threshold) {
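+ /* Comparing load * average-frequency against threshold * current-frequency
+  * keeps the trigger point independent of the frequency currently in use. */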
+ if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) {
/* if we are already at full speed then break out early */
if (policy->cur == policy->max)
return;
- __cpufreq_driver_target(policy, policy->max,CPUFREQ_RELATION_H);
+ __cpufreq_driver_target(policy, policy->max, CPUFREQ_RELATION_H);
return;
}
* can support the current CPU usage without triggering the up
* policy. To be safe, we focus 10 points under the threshold.
*/
- if (load < (dbs_tuners_ins.up_threshold - 10)) {
- unsigned int freq_next, freq_cur;
-
- freq_cur = cpufreq_driver_getavg(policy->cpu, GOV_GETAVG);
+ if (max_load_freq < (dbs_tuners_ins.up_threshold - 10) * policy->cur) {
+ uint64_t freq_next;
- freq_next = (freq_cur * load) / (dbs_tuners_ins.up_threshold - 10);
+ freq_next = max_load_freq / (dbs_tuners_ins.up_threshold - 10);
__cpufreq_driver_target(policy, freq_next, CPUFREQ_RELATION_L);
}
irq < NR_IRQS;
irq = find_next_bit(hvm_irq_dpci->dirq_mask, NR_IRQS, irq + 1) )
{
- if ( !test_and_clear_bit(irq, &hvm_irq_dpci->dirq_mask) )
+ if ( !test_and_clear_bit(irq, hvm_irq_dpci->dirq_mask) )
continue;
spin_lock(&d->event_lock);
i = find_next_bit(hvm_irq_dpci->mapping, NR_IRQS, i + 1) )
{
pirq_guest_unbind(d, i);
- kill_timer(&hvm_irq_dpci->hvm_timer[irq_to_vector(i)]);
+ kill_timer(&hvm_irq_dpci->hvm_timer[domain_irq_to_vector(d, i)]);
list_for_each_safe ( digl_list, tmp,
&hvm_irq_dpci->mirq[i].digl_list )
#include <xen/iommu.h>
#include <asm/hvm/iommu.h>
#include <xen/time.h>
+#include <xen/list.h>
#include <xen/pci.h>
#include <xen/pci_regs.h>
#include "iommu.h"
#include "vtd.h"
#include "extern.h"
-#ifndef dest_SMI
+#ifdef __ia64__
#define dest_SMI -1
+#define nr_ioapics iosapic_get_nr_iosapics()
+#define nr_ioapic_registers(i) iosapic_get_nr_pins(i)
+#else
+#define nr_ioapic_registers(i) nr_ioapic_registers[i]
#endif
-/* The max number of IOAPIC (or IOSAPIC) pin. The typical values can be 24 or
- * 48 on x86 and Itanium platforms. Here we use a biger number 256. This
- * should be big enough. Actually now IREMAP_ENTRY_NR is also 256.
- */
-#define MAX_IOAPIC_PIN_NUM 256
+/* apic_pin_2_ir_idx[apicid][pin] = interrupt remapping table index */
+static unsigned int **apic_pin_2_ir_idx;
+
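+/* Build the lookup table: one flat array with an entry per IO-APIC pin, plus
+ * a per-APIC pointer table into it; every entry starts out unused (-1). */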
+static int init_apic_pin_2_ir_idx(void)
+{
+ unsigned int *_apic_pin_2_ir_idx;
+ unsigned int nr_pins, i;
+
+ nr_pins = 0;
+ for ( i = 0; i < nr_ioapics; i++ )
+ nr_pins += nr_ioapic_registers(i);
-static int ioapic_pin_to_intremap_index[MAX_IOAPIC_PIN_NUM] =
- { [0 ... MAX_IOAPIC_PIN_NUM-1] = -1 };
+ _apic_pin_2_ir_idx = xmalloc_array(unsigned int, nr_pins);
+ apic_pin_2_ir_idx = xmalloc_array(unsigned int *, nr_ioapics);
+ if ( (_apic_pin_2_ir_idx == NULL) || (apic_pin_2_ir_idx == NULL) )
+ {
+ xfree(_apic_pin_2_ir_idx);
+ xfree(apic_pin_2_ir_idx);
+ return -ENOMEM;
+ }
+
+ for ( i = 0; i < nr_pins; i++ )
+ _apic_pin_2_ir_idx[i] = -1;
+
+ nr_pins = 0;
+ for ( i = 0; i < nr_ioapics; i++ )
+ {
+ apic_pin_2_ir_idx[i] = &_apic_pin_2_ir_idx[nr_pins];
+ nr_pins += nr_ioapic_registers(i);
+ }
+
+ return 0;
+}
u16 apicid_to_bdf(int apic_id)
{
}
static int ioapic_rte_to_remap_entry(struct iommu *iommu,
- int apic_id, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
+ int apic, unsigned int ioapic_pin, struct IO_xAPIC_route_entry *old_rte,
unsigned int rte_upper, unsigned int value)
{
struct iremap_entry *iremap_entry = NULL, *iremap_entries;
remap_rte = (struct IO_APIC_route_remap_entry *) old_rte;
spin_lock_irqsave(&ir_ctrl->iremap_lock, flags);
- if ( ioapic_pin_to_intremap_index[ioapic_pin] < 0 )
+ index = apic_pin_2_ir_idx[apic][ioapic_pin];
+ if ( index < 0 )
{
ir_ctrl->iremap_index++;
index = ir_ctrl->iremap_index;
- ioapic_pin_to_intremap_index[ioapic_pin] = index;
+ apic_pin_2_ir_idx[apic][ioapic_pin] = index;
}
- else
- index = ioapic_pin_to_intremap_index[ioapic_pin];
if ( index > IREMAP_ENTRY_NR - 1 )
{
new_ire.lo.res_1 = 0;
new_ire.lo.vector = new_rte.vector;
new_ire.lo.res_2 = 0;
- new_ire.hi.sid = apicid_to_bdf(apic_id);
+ new_ire.hi.sid = apicid_to_bdf(IO_APIC_ID(apic));
new_ire.hi.sq = 0; /* comparing all 16-bit of SID */
new_ire.hi.svt = 1; /* requestor ID verification SID/SQ */
*(IO_APIC_BASE(apic)+4) = *(((int *)&old_rte)+0);
remap_rte->mask = saved_mask;
- ASSERT(ioapic_pin < MAX_IOAPIC_PIN_NUM);
- if ( ioapic_rte_to_remap_entry(iommu, IO_APIC_ID(apic), ioapic_pin,
+ if ( ioapic_rte_to_remap_entry(iommu, apic, ioapic_pin,
&old_rte, rte_upper, value) )
{
*IO_APIC_BASE(apic) = rte_upper ? (reg + 1) : reg;
/* After set SIRTP, we should do globally invalidate the IEC */
iommu_flush_iec_global(iommu);
- return 0;
+ return init_apic_pin_2_ir_idx();
}
void disable_intremap(struct iommu *iommu)
#define IOSAPIC_RTEINDEX(reg) (((reg) - 0x10) >> 1)
extern unsigned long ia64_vector_mask[];
extern unsigned long ia64_xen_vector[];
+
+int iosapic_get_nr_iosapics(void);
+int iosapic_get_nr_pins(int index);
#endif /* XEN */
#define IO_APIC_BASE(idx) ((unsigned int *)iosapic_lists[idx].addr)
void hap_final_teardown(struct domain *d);
void hap_teardown(struct domain *d);
void hap_vcpu_init(struct vcpu *v);
+void hap_logdirty_init(struct domain *d);
+int hap_track_dirty_vram(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
extern struct paging_mode hap_paging_real_mode;
extern struct paging_mode hap_paging_protected_mode;
#define MAX_VECTOR 256
-#define vcpu_vlapic(vcpu) (&(vcpu)->arch.hvm_vcpu.vlapic)
-#define vlapic_vcpu(vpic) (container_of((vpic), struct vcpu, \
- arch.hvm_vcpu.vlapic))
-#define vlapic_domain(vpic) (vlapic_vcpu(vlapic)->domain)
+#define vcpu_vlapic(x) (&(x)->arch.hvm_vcpu.vlapic)
+#define vlapic_vcpu(x) (container_of((x), struct vcpu, arch.hvm_vcpu.vlapic))
+#define vlapic_domain(x) (vlapic_vcpu(x)->domain)
#define VLAPIC_ID(vlapic) \
(GET_xAPIC_ID(vlapic_get_reg((vlapic), APIC_ID)))
#define EXIT_REASON_MONITOR_TRAP_FLAG 37
#define EXIT_REASON_MONITOR_INSTRUCTION 39
#define EXIT_REASON_PAUSE_INSTRUCTION 40
-#define EXIT_REASON_MACHINE_CHECK 41
+#define EXIT_REASON_MCE_DURING_VMENTRY 41
#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
#define EXIT_REASON_APIC_ACCESS 44
#define EXIT_REASON_EPT_VIOLATION 48
int get_page_type(struct page_info *page, unsigned long type);
int put_page_type_preemptible(struct page_info *page);
int get_page_type_preemptible(struct page_info *page, unsigned long type);
-int get_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
-void put_page_from_l1e(l1_pgentry_t l1e, struct domain *d);
+int get_page_from_l1e(
+ l1_pgentry_t l1e, struct domain *l1e_owner, struct domain *pg_owner);
+void put_page_from_l1e(l1_pgentry_t l1e, struct domain *l1e_owner);
static inline void put_page_and_type(struct page_info *page)
{
/* free log dirty bitmap resource */
void paging_free_log_dirty_bitmap(struct domain *d);
+/* get the dirty bitmap for a specific range of pfns */
+int paging_log_dirty_range(struct domain *d,
+ unsigned long begin_pfn,
+ unsigned long nr,
+ XEN_GUEST_HANDLE_64(uint8) dirty_bitmap);
+
/* enable log dirty */
int paging_log_dirty_enable(struct domain *d);
#define L4_LOGDIRTY_IDX(pfn) 0
#endif
+/* VRAM dirty tracking support */
+struct sh_dirty_vram {
+ unsigned long begin_pfn;
+ unsigned long end_pfn;
+ paddr_t *sl1ma;
+ uint8_t *dirty_bitmap;
+ s_time_t last_dirty;
+};
+
/*****************************************************************************
* Entry points into the paging-assistance code */
#ifndef __X86_PERCPU_H__
#define __X86_PERCPU_H__
-#define PERCPU_SHIFT 13
+#define PERCPU_SHIFT 12
#define PERCPU_SIZE (1UL << PERCPU_SHIFT)
/* Separate out the type, so (int[3], foo) works. */
#define __X86_64_UACCESS_H
#define COMPAT_ARG_XLAT_VIRT_BASE this_cpu(compat_arg_xlat)
-#define COMPAT_ARG_XLAT_SIZE PAGE_SIZE
-DECLARE_PER_CPU(char, compat_arg_xlat[COMPAT_ARG_XLAT_SIZE]);
+#define COMPAT_ARG_XLAT_SIZE (2*PAGE_SIZE)
+DECLARE_PER_CPU(void *, compat_arg_xlat);
+int setup_compat_arg_xlat(unsigned int cpu, int node);
#define is_compat_arg_xlat_range(addr, size) ({ \
unsigned long __off; \
__off = (unsigned long)(addr) - (unsigned long)COMPAT_ARG_XLAT_VIRT_BASE; \
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
+/*
+ * NB. first_sect and last_sect in blkif_request_segment, as well as
+ * sector_number in blkif_request, are always expressed in 512-byte units.
+ * However they must be properly aligned to the real sector size of the
+ * physical disk, which is reported in the "sector-size" node in the backend
+ * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units.
+ */
struct blkif_request_segment {
grant_ref_t gref; /* reference to I/O buffer frame */
/* @first_sect: first sector in frame to transfer (inclusive). */
unsigned int nr_ents;
/* TLB_FLUSH_MULTI, INVLPG_MULTI */
#if __XEN_INTERFACE_VERSION__ >= 0x00030205
- XEN_GUEST_HANDLE(void) vcpumask;
+ XEN_GUEST_HANDLE(const_void) vcpumask;
#else
- void *vcpumask;
+ const void *vcpumask;
#endif
/* COPY_PAGE */
xen_pfn_t src_mfn;
struct grant_mapping **maptrack;
unsigned int maptrack_head;
unsigned int maptrack_limit;
- unsigned int map_count;
/* Lock protecting updates to active and shared grant tables. */
spinlock_t lock;
};