From: Jean Guyader Date: Thu, 8 Oct 2009 16:32:47 +0000 (+0100) Subject: Backport netchannel2 from xen-unstable. X-Git-Url: http://xenbits.xen.org/gitweb?a=commitdiff_plain;h=003725739a872fc483a8cbd4ae45cf20d07b41e6;p=xenclient%2Fxen-pq.git Backport netchannel2 from xen-unstable. --- diff --git a/master/series b/master/series index 31c3a33..7b18aaa 100644 --- a/master/series +++ b/master/series @@ -1,3 +1,4 @@ +xen-unstable-netchannel2 xen-unstable-19932-c0cb307d927f check-open-pv-log-file diff --git a/master/xen-unstable-19932-c0cb307d927f b/master/xen-unstable-19932-c0cb307d927f index 1a68478..7860486 100644 --- a/master/xen-unstable-19932-c0cb307d927f +++ b/master/xen-unstable-19932-c0cb307d927f @@ -92,7 +92,7 @@ index ad5ec35..a8c6d1a 100644 dgpa, reps, bytes_per_rep, sgpa, IOREQ_WRITE, df, NULL); diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index 42652ca..bd1faba 100644 +index 1c7bbb8..f4c1907 100644 --- a/xen/arch/x86/hvm/hvm.c +++ b/xen/arch/x86/hvm/hvm.c @@ -1536,6 +1536,8 @@ static enum hvm_copy_result __hvm_copy( @@ -104,17 +104,7 @@ index 42652ca..bd1faba 100644 if ( !p2m_is_ram(p2mt) ) return HVMCOPY_bad_gfn_to_mfn; ASSERT(mfn_valid(mfn)); -@@ -1929,7 +1931,8 @@ enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack) - static long hvm_grant_table_op( - unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count) - { -- if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) ) -+ if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) && -+ (cmd != GNTTABOP_map_grant_ref) && (cmd != GNTTABOP_unmap_grant_ref) ) - return -ENOSYS; /* all other commands need auditing */ - return do_grant_table_op(cmd, uop, count); - } -@@ -2736,17 +2739,36 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) +@@ -2764,17 +2766,36 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE(void) arg) if ( a.hvmmem_type >= ARRAY_SIZE(memtype) ) goto param_fail4; @@ -255,7 +245,7 @@ index ca4f224..37d2615 100644 break; diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 0d6d5ee..cc0f9ee 100644 +index 067e136..9dc7020 100644 --- a/xen/arch/x86/mm.c +++ b/xen/arch/x86/mm.c @@ -2655,10 +2655,16 @@ int do_mmuext_op( @@ -825,10 +815,10 @@ index b8cd752..b97ed37 100644 " --> %" PRI_mfn " != mfn %" PRI_mfn, gfn_x(gfn), mfn_x(gmfn), mfn_x(mfn)); diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index 5306354..7e3052c 100644 +index 40fc183..5856be3 100644 --- a/xen/common/grant_table.c +++ b/xen/common/grant_table.c -@@ -226,6 +226,15 @@ __gnttab_map_grant_ref( +@@ -416,6 +416,15 @@ __gnttab_map_grant_ref( return; } @@ -844,7 +834,7 @@ index 5306354..7e3052c 100644 if ( unlikely((rd = rcu_lock_domain_by_id(op->dom)) == NULL) ) { gdprintk(XENLOG_INFO, "Could not find domain %d\n", op->dom); -@@ -343,6 +352,13 @@ __gnttab_map_grant_ref( +@@ -515,6 +524,13 @@ __gnttab_map_grant_ref( if ( mfn_valid(frame) ) put_page(mfn_to_page(frame)); @@ -858,7 +848,7 @@ index 5306354..7e3052c 100644 if ( !iomem_access_permitted(rd, frame, frame) ) { gdprintk(XENLOG_WARNING, -@@ -395,7 +411,12 @@ __gnttab_map_grant_ref( +@@ -567,7 +583,12 @@ __gnttab_map_grant_ref( !(old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) && (act_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) { @@ -872,7 +862,7 @@ index 5306354..7e3052c 100644 { rc = GNTST_general_error; goto undo_out; -@@ -573,7 +594,8 @@ __gnttab_unmap_common( +@@ -743,7 +764,8 @@ __gnttab_unmap_common( (old_pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) && !(act->pin & 
(GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) { @@ -882,7 +872,7 @@ index 5306354..7e3052c 100644 { rc = GNTST_general_error; goto unmap_out; -@@ -1717,7 +1739,7 @@ gnttab_release_mappings( +@@ -2343,7 +2365,7 @@ gnttab_release_mappings( { BUG_ON(!(act->pin & GNTPIN_hstr_mask)); act->pin -= GNTPIN_hstr_inc; @@ -891,7 +881,7 @@ index 5306354..7e3052c 100644 !is_iomem_page(act->frame) ) put_page(mfn_to_page(act->frame)); } -@@ -1736,7 +1758,7 @@ gnttab_release_mappings( +@@ -2362,7 +2384,7 @@ gnttab_release_mappings( { BUG_ON(!(act->pin & GNTPIN_hstw_mask)); act->pin -= GNTPIN_hstw_inc; @@ -914,10 +904,10 @@ index 903a930..58aa635 100644 static inline int replace_grant_supported(void) { diff --git a/xen/include/asm-x86/grant_table.h b/xen/include/asm-x86/grant_table.h -index 3a7fb2a..4c72d7c 100644 +index 4e97d9d..07e96b6 100644 --- a/xen/include/asm-x86/grant_table.h +++ b/xen/include/asm-x86/grant_table.h -@@ -44,7 +44,7 @@ static inline void gnttab_clear_flag(unsigned long nr, uint16_t *addr) +@@ -59,7 +59,7 @@ static inline void gnttab_clear_flag(unsigned long nr, uint16_t *addr) (((ld) == (rd)) || !paging_mode_external(rd))) /* Done implicitly when page tables are destroyed. */ diff --git a/master/xen-unstable-netchannel2 b/master/xen-unstable-netchannel2 new file mode 100644 index 0000000..39061dd --- /dev/null +++ b/master/xen-unstable-netchannel2 @@ -0,0 +1,3459 @@ + + Backport netchannel 2 support from xen-unstable + 20284,20283,20282,20281,20280,20278,20277. + +diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile +index bdd1cc0..bc50e65 100644 +--- a/tools/hotplug/Linux/Makefile ++++ b/tools/hotplug/Linux/Makefile +@@ -14,6 +14,7 @@ XEN_SCRIPT_DIR = /etc/xen/scripts + XEN_SCRIPTS = network-bridge vif-bridge + XEN_SCRIPTS += network-route vif-route + XEN_SCRIPTS += network-nat vif-nat ++XEN_SCRIPTS += vif2 + XEN_SCRIPTS += block + XEN_SCRIPTS += block-enbd block-nbd + XEN_SCRIPTS += blktap +diff --git a/tools/hotplug/Linux/blktap b/tools/hotplug/Linux/blktap +new file mode 100644 +index 0000000..01a0f6c +--- /dev/null ++++ b/tools/hotplug/Linux/blktap +@@ -0,0 +1,93 @@ ++#!/bin/bash ++ ++# Copyright (c) 2005, XenSource Ltd. ++ ++dir=$(dirname "$0") ++. "$dir/xen-hotplug-common.sh" ++. "$dir/block-common.sh" ++ ++findCommand "$@" ++ ++## ++# check_blktap_sharing file mode ++# ++# Perform the sharing check for the given blktap and mode. ++# ++check_blktap_sharing() ++{ ++ local file="$1" ++ local mode="$2" ++ ++ local base_path="$XENBUS_BASE_PATH/$XENBUS_TYPE" ++ for dom in $(xenstore-list "$base_path") ++ do ++ for dev in $(xenstore-list "$base_path/$dom") ++ do ++ params=$(xenstore_read "$base_path/$dom/$dev/params" | cut -d: -f2) ++ if [ "$file" = "$params" ] ++ then ++ ++ if [ "$mode" = 'w' ] ++ then ++ if ! same_vm "$dom" ++ then ++ echo 'guest' ++ return ++ fi ++ else ++ local m=$(xenstore_read "$base_path/$dom/$dev/mode") ++ m=$(canonicalise_mode "$m") ++ ++ if [ "$m" = 'w' ] ++ then ++ if ! same_vm "$dom" ++ then ++ echo 'guest' ++ return ++ fi ++ fi ++ fi ++ fi ++ done ++ done ++ ++ echo 'ok' ++} ++ ++ ++t=$(xenstore_read_default "$XENBUS_PATH/type" 'MISSING') ++if [ -n "$t" ] ++then ++ p=$(xenstore_read "$XENBUS_PATH/params") ++ # if we have a ':', chew from head including : ++ if echo $p | grep -q \: ++ then ++ p=${p#*:} ++ fi ++fi ++# some versions of readlink cannot be passed a regular file ++if [ -L "$p" ]; then ++ file=$(readlink -f "$p") || fatal "$p link does not exist." 
++else ++ file="$p" ++fi ++ ++if [ "$command" = 'add' ] ++then ++ [ -e "$file" ] || { fatal $file does not exist; } ++ ++ FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id") ++ FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm") ++ mode=$(xenstore_read "$XENBUS_PATH/mode") ++ mode=$(canonicalise_mode "$mode") ++ ++ if [ "$mode" != '!' ] ++ then ++ result=$(check_blktap_sharing "$file" "$mode") ++ [ "$result" = 'ok' ] || ebusy "$file already in use by other domain" ++ fi ++ ++ success ++fi ++ ++exit 0 +diff --git a/tools/hotplug/Linux/vif2 b/tools/hotplug/Linux/vif2 +new file mode 100644 +index 0000000..247fa67 +--- /dev/null ++++ b/tools/hotplug/Linux/vif2 +@@ -0,0 +1,46 @@ ++#!/bin/bash ++ ++dir=$(dirname "$0") ++. "$dir/xen-hotplug-common.sh" ++. "$dir/xen-network-common.sh" ++ ++bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge") ++if [ -z "$bridge" ] ++ then ++ nr_bridges=$(($(brctl show | cut -f 1 | grep -v "^$" | wc -l) - 1)) ++ if [ "$nr_bridges" != 1 ] ++ then ++ fatal "no bridge specified, and don't know which one to use ($nr_bridges found)" ++ fi ++ bridge=$(brctl show | cut -d " ++" -f 2 | cut -f 1) ++fi ++ ++command="$1" ++shift ++ ++case "$command" in ++ "online") ++ if [ "$bridge" != "-" ] ++ then ++ setup_bridge_port "$vif" ++ add_to_bridge "$bridge" "$vif" ++ else ++ # Just let the normal udev rules for interfaces handle it. ++ true ++ fi ++ success ++ ;; ++ ++ "add") ++ success ++ ;; ++ ++ "remove") ++ ;; ++ ++ *) ++ echo "Unknown command: $command" ++ echo 'Valid commands are: add, remove, online' ++ exit 1 ++esac +diff --git a/tools/hotplug/Linux/xen-backend.rules b/tools/hotplug/Linux/xen-backend.rules +index fe21fc1..9dd88a8 100644 +--- a/tools/hotplug/Linux/xen-backend.rules ++++ b/tools/hotplug/Linux/xen-backend.rules +@@ -1,8 +1,9 @@ + SUBSYSTEM=="xen-backend", KERNEL=="tap*", RUN+="/etc/xen/scripts/blktap $env{ACTION}" + SUBSYSTEM=="xen-backend", KERNEL=="vbd*", RUN+="/etc/xen/scripts/block $env{ACTION}" + SUBSYSTEM=="xen-backend", KERNEL=="vtpm*", RUN+="/etc/xen/scripts/vtpm $env{ACTION}" +-SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="online", RUN+="$env{script} online" +-SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="offline", RUN+="$env{script} offline" ++SUBSYSTEM=="xen-backend", KERNEL=="vif2-*", RUN+="/etc/xen/scripts/vif2 $env{ACTION}" ++SUBSYSTEM=="xen-backend", KERNEL=="vif-*", ACTION=="online", RUN+="$env{script} online" ++SUBSYSTEM=="xen-backend", KERNEL=="vif-*", ACTION=="offline", RUN+="$env{script} offline" + SUBSYSTEM=="xen-backend", KERNEL=="vscsi*", RUN+="/etc/xen/scripts/vscsi $env{ACTION}" + SUBSYSTEM=="xen-backend", ACTION=="remove", RUN+="/etc/xen/scripts/xen-hotplug-cleanup" + KERNEL=="evtchn", NAME="xen/%k" +diff --git a/tools/libxc/xc_linux.c b/tools/libxc/xc_linux.c +index 2480b3c..c9be4f7 100644 +--- a/tools/libxc/xc_linux.c ++++ b/tools/libxc/xc_linux.c +@@ -562,6 +562,141 @@ int xc_gnttab_set_max_grants(int xcg_handle, + return 0; + } + ++int xc_gnttab_op(int xc_handle, int cmd, ++ void * op, int op_size, int count) ++{ ++ int ret = 0; ++ DECLARE_HYPERCALL; ++ ++ hypercall.op = __HYPERVISOR_grant_table_op; ++ hypercall.arg[0] = cmd; ++ hypercall.arg[1] = (unsigned long)op; ++ hypercall.arg[2] = count; ++ ++ if ( lock_pages(op, count* op_size) != 0 ) ++ { ++ PERROR("Could not lock memory for Xen hypercall"); ++ goto out1; ++ } ++ ++ ret = do_xen_hypercall(xc_handle, &hypercall); ++ ++ unlock_pages(op, count * op_size); ++ ++ out1: ++ return ret; ++} ++ ++int xc_gnttab_get_version(int xc_handle, int 
domid) ++{ ++ struct gnttab_get_version query; ++ int rc; ++ ++ query.dom = domid; ++ rc = xc_gnttab_op(xc_handle, GNTTABOP_get_version, ++ &query, sizeof(query), 1); ++ if (rc < 0) ++ return rc; ++ else ++ return query.version; ++} ++ ++static void *_gnttab_map_table(int xc_handle, int domid, int *gnt_num) ++{ ++ int rc, i; ++ struct gnttab_query_size query; ++ struct gnttab_setup_table setup; ++ unsigned long *frame_list = NULL; ++ xen_pfn_t *pfn_list = NULL; ++ struct grant_entry_v1 *gnt = NULL; ++ ++ if (!gnt_num) ++ return NULL; ++ ++ query.dom = domid; ++ rc = xc_gnttab_op(xc_handle, GNTTABOP_query_size, ++ &query, sizeof(query), 1); ++ ++ if (rc || (query.status != GNTST_okay) ) ++ { ++ ERROR("Could not query dom's grant size\n", domid); ++ return NULL; ++ } ++ ++ *gnt_num = query.nr_frames * ++ (PAGE_SIZE / sizeof(struct grant_entry_v1) ); ++ ++ frame_list = malloc(query.nr_frames * sizeof(unsigned long)); ++ if (!frame_list || lock_pages(frame_list, query.nr_frames * ++ sizeof(unsigned long))) ++ { ++ ERROR("Alloc/lock frame_list in xc_gnttab_map_table\n"); ++ if (frame_list) ++ free(frame_list); ++ return NULL; ++ } ++ ++ pfn_list = malloc(query.nr_frames * sizeof(xen_pfn_t)); ++ ++ if (!pfn_list) ++ { ++ ERROR("Could not lock pfn_list in xc_gnttab_map_table\n"); ++ goto err; ++ } ++ ++ setup.dom = domid; ++ setup.nr_frames = query.nr_frames; ++ set_xen_guest_handle(setup.frame_list, frame_list); ++ ++ /* XXX Any race with other setup_table hypercall? */ ++ rc = xc_gnttab_op(xc_handle, GNTTABOP_setup_table, ++ &setup, sizeof(setup), 1); ++ ++ if (rc ||( setup.status != GNTST_okay) ) ++ { ++ ERROR("Could not get grant table frame list\n"); ++ goto err; ++ } ++ ++ for (i = 0; i < setup.nr_frames; i++) ++ pfn_list[i] = frame_list[i]; ++ ++ gnt = xc_map_foreign_pages(xc_handle, domid, PROT_READ, ++ pfn_list, setup.nr_frames); ++ if (!gnt) ++ { ++ ERROR("Could not map grant table\n"); ++ goto err; ++ } ++ ++err: ++ if (frame_list) ++ { ++ unlock_pages(frame_list, query.nr_frames * sizeof(unsigned long)); ++ free(frame_list); ++ } ++ if (pfn_list) ++ free(pfn_list); ++ ++ return gnt; ++} ++ ++struct grant_entry_v1 *xc_gnttab_map_table_v1(int xc_handle, int domid, ++ int *gnt_num) ++{ ++ if (xc_gnttab_get_version(xc_handle, domid) == 2) ++ return NULL; ++ return _gnttab_map_table(xc_handle, domid, gnt_num); ++} ++ ++struct grant_entry_v2 *xc_gnttab_map_table_v2(int xc_handle, int domid, ++ int *gnt_num) ++{ ++ if (xc_gnttab_get_version(xc_handle, domid) != 2) ++ return NULL; ++ return _gnttab_map_table(xc_handle, domid, gnt_num); ++} ++ + /* + * Local variables: + * mode: C +diff --git a/tools/libxc/xc_offline_page.c b/tools/libxc/xc_offline_page.c +new file mode 100644 +index 0000000..21d26bd +--- /dev/null ++++ b/tools/libxc/xc_offline_page.c +@@ -0,0 +1,789 @@ ++/****************************************************************************** ++ * xc_offline_page.c ++ * ++ * Helper functions to offline/online one page ++ * ++ * Copyright (c) 2003, K A Fraser. ++ * Copyright (c) 2009, Intel Corporation. 
++ */ ++ ++#include ++#include ++#include ++#include ++#include ++#include ++#include ++ ++#include "xc_private.h" ++#include "xc_dom.h" ++#include "xg_private.h" ++#include "xg_save_restore.h" ++ ++struct domain_mem_info{ ++ int domid; ++ unsigned int pt_level; ++ unsigned int guest_width; ++ uint32_t *pfn_type; ++ xen_pfn_t *p2m_table; ++ unsigned long p2m_size; ++ xen_pfn_t *m2p_table; ++ int max_mfn; ++}; ++ ++struct pte_backup_entry ++{ ++ xen_pfn_t table_mfn; ++ int offset; ++}; ++ ++#define DEFAULT_BACKUP_COUNT 1024 ++struct pte_backup ++{ ++ struct pte_backup_entry *entries; ++ int max; ++ int cur; ++}; ++ ++/* Global definition for some MACRO */ ++int guest_width, p2m_size; ++ ++int xc_mark_page_online(int xc, unsigned long start, ++ unsigned long end, uint32_t *status) ++{ ++ DECLARE_SYSCTL; ++ int ret = -1; ++ ++ if ( !status || (end < start) ) ++ return -EINVAL; ++ ++ if (lock_pages(status, sizeof(uint32_t)*(end - start + 1))) ++ { ++ ERROR("Could not lock memory for xc_mark_page_online\n"); ++ return -EINVAL; ++ } ++ ++ sysctl.cmd = XEN_SYSCTL_page_offline_op; ++ sysctl.u.page_offline.start = start; ++ sysctl.u.page_offline.cmd = sysctl_page_online; ++ sysctl.u.page_offline.end = end; ++ set_xen_guest_handle(sysctl.u.page_offline.status, status); ++ ret = xc_sysctl(xc, &sysctl); ++ ++ unlock_pages(status, sizeof(uint32_t)*(end - start + 1)); ++ ++ return ret; ++} ++ ++int xc_mark_page_offline(int xc, unsigned long start, ++ unsigned long end, uint32_t *status) ++{ ++ DECLARE_SYSCTL; ++ int ret = -1; ++ ++ if ( !status || (end < start) ) ++ return -EINVAL; ++ ++ if (lock_pages(status, sizeof(uint32_t)*(end - start + 1))) ++ { ++ ERROR("Could not lock memory for xc_mark_page_offline"); ++ return -EINVAL; ++ } ++ ++ sysctl.cmd = XEN_SYSCTL_page_offline_op; ++ sysctl.u.page_offline.start = start; ++ sysctl.u.page_offline.cmd = sysctl_page_offline; ++ sysctl.u.page_offline.end = end; ++ set_xen_guest_handle(sysctl.u.page_offline.status, status); ++ ret = xc_sysctl(xc, &sysctl); ++ ++ unlock_pages(status, sizeof(uint32_t)*(end - start + 1)); ++ ++ return ret; ++} ++ ++int xc_query_page_offline_status(int xc, unsigned long start, ++ unsigned long end, uint32_t *status) ++{ ++ DECLARE_SYSCTL; ++ int ret = -1; ++ ++ if ( !status || (end < start) ) ++ return -EINVAL; ++ ++ if (lock_pages(status, sizeof(uint32_t)*(end - start + 1))) ++ { ++ ERROR("Could not lock memory for xc_query_page_offline_status\n"); ++ return -EINVAL; ++ } ++ ++ sysctl.cmd = XEN_SYSCTL_page_offline_op; ++ sysctl.u.page_offline.start = start; ++ sysctl.u.page_offline.cmd = sysctl_query_page_offline; ++ sysctl.u.page_offline.end = end; ++ set_xen_guest_handle(sysctl.u.page_offline.status, status); ++ ret = xc_sysctl(xc, &sysctl); ++ ++ unlock_pages(status, sizeof(uint32_t)*(end - start + 1)); ++ ++ return ret; ++} ++ ++ /* ++ * There should no update to the grant when domain paused ++ */ ++static int xc_is_page_granted_v1(int xc_handle, xen_pfn_t gpfn, ++ struct grant_entry_v1 *gnttab, int gnt_num) ++{ ++ int i = 0; ++ ++ if (!gnttab) ++ return 0; ++ ++ for (i = 0; i < gnt_num; i++) ++ if ( ((gnttab[i].flags & GTF_type_mask) != GTF_invalid) && ++ (gnttab[i].frame == gpfn) ) ++ break; ++ ++ return (i != gnt_num); ++} ++ ++static int xc_is_page_granted_v2(int xc_handle, xen_pfn_t gpfn, ++ struct grant_entry_v2 *gnttab, int gnt_num) ++{ ++ int i = 0; ++ ++ if (!gnttab) ++ return 0; ++ ++ for (i = 0; i < gnt_num; i++) ++ if ( ((gnttab[i].hdr.flags & GTF_type_mask) != GTF_invalid) && ++ (gnttab[i].frame == gpfn) ) ++ 
break; ++ ++ return (i != gnt_num); ++} ++ ++static xen_pfn_t pfn_to_mfn(xen_pfn_t pfn, xen_pfn_t *p2m, int gwidth) ++{ ++ return ((xen_pfn_t) ((gwidth==8)? ++ (((uint64_t *)p2m)[(pfn)]): ++ ((((uint32_t *)p2m)[(pfn)]) == 0xffffffffU ? ++ (-1UL) : ++ (((uint32_t *)p2m)[(pfn)])))); ++} ++ ++static int get_pt_level(int xc_handle, uint32_t domid, ++ unsigned int *pt_level, ++ unsigned int *gwidth) ++{ ++ DECLARE_DOMCTL; ++ xen_capabilities_info_t xen_caps = ""; ++ ++ if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0) ++ return -1; ++ ++ memset(&domctl, 0, sizeof(domctl)); ++ domctl.domain = domid; ++ domctl.cmd = XEN_DOMCTL_get_address_size; ++ ++ if ( do_domctl(xc_handle, &domctl) != 0 ) ++ return -1; ++ ++ *gwidth = domctl.u.address_size.size / 8; ++ ++ if (strstr(xen_caps, "xen-3.0-x86_64")) ++ /* Depends on whether it's a compat 32-on-64 guest */ ++ *pt_level = ( (*gwidth == 8) ? 4 : 3 ); ++ else if (strstr(xen_caps, "xen-3.0-x86_32p")) ++ *pt_level = 3; ++ else if (strstr(xen_caps, "xen-3.0-x86_32")) ++ *pt_level = 2; ++ else ++ return -1; ++ ++ return 0; ++} ++ ++static int close_mem_info(int xc_handle, struct domain_mem_info *minfo) ++{ ++ if (minfo->pfn_type) ++ free(minfo->pfn_type); ++ munmap(minfo->m2p_table, M2P_SIZE(minfo->max_mfn)); ++ munmap(minfo->p2m_table, P2M_FLL_ENTRIES * PAGE_SIZE); ++ minfo->p2m_table = minfo->m2p_table = NULL; ++ ++ return 0; ++} ++ ++static int init_mem_info(int xc_handle, int domid, ++ struct domain_mem_info *minfo, ++ xc_dominfo_t *info) ++{ ++ uint64_aligned_t shared_info_frame; ++ shared_info_any_t *live_shinfo = NULL; ++ int i, rc; ++ ++ /* Only be initialized once */ ++ if (minfo->pfn_type || minfo->m2p_table || minfo->p2m_table) ++ return -EINVAL; ++ ++ if ( get_pt_level(xc_handle, domid, &minfo->pt_level, ++ &minfo->guest_width) ) ++ { ++ ERROR("Unable to get PT level info."); ++ return -EFAULT; ++ } ++ guest_width = minfo->guest_width; ++ ++ shared_info_frame = info->shared_info_frame; ++ ++ live_shinfo = xc_map_foreign_range(xc_handle, domid, ++ PAGE_SIZE, PROT_READ, shared_info_frame); ++ if ( !live_shinfo ) ++ { ++ ERROR("Couldn't map live_shinfo"); ++ return -EFAULT; ++ } ++ ++ if ( (rc = xc_core_arch_map_p2m_writable(xc_handle, minfo->guest_width, ++ info, live_shinfo, &minfo->p2m_table, &minfo->p2m_size)) ) ++ { ++ ERROR("Couldn't map p2m table %x\n", rc); ++ goto failed; ++ } ++ munmap(live_shinfo, PAGE_SIZE); ++ live_shinfo = NULL; ++ ++ p2m_size = minfo->p2m_size; ++ ++ minfo->max_mfn = xc_memory_op(xc_handle, XENMEM_maximum_ram_page, NULL); ++ if ( !(minfo->m2p_table = ++ xc_map_m2p(xc_handle, minfo->max_mfn, PROT_READ, NULL)) ) ++ { ++ ERROR("Failed to map live M2P table"); ++ goto failed; ++ } ++ ++ /* Get pfn type */ ++ minfo->pfn_type = malloc(sizeof(uint32_t) * minfo->p2m_size); ++ if (!minfo->pfn_type) ++ { ++ ERROR("Failed to malloc pfn_type\n"); ++ goto failed; ++ } ++ memset(minfo->pfn_type, 0, sizeof(uint32_t) * minfo->p2m_size); ++ ++ for (i = 0; i < minfo->p2m_size; i++) ++ minfo->pfn_type[i] = pfn_to_mfn(i, minfo->p2m_table, ++ minfo->guest_width); ++ ++ if ( lock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(uint32_t)) ) ++ { ++ ERROR("Unable to lock pfn_type array"); ++ goto failed; ++ } ++ ++ for (i = 0; i < minfo->p2m_size ; i+=1024) ++ { ++ int count = ((p2m_size - i ) > 1024 ) ? 
1024: (p2m_size - i); ++ if ( ( rc = xc_get_pfn_type_batch(xc_handle, domid, count, ++ minfo->pfn_type + i)) ) ++ { ++ ERROR("Failed to get pfn_type %x\n", rc); ++ goto unlock; ++ } ++ } ++ return 0; ++ ++unlock: ++ unlock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(uint32_t)); ++failed: ++ if (minfo->pfn_type) ++ { ++ minfo->pfn_type = NULL; ++ free(minfo->pfn_type); ++ } ++ if (live_shinfo) ++ munmap(live_shinfo, PAGE_SIZE); ++ munmap(minfo->m2p_table, M2P_SIZE(minfo->max_mfn)); ++ munmap(minfo->p2m_table, P2M_FLL_ENTRIES * PAGE_SIZE); ++ minfo->p2m_table = minfo->m2p_table = NULL; ++ ++ return -1; ++} ++ ++static int backup_ptes(xen_pfn_t table_mfn, int offset, ++ struct pte_backup *backup) ++{ ++ if (!backup) ++ return -EINVAL; ++ ++ if (backup->max == backup->cur) ++ { ++ backup->entries = realloc(backup->entries, ++ backup->max * 2 * sizeof(struct pte_backup_entry)); ++ if (backup->entries == NULL) ++ return -1; ++ else ++ backup->max *= 2; ++ } ++ ++ backup->entries[backup->cur].table_mfn = table_mfn; ++ backup->entries[backup->cur++].offset = offset; ++ ++ return 0; ++} ++ ++/* ++ * return: ++ * 1 when MMU update is required ++ * 0 when no changes ++ * <0 when error happen ++ */ ++typedef int (*pte_func)(uint64_t pte, uint64_t *new_pte, ++ unsigned long table_mfn, int table_offset, ++ struct pte_backup *backup, ++ unsigned long no_use); ++ ++static int __clear_pte(uint64_t pte, uint64_t *new_pte, ++ unsigned long table_mfn, int table_offset, ++ struct pte_backup *backup, ++ unsigned long mfn) ++{ ++ /* If no new_pte pointer, same as no changes needed */ ++ if (!new_pte || !backup) ++ return -EINVAL; ++ ++ if ( !(pte & _PAGE_PRESENT)) ++ return 0; ++ ++ /* XXX Check for PSE bit here */ ++ /* Hit one entry */ ++ if ( ((pte >> PAGE_SHIFT_X86) & MFN_MASK_X86) == mfn) ++ { ++ *new_pte = pte & ~_PAGE_PRESENT; ++ if (!backup_ptes(table_mfn, table_offset, backup)) ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int __update_pte(uint64_t pte, uint64_t *new_pte, ++ unsigned long table_mfn, int table_offset, ++ struct pte_backup *backup, ++ unsigned long new_mfn) ++{ ++ int index; ++ ++ if (!new_pte) ++ return 0; ++ ++ for (index = 0; index < backup->cur; index ++) ++ if ( (backup->entries[index].table_mfn == table_mfn) && ++ (backup->entries[index].offset == table_offset) ) ++ break; ++ ++ if (index != backup->cur) ++ { ++ if (pte & _PAGE_PRESENT) ++ ERROR("Page present while in backup ptes\n"); ++ pte &= ~MFN_MASK_X86; ++ pte |= (new_mfn << PAGE_SHIFT_X86) | _PAGE_PRESENT; ++ *new_pte = pte; ++ return 1; ++ } ++ ++ return 0; ++} ++ ++static int change_pte(int xc_handle, int domid, ++ struct domain_mem_info *minfo, ++ struct pte_backup *backup, ++ struct xc_mmu *mmu, ++ pte_func func, ++ unsigned long data) ++{ ++ int pte_num, rc; ++ uint64_t i; ++ void *content = NULL; ++ ++ pte_num = PAGE_SIZE / ((minfo->pt_level == 2) ? 
4 : 8); ++ ++ for (i = 0; i < minfo->p2m_size; i++) ++ { ++ xen_pfn_t table_mfn = pfn_to_mfn(i, minfo->p2m_table, ++ minfo->guest_width); ++ uint64_t pte, new_pte; ++ int j; ++ ++ if ( table_mfn == INVALID_P2M_ENTRY ) ++ continue; ++ ++ if ( minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) ++ { ++ content = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, ++ PROT_READ, table_mfn); ++ if (!content) ++ goto failed; ++ ++ for (j = 0; j < pte_num; j++) ++ { ++ if ( minfo->pt_level == 2 ) ++ pte = ((const uint32_t*)content)[j]; ++ else ++ pte = ((const uint64_t*)content)[j]; ++ ++ rc = func(pte, &new_pte, table_mfn, j, backup, data); ++ ++ switch (rc) ++ { ++ case 1: ++ if ( xc_add_mmu_update(xc_handle, mmu, ++ table_mfn << PAGE_SHIFT | ++ j * ( (minfo->pt_level == 2) ? ++ sizeof(uint32_t): sizeof(uint64_t)) | ++ MMU_PT_UPDATE_PRESERVE_AD, ++ new_pte) ) ++ goto failed; ++ break; ++ ++ case 0: ++ break; ++ ++ default: ++ goto failed; ++ } ++ } ++ } ++ ++ munmap(content, PAGE_SIZE); ++ content = NULL; ++ } ++ ++ if ( xc_flush_mmu_updates(xc_handle, mmu) ) ++ goto failed; ++ ++ return 0; ++failed: ++ /* XXX Shall we take action if we have fail to swap? */ ++ if (content) ++ munmap(content, PAGE_SIZE); ++ ++ return -1; ++} ++ ++static int update_pte(int xc_handle, int domid, ++ struct domain_mem_info *minfo, ++ struct pte_backup *backup, ++ struct xc_mmu *mmu, ++ unsigned long new_mfn) ++{ ++ return change_pte(xc_handle, domid, minfo, backup, mmu, ++ __update_pte, new_mfn); ++} ++ ++static int clear_pte(int xc_handle, int domid, ++ struct domain_mem_info *minfo, ++ struct pte_backup *backup, ++ struct xc_mmu *mmu, ++ xen_pfn_t mfn) ++{ ++ return change_pte(xc_handle, domid, minfo, backup, mmu, ++ __clear_pte, mfn); ++} ++ ++static int exchange_page(int xc_handle, xen_pfn_t mfn, ++ xen_pfn_t *new_mfn, int domid) ++{ ++ int rc; ++ xen_pfn_t out_mfn; ++ ++ struct xen_memory_exchange exchange = { ++ .in = { ++ .nr_extents = 1, ++ .extent_order = 0, ++ .domid = domid ++ }, ++ .out = { ++ .nr_extents = 1, ++ .extent_order = 0, ++ .domid = domid ++ } ++ }; ++ set_xen_guest_handle(exchange.in.extent_start, &mfn); ++ set_xen_guest_handle(exchange.out.extent_start, &out_mfn); ++ ++ rc = xc_memory_op(xc_handle, XENMEM_exchange, &exchange); ++ ++ if (!rc) ++ *new_mfn = out_mfn; ++ ++ return rc; ++} ++ ++/* ++ * Check if a page can be exchanged successfully ++ */ ++ ++static int is_page_exchangable(int xc_handle, int domid, xen_pfn_t mfn, ++ xc_dominfo_t *info) ++{ ++ uint32_t status; ++ int rc; ++ ++ /* domain checking */ ++ if ( !domid || (domid > DOMID_FIRST_RESERVED) ) ++ { ++ DPRINTF("Dom0's page can't be LM"); ++ return 0; ++ } ++ if (info->hvm) ++ { ++ DPRINTF("Currently we can only live change PV guest's page\n"); ++ return 0; ++ } ++ ++ /* Check if pages are offline pending or not */ ++ rc = xc_query_page_offline_status(xc_handle, mfn, mfn, &status); ++ ++ if ( rc || !(status & PG_OFFLINE_STATUS_OFFLINE_PENDING) ) ++ { ++ ERROR("Page %lx is not offline pending %x\n", ++ mfn, status); ++ return 0; ++ } ++ ++ return 1; ++} ++ ++/* The domain should be suspended when called here */ ++int xc_exchange_page(int xc_handle, int domid, xen_pfn_t mfn) ++{ ++ xc_dominfo_t info; ++ struct domain_mem_info minfo; ++ struct xc_mmu *mmu = NULL; ++ struct pte_backup old_ptes = {NULL, 0, 0}; ++ struct grant_entry_v1 *gnttab_v1 = NULL; ++ struct grant_entry_v2 *gnttab_v2 = NULL; ++ struct mmuext_op mops; ++ int gnt_num, unpined = 0; ++ void *old_p, *backup = NULL; ++ int rc, result = -1; ++ uint32_t status; ++ 
xen_pfn_t new_mfn, gpfn; ++ ++ if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 ) ++ { ++ ERROR("Could not get domain info"); ++ return -EFAULT; ++ } ++ ++ if (!info.shutdown || info.shutdown_reason != SHUTDOWN_suspend) ++ { ++ ERROR("Can't exchange page unless domain is suspended\n"); ++ return -EINVAL; ++ } ++ ++ if (!is_page_exchangable(xc_handle, domid, mfn, &info)) ++ { ++ ERROR("Could not exchange page\n"); ++ return -EINVAL; ++ } ++ ++ /* Get domain's memory information */ ++ memset(&minfo, 0, sizeof(minfo)); ++ init_mem_info(xc_handle, domid, &minfo, &info); ++ gpfn = minfo.m2p_table[mfn]; ++ ++ /* Don't exchange CR3 for PAE guest in PAE host environment */ ++ if (minfo.guest_width > sizeof(long)) ++ { ++ if ( (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) == ++ XEN_DOMCTL_PFINFO_L3TAB ) ++ goto failed; ++ } ++ ++ gnttab_v2 = xc_gnttab_map_table_v2(xc_handle, domid, &gnt_num); ++ if (!gnttab_v2) ++ { ++ gnttab_v1 = xc_gnttab_map_table_v1(xc_handle, domid, &gnt_num); ++ if (!gnttab_v1) ++ { ++ ERROR("Failed to map grant table\n"); ++ goto failed; ++ } ++ } ++ ++ if (gnttab_v1 ++ ? xc_is_page_granted_v1(xc_handle, mfn, gnttab_v1, gnt_num) ++ : xc_is_page_granted_v2(xc_handle, mfn, gnttab_v2, gnt_num)) ++ { ++ ERROR("Page %lx is granted now\n", mfn); ++ goto failed; ++ } ++ ++ /* allocate required data structure */ ++ backup = malloc(PAGE_SIZE); ++ if (!backup) ++ { ++ ERROR("Failed to allocate backup pages pointer\n"); ++ goto failed; ++ } ++ ++ old_ptes.max = DEFAULT_BACKUP_COUNT; ++ old_ptes.entries = malloc(sizeof(struct pte_backup_entry) * ++ DEFAULT_BACKUP_COUNT); ++ ++ if (!old_ptes.entries) ++ { ++ ERROR("Faield to allocate backup\n"); ++ goto failed; ++ } ++ old_ptes.cur = 0; ++ ++ /* Unpin the page if it is pined */ ++ if (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LPINTAB) ++ { ++ mops.cmd = MMUEXT_UNPIN_TABLE; ++ mops.arg1.mfn = mfn; ++ ++ if ( xc_mmuext_op(xc_handle, &mops, 1, domid) < 0 ) ++ { ++ ERROR("Failed to unpin page %lx", mfn); ++ goto failed; ++ } ++ mops.arg1.mfn = mfn; ++ unpined = 1; ++ } ++ ++ /* backup the content */ ++ old_p = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, ++ PROT_READ, mfn); ++ if (!old_p) ++ { ++ ERROR("Failed to map foreign page %lx\n", mfn); ++ goto failed; ++ } ++ ++ memcpy(backup, old_p, PAGE_SIZE); ++ munmap(old_p, PAGE_SIZE); ++ ++ mmu = xc_alloc_mmu_updates(xc_handle, domid); ++ if ( mmu == NULL ) ++ { ++ ERROR("%s: failed at %d\n", __FUNCTION__, __LINE__); ++ goto failed; ++ } ++ ++ /* Firstly update all pte to be invalid to remove the reference */ ++ rc = clear_pte(xc_handle, domid, &minfo, &old_ptes, mmu, mfn); ++ ++ if (rc) ++ { ++ ERROR("clear pte failed\n"); ++ goto failed; ++ } ++ ++ rc = exchange_page(xc_handle, mfn, &new_mfn, domid); ++ ++ if (rc) ++ { ++ ERROR("Exchange the page failed\n"); ++ /* Exchange fail means there are refere to the page still */ ++ rc = update_pte(xc_handle, domid, &minfo, &old_ptes, mmu, mfn); ++ if (rc) ++ result = -2; ++ goto failed; ++ } ++ ++ rc = update_pte(xc_handle, domid, &minfo, &old_ptes, mmu, new_mfn); ++ ++ if (rc) ++ { ++ ERROR("update pte failed guest may be broken now\n"); ++ /* No recover action now for swap fail */ ++ result = -2; ++ goto failed; ++ } ++ ++ /* Check if pages are offlined already */ ++ rc = xc_query_page_offline_status(xc_handle, mfn, mfn, ++ &status); ++ ++ if (rc) ++ { ++ ERROR("Fail to query offline status\n"); ++ }else if ( !(status & PG_OFFLINE_STATUS_OFFLINED) ) ++ { ++ ERROR("page is still online or pending\n"); ++ goto failed; ++ } ++ 
else ++ { ++ void *new_p; ++ IPRINTF("Now page is offlined %lx\n", mfn); ++ /* Update the p2m table */ ++ minfo.p2m_table[gpfn] = new_mfn; ++ ++ new_p = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE, ++ PROT_READ|PROT_WRITE, new_mfn); ++ memcpy(new_p, backup, PAGE_SIZE); ++ munmap(new_p, PAGE_SIZE); ++ mops.arg1.mfn = new_mfn; ++ result = 0; ++ } ++ ++failed: ++ ++ if (unpined && (minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LPINTAB)) ++ { ++ switch ( minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK ) ++ { ++ case XEN_DOMCTL_PFINFO_L1TAB: ++ mops.cmd = MMUEXT_PIN_L1_TABLE; ++ break; ++ ++ case XEN_DOMCTL_PFINFO_L2TAB: ++ mops.cmd = MMUEXT_PIN_L2_TABLE; ++ break; ++ ++ case XEN_DOMCTL_PFINFO_L3TAB: ++ mops.cmd = MMUEXT_PIN_L3_TABLE; ++ break; ++ ++ case XEN_DOMCTL_PFINFO_L4TAB: ++ mops.cmd = MMUEXT_PIN_L4_TABLE; ++ break; ++ ++ default: ++ ERROR("Unpined for non pate table page\n"); ++ break; ++ } ++ ++ if ( xc_mmuext_op(xc_handle, &mops, 1, domid) < 0 ) ++ { ++ ERROR("failed to pin the mfn again\n"); ++ result = -2; ++ } ++ } ++ ++ if (mmu) ++ free(mmu); ++ ++ if (old_ptes.entries) ++ free(old_ptes.entries); ++ ++ if (backup) ++ free(backup); ++ ++ if (gnttab_v1) ++ munmap(gnttab_v1, gnt_num / (PAGE_SIZE/sizeof(struct grant_entry_v1))); ++ if (gnttab_v2) ++ munmap(gnttab_v2, gnt_num / (PAGE_SIZE/sizeof(struct grant_entry_v2))); ++ ++ close_mem_info(xc_handle, &minfo); ++ ++ return result; ++} +diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h +index c9b1866..10ab1ea 100644 +--- a/tools/libxc/xenctrl.h ++++ b/tools/libxc/xenctrl.h +@@ -937,6 +937,13 @@ int xc_gnttab_munmap(int xcg_handle, + int xc_gnttab_set_max_grants(int xcg_handle, + uint32_t count); + ++int xc_gnttab_op(int xc_handle, int cmd, ++ void * op, int op_size, int count); ++ ++int xc_gnttab_get_version(int xc_handle, int domid); ++struct grant_entry_v1 *xc_gnttab_map_table_v1(int xc_handle, int domid, int *gnt_num); ++struct grant_entry_v2 *xc_gnttab_map_table_v2(int xc_handle, int domid, int *gnt_num); ++ + int xc_physdev_map_pirq(int xc_handle, + int domid, + int index, +diff --git a/tools/python/xen/xend/XendDevices.py b/tools/python/xen/xend/XendDevices.py +index 4463842..5350781 100644 +--- a/tools/python/xen/xend/XendDevices.py ++++ b/tools/python/xen/xend/XendDevices.py +@@ -19,8 +19,8 @@ + # A collection of DevControllers + # + +-from xen.xend.server import blkif, netif, tpmif, pciif, iopif, irqif, vfbif, vscsiif +-from xen.xend.server.BlktapController import BlktapController ++from xen.xend.server import blkif, netif, tpmif, pciif, iopif, irqif, vfbif, vscsiif, netif2 ++from xen.xend.server.BlktapController import BlktapController, Blktap2Controller + from xen.xend.server.ConsoleController import ConsoleController + + +@@ -37,6 +37,7 @@ class XendDevices: + controllers = { + 'vbd': blkif.BlkifController, + 'vif': netif.NetifController, ++ 'vif2': netif2.NetifController2, + 'vtpm': tpmif.TPMifController, + 'pci': pciif.PciController, + 'ioports': iopif.IOPortsController, +diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py +index 4b74189..4edb5ad 100644 +--- a/tools/python/xen/xend/XendDomainInfo.py ++++ b/tools/python/xen/xend/XendDomainInfo.py +@@ -1162,7 +1162,7 @@ class XendDomainInfo: + break + self._waitForDevice_destroy(deviceClass, devid, backend) + +- if rm_cfg: ++ if rm_cfg and deviceClass != "vif2": + if deviceClass == 'vif': + if self.domid is not None: + for dev_num, dev_info in sxprs: +diff --git a/tools/python/xen/xend/server/netif2.py 
b/tools/python/xen/xend/server/netif2.py +new file mode 100644 +index 0000000..a098c13 +--- /dev/null ++++ b/tools/python/xen/xend/server/netif2.py +@@ -0,0 +1,163 @@ ++#============================================================================ ++# This library is free software; you can redistribute it and/or ++# modify it under the terms of version 2.1 of the GNU Lesser General Public ++# License as published by the Free Software Foundation. ++# ++# This library is distributed in the hope that it will be useful, ++# but WITHOUT ANY WARRANTY; without even the implied warranty of ++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ++# Lesser General Public License for more details. ++# ++# You should have received a copy of the GNU Lesser General Public ++# License along with this library; if not, write to the Free Software ++# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA ++#============================================================================ ++# Copyright (C) 2004, 2005 Mike Wray ++# Copyright (C) 2005 XenSource Ltd ++# Copyright (C) 2008 Citrix Systems Inc. ++#============================================================================ ++# ++# Based closely on netif.py. ++# ++ ++"""Support for virtual network interfaces, version 2. ++""" ++ ++import os ++import random ++import re ++import time ++ ++from xen.xend import XendOptions ++from xen.xend.server.DevController import DevController ++from xen.xend.XendError import VmError ++from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance ++from xen.xend.xenstore.xstransact import xstransact ++import xen.util.xsm.xsm as security ++ ++from xen.xend.XendLogging import log ++ ++xoptions = XendOptions.instance() ++ ++def randomMAC(): ++ """Generate a random MAC address. ++ ++ Uses OUI (Organizationally Unique Identifier) 00-16-3E, allocated to ++ Xensource, Inc. The OUI list is available at ++ http://standards.ieee.org/regauth/oui/oui.txt. ++ ++ The remaining 3 fields are random, with the first bit of the first ++ random field set 0. 
++ ++ @return: MAC address string ++ """ ++ mac = [ 0x00, 0x16, 0x3e, ++ random.randint(0x00, 0x7f), ++ random.randint(0x00, 0xff), ++ random.randint(0x00, 0xff) ] ++ return ':'.join(map(lambda x: "%02x" % x, mac)) ++ ++class NetifController2(DevController): ++ def __init__(self, vm): ++ DevController.__init__(self, vm) ++ ++ def getDeviceDetails(self, config): ++ """@see DevController.getDeviceDetails""" ++ ++ devid = self.allocateDeviceID() ++ ++ bridge = config.get('bridge') ++ back_mac = config.get('back_mac') ++ if not back_mac: ++ if bridge: ++ back_mac = "fe:ff:ff:ff:ff:ff" ++ else: ++ back_mac = randomMAC() ++ front_mac = config.get('front_mac') or randomMAC() ++ front_trust = config.get("trusted") or "0" ++ back_trust = config.get("back_trusted") or "1" ++ max_bypasses = config.get("max_bypasses") or "5" ++ pdev = config.get('pdev') ++ front_filter = config.get("front_filter_mac") ++ if front_filter == None: ++ if back_trust == "0": ++ front_filter = "1" ++ else: ++ front_filter = "0" ++ back_filter = config.get("filter_mac") ++ if back_filter == None: ++ if front_trust == "0": ++ back_filter = "1" ++ else: ++ back_filter = "0" ++ back = { 'mac': back_mac, 'remote-mac': front_mac, ++ 'handle': "%i" % devid, 'local-trusted': back_trust, ++ 'remote-trusted': front_trust, 'filter-mac': back_filter, ++ 'max-bypasses': max_bypasses } ++ ++ front = { 'mac': front_mac, 'remote-mac': back_mac, ++ 'local-trusted': front_trust, 'remote-trusted': back_trust, ++ 'filter-mac': front_filter } ++ ++ if bridge: ++ back['bridge'] = bridge ++ ++ if pdev: ++ back['pdev'] = pdev ++ ++ return (devid, back, front) ++ ++ def getDeviceConfiguration(self, devid, transaction = None): ++ """@see DevController.configuration""" ++ ++ if transaction is None: ++ read_fn = xstransact.Read ++ else: ++ read_fn = transaction.read ++ def front_read(x): ++ return read_fn(frontpath + x) ++ def back_read(x): ++ return read_fn(backpath + x) ++ ++ result = DevController.getDeviceConfiguration(self, devid, transaction) ++ ++ dev = self.convertToDeviceNumber(devid) ++ frontpath = self.frontendPath(dev) + "/" ++ ++ backpath = front_read("backend") + "/" ++ ++ front_mac = front_read("mac") ++ back_mac = back_read("mac") ++ ++ front_trusted = back_read("remote-trusted") ++ back_trusted = back_read("local-trusted") ++ max_bypasses = back_read("max-bypasses") ++ ++ bridge = back_read("bridge") ++ ++ pdev = back_read("pdev") ++ ++ if front_mac: ++ result["front_mac"] = front_mac ++ if back_mac: ++ result["back_mac"] = back_mac ++ if front_trusted: ++ result["front_trusted"] = front_trusted ++ if back_trusted: ++ result["back_trusted"] = back_trusted ++ if bridge: ++ result["bridge"] = bridge ++ if pdev: ++ result["pdev"] = pdev ++ if max_bypasses: ++ result["max-bypasses"] = max_bypasses ++ return result ++ ++ def destroyDevice(self, devid, force): ++ dev = self.convertToDeviceNumber(devid) ++ self.writeBackend(dev, "online", "0") ++ if force: ++ self.writeBackend(dev, "shutdown-request", "force") ++ else: ++ self.writeBackend(dev, "shutdown-request", "normal") ++ self.vm._removeVm("device/%s/%d" % (self.deviceClass, dev)) +diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py +index 78601fe..b07590c 100644 +--- a/tools/python/xen/xm/create.py ++++ b/tools/python/xen/xm/create.py +@@ -386,6 +386,12 @@ gopts.var('vif', val="type=TYPE,mac=MAC,bridge=BRIDGE,ip=IPADDR,script=SCRIPT," + This option may be repeated to add more than one vif. 
+ Specifying vifs will increase the number of interfaces as needed.""") + ++gopts.var('vif2', val="front_mac=MAC,back_mac=MAC,backend=DOM,pdev=PDEV,max_bypasses=N,bridge=BRIDGE,filter_mac=<0|1>,front_filter_mac=<0|1>", ++ fn=append_value, default=[], ++ use="""Add a netchannel2 network interface using given front ++ and backend MAC addresses. Randomly generated ++ addresses will be used if either address is missing.""") ++ + gopts.var('vtpm', val="instance=INSTANCE,backend=DOM,type=TYPE", + fn=append_value, default=[], + use="""Add a TPM interface. On the backend side use the given +@@ -895,6 +901,8 @@ def configure_vifs(config_devs, vals): + + vifs = vals.vif + vifs_n = len(vifs) ++ vifs2 = vals.vif2 ++ vifs2_n = len(vifs2) + + if hasattr(vals, 'nics'): + if vals.nics > 0: +@@ -921,6 +929,18 @@ def configure_vifs(config_devs, vals): + map(f, d.keys()) + config_devs.append(['device', config_vif]) + ++ for c in vifs2: ++ d = comma_sep_kv_to_dict(c) ++ config_vif = ['vif2'] ++ ++ for k in d.keys(): ++ if k not in ['front_mac', 'back_mac', 'backend', 'trusted', ++ 'back_trusted', 'front_filter_mac', 'filter_mac', ++ 'bridge', 'pdev', 'max_bypasses' ]: ++ err('Invalid vif2 option: ' + k) ++ config_vif.append([k, d[k]]) ++ config_devs.append(['device', config_vif]) ++ + + def configure_hvm(config_image, vals): + """Create the config for HVM devices. +diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py +index a460bc8..a1779f7 100644 +--- a/tools/python/xen/xm/main.py ++++ b/tools/python/xen/xm/main.py +@@ -180,6 +180,15 @@ SUBCOMMAND_HELP = { + 'Destroy a domain\'s virtual network device.'), + 'network-list' : (' [--long]', + 'List virtual network interfaces for a domain.'), ++ 'network2-attach': (' [front_mac=] [back_mac=] ' ++ '[backend=] [trusted=<0|1>] ' ++ '[back_trusted=<0|1>] [bridge=] ' ++ '[max_bypasses=n]' ++ 'Create a new version 2 virtual network device.'), ++ 'network2-detach': (' [-f|--force]', ++ 'Destroy a domain\'s version 2 virtual network device.'), ++ 'network2-list' : (' [--long]', ++ 'List version 2 virtual network interfaces for a domain.'), + 'vnet-create' : ('','Create a vnet from ConfigFile.'), + 'vnet-delete' : ('', 'Delete a Vnet.'), + 'vnet-list' : ('[-l|--long]', 'List Vnets.'), +@@ -367,6 +376,9 @@ device_commands = [ + "network-attach", + "network-detach", + "network-list", ++ "network2-attach", ++ "network2-detach", ++ "network2-list", + "vtpm-list", + "pci-attach", + "pci-detach", +@@ -2358,6 +2370,35 @@ def xm_block_configure(args): + server.xend.domain.device_configure(dom, vbd) + + ++def xm_network2_attach(args): ++ xenapi_unsupported() ++ arg_check(args, 'network2-attach', 1, 4) ++ dom = args[0] ++ vif = ['vif2'] ++ vif_params = ['front_mac', 'back_mac', 'backend', 'trusted', ++ 'back_trusted', "front_filter_mac", "filter_mac", ++ 'bridge', 'pdev', "max_bypasses" ] ++ for a in args[1:]: ++ vif_param = a.split("=") ++ if len(vif_param) != 2 or vif_param[1] == "" or \ ++ vif_param[0] not in vif_params: ++ err("Invalid argument: %s" % a) ++ usage("network2-attach") ++ vif.append(vif_param) ++ server.xend.domain.device_create(dom, vif) ++ ++def xm_network2_detach(args): ++ xenapi_unsupported() ++ arg_check(args, "network2-detch", 2, 3) ++ detach(args, "vif2") ++ ++def xm_network2_list(args): ++ xenapi_unsupported() ++ (use_long, params) = arg_check_for_resource_list(args, "network2-list") ++ dom = params[0] ++ devs = server.xend.domain.getDeviceSxprs(dom, 'vif2') ++ map(PrettyPrint.prettyprint, devs) ++ + def xm_network_attach(args): + 
arg_check(args, 'network-attach', 1, 11) + +@@ -2884,6 +2925,9 @@ commands = { + "network-attach": xm_network_attach, + "network-detach": xm_network_detach, + "network-list": xm_network_list, ++ "network2-attach": xm_network2_attach, ++ "network2-detach": xm_network2_detach, ++ "network2-list": xm_network2_list, + # network (as in XenAPI) + "network-new": xm_network_new, + "network-del": xm_network_del, +diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c +index 42652ca..1c7bbb8 100644 +--- a/xen/arch/x86/hvm/hvm.c ++++ b/xen/arch/x86/hvm/hvm.c +@@ -1926,10 +1926,26 @@ enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack) + return hvm_intblk_none; + } + ++static int grant_table_op_is_allowed(unsigned int cmd) ++{ ++ switch (cmd) { ++ case GNTTABOP_query_size: ++ case GNTTABOP_setup_table: ++ case GNTTABOP_set_version: ++ case GNTTABOP_copy: ++ case GNTTABOP_map_grant_ref: ++ case GNTTABOP_unmap_grant_ref: ++ return 1; ++ default: ++ /* all other commands need auditing */ ++ return 0; ++ } ++} ++ + static long hvm_grant_table_op( + unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count) + { +- if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) ) ++ if ( !grant_table_op_is_allowed(cmd) ) + return -ENOSYS; /* all other commands need auditing */ + return do_grant_table_op(cmd, uop, count); + } +@@ -1981,6 +1997,15 @@ static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = { + + #else /* defined(__x86_64__) */ + ++static long hvm_grant_table_op_compat32(unsigned int cmd, ++ XEN_GUEST_HANDLE(void) uop, ++ unsigned int count) ++{ ++ if ( !grant_table_op_is_allowed(cmd) ) ++ return -ENOSYS; ++ return compat_grant_table_op(cmd, uop, count); ++} ++
+ static long hvm_memory_op_compat32(int cmd, XEN_GUEST_HANDLE(void) arg) + { + long rc = compat_memory_op(cmd, arg); +diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c +index 0d6d5ee..067e136 100644 +--- a/xen/arch/x86/mm.c ++++ b/xen/arch/x86/mm.c +@@ -3882,12 +3882,25 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg) + case XENMAPSPACE_grant_table: + spin_lock(&d->grant_table->lock); + +- if ( (xatp.idx >= nr_grant_frames(d->grant_table)) && +- (xatp.idx < max_nr_grant_frames) ) +- gnttab_grow_table(d, xatp.idx + 1); ++ if ( d->grant_table->gt_version == 0 ) ++ d->grant_table->gt_version = 1; + +- if ( xatp.idx < nr_grant_frames(d->grant_table) ) +- mfn = virt_to_mfn(d->grant_table->shared[xatp.idx]); ++ if ( d->grant_table->gt_version == 2 && ++ (xatp.idx & XENMAPIDX_grant_table_status) ) ++ { ++ xatp.idx &= ~XENMAPIDX_grant_table_status; ++ if ( xatp.idx < nr_status_frames(d->grant_table) ) ++ mfn = virt_to_mfn(d->grant_table->status[xatp.idx]); ++ } ++ else ++ { ++ if ( (xatp.idx >= nr_grant_frames(d->grant_table)) && ++ (xatp.idx < max_nr_grant_frames) ) ++ gnttab_grow_table(d, xatp.idx + 1); ++ ++ if ( xatp.idx < nr_grant_frames(d->grant_table) ) ++ mfn = virt_to_mfn(d->grant_table->shared_raw[xatp.idx]); ++ } + + spin_unlock(&d->grant_table->lock); + break; +diff --git a/xen/common/compat/grant_table.c b/xen/common/compat/grant_table.c +index 5f0dc2d..cd4f1cd 100644 +--- a/xen/common/compat/grant_table.c ++++ b/xen/common/compat/grant_table.c +@@ -5,9 +5,17 @@ + + #include + +-#define xen_grant_entry grant_entry +-CHECK_grant_entry; +-#undef xen_grant_entry ++#define xen_grant_entry_v1 grant_entry_v1 ++CHECK_grant_entry_v1; ++#undef xen_grant_entry_v1 ++ ++#define xen_grant_entry_header grant_entry_header ++CHECK_grant_entry_header; ++#undef xen_grant_entry_header ++ ++#define xen_grant_entry_v2 grant_entry_v2 ++CHECK_grant_entry_v2; ++#undef xen_grant_entry_v2 + + #define xen_gnttab_map_grant_ref gnttab_map_grant_ref + CHECK_gnttab_map_grant_ref; +@@ -29,6 +37,16 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_copy_compat_t); + CHECK_gnttab_dump_table; + #undef xen_gnttab_dump_table + ++#define xen_gnttab_set_version gnttab_set_version ++CHECK_gnttab_set_version; ++#undef xen_gnttab_set_version ++ ++DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_compat_t); ++ ++#define xen_gnttab_get_version gnttab_get_version ++CHECK_gnttab_get_version; ++#undef xen_gnttab_get_version ++ + int compat_grant_table_op(unsigned int cmd, + XEN_GUEST_HANDLE(void) cmp_uop, + unsigned int count) +@@ -74,6 +92,10 @@ int compat_grant_table_op(unsigned int cmd, + CASE(dump_table); + #endif + ++#ifndef CHECK_gnttab_get_status_frames ++ CASE(get_status_frames); ++#endif ++ + #undef CASE + default: + return do_grant_table_op(cmd, cmp_uop, count); +@@ -90,11 +112,13 @@ int compat_grant_table_op(unsigned int cmd, + struct gnttab_setup_table *setup; + struct gnttab_transfer *xfer; + struct gnttab_copy *copy; ++ struct gnttab_get_status_frames *get_status; + } nat; + union { + struct compat_gnttab_setup_table setup; + struct compat_gnttab_transfer xfer; + struct compat_gnttab_copy copy; ++ struct compat_gnttab_get_status_frames get_status; + } cmp; + + set_xen_guest_handle(nat.uop, COMPAT_ARG_XLAT_VIRT_BASE); +@@ -216,6 +240,63 @@ int compat_grant_table_op(unsigned int cmd, + } + break; + ++ case GNTTABOP_get_status_frames: { ++ unsigned int max_frame_list_size_in_pages = ++ (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.get_status)) / ++ sizeof(*nat.get_status->frame_list.p); ++ if ( count != 1) ++ { ++ rc = -EINVAL; ++ 
break; ++ } ++ if ( unlikely(__copy_from_guest(&cmp.get_status, cmp_uop, 1) || ++ !compat_handle_okay(cmp.get_status.frame_list, ++ cmp.get_status.nr_frames)) ) ++ { ++ rc = -EFAULT; ++ break; ++ } ++ if ( max_frame_list_size_in_pages < ++ grant_to_status_frames(max_nr_grant_frames) ) ++ { ++ gdprintk(XENLOG_WARNING, ++ "grant_to_status_frames(max_nr_grant_frames) is too large (%u,%u)\n", ++ grant_to_status_frames(max_nr_grant_frames), ++ max_frame_list_size_in_pages); ++ rc = -EINVAL; ++ break; ++ } ++ ++#define XLAT_gnttab_get_status_frames_HNDL_frame_list(_d_, _s_) \ ++ set_xen_guest_handle((_d_)->frame_list, (uint64_t *)(nat.get_status + 1)) ++ XLAT_gnttab_get_status_frames(nat.get_status, &cmp.get_status); ++#undef XLAT_gnttab_get_status_frames_HNDL_frame_list ++ ++ rc = gnttab_get_status_frames( ++ guest_handle_cast(nat.uop, gnttab_get_status_frames_t), ++ count); ++ if ( rc >= 0 ) ++ { ++#define XLAT_gnttab_get_status_frames_HNDL_frame_list(_d_, _s_) \ ++ do \ ++ { \ ++ if ( (_s_)->status == GNTST_okay ) \ ++ { \ ++ for ( i = 0; i < (_s_)->nr_frames; ++i ) \ ++ { \ ++ uint64_t frame = (_s_)->frame_list.p[i]; \ ++ (void)__copy_to_compat_offset((_d_)->frame_list, i, &frame, 1); \ ++ } \ ++ } \ ++ } while (0) ++ XLAT_gnttab_get_status_frames(&cmp.get_status, nat.get_status); ++#undef XLAT_gnttab_get_status_frames_HNDL_frame_list ++ if ( unlikely(__copy_to_guest(cmp_uop, &cmp.get_status, 1)) ) ++ rc = -EFAULT; ++ } ++ break; ++ } ++ + default: + domain_crash(current->domain); + break; +diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c +index 5306354..40fc183 100644 +--- a/xen/common/grant_table.c ++++ b/xen/common/grant_table.c +@@ -104,9 +104,24 @@ static unsigned inline int max_nr_maptrack_frames(void) + } + + +-#define SHGNT_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_t)) +-#define shared_entry(t, e) \ +- ((t)->shared[(e)/SHGNT_PER_PAGE][(e)%SHGNT_PER_PAGE]) ++#define SHGNT_PER_PAGE_V1 (PAGE_SIZE / sizeof(grant_entry_v1_t)) ++#define shared_entry_v1(t, e) \ ++ ((t)->shared_v1[(e)/SHGNT_PER_PAGE_V1][(e)%SHGNT_PER_PAGE_V1]) ++#define SHGNT_PER_PAGE_V2 (PAGE_SIZE / sizeof(grant_entry_v2_t)) ++#define shared_entry_v2(t, e) \ ++ ((t)->shared_v2[(e)/SHGNT_PER_PAGE_V2][(e)%SHGNT_PER_PAGE_V2]) ++#define STGNT_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t)) ++#define status_entry(t, e) \ ++ ((t)->status[(e)/STGNT_PER_PAGE][(e)%STGNT_PER_PAGE]) ++static grant_entry_header_t * ++shared_entry_header(struct grant_table *t, grant_ref_t ref) ++{ ++ ASSERT(t->gt_version != 0); ++ if (t->gt_version == 1) ++ return (grant_entry_header_t*)&shared_entry_v1(t, ref); ++ else ++ return &shared_entry_v2(t, ref).hdr; ++} + #define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry)) + #define active_entry(t, e) \ + ((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE]) +@@ -182,6 +197,189 @@ get_maptrack_handle( + return handle; + } + ++/* Number of grant table entries. Caller must hold d's grant table lock. 
*/ ++static unsigned int nr_grant_entries(struct grant_table *gt) ++{ ++ ASSERT(gt->gt_version != 0); ++ if (gt->gt_version == 1) ++ return (nr_grant_frames(gt) << PAGE_SHIFT) / sizeof(grant_entry_v1_t); ++ else ++ return (nr_grant_frames(gt) << PAGE_SHIFT) / sizeof(grant_entry_v2_t); ++} ++ ++static int _set_status_v1(domid_t domid, ++ int readonly, ++ int mapflag, ++ grant_entry_header_t *shah, ++ struct active_grant_entry *act) ++{ ++ int rc = GNTST_okay; ++ union grant_combo scombo, prev_scombo, new_scombo; ++ uint16_t mask = GTF_type_mask; ++ ++ /* ++ * We bound the number of times we retry CMPXCHG on memory locations that ++ * we share with a guest OS. The reason is that the guest can modify that ++ * location at a higher rate than we can read-modify-CMPXCHG, so the guest ++ * could cause us to livelock. There are a few cases where it is valid for ++ * the guest to race our updates (e.g., to change the GTF_readonly flag), ++ * so we allow a few retries before failing. ++ */ ++ int retries = 0; ++ ++ /* if this is a grant mapping operation we should ensure GTF_sub_page ++ is not set */ ++ if (mapflag) ++ mask |= GTF_sub_page; ++ ++ scombo.word = *(u32 *)shah; ++ ++ /* ++ * This loop attempts to set the access (reading/writing) flags ++ * in the grant table entry. It tries a cmpxchg on the field ++ * up to five times, and then fails under the assumption that ++ * the guest is misbehaving. ++ */ ++ for ( ; ; ) ++ { ++ /* If not already pinned, check the grant domid and type. */ ++ if ( !act->pin && ++ (((scombo.shorts.flags & mask) != ++ GTF_permit_access) || ++ (scombo.shorts.domid != domid)) ) ++ PIN_FAIL(done, GNTST_general_error, ++ "Bad flags (%x) or dom (%d). (expected dom %d)\n", ++ scombo.shorts.flags, scombo.shorts.domid, ++ domid); ++ ++ new_scombo = scombo; ++ new_scombo.shorts.flags |= GTF_reading; ++ ++ if ( !readonly ) ++ { ++ new_scombo.shorts.flags |= GTF_writing; ++ if ( unlikely(scombo.shorts.flags & GTF_readonly) ) ++ PIN_FAIL(done, GNTST_general_error, ++ "Attempt to write-pin a r/o grant entry.\n"); ++ } ++ ++ prev_scombo.word = cmpxchg((u32 *)shah, ++ scombo.word, new_scombo.word); ++ if ( likely(prev_scombo.word == scombo.word) ) ++ break; ++ ++ if ( retries++ == 4 ) ++ PIN_FAIL(done, GNTST_general_error, ++ "Shared grant entry is unstable.\n"); ++ ++ scombo = prev_scombo; ++ } ++ ++done: ++ return rc; ++} ++ ++static int _set_status_v2(domid_t domid, ++ int readonly, ++ int mapflag, ++ grant_entry_header_t *shah, ++ struct active_grant_entry *act, ++ grant_status_t *status) ++{ ++ int rc = GNTST_okay; ++ union grant_combo scombo; ++ uint16_t flags = shah->flags; ++ domid_t id = shah->domid; ++ uint16_t mask = GTF_type_mask; ++ ++ /* we read flags and domid in a single memory access. ++ this avoids the need for another memory barrier to ++ ensure access to these fields are not reordered */ ++ scombo.word = *(u32 *)shah; ++ barrier(); /* but we still need to stop the compiler from turning ++ it back into two reads */ ++ flags = scombo.shorts.flags; ++ id = scombo.shorts.domid; ++ ++ /* if this is a grant mapping operation we should ensure GTF_sub_page ++ is not set */ ++ if (mapflag) ++ mask |= GTF_sub_page; ++ ++ /* If not already pinned, check the grant domid and type. */ ++ if ( !act->pin && ++ ( (((flags & mask) != GTF_permit_access) && ++ ((flags & mask) != GTF_transitive)) || ++ (id != domid)) ) ++ PIN_FAIL(done, GNTST_general_error, ++ "Bad flags (%x) or dom (%d). 
(expected dom %d, flags %x)\n", ++ flags, id, domid, mask); ++ ++ if ( readonly ) ++ { ++ *status |= GTF_reading; ++ } ++ else ++ { ++ if ( unlikely(flags & GTF_readonly) ) ++ PIN_FAIL(done, GNTST_general_error, ++ "Attempt to write-pin a r/o grant entry.\n"); ++ *status |= GTF_reading | GTF_writing; ++ } ++ ++ /* Make sure guest sees status update before checking if flags are ++ still valid */ ++ mb(); ++ ++ scombo.word = *(u32 *)shah; ++ barrier(); ++ flags = scombo.shorts.flags; ++ id = scombo.shorts.domid; ++ ++ if ( !act->pin ) ++ { ++ if ( (((flags & mask) != GTF_permit_access) && ++ ((flags & mask) != GTF_transitive)) || ++ (id != domid) || ++ (!readonly && (flags & GTF_readonly)) ) ++ { ++ gnttab_clear_flag(_GTF_reading | _GTF_writing, status); ++ PIN_FAIL(done, GNTST_general_error, ++ "Unstable flags (%x) or dom (%d). (expected dom %d) " ++ "(r/w: %d)\n", ++ flags, id, domid, !readonly); ++ } ++ } ++ else ++ { ++ if ( unlikely(flags & GTF_readonly) ) ++ { ++ gnttab_clear_flag(_GTF_writing, status); ++ PIN_FAIL(done, GNTST_general_error, ++ "Unstable grant readonly flag\n"); ++ } ++ } ++ ++done: ++ return rc; ++} ++ ++ ++static int _set_status(unsigned gt_version, ++ domid_t domid, ++ int readonly, ++ int mapflag, ++ grant_entry_header_t *shah, ++ struct active_grant_entry *act, ++ grant_status_t *status) ++{ ++ ++ if (gt_version == 1) ++ return _set_status_v1(domid, readonly, mapflag, shah, act); ++ else ++ return _set_status_v2(domid, readonly, mapflag, shah, act, status); ++} ++ + /* + * Returns 0 if TLB flush / invalidate required by caller. + * va will indicate the address to be invalidated. +@@ -203,18 +401,10 @@ __gnttab_map_grant_ref( + unsigned int cache_flags; + struct active_grant_entry *act; + struct grant_mapping *mt; +- grant_entry_t *sha; +- union grant_combo scombo, prev_scombo, new_scombo; +- +- /* +- * We bound the number of times we retry CMPXCHG on memory locations that +- * we share with a guest OS. The reason is that the guest can modify that +- * location at a higher rate than we can read-modify-CMPXCHG, so the guest +- * could cause us to livelock. There are a few cases where it is valid for +- * the guest to race our updates (e.g., to change the GTF_readonly flag), +- * so we allow a few retries before failing. +- */ +- int retries = 0; ++ grant_entry_v1_t *sha1; ++ grant_entry_v2_t *sha2; ++ grant_entry_header_t *shah; ++ uint16_t *status; + + led = current; + ld = led->domain; +@@ -251,73 +441,55 @@ __gnttab_map_grant_ref( + + spin_lock(&rd->grant_table->lock); + ++ if ( rd->grant_table->gt_version == 0 ) ++ PIN_FAIL(unlock_out, GNTST_general_error, ++ "remote grant table not yet set up"); ++ + /* Bounds check on the grant ref */ + if ( unlikely(op->ref >= nr_grant_entries(rd->grant_table))) + PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref); + + act = &active_entry(rd->grant_table, op->ref); +- sha = &shared_entry(rd->grant_table, op->ref); ++ shah = shared_entry_header(rd->grant_table, op->ref); ++ if (rd->grant_table->gt_version == 1) { ++ sha1 = &shared_entry_v1(rd->grant_table, op->ref); ++ sha2 = NULL; ++ status = &shah->flags; ++ } else { ++ sha2 = &shared_entry_v2(rd->grant_table, op->ref); ++ sha1 = NULL; ++ status = &status_entry(rd->grant_table, op->ref); ++ } + + /* If already pinned, check the active domid and avoid refcnt overflow. 
*/ + if ( act->pin && + ((act->domid != ld->domain_id) || +- (act->pin & 0x80808080U) != 0) ) ++ (act->pin & 0x80808080U) != 0 || ++ (act->is_sub_page)) ) + PIN_FAIL(unlock_out, GNTST_general_error, +- "Bad domain (%d != %d), or risk of counter overflow %08x\n", +- act->domid, ld->domain_id, act->pin); ++ "Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n", ++ act->domid, ld->domain_id, act->pin, act->is_sub_page); + + if ( !act->pin || + (!(op->flags & GNTMAP_readonly) && + !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) ) + { +- scombo.word = *(u32 *)&sha->flags; +- +- /* +- * This loop attempts to set the access (reading/writing) flags +- * in the grant table entry. It tries a cmpxchg on the field +- * up to five times, and then fails under the assumption that +- * the guest is misbehaving. +- */ +- for ( ; ; ) +- { +- /* If not already pinned, check the grant domid and type. */ +- if ( !act->pin && +- (((scombo.shorts.flags & GTF_type_mask) != +- GTF_permit_access) || +- (scombo.shorts.domid != ld->domain_id)) ) +- PIN_FAIL(unlock_out, GNTST_general_error, +- "Bad flags (%x) or dom (%d). (expected dom %d)\n", +- scombo.shorts.flags, scombo.shorts.domid, +- ld->domain_id); +- +- new_scombo = scombo; +- new_scombo.shorts.flags |= GTF_reading; +- +- if ( !(op->flags & GNTMAP_readonly) ) +- { +- new_scombo.shorts.flags |= GTF_writing; +- if ( unlikely(scombo.shorts.flags & GTF_readonly) ) +- PIN_FAIL(unlock_out, GNTST_general_error, +- "Attempt to write-pin a r/o grant entry.\n"); +- } +- +- prev_scombo.word = cmpxchg((u32 *)&sha->flags, +- scombo.word, new_scombo.word); +- if ( likely(prev_scombo.word == scombo.word) ) +- break; +- +- if ( retries++ == 4 ) +- PIN_FAIL(unlock_out, GNTST_general_error, +- "Shared grant entry is unstable.\n"); +- +- scombo = prev_scombo; +- } ++ if ( (rc = _set_status(rd->grant_table->gt_version, ++ ld->domain_id, op->flags & GNTMAP_readonly, ++ 1, shah, act, status) ) != GNTST_okay ) ++ goto unlock_out; + + if ( !act->pin ) + { +- act->domid = scombo.shorts.domid; +- act->gfn = sha->frame; +- act->frame = gmfn_to_mfn(rd, sha->frame); ++ act->domid = ld->domain_id; ++ if ( sha1 ) ++ act->gfn = sha1->frame; ++ else ++ act->gfn = sha2->full_page.frame; ++ act->frame = gmfn_to_mfn(rd, act->gfn); ++ act->start = 0; ++ act->length = PAGE_SIZE; ++ act->is_sub_page = 0; + } + } + +@@ -332,7 +504,7 @@ __gnttab_map_grant_ref( + frame = act->frame; + act_pin = act->pin; + +- cache_flags = (sha->flags & (GTF_PAT | GTF_PWT | GTF_PCD) ); ++ cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) ); + + spin_unlock(&rd->grant_table->lock); + +@@ -433,7 +605,7 @@ __gnttab_map_grant_ref( + spin_lock(&rd->grant_table->lock); + + act = &active_entry(rd->grant_table, op->ref); +- sha = &shared_entry(rd->grant_table, op->ref); ++ shah = shared_entry_header(rd->grant_table, op->ref); + + if ( op->flags & GNTMAP_device_map ) + act->pin -= (op->flags & GNTMAP_readonly) ? 
+@@ -444,10 +616,10 @@ __gnttab_map_grant_ref( + + if ( !(op->flags & GNTMAP_readonly) && + !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) ) +- gnttab_clear_flag(_GTF_writing, &sha->flags); ++ gnttab_clear_flag(_GTF_writing, status); + + if ( !act->pin ) +- gnttab_clear_flag(_GTF_reading, &sha->flags); ++ gnttab_clear_flag(_GTF_reading, status); + + unlock_out: + spin_unlock(&rd->grant_table->lock); +@@ -482,7 +654,6 @@ __gnttab_unmap_common( + domid_t dom; + struct domain *ld, *rd; + struct active_grant_entry *act; +- grant_entry_t *sha; + s16 rc = 0; + u32 old_pin; + +@@ -530,7 +701,6 @@ __gnttab_unmap_common( + spin_lock(&rd->grant_table->lock); + + act = &active_entry(rd->grant_table, op->map->ref); +- sha = &shared_entry(rd->grant_table, op->map->ref); + old_pin = act->pin; + + if ( op->frame == 0 ) +@@ -595,7 +765,9 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) + { + struct domain *ld, *rd; + struct active_grant_entry *act; +- grant_entry_t *sha; ++ grant_entry_header_t *sha; ++ struct page_info *pg; ++ uint16_t *status; + + rd = op->rd; + +@@ -614,8 +786,16 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) + rcu_lock_domain(rd); + spin_lock(&rd->grant_table->lock); + ++ if ( rd->grant_table->gt_version == 0 ) ++ goto unmap_out; ++ + act = &active_entry(rd->grant_table, op->map->ref); +- sha = &shared_entry(rd->grant_table, op->map->ref); ++ sha = shared_entry_header(rd->grant_table, op->map->ref); ++ ++ if ( rd->grant_table->gt_version == 1 ) ++ status = &sha->flags; ++ else ++ status = &status_entry(rd->grant_table, op->map->ref); + + if ( unlikely(op->frame != act->frame) ) + { +@@ -664,10 +844,10 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op) + + if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) && + !(op->flags & GNTMAP_readonly) ) +- gnttab_clear_flag(_GTF_writing, &sha->flags); ++ gnttab_clear_flag(_GTF_writing, status); + + if ( act->pin == 0 ) +- gnttab_clear_flag(_GTF_reading, &sha->flags); ++ gnttab_clear_flag(_GTF_reading, status); + + unmap_out: + spin_unlock(&rd->grant_table->lock); +@@ -793,6 +973,50 @@ fault: + return -EFAULT; + } + ++static int ++gnttab_populate_status_frames(struct domain *d, struct grant_table *gt) ++{ ++ unsigned i; ++ unsigned req_status_frames; ++ ++ req_status_frames = grant_to_status_frames(gt->nr_grant_frames); ++ for ( i = nr_status_frames(gt); i < req_status_frames; i++ ) ++ { ++ if ( (gt->status[i] = alloc_xenheap_page()) == NULL ) ++ goto status_alloc_failed; ++ clear_page(gt->status[i]); ++ } ++ /* Share the new status frames with the recipient domain */ ++ for ( i = nr_status_frames(gt); i < req_status_frames; i++ ) ++ gnttab_create_status_page(d, gt, i); ++ ++ gt->nr_status_frames = req_status_frames; ++ ++ return 0; ++ ++status_alloc_failed: ++ for ( i = nr_status_frames(gt); i < req_status_frames; i++ ) ++ { ++ free_xenheap_page(gt->status[i]); ++ gt->status[i] = NULL; ++ } ++ return -ENOMEM; ++} ++ ++static void ++gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt) ++{ ++ int i; ++ ++ for ( i = 0; i < nr_status_frames(gt); i++ ) ++ { ++ page_set_owner(virt_to_page(gt->status[i]), dom_xen); ++ free_xenheap_page(gt->status[i]); ++ gt->status[i] = NULL; ++ } ++ gt->nr_status_frames = 0; ++} ++ + int + gnttab_grow_table(struct domain *d, unsigned int req_nr_frames) + { +@@ -819,9 +1043,9 @@ gnttab_grow_table(struct domain *d, unsigned int req_nr_frames) + /* Shared */ + for ( i = nr_grant_frames(gt); i < req_nr_frames; i++ ) + { +- if ( 
(gt->shared[i] = alloc_xenheap_page()) == NULL ) ++ if ( (gt->shared_raw[i] = alloc_xenheap_page()) == NULL ) + goto shared_alloc_failed; +- clear_page(gt->shared[i]); ++ clear_page(gt->shared_raw[i]); + } + + /* Share the new shared frames with the recipient domain */ +@@ -830,13 +1054,20 @@ gnttab_grow_table(struct domain *d, unsigned int req_nr_frames) + + gt->nr_grant_frames = req_nr_frames; + ++ /* Status pages - version 2 */ ++ if (gt->gt_version > 1) ++ { ++ if ( gnttab_populate_status_frames(d, gt) ) ++ goto shared_alloc_failed; ++ } ++ + return 1; + + shared_alloc_failed: + for ( i = nr_grant_frames(gt); i < req_nr_frames; i++ ) + { +- free_xenheap_page(gt->shared[i]); +- gt->shared[i] = NULL; ++ free_xenheap_page(gt->shared_raw[i]); ++ gt->shared_raw[i] = NULL; + } + active_alloc_failed: + for ( i = nr_active_grant_frames(gt); +@@ -906,7 +1137,13 @@ gnttab_setup_table( + + spin_lock(&d->grant_table->lock); + +- if ( (op.nr_frames > nr_grant_frames(d->grant_table)) && ++ if ( d->grant_table->gt_version == 0 ) ++ d->grant_table->gt_version = 1; ++ ++ if ( (op.nr_frames > nr_grant_frames(d->grant_table) || ++ ( (d->grant_table->gt_version > 1 ) && ++ (grant_to_status_frames(op.nr_frames) > ++ nr_status_frames(d->grant_table)) ) ) && + !gnttab_grow_table(d, op.nr_frames) ) + { + gdprintk(XENLOG_INFO, +@@ -1010,7 +1247,7 @@ gnttab_prepare_for_transfer( + struct domain *rd, struct domain *ld, grant_ref_t ref) + { + struct grant_table *rgt; +- struct grant_entry *sha; ++ grant_entry_header_t *sha; + union grant_combo scombo, prev_scombo, new_scombo; + int retries = 0; + +@@ -1022,6 +1259,14 @@ gnttab_prepare_for_transfer( + + spin_lock(&rgt->lock); + ++ if ( rgt->gt_version == 0 ) ++ { ++ gdprintk(XENLOG_INFO, ++ "Grant table not ready for transfer to domain(%d).\n", ++ rd->domain_id); ++ goto fail; ++ } ++ + if ( unlikely(ref >= nr_grant_entries(rd->grant_table)) ) + { + gdprintk(XENLOG_INFO, +@@ -1030,7 +1275,7 @@ gnttab_prepare_for_transfer( + goto fail; + } + +- sha = &shared_entry(rgt, ref); ++ sha = shared_entry_header(rgt, ref); + + scombo.word = *(u32 *)&sha->flags; + +@@ -1079,7 +1324,6 @@ gnttab_transfer( + struct domain *e; + struct page_info *page; + int i; +- grant_entry_t *sha; + struct gnttab_transfer gop; + unsigned long mfn; + unsigned int max_bitsize; +@@ -1209,11 +1453,21 @@ gnttab_transfer( + /* Tell the guest about its new page frame. 
*/ + spin_lock(&e->grant_table->lock); + +- sha = &shared_entry(e->grant_table, gop.ref); +- guest_physmap_add_page(e, sha->frame, mfn, 0); +- sha->frame = mfn; ++ if ( e->grant_table->gt_version == 1 ) ++ { ++ grant_entry_v1_t *sha = &shared_entry_v1(e->grant_table, gop.ref); ++ guest_physmap_add_page(e, sha->frame, mfn, 0); ++ sha->frame = mfn; ++ } ++ else ++ { ++ grant_entry_v2_t *sha = &shared_entry_v2(e->grant_table, gop.ref); ++ guest_physmap_add_page(e, sha->full_page.frame, mfn, 0); ++ sha->full_page.frame = mfn; ++ } + wmb(); +- sha->flags |= GTF_transfer_completed; ++ shared_entry_header(e->grant_table, gop.ref)->flags |= ++ GTF_transfer_completed; + + spin_unlock(&e->grant_table->lock); + +@@ -1239,16 +1493,40 @@ static void + __release_grant_for_copy( + struct domain *rd, unsigned long gref, int readonly) + { +- grant_entry_t *sha; ++ grant_entry_header_t *sha; + struct active_grant_entry *act; + unsigned long r_frame; ++ uint16_t *status; ++ domid_t trans_domid; ++ grant_ref_t trans_gref; ++ int released_read; ++ int released_write; ++ struct domain *trans_dom; ++ ++ released_read = 0; ++ released_write = 0; + + spin_lock(&rd->grant_table->lock); + + act = &active_entry(rd->grant_table, gref); +- sha = &shared_entry(rd->grant_table, gref); ++ sha = shared_entry_header(rd->grant_table, gref); + r_frame = act->frame; + ++ if (rd->grant_table->gt_version == 1) ++ { ++ status = &sha->flags; ++ trans_domid = rd->domain_id; ++ /* Shut the compiler up. This'll never be used, because ++ trans_domid == rd->domain_id, but gcc doesn't know that. */ ++ trans_gref = 0x1234567; ++ } ++ else ++ { ++ status = &status_entry(rd->grant_table, gref); ++ trans_domid = act->trans_dom; ++ trans_gref = act->trans_gref; ++ } ++ + if ( readonly ) + { + act->pin -= GNTPIN_hstr_inc; +@@ -1259,13 +1537,51 @@ __release_grant_for_copy( + + act->pin -= GNTPIN_hstw_inc; + if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) ) +- gnttab_clear_flag(_GTF_writing, &sha->flags); ++ { ++ released_write = 1; ++ gnttab_clear_flag(_GTF_writing, status); ++ } + } + + if ( !act->pin ) +- gnttab_clear_flag(_GTF_reading, &sha->flags); ++ { ++ gnttab_clear_flag(_GTF_reading, status); ++ released_read = 1; ++ } + + spin_unlock(&rd->grant_table->lock); ++ ++ if ( trans_domid != rd->domain_id ) ++ { ++ if ( released_write || released_read ) ++ { ++ trans_dom = rcu_lock_domain_by_id(trans_domid); ++ if ( trans_dom != NULL ) ++ { ++ /* Recursive calls, but they're tail calls, so it's ++ okay. */ ++ if ( released_write ) ++ __release_grant_for_copy(trans_dom, trans_gref, 0); ++ else if ( released_read ) ++ __release_grant_for_copy(trans_dom, trans_gref, 1); ++ } ++ } ++ } ++} ++ ++/* The status for a grant indicates that we're taking more access than ++ the pin requires. Fix up the status to match the pin. Called ++ under the domain's grant table lock. */ ++/* Only safe on transitive grants. Even then, note that we don't ++ attempt to drop any pin on the referent grant. */ ++static void __fixup_status_for_pin(struct active_grant_entry *act, ++ uint16_t *status) ++{ ++ if ( !(act->pin & GNTPIN_hstw_mask) ) ++ *status &= ~_GTF_writing; ++ ++ if ( !(act->pin & GNTPIN_hstr_mask) ) ++ *status &= ~_GTF_reading; + } + + /* Grab a frame number from a grant entry and update the flags and pin +@@ -1274,82 +1590,182 @@ __release_grant_for_copy( + actually valid. 
*/ + static int + __acquire_grant_for_copy( +- struct domain *rd, unsigned long gref, int readonly, +- unsigned long *frame) ++ struct domain *rd, unsigned long gref, struct domain *ld, int readonly, ++ unsigned long *frame, unsigned *page_off, unsigned *length, ++ unsigned allow_transitive, struct domain **owning_domain) + { +- grant_entry_t *sha; ++ grant_entry_v1_t *sha1; ++ grant_entry_v2_t *sha2; ++ grant_entry_header_t *shah; + struct active_grant_entry *act; ++ grant_status_t *status; ++ uint32_t old_pin; ++ domid_t trans_domid; ++ grant_ref_t trans_gref; ++ struct domain *rrd; ++ unsigned long grant_frame; ++ unsigned trans_page_off; ++ unsigned trans_length; ++ int is_sub_page; ++ struct domain *ignore; + s16 rc = GNTST_okay; +- int retries = 0; +- union grant_combo scombo, prev_scombo, new_scombo; ++ ++ *owning_domain = NULL; + + spin_lock(&rd->grant_table->lock); + ++ if ( rd->grant_table->gt_version == 0 ) ++ PIN_FAIL(unlock_out, GNTST_general_error, ++ "remote grant table not ready\n"); ++ + if ( unlikely(gref >= nr_grant_entries(rd->grant_table)) ) + PIN_FAIL(unlock_out, GNTST_bad_gntref, + "Bad grant reference %ld\n", gref); + + act = &active_entry(rd->grant_table, gref); +- sha = &shared_entry(rd->grant_table, gref); +- ++ shah = shared_entry_header(rd->grant_table, gref); ++ if ( rd->grant_table->gt_version == 1 ) ++ { ++ sha1 = &shared_entry_v1(rd->grant_table, gref); ++ sha2 = NULL; ++ status = &shah->flags; ++ } ++ else ++ { ++ sha1 = NULL; ++ sha2 = &shared_entry_v2(rd->grant_table, gref); ++ status = &status_entry(rd->grant_table, gref); ++ } ++ + /* If already pinned, check the active domid and avoid refcnt overflow. */ + if ( act->pin && +- ((act->domid != current->domain->domain_id) || ++ ((act->domid != ld->domain_id) || + (act->pin & 0x80808080U) != 0) ) + PIN_FAIL(unlock_out, GNTST_general_error, + "Bad domain (%d != %d), or risk of counter overflow %08x\n", +- act->domid, current->domain->domain_id, act->pin); ++ act->domid, ld->domain_id, act->pin); + ++ old_pin = act->pin; + if ( !act->pin || + (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) ) + { +- scombo.word = *(u32 *)&sha->flags; ++ if ( (rc = _set_status(rd->grant_table->gt_version, ++ ld->domain_id, ++ readonly, 0, shah, act, ++ status) ) != GNTST_okay ) ++ goto unlock_out; + +- for ( ; ; ) ++ trans_domid = ld->domain_id; ++ trans_gref = 0; ++ if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive ) + { +- /* If not already pinned, check the grant domid and type. */ +- if ( !act->pin && +- (((scombo.shorts.flags & GTF_type_mask) != +- GTF_permit_access) || +- (scombo.shorts.domid != current->domain->domain_id)) ) +- PIN_FAIL(unlock_out, GNTST_general_error, +- "Bad flags (%x) or dom (%d). 
(expected dom %d)\n", +- scombo.shorts.flags, scombo.shorts.domid, +- current->domain->domain_id); +- +- new_scombo = scombo; +- new_scombo.shorts.flags |= GTF_reading; +- +- if ( !readonly ) +- { +- new_scombo.shorts.flags |= GTF_writing; +- if ( unlikely(scombo.shorts.flags & GTF_readonly) ) +- PIN_FAIL(unlock_out, GNTST_general_error, +- "Attempt to write-pin a r/o grant entry.\n"); +- } ++ if ( !allow_transitive ) ++ PIN_FAIL(unlock_out, GNTST_general_error, ++ "transitive grant when transitivity not allowed\n"); + +- prev_scombo.word = cmpxchg((u32 *)&sha->flags, +- scombo.word, new_scombo.word); +- if ( likely(prev_scombo.word == scombo.word) ) +- break; ++ trans_domid = sha2->transitive.trans_domid; ++ trans_gref = sha2->transitive.gref; ++ barrier(); /* Stop the compiler from re-loading ++ trans_domid from shared memory */ ++ if ( trans_domid == rd->domain_id ) ++ PIN_FAIL(unlock_out, GNTST_general_error, ++ "transitive grants cannot be self-referential\n"); ++ ++ /* We allow the trans_domid == ld->domain_id case, which ++ corresponds to a grant being issued by one domain, sent ++ to another one, and then transitively granted back to ++ the original domain. Allowing it is easy, and means ++ that you don't need to go out of your way to avoid it ++ in the guest. */ + +- if ( retries++ == 4 ) ++ rrd = rcu_lock_domain_by_id(trans_domid); ++ if ( rrd == NULL ) + PIN_FAIL(unlock_out, GNTST_general_error, +- "Shared grant entry is unstable.\n"); ++ "transitive grant referenced bad domain %d\n", ++ trans_domid); ++ spin_unlock(&rd->grant_table->lock); ++ ++ rc = __acquire_grant_for_copy(rrd, trans_gref, rd, ++ readonly, &grant_frame, ++ &trans_page_off, &trans_length, ++ 0, &ignore); ++ ++ spin_lock(&rd->grant_table->lock); ++ if ( rc != GNTST_okay ) { ++ __fixup_status_for_pin(act, status); ++ spin_unlock(&rd->grant_table->lock); ++ return rc; ++ } ++ ++ /* We dropped the lock, so we have to check that nobody ++ else tried to pin (or, for that matter, unpin) the ++ reference in *this* domain. If they did, just give up ++ and try again. */ ++ if ( act->pin != old_pin ) ++ { ++ __fixup_status_for_pin(act, status); ++ spin_unlock(&rd->grant_table->lock); ++ return __acquire_grant_for_copy(rd, gref, ld, readonly, ++ frame, page_off, length, ++ allow_transitive, ++ owning_domain); ++ } + +- scombo = prev_scombo; ++ /* The actual remote remote grant may or may not be a ++ sub-page, but we always treat it as one because that ++ blocks mappings of transitive grants. 
*/ ++ is_sub_page = 1; ++ *owning_domain = rrd; ++ act->gfn = INVALID_GFN; ++ } ++ else if ( sha1 ) ++ { ++ act->gfn = sha1->frame; ++ grant_frame = gmfn_to_mfn(rd, act->gfn); ++ is_sub_page = 0; ++ trans_page_off = 0; ++ trans_length = PAGE_SIZE; ++ *owning_domain = rd; ++ } ++ else if ( !(sha2->hdr.flags & GTF_sub_page) ) ++ { ++ act->gfn = sha2->full_page.frame; ++ grant_frame = gmfn_to_mfn(rd, act->gfn); ++ is_sub_page = 0; ++ trans_page_off = 0; ++ trans_length = PAGE_SIZE; ++ *owning_domain = rd; ++ } ++ else ++ { ++ act->gfn = sha2->sub_page.frame; ++ grant_frame = gmfn_to_mfn(rd, act->gfn); ++ is_sub_page = 1; ++ trans_page_off = sha2->sub_page.page_off; ++ trans_length = sha2->sub_page.length; ++ *owning_domain = rd; + } + + if ( !act->pin ) + { +- act->domid = scombo.shorts.domid; +- act->gfn = sha->frame; +- act->frame = gmfn_to_mfn(rd, sha->frame); ++ act->domid = ld->domain_id; ++ act->is_sub_page = is_sub_page; ++ act->start = trans_page_off; ++ act->length = trans_length; ++ act->trans_dom = trans_domid; ++ act->trans_gref = trans_gref; ++ act->frame = grant_frame; + } + } ++ else ++ { ++ *owning_domain = rd; ++ } + + act->pin += readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc; + ++ *page_off = act->start; ++ *length = act->length; + *frame = act->frame; + + unlock_out: +@@ -1362,6 +1778,7 @@ __gnttab_copy( + struct gnttab_copy *op) + { + struct domain *sd = NULL, *dd = NULL; ++ struct domain *source_domain = NULL, *dest_domain = NULL; + unsigned long s_frame, d_frame; + char *sp, *dp; + s16 rc = GNTST_okay; +@@ -1401,19 +1818,29 @@ __gnttab_copy( + + if ( src_is_gref ) + { +- rc = __acquire_grant_for_copy(sd, op->source.u.ref, 1, &s_frame); ++ unsigned source_off, source_len; ++ rc = __acquire_grant_for_copy(sd, op->source.u.ref, current->domain, 1, ++ &s_frame, &source_off, &source_len, 1, ++ &source_domain); + if ( rc != GNTST_okay ) + goto error_out; + have_s_grant = 1; ++ if ( op->source.offset < source_off || ++ op->len > source_len ) ++ PIN_FAIL(error_out, GNTST_general_error, ++ "copy source out of bounds: %d < %d || %d > %d\n", ++ op->source.offset, source_off, ++ op->len, source_len); + } + else + { + s_frame = gmfn_to_mfn(sd, op->source.u.gmfn); ++ source_domain = sd; + } + if ( unlikely(!mfn_valid(s_frame)) ) + PIN_FAIL(error_out, GNTST_general_error, + "source frame %lx invalid.\n", s_frame); +- if ( !get_page(mfn_to_page(s_frame), sd) ) ++ if ( !get_page(mfn_to_page(s_frame), source_domain) ) + { + if ( !sd->is_dying ) + gdprintk(XENLOG_WARNING, "Could not get src frame %lx\n", s_frame); +@@ -1424,19 +1851,30 @@ __gnttab_copy( + + if ( dest_is_gref ) + { +- rc = __acquire_grant_for_copy(dd, op->dest.u.ref, 0, &d_frame); ++ unsigned dest_off, dest_len; ++ rc = __acquire_grant_for_copy(dd, op->dest.u.ref, current->domain, 0, ++ &d_frame, &dest_off, &dest_len, 1, ++ &dest_domain); + if ( rc != GNTST_okay ) + goto error_out; + have_d_grant = 1; ++ if ( op->dest.offset < dest_off || ++ op->len > dest_len ) ++ PIN_FAIL(error_out, GNTST_general_error, ++ "copy dest out of bounds: %d < %d || %d > %d\n", ++ op->dest.offset, dest_off, ++ op->len, dest_len); + } + else + { + d_frame = gmfn_to_mfn(dd, op->dest.u.gmfn); ++ dest_domain = dd; + } + if ( unlikely(!mfn_valid(d_frame)) ) + PIN_FAIL(error_out, GNTST_general_error, + "destination frame %lx invalid.\n", d_frame); +- if ( !get_page_and_type(mfn_to_page(d_frame), dd, PGT_writable_page) ) ++ if ( !get_page_and_type(mfn_to_page(d_frame), dest_domain, ++ PGT_writable_page) ) + { + if ( !dd->is_dying ) + 
gdprintk(XENLOG_WARNING, "Could not get dst frame %lx\n", d_frame); +@@ -1487,6 +1925,165 @@ gnttab_copy( + return 0; + } + ++static long ++gnttab_set_version(XEN_GUEST_HANDLE(gnttab_set_version_t uop)) ++{ ++ gnttab_set_version_t op; ++ struct domain *d = current->domain; ++ struct grant_table *gt = d->grant_table; ++ struct active_grant_entry *act; ++ long res = 0; ++ int i; ++ ++ if (copy_from_guest(&op, uop, 1)) ++ return -EFAULT; ++ ++ if (op.version != 1 && op.version != 2) ++ return -EINVAL; ++ ++ spin_lock(>->lock); ++ /* Make sure that the grant table isn't currently in use when we ++ change the version number. */ ++ /* (You need to change the version number for e.g. kexec.) */ ++ if ( gt->gt_version != 0 ) ++ { ++ for ( i = 0; i < nr_grant_entries(gt); i++ ) ++ { ++ act = &active_entry(gt, i); ++ if ( act->pin != 0 ) ++ { ++ gdprintk(XENLOG_WARNING, ++ "tried to change grant table version from %d to %d, but some grant entries still in use\n", ++ gt->gt_version, ++ op.version); ++ res = -EBUSY; ++ goto out; ++ } ++ } ++ } ++ ++ /* XXX: If we're going to version 2, we could maybe shrink the ++ active grant table here. */ ++ ++ if ( op.version == 2 && gt->gt_version < 2 ) ++ { ++ res = gnttab_populate_status_frames(d, gt); ++ if ( res < 0) ++ goto out; ++ } ++ ++ if ( op.version < 2 && gt->gt_version == 2 ) ++ gnttab_unpopulate_status_frames(d, gt); ++ ++ if ( op.version != gt->gt_version ) ++ { ++ /* Make sure there's no crud left over in the table from the ++ old version. */ ++ for ( i = 0; i < nr_grant_frames(gt); i++ ) ++ memset(gt->shared_raw[i], 0, PAGE_SIZE); ++ } ++ ++ gt->gt_version = op.version; ++ ++out: ++ spin_unlock(>->lock); ++ ++ return res; ++} ++ ++static long ++gnttab_get_status_frames(XEN_GUEST_HANDLE(gnttab_get_status_frames_t) uop, ++ int count) ++{ ++ gnttab_get_status_frames_t op; ++ struct domain *d; ++ struct grant_table *gt; ++ uint64_t gmfn; ++ int i; ++ int rc; ++ ++ if ( count != 1 ) ++ return -EINVAL; ++ ++ if ( unlikely(copy_from_guest(&op, uop, 1) != 0) ) ++ { ++ gdprintk(XENLOG_INFO, ++ "Fault while reading gnttab_get_status_frames_t.\n"); ++ return -EFAULT; ++ } ++ ++ rc = rcu_lock_target_domain_by_id(op.dom, &d); ++ if ( rc < 0 ) ++ { ++ if ( rc == -ESRCH ) ++ op.status = GNTST_bad_domain; ++ else if ( rc == -EPERM ) ++ op.status = GNTST_permission_denied; ++ else ++ op.status = GNTST_general_error; ++ goto out1; ++ } ++ ++ gt = d->grant_table; ++ ++ if ( unlikely(op.nr_frames > nr_status_frames(gt)) ) { ++ gdprintk(XENLOG_INFO, "Guest requested addresses for %d grant status " ++ "frames, but only %d are available.\n", ++ op.nr_frames, nr_status_frames(gt)); ++ op.status = GNTST_general_error; ++ goto out2; ++ } ++ ++ op.status = GNTST_okay; ++ ++ spin_lock(>->lock); ++ ++ for ( i = 0; i < op.nr_frames; i++ ) ++ { ++ gmfn = gnttab_status_gmfn(d, d->grant_table, i); ++ if (copy_to_guest_offset(op.frame_list, ++ i, ++ &gmfn, ++ 1)) ++ op.status = GNTST_bad_virt_addr; ++ } ++ ++ spin_unlock(>->lock); ++out2: ++ rcu_unlock_domain(d); ++out1: ++ if ( unlikely(copy_to_guest(uop, &op, 1)) ) ++ return -EFAULT; ++ ++ return 0; ++} ++ ++static long ++gnttab_get_version(XEN_GUEST_HANDLE(gnttab_get_version_t uop)) ++{ ++ gnttab_get_version_t op; ++ struct domain *d; ++ ++ if ( copy_from_guest(&op, uop, 1) ) ++ return -EFAULT; ++ d = rcu_lock_domain_by_id(op.dom); ++ if ( d == NULL ) ++ return -ESRCH; ++ if ( !IS_PRIV_FOR(current->domain, d) ) ++ { ++ rcu_unlock_domain(d); ++ return -EPERM; ++ } ++ spin_lock(&d->grant_table->lock); ++ op.version = 
d->grant_table->gt_version; ++ spin_unlock(&d->grant_table->lock); ++ ++ if ( copy_to_guest(uop, &op, 1) ) ++ return -EFAULT; ++ else ++ return 0; ++} ++ + long + do_grant_table_op( + unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count) +@@ -1562,6 +2159,22 @@ do_grant_table_op( + guest_handle_cast(uop, gnttab_query_size_t), count); + break; + } ++ case GNTTABOP_set_version: ++ { ++ rc = gnttab_set_version(guest_handle_cast(uop, gnttab_set_version_t)); ++ break; ++ } ++ case GNTTABOP_get_status_frames: ++ { ++ rc = gnttab_get_status_frames( ++ guest_handle_cast(uop, gnttab_get_status_frames_t), count); ++ break; ++ } ++ case GNTTABOP_get_version: ++ { ++ rc = gnttab_get_version(guest_handle_cast(uop, gnttab_get_version_t)); ++ break; ++ } + default: + rc = -ENOSYS; + break; +@@ -1579,7 +2192,7 @@ do_grant_table_op( + + static unsigned int max_nr_active_grant_frames(void) + { +- return (((max_nr_grant_frames * (PAGE_SIZE / sizeof(grant_entry_t))) + ++ return (((max_nr_grant_frames * (PAGE_SIZE / sizeof(grant_entry_v1_t))) + + ((PAGE_SIZE / sizeof(struct active_grant_entry))-1)) + / (PAGE_SIZE / sizeof(struct active_grant_entry))); + } +@@ -1591,9 +2204,6 @@ grant_table_create( + struct grant_table *t; + int i; + +- /* If this sizeof assertion fails, fix the function: shared_index */ +- ASSERT(sizeof(grant_entry_t) == 8); +- + if ( (t = xmalloc(struct grant_table)) == NULL ) + goto no_mem_0; + +@@ -1628,28 +2238,36 @@ grant_table_create( + t->maptrack[0][i].ref = i+1; + + /* Shared grant table. */ +- if ( (t->shared = xmalloc_array(struct grant_entry *, +- max_nr_grant_frames)) == NULL ) ++ if ( (t->shared_raw = xmalloc_array(void *, max_nr_grant_frames)) == NULL ) + goto no_mem_3; +- memset(t->shared, 0, max_nr_grant_frames * sizeof(t->shared[0])); ++ memset(t->shared_raw, 0, max_nr_grant_frames * sizeof(t->shared_raw[0])); + for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ ) + { +- if ( (t->shared[i] = alloc_xenheap_page()) == NULL ) ++ if ( (t->shared_raw[i] = alloc_xenheap_page()) == NULL ) + goto no_mem_4; +- clear_page(t->shared[i]); ++ clear_page(t->shared_raw[i]); + } + + for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ ) + gnttab_create_shared_page(d, t, i); + ++ /* Status pages for grant table - for version 2 */ ++ t->status = xmalloc_array(grant_status_t *, ++ grant_to_status_frames(max_nr_grant_frames)); ++ if ( t->status == NULL ) ++ goto no_mem_4; ++ memset(t->status, 0, ++ grant_to_status_frames(max_nr_grant_frames) * sizeof(t->status[0])); ++ t->nr_status_frames = 0; ++ + /* Okay, install the structure. 
*/ + d->grant_table = t; + return 0; + + no_mem_4: + for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ ) +- free_xenheap_page(t->shared[i]); +- xfree(t->shared); ++ free_xenheap_page(t->shared_raw[i]); ++ xfree(t->shared_raw); + no_mem_3: + free_xenheap_page(t->maptrack[0]); + xfree(t->maptrack); +@@ -1674,7 +2292,9 @@ gnttab_release_mappings( + grant_handle_t handle; + struct domain *rd; + struct active_grant_entry *act; +- struct grant_entry *sha; ++ grant_entry_header_t *sha; ++ uint16_t *status; ++ struct page_info *pg; + + BUG_ON(!d->is_dying); + +@@ -1701,7 +2321,13 @@ gnttab_release_mappings( + spin_lock(&rd->grant_table->lock); + + act = &active_entry(rd->grant_table, ref); +- sha = &shared_entry(rd->grant_table, ref); ++ sha = shared_entry_header(rd->grant_table, ref); ++ if (rd->grant_table->gt_version == 1) ++ status = &sha->flags; ++ else ++ status = &status_entry(rd->grant_table, ref); ++ ++ pg = mfn_to_page(act->frame); + + if ( map->flags & GNTMAP_readonly ) + { +@@ -1746,11 +2372,11 @@ gnttab_release_mappings( + } + + if ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0 ) +- gnttab_clear_flag(_GTF_writing, &sha->flags); ++ gnttab_clear_flag(_GTF_writing, status); + } + + if ( act->pin == 0 ) +- gnttab_clear_flag(_GTF_reading, &sha->flags); ++ gnttab_clear_flag(_GTF_reading, status); + + spin_unlock(&rd->grant_table->lock); + +@@ -1772,8 +2398,8 @@ grant_table_destroy( + return; + + for ( i = 0; i < nr_grant_frames(t); i++ ) +- free_xenheap_page(t->shared[i]); +- xfree(t->shared); ++ free_xenheap_page(t->shared_raw[i]); ++ xfree(t->shared_raw); + + for ( i = 0; i < nr_maptrack_frames(t); i++ ) + free_xenheap_page(t->maptrack[i]); +@@ -1783,6 +2409,10 @@ grant_table_destroy( + free_xenheap_page(t->active[i]); + xfree(t->active); + ++ for ( i = 0; i < nr_status_frames(t); i++ ) ++ free_xenheap_page(t->status[i]); ++ xfree(t->status); ++ + xfree(t); + d->grant_table = NULL; + } +diff --git a/xen/include/Makefile b/xen/include/Makefile +index 8427371..4c95ed7 100644 +--- a/xen/include/Makefile ++++ b/xen/include/Makefile +@@ -37,7 +37,7 @@ endif + .PHONY: all + all: $(headers-y) + +-compat/%.h: compat/%.i Makefile ++compat/%.h: compat/%.i Makefile $(BASEDIR)/tools/compat-build-header.py + set -e; id=_$$(echo $@ | tr '[:lower:]-/.' 
'[:upper:]___'); \ + echo "#ifndef $$id" >$@.new; \ + echo "#define $$id" >>$@.new; \ +@@ -51,9 +51,9 @@ compat/%.h: compat/%.i Makefile + mv -f $@.new $@ + + compat/%.i: compat/%.c Makefile +- $(CPP) $(filter-out -M% .%.d,$(CFLAGS)) $(cppflags-y) -o $@ $< ++ $(CPP) -include public/xen-compat.h $(filter-out -M% .%.d,$(CFLAGS)) $(cppflags-y) -o $@ $< + +-compat/%.c: public/%.h xlat.lst Makefile ++compat/%.c: public/%.h xlat.lst Makefile $(BASEDIR)/tools/compat-build-source.py + mkdir -p $(@D) + grep -v 'DEFINE_XEN_GUEST_HANDLE(long)' $< | \ + $(BASEDIR)/tools/compat-build-source.py >$@.new +diff --git a/xen/include/asm-x86/grant_table.h b/xen/include/asm-x86/grant_table.h +index 3a7fb2a..4e97d9d 100644 +--- a/xen/include/asm-x86/grant_table.h ++++ b/xen/include/asm-x86/grant_table.h +@@ -21,16 +21,31 @@ int replace_grant_host_mapping( + #define gnttab_create_shared_page(d, t, i) \ + do { \ + share_xen_page_with_guest( \ +- virt_to_page((char *)(t)->shared[i]), \ ++ virt_to_page((char *)(t)->shared_raw[i]), \ + (d), XENSHARE_writable); \ + } while ( 0 ) + ++#define gnttab_create_status_page(d, t, i) \ ++ do { \ ++ share_xen_page_with_guest( \ ++ virt_to_page((char *)(t)->status[i]), \ ++ (d), XENSHARE_writable); \ ++ } while ( 0 ) ++ ++ + #define gnttab_shared_mfn(d, t, i) \ +- ((virt_to_maddr((t)->shared[i]) >> PAGE_SHIFT)) ++ ((virt_to_maddr((t)->shared_raw[i]) >> PAGE_SHIFT)) + + #define gnttab_shared_gmfn(d, t, i) \ + (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i))) + ++ ++#define gnttab_status_mfn(d, t, i) \ ++ ((virt_to_maddr((t)->status[i]) >> PAGE_SHIFT)) ++ ++#define gnttab_status_gmfn(d, t, i) \ ++ (mfn_to_gmfn(d, gnttab_status_mfn(d, t, i))) ++ + #define gnttab_mark_dirty(d, f) paging_mark_dirty((d), (f)) + + static inline void gnttab_clear_flag(unsigned long nr, uint16_t *addr) +diff --git a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h +index ad116e7..3edb7d1 100644 +--- a/xen/include/public/grant_table.h ++++ b/xen/include/public/grant_table.h +@@ -84,12 +84,26 @@ + */ + + /* ++ * Reference to a grant entry in a specified domain's grant table. ++ */ ++typedef uint32_t grant_ref_t; ++ ++/* + * A grant table comprises a packed array of grant entries in one or more + * page frames shared between Xen and a guest. + * [XEN]: This field is written by Xen and read by the sharing guest. + * [GST]: This field is written by the guest and read by Xen. + */ +-struct grant_entry { ++ ++/* ++ * Version 1 of the grant table entry structure is maintained purely ++ * for backwards compatibility. New guests should use version 2. ++ */ ++#if __XEN_INTERFACE_VERSION__ < 0x0003020a ++#define grant_entry_v1 grant_entry ++#define grant_entry_v1_t grant_entry_t ++#endif ++struct grant_entry_v1 { + /* GTF_xxx: various type and flag information. [XEN,GST] */ + uint16_t flags; + /* The domain being granted foreign privileges. [GST] */ +@@ -100,7 +114,7 @@ struct grant_entry { + */ + uint32_t frame; + }; +-typedef struct grant_entry grant_entry_t; ++typedef struct grant_entry_v1 grant_entry_v1_t; + + /* + * Type of grant entry. +@@ -108,10 +122,13 @@ typedef struct grant_entry grant_entry_t; + * GTF_permit_access: Allow @domid to map/access @frame. + * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame + * to this guest. Xen writes the page number to @frame. ++ * GTF_transitive: Allow @domid to transitively access a subrange of ++ * @trans_grant in @trans_domid. No mappings are allowed. 
+ */ + #define GTF_invalid (0U<<0) + #define GTF_permit_access (1U<<0) + #define GTF_accept_transfer (2U<<0) ++#define GTF_transitive (3U<<0) + #define GTF_type_mask (3U<<0) + + /* +@@ -120,6 +137,9 @@ typedef struct grant_entry grant_entry_t; + * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN] + * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN] + * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST] ++ * GTF_sub_page: Grant access to only a subrange of the page. @domid ++ * will only be allowed to copy from the grant, and not ++ * map it. [GST] + */ + #define _GTF_readonly (2) + #define GTF_readonly (1U<<_GTF_readonly) +@@ -133,6 +153,8 @@ typedef struct grant_entry grant_entry_t; + #define GTF_PCD (1U<<_GTF_PCD) + #define _GTF_PAT (7) + #define GTF_PAT (1U<<_GTF_PAT) ++#define _GTF_sub_page (8) ++#define GTF_sub_page (1U<<_GTF_sub_page) + + /* + * Subflags for GTF_accept_transfer: +@@ -149,15 +171,87 @@ typedef struct grant_entry grant_entry_t; + #define _GTF_transfer_completed (3) + #define GTF_transfer_completed (1U<<_GTF_transfer_completed) + +- +-/*********************************** +- * GRANT TABLE QUERIES AND USES ++/* ++ * Version 2 grant table entries. These fulfil the same role as ++ * version 1 entries, but can represent more complicated operations. ++ * Any given domain will have either a version 1 or a version 2 table, ++ * and every entry in the table will be the same version. ++ * ++ * The interface by which domains use grant references does not depend ++ * on the grant table version in use by the other domain. ++ */ ++#if __XEN_INTERFACE_VERSION__ >= 0x0003020a ++/* ++ * Version 1 and version 2 grant entries share a common prefix. The ++ * fields of the prefix are documented as part of struct ++ * grant_entry_v1. + */ ++struct grant_entry_header { ++ uint16_t flags; ++ domid_t domid; ++}; ++typedef struct grant_entry_header grant_entry_header_t; + + /* +- * Reference to a grant entry in a specified domain's grant table. ++ * Version 2 of the grant entry structure. ++ */ ++union grant_entry_v2 { ++ grant_entry_header_t hdr; ++ ++ /* ++ * This member is used for V1-style full page grants, where either: ++ * ++ * -- hdr.type is GTF_accept_transfer, or ++ * -- hdr.type is GTF_permit_access and GTF_sub_page is not set. ++ * ++ * In that case, the frame field has the same semantics as the ++ * field of the same name in the V1 entry structure. ++ */ ++ struct { ++ grant_entry_header_t hdr; ++ uint32_t pad0; ++ uint64_t frame; ++ } full_page; ++ ++ /* ++ * If the grant type is GTF_grant_access and GTF_sub_page is set, ++ * @domid is allowed to access bytes [@page_off,@page_off+@length) ++ * in frame @frame. ++ */ ++ struct { ++ grant_entry_header_t hdr; ++ uint16_t page_off; ++ uint16_t length; ++ uint64_t frame; ++ } sub_page; ++ ++ /* ++ * If the grant is GTF_transitive, @domid is allowed to use the ++ * grant @gref in domain @trans_domid, as if it was the local ++ * domain. Obviously, the transitive access must be compatible ++ * with the original grant. ++ * ++ * The current version of Xen does not allow transitive grants ++ * to be mapped. 
++ */ ++ struct { ++ grant_entry_header_t hdr; ++ domid_t trans_domid; ++ uint16_t pad0; ++ grant_ref_t gref; ++ } transitive; ++ ++ uint32_t __spacer[4]; /* Pad to a power of two */ ++}; ++typedef union grant_entry_v2 grant_entry_v2_t; ++ ++typedef uint16_t grant_status_t; ++ ++#endif /* __XEN_INTERFACE_VERSION__ */ ++ ++/*********************************** ++ * GRANT TABLE QUERIES AND USES + */ +-typedef uint32_t grant_ref_t; + + /* + * Handle to track a mapping created via a grant reference. +@@ -358,6 +452,63 @@ struct gnttab_unmap_and_replace { + typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t; + DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t); + ++#if __XEN_INTERFACE_VERSION__ >= 0x0003020a ++/* ++ * GNTTABOP_set_version: Request a particular version of the grant ++ * table shared table structure. This operation can only be performed ++ * once in any given domain. It must be performed before any grants ++ * are activated; otherwise, the domain will be stuck with version 1. ++ * The only defined versions are 1 and 2. ++ */ ++#define GNTTABOP_set_version 8 ++struct gnttab_set_version { ++ /* IN parameters */ ++ uint32_t version; ++}; ++typedef struct gnttab_set_version gnttab_set_version_t; ++DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t); ++ ++ ++/* ++ * GNTTABOP_get_status_frames: Get the list of frames used to store grant ++ * status for . In grant format version 2, the status is separated ++ * from the other shared grant fields to allow more efficient synchronization ++ * using barriers instead of atomic cmpexch operations. ++ * specify the size of vector . ++ * The frame addresses are returned in the . ++ * Only addresses are returned, even if the table is larger. ++ * NOTES: ++ * 1. may be specified as DOMID_SELF. ++ * 2. Only a sufficiently-privileged domain may specify != DOMID_SELF. ++ */ ++#define GNTTABOP_get_status_frames 9 ++struct gnttab_get_status_frames { ++ /* IN parameters. */ ++ uint32_t nr_frames; ++ domid_t dom; ++ /* OUT parameters. */ ++ int16_t status; /* GNTST_* */ ++ XEN_GUEST_HANDLE(uint64_t) frame_list; ++}; ++typedef struct gnttab_get_status_frames gnttab_get_status_frames_t; ++DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t); ++ ++/* ++ * GNTTABOP_get_version: Get the grant table version which is in ++ * effect for domain . ++ */ ++#define GNTTABOP_get_version 10 ++struct gnttab_get_version { ++ /* IN parameters */ ++ domid_t dom; ++ uint16_t pad; ++ /* OUT parameters */ ++ uint32_t version; ++}; ++typedef struct gnttab_get_version gnttab_get_version_t; ++DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t); ++ ++#endif /* __XEN_INTERFACE_VERSION__ */ + + /* + * Bitfield values for gnttab_map_grant_ref.flags. +diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h +index ba4051e..35d1dd4 100644 +--- a/xen/include/public/memory.h ++++ b/xen/include/public/memory.h +@@ -209,6 +209,8 @@ struct xen_add_to_physmap { + #define XENMAPSPACE_gmfn 2 /* GMFN */ + unsigned int space; + ++#define XENMAPIDX_grant_table_status 0x80000000 ++ + /* Index into source mapping space. 
*/ + xen_ulong_t idx; + +diff --git a/xen/include/public/xen-compat.h b/xen/include/public/xen-compat.h +index 329be07..2e38003 100644 +--- a/xen/include/public/xen-compat.h ++++ b/xen/include/public/xen-compat.h +@@ -27,7 +27,7 @@ + #ifndef __XEN_PUBLIC_XEN_COMPAT_H__ + #define __XEN_PUBLIC_XEN_COMPAT_H__ + +-#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030209 ++#define __XEN_LATEST_INTERFACE_VERSION__ 0x0003020a + + #if defined(__XEN__) || defined(__XEN_TOOLS__) + /* Xen is built with matching headers and implements the latest interface. */ +diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h +index 524118b..2fe92fe 100644 +--- a/xen/include/public/xen.h ++++ b/xen/include/public/xen.h +@@ -47,6 +47,7 @@ DEFINE_XEN_GUEST_HANDLE(long); + __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long); + DEFINE_XEN_GUEST_HANDLE(void); + ++DEFINE_XEN_GUEST_HANDLE(uint64_t); + DEFINE_XEN_GUEST_HANDLE(xen_pfn_t); + #endif + +diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h +index 096af9b..20c8354 100644 +--- a/xen/include/xen/grant_table.h ++++ b/xen/include/xen/grant_table.h +@@ -30,10 +30,17 @@ + + /* Active grant entry - used for shadowing GTF_permit_access grants. */ + struct active_grant_entry { +- u32 pin; /* Reference count information. */ +- domid_t domid; /* Domain being granted access. */ ++ u32 pin; /* Reference count information. */ ++ domid_t domid; /* Domain being granted access. */ ++ domid_t trans_dom; ++ uint32_t trans_gref; ++ unsigned long frame; /* Frame being granted. */ + unsigned long gfn; /* Guest's idea of the frame being granted. */ +- unsigned long frame; /* Frame being granted. */ ++ unsigned is_sub_page:1; /* True if this is a sub-page grant. */ ++ unsigned start:15; /* For sub-page grants, the start offset ++ in the page. */ ++ unsigned length:16; /* For sub-page grants, the length of the ++ grant. */ + }; + + /* Count of writable host-CPU mappings. */ +@@ -53,10 +60,6 @@ struct active_grant_entry { + #define GNTPIN_devr_inc (1 << GNTPIN_devr_shift) + #define GNTPIN_devr_mask (0xFFU << GNTPIN_devr_shift) + +-/* Initial size of a grant table. */ +-#define INITIAL_NR_GRANT_ENTRIES ((INITIAL_NR_GRANT_FRAMES << PAGE_SHIFT) / \ +- sizeof(grant_entry_t)) +- + #ifndef DEFAULT_MAX_NR_GRANT_FRAMES /* to allow arch to override */ + /* Default maximum size of a grant table. [POLICY] */ + #define DEFAULT_MAX_NR_GRANT_FRAMES 32 +@@ -84,7 +87,15 @@ struct grant_table { + /* Table size. Number of frames shared with guest */ + unsigned int nr_grant_frames; + /* Shared grant table (see include/public/grant_table.h). */ +- struct grant_entry **shared; ++ union { ++ void **shared_raw; ++ struct grant_entry_v1 **shared_v1; ++ union grant_entry_v2 **shared_v2; ++ }; ++ /* Number of grant status frames shared with guest (for version 2) */ ++ unsigned int nr_status_frames; ++ /* State grant table (see include/public/grant_table.h). */ ++ grant_status_t **status; + /* Active grant table. */ + struct active_grant_entry **active; + /* Mapping tracking table. */ +@@ -93,6 +104,9 @@ struct grant_table { + unsigned int maptrack_limit; + /* Lock protecting updates to active and shared grant tables. */ + spinlock_t lock; ++ /* The defined versions are 1 and 2. Set to 0 if we don't know ++ what version to use yet. */ ++ unsigned gt_version; + }; + + /* Create/destroy per-domain grant table context. */ +@@ -118,26 +132,32 @@ static inline unsigned int nr_grant_frames(struct grant_table *gt) + return gt->nr_grant_frames; + } + +-/* Number of grant table entries. 
Caller must hold d's grant table lock. */ +-static inline unsigned int nr_grant_entries(struct grant_table *gt) ++/* Number of status grant table frames. Caller must hold d's gr. table lock.*/ ++static inline unsigned int nr_status_frames(struct grant_table *gt) ++{ ++ return gt->nr_status_frames; ++} ++ ++#define GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t)) ++#define GRANT_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_v2_t)) ++/* Number of grant table status entries. Caller must hold d's gr. table lock.*/ ++static inline unsigned int grant_to_status_frames(int grant_frames) + { +- return (nr_grant_frames(gt) << PAGE_SHIFT) / sizeof(grant_entry_t); ++ return (grant_frames * GRANT_PER_PAGE + GRANT_STATUS_PER_PAGE - 1) / ++ GRANT_STATUS_PER_PAGE; + } + + static inline unsigned int + num_act_frames_from_sha_frames(const unsigned int num) + { + /* How many frames are needed for the active grant table, +- * given the size of the shared grant table? +- * +- * act_per_page = PAGE_SIZE / sizeof(active_grant_entry_t); +- * sha_per_page = PAGE_SIZE / sizeof(grant_entry_t); +- * num_sha_entries = num * sha_per_page; +- * num_act_frames = (num_sha_entries + (act_per_page-1)) / act_per_page; +- */ +- return ((num * (PAGE_SIZE / sizeof(grant_entry_t))) + +- ((PAGE_SIZE / sizeof(struct active_grant_entry))-1)) +- / (PAGE_SIZE / sizeof(struct active_grant_entry)); ++ * given the size of the shared grant table? */ ++ unsigned act_per_page = PAGE_SIZE / sizeof(struct active_grant_entry); ++ unsigned sha_per_page = PAGE_SIZE / sizeof(grant_entry_v1_t); ++ unsigned num_sha_entries = num * sha_per_page; ++ unsigned num_act_frames = ++ (num_sha_entries + (act_per_page-1)) / act_per_page; ++ return num_act_frames; + } + + static inline unsigned int +diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst +index f2e4597..3188925 100644 +--- a/xen/include/xlat.lst ++++ b/xen/include/xlat.lst +@@ -44,7 +44,12 @@ + ! gnttab_transfer grant_table.h + ? gnttab_unmap_grant_ref grant_table.h + ? gnttab_unmap_and_replace grant_table.h +-? grant_entry grant_table.h ++? gnttab_set_version grant_table.h ++? gnttab_get_version grant_table.h ++! gnttab_get_status_frames grant_table.h ++? grant_entry_v1 grant_table.h ++? grant_entry_header grant_table.h ++? grant_entry_v2 grant_table.h + ? kexec_exec kexec.h + ! kexec_image kexec.h + ! kexec_range kexec.h
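
The public-header hunks above define the guest-visible version 2 grant-table interface: GNTTABOP_set_version, union grant_entry_v2, and the separate status frames queried via GNTTABOP_get_status_frames. As a rough illustration (not part of the backported patch) of how a guest would consume that interface, the sketch below switches a domain to version 2, publishes one full-page grant, and then retires it. The HYPERVISOR_grant_table_op() hypercall wrapper, the wmb()/mb() barrier macros, the include path, and the mapped arrays gnttab_v2/gnttab_status are assumptions made for the example; only the structures, opcodes and flag bits come from the patch itself.

/* Illustrative guest-side sketch only -- not part of this patch. */
#include <xen/interface/grant_table.h>   /* assumed guest-side header path */

extern union grant_entry_v2 *gnttab_v2;   /* assumed: mapped shared grant frames */
extern grant_status_t *gnttab_status;     /* assumed: mapped status frames */

static int switch_to_grant_v2(void)
{
    struct gnttab_set_version sv = { .version = 2 };

    /* Must happen before any grant is activated, otherwise the domain
     * stays on version 1 (see GNTTABOP_set_version above). */
    return HYPERVISOR_grant_table_op(GNTTABOP_set_version, &sv, 1);
}

static void grant_full_page_v2(grant_ref_t ref, domid_t domid,
                               unsigned long gfn, int readonly)
{
    union grant_entry_v2 *e = &gnttab_v2[ref];

    e->full_page.frame = gfn;
    e->hdr.domid = domid;
    wmb();    /* frame/domid must be visible before the entry goes live */
    e->hdr.flags = GTF_permit_access | (readonly ? GTF_readonly : 0);
}

static int try_end_foreign_access_v2(grant_ref_t ref)
{
    gnttab_v2[ref].hdr.flags = 0;
    mb();
    /* With version 2 the GTF_reading/GTF_writing bits live in the
     * separate status page, so a plain read behind a barrier tells us
     * whether the remote domain still has the grant pinned -- no
     * cmpxchg retry loop as in the version 1 path. */
    return !(gnttab_status[ref] & (GTF_reading | GTF_writing));
}

This mirrors the rationale given in the GNTTABOP_get_status_frames comment above: splitting the status bits out of the shared entry lets a v2 guest synchronise with barriers and plain loads, where the v1 code (and the hypervisor's _set_status_v1()) must spin on cmpxchg against a shared 32-bit word.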