--- /dev/null
+
+ Backport netchannel2 support from xen-unstable changesets
+ 20284, 20283, 20282, 20281, 20280, 20278 and 20277.
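+
+ For illustration, the vif2 device type and xm subcommands introduced by
+ this patch can be used roughly as follows (names and values here are
+ only examples):
+
+   vif2 = [ 'bridge=xenbr0,front_mac=00:16:3e:aa:bb:cc' ]   # guest config file
+   xm network2-attach <domain> bridge=xenbr0                # hot-attach
+   xm network2-list <domain>                                # list vif2 devices
+   xm network2-detach <domain> <devid>                      # detach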
+
+diff --git a/tools/hotplug/Linux/Makefile b/tools/hotplug/Linux/Makefile
+index bdd1cc0..bc50e65 100644
+--- a/tools/hotplug/Linux/Makefile
++++ b/tools/hotplug/Linux/Makefile
+@@ -14,6 +14,7 @@ XEN_SCRIPT_DIR = /etc/xen/scripts
+ XEN_SCRIPTS = network-bridge vif-bridge
+ XEN_SCRIPTS += network-route vif-route
+ XEN_SCRIPTS += network-nat vif-nat
++XEN_SCRIPTS += vif2
+ XEN_SCRIPTS += block
+ XEN_SCRIPTS += block-enbd block-nbd
+ XEN_SCRIPTS += blktap
+diff --git a/tools/hotplug/Linux/blktap b/tools/hotplug/Linux/blktap
+new file mode 100644
+index 0000000..01a0f6c
+--- /dev/null
++++ b/tools/hotplug/Linux/blktap
+@@ -0,0 +1,93 @@
++#!/bin/bash
++
++# Copyright (c) 2005, XenSource Ltd.
++
++dir=$(dirname "$0")
++. "$dir/xen-hotplug-common.sh"
++. "$dir/block-common.sh"
++
++findCommand "$@"
++
++##
++# check_blktap_sharing file mode
++#
++# Perform the sharing check for the given blktap and mode.
++#
++check_blktap_sharing()
++{
++ local file="$1"
++ local mode="$2"
++
++ local base_path="$XENBUS_BASE_PATH/$XENBUS_TYPE"
++ for dom in $(xenstore-list "$base_path")
++ do
++ for dev in $(xenstore-list "$base_path/$dom")
++ do
++ params=$(xenstore_read "$base_path/$dom/$dev/params" | cut -d: -f2)
++ if [ "$file" = "$params" ]
++ then
++
++ if [ "$mode" = 'w' ]
++ then
++ if ! same_vm "$dom"
++ then
++ echo 'guest'
++ return
++ fi
++ else
++ local m=$(xenstore_read "$base_path/$dom/$dev/mode")
++ m=$(canonicalise_mode "$m")
++
++ if [ "$m" = 'w' ]
++ then
++ if ! same_vm "$dom"
++ then
++ echo 'guest'
++ return
++ fi
++ fi
++ fi
++ fi
++ done
++ done
++
++ echo 'ok'
++}
++
++
++t=$(xenstore_read_default "$XENBUS_PATH/type" 'MISSING')
++if [ -n "$t" ]
++then
++ p=$(xenstore_read "$XENBUS_PATH/params")
++ # if we have a ':', chew from head including :
++ if echo "$p" | grep -q ':'
++ then
++ p=${p#*:}
++ fi
++fi
++# some versions of readlink cannot be passed a regular file
++if [ -L "$p" ]; then
++ file=$(readlink -f "$p") || fatal "$p link does not exist."
++else
++ file="$p"
++fi
++
++if [ "$command" = 'add' ]
++then
++ [ -e "$file" ] || { fatal $file does not exist; }
++
++ FRONTEND_ID=$(xenstore_read "$XENBUS_PATH/frontend-id")
++ FRONTEND_UUID=$(xenstore_read "/local/domain/$FRONTEND_ID/vm")
++ mode=$(xenstore_read "$XENBUS_PATH/mode")
++ mode=$(canonicalise_mode "$mode")
++
++ if [ "$mode" != '!' ]
++ then
++ result=$(check_blktap_sharing "$file" "$mode")
++ [ "$result" = 'ok' ] || ebusy "$file already in use by other domain"
++ fi
++
++ success
++fi
++
++exit 0
+diff --git a/tools/hotplug/Linux/vif2 b/tools/hotplug/Linux/vif2
+new file mode 100644
+index 0000000..247fa67
+--- /dev/null
++++ b/tools/hotplug/Linux/vif2
+@@ -0,0 +1,48 @@
++#!/bin/bash
++
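++# Called by the xen-backend.rules udev rule as "/etc/xen/scripts/vif2 <action>".
++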
++dir=$(dirname "$0")
++. "$dir/xen-hotplug-common.sh"
++. "$dir/xen-network-common.sh"
++
++bridge=$(xenstore_read_default "$XENBUS_PATH/bridge" "$bridge")
++if [ -z "$bridge" ]
++ then
++ nr_bridges=$(($(brctl show | cut -f 1 | grep -v "^$" | wc -l) - 1))
++ if [ "$nr_bridges" != 1 ]
++ then
++ fatal "no bridge specified, and don't know which one to use ($nr_bridges found)"
++ fi
++ bridge=$(brctl show | cut -d "
++" -f 2 | cut -f 1)
++fi
++
++command="$1"
++shift
++
++case "$command" in
++ "online")
++ if [ "$bridge" != "-" ]
++ then
++ setup_bridge_port "$vif"
++ add_to_bridge "$bridge" "$vif"
++ else
++ # Just let the normal udev rules for interfaces handle it.
++ true
++ fi
++ success
++ ;;
++
++ "add")
++ success
++ ;;
++
++ "remove")
++ ;;
++
++ *)
++ echo "Unknown command: $command"
++ echo 'Valid commands are: add, remove, online'
++ exit 1
++esac
+diff --git a/tools/hotplug/Linux/xen-backend.rules b/tools/hotplug/Linux/xen-backend.rules
+index fe21fc1..9dd88a8 100644
+--- a/tools/hotplug/Linux/xen-backend.rules
++++ b/tools/hotplug/Linux/xen-backend.rules
+@@ -1,8 +1,9 @@
+ SUBSYSTEM=="xen-backend", KERNEL=="tap*", RUN+="/etc/xen/scripts/blktap $env{ACTION}"
+ SUBSYSTEM=="xen-backend", KERNEL=="vbd*", RUN+="/etc/xen/scripts/block $env{ACTION}"
+ SUBSYSTEM=="xen-backend", KERNEL=="vtpm*", RUN+="/etc/xen/scripts/vtpm $env{ACTION}"
+-SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="online", RUN+="$env{script} online"
+-SUBSYSTEM=="xen-backend", KERNEL=="vif*", ACTION=="offline", RUN+="$env{script} offline"
++SUBSYSTEM=="xen-backend", KERNEL=="vif2-*", RUN+="/etc/xen/scripts/vif2 $env{ACTION}"
++SUBSYSTEM=="xen-backend", KERNEL=="vif-*", ACTION=="online", RUN+="$env{script} online"
++SUBSYSTEM=="xen-backend", KERNEL=="vif-*", ACTION=="offline", RUN+="$env{script} offline"
+ SUBSYSTEM=="xen-backend", KERNEL=="vscsi*", RUN+="/etc/xen/scripts/vscsi $env{ACTION}"
+ SUBSYSTEM=="xen-backend", ACTION=="remove", RUN+="/etc/xen/scripts/xen-hotplug-cleanup"
+ KERNEL=="evtchn", NAME="xen/%k"
+diff --git a/tools/libxc/xc_linux.c b/tools/libxc/xc_linux.c
+index 2480b3c..c9be4f7 100644
+--- a/tools/libxc/xc_linux.c
++++ b/tools/libxc/xc_linux.c
+@@ -562,6 +562,141 @@ int xc_gnttab_set_max_grants(int xcg_handle,
+ return 0;
+ }
+
++int xc_gnttab_op(int xc_handle, int cmd,
++ void * op, int op_size, int count)
++{
++ int ret = 0;
++ DECLARE_HYPERCALL;
++
++ hypercall.op = __HYPERVISOR_grant_table_op;
++ hypercall.arg[0] = cmd;
++ hypercall.arg[1] = (unsigned long)op;
++ hypercall.arg[2] = count;
++
++ if ( lock_pages(op, count* op_size) != 0 )
++ {
++ PERROR("Could not lock memory for Xen hypercall");
++ goto out1;
++ }
++
++ ret = do_xen_hypercall(xc_handle, &hypercall);
++
++ unlock_pages(op, count * op_size);
++
++ out1:
++ return ret;
++}
++
++int xc_gnttab_get_version(int xc_handle, int domid)
++{
++ struct gnttab_get_version query;
++ int rc;
++
++ query.dom = domid;
++ rc = xc_gnttab_op(xc_handle, GNTTABOP_get_version,
++ &query, sizeof(query), 1);
++ if (rc < 0)
++ return rc;
++ else
++ return query.version;
++}
++
++static void *_gnttab_map_table(int xc_handle, int domid, int *gnt_num)
++{
++ int rc, i;
++ struct gnttab_query_size query;
++ struct gnttab_setup_table setup;
++ unsigned long *frame_list = NULL;
++ xen_pfn_t *pfn_list = NULL;
++ struct grant_entry_v1 *gnt = NULL;
++
++ if (!gnt_num)
++ return NULL;
++
++ query.dom = domid;
++ rc = xc_gnttab_op(xc_handle, GNTTABOP_query_size,
++ &query, sizeof(query), 1);
++
++ if (rc || (query.status != GNTST_okay) )
++ {
++ ERROR("Could not query dom's grant size\n", domid);
++ return NULL;
++ }
++
++ *gnt_num = query.nr_frames *
++ (PAGE_SIZE / sizeof(struct grant_entry_v1) );
++
++ frame_list = malloc(query.nr_frames * sizeof(unsigned long));
++ if (!frame_list || lock_pages(frame_list, query.nr_frames *
++ sizeof(unsigned long)))
++ {
++ ERROR("Alloc/lock frame_list in xc_gnttab_map_table\n");
++ if (frame_list)
++ free(frame_list);
++ return NULL;
++ }
++
++ pfn_list = malloc(query.nr_frames * sizeof(xen_pfn_t));
++
++ if (!pfn_list)
++ {
++ ERROR("Could not lock pfn_list in xc_gnttab_map_table\n");
++ goto err;
++ }
++
++ setup.dom = domid;
++ setup.nr_frames = query.nr_frames;
++ set_xen_guest_handle(setup.frame_list, frame_list);
++
++ /* XXX Any race with other setup_table hypercall? */
++ rc = xc_gnttab_op(xc_handle, GNTTABOP_setup_table,
++ &setup, sizeof(setup), 1);
++
++ if (rc ||( setup.status != GNTST_okay) )
++ {
++ ERROR("Could not get grant table frame list\n");
++ goto err;
++ }
++
++ for (i = 0; i < setup.nr_frames; i++)
++ pfn_list[i] = frame_list[i];
++
++ gnt = xc_map_foreign_pages(xc_handle, domid, PROT_READ,
++ pfn_list, setup.nr_frames);
++ if (!gnt)
++ {
++ ERROR("Could not map grant table\n");
++ goto err;
++ }
++
++err:
++ if (frame_list)
++ {
++ unlock_pages(frame_list, query.nr_frames * sizeof(unsigned long));
++ free(frame_list);
++ }
++ if (pfn_list)
++ free(pfn_list);
++
++ return gnt;
++}
++
++struct grant_entry_v1 *xc_gnttab_map_table_v1(int xc_handle, int domid,
++ int *gnt_num)
++{
++ if (xc_gnttab_get_version(xc_handle, domid) == 2)
++ return NULL;
++ return _gnttab_map_table(xc_handle, domid, gnt_num);
++}
++
++struct grant_entry_v2 *xc_gnttab_map_table_v2(int xc_handle, int domid,
++ int *gnt_num)
++{
++ if (xc_gnttab_get_version(xc_handle, domid) != 2)
++ return NULL;
++ return _gnttab_map_table(xc_handle, domid, gnt_num);
++}
++
+ /*
+ * Local variables:
+ * mode: C
+diff --git a/tools/libxc/xc_offline_page.c b/tools/libxc/xc_offline_page.c
+new file mode 100644
+index 0000000..21d26bd
+--- /dev/null
++++ b/tools/libxc/xc_offline_page.c
+@@ -0,0 +1,798 @@
++/******************************************************************************
++ * xc_offline_page.c
++ *
++ * Helper functions to offline/online one page
++ *
++ * Copyright (c) 2003, K A Fraser.
++ * Copyright (c) 2009, Intel Corporation.
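++ *
++ * Overview of xc_exchange_page(), the main entry point below:
++ *  1. map the suspended guest's P2M/M2P tables and its grant table;
++ *  2. refuse pages that are currently granted out or not offline-pending;
++ *  3. temporarily unpin the page if it is a pinned page table, clear and
++ *     back up every PTE that references it, then swap it for a fresh page
++ *     via XENMEM_exchange;
++ *  4. rewrite the backed-up PTEs to the new MFN, copy the old contents
++ *     across, update the P2M entry and re-pin the table if needed.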
++ */
++
++#include <inttypes.h>
++#include <time.h>
++#include <stdlib.h>
++#include <unistd.h>
++#include <sys/time.h>
++#include <xs.h>
++#include <xc_core.h>
++
++#include "xc_private.h"
++#include "xc_dom.h"
++#include "xg_private.h"
++#include "xg_save_restore.h"
++
++struct domain_mem_info{
++ int domid;
++ unsigned int pt_level;
++ unsigned int guest_width;
++ uint32_t *pfn_type;
++ xen_pfn_t *p2m_table;
++ unsigned long p2m_size;
++ xen_pfn_t *m2p_table;
++ int max_mfn;
++};
++
++struct pte_backup_entry
++{
++ xen_pfn_t table_mfn;
++ int offset;
++};
++
++#define DEFAULT_BACKUP_COUNT 1024
++struct pte_backup
++{
++ struct pte_backup_entry *entries;
++ int max;
++ int cur;
++};
++
++/* Global definition for some MACRO */
++int guest_width, p2m_size;
++
++int xc_mark_page_online(int xc, unsigned long start,
++ unsigned long end, uint32_t *status)
++{
++ DECLARE_SYSCTL;
++ int ret = -1;
++
++ if ( !status || (end < start) )
++ return -EINVAL;
++
++ if (lock_pages(status, sizeof(uint32_t)*(end - start + 1)))
++ {
++ ERROR("Could not lock memory for xc_mark_page_online\n");
++ return -EINVAL;
++ }
++
++ sysctl.cmd = XEN_SYSCTL_page_offline_op;
++ sysctl.u.page_offline.start = start;
++ sysctl.u.page_offline.cmd = sysctl_page_online;
++ sysctl.u.page_offline.end = end;
++ set_xen_guest_handle(sysctl.u.page_offline.status, status);
++ ret = xc_sysctl(xc, &sysctl);
++
++ unlock_pages(status, sizeof(uint32_t)*(end - start + 1));
++
++ return ret;
++}
++
++int xc_mark_page_offline(int xc, unsigned long start,
++ unsigned long end, uint32_t *status)
++{
++ DECLARE_SYSCTL;
++ int ret = -1;
++
++ if ( !status || (end < start) )
++ return -EINVAL;
++
++ if (lock_pages(status, sizeof(uint32_t)*(end - start + 1)))
++ {
++ ERROR("Could not lock memory for xc_mark_page_offline");
++ return -EINVAL;
++ }
++
++ sysctl.cmd = XEN_SYSCTL_page_offline_op;
++ sysctl.u.page_offline.start = start;
++ sysctl.u.page_offline.cmd = sysctl_page_offline;
++ sysctl.u.page_offline.end = end;
++ set_xen_guest_handle(sysctl.u.page_offline.status, status);
++ ret = xc_sysctl(xc, &sysctl);
++
++ unlock_pages(status, sizeof(uint32_t)*(end - start + 1));
++
++ return ret;
++}
++
++int xc_query_page_offline_status(int xc, unsigned long start,
++ unsigned long end, uint32_t *status)
++{
++ DECLARE_SYSCTL;
++ int ret = -1;
++
++ if ( !status || (end < start) )
++ return -EINVAL;
++
++ if (lock_pages(status, sizeof(uint32_t)*(end - start + 1)))
++ {
++ ERROR("Could not lock memory for xc_query_page_offline_status\n");
++ return -EINVAL;
++ }
++
++ sysctl.cmd = XEN_SYSCTL_page_offline_op;
++ sysctl.u.page_offline.start = start;
++ sysctl.u.page_offline.cmd = sysctl_query_page_offline;
++ sysctl.u.page_offline.end = end;
++ set_xen_guest_handle(sysctl.u.page_offline.status, status);
++ ret = xc_sysctl(xc, &sysctl);
++
++ unlock_pages(status, sizeof(uint32_t)*(end - start + 1));
++
++ return ret;
++}
++
++ /*
++ * There should be no updates to the grant table while the domain is paused.
++ */
++static int xc_is_page_granted_v1(int xc_handle, xen_pfn_t gpfn,
++ struct grant_entry_v1 *gnttab, int gnt_num)
++{
++ int i = 0;
++
++ if (!gnttab)
++ return 0;
++
++ for (i = 0; i < gnt_num; i++)
++ if ( ((gnttab[i].flags & GTF_type_mask) != GTF_invalid) &&
++ (gnttab[i].frame == gpfn) )
++ break;
++
++ return (i != gnt_num);
++}
++
++static int xc_is_page_granted_v2(int xc_handle, xen_pfn_t gpfn,
++ struct grant_entry_v2 *gnttab, int gnt_num)
++{
++ int i = 0;
++
++ if (!gnttab)
++ return 0;
++
++ for (i = 0; i < gnt_num; i++)
++ if ( ((gnttab[i].hdr.flags & GTF_type_mask) != GTF_invalid) &&
++ (gnttab[i].frame == gpfn) )
++ break;
++
++ return (i != gnt_num);
++}
++
++static xen_pfn_t pfn_to_mfn(xen_pfn_t pfn, xen_pfn_t *p2m, int gwidth)
++{
++ return ((xen_pfn_t) ((gwidth==8)?
++ (((uint64_t *)p2m)[(pfn)]):
++ ((((uint32_t *)p2m)[(pfn)]) == 0xffffffffU ?
++ (-1UL) :
++ (((uint32_t *)p2m)[(pfn)]))));
++}
++
++static int get_pt_level(int xc_handle, uint32_t domid,
++ unsigned int *pt_level,
++ unsigned int *gwidth)
++{
++ DECLARE_DOMCTL;
++ xen_capabilities_info_t xen_caps = "";
++
++ if (xc_version(xc_handle, XENVER_capabilities, &xen_caps) != 0)
++ return -1;
++
++ memset(&domctl, 0, sizeof(domctl));
++ domctl.domain = domid;
++ domctl.cmd = XEN_DOMCTL_get_address_size;
++
++ if ( do_domctl(xc_handle, &domctl) != 0 )
++ return -1;
++
++ *gwidth = domctl.u.address_size.size / 8;
++
++ if (strstr(xen_caps, "xen-3.0-x86_64"))
++ /* Depends on whether it's a compat 32-on-64 guest */
++ *pt_level = ( (*gwidth == 8) ? 4 : 3 );
++ else if (strstr(xen_caps, "xen-3.0-x86_32p"))
++ *pt_level = 3;
++ else if (strstr(xen_caps, "xen-3.0-x86_32"))
++ *pt_level = 2;
++ else
++ return -1;
++
++ return 0;
++}
++
++static int close_mem_info(int xc_handle, struct domain_mem_info *minfo)
++{
++ if (minfo->pfn_type)
++ free(minfo->pfn_type);
++ munmap(minfo->m2p_table, M2P_SIZE(minfo->max_mfn));
++ munmap(minfo->p2m_table, P2M_FLL_ENTRIES * PAGE_SIZE);
++ minfo->p2m_table = minfo->m2p_table = NULL;
++
++ return 0;
++}
++
++static int init_mem_info(int xc_handle, int domid,
++ struct domain_mem_info *minfo,
++ xc_dominfo_t *info)
++{
++ uint64_aligned_t shared_info_frame;
++ shared_info_any_t *live_shinfo = NULL;
++ int i, rc;
++
++ /* Must only be initialized once */
++ if (minfo->pfn_type || minfo->m2p_table || minfo->p2m_table)
++ return -EINVAL;
++
++ if ( get_pt_level(xc_handle, domid, &minfo->pt_level,
++ &minfo->guest_width) )
++ {
++ ERROR("Unable to get PT level info.");
++ return -EFAULT;
++ }
++ guest_width = minfo->guest_width;
++
++ shared_info_frame = info->shared_info_frame;
++
++ live_shinfo = xc_map_foreign_range(xc_handle, domid,
++ PAGE_SIZE, PROT_READ, shared_info_frame);
++ if ( !live_shinfo )
++ {
++ ERROR("Couldn't map live_shinfo");
++ return -EFAULT;
++ }
++
++ if ( (rc = xc_core_arch_map_p2m_writable(xc_handle, minfo->guest_width,
++ info, live_shinfo, &minfo->p2m_table, &minfo->p2m_size)) )
++ {
++ ERROR("Couldn't map p2m table %x\n", rc);
++ goto failed;
++ }
++ munmap(live_shinfo, PAGE_SIZE);
++ live_shinfo = NULL;
++
++ p2m_size = minfo->p2m_size;
++
++ minfo->max_mfn = xc_memory_op(xc_handle, XENMEM_maximum_ram_page, NULL);
++ if ( !(minfo->m2p_table =
++ xc_map_m2p(xc_handle, minfo->max_mfn, PROT_READ, NULL)) )
++ {
++ ERROR("Failed to map live M2P table");
++ goto failed;
++ }
++
++ /* Get pfn type */
++ minfo->pfn_type = malloc(sizeof(uint32_t) * minfo->p2m_size);
++ if (!minfo->pfn_type)
++ {
++ ERROR("Failed to malloc pfn_type\n");
++ goto failed;
++ }
++ memset(minfo->pfn_type, 0, sizeof(uint32_t) * minfo->p2m_size);
++
++ for (i = 0; i < minfo->p2m_size; i++)
++ minfo->pfn_type[i] = pfn_to_mfn(i, minfo->p2m_table,
++ minfo->guest_width);
++
++ if ( lock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(uint32_t)) )
++ {
++ ERROR("Unable to lock pfn_type array");
++ goto failed;
++ }
++
++ for (i = 0; i < minfo->p2m_size ; i+=1024)
++ {
++ int count = ((p2m_size - i ) > 1024 ) ? 1024: (p2m_size - i);
++ if ( ( rc = xc_get_pfn_type_batch(xc_handle, domid, count,
++ minfo->pfn_type + i)) )
++ {
++ ERROR("Failed to get pfn_type %x\n", rc);
++ goto unlock;
++ }
++ }
++ return 0;
++
++unlock:
++ unlock_pages(minfo->pfn_type, minfo->p2m_size * sizeof(uint32_t));
++failed:
++ if (minfo->pfn_type)
++ {
++ free(minfo->pfn_type);
++ minfo->pfn_type = NULL;
++ }
++ if (live_shinfo)
++ munmap(live_shinfo, PAGE_SIZE);
++ munmap(minfo->m2p_table, M2P_SIZE(minfo->max_mfn));
++ munmap(minfo->p2m_table, P2M_FLL_ENTRIES * PAGE_SIZE);
++ minfo->p2m_table = minfo->m2p_table = NULL;
++
++ return -1;
++}
++
++static int backup_ptes(xen_pfn_t table_mfn, int offset,
++ struct pte_backup *backup)
++{
++ if (!backup)
++ return -EINVAL;
++
++ if (backup->max == backup->cur)
++ {
++ backup->entries = realloc(backup->entries,
++ backup->max * 2 * sizeof(struct pte_backup_entry));
++ if (backup->entries == NULL)
++ return -1;
++ else
++ backup->max *= 2;
++ }
++
++ backup->entries[backup->cur].table_mfn = table_mfn;
++ backup->entries[backup->cur++].offset = offset;
++
++ return 0;
++}
++
++/*
++ * return:
++ * 1 when MMU update is required
++ * 0 when no changes
++ * <0 when an error occurs
++ */
++typedef int (*pte_func)(uint64_t pte, uint64_t *new_pte,
++ unsigned long table_mfn, int table_offset,
++ struct pte_backup *backup,
++ unsigned long no_use);
++
++static int __clear_pte(uint64_t pte, uint64_t *new_pte,
++ unsigned long table_mfn, int table_offset,
++ struct pte_backup *backup,
++ unsigned long mfn)
++{
++ /* If no new_pte pointer, same as no changes needed */
++ if (!new_pte || !backup)
++ return -EINVAL;
++
++ if ( !(pte & _PAGE_PRESENT))
++ return 0;
++
++ /* XXX Check for PSE bit here */
++ /* Hit one entry */
++ if ( ((pte >> PAGE_SHIFT_X86) & MFN_MASK_X86) == mfn)
++ {
++ *new_pte = pte & ~_PAGE_PRESENT;
++ if (!backup_ptes(table_mfn, table_offset, backup))
++ return 1;
++ }
++
++ return 0;
++}
++
++static int __update_pte(uint64_t pte, uint64_t *new_pte,
++ unsigned long table_mfn, int table_offset,
++ struct pte_backup *backup,
++ unsigned long new_mfn)
++{
++ int index;
++
++ if (!new_pte)
++ return 0;
++
++ for (index = 0; index < backup->cur; index ++)
++ if ( (backup->entries[index].table_mfn == table_mfn) &&
++ (backup->entries[index].offset == table_offset) )
++ break;
++
++ if (index != backup->cur)
++ {
++ if (pte & _PAGE_PRESENT)
++ ERROR("Page present while in backup ptes\n");
++ pte &= ~MFN_MASK_X86;
++ pte |= (new_mfn << PAGE_SHIFT_X86) | _PAGE_PRESENT;
++ *new_pte = pte;
++ return 1;
++ }
++
++ return 0;
++}
++
++static int change_pte(int xc_handle, int domid,
++ struct domain_mem_info *minfo,
++ struct pte_backup *backup,
++ struct xc_mmu *mmu,
++ pte_func func,
++ unsigned long data)
++{
++ int pte_num, rc;
++ uint64_t i;
++ void *content = NULL;
++
++ pte_num = PAGE_SIZE / ((minfo->pt_level == 2) ? 4 : 8);
++
++ for (i = 0; i < minfo->p2m_size; i++)
++ {
++ xen_pfn_t table_mfn = pfn_to_mfn(i, minfo->p2m_table,
++ minfo->guest_width);
++ uint64_t pte, new_pte;
++ int j;
++
++ if ( table_mfn == INVALID_P2M_ENTRY )
++ continue;
++
++ if ( minfo->pfn_type[i] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
++ {
++ content = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
++ PROT_READ, table_mfn);
++ if (!content)
++ goto failed;
++
++ for (j = 0; j < pte_num; j++)
++ {
++ if ( minfo->pt_level == 2 )
++ pte = ((const uint32_t*)content)[j];
++ else
++ pte = ((const uint64_t*)content)[j];
++
++ rc = func(pte, &new_pte, table_mfn, j, backup, data);
++
++ switch (rc)
++ {
++ case 1:
++ if ( xc_add_mmu_update(xc_handle, mmu,
++ table_mfn << PAGE_SHIFT |
++ j * ( (minfo->pt_level == 2) ?
++ sizeof(uint32_t): sizeof(uint64_t)) |
++ MMU_PT_UPDATE_PRESERVE_AD,
++ new_pte) )
++ goto failed;
++ break;
++
++ case 0:
++ break;
++
++ default:
++ goto failed;
++ }
++ }
++ }
++
++ munmap(content, PAGE_SIZE);
++ content = NULL;
++ }
++
++ if ( xc_flush_mmu_updates(xc_handle, mmu) )
++ goto failed;
++
++ return 0;
++failed:
++ /* XXX Shall we take any action if we fail to swap? */
++ if (content)
++ munmap(content, PAGE_SIZE);
++
++ return -1;
++}
++
++static int update_pte(int xc_handle, int domid,
++ struct domain_mem_info *minfo,
++ struct pte_backup *backup,
++ struct xc_mmu *mmu,
++ unsigned long new_mfn)
++{
++ return change_pte(xc_handle, domid, minfo, backup, mmu,
++ __update_pte, new_mfn);
++}
++
++static int clear_pte(int xc_handle, int domid,
++ struct domain_mem_info *minfo,
++ struct pte_backup *backup,
++ struct xc_mmu *mmu,
++ xen_pfn_t mfn)
++{
++ return change_pte(xc_handle, domid, minfo, backup, mmu,
++ __clear_pte, mfn);
++}
++
++static int exchange_page(int xc_handle, xen_pfn_t mfn,
++ xen_pfn_t *new_mfn, int domid)
++{
++ int rc;
++ xen_pfn_t out_mfn;
++
++ struct xen_memory_exchange exchange = {
++ .in = {
++ .nr_extents = 1,
++ .extent_order = 0,
++ .domid = domid
++ },
++ .out = {
++ .nr_extents = 1,
++ .extent_order = 0,
++ .domid = domid
++ }
++ };
++ set_xen_guest_handle(exchange.in.extent_start, &mfn);
++ set_xen_guest_handle(exchange.out.extent_start, &out_mfn);
++
++ rc = xc_memory_op(xc_handle, XENMEM_exchange, &exchange);
++
++ if (!rc)
++ *new_mfn = out_mfn;
++
++ return rc;
++}
++
++/*
++ * Check if a page can be exchanged successfully
++ */
++
++static int is_page_exchangable(int xc_handle, int domid, xen_pfn_t mfn,
++ xc_dominfo_t *info)
++{
++ uint32_t status;
++ int rc;
++
++ /* domain checking */
++ if ( !domid || (domid > DOMID_FIRST_RESERVED) )
++ {
++ DPRINTF("Dom0's page can't be LM");
++ return 0;
++ }
++ if (info->hvm)
++ {
++ DPRINTF("Currently we can only live change PV guest's page\n");
++ return 0;
++ }
++
++ /* Check if pages are offline pending or not */
++ rc = xc_query_page_offline_status(xc_handle, mfn, mfn, &status);
++
++ if ( rc || !(status & PG_OFFLINE_STATUS_OFFLINE_PENDING) )
++ {
++ ERROR("Page %lx is not offline pending %x\n",
++ mfn, status);
++ return 0;
++ }
++
++ return 1;
++}
++
++/* The domain should be suspended when called here */
++int xc_exchange_page(int xc_handle, int domid, xen_pfn_t mfn)
++{
++ xc_dominfo_t info;
++ struct domain_mem_info minfo;
++ struct xc_mmu *mmu = NULL;
++ struct pte_backup old_ptes = {NULL, 0, 0};
++ struct grant_entry_v1 *gnttab_v1 = NULL;
++ struct grant_entry_v2 *gnttab_v2 = NULL;
++ struct mmuext_op mops;
++ int gnt_num, unpined = 0;
++ void *old_p, *backup = NULL;
++ int rc, result = -1;
++ uint32_t status;
++ xen_pfn_t new_mfn, gpfn;
++
++ if ( xc_domain_getinfo(xc_handle, domid, 1, &info) != 1 )
++ {
++ ERROR("Could not get domain info");
++ return -EFAULT;
++ }
++
++ if (!info.shutdown || info.shutdown_reason != SHUTDOWN_suspend)
++ {
++ ERROR("Can't exchange page unless domain is suspended\n");
++ return -EINVAL;
++ }
++
++ if (!is_page_exchangable(xc_handle, domid, mfn, &info))
++ {
++ ERROR("Could not exchange page\n");
++ return -EINVAL;
++ }
++
++ /* Get domain's memory information */
++ memset(&minfo, 0, sizeof(minfo));
++ init_mem_info(xc_handle, domid, &minfo, &info);
++ gpfn = minfo.m2p_table[mfn];
++
++ /* Don't exchange CR3 for PAE guest in PAE host environment */
++ if (minfo.guest_width > sizeof(long))
++ {
++ if ( (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK) ==
++ XEN_DOMCTL_PFINFO_L3TAB )
++ goto failed;
++ }
++
++ gnttab_v2 = xc_gnttab_map_table_v2(xc_handle, domid, &gnt_num);
++ if (!gnttab_v2)
++ {
++ gnttab_v1 = xc_gnttab_map_table_v1(xc_handle, domid, &gnt_num);
++ if (!gnttab_v1)
++ {
++ ERROR("Failed to map grant table\n");
++ goto failed;
++ }
++ }
++
++ if (gnttab_v1
++ ? xc_is_page_granted_v1(xc_handle, mfn, gnttab_v1, gnt_num)
++ : xc_is_page_granted_v2(xc_handle, mfn, gnttab_v2, gnt_num))
++ {
++ ERROR("Page %lx is granted now\n", mfn);
++ goto failed;
++ }
++
++ /* allocate required data structure */
++ backup = malloc(PAGE_SIZE);
++ if (!backup)
++ {
++ ERROR("Failed to allocate backup pages pointer\n");
++ goto failed;
++ }
++
++ old_ptes.max = DEFAULT_BACKUP_COUNT;
++ old_ptes.entries = malloc(sizeof(struct pte_backup_entry) *
++ DEFAULT_BACKUP_COUNT);
++
++ if (!old_ptes.entries)
++ {
++ ERROR("Faield to allocate backup\n");
++ goto failed;
++ }
++ old_ptes.cur = 0;
++
++ /* Unpin the page if it is pinned */
++ if (minfo.pfn_type[gpfn] & XEN_DOMCTL_PFINFO_LPINTAB)
++ {
++ mops.cmd = MMUEXT_UNPIN_TABLE;
++ mops.arg1.mfn = mfn;
++
++ if ( xc_mmuext_op(xc_handle, &mops, 1, domid) < 0 )
++ {
++ ERROR("Failed to unpin page %lx", mfn);
++ goto failed;
++ }
++ mops.arg1.mfn = mfn;
++ unpined = 1;
++ }
++
++ /* backup the content */
++ old_p = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
++ PROT_READ, mfn);
++ if (!old_p)
++ {
++ ERROR("Failed to map foreign page %lx\n", mfn);
++ goto failed;
++ }
++
++ memcpy(backup, old_p, PAGE_SIZE);
++ munmap(old_p, PAGE_SIZE);
++
++ mmu = xc_alloc_mmu_updates(xc_handle, domid);
++ if ( mmu == NULL )
++ {
++ ERROR("%s: failed at %d\n", __FUNCTION__, __LINE__);
++ goto failed;
++ }
++
++ /* First invalidate all PTEs referencing the page, to drop the references */
++ rc = clear_pte(xc_handle, domid, &minfo, &old_ptes, mmu, mfn);
++
++ if (rc)
++ {
++ ERROR("clear pte failed\n");
++ goto failed;
++ }
++
++ rc = exchange_page(xc_handle, mfn, &new_mfn, domid);
++
++ if (rc)
++ {
++ ERROR("Exchange the page failed\n");
++ /* Exchange failure means there are still references to the page */
++ rc = update_pte(xc_handle, domid, &minfo, &old_ptes, mmu, mfn);
++ if (rc)
++ result = -2;
++ goto failed;
++ }
++
++ rc = update_pte(xc_handle, domid, &minfo, &old_ptes, mmu, new_mfn);
++
++ if (rc)
++ {
++ ERROR("update pte failed guest may be broken now\n");
++ /* No recover action now for swap fail */
++ result = -2;
++ goto failed;
++ }
++
++ /* Check if pages are offlined already */
++ rc = xc_query_page_offline_status(xc_handle, mfn, mfn,
++ &status);
++
++ if (rc)
++ {
++ ERROR("Fail to query offline status\n");
++ }else if ( !(status & PG_OFFLINE_STATUS_OFFLINED) )
++ {
++ ERROR("page is still online or pending\n");
++ goto failed;
++ }
++ else
++ {
++ void *new_p;
++ IPRINTF("Now page is offlined %lx\n", mfn);
++ /* Update the p2m table */
++ minfo.p2m_table[gpfn] = new_mfn;
++
++ new_p = xc_map_foreign_range(xc_handle, domid, PAGE_SIZE,
++ PROT_READ|PROT_WRITE, new_mfn);
++ memcpy(new_p, backup, PAGE_SIZE);
++ munmap(new_p, PAGE_SIZE);
++ mops.arg1.mfn = new_mfn;
++ result = 0;
++ }
++
++failed:
++
++ if (unpined && (minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LPINTAB))
++ {
++ switch ( minfo.pfn_type[mfn] & XEN_DOMCTL_PFINFO_LTABTYPE_MASK )
++ {
++ case XEN_DOMCTL_PFINFO_L1TAB:
++ mops.cmd = MMUEXT_PIN_L1_TABLE;
++ break;
++
++ case XEN_DOMCTL_PFINFO_L2TAB:
++ mops.cmd = MMUEXT_PIN_L2_TABLE;
++ break;
++
++ case XEN_DOMCTL_PFINFO_L3TAB:
++ mops.cmd = MMUEXT_PIN_L3_TABLE;
++ break;
++
++ case XEN_DOMCTL_PFINFO_L4TAB:
++ mops.cmd = MMUEXT_PIN_L4_TABLE;
++ break;
++
++ default:
++ ERROR("Unpined for non pate table page\n");
++ break;
++ }
++
++ if ( xc_mmuext_op(xc_handle, &mops, 1, domid) < 0 )
++ {
++ ERROR("failed to pin the mfn again\n");
++ result = -2;
++ }
++ }
++
++ if (mmu)
++ free(mmu);
++
++ if (old_ptes.entries)
++ free(old_ptes.entries);
++
++ if (backup)
++ free(backup);
++
++ if (gnttab_v1)
++ munmap(gnttab_v1, gnt_num * sizeof(struct grant_entry_v1));
++ if (gnttab_v2)
++ munmap(gnttab_v2, gnt_num * sizeof(struct grant_entry_v2));
++
++ close_mem_info(xc_handle, &minfo);
++
++ return result;
++}
+diff --git a/tools/libxc/xenctrl.h b/tools/libxc/xenctrl.h
+index c9b1866..10ab1ea 100644
+--- a/tools/libxc/xenctrl.h
++++ b/tools/libxc/xenctrl.h
+@@ -937,6 +937,13 @@ int xc_gnttab_munmap(int xcg_handle,
+ int xc_gnttab_set_max_grants(int xcg_handle,
+ uint32_t count);
+
++int xc_gnttab_op(int xc_handle, int cmd,
++ void * op, int op_size, int count);
++
++int xc_gnttab_get_version(int xc_handle, int domid);
++struct grant_entry_v1 *xc_gnttab_map_table_v1(int xc_handle, int domid, int *gnt_num);
++struct grant_entry_v2 *xc_gnttab_map_table_v2(int xc_handle, int domid, int *gnt_num);
++
+ int xc_physdev_map_pirq(int xc_handle,
+ int domid,
+ int index,
+diff --git a/tools/python/xen/xend/XendDevices.py b/tools/python/xen/xend/XendDevices.py
+index 4463842..5350781 100644
+--- a/tools/python/xen/xend/XendDevices.py
++++ b/tools/python/xen/xend/XendDevices.py
+@@ -19,8 +19,8 @@
+ # A collection of DevControllers
+ #
+
+-from xen.xend.server import blkif, netif, tpmif, pciif, iopif, irqif, vfbif, vscsiif
+-from xen.xend.server.BlktapController import BlktapController
++from xen.xend.server import blkif, netif, tpmif, pciif, iopif, irqif, vfbif, vscsiif, netif2
++from xen.xend.server.BlktapController import BlktapController, Blktap2Controller
+ from xen.xend.server.ConsoleController import ConsoleController
+
+
+@@ -37,6 +37,7 @@ class XendDevices:
+ controllers = {
+ 'vbd': blkif.BlkifController,
+ 'vif': netif.NetifController,
++ 'vif2': netif2.NetifController2,
+ 'vtpm': tpmif.TPMifController,
+ 'pci': pciif.PciController,
+ 'ioports': iopif.IOPortsController,
+diff --git a/tools/python/xen/xend/XendDomainInfo.py b/tools/python/xen/xend/XendDomainInfo.py
+index 4b74189..4edb5ad 100644
+--- a/tools/python/xen/xend/XendDomainInfo.py
++++ b/tools/python/xen/xend/XendDomainInfo.py
+@@ -1162,7 +1162,7 @@ class XendDomainInfo:
+ break
+ self._waitForDevice_destroy(deviceClass, devid, backend)
+
+- if rm_cfg:
++ if rm_cfg and deviceClass != "vif2":
+ if deviceClass == 'vif':
+ if self.domid is not None:
+ for dev_num, dev_info in sxprs:
+diff --git a/tools/python/xen/xend/server/netif2.py b/tools/python/xen/xend/server/netif2.py
+new file mode 100644
+index 0000000..a098c13
+--- /dev/null
++++ b/tools/python/xen/xend/server/netif2.py
+@@ -0,0 +1,168 @@
++#============================================================================
++# This library is free software; you can redistribute it and/or
++# modify it under the terms of version 2.1 of the GNU Lesser General Public
++# License as published by the Free Software Foundation.
++#
++# This library is distributed in the hope that it will be useful,
++# but WITHOUT ANY WARRANTY; without even the implied warranty of
++# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
++# Lesser General Public License for more details.
++#
++# You should have received a copy of the GNU Lesser General Public
++# License along with this library; if not, write to the Free Software
++# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
++#============================================================================
++# Copyright (C) 2004, 2005 Mike Wray <mike.wray@hp.com>
++# Copyright (C) 2005 XenSource Ltd
++# Copyright (C) 2008 Citrix Systems Inc.
++#============================================================================
++#
++# Based closely on netif.py.
++#
++
++"""Support for virtual network interfaces, version 2.
++"""
++
++import os
++import random
++import re
++import time
++
++from xen.xend import XendOptions
++from xen.xend.server.DevController import DevController
++from xen.xend.XendError import VmError
++from xen.xend.XendXSPolicyAdmin import XSPolicyAdminInstance
++from xen.xend.xenstore.xstransact import xstransact
++import xen.util.xsm.xsm as security
++
++from xen.xend.XendLogging import log
++
++xoptions = XendOptions.instance()
++
++def randomMAC():
++ """Generate a random MAC address.
++
++ Uses OUI (Organizationally Unique Identifier) 00-16-3E, allocated to
++ Xensource, Inc. The OUI list is available at
++ http://standards.ieee.org/regauth/oui/oui.txt.
++
++ The remaining 3 fields are random, with the first bit of the first
++ random field set 0.
++
++ @return: MAC address string
++ """
++ mac = [ 0x00, 0x16, 0x3e,
++ random.randint(0x00, 0x7f),
++ random.randint(0x00, 0xff),
++ random.randint(0x00, 0xff) ]
++ return ':'.join(map(lambda x: "%02x" % x, mac))
++
++class NetifController2(DevController):
++ def __init__(self, vm):
++ DevController.__init__(self, vm)
++
++ def getDeviceDetails(self, config):
++ """@see DevController.getDeviceDetails"""
++
++ devid = self.allocateDeviceID()
++
++ bridge = config.get('bridge')
++ back_mac = config.get('back_mac')
++ if not back_mac:
++ if bridge:
++ back_mac = "fe:ff:ff:ff:ff:ff"
++ else:
++ back_mac = randomMAC()
++ front_mac = config.get('front_mac') or randomMAC()
++ front_trust = config.get("trusted") or "0"
++ back_trust = config.get("back_trusted") or "1"
++ max_bypasses = config.get("max_bypasses") or "5"
++ pdev = config.get('pdev')
++ front_filter = config.get("front_filter_mac")
++ if front_filter == None:
++ if back_trust == "0":
++ front_filter = "1"
++ else:
++ front_filter = "0"
++ back_filter = config.get("filter_mac")
++ if back_filter == None:
++ if front_trust == "0":
++ back_filter = "1"
++ else:
++ back_filter = "0"
++ back = { 'mac': back_mac, 'remote-mac': front_mac,
++ 'handle': "%i" % devid, 'local-trusted': back_trust,
++ 'remote-trusted': front_trust, 'filter-mac': back_filter,
++ 'max-bypasses': max_bypasses }
++
++ front = { 'mac': front_mac, 'remote-mac': back_mac,
++ 'local-trusted': front_trust, 'remote-trusted': back_trust,
++ 'filter-mac': front_filter }
++
++ if bridge:
++ back['bridge'] = bridge
++
++ if pdev:
++ back['pdev'] = pdev
++
++ return (devid, back, front)
++
++ def getDeviceConfiguration(self, devid, transaction = None):
++ """@see DevController.configuration"""
++
++ if transaction is None:
++ read_fn = xstransact.Read
++ else:
++ read_fn = transaction.read
++ def front_read(x):
++ return read_fn(frontpath + x)
++ def back_read(x):
++ return read_fn(backpath + x)
++
++ result = DevController.getDeviceConfiguration(self, devid, transaction)
++
++ dev = self.convertToDeviceNumber(devid)
++ frontpath = self.frontendPath(dev) + "/"
++
++ backpath = front_read("backend") + "/"
++
++ front_mac = front_read("mac")
++ back_mac = back_read("mac")
++
++ front_trusted = back_read("remote-trusted")
++ back_trusted = back_read("local-trusted")
++ max_bypasses = back_read("max-bypasses")
++
++ bridge = back_read("bridge")
++
++ pdev = back_read("pdev")
++
++ if front_mac:
++ result["front_mac"] = front_mac
++ if back_mac:
++ result["back_mac"] = back_mac
++ if front_trusted:
++ result["front_trusted"] = front_trusted
++ if back_trusted:
++ result["back_trusted"] = back_trusted
++ if bridge:
++ result["bridge"] = bridge
++ if pdev:
++ result["pdev"] = pdev
++ if max_bypasses:
++ result["max-bypasses"] = max_bypasses
++ return result
++
++ def destroyDevice(self, devid, force):
++ dev = self.convertToDeviceNumber(devid)
++ self.writeBackend(dev, "online", "0")
++ if force:
++ self.writeBackend(dev, "shutdown-request", "force")
++ else:
++ self.writeBackend(dev, "shutdown-request", "normal")
++ self.vm._removeVm("device/%s/%d" % (self.deviceClass, dev))
+diff --git a/tools/python/xen/xm/create.py b/tools/python/xen/xm/create.py
+index 78601fe..b07590c 100644
+--- a/tools/python/xen/xm/create.py
++++ b/tools/python/xen/xm/create.py
+@@ -386,6 +386,12 @@ gopts.var('vif', val="type=TYPE,mac=MAC,bridge=BRIDGE,ip=IPADDR,script=SCRIPT,"
+ This option may be repeated to add more than one vif.
+ Specifying vifs will increase the number of interfaces as needed.""")
+
++gopts.var('vif2', val="front_mac=MAC,back_mac=MAC,backend=DOM,pdev=PDEV,max_bypasses=N,bridge=BRIDGE,filter_mac=<0|1>,front_filter_mac=<0|1>",
++ fn=append_value, default=[],
++ use="""Add a netchannel2 network interface using given front
++ and backend MAC addresses. Randomly generated
++ addresses will be used if either address is missing.""")
++
+ gopts.var('vtpm', val="instance=INSTANCE,backend=DOM,type=TYPE",
+ fn=append_value, default=[],
+ use="""Add a TPM interface. On the backend side use the given
+@@ -895,6 +901,8 @@ def configure_vifs(config_devs, vals):
+
+ vifs = vals.vif
+ vifs_n = len(vifs)
++ vifs2 = vals.vif2
++ vifs2_n = len(vifs2)
+
+ if hasattr(vals, 'nics'):
+ if vals.nics > 0:
+@@ -921,6 +929,18 @@ def configure_vifs(config_devs, vals):
+ map(f, d.keys())
+ config_devs.append(['device', config_vif])
+
++ for c in vifs2:
++ d = comma_sep_kv_to_dict(c)
++ config_vif = ['vif2']
++
++ for k in d.keys():
++ if k not in ['front_mac', 'back_mac', 'backend', 'trusted',
++ 'back_trusted', 'front_filter_mac', 'filter_mac',
++ 'bridge', 'pdev', 'max_bypasses' ]:
++ err('Invalid vif2 option: ' + k)
++ config_vif.append([k, d[k]])
++ config_devs.append(['device', config_vif])
++
+
+ def configure_hvm(config_image, vals):
+ """Create the config for HVM devices.
+diff --git a/tools/python/xen/xm/main.py b/tools/python/xen/xm/main.py
+index a460bc8..a1779f7 100644
+--- a/tools/python/xen/xm/main.py
++++ b/tools/python/xen/xm/main.py
+@@ -180,6 +180,15 @@ SUBCOMMAND_HELP = {
+ 'Destroy a domain\'s virtual network device.'),
+ 'network-list' : ('<Domain> [--long]',
+ 'List virtual network interfaces for a domain.'),
++ 'network2-attach': ('<Domain> [front_mac=<mac>] [back_mac=<mac>] '
++ '[backend=<BackDomain>] [trusted=<0|1>] '
++ '[back_trusted=<0|1>] [bridge=<bridge>] '
++ '[max_bypasses=n]',
++ 'Create a new version 2 virtual network device.'),
++ 'network2-detach': ('<Domain> <DevId> [-f|--force]',
++ 'Destroy a domain\'s version 2 virtual network device.'),
++ 'network2-list' : ('<Domain> [--long]',
++ 'List version 2 virtual network interfaces for a domain.'),
+ 'vnet-create' : ('<ConfigFile>','Create a vnet from ConfigFile.'),
+ 'vnet-delete' : ('<VnetId>', 'Delete a Vnet.'),
+ 'vnet-list' : ('[-l|--long]', 'List Vnets.'),
+@@ -367,6 +376,9 @@ device_commands = [
+ "network-attach",
+ "network-detach",
+ "network-list",
++ "network2-attach",
++ "network2-detach",
++ "network2-list",
+ "vtpm-list",
+ "pci-attach",
+ "pci-detach",
+@@ -2358,6 +2370,35 @@ def xm_block_configure(args):
+ server.xend.domain.device_configure(dom, vbd)
+
+
++def xm_network2_attach(args):
++ xenapi_unsupported()
++ arg_check(args, 'network2-attach', 1, 4)
++ dom = args[0]
++ vif = ['vif2']
++ vif_params = ['front_mac', 'back_mac', 'backend', 'trusted',
++ 'back_trusted', "front_filter_mac", "filter_mac",
++ 'bridge', 'pdev', "max_bypasses" ]
++ for a in args[1:]:
++ vif_param = a.split("=")
++ if len(vif_param) != 2 or vif_param[1] == "" or \
++ vif_param[0] not in vif_params:
++ err("Invalid argument: %s" % a)
++ usage("network2-attach")
++ vif.append(vif_param)
++ server.xend.domain.device_create(dom, vif)
++
++def xm_network2_detach(args):
++ xenapi_unsupported()
++ arg_check(args, "network2-detch", 2, 3)
++ detach(args, "vif2")
++
++def xm_network2_list(args):
++ xenapi_unsupported()
++ (use_long, params) = arg_check_for_resource_list(args, "network2-list")
++ dom = params[0]
++ devs = server.xend.domain.getDeviceSxprs(dom, 'vif2')
++ map(PrettyPrint.prettyprint, devs)
++
+ def xm_network_attach(args):
+ arg_check(args, 'network-attach', 1, 11)
+
+@@ -2884,6 +2925,9 @@ commands = {
+ "network-attach": xm_network_attach,
+ "network-detach": xm_network_detach,
+ "network-list": xm_network_list,
++ "network2-attach": xm_network2_attach,
++ "network2-detach": xm_network2_detach,
++ "network2-list": xm_network2_list,
+ # network (as in XenAPI)
+ "network-new": xm_network_new,
+ "network-del": xm_network_del,
+diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
+index 42652ca..1c7bbb8 100644
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -1926,10 +1926,26 @@ enum hvm_intblk hvm_interrupt_blocked(struct vcpu *v, struct hvm_intack intack)
+ return hvm_intblk_none;
+ }
+
++static int grant_table_op_is_allowed(unsigned int cmd)
++{
++ switch (cmd) {
++ case GNTTABOP_query_size:
++ case GNTTABOP_setup_table:
++ case GNTTABOP_set_version:
++ case GNTTABOP_copy:
++ case GNTTABOP_map_grant_ref:
++ case GNTTABOP_unmap_grant_ref:
++ return 1;
++ default:
++ /* all other commands need auditing */
++ return 0;
++ }
++}
++
+ static long hvm_grant_table_op(
+ unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count)
+ {
+- if ( (cmd != GNTTABOP_query_size) && (cmd != GNTTABOP_setup_table) )
++ if ( !grant_table_op_is_allowed(cmd) )
+ return -ENOSYS; /* all other commands need auditing */
+ return do_grant_table_op(cmd, uop, count);
+ }
+@@ -1981,6 +1997,15 @@ static hvm_hypercall_t *hvm_hypercall32_table[NR_hypercalls] = {
+
+ #else /* defined(__x86_64__) */
+
++static long hvm_grant_table_op_compat32(unsigned int cmd,
++ XEN_GUEST_HANDLE(void) uop,
++ unsigned int count)
++{
++ if ( !grant_table_op_is_allowed(cmd) )
++ return -ENOSYS;
++ return compat_grant_table_op(cmd, uop, count);
++}
++
+ static long hvm_memory_op_compat32(int cmd, XEN_GUEST_HANDLE(void) arg)
+ {
+ long rc = compat_memory_op(cmd, arg);
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 0d6d5ee..067e136 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -3882,12 +3882,25 @@ long arch_memory_op(int op, XEN_GUEST_HANDLE(void) arg)
+ case XENMAPSPACE_grant_table:
+ spin_lock(&d->grant_table->lock);
+
+- if ( (xatp.idx >= nr_grant_frames(d->grant_table)) &&
+- (xatp.idx < max_nr_grant_frames) )
+- gnttab_grow_table(d, xatp.idx + 1);
++ if ( d->grant_table->gt_version == 0 )
++ d->grant_table->gt_version = 1;
+
+- if ( xatp.idx < nr_grant_frames(d->grant_table) )
+- mfn = virt_to_mfn(d->grant_table->shared[xatp.idx]);
++ if ( d->grant_table->gt_version == 2 &&
++ (xatp.idx & XENMAPIDX_grant_table_status) )
++ {
++ xatp.idx &= ~XENMAPIDX_grant_table_status;
++ if ( xatp.idx < nr_status_frames(d->grant_table) )
++ mfn = virt_to_mfn(d->grant_table->status[xatp.idx]);
++ }
++ else
++ {
++ if ( (xatp.idx >= nr_grant_frames(d->grant_table)) &&
++ (xatp.idx < max_nr_grant_frames) )
++ gnttab_grow_table(d, xatp.idx + 1);
++
++ if ( xatp.idx < nr_grant_frames(d->grant_table) )
++ mfn = virt_to_mfn(d->grant_table->shared_raw[xatp.idx]);
++ }
+
+ spin_unlock(&d->grant_table->lock);
+ break;
+diff --git a/xen/common/compat/grant_table.c b/xen/common/compat/grant_table.c
+index 5f0dc2d..cd4f1cd 100644
+--- a/xen/common/compat/grant_table.c
++++ b/xen/common/compat/grant_table.c
+@@ -5,9 +5,17 @@
+
+ #include <compat/grant_table.h>
+
+-#define xen_grant_entry grant_entry
+-CHECK_grant_entry;
+-#undef xen_grant_entry
++#define xen_grant_entry_v1 grant_entry_v1
++CHECK_grant_entry_v1;
++#undef xen_grant_entry_v1
++
++#define xen_grant_entry_header grant_entry_header
++CHECK_grant_entry_header;
++#undef xen_grant_entry_header
++
++#define xen_grant_entry_v2 grant_entry_v2
++CHECK_grant_entry_v2;
++#undef xen_grant_entry_v2
+
+ #define xen_gnttab_map_grant_ref gnttab_map_grant_ref
+ CHECK_gnttab_map_grant_ref;
+@@ -29,6 +37,16 @@ DEFINE_XEN_GUEST_HANDLE(gnttab_copy_compat_t);
+ CHECK_gnttab_dump_table;
+ #undef xen_gnttab_dump_table
+
++#define xen_gnttab_set_version gnttab_set_version
++CHECK_gnttab_set_version;
++#undef xen_gnttab_set_version
++
++DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_compat_t);
++
++#define xen_gnttab_get_version gnttab_get_version
++CHECK_gnttab_get_version;
++#undef xen_gnttab_get_version
++
+ int compat_grant_table_op(unsigned int cmd,
+ XEN_GUEST_HANDLE(void) cmp_uop,
+ unsigned int count)
+@@ -74,6 +92,10 @@ int compat_grant_table_op(unsigned int cmd,
+ CASE(dump_table);
+ #endif
+
++#ifndef CHECK_gnttab_get_status_frames
++ CASE(get_status_frames);
++#endif
++
+ #undef CASE
+ default:
+ return do_grant_table_op(cmd, cmp_uop, count);
+@@ -90,11 +112,13 @@ int compat_grant_table_op(unsigned int cmd,
+ struct gnttab_setup_table *setup;
+ struct gnttab_transfer *xfer;
+ struct gnttab_copy *copy;
++ struct gnttab_get_status_frames *get_status;
+ } nat;
+ union {
+ struct compat_gnttab_setup_table setup;
+ struct compat_gnttab_transfer xfer;
+ struct compat_gnttab_copy copy;
++ struct compat_gnttab_get_status_frames get_status;
+ } cmp;
+
+ set_xen_guest_handle(nat.uop, COMPAT_ARG_XLAT_VIRT_BASE);
+@@ -216,6 +240,63 @@ int compat_grant_table_op(unsigned int cmd,
+ }
+ break;
+
++ case GNTTABOP_get_status_frames: {
++ unsigned int max_frame_list_size_in_pages =
++ (COMPAT_ARG_XLAT_SIZE - sizeof(*nat.get_status)) /
++ sizeof(*nat.get_status->frame_list.p);
++ if ( count != 1)
++ {
++ rc = -EINVAL;
++ break;
++ }
++ if ( unlikely(__copy_from_guest(&cmp.get_status, cmp_uop, 1) ||
++ !compat_handle_okay(cmp.get_status.frame_list,
++ cmp.get_status.nr_frames)) )
++ {
++ rc = -EFAULT;
++ break;
++ }
++ if ( max_frame_list_size_in_pages <
++ grant_to_status_frames(max_nr_grant_frames) )
++ {
++ gdprintk(XENLOG_WARNING,
++ "grant_to_status_frames(max_nr_grant_frames) is too large (%u,%u)\n",
++ grant_to_status_frames(max_nr_grant_frames),
++ max_frame_list_size_in_pages);
++ rc = -EINVAL;
++ break;
++ }
++
++#define XLAT_gnttab_get_status_frames_HNDL_frame_list(_d_, _s_) \
++ set_xen_guest_handle((_d_)->frame_list, (uint64_t *)(nat.get_status + 1))
++ XLAT_gnttab_get_status_frames(nat.get_status, &cmp.get_status);
++#undef XLAT_gnttab_get_status_frames_HNDL_frame_list
++
++ rc = gnttab_get_status_frames(
++ guest_handle_cast(nat.uop, gnttab_get_status_frames_t),
++ count);
++ if ( rc >= 0 )
++ {
++#define XLAT_gnttab_get_status_frames_HNDL_frame_list(_d_, _s_) \
++ do \
++ { \
++ if ( (_s_)->status == GNTST_okay ) \
++ { \
++ for ( i = 0; i < (_s_)->nr_frames; ++i ) \
++ { \
++ uint64_t frame = (_s_)->frame_list.p[i]; \
++ (void)__copy_to_compat_offset((_d_)->frame_list, i, &frame, 1); \
++ } \
++ } \
++ } while (0)
++ XLAT_gnttab_get_status_frames(&cmp.get_status, nat.get_status);
++#undef XLAT_gnttab_get_status_frames_HNDL_frame_list
++ if ( unlikely(__copy_to_guest(cmp_uop, &cmp.get_status, 1)) )
++ rc = -EFAULT;
++ }
++ break;
++ }
++
+ default:
+ domain_crash(current->domain);
+ break;
+diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c
+index 5306354..40fc183 100644
+--- a/xen/common/grant_table.c
++++ b/xen/common/grant_table.c
+@@ -104,9 +104,24 @@ static unsigned inline int max_nr_maptrack_frames(void)
+ }
+
+
+-#define SHGNT_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_t))
+-#define shared_entry(t, e) \
+- ((t)->shared[(e)/SHGNT_PER_PAGE][(e)%SHGNT_PER_PAGE])
++#define SHGNT_PER_PAGE_V1 (PAGE_SIZE / sizeof(grant_entry_v1_t))
++#define shared_entry_v1(t, e) \
++ ((t)->shared_v1[(e)/SHGNT_PER_PAGE_V1][(e)%SHGNT_PER_PAGE_V1])
++#define SHGNT_PER_PAGE_V2 (PAGE_SIZE / sizeof(grant_entry_v2_t))
++#define shared_entry_v2(t, e) \
++ ((t)->shared_v2[(e)/SHGNT_PER_PAGE_V2][(e)%SHGNT_PER_PAGE_V2])
++#define STGNT_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))
++#define status_entry(t, e) \
++ ((t)->status[(e)/STGNT_PER_PAGE][(e)%STGNT_PER_PAGE])
++static grant_entry_header_t *
++shared_entry_header(struct grant_table *t, grant_ref_t ref)
++{
++ ASSERT(t->gt_version != 0);
++ if (t->gt_version == 1)
++ return (grant_entry_header_t*)&shared_entry_v1(t, ref);
++ else
++ return &shared_entry_v2(t, ref).hdr;
++}
+ #define ACGNT_PER_PAGE (PAGE_SIZE / sizeof(struct active_grant_entry))
+ #define active_entry(t, e) \
+ ((t)->active[(e)/ACGNT_PER_PAGE][(e)%ACGNT_PER_PAGE])
+@@ -182,6 +197,189 @@ get_maptrack_handle(
+ return handle;
+ }
+
++/* Number of grant table entries. Caller must hold d's grant table lock. */
++static unsigned int nr_grant_entries(struct grant_table *gt)
++{
++ ASSERT(gt->gt_version != 0);
++ if (gt->gt_version == 1)
++ return (nr_grant_frames(gt) << PAGE_SHIFT) / sizeof(grant_entry_v1_t);
++ else
++ return (nr_grant_frames(gt) << PAGE_SHIFT) / sizeof(grant_entry_v2_t);
++}
++
++static int _set_status_v1(domid_t domid,
++ int readonly,
++ int mapflag,
++ grant_entry_header_t *shah,
++ struct active_grant_entry *act)
++{
++ int rc = GNTST_okay;
++ union grant_combo scombo, prev_scombo, new_scombo;
++ uint16_t mask = GTF_type_mask;
++
++ /*
++ * We bound the number of times we retry CMPXCHG on memory locations that
++ * we share with a guest OS. The reason is that the guest can modify that
++ * location at a higher rate than we can read-modify-CMPXCHG, so the guest
++ * could cause us to livelock. There are a few cases where it is valid for
++ * the guest to race our updates (e.g., to change the GTF_readonly flag),
++ * so we allow a few retries before failing.
++ */
++ int retries = 0;
++
++ /* if this is a grant mapping operation we should ensure GTF_sub_page
++ is not set */
++ if (mapflag)
++ mask |= GTF_sub_page;
++
++ scombo.word = *(u32 *)shah;
++
++ /*
++ * This loop attempts to set the access (reading/writing) flags
++ * in the grant table entry. It tries a cmpxchg on the field
++ * up to five times, and then fails under the assumption that
++ * the guest is misbehaving.
++ */
++ for ( ; ; )
++ {
++ /* If not already pinned, check the grant domid and type. */
++ if ( !act->pin &&
++ (((scombo.shorts.flags & mask) !=
++ GTF_permit_access) ||
++ (scombo.shorts.domid != domid)) )
++ PIN_FAIL(done, GNTST_general_error,
++ "Bad flags (%x) or dom (%d). (expected dom %d)\n",
++ scombo.shorts.flags, scombo.shorts.domid,
++ domid);
++
++ new_scombo = scombo;
++ new_scombo.shorts.flags |= GTF_reading;
++
++ if ( !readonly )
++ {
++ new_scombo.shorts.flags |= GTF_writing;
++ if ( unlikely(scombo.shorts.flags & GTF_readonly) )
++ PIN_FAIL(done, GNTST_general_error,
++ "Attempt to write-pin a r/o grant entry.\n");
++ }
++
++ prev_scombo.word = cmpxchg((u32 *)shah,
++ scombo.word, new_scombo.word);
++ if ( likely(prev_scombo.word == scombo.word) )
++ break;
++
++ if ( retries++ == 4 )
++ PIN_FAIL(done, GNTST_general_error,
++ "Shared grant entry is unstable.\n");
++
++ scombo = prev_scombo;
++ }
++
++done:
++ return rc;
++}
++
++static int _set_status_v2(domid_t domid,
++ int readonly,
++ int mapflag,
++ grant_entry_header_t *shah,
++ struct active_grant_entry *act,
++ grant_status_t *status)
++{
++ int rc = GNTST_okay;
++ union grant_combo scombo;
++ uint16_t flags = shah->flags;
++ domid_t id = shah->domid;
++ uint16_t mask = GTF_type_mask;
++
++ /* we read flags and domid in a single memory access.
++ this avoids the need for another memory barrier to
++ ensure accesses to these fields are not reordered */
++ scombo.word = *(u32 *)shah;
++ barrier(); /* but we still need to stop the compiler from turning
++ it back into two reads */
++ flags = scombo.shorts.flags;
++ id = scombo.shorts.domid;
++
++ /* if this is a grant mapping operation we should ensure GTF_sub_page
++ is not set */
++ if (mapflag)
++ mask |= GTF_sub_page;
++
++ /* If not already pinned, check the grant domid and type. */
++ if ( !act->pin &&
++ ( (((flags & mask) != GTF_permit_access) &&
++ ((flags & mask) != GTF_transitive)) ||
++ (id != domid)) )
++ PIN_FAIL(done, GNTST_general_error,
++ "Bad flags (%x) or dom (%d). (expected dom %d, flags %x)\n",
++ flags, id, domid, mask);
++
++ if ( readonly )
++ {
++ *status |= GTF_reading;
++ }
++ else
++ {
++ if ( unlikely(flags & GTF_readonly) )
++ PIN_FAIL(done, GNTST_general_error,
++ "Attempt to write-pin a r/o grant entry.\n");
++ *status |= GTF_reading | GTF_writing;
++ }
++
++ /* Make sure guest sees status update before checking if flags are
++ still valid */
++ mb();
++
++ scombo.word = *(u32 *)shah;
++ barrier();
++ flags = scombo.shorts.flags;
++ id = scombo.shorts.domid;
++
++ if ( !act->pin )
++ {
++ if ( (((flags & mask) != GTF_permit_access) &&
++ ((flags & mask) != GTF_transitive)) ||
++ (id != domid) ||
++ (!readonly && (flags & GTF_readonly)) )
++ {
++ gnttab_clear_flag(_GTF_reading | _GTF_writing, status);
++ PIN_FAIL(done, GNTST_general_error,
++ "Unstable flags (%x) or dom (%d). (expected dom %d) "
++ "(r/w: %d)\n",
++ flags, id, domid, !readonly);
++ }
++ }
++ else
++ {
++ if ( unlikely(flags & GTF_readonly) )
++ {
++ gnttab_clear_flag(_GTF_writing, status);
++ PIN_FAIL(done, GNTST_general_error,
++ "Unstable grant readonly flag\n");
++ }
++ }
++
++done:
++ return rc;
++}
++
++
++static int _set_status(unsigned gt_version,
++ domid_t domid,
++ int readonly,
++ int mapflag,
++ grant_entry_header_t *shah,
++ struct active_grant_entry *act,
++ grant_status_t *status)
++{
++
++ if (gt_version == 1)
++ return _set_status_v1(domid, readonly, mapflag, shah, act);
++ else
++ return _set_status_v2(domid, readonly, mapflag, shah, act, status);
++}
++
+ /*
+ * Returns 0 if TLB flush / invalidate required by caller.
+ * va will indicate the address to be invalidated.
+@@ -203,18 +401,10 @@ __gnttab_map_grant_ref(
+ unsigned int cache_flags;
+ struct active_grant_entry *act;
+ struct grant_mapping *mt;
+- grant_entry_t *sha;
+- union grant_combo scombo, prev_scombo, new_scombo;
+-
+- /*
+- * We bound the number of times we retry CMPXCHG on memory locations that
+- * we share with a guest OS. The reason is that the guest can modify that
+- * location at a higher rate than we can read-modify-CMPXCHG, so the guest
+- * could cause us to livelock. There are a few cases where it is valid for
+- * the guest to race our updates (e.g., to change the GTF_readonly flag),
+- * so we allow a few retries before failing.
+- */
+- int retries = 0;
++ grant_entry_v1_t *sha1;
++ grant_entry_v2_t *sha2;
++ grant_entry_header_t *shah;
++ uint16_t *status;
+
+ led = current;
+ ld = led->domain;
+@@ -251,73 +441,55 @@ __gnttab_map_grant_ref(
+
+ spin_lock(&rd->grant_table->lock);
+
++ if ( rd->grant_table->gt_version == 0 )
++ PIN_FAIL(unlock_out, GNTST_general_error,
++ "remote grant table not yet set up");
++
+ /* Bounds check on the grant ref */
+ if ( unlikely(op->ref >= nr_grant_entries(rd->grant_table)))
+ PIN_FAIL(unlock_out, GNTST_bad_gntref, "Bad ref (%d).\n", op->ref);
+
+ act = &active_entry(rd->grant_table, op->ref);
+- sha = &shared_entry(rd->grant_table, op->ref);
++ shah = shared_entry_header(rd->grant_table, op->ref);
++ if (rd->grant_table->gt_version == 1) {
++ sha1 = &shared_entry_v1(rd->grant_table, op->ref);
++ sha2 = NULL;
++ status = &shah->flags;
++ } else {
++ sha2 = &shared_entry_v2(rd->grant_table, op->ref);
++ sha1 = NULL;
++ status = &status_entry(rd->grant_table, op->ref);
++ }
+
+ /* If already pinned, check the active domid and avoid refcnt overflow. */
+ if ( act->pin &&
+ ((act->domid != ld->domain_id) ||
+- (act->pin & 0x80808080U) != 0) )
++ (act->pin & 0x80808080U) != 0 ||
++ (act->is_sub_page)) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+- "Bad domain (%d != %d), or risk of counter overflow %08x\n",
+- act->domid, ld->domain_id, act->pin);
++ "Bad domain (%d != %d), or risk of counter overflow %08x, or subpage %d\n",
++ act->domid, ld->domain_id, act->pin, act->is_sub_page);
+
+ if ( !act->pin ||
+ (!(op->flags & GNTMAP_readonly) &&
+ !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask))) )
+ {
+- scombo.word = *(u32 *)&sha->flags;
+-
+- /*
+- * This loop attempts to set the access (reading/writing) flags
+- * in the grant table entry. It tries a cmpxchg on the field
+- * up to five times, and then fails under the assumption that
+- * the guest is misbehaving.
+- */
+- for ( ; ; )
+- {
+- /* If not already pinned, check the grant domid and type. */
+- if ( !act->pin &&
+- (((scombo.shorts.flags & GTF_type_mask) !=
+- GTF_permit_access) ||
+- (scombo.shorts.domid != ld->domain_id)) )
+- PIN_FAIL(unlock_out, GNTST_general_error,
+- "Bad flags (%x) or dom (%d). (expected dom %d)\n",
+- scombo.shorts.flags, scombo.shorts.domid,
+- ld->domain_id);
+-
+- new_scombo = scombo;
+- new_scombo.shorts.flags |= GTF_reading;
+-
+- if ( !(op->flags & GNTMAP_readonly) )
+- {
+- new_scombo.shorts.flags |= GTF_writing;
+- if ( unlikely(scombo.shorts.flags & GTF_readonly) )
+- PIN_FAIL(unlock_out, GNTST_general_error,
+- "Attempt to write-pin a r/o grant entry.\n");
+- }
+-
+- prev_scombo.word = cmpxchg((u32 *)&sha->flags,
+- scombo.word, new_scombo.word);
+- if ( likely(prev_scombo.word == scombo.word) )
+- break;
+-
+- if ( retries++ == 4 )
+- PIN_FAIL(unlock_out, GNTST_general_error,
+- "Shared grant entry is unstable.\n");
+-
+- scombo = prev_scombo;
+- }
++ if ( (rc = _set_status(rd->grant_table->gt_version,
++ ld->domain_id, op->flags & GNTMAP_readonly,
++ 1, shah, act, status) ) != GNTST_okay )
++ goto unlock_out;
+
+ if ( !act->pin )
+ {
+- act->domid = scombo.shorts.domid;
+- act->gfn = sha->frame;
+- act->frame = gmfn_to_mfn(rd, sha->frame);
++ act->domid = ld->domain_id;
++ if ( sha1 )
++ act->gfn = sha1->frame;
++ else
++ act->gfn = sha2->full_page.frame;
++ act->frame = gmfn_to_mfn(rd, act->gfn);
++ act->start = 0;
++ act->length = PAGE_SIZE;
++ act->is_sub_page = 0;
+ }
+ }
+
+@@ -332,7 +504,7 @@ __gnttab_map_grant_ref(
+ frame = act->frame;
+ act_pin = act->pin;
+
+- cache_flags = (sha->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
++ cache_flags = (shah->flags & (GTF_PAT | GTF_PWT | GTF_PCD) );
+
+ spin_unlock(&rd->grant_table->lock);
+
+@@ -433,7 +605,7 @@ __gnttab_map_grant_ref(
+ spin_lock(&rd->grant_table->lock);
+
+ act = &active_entry(rd->grant_table, op->ref);
+- sha = &shared_entry(rd->grant_table, op->ref);
++ shah = shared_entry_header(rd->grant_table, op->ref);
+
+ if ( op->flags & GNTMAP_device_map )
+ act->pin -= (op->flags & GNTMAP_readonly) ?
+@@ -444,10 +616,10 @@ __gnttab_map_grant_ref(
+
+ if ( !(op->flags & GNTMAP_readonly) &&
+ !(act->pin & (GNTPIN_hstw_mask|GNTPIN_devw_mask)) )
+- gnttab_clear_flag(_GTF_writing, &sha->flags);
++ gnttab_clear_flag(_GTF_writing, status);
+
+ if ( !act->pin )
+- gnttab_clear_flag(_GTF_reading, &sha->flags);
++ gnttab_clear_flag(_GTF_reading, status);
+
+ unlock_out:
+ spin_unlock(&rd->grant_table->lock);
+@@ -482,7 +654,6 @@ __gnttab_unmap_common(
+ domid_t dom;
+ struct domain *ld, *rd;
+ struct active_grant_entry *act;
+- grant_entry_t *sha;
+ s16 rc = 0;
+ u32 old_pin;
+
+@@ -530,7 +701,6 @@ __gnttab_unmap_common(
+ spin_lock(&rd->grant_table->lock);
+
+ act = &active_entry(rd->grant_table, op->map->ref);
+- sha = &shared_entry(rd->grant_table, op->map->ref);
+ old_pin = act->pin;
+
+ if ( op->frame == 0 )
+@@ -595,7 +765,9 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op)
+ {
+ struct domain *ld, *rd;
+ struct active_grant_entry *act;
+- grant_entry_t *sha;
++ grant_entry_header_t *sha;
++ struct page_info *pg;
++ uint16_t *status;
+
+ rd = op->rd;
+
+@@ -614,8 +786,16 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op)
+ rcu_lock_domain(rd);
+ spin_lock(&rd->grant_table->lock);
+
++ if ( rd->grant_table->gt_version == 0 )
++ goto unmap_out;
++
+ act = &active_entry(rd->grant_table, op->map->ref);
+- sha = &shared_entry(rd->grant_table, op->map->ref);
++ sha = shared_entry_header(rd->grant_table, op->map->ref);
++
++ if ( rd->grant_table->gt_version == 1 )
++ status = &sha->flags;
++ else
++ status = &status_entry(rd->grant_table, op->map->ref);
+
+ if ( unlikely(op->frame != act->frame) )
+ {
+@@ -664,10 +844,10 @@ __gnttab_unmap_common_complete(struct gnttab_unmap_common *op)
+
+ if ( ((act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0) &&
+ !(op->flags & GNTMAP_readonly) )
+- gnttab_clear_flag(_GTF_writing, &sha->flags);
++ gnttab_clear_flag(_GTF_writing, status);
+
+ if ( act->pin == 0 )
+- gnttab_clear_flag(_GTF_reading, &sha->flags);
++ gnttab_clear_flag(_GTF_reading, status);
+
+ unmap_out:
+ spin_unlock(&rd->grant_table->lock);
+@@ -793,6 +973,50 @@ fault:
+ return -EFAULT;
+ }
+
++static int
++gnttab_populate_status_frames(struct domain *d, struct grant_table *gt)
++{
++ unsigned i;
++ unsigned req_status_frames;
++
++ req_status_frames = grant_to_status_frames(gt->nr_grant_frames);
++ for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
++ {
++ if ( (gt->status[i] = alloc_xenheap_page()) == NULL )
++ goto status_alloc_failed;
++ clear_page(gt->status[i]);
++ }
++ /* Share the new status frames with the recipient domain */
++ for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
++ gnttab_create_status_page(d, gt, i);
++
++ gt->nr_status_frames = req_status_frames;
++
++ return 0;
++
++status_alloc_failed:
++ for ( i = nr_status_frames(gt); i < req_status_frames; i++ )
++ {
++ free_xenheap_page(gt->status[i]);
++ gt->status[i] = NULL;
++ }
++ return -ENOMEM;
++}
++
++static void
++gnttab_unpopulate_status_frames(struct domain *d, struct grant_table *gt)
++{
++ int i;
++
++ for ( i = 0; i < nr_status_frames(gt); i++ )
++ {
++ page_set_owner(virt_to_page(gt->status[i]), dom_xen);
++ free_xenheap_page(gt->status[i]);
++ gt->status[i] = NULL;
++ }
++ gt->nr_status_frames = 0;
++}
++
+ int
+ gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
+ {
+@@ -819,9 +1043,9 @@ gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
+ /* Shared */
+ for ( i = nr_grant_frames(gt); i < req_nr_frames; i++ )
+ {
+- if ( (gt->shared[i] = alloc_xenheap_page()) == NULL )
++ if ( (gt->shared_raw[i] = alloc_xenheap_page()) == NULL )
+ goto shared_alloc_failed;
+- clear_page(gt->shared[i]);
++ clear_page(gt->shared_raw[i]);
+ }
+
+ /* Share the new shared frames with the recipient domain */
+@@ -830,13 +1054,20 @@ gnttab_grow_table(struct domain *d, unsigned int req_nr_frames)
+
+ gt->nr_grant_frames = req_nr_frames;
+
++ /* Status pages - version 2 */
++ if (gt->gt_version > 1)
++ {
++ if ( gnttab_populate_status_frames(d, gt) )
++ goto shared_alloc_failed;
++ }
++
+ return 1;
+
+ shared_alloc_failed:
+ for ( i = nr_grant_frames(gt); i < req_nr_frames; i++ )
+ {
+- free_xenheap_page(gt->shared[i]);
+- gt->shared[i] = NULL;
++ free_xenheap_page(gt->shared_raw[i]);
++ gt->shared_raw[i] = NULL;
+ }
+ active_alloc_failed:
+ for ( i = nr_active_grant_frames(gt);
+@@ -906,7 +1137,13 @@ gnttab_setup_table(
+
+ spin_lock(&d->grant_table->lock);
+
+- if ( (op.nr_frames > nr_grant_frames(d->grant_table)) &&
++ if ( d->grant_table->gt_version == 0 )
++ d->grant_table->gt_version = 1;
++
++ if ( (op.nr_frames > nr_grant_frames(d->grant_table) ||
++ ( (d->grant_table->gt_version > 1 ) &&
++ (grant_to_status_frames(op.nr_frames) >
++ nr_status_frames(d->grant_table)) ) ) &&
+ !gnttab_grow_table(d, op.nr_frames) )
+ {
+ gdprintk(XENLOG_INFO,
+@@ -1010,7 +1247,7 @@ gnttab_prepare_for_transfer(
+ struct domain *rd, struct domain *ld, grant_ref_t ref)
+ {
+ struct grant_table *rgt;
+- struct grant_entry *sha;
++ grant_entry_header_t *sha;
+ union grant_combo scombo, prev_scombo, new_scombo;
+ int retries = 0;
+
+@@ -1022,6 +1259,14 @@ gnttab_prepare_for_transfer(
+
+ spin_lock(&rgt->lock);
+
++ if ( rgt->gt_version == 0 )
++ {
++ gdprintk(XENLOG_INFO,
++ "Grant table not ready for transfer to domain(%d).\n",
++ rd->domain_id);
++ goto fail;
++ }
++
+ if ( unlikely(ref >= nr_grant_entries(rd->grant_table)) )
+ {
+ gdprintk(XENLOG_INFO,
+@@ -1030,7 +1275,7 @@ gnttab_prepare_for_transfer(
+ goto fail;
+ }
+
+- sha = &shared_entry(rgt, ref);
++ sha = shared_entry_header(rgt, ref);
+
+ scombo.word = *(u32 *)&sha->flags;
+
+@@ -1079,7 +1324,6 @@ gnttab_transfer(
+ struct domain *e;
+ struct page_info *page;
+ int i;
+- grant_entry_t *sha;
+ struct gnttab_transfer gop;
+ unsigned long mfn;
+ unsigned int max_bitsize;
+@@ -1209,11 +1453,21 @@ gnttab_transfer(
+ /* Tell the guest about its new page frame. */
+ spin_lock(&e->grant_table->lock);
+
+- sha = &shared_entry(e->grant_table, gop.ref);
+- guest_physmap_add_page(e, sha->frame, mfn, 0);
+- sha->frame = mfn;
++ if ( e->grant_table->gt_version == 1 )
++ {
++ grant_entry_v1_t *sha = &shared_entry_v1(e->grant_table, gop.ref);
++ guest_physmap_add_page(e, sha->frame, mfn, 0);
++ sha->frame = mfn;
++ }
++ else
++ {
++ grant_entry_v2_t *sha = &shared_entry_v2(e->grant_table, gop.ref);
++ guest_physmap_add_page(e, sha->full_page.frame, mfn, 0);
++ sha->full_page.frame = mfn;
++ }
+ wmb();
+- sha->flags |= GTF_transfer_completed;
++ shared_entry_header(e->grant_table, gop.ref)->flags |=
++ GTF_transfer_completed;
+
+ spin_unlock(&e->grant_table->lock);
+
+@@ -1239,16 +1493,40 @@ static void
+ __release_grant_for_copy(
+ struct domain *rd, unsigned long gref, int readonly)
+ {
+- grant_entry_t *sha;
++ grant_entry_header_t *sha;
+ struct active_grant_entry *act;
+ unsigned long r_frame;
++ uint16_t *status;
++ domid_t trans_domid;
++ grant_ref_t trans_gref;
++ int released_read;
++ int released_write;
++ struct domain *trans_dom;
++
++ released_read = 0;
++ released_write = 0;
+
+ spin_lock(&rd->grant_table->lock);
+
+ act = &active_entry(rd->grant_table, gref);
+- sha = &shared_entry(rd->grant_table, gref);
++ sha = shared_entry_header(rd->grant_table, gref);
+ r_frame = act->frame;
+
++ if (rd->grant_table->gt_version == 1)
++ {
++ status = &sha->flags;
++ trans_domid = rd->domain_id;
++ /* Shut the compiler up. This'll never be used, because
++ trans_domid == rd->domain_id, but gcc doesn't know that. */
++ trans_gref = 0x1234567;
++ }
++ else
++ {
++ status = &status_entry(rd->grant_table, gref);
++ trans_domid = act->trans_dom;
++ trans_gref = act->trans_gref;
++ }
++
+ if ( readonly )
+ {
+ act->pin -= GNTPIN_hstr_inc;
+@@ -1259,13 +1537,51 @@ __release_grant_for_copy(
+
+ act->pin -= GNTPIN_hstw_inc;
+ if ( !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) )
+- gnttab_clear_flag(_GTF_writing, &sha->flags);
++ {
++ released_write = 1;
++ gnttab_clear_flag(_GTF_writing, status);
++ }
+ }
+
+ if ( !act->pin )
+- gnttab_clear_flag(_GTF_reading, &sha->flags);
++ {
++ gnttab_clear_flag(_GTF_reading, status);
++ released_read = 1;
++ }
+
+ spin_unlock(&rd->grant_table->lock);
++
++ if ( trans_domid != rd->domain_id )
++ {
++ if ( released_write || released_read )
++ {
++ trans_dom = rcu_lock_domain_by_id(trans_domid);
++ if ( trans_dom != NULL )
++ {
++ /* Recursive calls, but they're tail calls, so it's
++ okay. */
++ if ( released_write )
++ __release_grant_for_copy(trans_dom, trans_gref, 0);
++ else if ( released_read )
++ __release_grant_for_copy(trans_dom, trans_gref, 1);
++ }
++ }
++ }
++}
++
++/* The status for a grant indicates that we're taking more access than
++ the pin requires. Fix up the status to match the pin. Called
++ under the domain's grant table lock. */
++/* Only safe on transitive grants. Even then, note that we don't
++ attempt to drop any pin on the referent grant. */
++static void __fixup_status_for_pin(struct active_grant_entry *act,
++ uint16_t *status)
++{
++ if ( !(act->pin & GNTPIN_hstw_mask) )
++ *status &= ~_GTF_writing;
++
++ if ( !(act->pin & GNTPIN_hstr_mask) )
++ *status &= ~_GTF_reading;
+ }
+
+ /* Grab a frame number from a grant entry and update the flags and pin
+@@ -1274,82 +1590,182 @@ __release_grant_for_copy(
+ actually valid. */
+ static int
+ __acquire_grant_for_copy(
+- struct domain *rd, unsigned long gref, int readonly,
+- unsigned long *frame)
++ struct domain *rd, unsigned long gref, struct domain *ld, int readonly,
++ unsigned long *frame, unsigned *page_off, unsigned *length,
++ unsigned allow_transitive, struct domain **owning_domain)
+ {
+- grant_entry_t *sha;
++ grant_entry_v1_t *sha1;
++ grant_entry_v2_t *sha2;
++ grant_entry_header_t *shah;
+ struct active_grant_entry *act;
++ grant_status_t *status;
++ uint32_t old_pin;
++ domid_t trans_domid;
++ grant_ref_t trans_gref;
++ struct domain *rrd;
++ unsigned long grant_frame;
++ unsigned trans_page_off;
++ unsigned trans_length;
++ int is_sub_page;
++ struct domain *ignore;
+ s16 rc = GNTST_okay;
+- int retries = 0;
+- union grant_combo scombo, prev_scombo, new_scombo;
++
++ *owning_domain = NULL;
+
+ spin_lock(&rd->grant_table->lock);
+
++ if ( rd->grant_table->gt_version == 0 )
++ PIN_FAIL(unlock_out, GNTST_general_error,
++ "remote grant table not ready\n");
++
+ if ( unlikely(gref >= nr_grant_entries(rd->grant_table)) )
+ PIN_FAIL(unlock_out, GNTST_bad_gntref,
+ "Bad grant reference %ld\n", gref);
+
+ act = &active_entry(rd->grant_table, gref);
+- sha = &shared_entry(rd->grant_table, gref);
+-
++ shah = shared_entry_header(rd->grant_table, gref);
++ if ( rd->grant_table->gt_version == 1 )
++ {
++ sha1 = &shared_entry_v1(rd->grant_table, gref);
++ sha2 = NULL;
++ status = &shah->flags;
++ }
++ else
++ {
++ sha1 = NULL;
++ sha2 = &shared_entry_v2(rd->grant_table, gref);
++ status = &status_entry(rd->grant_table, gref);
++ }
++
+ /* If already pinned, check the active domid and avoid refcnt overflow. */
+ if ( act->pin &&
+- ((act->domid != current->domain->domain_id) ||
++ ((act->domid != ld->domain_id) ||
+ (act->pin & 0x80808080U) != 0) )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+ "Bad domain (%d != %d), or risk of counter overflow %08x\n",
+- act->domid, current->domain->domain_id, act->pin);
++ act->domid, ld->domain_id, act->pin);
+
++ old_pin = act->pin;
+ if ( !act->pin ||
+ (!readonly && !(act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask))) )
+ {
+- scombo.word = *(u32 *)&sha->flags;
++ if ( (rc = _set_status(rd->grant_table->gt_version,
++ ld->domain_id,
++ readonly, 0, shah, act,
++ status) ) != GNTST_okay )
++ goto unlock_out;
+
+- for ( ; ; )
++ trans_domid = ld->domain_id;
++ trans_gref = 0;
++ if ( sha2 && (shah->flags & GTF_type_mask) == GTF_transitive )
+ {
+- /* If not already pinned, check the grant domid and type. */
+- if ( !act->pin &&
+- (((scombo.shorts.flags & GTF_type_mask) !=
+- GTF_permit_access) ||
+- (scombo.shorts.domid != current->domain->domain_id)) )
+- PIN_FAIL(unlock_out, GNTST_general_error,
+- "Bad flags (%x) or dom (%d). (expected dom %d)\n",
+- scombo.shorts.flags, scombo.shorts.domid,
+- current->domain->domain_id);
+-
+- new_scombo = scombo;
+- new_scombo.shorts.flags |= GTF_reading;
+-
+- if ( !readonly )
+- {
+- new_scombo.shorts.flags |= GTF_writing;
+- if ( unlikely(scombo.shorts.flags & GTF_readonly) )
+- PIN_FAIL(unlock_out, GNTST_general_error,
+- "Attempt to write-pin a r/o grant entry.\n");
+- }
++ if ( !allow_transitive )
++ PIN_FAIL(unlock_out, GNTST_general_error,
++ "transitive grant when transitivity not allowed\n");
+
+- prev_scombo.word = cmpxchg((u32 *)&sha->flags,
+- scombo.word, new_scombo.word);
+- if ( likely(prev_scombo.word == scombo.word) )
+- break;
++ trans_domid = sha2->transitive.trans_domid;
++ trans_gref = sha2->transitive.gref;
++ barrier(); /* Stop the compiler from re-loading
++ trans_domid from shared memory */
++ if ( trans_domid == rd->domain_id )
++ PIN_FAIL(unlock_out, GNTST_general_error,
++ "transitive grants cannot be self-referential\n");
++
++ /* We allow the trans_domid == ld->domain_id case, which
++ corresponds to a grant being issued by one domain, sent
++ to another one, and then transitively granted back to
++ the original domain. Allowing it is easy, and means
++ that you don't need to go out of your way to avoid it
++ in the guest. */
+
+- if ( retries++ == 4 )
++ rrd = rcu_lock_domain_by_id(trans_domid);
++ if ( rrd == NULL )
+ PIN_FAIL(unlock_out, GNTST_general_error,
+- "Shared grant entry is unstable.\n");
++ "transitive grant referenced bad domain %d\n",
++ trans_domid);
++ spin_unlock(&rd->grant_table->lock);
++
++ rc = __acquire_grant_for_copy(rrd, trans_gref, rd,
++ readonly, &grant_frame,
++ &trans_page_off, &trans_length,
++ 0, &ignore);
++
++ spin_lock(&rd->grant_table->lock);
++ if ( rc != GNTST_okay ) {
++ __fixup_status_for_pin(act, status);
++ spin_unlock(&rd->grant_table->lock);
++ return rc;
++ }
++
++ /* We dropped the lock, so we have to check that nobody
++ else tried to pin (or, for that matter, unpin) the
++ reference in *this* domain. If they did, just give up
++ and try again. */
++ if ( act->pin != old_pin )
++ {
++ __fixup_status_for_pin(act, status);
++ spin_unlock(&rd->grant_table->lock);
++ return __acquire_grant_for_copy(rd, gref, ld, readonly,
++ frame, page_off, length,
++ allow_transitive,
++ owning_domain);
++ }
+
+- scombo = prev_scombo;
++ /* The actual remote remote grant may or may not be a
++ sub-page, but we always treat it as one because that
++ blocks mappings of transitive grants. */
++ is_sub_page = 1;
++ *owning_domain = rrd;
++ act->gfn = INVALID_GFN;
++ }
++ else if ( sha1 )
++ {
++ act->gfn = sha1->frame;
++ grant_frame = gmfn_to_mfn(rd, act->gfn);
++ is_sub_page = 0;
++ trans_page_off = 0;
++ trans_length = PAGE_SIZE;
++ *owning_domain = rd;
++ }
++ else if ( !(sha2->hdr.flags & GTF_sub_page) )
++ {
++ act->gfn = sha2->full_page.frame;
++ grant_frame = gmfn_to_mfn(rd, act->gfn);
++ is_sub_page = 0;
++ trans_page_off = 0;
++ trans_length = PAGE_SIZE;
++ *owning_domain = rd;
++ }
++ else
++ {
++ act->gfn = sha2->sub_page.frame;
++ grant_frame = gmfn_to_mfn(rd, act->gfn);
++ is_sub_page = 1;
++ trans_page_off = sha2->sub_page.page_off;
++ trans_length = sha2->sub_page.length;
++ *owning_domain = rd;
+ }
+
+ if ( !act->pin )
+ {
+- act->domid = scombo.shorts.domid;
+- act->gfn = sha->frame;
+- act->frame = gmfn_to_mfn(rd, sha->frame);
++ act->domid = ld->domain_id;
++ act->is_sub_page = is_sub_page;
++ act->start = trans_page_off;
++ act->length = trans_length;
++ act->trans_dom = trans_domid;
++ act->trans_gref = trans_gref;
++ act->frame = grant_frame;
+ }
+ }
++ else
++ {
++ *owning_domain = rd;
++ }
+
+ act->pin += readonly ? GNTPIN_hstr_inc : GNTPIN_hstw_inc;
+
++ *page_off = act->start;
++ *length = act->length;
+ *frame = act->frame;
+
+ unlock_out:
+@@ -1362,6 +1778,7 @@ __gnttab_copy(
+ struct gnttab_copy *op)
+ {
+ struct domain *sd = NULL, *dd = NULL;
++ struct domain *source_domain = NULL, *dest_domain = NULL;
+ unsigned long s_frame, d_frame;
+ char *sp, *dp;
+ s16 rc = GNTST_okay;
+@@ -1401,19 +1818,29 @@ __gnttab_copy(
+
+ if ( src_is_gref )
+ {
+- rc = __acquire_grant_for_copy(sd, op->source.u.ref, 1, &s_frame);
++ unsigned source_off, source_len;
++ rc = __acquire_grant_for_copy(sd, op->source.u.ref, current->domain, 1,
++ &s_frame, &source_off, &source_len, 1,
++ &source_domain);
+ if ( rc != GNTST_okay )
+ goto error_out;
+ have_s_grant = 1;
++ if ( op->source.offset < source_off ||
++ op->len > source_len )
++ PIN_FAIL(error_out, GNTST_general_error,
++ "copy source out of bounds: %d < %d || %d > %d\n",
++ op->source.offset, source_off,
++ op->len, source_len);
+ }
+ else
+ {
+ s_frame = gmfn_to_mfn(sd, op->source.u.gmfn);
++ source_domain = sd;
+ }
+ if ( unlikely(!mfn_valid(s_frame)) )
+ PIN_FAIL(error_out, GNTST_general_error,
+ "source frame %lx invalid.\n", s_frame);
+- if ( !get_page(mfn_to_page(s_frame), sd) )
++ if ( !get_page(mfn_to_page(s_frame), source_domain) )
+ {
+ if ( !sd->is_dying )
+ gdprintk(XENLOG_WARNING, "Could not get src frame %lx\n", s_frame);
+@@ -1424,19 +1851,30 @@ __gnttab_copy(
+
+ if ( dest_is_gref )
+ {
+- rc = __acquire_grant_for_copy(dd, op->dest.u.ref, 0, &d_frame);
++ unsigned dest_off, dest_len;
++ rc = __acquire_grant_for_copy(dd, op->dest.u.ref, current->domain, 0,
++ &d_frame, &dest_off, &dest_len, 1,
++ &dest_domain);
+ if ( rc != GNTST_okay )
+ goto error_out;
+ have_d_grant = 1;
++ if ( op->dest.offset < dest_off ||
++ op->len > dest_len )
++ PIN_FAIL(error_out, GNTST_general_error,
++ "copy dest out of bounds: %d < %d || %d > %d\n",
++ op->dest.offset, dest_off,
++ op->len, dest_len);
+ }
+ else
+ {
+ d_frame = gmfn_to_mfn(dd, op->dest.u.gmfn);
++ dest_domain = dd;
+ }
+ if ( unlikely(!mfn_valid(d_frame)) )
+ PIN_FAIL(error_out, GNTST_general_error,
+ "destination frame %lx invalid.\n", d_frame);
+- if ( !get_page_and_type(mfn_to_page(d_frame), dd, PGT_writable_page) )
++ if ( !get_page_and_type(mfn_to_page(d_frame), dest_domain,
++ PGT_writable_page) )
+ {
+ if ( !dd->is_dying )
+ gdprintk(XENLOG_WARNING, "Could not get dst frame %lx\n", d_frame);
+@@ -1487,6 +1925,165 @@ gnttab_copy(
+ return 0;
+ }
+
++static long
++gnttab_set_version(XEN_GUEST_HANDLE(gnttab_set_version_t uop))
++{
++ gnttab_set_version_t op;
++ struct domain *d = current->domain;
++ struct grant_table *gt = d->grant_table;
++ struct active_grant_entry *act;
++ long res = 0;
++ int i;
++
++ if (copy_from_guest(&op, uop, 1))
++ return -EFAULT;
++
++ if (op.version != 1 && op.version != 2)
++ return -EINVAL;
++
++    spin_lock(&gt->lock);
++ /* Make sure that the grant table isn't currently in use when we
++ change the version number. */
++ /* (You need to change the version number for e.g. kexec.) */
++ if ( gt->gt_version != 0 )
++ {
++ for ( i = 0; i < nr_grant_entries(gt); i++ )
++ {
++ act = &active_entry(gt, i);
++ if ( act->pin != 0 )
++ {
++ gdprintk(XENLOG_WARNING,
++ "tried to change grant table version from %d to %d, but some grant entries still in use\n",
++ gt->gt_version,
++ op.version);
++ res = -EBUSY;
++ goto out;
++ }
++ }
++ }
++
++ /* XXX: If we're going to version 2, we could maybe shrink the
++ active grant table here. */
++
++ if ( op.version == 2 && gt->gt_version < 2 )
++ {
++ res = gnttab_populate_status_frames(d, gt);
++ if ( res < 0)
++ goto out;
++ }
++
++ if ( op.version < 2 && gt->gt_version == 2 )
++ gnttab_unpopulate_status_frames(d, gt);
++
++ if ( op.version != gt->gt_version )
++ {
++ /* Make sure there's no crud left over in the table from the
++ old version. */
++ for ( i = 0; i < nr_grant_frames(gt); i++ )
++ memset(gt->shared_raw[i], 0, PAGE_SIZE);
++ }
++
++ gt->gt_version = op.version;
++
++out:
++    spin_unlock(&gt->lock);
++
++ return res;
++}
++
++static long
++gnttab_get_status_frames(XEN_GUEST_HANDLE(gnttab_get_status_frames_t) uop,
++ int count)
++{
++ gnttab_get_status_frames_t op;
++ struct domain *d;
++ struct grant_table *gt;
++ uint64_t gmfn;
++ int i;
++ int rc;
++
++ if ( count != 1 )
++ return -EINVAL;
++
++ if ( unlikely(copy_from_guest(&op, uop, 1) != 0) )
++ {
++ gdprintk(XENLOG_INFO,
++ "Fault while reading gnttab_get_status_frames_t.\n");
++ return -EFAULT;
++ }
++
++ rc = rcu_lock_target_domain_by_id(op.dom, &d);
++ if ( rc < 0 )
++ {
++ if ( rc == -ESRCH )
++ op.status = GNTST_bad_domain;
++ else if ( rc == -EPERM )
++ op.status = GNTST_permission_denied;
++ else
++ op.status = GNTST_general_error;
++ goto out1;
++ }
++
++ gt = d->grant_table;
++
++ if ( unlikely(op.nr_frames > nr_status_frames(gt)) ) {
++ gdprintk(XENLOG_INFO, "Guest requested addresses for %d grant status "
++ "frames, but only %d are available.\n",
++ op.nr_frames, nr_status_frames(gt));
++ op.status = GNTST_general_error;
++ goto out2;
++ }
++
++ op.status = GNTST_okay;
++
++    spin_lock(&gt->lock);
++
++ for ( i = 0; i < op.nr_frames; i++ )
++ {
++ gmfn = gnttab_status_gmfn(d, d->grant_table, i);
++ if (copy_to_guest_offset(op.frame_list,
++ i,
++ &gmfn,
++ 1))
++ op.status = GNTST_bad_virt_addr;
++ }
++
++    spin_unlock(&gt->lock);
++out2:
++ rcu_unlock_domain(d);
++out1:
++ if ( unlikely(copy_to_guest(uop, &op, 1)) )
++ return -EFAULT;
++
++ return 0;
++}
++
++static long
++gnttab_get_version(XEN_GUEST_HANDLE(gnttab_get_version_t uop))
++{
++ gnttab_get_version_t op;
++ struct domain *d;
++
++ if ( copy_from_guest(&op, uop, 1) )
++ return -EFAULT;
++ d = rcu_lock_domain_by_id(op.dom);
++ if ( d == NULL )
++ return -ESRCH;
++ if ( !IS_PRIV_FOR(current->domain, d) )
++ {
++ rcu_unlock_domain(d);
++ return -EPERM;
++ }
++ spin_lock(&d->grant_table->lock);
++ op.version = d->grant_table->gt_version;
++ spin_unlock(&d->grant_table->lock);
++
++ if ( copy_to_guest(uop, &op, 1) )
++ return -EFAULT;
++ else
++ return 0;
++}
++
+ long
+ do_grant_table_op(
+ unsigned int cmd, XEN_GUEST_HANDLE(void) uop, unsigned int count)
+@@ -1562,6 +2159,22 @@ do_grant_table_op(
+ guest_handle_cast(uop, gnttab_query_size_t), count);
+ break;
+ }
++ case GNTTABOP_set_version:
++ {
++ rc = gnttab_set_version(guest_handle_cast(uop, gnttab_set_version_t));
++ break;
++ }
++ case GNTTABOP_get_status_frames:
++ {
++ rc = gnttab_get_status_frames(
++ guest_handle_cast(uop, gnttab_get_status_frames_t), count);
++ break;
++ }
++ case GNTTABOP_get_version:
++ {
++ rc = gnttab_get_version(guest_handle_cast(uop, gnttab_get_version_t));
++ break;
++ }
+ default:
+ rc = -ENOSYS;
+ break;
+@@ -1579,7 +2192,7 @@ do_grant_table_op(
+
+ static unsigned int max_nr_active_grant_frames(void)
+ {
+- return (((max_nr_grant_frames * (PAGE_SIZE / sizeof(grant_entry_t))) +
++ return (((max_nr_grant_frames * (PAGE_SIZE / sizeof(grant_entry_v1_t))) +
+ ((PAGE_SIZE / sizeof(struct active_grant_entry))-1))
+ / (PAGE_SIZE / sizeof(struct active_grant_entry)));
+ }
+@@ -1591,9 +2204,6 @@ grant_table_create(
+ struct grant_table *t;
+ int i;
+
+- /* If this sizeof assertion fails, fix the function: shared_index */
+- ASSERT(sizeof(grant_entry_t) == 8);
+-
+ if ( (t = xmalloc(struct grant_table)) == NULL )
+ goto no_mem_0;
+
+@@ -1628,28 +2238,36 @@ grant_table_create(
+ t->maptrack[0][i].ref = i+1;
+
+ /* Shared grant table. */
+- if ( (t->shared = xmalloc_array(struct grant_entry *,
+- max_nr_grant_frames)) == NULL )
++ if ( (t->shared_raw = xmalloc_array(void *, max_nr_grant_frames)) == NULL )
+ goto no_mem_3;
+- memset(t->shared, 0, max_nr_grant_frames * sizeof(t->shared[0]));
++ memset(t->shared_raw, 0, max_nr_grant_frames * sizeof(t->shared_raw[0]));
+ for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
+ {
+- if ( (t->shared[i] = alloc_xenheap_page()) == NULL )
++ if ( (t->shared_raw[i] = alloc_xenheap_page()) == NULL )
+ goto no_mem_4;
+- clear_page(t->shared[i]);
++ clear_page(t->shared_raw[i]);
+ }
+
+ for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
+ gnttab_create_shared_page(d, t, i);
+
++ /* Status pages for grant table - for version 2 */
++ t->status = xmalloc_array(grant_status_t *,
++ grant_to_status_frames(max_nr_grant_frames));
++ if ( t->status == NULL )
++ goto no_mem_4;
++ memset(t->status, 0,
++ grant_to_status_frames(max_nr_grant_frames) * sizeof(t->status[0]));
++ t->nr_status_frames = 0;
++
+ /* Okay, install the structure. */
+ d->grant_table = t;
+ return 0;
+
+ no_mem_4:
+ for ( i = 0; i < INITIAL_NR_GRANT_FRAMES; i++ )
+- free_xenheap_page(t->shared[i]);
+- xfree(t->shared);
++ free_xenheap_page(t->shared_raw[i]);
++ xfree(t->shared_raw);
+ no_mem_3:
+ free_xenheap_page(t->maptrack[0]);
+ xfree(t->maptrack);
+@@ -1674,7 +2292,9 @@ gnttab_release_mappings(
+ grant_handle_t handle;
+ struct domain *rd;
+ struct active_grant_entry *act;
+- struct grant_entry *sha;
++ grant_entry_header_t *sha;
++ uint16_t *status;
++ struct page_info *pg;
+
+ BUG_ON(!d->is_dying);
+
+@@ -1701,7 +2321,13 @@ gnttab_release_mappings(
+ spin_lock(&rd->grant_table->lock);
+
+ act = &active_entry(rd->grant_table, ref);
+- sha = &shared_entry(rd->grant_table, ref);
++ sha = shared_entry_header(rd->grant_table, ref);
++ if (rd->grant_table->gt_version == 1)
++ status = &sha->flags;
++ else
++ status = &status_entry(rd->grant_table, ref);
++
++ pg = mfn_to_page(act->frame);
+
+ if ( map->flags & GNTMAP_readonly )
+ {
+@@ -1746,11 +2372,11 @@ gnttab_release_mappings(
+ }
+
+ if ( (act->pin & (GNTPIN_devw_mask|GNTPIN_hstw_mask)) == 0 )
+- gnttab_clear_flag(_GTF_writing, &sha->flags);
++ gnttab_clear_flag(_GTF_writing, status);
+ }
+
+ if ( act->pin == 0 )
+- gnttab_clear_flag(_GTF_reading, &sha->flags);
++ gnttab_clear_flag(_GTF_reading, status);
+
+ spin_unlock(&rd->grant_table->lock);
+
+@@ -1772,8 +2398,8 @@ grant_table_destroy(
+ return;
+
+ for ( i = 0; i < nr_grant_frames(t); i++ )
+- free_xenheap_page(t->shared[i]);
+- xfree(t->shared);
++ free_xenheap_page(t->shared_raw[i]);
++ xfree(t->shared_raw);
+
+ for ( i = 0; i < nr_maptrack_frames(t); i++ )
+ free_xenheap_page(t->maptrack[i]);
+@@ -1783,6 +2409,10 @@ grant_table_destroy(
+ free_xenheap_page(t->active[i]);
+ xfree(t->active);
+
++ for ( i = 0; i < nr_status_frames(t); i++ )
++ free_xenheap_page(t->status[i]);
++ xfree(t->status);
++
+ xfree(t);
+ d->grant_table = NULL;
+ }
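+
+ With a version 2 table the hypervisor clears _GTF_writing/_GTF_reading in the
+ separate status array rather than in the shared entry's flags (see the
+ status_entry() uses above), so a guest that wants to know whether a grant it
+ issued is still pinned has to look at the status word.  A minimal guest-side
+ sketch, not part of the backported changesets; the gnttab_status mapping and
+ the helper name are assumptions:
+
+     /* Guest view of the status frames, e.g. obtained via
+      * GNTTABOP_get_status_frames (PV) and mapped contiguously. */
+     static grant_status_t *gnttab_status;
+
+     /* True while Xen still has grant <ref> read- or write-pinned.
+      * With a v1 table the same bits live in grant_entry_v1.flags. */
+     static int gnttab_v2_still_in_use(grant_ref_t ref)
+     {
+         return (gnttab_status[ref] & (GTF_reading | GTF_writing)) != 0;
+     }
+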
+diff --git a/xen/include/Makefile b/xen/include/Makefile
+index 8427371..4c95ed7 100644
+--- a/xen/include/Makefile
++++ b/xen/include/Makefile
+@@ -37,7 +37,7 @@ endif
+ .PHONY: all
+ all: $(headers-y)
+
+-compat/%.h: compat/%.i Makefile
++compat/%.h: compat/%.i Makefile $(BASEDIR)/tools/compat-build-header.py
+ set -e; id=_$$(echo $@ | tr '[:lower:]-/.' '[:upper:]___'); \
+ echo "#ifndef $$id" >$@.new; \
+ echo "#define $$id" >>$@.new; \
+@@ -51,9 +51,9 @@ compat/%.h: compat/%.i Makefile
+ mv -f $@.new $@
+
+ compat/%.i: compat/%.c Makefile
+- $(CPP) $(filter-out -M% .%.d,$(CFLAGS)) $(cppflags-y) -o $@ $<
++ $(CPP) -include public/xen-compat.h $(filter-out -M% .%.d,$(CFLAGS)) $(cppflags-y) -o $@ $<
+
+-compat/%.c: public/%.h xlat.lst Makefile
++compat/%.c: public/%.h xlat.lst Makefile $(BASEDIR)/tools/compat-build-source.py
+ mkdir -p $(@D)
+ grep -v 'DEFINE_XEN_GUEST_HANDLE(long)' $< | \
+ $(BASEDIR)/tools/compat-build-source.py >$@.new
+diff --git a/xen/include/asm-x86/grant_table.h b/xen/include/asm-x86/grant_table.h
+index 3a7fb2a..4e97d9d 100644
+--- a/xen/include/asm-x86/grant_table.h
++++ b/xen/include/asm-x86/grant_table.h
+@@ -21,16 +21,31 @@ int replace_grant_host_mapping(
+ #define gnttab_create_shared_page(d, t, i) \
+ do { \
+ share_xen_page_with_guest( \
+- virt_to_page((char *)(t)->shared[i]), \
++ virt_to_page((char *)(t)->shared_raw[i]), \
+ (d), XENSHARE_writable); \
+ } while ( 0 )
+
++#define gnttab_create_status_page(d, t, i) \
++ do { \
++ share_xen_page_with_guest( \
++ virt_to_page((char *)(t)->status[i]), \
++ (d), XENSHARE_writable); \
++ } while ( 0 )
++
++
+ #define gnttab_shared_mfn(d, t, i) \
+- ((virt_to_maddr((t)->shared[i]) >> PAGE_SHIFT))
++ ((virt_to_maddr((t)->shared_raw[i]) >> PAGE_SHIFT))
+
+ #define gnttab_shared_gmfn(d, t, i) \
+ (mfn_to_gmfn(d, gnttab_shared_mfn(d, t, i)))
+
++
++#define gnttab_status_mfn(d, t, i) \
++ ((virt_to_maddr((t)->status[i]) >> PAGE_SHIFT))
++
++#define gnttab_status_gmfn(d, t, i) \
++ (mfn_to_gmfn(d, gnttab_status_mfn(d, t, i)))
++
+ #define gnttab_mark_dirty(d, f) paging_mark_dirty((d), (f))
+
+ static inline void gnttab_clear_flag(unsigned long nr, uint16_t *addr)
+diff --git a/xen/include/public/grant_table.h b/xen/include/public/grant_table.h
+index ad116e7..3edb7d1 100644
+--- a/xen/include/public/grant_table.h
++++ b/xen/include/public/grant_table.h
+@@ -84,12 +84,26 @@
+ */
+
+ /*
++ * Reference to a grant entry in a specified domain's grant table.
++ */
++typedef uint32_t grant_ref_t;
++
++/*
+ * A grant table comprises a packed array of grant entries in one or more
+ * page frames shared between Xen and a guest.
+ * [XEN]: This field is written by Xen and read by the sharing guest.
+ * [GST]: This field is written by the guest and read by Xen.
+ */
+-struct grant_entry {
++
++/*
++ * Version 1 of the grant table entry structure is maintained purely
++ * for backwards compatibility. New guests should use version 2.
++ */
++#if __XEN_INTERFACE_VERSION__ < 0x0003020a
++#define grant_entry_v1 grant_entry
++#define grant_entry_v1_t grant_entry_t
++#endif
++struct grant_entry_v1 {
+ /* GTF_xxx: various type and flag information. [XEN,GST] */
+ uint16_t flags;
+ /* The domain being granted foreign privileges. [GST] */
+@@ -100,7 +114,7 @@ struct grant_entry {
+ */
+ uint32_t frame;
+ };
+-typedef struct grant_entry grant_entry_t;
++typedef struct grant_entry_v1 grant_entry_v1_t;
+
+ /*
+ * Type of grant entry.
+@@ -108,10 +122,13 @@ typedef struct grant_entry grant_entry_t;
+ * GTF_permit_access: Allow @domid to map/access @frame.
+ * GTF_accept_transfer: Allow @domid to transfer ownership of one page frame
+ * to this guest. Xen writes the page number to @frame.
++ * GTF_transitive: Allow @domid to transitively access a subrange of
++ * @trans_grant in @trans_domid. No mappings are allowed.
+ */
+ #define GTF_invalid (0U<<0)
+ #define GTF_permit_access (1U<<0)
+ #define GTF_accept_transfer (2U<<0)
++#define GTF_transitive (3U<<0)
+ #define GTF_type_mask (3U<<0)
+
+ /*
+@@ -120,6 +137,9 @@ typedef struct grant_entry grant_entry_t;
+ * GTF_reading: Grant entry is currently mapped for reading by @domid. [XEN]
+ * GTF_writing: Grant entry is currently mapped for writing by @domid. [XEN]
+ * GTF_PAT, GTF_PWT, GTF_PCD: (x86) cache attribute flags for the grant [GST]
++ * GTF_sub_page: Grant access to only a subrange of the page. @domid
++ * will only be allowed to copy from the grant, and not
++ * map it. [GST]
+ */
+ #define _GTF_readonly (2)
+ #define GTF_readonly (1U<<_GTF_readonly)
+@@ -133,6 +153,8 @@ typedef struct grant_entry grant_entry_t;
+ #define GTF_PCD (1U<<_GTF_PCD)
+ #define _GTF_PAT (7)
+ #define GTF_PAT (1U<<_GTF_PAT)
++#define _GTF_sub_page (8)
++#define GTF_sub_page (1U<<_GTF_sub_page)
+
+ /*
+ * Subflags for GTF_accept_transfer:
+@@ -149,15 +171,87 @@ typedef struct grant_entry grant_entry_t;
+ #define _GTF_transfer_completed (3)
+ #define GTF_transfer_completed (1U<<_GTF_transfer_completed)
+
+-
+-/***********************************
+- * GRANT TABLE QUERIES AND USES
++/*
++ * Version 2 grant table entries. These fulfil the same role as
++ * version 1 entries, but can represent more complicated operations.
++ * Any given domain will have either a version 1 or a version 2 table,
++ * and every entry in the table will be the same version.
++ *
++ * The interface by which domains use grant references does not depend
++ * on the grant table version in use by the other domain.
++ */
++#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
++/*
++ * Version 1 and version 2 grant entries share a common prefix. The
++ * fields of the prefix are documented as part of struct
++ * grant_entry_v1.
+ */
++struct grant_entry_header {
++ uint16_t flags;
++ domid_t domid;
++};
++typedef struct grant_entry_header grant_entry_header_t;
+
+ /*
+- * Reference to a grant entry in a specified domain's grant table.
++ * Version 2 of the grant entry structure.
++ */
++union grant_entry_v2 {
++ grant_entry_header_t hdr;
++
++ /*
++ * This member is used for V1-style full page grants, where either:
++ *
++ * -- hdr.type is GTF_accept_transfer, or
++ * -- hdr.type is GTF_permit_access and GTF_sub_page is not set.
++ *
++ * In that case, the frame field has the same semantics as the
++ * field of the same name in the V1 entry structure.
++ */
++ struct {
++ grant_entry_header_t hdr;
++ uint32_t pad0;
++ uint64_t frame;
++ } full_page;
++
++ /*
++ * If the grant type is GTF_grant_access and GTF_sub_page is set,
++ * @domid is allowed to access bytes [@page_off,@page_off+@length)
++ * in frame @frame.
++ */
++ struct {
++ grant_entry_header_t hdr;
++ uint16_t page_off;
++ uint16_t length;
++ uint64_t frame;
++ } sub_page;
++
++ /*
++ * If the grant is GTF_transitive, @domid is allowed to use the
++ * grant @gref in domain @trans_domid, as if it was the local
++ * domain. Obviously, the transitive access must be compatible
++ * with the original grant.
++ *
++ * The current version of Xen does not allow transitive grants
++ * to be mapped.
++ */
++ struct {
++ grant_entry_header_t hdr;
++ domid_t trans_domid;
++ uint16_t pad0;
++ grant_ref_t gref;
++ } transitive;
++
++ uint32_t __spacer[4]; /* Pad to a power of two */
++};
++typedef union grant_entry_v2 grant_entry_v2_t;
++
++typedef uint16_t grant_status_t;
++
++#endif /* __XEN_INTERFACE_VERSION__ */
++
++/***********************************
++ * GRANT TABLE QUERIES AND USES
+ */
+-typedef uint32_t grant_ref_t;
+
+ /*
+ * Handle to track a mapping created via a grant reference.
+@@ -358,6 +452,63 @@ struct gnttab_unmap_and_replace {
+ typedef struct gnttab_unmap_and_replace gnttab_unmap_and_replace_t;
+ DEFINE_XEN_GUEST_HANDLE(gnttab_unmap_and_replace_t);
+
++#if __XEN_INTERFACE_VERSION__ >= 0x0003020a
++/*
++ * GNTTABOP_set_version: Request a particular version of the grant
++ * table shared table structure. This operation can only be performed
++ * once in any given domain. It must be performed before any grants
++ * are activated; otherwise, the domain will be stuck with version 1.
++ * The only defined versions are 1 and 2.
++ */
++#define GNTTABOP_set_version 8
++struct gnttab_set_version {
++ /* IN parameters */
++ uint32_t version;
++};
++typedef struct gnttab_set_version gnttab_set_version_t;
++DEFINE_XEN_GUEST_HANDLE(gnttab_set_version_t);
++
++
++/*
++ * GNTTABOP_get_status_frames: Get the list of frames used to store grant
++ * status for <dom>. In grant format version 2, the status is separated
++ * from the other shared grant fields to allow more efficient synchronization
++ * using barriers instead of atomic cmpexch operations.
++ * <nr_frames> specify the size of vector <frame_list>.
++ * The frame addresses are returned in the <frame_list>.
++ * Only <nr_frames> addresses are returned, even if the table is larger.
++ * NOTES:
++ * 1. <dom> may be specified as DOMID_SELF.
++ * 2. Only a sufficiently-privileged domain may specify <dom> != DOMID_SELF.
++ */
++#define GNTTABOP_get_status_frames 9
++struct gnttab_get_status_frames {
++ /* IN parameters. */
++ uint32_t nr_frames;
++ domid_t dom;
++ /* OUT parameters. */
++ int16_t status; /* GNTST_* */
++ XEN_GUEST_HANDLE(uint64_t) frame_list;
++};
++typedef struct gnttab_get_status_frames gnttab_get_status_frames_t;
++DEFINE_XEN_GUEST_HANDLE(gnttab_get_status_frames_t);
++
++/*
++ * GNTTABOP_get_version: Get the grant table version which is in
++ * effect for domain <dom>.
++ */
++#define GNTTABOP_get_version 10
++struct gnttab_get_version {
++ /* IN parameters */
++ domid_t dom;
++ uint16_t pad;
++ /* OUT parameters */
++ uint32_t version;
++};
++typedef struct gnttab_get_version gnttab_get_version_t;
++DEFINE_XEN_GUEST_HANDLE(gnttab_get_version_t);
++
++#endif /* __XEN_INTERFACE_VERSION__ */
+
+ /*
+ * Bitfield values for gnttab_map_grant_ref.flags.
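+
+ To illustrate how a guest is expected to use the interface above: switch the
+ table to version 2 once, before activating any grant, and then fill in union
+ grant_entry_v2 entries directly.  This is an illustrative sketch only, not
+ part of the backported changesets; HYPERVISOR_grant_table_op(), wmb() and the
+ guest-mapped gnttab_v2 array are assumed guest-side plumbing:
+
+     /* Guest mapping of the shared grant table, viewed as v2 entries. */
+     static union grant_entry_v2 *gnttab_v2;
+
+     static int switch_to_v2(void)
+     {
+         struct gnttab_set_version sv = { .version = 2 };
+         /* Must happen before any grant entry is activated. */
+         return HYPERVISOR_grant_table_op(GNTTABOP_set_version, &sv, 1);
+     }
+
+     /* Let <domid> copy from <len> bytes at <off> in our frame <gfn>. */
+     static void make_sub_page_grant(grant_ref_t ref, domid_t domid,
+                                     uint64_t gfn, uint16_t off, uint16_t len)
+     {
+         union grant_entry_v2 *e = &gnttab_v2[ref];
+
+         e->sub_page.frame    = gfn;
+         e->sub_page.page_off = off;
+         e->sub_page.length   = len;
+         e->hdr.domid         = domid;
+         wmb();   /* Frame details must be visible before the flags. */
+         e->hdr.flags = GTF_permit_access | GTF_sub_page | GTF_readonly;
+     }
+
+     /* Re-export our grant <gref> in <trans_domid> to <domid>.  The peer
+      * can only use it with GNTTABOP_copy, not map it. */
+     static void make_transitive_grant(grant_ref_t ref, domid_t domid,
+                                       domid_t trans_domid, grant_ref_t gref)
+     {
+         union grant_entry_v2 *e = &gnttab_v2[ref];
+
+         e->transitive.trans_domid = trans_domid;
+         e->transitive.gref        = gref;
+         e->hdr.domid              = domid;
+         wmb();
+         e->hdr.flags = GTF_transitive;
+     }
+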
+diff --git a/xen/include/public/memory.h b/xen/include/public/memory.h
+index ba4051e..35d1dd4 100644
+--- a/xen/include/public/memory.h
++++ b/xen/include/public/memory.h
+@@ -209,6 +209,8 @@ struct xen_add_to_physmap {
+ #define XENMAPSPACE_gmfn 2 /* GMFN */
+ unsigned int space;
+
++#define XENMAPIDX_grant_table_status 0x80000000
++
+ /* Index into source mapping space. */
+ xen_ulong_t idx;
+
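+
+ For an HVM guest the new XENMAPIDX_grant_table_status flag is meant to be
+ or'ed into the index of an XENMEM_add_to_physmap call so that a grant-status
+ frame, rather than a shared-grant frame, is placed at the given pfn.  A rough
+ sketch under those assumptions; XENMAPSPACE_grant_table and
+ HYPERVISOR_memory_op() are pre-existing interfaces, not part of this patch,
+ and the arch-side handling of the flag lives outside this hunk:
+
+     /* Map status frame <idx> of our own grant table at guest pfn <gpfn>. */
+     static int map_status_frame(unsigned int idx, xen_pfn_t gpfn)
+     {
+         struct xen_add_to_physmap xatp = {
+             .domid = DOMID_SELF,
+             .space = XENMAPSPACE_grant_table,
+             .idx   = idx | XENMAPIDX_grant_table_status,
+             .gpfn  = gpfn,
+         };
+         return HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp);
+     }
+
+ A PV guest instead asks for the machine frame list with
+ GNTTABOP_get_status_frames and maps the returned frames itself.
+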
+diff --git a/xen/include/public/xen-compat.h b/xen/include/public/xen-compat.h
+index 329be07..2e38003 100644
+--- a/xen/include/public/xen-compat.h
++++ b/xen/include/public/xen-compat.h
+@@ -27,7 +27,7 @@
+ #ifndef __XEN_PUBLIC_XEN_COMPAT_H__
+ #define __XEN_PUBLIC_XEN_COMPAT_H__
+
+-#define __XEN_LATEST_INTERFACE_VERSION__ 0x00030209
++#define __XEN_LATEST_INTERFACE_VERSION__ 0x0003020a
+
+ #if defined(__XEN__) || defined(__XEN_TOOLS__)
+ /* Xen is built with matching headers and implements the latest interface. */
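+
+ Note that the grant_entry_v2 definitions and the new GNTTABOP_* commands are
+ guarded by __XEN_INTERFACE_VERSION__ >= 0x0003020a, so guest code has to ask
+ for the new interface explicitly, for example (the include path shown is a
+ guest-OS detail and will vary):
+
+     /* Request interface version 0x0003020a before including any Xen public
+      * header; otherwise only the v1 grant_entry definitions are visible. */
+     #define __XEN_INTERFACE_VERSION__ 0x0003020a
+     #include <xen/interface/grant_table.h>
+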
+diff --git a/xen/include/public/xen.h b/xen/include/public/xen.h
+index 524118b..2fe92fe 100644
+--- a/xen/include/public/xen.h
++++ b/xen/include/public/xen.h
+@@ -47,6 +47,7 @@ DEFINE_XEN_GUEST_HANDLE(long);
+ __DEFINE_XEN_GUEST_HANDLE(ulong, unsigned long);
+ DEFINE_XEN_GUEST_HANDLE(void);
+
++DEFINE_XEN_GUEST_HANDLE(uint64_t);
+ DEFINE_XEN_GUEST_HANDLE(xen_pfn_t);
+ #endif
+
+diff --git a/xen/include/xen/grant_table.h b/xen/include/xen/grant_table.h
+index 096af9b..20c8354 100644
+--- a/xen/include/xen/grant_table.h
++++ b/xen/include/xen/grant_table.h
+@@ -30,10 +30,17 @@
+
+ /* Active grant entry - used for shadowing GTF_permit_access grants. */
+ struct active_grant_entry {
+- u32 pin; /* Reference count information. */
+- domid_t domid; /* Domain being granted access. */
++ u32 pin; /* Reference count information. */
++ domid_t domid; /* Domain being granted access. */
++ domid_t trans_dom;
++ uint32_t trans_gref;
++ unsigned long frame; /* Frame being granted. */
+ unsigned long gfn; /* Guest's idea of the frame being granted. */
+- unsigned long frame; /* Frame being granted. */
++ unsigned is_sub_page:1; /* True if this is a sub-page grant. */
++ unsigned start:15; /* For sub-page grants, the start offset
++ in the page. */
++ unsigned length:16; /* For sub-page grants, the length of the
++ grant. */
+ };
+
+ /* Count of writable host-CPU mappings. */
+@@ -53,10 +60,6 @@ struct active_grant_entry {
+ #define GNTPIN_devr_inc (1 << GNTPIN_devr_shift)
+ #define GNTPIN_devr_mask (0xFFU << GNTPIN_devr_shift)
+
+-/* Initial size of a grant table. */
+-#define INITIAL_NR_GRANT_ENTRIES ((INITIAL_NR_GRANT_FRAMES << PAGE_SHIFT) / \
+- sizeof(grant_entry_t))
+-
+ #ifndef DEFAULT_MAX_NR_GRANT_FRAMES /* to allow arch to override */
+ /* Default maximum size of a grant table. [POLICY] */
+ #define DEFAULT_MAX_NR_GRANT_FRAMES 32
+@@ -84,7 +87,15 @@ struct grant_table {
+ /* Table size. Number of frames shared with guest */
+ unsigned int nr_grant_frames;
+ /* Shared grant table (see include/public/grant_table.h). */
+- struct grant_entry **shared;
++ union {
++ void **shared_raw;
++ struct grant_entry_v1 **shared_v1;
++ union grant_entry_v2 **shared_v2;
++ };
++ /* Number of grant status frames shared with guest (for version 2) */
++ unsigned int nr_status_frames;
++ /* State grant table (see include/public/grant_table.h). */
++ grant_status_t **status;
+ /* Active grant table. */
+ struct active_grant_entry **active;
+ /* Mapping tracking table. */
+@@ -93,6 +104,9 @@ struct grant_table {
+ unsigned int maptrack_limit;
+ /* Lock protecting updates to active and shared grant tables. */
+ spinlock_t lock;
++ /* The defined versions are 1 and 2. Set to 0 if we don't know
++ what version to use yet. */
++ unsigned gt_version;
+ };
+
+ /* Create/destroy per-domain grant table context. */
+@@ -118,26 +132,32 @@ static inline unsigned int nr_grant_frames(struct grant_table *gt)
+ return gt->nr_grant_frames;
+ }
+
+-/* Number of grant table entries. Caller must hold d's grant table lock. */
+-static inline unsigned int nr_grant_entries(struct grant_table *gt)
++/* Number of status grant table frames. Caller must hold d's gr. table lock.*/
++static inline unsigned int nr_status_frames(struct grant_table *gt)
++{
++ return gt->nr_status_frames;
++}
++
++#define GRANT_STATUS_PER_PAGE (PAGE_SIZE / sizeof(grant_status_t))
++#define GRANT_PER_PAGE (PAGE_SIZE / sizeof(grant_entry_v2_t))
++/* Number of grant table status entries. Caller must hold d's gr. table lock.*/
++static inline unsigned int grant_to_status_frames(int grant_frames)
+ {
+- return (nr_grant_frames(gt) << PAGE_SHIFT) / sizeof(grant_entry_t);
++ return (grant_frames * GRANT_PER_PAGE + GRANT_STATUS_PER_PAGE - 1) /
++ GRANT_STATUS_PER_PAGE;
+ }
+
+ static inline unsigned int
+ num_act_frames_from_sha_frames(const unsigned int num)
+ {
+ /* How many frames are needed for the active grant table,
+- * given the size of the shared grant table?
+- *
+- * act_per_page = PAGE_SIZE / sizeof(active_grant_entry_t);
+- * sha_per_page = PAGE_SIZE / sizeof(grant_entry_t);
+- * num_sha_entries = num * sha_per_page;
+- * num_act_frames = (num_sha_entries + (act_per_page-1)) / act_per_page;
+- */
+- return ((num * (PAGE_SIZE / sizeof(grant_entry_t))) +
+- ((PAGE_SIZE / sizeof(struct active_grant_entry))-1))
+- / (PAGE_SIZE / sizeof(struct active_grant_entry));
++ * given the size of the shared grant table? */
++ unsigned act_per_page = PAGE_SIZE / sizeof(struct active_grant_entry);
++ unsigned sha_per_page = PAGE_SIZE / sizeof(grant_entry_v1_t);
++ unsigned num_sha_entries = num * sha_per_page;
++ unsigned num_act_frames =
++ (num_sha_entries + (act_per_page-1)) / act_per_page;
++ return num_act_frames;
+ }
+
+ static inline unsigned int
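+
+ To make the sizing above concrete (assuming 4 KiB pages): a v2 entry is 16
+ bytes and a status entry is 2 bytes, so
+
+     GRANT_PER_PAGE        = 4096 / 16 = 256
+     GRANT_STATUS_PER_PAGE = 4096 /  2 = 2048
+     grant_to_status_frames(n) = (n * 256 + 2047) / 2048   /* = ceil(n / 8) */
+
+ i.e. one status frame covers eight shared frames, and the default maximum of
+ 32 grant frames needs at most 4 status frames.
+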
+diff --git a/xen/include/xlat.lst b/xen/include/xlat.lst
+index f2e4597..3188925 100644
+--- a/xen/include/xlat.lst
++++ b/xen/include/xlat.lst
+@@ -44,7 +44,12 @@
+ ! gnttab_transfer grant_table.h
+ ? gnttab_unmap_grant_ref grant_table.h
+ ? gnttab_unmap_and_replace grant_table.h
+-? grant_entry grant_table.h
++? gnttab_set_version grant_table.h
++? gnttab_get_version grant_table.h
++! gnttab_get_status_frames grant_table.h
++? grant_entry_v1 grant_table.h
++? grant_entry_header grant_table.h
++? grant_entry_v2 grant_table.h
+ ? kexec_exec kexec.h
+ ! kexec_image kexec.h
+ ! kexec_range kexec.h