debuggers.hg

changeset 20640:66ff18dd3858

VT-d: per-iommu domain-id

Currently, Xen uses a single shared pool of iommu domain-ids across all
the VT-d units in the platform. The number of iommu domain-ids (NR_DID,
e.g. 256) supported by each VT-d unit is reported in its Capability
register. The limitation of the current implementation is that it can
support at most NR_DID domains with VT-d in the entire platform, even
though the platform could support N * NR_DID (where N is the number of
VT-d units). Imagine a platform with several SR-IOV NICs, each of which
supports 128 VFs: the number of domain-ids needed may exceed NR_DID.

This patch implements iommu domain-id management per iommu (VT-d
unit), and hence removes the above limitation. It removes the global
domain-id bitmap and instead uses a domain-id bitmap in struct iommu.
It also introduces an array that maps between guest domain-ids and
iommu domain-ids, which is used to look up the iommu domain-id when
flushing the context cache or IOTLB. When a device is assigned to a
guest, an available iommu domain-id is chosen from the device's iommu,
and the guest domain id is recorded in the domain-id mapping array.
When a device is deassigned from a guest, the domain-id bit in the
domain-id bitmap and the corresponding entry in the domain-id map
array are cleared if no other device under the same iommu is owned by
the guest.

Signed-off-by: Weidong Han <weidong.han@intel.com>
author Keir Fraser <keir.fraser@citrix.com>
date Tue Dec 08 07:51:30 2009 +0000 (2009-12-08)
parents ab0d71f7f596
children 3122518646d3
files xen/drivers/passthrough/vtd/iommu.c xen/include/xen/hvm/iommu.h xen/include/xen/iommu.h
line diff
     1.1 --- a/xen/drivers/passthrough/vtd/iommu.c	Tue Dec 08 07:49:54 2009 +0000
     1.2 +++ b/xen/drivers/passthrough/vtd/iommu.c	Tue Dec 08 07:51:30 2009 +0000
     1.3 @@ -38,46 +38,70 @@
     1.4  #include "extern.h"
     1.5  #include "vtd.h"
     1.6  
     1.7 -#define domain_iommu_domid(d) ((d)->arch.hvm_domain.hvm_iommu.iommu_domid)
     1.8 -
     1.9  int nr_iommus;
    1.10 -static spinlock_t domid_bitmap_lock;    /* protect domain id bitmap */
    1.11 -static int domid_bitmap_size;           /* domain id bitmap size in bits */
    1.12 -static unsigned long *domid_bitmap;     /* iommu domain id bitmap */
    1.13  static bool_t rwbf_quirk;
    1.14  
    1.15  static void setup_dom0_devices(struct domain *d);
    1.16  static void setup_dom0_rmrr(struct domain *d);
    1.17  
    1.18 +static int domain_iommu_domid(struct domain *d,
    1.19 +                              struct iommu *iommu)
    1.20 +{
    1.21 +    unsigned long nr_dom, i;
    1.22 +
    1.23 +    nr_dom = cap_ndoms(iommu->cap);
    1.24 +    i = find_first_bit(iommu->domid_bitmap, nr_dom);
    1.25 +    while ( i < nr_dom )
    1.26 +    {
    1.27 +        if ( iommu->domid_map[i] == d->domain_id )
    1.28 +            return i;
    1.29 +
    1.30 +        i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
    1.31 +    }
    1.32 +
    1.33 +    gdprintk(XENLOG_ERR VTDPREFIX,
    1.34 +             "Cannot get valid iommu domid: domid=%d iommu->index=%d\n",
    1.35 +             d->domain_id, iommu->index);
    1.36 +    return -1;
    1.37 +}
    1.38 +
    1.39  #define DID_FIELD_WIDTH 16
    1.40  #define DID_HIGH_OFFSET 8
    1.41 -static void context_set_domain_id(struct context_entry *context,
    1.42 -                                  struct domain *d)
    1.43 +static int context_set_domain_id(struct context_entry *context,
    1.44 +                                 struct domain *d,
    1.45 +                                 struct iommu *iommu)
    1.46  {
    1.47 -    domid_t iommu_domid = domain_iommu_domid(d);
    1.48 +    unsigned long nr_dom, i;
    1.49 +    int found = 0;
    1.50 +
    1.51 +    ASSERT(spin_is_locked(&iommu->lock));
    1.52  
    1.53 -    if ( iommu_domid == 0 )
    1.54 +    nr_dom = cap_ndoms(iommu->cap);
    1.55 +    i = find_first_bit(iommu->domid_bitmap, nr_dom);
    1.56 +    while ( i < nr_dom )
    1.57      {
    1.58 -        spin_lock(&domid_bitmap_lock);
    1.59 -        iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size);
    1.60 -        set_bit(iommu_domid, domid_bitmap);
    1.61 -        spin_unlock(&domid_bitmap_lock);
    1.62 -        d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid;
    1.63 +        if ( iommu->domid_map[i] == d->domain_id )
    1.64 +        {
    1.65 +            found = 1;
    1.66 +            break;
    1.67 +        }
    1.68 +        i = find_next_bit(iommu->domid_bitmap, nr_dom, i+1);
    1.69      }
    1.70  
    1.71 -    context->hi &= (1 << DID_HIGH_OFFSET) - 1;
    1.72 -    context->hi |= iommu_domid << DID_HIGH_OFFSET;
    1.73 -}
    1.74 +    if ( found == 0 )
    1.75 +    {
    1.76 +        i = find_first_zero_bit(iommu->domid_bitmap, nr_dom);
    1.77 +        if ( i >= nr_dom )
    1.78 +        {
    1.79 +            gdprintk(XENLOG_ERR VTDPREFIX, "IOMMU: no free domain ids\n");
    1.80 +            return -EFAULT;
    1.81 +        }
    1.82 +        iommu->domid_map[i] = d->domain_id;
    1.83 +    }
    1.84  
    1.85 -static void iommu_domid_release(struct domain *d)
    1.86 -{
    1.87 -    domid_t iommu_domid = domain_iommu_domid(d);
    1.88 -
    1.89 -    if ( iommu_domid != 0 )
    1.90 -    {
    1.91 -        d->arch.hvm_domain.hvm_iommu.iommu_domid = 0;
    1.92 -        clear_bit(iommu_domid, domid_bitmap);
    1.93 -    }
    1.94 +    set_bit(i, iommu->domid_bitmap);
    1.95 +    context->hi |= (i & ((1 << DID_FIELD_WIDTH) - 1)) << DID_HIGH_OFFSET;
    1.96 +    return 0;
    1.97  }
    1.98  
    1.99  static struct intel_iommu *alloc_intel_iommu(void)
   1.100 @@ -526,6 +550,7 @@ static void dma_pte_clear_one(struct dom
   1.101      struct dma_pte *page = NULL, *pte = NULL;
   1.102      u64 pg_maddr;
   1.103      int flush_dev_iotlb;
   1.104 +    int iommu_domid;
   1.105  
   1.106      spin_lock(&hd->mapping_lock);
   1.107      /* get last level pte */
   1.108 @@ -557,7 +582,10 @@ static void dma_pte_clear_one(struct dom
   1.109          if ( test_bit(iommu->index, &hd->iommu_bitmap) )
   1.110          {
   1.111              flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
   1.112 -            if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(domain),
   1.113 +            iommu_domid= domain_iommu_domid(domain, iommu);
   1.114 +            if ( iommu_domid == -1 )
   1.115 +                continue;
   1.116 +            if ( iommu_flush_iotlb_psi(iommu, iommu_domid,
   1.117                                         addr, 1, 0, flush_dev_iotlb) )
   1.118                  iommu_flush_write_buffer(iommu);
   1.119          }
   1.120 @@ -982,7 +1010,7 @@ static int iommu_set_interrupt(struct io
   1.121  static int iommu_alloc(struct acpi_drhd_unit *drhd)
   1.122  {
   1.123      struct iommu *iommu;
   1.124 -    unsigned long sagaw;
   1.125 +    unsigned long sagaw, nr_dom;
   1.126      int agaw;
   1.127  
   1.128      if ( nr_iommus > MAX_IOMMUS )
   1.129 @@ -1033,6 +1061,25 @@ static int iommu_alloc(struct acpi_drhd_
   1.130      if ( !ecap_coherent(iommu->ecap) )
   1.131          iommus_incoherent = 1;
   1.132  
   1.133 +    /* allocate domain id bitmap */
   1.134 +    nr_dom = cap_ndoms(iommu->cap);
   1.135 +    iommu->domid_bitmap = xmalloc_array(unsigned long, BITS_TO_LONGS(nr_dom));
   1.136 +    if ( !iommu->domid_bitmap )
   1.137 +        return -ENOMEM ;
   1.138 +    memset(iommu->domid_bitmap, 0, nr_dom / 8);
   1.139 +
   1.140 +    /*
   1.141 +     * if Caching mode is set, then invalid translations are tagged with
   1.142 +     * domain id 0, Hence reserve bit 0 for it
   1.143 +     */
   1.144 +    if ( cap_caching_mode(iommu->cap) )
   1.145 +        set_bit(0, iommu->domid_bitmap);
   1.146 +
   1.147 +    iommu->domid_map = xmalloc_array(u16, nr_dom);
   1.148 +    if ( !iommu->domid_map )
   1.149 +        return -ENOMEM ;
   1.150 +    memset(iommu->domid_map, 0, nr_dom * sizeof(*iommu->domid_map));
   1.151 +
   1.152      spin_lock_init(&iommu->lock);
   1.153      spin_lock_init(&iommu->register_lock);
   1.154  
   1.155 @@ -1056,6 +1103,9 @@ static void iommu_free(struct acpi_drhd_
   1.156      if ( iommu->reg )
   1.157          iounmap(iommu->reg);
   1.158  
   1.159 +    xfree(iommu->domid_bitmap);
   1.160 +    xfree(iommu->domid_map);
   1.161 +
   1.162      free_intel_iommu(iommu->intel);
   1.163      destroy_irq(iommu->irq);
   1.164      xfree(iommu);
   1.165 @@ -1174,7 +1224,12 @@ static int domain_context_mapping_one(
   1.166          spin_unlock(&hd->mapping_lock);
   1.167      }
   1.168  
   1.169 -    context_set_domain_id(context, domain);
   1.170 +    if ( context_set_domain_id(context, domain, iommu) )
   1.171 +    {
   1.172 +        spin_unlock(&iommu->lock);
   1.173 +        return -EFAULT;
   1.174 +    }
   1.175 +
   1.176      context_set_address_width(*context, agaw);
   1.177      context_set_fault_enable(*context);
   1.178      context_set_present(*context);
   1.179 @@ -1292,6 +1347,10 @@ static int domain_context_unmap_one(
   1.180  {
   1.181      struct context_entry *context, *context_entries;
   1.182      u64 maddr;
   1.183 +    int iommu_domid;
   1.184 +    struct pci_dev *pdev;
   1.185 +    struct acpi_drhd_unit *drhd;
   1.186 +    int found = 0;
   1.187  
   1.188      ASSERT(spin_is_locked(&pcidevs_lock));
   1.189      spin_lock(&iommu->lock);
   1.190 @@ -1311,14 +1370,50 @@ static int domain_context_unmap_one(
   1.191      context_clear_entry(*context);
   1.192      iommu_flush_cache_entry(context, sizeof(struct context_entry));
   1.193  
   1.194 -    if ( iommu_flush_context_device(iommu, domain_iommu_domid(domain),
   1.195 +    iommu_domid= domain_iommu_domid(domain, iommu);
   1.196 +    if ( iommu_domid == -1 )
   1.197 +    {
   1.198 +        spin_unlock(&iommu->lock);
   1.199 +        unmap_vtd_domain_page(context_entries);
   1.200 +        return -EINVAL;
   1.201 +    }
   1.202 +
   1.203 +    if ( iommu_flush_context_device(iommu, iommu_domid,
   1.204                                      (((u16)bus) << 8) | devfn,
   1.205                                      DMA_CCMD_MASK_NOBIT, 0) )
   1.206          iommu_flush_write_buffer(iommu);
   1.207      else
   1.208      {
   1.209          int flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
   1.210 -        iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0, flush_dev_iotlb);
   1.211 +        iommu_flush_iotlb_dsi(iommu, iommu_domid, 0, flush_dev_iotlb);
   1.212 +    }
   1.213 +
   1.214 +
   1.215 +    /*
   1.216 +     * if no other devices under the same iommu owned by this domain,
   1.217 +     * clear iommu in iommu_bitmap and clear domain_id in domid_bitmp
   1.218 +     */
   1.219 +    for_each_pdev ( domain, pdev )
   1.220 +    {
   1.221 +        if ( pdev->bus == bus && pdev->devfn == devfn )
   1.222 +            continue;
   1.223 +
   1.224 +        drhd = acpi_find_matched_drhd_unit(pdev);
   1.225 +        if ( drhd && drhd->iommu == iommu )
   1.226 +        {
   1.227 +            found = 1;
   1.228 +            break;
   1.229 +        }
   1.230 +    }
   1.231 +
   1.232 +    if ( found == 0 )
   1.233 +    {
   1.234 +        struct hvm_iommu *hd = domain_hvm_iommu(domain);
   1.235 +
   1.236 +        clear_bit(iommu->index, &hd->iommu_bitmap);
   1.237 +
   1.238 +        clear_bit(iommu_domid, iommu->domid_bitmap);
   1.239 +        iommu->domid_map[iommu_domid] = 0;
   1.240      }
   1.241  
   1.242      spin_unlock(&iommu->lock);
   1.243 @@ -1397,11 +1492,8 @@ static int reassign_device_ownership(
   1.244      struct domain *target,
   1.245      u8 bus, u8 devfn)
   1.246  {
   1.247 -    struct hvm_iommu *source_hd = domain_hvm_iommu(source);
   1.248      struct pci_dev *pdev;
   1.249 -    struct acpi_drhd_unit *drhd;
   1.250 -    struct iommu *pdev_iommu;
   1.251 -    int ret, found = 0;
   1.252 +    int ret;
   1.253  
   1.254      ASSERT(spin_is_locked(&pcidevs_lock));
   1.255      pdev = pci_get_pdev_by_domain(source, bus, devfn);
   1.256 @@ -1409,10 +1501,9 @@ static int reassign_device_ownership(
   1.257      if (!pdev)
   1.258          return -ENODEV;
   1.259  
   1.260 -    if ( (drhd = acpi_find_matched_drhd_unit(pdev)) == NULL )
   1.261 -        return -ENODEV;
   1.262 -    pdev_iommu = drhd->iommu;
   1.263 -    domain_context_unmap(source, bus, devfn);
   1.264 +    ret = domain_context_unmap(source, bus, devfn);
   1.265 +    if ( ret )
   1.266 +        return ret;
   1.267  
   1.268      ret = domain_context_mapping(target, bus, devfn);
   1.269      if ( ret )
   1.270 @@ -1421,19 +1512,6 @@ static int reassign_device_ownership(
   1.271      list_move(&pdev->domain_list, &target->arch.pdev_list);
   1.272      pdev->domain = target;
   1.273  
   1.274 -    for_each_pdev ( source, pdev )
   1.275 -    {
   1.276 -        drhd = acpi_find_matched_drhd_unit(pdev);
   1.277 -        if ( drhd && drhd->iommu == pdev_iommu )
   1.278 -        {
   1.279 -            found = 1;
   1.280 -            break;
   1.281 -        }
   1.282 -    }
   1.283 -
   1.284 -    if ( !found )
   1.285 -        clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap);
   1.286 -
   1.287      return ret;
   1.288  }
   1.289  
   1.290 @@ -1448,8 +1526,6 @@ void iommu_domain_teardown(struct domain
   1.291      iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw));
   1.292      hd->pgd_maddr = 0;
   1.293      spin_unlock(&hd->mapping_lock);
   1.294 -
   1.295 -    iommu_domid_release(d);
   1.296  }
   1.297  
   1.298  static int intel_iommu_map_page(
   1.299 @@ -1462,6 +1538,7 @@ static int intel_iommu_map_page(
   1.300      u64 pg_maddr;
   1.301      int pte_present;
   1.302      int flush_dev_iotlb;
   1.303 +    int iommu_domid;
   1.304  
   1.305      /* do nothing if dom0 and iommu supports pass thru */
   1.306      if ( iommu_passthrough && (d->domain_id == 0) )
   1.307 @@ -1501,7 +1578,10 @@ static int intel_iommu_map_page(
   1.308              continue;
   1.309  
   1.310          flush_dev_iotlb = find_ats_dev_drhd(iommu) ? 1 : 0;
   1.311 -        if ( iommu_flush_iotlb_psi(iommu, domain_iommu_domid(d),
   1.312 +        iommu_domid= domain_iommu_domid(d, iommu);
   1.313 +        if ( iommu_domid == -1 )
   1.314 +            continue;
   1.315 +        if ( iommu_flush_iotlb_psi(iommu, iommu_domid,
   1.316                                     (paddr_t)gfn << PAGE_SHIFT_4K, 1,
   1.317                                     !pte_present, flush_dev_iotlb) )
   1.318              iommu_flush_write_buffer(iommu);
   1.319 @@ -1780,7 +1860,6 @@ int intel_vtd_setup(void)
   1.320  
   1.321      platform_quirks();
   1.322  
   1.323 -    spin_lock_init(&domid_bitmap_lock);
   1.324      clflush_size = get_cache_line_size();
   1.325  
   1.326      irq_to_iommu = xmalloc_array(struct iommu*, nr_irqs);
   1.327 @@ -1828,16 +1907,6 @@ int intel_vtd_setup(void)
   1.328      P(iommu_intremap, "Interrupt Remapping");
   1.329  #undef P
   1.330  
   1.331 -    /* Allocate domain id bitmap, and set bit 0 as reserved. */
   1.332 -    drhd = list_entry(acpi_drhd_units.next, typeof(*drhd), list);
   1.333 -    domid_bitmap_size = cap_ndoms(drhd->iommu->cap);
   1.334 -    domid_bitmap = xmalloc_array(unsigned long,
   1.335 -                                 BITS_TO_LONGS(domid_bitmap_size));
   1.336 -    if ( domid_bitmap == NULL )
   1.337 -        goto error;
   1.338 -    memset(domid_bitmap, 0, domid_bitmap_size / 8);
   1.339 -    __set_bit(0, domid_bitmap);
   1.340 -
   1.341      scan_pci_devices();
   1.342  
   1.343      if ( init_vtd_hw() )
     2.1 --- a/xen/include/xen/hvm/iommu.h	Tue Dec 08 07:49:54 2009 +0000
     2.2 +++ b/xen/include/xen/hvm/iommu.h	Tue Dec 08 07:51:30 2009 +0000
     2.3 @@ -34,7 +34,6 @@ struct hvm_iommu {
     2.4      spinlock_t mapping_lock;       /* io page table lock */
     2.5      int agaw;     /* adjusted guest address width, 0 is level 2 30-bit */
     2.6      struct list_head g2m_ioport_list;  /* guest to machine ioport mapping */
     2.7 -    domid_t iommu_domid;           /* domain id stored in iommu */
     2.8      u64 iommu_bitmap;              /* bitmap of iommu(s) that the domain uses */
     2.9  
    2.10      /* amd iommu support */
     3.1 --- a/xen/include/xen/iommu.h	Tue Dec 08 07:49:54 2009 +0000
     3.2 +++ b/xen/include/xen/iommu.h	Tue Dec 08 07:51:30 2009 +0000
     3.3 @@ -55,6 +55,8 @@ struct iommu {
     3.4      u64 root_maddr; /* root entry machine address */
     3.5      int irq;
     3.6      struct intel_iommu *intel;
     3.7 +    unsigned long *domid_bitmap;  /* domain id bitmap */
     3.8 +    u16 *domid_map;               /* domain id mapping array */
     3.9  };
    3.10  
    3.11  int iommu_setup(void);