debuggers.hg
changeset 18944:2941b1a97c60
Re-enable MSI support
MSI support is currently disabled because of locking issues. This patch
cleans up the locking related to MSI handling so that it can be re-enabled.
Signed-off-by: Jiang Yunhong <yunhong.jiang@intel.com>
| author | Keir Fraser <keir.fraser@citrix.com> |
|---|---|
| date | Thu Dec 11 11:48:19 2008 +0000 (2008-12-11) |
| parents | c15244125a69 |
| children | b56d5fe594ae |
| files | xen/arch/x86/domctl.c xen/arch/x86/irq.c xen/arch/x86/msi.c xen/arch/x86/physdev.c xen/arch/x86/x86_64/asm-offsets.c xen/common/domain.c xen/drivers/passthrough/amd/pci_amd_iommu.c xen/drivers/passthrough/iommu.c xen/drivers/passthrough/pci.c xen/drivers/passthrough/vtd/iommu.c xen/include/asm-x86/msi.h xen/include/xen/iommu.h xen/include/xen/pci.h |
line diff
1.1 --- a/xen/arch/x86/domctl.c Thu Dec 11 11:40:10 2008 +0000 1.2 +++ b/xen/arch/x86/domctl.c Thu Dec 11 11:48:19 2008 +0000 1.3 @@ -665,14 +665,6 @@ long arch_do_domctl( 1.4 } 1.5 1.6 ret = -EINVAL; 1.7 - if ( device_assigned(bus, devfn) ) 1.8 - { 1.9 - gdprintk(XENLOG_ERR, "XEN_DOMCTL_assign_device: " 1.10 - "%x:%x:%x already assigned, or non-existent\n", 1.11 - bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1.12 - put_domain(d); 1.13 - break; 1.14 - } 1.15 1.16 ret = assign_device(d, bus, devfn); 1.17 if ( ret ) 1.18 @@ -715,15 +707,8 @@ long arch_do_domctl( 1.19 put_domain(d); 1.20 break; 1.21 } 1.22 - 1.23 - if ( !device_assigned(bus, devfn) ) 1.24 - { 1.25 - put_domain(d); 1.26 - break; 1.27 - } 1.28 - 1.29 ret = 0; 1.30 - deassign_device(d, bus, devfn); 1.31 + ret = deassign_device(d, bus, devfn); 1.32 gdprintk(XENLOG_INFO, "XEN_DOMCTL_deassign_device: bdf = %x:%x:%x\n", 1.33 bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); 1.34
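The domctl hunks above drop the open-coded device_assigned() checks and rely on the return values of assign_device()/deassign_device() instead. A hypothetical condensation of the resulting deassign path (deassign_and_log is not a function in the tree, only an illustration of the new contract):

```c
/* Hypothetical condensation: the "is this device actually assigned to d?"
 * check now lives inside deassign_device() (see the iommu.c hunks below),
 * which reports failure through its new return value instead of a separate
 * device_assigned() call in the domctl handler. */
static long deassign_and_log(struct domain *d, u8 bus, u8 devfn)
{
    long ret = deassign_device(d, bus, devfn);   /* -ENODEV / -EINVAL on error */

    if ( !ret )
        gdprintk(XENLOG_INFO, "deassigned bdf = %x:%x:%x\n",
                 bus, PCI_SLOT(devfn), PCI_FUNC(devfn));
    return ret;
}
```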
2.1 --- a/xen/arch/x86/irq.c Thu Dec 11 11:40:10 2008 +0000 2.2 +++ b/xen/arch/x86/irq.c Thu Dec 11 11:48:19 2008 +0000 2.3 @@ -847,12 +847,11 @@ int map_domain_pirq( 2.4 int old_vector, old_pirq; 2.5 irq_desc_t *desc; 2.6 unsigned long flags; 2.7 - 2.8 - ASSERT(spin_is_locked(&d->event_lock)); 2.9 + struct msi_desc *msi_desc; 2.10 + struct pci_dev *pdev = NULL; 2.11 2.12 - /* XXX Until pcidev and msi locking is fixed. */ 2.13 - if ( type == MAP_PIRQ_TYPE_MSI ) 2.14 - return -EINVAL; 2.15 + ASSERT(spin_is_locked(&pcidevs_lock)); 2.16 + ASSERT(spin_is_locked(&d->event_lock)); 2.17 2.18 if ( !IS_PRIV(current->domain) ) 2.19 return -EPERM; 2.20 @@ -884,25 +883,35 @@ int map_domain_pirq( 2.21 } 2.22 2.23 desc = &irq_desc[vector]; 2.24 - spin_lock_irqsave(&desc->lock, flags); 2.25 2.26 if ( type == MAP_PIRQ_TYPE_MSI ) 2.27 { 2.28 struct msi_info *msi = (struct msi_info *)data; 2.29 - if ( desc->handler != &no_irq_type ) 2.30 - dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n", 2.31 - d->domain_id, vector); 2.32 - desc->handler = &pci_msi_type; 2.33 - ret = pci_enable_msi(msi); 2.34 + 2.35 + pdev = pci_get_pdev(msi->bus, msi->devfn); 2.36 + ret = pci_enable_msi(msi, &msi_desc); 2.37 if ( ret ) 2.38 goto done; 2.39 + 2.40 + spin_lock_irqsave(&desc->lock, flags); 2.41 + 2.42 + if ( desc->handler != &no_irq_type ) 2.43 + dprintk(XENLOG_G_ERR, "dom%d: vector %d in use\n", 2.44 + d->domain_id, vector); 2.45 + desc->handler = &pci_msi_type; 2.46 + d->arch.pirq_vector[pirq] = vector; 2.47 + d->arch.vector_pirq[vector] = pirq; 2.48 + setup_msi_irq(pdev, msi_desc); 2.49 + spin_unlock_irqrestore(&desc->lock, flags); 2.50 + } else 2.51 + { 2.52 + spin_lock_irqsave(&desc->lock, flags); 2.53 + d->arch.pirq_vector[pirq] = vector; 2.54 + d->arch.vector_pirq[vector] = pirq; 2.55 + spin_unlock_irqrestore(&desc->lock, flags); 2.56 } 2.57 2.58 - d->arch.pirq_vector[pirq] = vector; 2.59 - d->arch.vector_pirq[vector] = pirq; 2.60 - 2.61 done: 2.62 - spin_unlock_irqrestore(&desc->lock, flags); 2.63 return ret; 2.64 } 2.65 2.66 @@ -913,6 +922,7 @@ int unmap_domain_pirq(struct domain *d, 2.67 irq_desc_t *desc; 2.68 int vector, ret = 0; 2.69 bool_t forced_unbind; 2.70 + struct msi_desc *msi_desc = NULL; 2.71 2.72 if ( (pirq < 0) || (pirq >= NR_IRQS) ) 2.73 return -EINVAL; 2.74 @@ -920,6 +930,7 @@ int unmap_domain_pirq(struct domain *d, 2.75 if ( !IS_PRIV(current->domain) ) 2.76 return -EINVAL; 2.77 2.78 + ASSERT(spin_is_locked(&pcidevs_lock)); 2.79 ASSERT(spin_is_locked(&d->event_lock)); 2.80 2.81 vector = d->arch.pirq_vector[pirq]; 2.82 @@ -937,18 +948,19 @@ int unmap_domain_pirq(struct domain *d, 2.83 d->domain_id, pirq); 2.84 2.85 desc = &irq_desc[vector]; 2.86 + 2.87 + if ( (msi_desc = desc->msi_desc) != NULL ) 2.88 + pci_disable_msi(msi_desc); 2.89 + 2.90 spin_lock_irqsave(&desc->lock, flags); 2.91 2.92 BUG_ON(vector != d->arch.pirq_vector[pirq]); 2.93 2.94 - if ( desc->msi_desc ) 2.95 - pci_disable_msi(vector); 2.96 + if ( msi_desc ) 2.97 + teardown_msi_vector(vector); 2.98 2.99 if ( desc->handler == &pci_msi_type ) 2.100 - { 2.101 desc->handler = &no_irq_type; 2.102 - free_irq_vector(vector); 2.103 - } 2.104 2.105 if ( !forced_unbind ) 2.106 { 2.107 @@ -962,6 +974,11 @@ int unmap_domain_pirq(struct domain *d, 2.108 } 2.109 2.110 spin_unlock_irqrestore(&desc->lock, flags); 2.111 + if (msi_desc) 2.112 + { 2.113 + msi_free_vector(msi_desc); 2.114 + free_irq_vector(vector); 2.115 + } 2.116 2.117 ret = irq_deny_access(d, pirq); 2.118 if ( ret ) 2.119 @@ -976,6 +993,7 @@ void free_domain_pirqs(struct domain *d) 2.120 { 
2.121 int i; 2.122 2.123 + read_lock(&pcidevs_lock); 2.124 spin_lock(&d->event_lock); 2.125 2.126 for ( i = 0; i < NR_IRQS; i++ ) 2.127 @@ -983,6 +1001,7 @@ void free_domain_pirqs(struct domain *d) 2.128 unmap_domain_pirq(d, i); 2.129 2.130 spin_unlock(&d->event_lock); 2.131 + read_unlock(&pcidevs_lock); 2.132 } 2.133 2.134 extern void dump_ioapic_irq_info(void);
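map_domain_pirq() and unmap_domain_pirq() now ASSERT that both pcidevs_lock and the domain's event_lock are held, so every caller has to take the PCI device list lock (read side) before the per-domain event lock, as the physdev.c hunks below do. A minimal sketch of that calling convention (map_msi_pirq is a hypothetical wrapper, not code from the tree):

```c
/* Hypothetical caller mirroring the physdev.c changes in this changeset:
 * pcidevs_lock (read side) is taken before d->event_lock, and both are
 * held across map_domain_pirq(). */
static int map_msi_pirq(struct domain *d, int pirq, int vector,
                        struct msi_info *msi)
{
    int ret;

    read_lock(&pcidevs_lock);
    spin_lock(&d->event_lock);

    ret = map_domain_pirq(d, pirq, vector, MAP_PIRQ_TYPE_MSI, msi);

    spin_unlock(&d->event_lock);
    read_unlock(&pcidevs_lock);

    return ret;
}
```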
3.1 --- a/xen/arch/x86/msi.c Thu Dec 11 11:40:10 2008 +0000 3.2 +++ b/xen/arch/x86/msi.c Thu Dec 11 11:48:19 2008 +0000 3.3 @@ -153,6 +153,8 @@ static int set_vector_msi(struct msi_des 3.4 3.5 static int unset_vector_msi(int vector) 3.6 { 3.7 + ASSERT(spin_is_locked(&irq_desc[vector].lock)); 3.8 + 3.9 if ( vector >= NR_VECTORS ) 3.10 { 3.11 dprintk(XENLOG_ERR, "Trying to uninstall msi data for Vector %d\n", 3.12 @@ -161,6 +163,7 @@ static int unset_vector_msi(int vector) 3.13 } 3.14 3.15 irq_desc[vector].msi_desc = NULL; 3.16 + 3.17 return 0; 3.18 } 3.19 3.20 @@ -228,14 +231,12 @@ void set_msi_affinity(unsigned int vecto 3.21 return; 3.22 3.23 ASSERT(spin_is_locked(&irq_desc[vector].lock)); 3.24 - spin_lock(&desc->dev->lock); 3.25 read_msi_msg(desc, &msg); 3.26 3.27 msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK; 3.28 msg.address_lo |= MSI_ADDR_DEST_ID(dest); 3.29 3.30 write_msi_msg(desc, &msg); 3.31 - spin_unlock(&desc->dev->lock); 3.32 } 3.33 3.34 static void msi_set_enable(struct pci_dev *dev, int enable) 3.35 @@ -369,7 +370,7 @@ static struct msi_desc* alloc_msi_entry( 3.36 return entry; 3.37 } 3.38 3.39 -static int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 3.40 +int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc) 3.41 { 3.42 struct msi_msg msg; 3.43 3.44 @@ -380,19 +381,13 @@ static int setup_msi_irq(struct pci_dev 3.45 return 0; 3.46 } 3.47 3.48 -static void teardown_msi_vector(int vector) 3.49 +void teardown_msi_vector(int vector) 3.50 { 3.51 unset_vector_msi(vector); 3.52 } 3.53 3.54 -static void msi_free_vector(int vector) 3.55 +int msi_free_vector(struct msi_desc *entry) 3.56 { 3.57 - struct msi_desc *entry; 3.58 - 3.59 - ASSERT(spin_is_locked(&irq_desc[vector].lock)); 3.60 - entry = irq_desc[vector].msi_desc; 3.61 - teardown_msi_vector(vector); 3.62 - 3.63 if ( entry->msi_attrib.type == PCI_CAP_ID_MSIX ) 3.64 { 3.65 unsigned long start; 3.66 @@ -407,6 +402,7 @@ static void msi_free_vector(int vector) 3.67 } 3.68 list_del(&entry->list); 3.69 xfree(entry); 3.70 + return 0; 3.71 } 3.72 3.73 static struct msi_desc *find_msi_entry(struct pci_dev *dev, 3.74 @@ -433,15 +429,18 @@ static struct msi_desc *find_msi_entry(s 3.75 * multiple messages. A return of zero indicates the successful setup 3.76 * of an entry zero with the new MSI irq or non-zero for otherwise. 3.77 **/ 3.78 -static int msi_capability_init(struct pci_dev *dev, int vector) 3.79 +static int msi_capability_init(struct pci_dev *dev, 3.80 + int vector, 3.81 + struct msi_desc **desc) 3.82 { 3.83 struct msi_desc *entry; 3.84 - int pos, ret; 3.85 + int pos; 3.86 u16 control; 3.87 u8 bus = dev->bus; 3.88 u8 slot = PCI_SLOT(dev->devfn); 3.89 u8 func = PCI_FUNC(dev->devfn); 3.90 3.91 + ASSERT(spin_is_locked(&pcidevs_lock)); 3.92 pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSI); 3.93 control = pci_conf_read16(bus, slot, func, msi_control_reg(pos)); 3.94 /* MSI Entry Initialization */ 3.95 @@ -477,14 +476,7 @@ static int msi_capability_init(struct pc 3.96 } 3.97 list_add_tail(&entry->list, &dev->msi_list); 3.98 3.99 - /* Configure MSI capability structure */ 3.100 - ret = setup_msi_irq(dev, entry); 3.101 - if ( ret ) 3.102 - { 3.103 - msi_free_vector(vector); 3.104 - return ret; 3.105 - } 3.106 - 3.107 + *desc = entry; 3.108 /* Restore the original MSI enabled bits */ 3.109 pci_conf_write16(bus, slot, func, msi_control_reg(pos), control); 3.110 3.111 @@ -501,7 +493,9 @@ static int msi_capability_init(struct pc 3.112 * single MSI-X irq. 
A return of zero indicates the successful setup of 3.113 * requested MSI-X entries with allocated irqs or non-zero for otherwise. 3.114 **/ 3.115 -static int msix_capability_init(struct pci_dev *dev, struct msi_info *msi) 3.116 +static int msix_capability_init(struct pci_dev *dev, 3.117 + struct msi_info *msi, 3.118 + struct msi_desc **desc) 3.119 { 3.120 struct msi_desc *entry; 3.121 int pos; 3.122 @@ -515,6 +509,9 @@ static int msix_capability_init(struct p 3.123 u8 slot = PCI_SLOT(dev->devfn); 3.124 u8 func = PCI_FUNC(dev->devfn); 3.125 3.126 + ASSERT(spin_is_locked(&pcidevs_lock)); 3.127 + ASSERT(desc); 3.128 + 3.129 pos = pci_find_cap_offset(bus, slot, func, PCI_CAP_ID_MSIX); 3.130 control = pci_conf_read16(bus, slot, func, msix_control_reg(pos)); 3.131 msix_set_enable(dev, 0);/* Ensure msix is disabled as I set it up */ 3.132 @@ -550,9 +547,13 @@ static int msix_capability_init(struct p 3.133 3.134 list_add_tail(&entry->list, &dev->msi_list); 3.135 3.136 - setup_msi_irq(dev, entry); 3.137 + /* Mask interrupt here */ 3.138 + writel(1, entry->mask_base + entry->msi_attrib.entry_nr 3.139 + * PCI_MSIX_ENTRY_SIZE 3.140 + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); 3.141 3.142 - /* Set MSI-X enabled bits */ 3.143 + *desc = entry; 3.144 + /* Restore MSI-X enabled bits */ 3.145 pci_conf_write16(bus, slot, func, msix_control_reg(pos), control); 3.146 3.147 return 0; 3.148 @@ -568,45 +569,35 @@ static int msix_capability_init(struct p 3.149 * indicates the successful setup of an entry zero with the new MSI 3.150 * irq or non-zero for otherwise. 3.151 **/ 3.152 -static int __pci_enable_msi(struct msi_info *msi) 3.153 +static int __pci_enable_msi(struct msi_info *msi, struct msi_desc **desc) 3.154 { 3.155 int status; 3.156 struct pci_dev *pdev; 3.157 3.158 - pdev = pci_lock_pdev(msi->bus, msi->devfn); 3.159 + ASSERT(spin_is_locked(&pcidevs_lock)); 3.160 + pdev = pci_get_pdev(msi->bus, msi->devfn); 3.161 if ( !pdev ) 3.162 return -ENODEV; 3.163 3.164 if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSI) ) 3.165 { 3.166 - spin_unlock(&pdev->lock); 3.167 dprintk(XENLOG_WARNING, "vector %d has already mapped to MSI on " 3.168 "device %02x:%02x.%01x.\n", msi->vector, msi->bus, 3.169 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); 3.170 return 0; 3.171 } 3.172 3.173 - status = msi_capability_init(pdev, msi->vector); 3.174 - spin_unlock(&pdev->lock); 3.175 + status = msi_capability_init(pdev, msi->vector, desc); 3.176 return status; 3.177 } 3.178 3.179 -static void __pci_disable_msi(int vector) 3.180 +static void __pci_disable_msi(struct msi_desc *entry) 3.181 { 3.182 - struct msi_desc *entry; 3.183 struct pci_dev *dev; 3.184 int pos; 3.185 u16 control; 3.186 u8 bus, slot, func; 3.187 3.188 - entry = irq_desc[vector].msi_desc; 3.189 - if ( !entry ) 3.190 - return; 3.191 - /* 3.192 - * Lock here is safe. msi_desc can not be removed without holding 3.193 - * both irq_desc[].lock (which we do) and pdev->lock. 3.194 - */ 3.195 - spin_lock(&entry->dev->lock); 3.196 dev = entry->dev; 3.197 bus = dev->bus; 3.198 slot = PCI_SLOT(dev->devfn); 3.199 @@ -618,10 +609,6 @@ static void __pci_disable_msi(int vector 3.200 3.201 BUG_ON(list_empty(&dev->msi_list)); 3.202 3.203 - msi_free_vector(vector); 3.204 - 3.205 - pci_conf_write16(bus, slot, func, msi_control_reg(pos), control); 3.206 - spin_unlock(&dev->lock); 3.207 } 3.208 3.209 /** 3.210 @@ -639,7 +626,7 @@ static void __pci_disable_msi(int vector 3.211 * of irqs available. Driver should use the returned value to re-send 3.212 * its request. 
3.213 **/ 3.214 -static int __pci_enable_msix(struct msi_info *msi) 3.215 +static int __pci_enable_msix(struct msi_info *msi, struct msi_desc **desc) 3.216 { 3.217 int status, pos, nr_entries; 3.218 struct pci_dev *pdev; 3.219 @@ -647,7 +634,8 @@ static int __pci_enable_msix(struct msi_ 3.220 u8 slot = PCI_SLOT(msi->devfn); 3.221 u8 func = PCI_FUNC(msi->devfn); 3.222 3.223 - pdev = pci_lock_pdev(msi->bus, msi->devfn); 3.224 + ASSERT(spin_is_locked(&pcidevs_lock)); 3.225 + pdev = pci_get_pdev(msi->bus, msi->devfn); 3.226 if ( !pdev ) 3.227 return -ENODEV; 3.228 3.229 @@ -655,41 +643,27 @@ static int __pci_enable_msix(struct msi_ 3.230 control = pci_conf_read16(msi->bus, slot, func, msi_control_reg(pos)); 3.231 nr_entries = multi_msix_capable(control); 3.232 if (msi->entry_nr >= nr_entries) 3.233 - { 3.234 - spin_unlock(&pdev->lock); 3.235 return -EINVAL; 3.236 - } 3.237 3.238 if ( find_msi_entry(pdev, msi->vector, PCI_CAP_ID_MSIX) ) 3.239 { 3.240 - spin_unlock(&pdev->lock); 3.241 dprintk(XENLOG_WARNING, "vector %d has already mapped to MSIX on " 3.242 "device %02x:%02x.%01x.\n", msi->vector, msi->bus, 3.243 PCI_SLOT(msi->devfn), PCI_FUNC(msi->devfn)); 3.244 return 0; 3.245 } 3.246 3.247 - status = msix_capability_init(pdev, msi); 3.248 - spin_unlock(&pdev->lock); 3.249 + status = msix_capability_init(pdev, msi, desc); 3.250 return status; 3.251 } 3.252 3.253 -static void __pci_disable_msix(int vector) 3.254 +static void __pci_disable_msix(struct msi_desc *entry) 3.255 { 3.256 - struct msi_desc *entry; 3.257 struct pci_dev *dev; 3.258 int pos; 3.259 u16 control; 3.260 u8 bus, slot, func; 3.261 3.262 - entry = irq_desc[vector].msi_desc; 3.263 - if ( !entry ) 3.264 - return; 3.265 - /* 3.266 - * Lock here is safe. msi_desc can not be removed without holding 3.267 - * both irq_desc[].lock (which we do) and pdev->lock. 3.268 - */ 3.269 - spin_lock(&entry->dev->lock); 3.270 dev = entry->dev; 3.271 bus = dev->bus; 3.272 slot = PCI_SLOT(dev->devfn); 3.273 @@ -701,50 +675,51 @@ static void __pci_disable_msix(int vecto 3.274 3.275 BUG_ON(list_empty(&dev->msi_list)); 3.276 3.277 - msi_free_vector(vector); 3.278 + writel(1, entry->mask_base + entry->msi_attrib.entry_nr 3.279 + * PCI_MSIX_ENTRY_SIZE 3.280 + + PCI_MSIX_ENTRY_VECTOR_CTRL_OFFSET); 3.281 3.282 pci_conf_write16(bus, slot, func, msix_control_reg(pos), control); 3.283 - spin_unlock(&dev->lock); 3.284 } 3.285 3.286 -int pci_enable_msi(struct msi_info *msi) 3.287 +/* 3.288 + * Notice: only construct the msi_desc 3.289 + * no change to irq_desc here, and the interrupt is masked 3.290 + */ 3.291 +int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc) 3.292 { 3.293 - ASSERT(spin_is_locked(&irq_desc[msi->vector].lock)); 3.294 + ASSERT(spin_is_locked(&pcidevs_lock)); 3.295 3.296 - return msi->table_base ? __pci_enable_msix(msi) : 3.297 - __pci_enable_msi(msi); 3.298 + return msi->table_base ? 
__pci_enable_msix(msi, desc) : 3.299 + __pci_enable_msi(msi, desc); 3.300 } 3.301 3.302 -void pci_disable_msi(int vector) 3.303 +/* 3.304 + * Device only, no irq_desc 3.305 + */ 3.306 +void pci_disable_msi(struct msi_desc *msi_desc) 3.307 { 3.308 - irq_desc_t *desc = &irq_desc[vector]; 3.309 - ASSERT(spin_is_locked(&desc->lock)); 3.310 - if ( !desc->msi_desc ) 3.311 - return; 3.312 - 3.313 - if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSI ) 3.314 - __pci_disable_msi(vector); 3.315 - else if ( desc->msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX ) 3.316 - __pci_disable_msix(vector); 3.317 + if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSI ) 3.318 + __pci_disable_msi(msi_desc); 3.319 + else if ( msi_desc->msi_attrib.type == PCI_CAP_ID_MSIX ) 3.320 + __pci_disable_msix(msi_desc); 3.321 } 3.322 3.323 static void msi_free_vectors(struct pci_dev* dev) 3.324 { 3.325 struct msi_desc *entry, *tmp; 3.326 irq_desc_t *desc; 3.327 - unsigned long flags; 3.328 + unsigned long flags, vector; 3.329 3.330 - retry: 3.331 list_for_each_entry_safe( entry, tmp, &dev->msi_list, list ) 3.332 { 3.333 - desc = &irq_desc[entry->vector]; 3.334 + vector = entry->vector; 3.335 + desc = &irq_desc[vector]; 3.336 + pci_disable_msi(entry); 3.337 3.338 - local_irq_save(flags); 3.339 - if ( !spin_trylock(&desc->lock) ) 3.340 - { 3.341 - local_irq_restore(flags); 3.342 - goto retry; 3.343 - } 3.344 + spin_lock_irqsave(&desc->lock, flags); 3.345 + 3.346 + teardown_msi_vector(vector); 3.347 3.348 if ( desc->handler == &pci_msi_type ) 3.349 { 3.350 @@ -753,8 +728,8 @@ static void msi_free_vectors(struct pci_ 3.351 desc->handler = &no_irq_type; 3.352 } 3.353 3.354 - msi_free_vector(entry->vector); 3.355 spin_unlock_irqrestore(&desc->lock, flags); 3.356 + msi_free_vector(entry); 3.357 } 3.358 } 3.359
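With these msi.c changes, pci_enable_msi() only constructs a masked msi_desc under pcidevs_lock and hands it back through its new desc parameter; binding the descriptor to a vector happens separately under irq_desc[].lock via setup_msi_irq(). A hypothetical condensation of that two-phase flow, modelled on the MAP_PIRQ_TYPE_MSI branch of map_domain_pirq() above (enable_and_bind_msi is illustrative only):

```c
/* Hypothetical sketch of the split MSI-enable sequence: phase 1 touches
 * only the pci_dev under pcidevs_lock and leaves the interrupt masked,
 * phase 2 binds the msi_desc to the vector under irq_desc[].lock. */
static int enable_and_bind_msi(struct domain *d, int pirq, int vector,
                               struct msi_info *msi)
{
    struct msi_desc *msi_desc;
    struct pci_dev *pdev;
    unsigned long flags;
    int ret;

    ASSERT(spin_is_locked(&pcidevs_lock));

    /* Phase 1: device side only; no irq_desc state is modified. */
    pdev = pci_get_pdev(msi->bus, msi->devfn);
    ret = pci_enable_msi(msi, &msi_desc);
    if ( ret )
        return ret;

    /* Phase 2: hook the masked descriptor up to the vector. */
    spin_lock_irqsave(&irq_desc[vector].lock, flags);
    irq_desc[vector].handler = &pci_msi_type;
    d->arch.pirq_vector[pirq] = vector;
    d->arch.vector_pirq[vector] = pirq;
    setup_msi_irq(pdev, msi_desc);
    spin_unlock_irqrestore(&irq_desc[vector].lock, flags);

    return 0;
}
```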
4.1 --- a/xen/arch/x86/physdev.c Thu Dec 11 11:40:10 2008 +0000 4.2 +++ b/xen/arch/x86/physdev.c Thu Dec 11 11:48:19 2008 +0000 4.3 @@ -100,6 +100,7 @@ static int physdev_map_pirq(struct physd 4.4 goto free_domain; 4.5 } 4.6 4.7 + read_lock(&pcidevs_lock); 4.8 /* Verify or get pirq. */ 4.9 spin_lock(&d->event_lock); 4.10 if ( map->pirq < 0 ) 4.11 @@ -147,6 +148,7 @@ static int physdev_map_pirq(struct physd 4.12 4.13 done: 4.14 spin_unlock(&d->event_lock); 4.15 + read_unlock(&pcidevs_lock); 4.16 if ( (ret != 0) && (map->type == MAP_PIRQ_TYPE_MSI) && (map->index == -1) ) 4.17 free_irq_vector(vector); 4.18 free_domain: 4.19 @@ -170,9 +172,11 @@ static int physdev_unmap_pirq(struct phy 4.20 if ( d == NULL ) 4.21 return -ESRCH; 4.22 4.23 + read_lock(&pcidevs_lock); 4.24 spin_lock(&d->event_lock); 4.25 ret = unmap_domain_pirq(d, unmap->pirq); 4.26 spin_unlock(&d->event_lock); 4.27 + read_unlock(&pcidevs_lock); 4.28 4.29 rcu_unlock_domain(d); 4.30 4.31 @@ -341,10 +345,12 @@ ret_t do_physdev_op(int cmd, XEN_GUEST_H 4.32 4.33 irq_op.vector = assign_irq_vector(irq); 4.34 4.35 + read_lock(&pcidevs_lock); 4.36 spin_lock(&dom0->event_lock); 4.37 ret = map_domain_pirq(dom0, irq_op.irq, irq_op.vector, 4.38 MAP_PIRQ_TYPE_GSI, NULL); 4.39 spin_unlock(&dom0->event_lock); 4.40 + read_unlock(&pcidevs_lock); 4.41 4.42 if ( copy_to_guest(arg, &irq_op, 1) != 0 ) 4.43 ret = -EFAULT;
5.1 --- a/xen/arch/x86/x86_64/asm-offsets.c Thu Dec 11 11:40:10 2008 +0000 5.2 +++ b/xen/arch/x86/x86_64/asm-offsets.c Thu Dec 11 11:48:19 2008 +0000 5.3 @@ -60,6 +60,8 @@ void __dummy__(void) 5.4 DEFINE(UREGS_user_sizeof, sizeof(struct cpu_user_regs)); 5.5 BLANK(); 5.6 5.7 + OFFSET(irq_caps_offset, struct domain, irq_caps); 5.8 + OFFSET(next_in_list_offset, struct domain, next_in_list); 5.9 OFFSET(VCPU_processor, struct vcpu, processor); 5.10 OFFSET(VCPU_domain, struct vcpu, domain); 5.11 OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info);
6.1 --- a/xen/common/domain.c Thu Dec 11 11:40:10 2008 +0000 6.2 +++ b/xen/common/domain.c Thu Dec 11 11:48:19 2008 +0000 6.3 @@ -558,12 +558,12 @@ static void complete_domain_destroy(stru 6.4 sched_destroy_vcpu(v); 6.5 } 6.6 6.7 - rangeset_domain_destroy(d); 6.8 - 6.9 grant_table_destroy(d); 6.10 6.11 arch_domain_destroy(d); 6.12 6.13 + rangeset_domain_destroy(d); 6.14 + 6.15 sched_destroy_domain(d); 6.16 6.17 /* Free page used by xen oprofile buffer. */
7.1 --- a/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Dec 11 11:40:10 2008 +0000 7.2 +++ b/xen/drivers/passthrough/amd/pci_amd_iommu.c Thu Dec 11 11:48:19 2008 +0000 7.3 @@ -282,9 +282,13 @@ static int reassign_device( struct domai 7.4 struct amd_iommu *iommu; 7.5 int bdf; 7.6 7.7 - pdev = pci_lock_domain_pdev(source, bus, devfn); 7.8 + read_lock(&pcidevs_lock); 7.9 + pdev = pci_get_pdev_by_domain(source, bus, devfn); 7.10 if ( !pdev ) 7.11 - return -ENODEV; 7.12 + { 7.13 + read_unlock(&pcidevs_lock); 7.14 + return -ENODEV; 7.15 + } 7.16 7.17 bdf = (bus << 8) | devfn; 7.18 /* supported device? */ 7.19 @@ -293,8 +297,8 @@ static int reassign_device( struct domai 7.20 7.21 if ( !iommu ) 7.22 { 7.23 - spin_unlock(&pdev->lock); 7.24 - amd_iov_error("Fail to find iommu." 7.25 + read_unlock(&pcidevs_lock); 7.26 + amd_iov_error("Fail to find iommu." 7.27 " %x:%x.%x cannot be assigned to domain %d\n", 7.28 bus, PCI_SLOT(devfn), PCI_FUNC(devfn), target->domain_id); 7.29 return -ENODEV; 7.30 @@ -302,9 +306,7 @@ static int reassign_device( struct domai 7.31 7.32 amd_iommu_disable_domain_device(source, iommu, bdf); 7.33 7.34 - write_lock(&pcidevs_lock); 7.35 list_move(&pdev->domain_list, &target->arch.pdev_list); 7.36 - write_unlock(&pcidevs_lock); 7.37 pdev->domain = target; 7.38 7.39 amd_iommu_setup_domain_device(target, iommu, bdf); 7.40 @@ -312,7 +314,7 @@ static int reassign_device( struct domai 7.41 bus, PCI_SLOT(devfn), PCI_FUNC(devfn), 7.42 source->domain_id, target->domain_id); 7.43 7.44 - spin_unlock(&pdev->lock); 7.45 + read_unlock(&pcidevs_lock); 7.46 return 0; 7.47 } 7.48
8.1 --- a/xen/drivers/passthrough/iommu.c Thu Dec 11 11:40:10 2008 +0000 8.2 +++ b/xen/drivers/passthrough/iommu.c Thu Dec 11 11:48:19 2008 +0000 8.3 @@ -83,9 +83,12 @@ int iommu_domain_init(struct domain *dom 8.4 int iommu_add_device(struct pci_dev *pdev) 8.5 { 8.6 struct hvm_iommu *hd; 8.7 + 8.8 if ( !pdev->domain ) 8.9 return -EINVAL; 8.10 8.11 + ASSERT(spin_is_locked(&pcidevs_lock)); 8.12 + 8.13 hd = domain_hvm_iommu(pdev->domain); 8.14 if ( !iommu_enabled || !hd->platform_ops ) 8.15 return 0; 8.16 @@ -109,20 +112,24 @@ int iommu_remove_device(struct pci_dev * 8.17 int assign_device(struct domain *d, u8 bus, u8 devfn) 8.18 { 8.19 struct hvm_iommu *hd = domain_hvm_iommu(d); 8.20 - int rc; 8.21 + int rc = 0; 8.22 8.23 if ( !iommu_enabled || !hd->platform_ops ) 8.24 return 0; 8.25 8.26 + read_lock(&pcidevs_lock); 8.27 if ( (rc = hd->platform_ops->assign_device(d, bus, devfn)) ) 8.28 - return rc; 8.29 + goto done; 8.30 8.31 if ( has_arch_pdevs(d) && !is_hvm_domain(d) && !need_iommu(d) ) 8.32 { 8.33 d->need_iommu = 1; 8.34 - return iommu_populate_page_table(d); 8.35 + rc = iommu_populate_page_table(d); 8.36 + goto done; 8.37 } 8.38 - return 0; 8.39 +done: 8.40 + read_unlock(&pcidevs_lock); 8.41 + return rc; 8.42 } 8.43 8.44 static int iommu_populate_page_table(struct domain *d) 8.45 @@ -204,12 +211,29 @@ int iommu_unmap_page(struct domain *d, u 8.46 return hd->platform_ops->unmap_page(d, gfn); 8.47 } 8.48 8.49 -void deassign_device(struct domain *d, u8 bus, u8 devfn) 8.50 +int deassign_device(struct domain *d, u8 bus, u8 devfn) 8.51 { 8.52 struct hvm_iommu *hd = domain_hvm_iommu(d); 8.53 + struct pci_dev *pdev = NULL; 8.54 8.55 if ( !iommu_enabled || !hd->platform_ops ) 8.56 - return; 8.57 + return -EINVAL; 8.58 + 8.59 + read_lock(&pcidevs_lock); 8.60 + pdev = pci_get_pdev(bus, devfn); 8.61 + if (!pdev) 8.62 + { 8.63 + read_unlock(&pcidevs_lock); 8.64 + return -ENODEV; 8.65 + } 8.66 + 8.67 + if (pdev->domain != d) 8.68 + { 8.69 + read_unlock(&pcidevs_lock); 8.70 + gdprintk(XENLOG_ERR VTDPREFIX, 8.71 + "IOMMU: deassign a device not owned\n"); 8.72 + return -EINVAL; 8.73 + } 8.74 8.75 hd->platform_ops->reassign_device(d, dom0, bus, devfn); 8.76 8.77 @@ -218,6 +242,10 @@ void deassign_device(struct domain *d, u 8.78 d->need_iommu = 0; 8.79 hd->platform_ops->teardown(d); 8.80 } 8.81 + 8.82 + read_unlock(&pcidevs_lock); 8.83 + 8.84 + return 0; 8.85 } 8.86 8.87 static int iommu_setup(void)
9.1 --- a/xen/drivers/passthrough/pci.c Thu Dec 11 11:40:10 2008 +0000 9.2 +++ b/xen/drivers/passthrough/pci.c Thu Dec 11 11:48:19 2008 +0000 9.3 @@ -41,11 +41,11 @@ struct pci_dev *alloc_pdev(u8 bus, u8 de 9.4 pdev = xmalloc(struct pci_dev); 9.5 if ( !pdev ) 9.6 return NULL; 9.7 + memset(pdev, 0, sizeof(struct pci_dev)); 9.8 9.9 *((u8*) &pdev->bus) = bus; 9.10 *((u8*) &pdev->devfn) = devfn; 9.11 pdev->domain = NULL; 9.12 - spin_lock_init(&pdev->lock); 9.13 INIT_LIST_HEAD(&pdev->msi_list); 9.14 list_add(&pdev->alldevs_list, &alldevs_list); 9.15 9.16 @@ -58,42 +58,35 @@ void free_pdev(struct pci_dev *pdev) 9.17 xfree(pdev); 9.18 } 9.19 9.20 -struct pci_dev *pci_lock_pdev(int bus, int devfn) 9.21 +struct pci_dev *pci_get_pdev(int bus, int devfn) 9.22 { 9.23 - struct pci_dev *pdev; 9.24 + struct pci_dev *pdev = NULL; 9.25 9.26 - read_lock(&pcidevs_lock); 9.27 + ASSERT(spin_is_locked(&pcidevs_lock)); 9.28 + 9.29 list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) 9.30 if ( (pdev->bus == bus || bus == -1) && 9.31 (pdev->devfn == devfn || devfn == -1) ) 9.32 - { 9.33 - spin_lock(&pdev->lock); 9.34 - read_unlock(&pcidevs_lock); 9.35 - return pdev; 9.36 - } 9.37 - read_unlock(&pcidevs_lock); 9.38 + { 9.39 + return pdev; 9.40 + } 9.41 9.42 return NULL; 9.43 } 9.44 9.45 -struct pci_dev *pci_lock_domain_pdev(struct domain *d, int bus, int devfn) 9.46 +struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn) 9.47 { 9.48 - struct pci_dev *pdev; 9.49 + struct pci_dev *pdev = NULL; 9.50 + 9.51 + ASSERT(spin_is_locked(&pcidevs_lock)); 9.52 9.53 - read_lock(&pcidevs_lock); 9.54 - list_for_each_entry ( pdev, &d->arch.pdev_list, domain_list ) 9.55 - { 9.56 - spin_lock(&pdev->lock); 9.57 - if ( (pdev->bus == bus || bus == -1) && 9.58 - (pdev->devfn == devfn || devfn == -1) && 9.59 - (pdev->domain == d) ) 9.60 - { 9.61 - read_unlock(&pcidevs_lock); 9.62 - return pdev; 9.63 - } 9.64 - spin_unlock(&pdev->lock); 9.65 - } 9.66 - read_unlock(&pcidevs_lock); 9.67 + list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) 9.68 + if ( (pdev->bus == bus || bus == -1) && 9.69 + (pdev->devfn == devfn || devfn == -1) && 9.70 + (pdev->domain == d) ) 9.71 + { 9.72 + return pdev; 9.73 + } 9.74 9.75 return NULL; 9.76 } 9.77 @@ -109,24 +102,20 @@ int pci_add_device(u8 bus, u8 devfn) 9.78 goto out; 9.79 9.80 ret = 0; 9.81 - spin_lock(&pdev->lock); 9.82 if ( !pdev->domain ) 9.83 { 9.84 pdev->domain = dom0; 9.85 ret = iommu_add_device(pdev); 9.86 if ( ret ) 9.87 - { 9.88 - spin_unlock(&pdev->lock); 9.89 goto out; 9.90 - } 9.91 + 9.92 list_add(&pdev->domain_list, &dom0->arch.pdev_list); 9.93 } 9.94 - spin_unlock(&pdev->lock); 9.95 - printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus, 9.96 - PCI_SLOT(devfn), PCI_FUNC(devfn)); 9.97 9.98 out: 9.99 write_unlock(&pcidevs_lock); 9.100 + printk(XENLOG_DEBUG "PCI add device %02x:%02x.%x\n", bus, 9.101 + PCI_SLOT(devfn), PCI_FUNC(devfn)); 9.102 return ret; 9.103 } 9.104 9.105 @@ -139,7 +128,6 @@ int pci_remove_device(u8 bus, u8 devfn) 9.106 list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) 9.107 if ( pdev->bus == bus && pdev->devfn == devfn ) 9.108 { 9.109 - spin_lock(&pdev->lock); 9.110 ret = iommu_remove_device(pdev); 9.111 if ( pdev->domain ) 9.112 list_del(&pdev->domain_list); 9.113 @@ -199,14 +187,15 @@ void pci_release_devices(struct domain * 9.114 struct pci_dev *pdev; 9.115 u8 bus, devfn; 9.116 9.117 + read_lock(&pcidevs_lock); 9.118 pci_clean_dpci_irqs(d); 9.119 - while ( (pdev = pci_lock_domain_pdev(d, -1, -1)) ) 9.120 + while ( (pdev = 
pci_get_pdev_by_domain(d, -1, -1)) ) 9.121 { 9.122 pci_cleanup_msi(pdev); 9.123 bus = pdev->bus; devfn = pdev->devfn; 9.124 - spin_unlock(&pdev->lock); 9.125 deassign_device(d, bus, devfn); 9.126 } 9.127 + read_unlock(&pcidevs_lock); 9.128 } 9.129 9.130 #ifdef SUPPORT_MSI_REMAPPING 9.131 @@ -220,14 +209,12 @@ static void dump_pci_devices(unsigned ch 9.132 9.133 list_for_each_entry ( pdev, &alldevs_list, alldevs_list ) 9.134 { 9.135 - spin_lock(&pdev->lock); 9.136 printk("%02x:%02x.%x - dom %-3d - MSIs < ", 9.137 pdev->bus, PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn), 9.138 pdev->domain ? pdev->domain->domain_id : -1); 9.139 list_for_each_entry ( msi, &pdev->msi_list, list ) 9.140 printk("%d ", msi->vector); 9.141 printk(">\n"); 9.142 - spin_unlock(&pdev->lock); 9.143 } 9.144 9.145 read_unlock(&pcidevs_lock);
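pci_lock_pdev()/pci_lock_domain_pdev() are replaced by pci_get_pdev()/pci_get_pdev_by_domain(), which take no per-device lock and instead ASSERT that the caller already holds pcidevs_lock; that lock now also covers pdev->domain, pdev->msi_list and the per-domain device list. A small hypothetical example of the new contract (owner_of is illustrative only):

```c
/* Hypothetical lookup under the new contract: the per-device pdev->lock
 * is gone, so the fields are only stable while pcidevs_lock is held. */
static struct domain *owner_of(int bus, int devfn)
{
    struct pci_dev *pdev;
    struct domain *d = NULL;

    read_lock(&pcidevs_lock);
    pdev = pci_get_pdev(bus, devfn);
    if ( pdev )
        d = pdev->domain;        /* stable while pcidevs_lock is held */
    read_unlock(&pcidevs_lock);

    return d;
}
```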
10.1 --- a/xen/drivers/passthrough/vtd/iommu.c Thu Dec 11 11:40:10 2008 +0000 10.2 +++ b/xen/drivers/passthrough/vtd/iommu.c Thu Dec 11 11:48:19 2008 +0000 10.3 @@ -49,15 +49,14 @@ static void setup_dom0_rmrr(struct domai 10.4 static void context_set_domain_id(struct context_entry *context, 10.5 struct domain *d) 10.6 { 10.7 - unsigned long flags; 10.8 domid_t iommu_domid = domain_iommu_domid(d); 10.9 10.10 if ( iommu_domid == 0 ) 10.11 { 10.12 - spin_lock_irqsave(&domid_bitmap_lock, flags); 10.13 + spin_lock(&domid_bitmap_lock); 10.14 iommu_domid = find_first_zero_bit(domid_bitmap, domid_bitmap_size); 10.15 set_bit(iommu_domid, domid_bitmap); 10.16 - spin_unlock_irqrestore(&domid_bitmap_lock, flags); 10.17 + spin_unlock(&domid_bitmap_lock); 10.18 d->arch.hvm_domain.hvm_iommu.iommu_domid = iommu_domid; 10.19 } 10.20 10.21 @@ -140,10 +139,9 @@ int nr_iommus; 10.22 static u64 bus_to_context_maddr(struct iommu *iommu, u8 bus) 10.23 { 10.24 struct root_entry *root, *root_entries; 10.25 - unsigned long flags; 10.26 u64 maddr; 10.27 10.28 - spin_lock_irqsave(&iommu->lock, flags); 10.29 + ASSERT(spin_is_locked(&iommu->lock)); 10.30 root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr); 10.31 root = &root_entries[bus]; 10.32 if ( !root_present(*root) ) 10.33 @@ -152,7 +150,6 @@ static u64 bus_to_context_maddr(struct i 10.34 if ( maddr == 0 ) 10.35 { 10.36 unmap_vtd_domain_page(root_entries); 10.37 - spin_unlock_irqrestore(&iommu->lock, flags); 10.38 return 0; 10.39 } 10.40 set_root_value(*root, maddr); 10.41 @@ -161,36 +158,9 @@ static u64 bus_to_context_maddr(struct i 10.42 } 10.43 maddr = (u64) get_context_addr(*root); 10.44 unmap_vtd_domain_page(root_entries); 10.45 - spin_unlock_irqrestore(&iommu->lock, flags); 10.46 return maddr; 10.47 } 10.48 10.49 -static int device_context_mapped(struct iommu *iommu, u8 bus, u8 devfn) 10.50 -{ 10.51 - struct root_entry *root, *root_entries; 10.52 - struct context_entry *context; 10.53 - u64 context_maddr; 10.54 - int ret; 10.55 - unsigned long flags; 10.56 - 10.57 - spin_lock_irqsave(&iommu->lock, flags); 10.58 - root_entries = (struct root_entry *)map_vtd_domain_page(iommu->root_maddr); 10.59 - root = &root_entries[bus]; 10.60 - if ( !root_present(*root) ) 10.61 - { 10.62 - ret = 0; 10.63 - goto out; 10.64 - } 10.65 - context_maddr = get_context_addr(*root); 10.66 - context = (struct context_entry *)map_vtd_domain_page(context_maddr); 10.67 - ret = context_present(context[devfn]); 10.68 - unmap_vtd_domain_page(context); 10.69 - out: 10.70 - unmap_vtd_domain_page(root_entries); 10.71 - spin_unlock_irqrestore(&iommu->lock, flags); 10.72 - return ret; 10.73 -} 10.74 - 10.75 static u64 addr_to_dma_page_maddr(struct domain *domain, u64 addr, int alloc) 10.76 { 10.77 struct hvm_iommu *hd = domain_hvm_iommu(domain); 10.78 @@ -198,12 +168,11 @@ static u64 addr_to_dma_page_maddr(struct 10.79 struct dma_pte *parent, *pte = NULL; 10.80 int level = agaw_to_level(hd->agaw); 10.81 int offset; 10.82 - unsigned long flags; 10.83 u64 pte_maddr = 0, maddr; 10.84 u64 *vaddr = NULL; 10.85 10.86 addr &= (((u64)1) << addr_width) - 1; 10.87 - spin_lock_irqsave(&hd->mapping_lock, flags); 10.88 + ASSERT(spin_is_locked(&hd->mapping_lock)); 10.89 if ( hd->pgd_maddr == 0 ) 10.90 if ( !alloc || ((hd->pgd_maddr = alloc_pgtable_maddr(domain)) == 0) ) 10.91 goto out; 10.92 @@ -252,7 +221,6 @@ static u64 addr_to_dma_page_maddr(struct 10.93 10.94 unmap_vtd_domain_page(parent); 10.95 out: 10.96 - spin_unlock_irqrestore(&hd->mapping_lock, flags); 10.97 return pte_maddr; 
10.98 } 10.99 10.100 @@ -536,22 +504,30 @@ static void dma_pte_clear_one(struct dom 10.101 struct dma_pte *page = NULL, *pte = NULL; 10.102 u64 pg_maddr; 10.103 10.104 + spin_lock(&hd->mapping_lock); 10.105 /* get last level pte */ 10.106 pg_maddr = addr_to_dma_page_maddr(domain, addr, 0); 10.107 if ( pg_maddr == 0 ) 10.108 + { 10.109 + spin_unlock(&hd->mapping_lock); 10.110 return; 10.111 + } 10.112 + 10.113 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); 10.114 pte = page + address_level_offset(addr, 1); 10.115 10.116 if ( !dma_pte_present(*pte) ) 10.117 { 10.118 + spin_unlock(&hd->mapping_lock); 10.119 unmap_vtd_domain_page(page); 10.120 return; 10.121 } 10.122 10.123 dma_clear_pte(*pte); 10.124 + spin_unlock(&hd->mapping_lock); 10.125 iommu_flush_cache_entry(pte); 10.126 10.127 + /* No need pcidevs_lock here since do that on assign/deassign device*/ 10.128 for_each_drhd_unit ( drhd ) 10.129 { 10.130 iommu = drhd->iommu; 10.131 @@ -598,16 +574,18 @@ static int iommu_set_root_entry(struct i 10.132 unsigned long flags; 10.133 s_time_t start_time; 10.134 10.135 - spin_lock_irqsave(&iommu->register_lock, flags); 10.136 + spin_lock(&iommu->lock); 10.137 10.138 if ( iommu->root_maddr == 0 ) 10.139 iommu->root_maddr = alloc_pgtable_maddr(NULL); 10.140 if ( iommu->root_maddr == 0 ) 10.141 { 10.142 - spin_unlock_irqrestore(&iommu->register_lock, flags); 10.143 + spin_unlock(&iommu->lock); 10.144 return -ENOMEM; 10.145 } 10.146 10.147 + spin_unlock(&iommu->lock); 10.148 + spin_lock_irqsave(&iommu->register_lock, flags); 10.149 dmar_writeq(iommu->reg, DMAR_RTADDR_REG, iommu->root_maddr); 10.150 cmd = iommu->gcmd | DMA_GCMD_SRTP; 10.151 dmar_writel(iommu->reg, DMAR_GCMD_REG, cmd); 10.152 @@ -742,9 +720,7 @@ static void iommu_page_fault(int vector, 10.153 dprintk(XENLOG_WARNING VTDPREFIX, 10.154 "iommu_page_fault: iommu->reg = %p\n", iommu->reg); 10.155 10.156 - spin_lock_irqsave(&iommu->register_lock, flags); 10.157 fault_status = dmar_readl(iommu->reg, DMAR_FSTS_REG); 10.158 - spin_unlock_irqrestore(&iommu->register_lock, flags); 10.159 10.160 iommu_fault_status(fault_status); 10.161 10.162 @@ -1057,21 +1033,30 @@ static int domain_context_mapping_one( 10.163 { 10.164 struct hvm_iommu *hd = domain_hvm_iommu(domain); 10.165 struct context_entry *context, *context_entries; 10.166 - unsigned long flags; 10.167 u64 maddr, pgd_maddr; 10.168 + struct pci_dev *pdev = NULL; 10.169 int agaw; 10.170 10.171 + ASSERT(spin_is_locked(&pcidevs_lock)); 10.172 + spin_lock(&iommu->lock); 10.173 maddr = bus_to_context_maddr(iommu, bus); 10.174 context_entries = (struct context_entry *)map_vtd_domain_page(maddr); 10.175 context = &context_entries[devfn]; 10.176 10.177 if ( context_present(*context) ) 10.178 { 10.179 + int res = 0; 10.180 + 10.181 + pdev = pci_get_pdev(bus, devfn); 10.182 + if (!pdev) 10.183 + res = -ENODEV; 10.184 + else if (pdev->domain != domain) 10.185 + res = -EINVAL; 10.186 unmap_vtd_domain_page(context_entries); 10.187 - return 0; 10.188 + spin_unlock(&iommu->lock); 10.189 + return res; 10.190 } 10.191 10.192 - spin_lock_irqsave(&iommu->lock, flags); 10.193 if ( iommu_passthrough && 10.194 ecap_pass_thru(iommu->ecap) && (domain->domain_id == 0) ) 10.195 { 10.196 @@ -1080,6 +1065,8 @@ static int domain_context_mapping_one( 10.197 } 10.198 else 10.199 { 10.200 + spin_lock(&hd->mapping_lock); 10.201 + 10.202 /* Ensure we have pagetables allocated down to leaf PTE. 
*/ 10.203 if ( hd->pgd_maddr == 0 ) 10.204 { 10.205 @@ -1087,8 +1074,9 @@ static int domain_context_mapping_one( 10.206 if ( hd->pgd_maddr == 0 ) 10.207 { 10.208 nomem: 10.209 + spin_unlock(&hd->mapping_lock); 10.210 + spin_unlock(&iommu->lock); 10.211 unmap_vtd_domain_page(context_entries); 10.212 - spin_unlock_irqrestore(&iommu->lock, flags); 10.213 return -ENOMEM; 10.214 } 10.215 } 10.216 @@ -1108,6 +1096,7 @@ static int domain_context_mapping_one( 10.217 10.218 context_set_address_root(*context, pgd_maddr); 10.219 context_set_translation_type(*context, CONTEXT_TT_MULTI_LEVEL); 10.220 + spin_unlock(&hd->mapping_lock); 10.221 } 10.222 10.223 /* 10.224 @@ -1119,8 +1108,7 @@ static int domain_context_mapping_one( 10.225 context_set_fault_enable(*context); 10.226 context_set_present(*context); 10.227 iommu_flush_cache_entry(context); 10.228 - 10.229 - unmap_vtd_domain_page(context_entries); 10.230 + spin_unlock(&iommu->lock); 10.231 10.232 /* Context entry was previously non-present (with domid 0). */ 10.233 if ( iommu_flush_context_device(iommu, 0, (((u16)bus) << 8) | devfn, 10.234 @@ -1130,7 +1118,8 @@ static int domain_context_mapping_one( 10.235 iommu_flush_iotlb_dsi(iommu, 0, 1); 10.236 10.237 set_bit(iommu->index, &hd->iommu_bitmap); 10.238 - spin_unlock_irqrestore(&iommu->lock, flags); 10.239 + 10.240 + unmap_vtd_domain_page(context_entries); 10.241 10.242 return 0; 10.243 } 10.244 @@ -1174,17 +1163,15 @@ int pdev_type(u8 bus, u8 devfn) 10.245 } 10.246 10.247 #define MAX_BUSES 256 10.248 +static DEFINE_SPINLOCK(bus2bridge_lock); 10.249 static struct { u8 map, bus, devfn; } bus2bridge[MAX_BUSES]; 10.250 10.251 -static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus) 10.252 +static int _find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus) 10.253 { 10.254 int cnt = 0; 10.255 *secbus = *bus; 10.256 10.257 - if ( *bus == 0 ) 10.258 - /* assume integrated PCI devices in RC have valid requester-id */ 10.259 - return 1; 10.260 - 10.261 + ASSERT(spin_is_locked(&bus2bridge_lock)); 10.262 if ( !bus2bridge[*bus].map ) 10.263 return 0; 10.264 10.265 @@ -1200,6 +1187,21 @@ static int find_pcie_endpoint(u8 *bus, u 10.266 return 1; 10.267 } 10.268 10.269 +static int find_pcie_endpoint(u8 *bus, u8 *devfn, u8 *secbus) 10.270 +{ 10.271 + int ret = 0; 10.272 + 10.273 + if ( *bus == 0 ) 10.274 + /* assume integrated PCI devices in RC have valid requester-id */ 10.275 + return 1; 10.276 + 10.277 + spin_lock(&bus2bridge_lock); 10.278 + ret = _find_pcie_endpoint(bus, devfn, secbus); 10.279 + spin_unlock(&bus2bridge_lock); 10.280 + 10.281 + return ret; 10.282 +} 10.283 + 10.284 static int domain_context_mapping(struct domain *domain, u8 bus, u8 devfn) 10.285 { 10.286 struct acpi_drhd_unit *drhd; 10.287 @@ -1212,6 +1214,8 @@ static int domain_context_mapping(struct 10.288 if ( !drhd ) 10.289 return -ENODEV; 10.290 10.291 + ASSERT(spin_is_locked(&pcidevs_lock)); 10.292 + 10.293 type = pdev_type(bus, devfn); 10.294 switch ( type ) 10.295 { 10.296 @@ -1226,12 +1230,14 @@ static int domain_context_mapping(struct 10.297 if ( type == DEV_TYPE_PCIe_BRIDGE ) 10.298 break; 10.299 10.300 + spin_lock(&bus2bridge_lock); 10.301 for ( sub_bus &= 0xff; sec_bus <= sub_bus; sec_bus++ ) 10.302 { 10.303 bus2bridge[sec_bus].map = 1; 10.304 bus2bridge[sec_bus].bus = bus; 10.305 bus2bridge[sec_bus].devfn = devfn; 10.306 } 10.307 + spin_unlock(&bus2bridge_lock); 10.308 break; 10.309 10.310 case DEV_TYPE_PCIe_ENDPOINT: 10.311 @@ -1290,20 +1296,22 @@ static int domain_context_unmap_one( 10.312 u8 bus, u8 devfn) 10.313 { 10.314 
struct context_entry *context, *context_entries; 10.315 - unsigned long flags; 10.316 u64 maddr; 10.317 10.318 + ASSERT(spin_is_locked(&pcidevs_lock)); 10.319 + spin_lock(&iommu->lock); 10.320 + 10.321 maddr = bus_to_context_maddr(iommu, bus); 10.322 context_entries = (struct context_entry *)map_vtd_domain_page(maddr); 10.323 context = &context_entries[devfn]; 10.324 10.325 if ( !context_present(*context) ) 10.326 { 10.327 + spin_unlock(&iommu->lock); 10.328 unmap_vtd_domain_page(context_entries); 10.329 return 0; 10.330 } 10.331 10.332 - spin_lock_irqsave(&iommu->lock, flags); 10.333 context_clear_present(*context); 10.334 context_clear_entry(*context); 10.335 iommu_flush_cache_entry(context); 10.336 @@ -1315,8 +1323,8 @@ static int domain_context_unmap_one( 10.337 else 10.338 iommu_flush_iotlb_dsi(iommu, domain_iommu_domid(domain), 0); 10.339 10.340 + spin_unlock(&iommu->lock); 10.341 unmap_vtd_domain_page(context_entries); 10.342 - spin_unlock_irqrestore(&iommu->lock, flags); 10.343 10.344 return 0; 10.345 } 10.346 @@ -1380,7 +1388,10 @@ static int reassign_device_ownership( 10.347 struct iommu *pdev_iommu; 10.348 int ret, found = 0; 10.349 10.350 - if ( !(pdev = pci_lock_domain_pdev(source, bus, devfn)) ) 10.351 + ASSERT(spin_is_locked(&pcidevs_lock)); 10.352 + pdev = pci_get_pdev_by_domain(source, bus, devfn); 10.353 + 10.354 + if (!pdev) 10.355 return -ENODEV; 10.356 10.357 drhd = acpi_find_matched_drhd_unit(bus, devfn); 10.358 @@ -1391,14 +1402,9 @@ static int reassign_device_ownership( 10.359 if ( ret ) 10.360 return ret; 10.361 10.362 - write_lock(&pcidevs_lock); 10.363 list_move(&pdev->domain_list, &target->arch.pdev_list); 10.364 - write_unlock(&pcidevs_lock); 10.365 pdev->domain = target; 10.366 10.367 - spin_unlock(&pdev->lock); 10.368 - 10.369 - read_lock(&pcidevs_lock); 10.370 for_each_pdev ( source, pdev ) 10.371 { 10.372 drhd = acpi_find_matched_drhd_unit(pdev->bus, pdev->devfn); 10.373 @@ -1408,7 +1414,6 @@ static int reassign_device_ownership( 10.374 break; 10.375 } 10.376 } 10.377 - read_unlock(&pcidevs_lock); 10.378 10.379 if ( !found ) 10.380 clear_bit(pdev_iommu->index, &source_hd->iommu_bitmap); 10.381 @@ -1423,20 +1428,13 @@ void iommu_domain_teardown(struct domain 10.382 if ( list_empty(&acpi_drhd_units) ) 10.383 return; 10.384 10.385 + ASSERT(spin_is_locked(&pcidevs_lock)); 10.386 + spin_lock(&hd->mapping_lock); 10.387 iommu_free_pagetable(hd->pgd_maddr, agaw_to_level(hd->agaw)); 10.388 hd->pgd_maddr = 0; 10.389 - iommu_domid_release(d); 10.390 -} 10.391 + spin_unlock(&hd->mapping_lock); 10.392 10.393 -static int domain_context_mapped(u8 bus, u8 devfn) 10.394 -{ 10.395 - struct acpi_drhd_unit *drhd; 10.396 - 10.397 - for_each_drhd_unit ( drhd ) 10.398 - if ( device_context_mapped(drhd->iommu, bus, devfn) ) 10.399 - return 1; 10.400 - 10.401 - return 0; 10.402 + iommu_domid_release(d); 10.403 } 10.404 10.405 int intel_iommu_map_page( 10.406 @@ -1457,17 +1455,27 @@ int intel_iommu_map_page( 10.407 ecap_pass_thru(iommu->ecap) && (d->domain_id == 0) ) 10.408 return 0; 10.409 10.410 + spin_lock(&hd->mapping_lock); 10.411 + 10.412 pg_maddr = addr_to_dma_page_maddr(d, (paddr_t)gfn << PAGE_SHIFT_4K, 1); 10.413 if ( pg_maddr == 0 ) 10.414 + { 10.415 + spin_unlock(&hd->mapping_lock); 10.416 return -ENOMEM; 10.417 + } 10.418 page = (struct dma_pte *)map_vtd_domain_page(pg_maddr); 10.419 pte = page + (gfn & LEVEL_MASK); 10.420 pte_present = dma_pte_present(*pte); 10.421 dma_set_pte_addr(*pte, (paddr_t)mfn << PAGE_SHIFT_4K); 10.422 dma_set_pte_prot(*pte, DMA_PTE_READ | 
DMA_PTE_WRITE); 10.423 iommu_flush_cache_entry(pte); 10.424 + spin_unlock(&hd->mapping_lock); 10.425 unmap_vtd_domain_page(page); 10.426 10.427 + /* 10.428 + * No need pcideves_lock here because we have flush 10.429 + * when assign/deassign device 10.430 + */ 10.431 for_each_drhd_unit ( drhd ) 10.432 { 10.433 iommu = drhd->iommu; 10.434 @@ -1510,6 +1518,7 @@ static int iommu_prepare_rmrr_dev(struct 10.435 u64 base, end; 10.436 unsigned long base_pfn, end_pfn; 10.437 10.438 + ASSERT(spin_is_locked(&pcidevs_lock)); 10.439 ASSERT(rmrr->base_address < rmrr->end_address); 10.440 10.441 base = rmrr->base_address & PAGE_MASK_4K; 10.442 @@ -1523,8 +1532,7 @@ static int iommu_prepare_rmrr_dev(struct 10.443 base_pfn++; 10.444 } 10.445 10.446 - if ( domain_context_mapped(bus, devfn) == 0 ) 10.447 - ret = domain_context_mapping(d, bus, devfn); 10.448 + ret = domain_context_mapping(d, bus, devfn); 10.449 10.450 return ret; 10.451 } 10.452 @@ -1535,6 +1543,8 @@ static int intel_iommu_add_device(struct 10.453 u16 bdf; 10.454 int ret, i; 10.455 10.456 + ASSERT(spin_is_locked(&pcidevs_lock)); 10.457 + 10.458 if ( !pdev->domain ) 10.459 return -EINVAL; 10.460 10.461 @@ -1689,6 +1699,7 @@ static void setup_dom0_rmrr(struct domai 10.462 u16 bdf; 10.463 int ret, i; 10.464 10.465 + read_lock(&pcidevs_lock); 10.466 for_each_rmrr_device ( rmrr, bdf, i ) 10.467 { 10.468 ret = iommu_prepare_rmrr_dev(d, rmrr, PCI_BUS(bdf), PCI_DEVFN2(bdf)); 10.469 @@ -1696,6 +1707,7 @@ static void setup_dom0_rmrr(struct domai 10.470 gdprintk(XENLOG_ERR VTDPREFIX, 10.471 "IOMMU: mapping reserved region failed\n"); 10.472 } 10.473 + read_unlock(&pcidevs_lock); 10.474 } 10.475 10.476 int intel_vtd_setup(void) 10.477 @@ -1748,27 +1760,43 @@ int device_assigned(u8 bus, u8 devfn) 10.478 { 10.479 struct pci_dev *pdev; 10.480 10.481 - if ( (pdev = pci_lock_domain_pdev(dom0, bus, devfn)) ) 10.482 + read_lock(&pcidevs_lock); 10.483 + pdev = pci_get_pdev_by_domain(dom0, bus, devfn); 10.484 + if (!pdev) 10.485 { 10.486 - spin_unlock(&pdev->lock); 10.487 - return 0; 10.488 + read_unlock(&pcidevs_lock); 10.489 + return -1; 10.490 } 10.491 10.492 - return 1; 10.493 + read_unlock(&pcidevs_lock); 10.494 + return 0; 10.495 } 10.496 10.497 int intel_iommu_assign_device(struct domain *d, u8 bus, u8 devfn) 10.498 { 10.499 struct acpi_rmrr_unit *rmrr; 10.500 int ret = 0, i; 10.501 + struct pci_dev *pdev; 10.502 u16 bdf; 10.503 10.504 if ( list_empty(&acpi_drhd_units) ) 10.505 return -ENODEV; 10.506 10.507 + ASSERT(spin_is_locked(&pcidevs_lock)); 10.508 + pdev = pci_get_pdev(bus, devfn); 10.509 + if (!pdev) 10.510 + return -ENODEV; 10.511 + 10.512 + if (pdev->domain != dom0) 10.513 + { 10.514 + gdprintk(XENLOG_ERR VTDPREFIX, 10.515 + "IOMMU: assign a assigned device\n"); 10.516 + return -EBUSY; 10.517 + } 10.518 + 10.519 ret = reassign_device_ownership(dom0, d, bus, devfn); 10.520 if ( ret ) 10.521 - return ret; 10.522 + goto done; 10.523 10.524 /* Setup rmrr identity mapping */ 10.525 for_each_rmrr_device( rmrr, bdf, i ) 10.526 @@ -1779,16 +1807,20 @@ int intel_iommu_assign_device(struct dom 10.527 * ignore USB RMRR temporarily. 10.528 */ 10.529 if ( is_usb_device(bus, devfn) ) 10.530 - return 0; 10.531 + { 10.532 + ret = 0; 10.533 + goto done; 10.534 + } 10.535 10.536 ret = iommu_prepare_rmrr_dev(d, rmrr, bus, devfn); 10.537 if ( ret ) 10.538 gdprintk(XENLOG_ERR VTDPREFIX, 10.539 "IOMMU: mapping reserved region failed\n"); 10.540 - return ret; 10.541 + goto done; 10.542 } 10.543 } 10.544 10.545 +done: 10.546 return ret; 10.547 } 10.548
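The VT-d hunks switch iommu->lock and hd->mapping_lock to plain spinlocks taken inside the mapping paths, with pcidevs_lock held by the caller; bus_to_context_maddr() and addr_to_dma_page_maddr() now only ASSERT their locks. A hypothetical skeleton of the resulting lock nesting in domain_context_mapping_one() (condensed; error handling and the actual context-entry programming are omitted):

```c
/* Hypothetical skeleton of the nesting established by this changeset:
 *   pcidevs_lock (read, in the caller) -> iommu->lock -> hd->mapping_lock */
static void vtd_map_context_sketch(struct iommu *iommu, struct hvm_iommu *hd,
                                   u8 bus)
{
    u64 maddr;

    ASSERT(spin_is_locked(&pcidevs_lock));

    spin_lock(&iommu->lock);                    /* root/context entries      */
    maddr = bus_to_context_maddr(iommu, bus);   /* asserts iommu->lock held  */

    spin_lock(&hd->mapping_lock);               /* domain's VT-d page tables */
    /* ... ensure hd->pgd_maddr exists and point the context entry at it ... */
    spin_unlock(&hd->mapping_lock);

    spin_unlock(&iommu->lock);

    /* Context-cache and IOTLB flushes happen after iommu->lock is dropped. */
    (void)maddr;
}
```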
11.1 --- a/xen/include/asm-x86/msi.h Thu Dec 11 11:40:10 2008 +0000 11.2 +++ b/xen/include/asm-x86/msi.h Thu Dec 11 11:48:19 2008 +0000 11.3 @@ -68,13 +68,17 @@ struct msi_msg { 11.4 u32 data; /* 16 bits of msi message data */ 11.5 }; 11.6 11.7 +struct msi_desc; 11.8 /* Helper functions */ 11.9 extern void mask_msi_vector(unsigned int vector); 11.10 extern void unmask_msi_vector(unsigned int vector); 11.11 extern void set_msi_affinity(unsigned int vector, cpumask_t mask); 11.12 -extern int pci_enable_msi(struct msi_info *msi); 11.13 -extern void pci_disable_msi(int vector); 11.14 +extern int pci_enable_msi(struct msi_info *msi, struct msi_desc **desc); 11.15 +extern void pci_disable_msi(struct msi_desc *desc); 11.16 extern void pci_cleanup_msi(struct pci_dev *pdev); 11.17 +extern int setup_msi_irq(struct pci_dev *dev, struct msi_desc *desc); 11.18 +extern void teardown_msi_vector(int vector); 11.19 +extern int msi_free_vector(struct msi_desc *entry); 11.20 11.21 struct msi_desc { 11.22 struct {
12.1 --- a/xen/include/xen/iommu.h Thu Dec 11 11:40:10 2008 +0000 12.2 +++ b/xen/include/xen/iommu.h Thu Dec 11 11:48:19 2008 +0000 12.3 @@ -62,7 +62,7 @@ int iommu_domain_init(struct domain *d); 12.4 void iommu_domain_destroy(struct domain *d); 12.5 int device_assigned(u8 bus, u8 devfn); 12.6 int assign_device(struct domain *d, u8 bus, u8 devfn); 12.7 -void deassign_device(struct domain *d, u8 bus, u8 devfn); 12.8 +int deassign_device(struct domain *d, u8 bus, u8 devfn); 12.9 int iommu_get_device_group(struct domain *d, u8 bus, u8 devfn, 12.10 XEN_GUEST_HANDLE_64(uint32) buf, int max_sdevs); 12.11 int iommu_map_page(struct domain *d, unsigned long gfn, unsigned long mfn);
13.1 --- a/xen/include/xen/pci.h Thu Dec 11 11:40:10 2008 +0000 13.2 +++ b/xen/include/xen/pci.h Thu Dec 11 11:48:19 2008 +0000 13.3 @@ -36,7 +36,6 @@ struct pci_dev { 13.4 struct domain *domain; 13.5 const u8 bus; 13.6 const u8 devfn; 13.7 - spinlock_t lock; 13.8 }; 13.9 13.10 #define for_each_pdev(domain, pdev) \ 13.11 @@ -59,6 +58,8 @@ struct pci_dev *pci_lock_domain_pdev(str 13.12 void pci_release_devices(struct domain *d); 13.13 int pci_add_device(u8 bus, u8 devfn); 13.14 int pci_remove_device(u8 bus, u8 devfn); 13.15 +struct pci_dev *pci_get_pdev(int bus, int devfn); 13.16 +struct pci_dev *pci_get_pdev_by_domain(struct domain *d, int bus, int devfn); 13.17 13.18 uint8_t pci_conf_read8( 13.19 unsigned int bus, unsigned int dev, unsigned int func, unsigned int reg);