debuggers.hg

view xen/common/physdev.c @ 3685:bbe8541361dd

bitkeeper revision 1.1159.1.542 (42038a42_52IAalMZRKdTn0UbVN5fw)

Merge tempest.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xeno.bk
into tempest.cl.cam.ac.uk:/local/scratch/smh22/xen-unstable.bk
author smh22@tempest.cl.cam.ac.uk
date Fri Feb 04 14:44:18 2005 +0000 (2005-02-04)
parents d8ba911dce48 0ef6e8e6e85d
children 88957a238191
line source
1 /* -*- Mode:C; c-basic-offset:4; tab-width:4 -*-
2 ****************************************************************************
3 * (c) 2004 - Rolf Neugebauer - Intel Research Cambridge
4 * (c) 2004 - Keir Fraser - University of Cambridge
5 ****************************************************************************
6 *
7 * Description: allows a domain to access devices on the PCI bus
8 *
9 * A guest OS may be given access to particular devices on the PCI bus.
10 * For each domain a list of PCI devices is maintained, describing the
11 * access mode for the domain.
12 *
13 * Guests can figure out the virtualised PCI space through normal PCI config
14 * register access. Some of the accesses, in particular write accesses, are
15 * faked. For example the sequence for detecting the IO regions, which requires
16 * writes to determine the size of the region, is faked out by a very simple
17 * state machine, preventing direct writes to the PCI config registers by a
18 * guest.
19 */
21 #include <xen/config.h>
22 #include <xen/init.h>
23 #include <xen/lib.h>
24 #include <xen/types.h>
25 #include <xen/sched.h>
26 #include <xen/pci.h>
27 #include <xen/irq.h>
28 #include <xen/event.h>
29 #include <asm/pci.h>
30 #include <public/xen.h>
31 #include <public/physdev.h>
/* Called by PHYSDEV_PCI_INITIALISE_DEVICE to finalise IRQ routing. */
extern void pcibios_enable_irq(struct pci_dev *dev);

/* Per-access trace output. Compiled out: flip the '#if 0' to enable it. */
#if 0
#define VERBOSE_INFO(_f, _a...) printk( _f , ## _a )
#else
#define VERBOSE_INFO(_f, _a...) ((void)0)
#endif

/* Informational messages; only printed when VERBOSE is defined. */
#ifdef VERBOSE
#define INFO(_f, _a...) printk( _f, ## _a )
#else
#define INFO(_f, _a...) ((void)0)
#endif

/*
 * While this is defined, every '#ifndef SLOPPY_CHECKING' section in this
 * file (the faked base-address / ROM-address register handling) is
 * compiled out.
 */
#define SLOPPY_CHECKING

/* Access modes stored in phys_dev_t.flags. */
#define ACC_READ 1
#define ACC_WRITE 2

/* Upper bounds for PCI-device addressing. */
#define PCI_BUSMAX 255
#define PCI_DEVMAX 31
#define PCI_FUNCMAX 7
#define PCI_REGMAX 255

/* Bit offsets into state. */
#define ST_BASE_ADDRESS 0 /* bits 0-5: are for base address access */
#define ST_ROM_ADDRESS 6 /* bit 6: is for rom address access */
/*
 * One entry on a domain's pcidev_list: records that the domain may access
 * a particular PCI device, and with which access mode.
 */
typedef struct _phys_dev_st {
    int flags; /* flags for access etc (ACC_READ / ACC_WRITE) */
    struct pci_dev *dev; /* the device */
    struct list_head node; /* link to the list */
    struct domain *owner; /* 'owner of this device' */
    int state; /* state for various checks (bit offsets: ST_*) */
} phys_dev_t;
72 /* Find a device on a per-domain device list. */
73 static phys_dev_t *find_pdev(struct domain *p, struct pci_dev *dev)
74 {
75 phys_dev_t *t, *res = NULL;
77 list_for_each_entry ( t, &p->pcidev_list, node )
78 {
79 if ( dev == t->dev )
80 {
81 res = t;
82 break;
83 }
84 }
85 return res;
86 }
88 /* Add a device to a per-domain device-access list. */
89 static void add_dev_to_task(struct domain *p,
90 struct pci_dev *dev, int acc)
91 {
92 phys_dev_t *pdev;
94 if ( (pdev = find_pdev(p, dev)) )
95 {
96 /* Sevice already on list: update access permissions. */
97 pdev->flags = acc;
98 return;
99 }
101 if ( (pdev = xmalloc(phys_dev_t)) == NULL )
102 {
103 INFO("Error allocating pdev structure.\n");
104 return;
105 }
107 pdev->dev = dev;
108 pdev->flags = acc;
109 pdev->state = 0;
110 list_add(&pdev->node, &p->pcidev_list);
112 if ( acc == ACC_WRITE )
113 pdev->owner = p;
114 }
116 /*
117 * physdev_pci_access_modify:
118 * Allow/disallow access to a specific PCI device. Guests should not be
119 * allowed to see bridge devices as it needlessly complicates things (one
120 * possible exception to this is the AGP bridge). If the given device is a
121 * bridge, then the domain should get access to all the leaf devices below
122 * that bridge (XXX this is unimplemented!).
123 */
124 int physdev_pci_access_modify(
125 domid_t dom, int bus, int dev, int func, int enable)
126 {
127 struct domain *p;
128 struct exec_domain *ed, *edc;
129 struct pci_dev *pdev;
130 int i, j, rc = 0;
132 if ( !IS_PRIV(current->domain) )
133 BUG();
135 if ( (bus > PCI_BUSMAX) || (dev > PCI_DEVMAX) || (func > PCI_FUNCMAX) )
136 return -EINVAL;
138 if ( !enable )
139 {
140 INFO("Disallowing access is not yet supported.\n");
141 return -EINVAL;
142 }
144 INFO("physdev_pci_access_modify: %02x:%02x:%02x\n", bus, dev, func);
146 if ( (p = find_domain_by_id(dom)) == NULL )
147 return -ESRCH;
149 ed = p->exec_domain[0]; /* XXX */
151 /* Make the domain privileged. */
152 set_bit(DF_PHYSDEV, &p->d_flags);
153 /* FIXME: MAW for now make the domain REALLY privileged so that it
154 * can run a backend driver (hw access should work OK otherwise) */
155 set_bit(DF_PRIVILEGED, &p->d_flags);
157 /* Grant write access to the specified device. */
158 if ( (pdev = pci_find_slot(bus, PCI_DEVFN(dev, func))) == NULL )
159 {
160 INFO(" dev does not exist\n");
161 rc = -ENODEV;
162 goto out;
163 }
164 add_dev_to_task(p, pdev, ACC_WRITE);
166 INFO(" add RW %02x:%02x:%02x\n", pdev->bus->number,
167 PCI_SLOT(pdev->devfn), PCI_FUNC(pdev->devfn));
169 /* Is the device a bridge or cardbus? */
170 if ( pdev->hdr_type != PCI_HEADER_TYPE_NORMAL )
171 INFO("XXX can't give access to bridge devices yet\n");
173 /* Now, setup access to the IO ports and memory regions for the device. */
175 if ( ed->thread.io_bitmap == NULL )
176 {
177 if ( (ed->thread.io_bitmap = xmalloc_array(u8, IOBMP_BYTES)) == NULL )
178 {
179 rc = -ENOMEM;
180 goto out;
181 }
182 memset(ed->thread.io_bitmap, 0xFF, IOBMP_BYTES);
184 ed->thread.io_bitmap_sel = ~0ULL;
186 for_each_exec_domain(p, edc) {
187 if (edc == ed)
188 continue;
189 edc->thread.io_bitmap = ed->thread.io_bitmap;
190 }
191 }
193 for ( i = 0; i < DEVICE_COUNT_RESOURCE; i++ )
194 {
195 struct resource *r = &pdev->resource[i];
197 if ( r->flags & IORESOURCE_IO )
198 {
199 /* Give the domain access to the IO ports it needs. Currently,
200 * this will allow all processes in that domain access to those
201 * ports as well. This will do for now, since driver domains don't
202 * run untrusted processes! */
203 INFO("Giving domain %u IO resources (%lx - %lx) "
204 "for device %s\n", dom, r->start, r->end, pdev->slot_name);
205 for ( j = r->start; j < r->end + 1; j++ )
206 {
207 clear_bit(j, ed->thread.io_bitmap);
208 clear_bit(j / IOBMP_BITS_PER_SELBIT, &ed->thread.io_bitmap_sel);
209 }
210 }
212 /* rights to IO memory regions are checked when the domain maps them */
213 }
215 for_each_exec_domain(p, edc) {
216 if (edc == ed)
217 continue;
218 edc->thread.io_bitmap_sel = ed->thread.io_bitmap_sel;
219 }
221 out:
222 put_domain(p);
223 return rc;
224 }
226 /* Check if a domain controls a device with IO memory within frame @pfn.
227 * Returns: 1 if the domain should be allowed to map @pfn, 0 otherwise. */
228 int domain_iomem_in_pfn(struct domain *p, unsigned long pfn)
229 {
230 int ret = 0;
231 phys_dev_t *phys_dev;
233 VERBOSE_INFO("Checking if physdev-capable domain %u needs access to "
234 "pfn %08lx\n", p->id, pfn);
236 spin_lock(&p->pcidev_lock);
238 list_for_each_entry ( phys_dev, &p->pcidev_list, node )
239 {
240 int i;
241 struct pci_dev *pci_dev = phys_dev->dev;
243 for ( i = 0; (i < DEVICE_COUNT_RESOURCE) && (ret == 0); i++ )
244 {
245 struct resource *r = &pci_dev->resource[i];
247 if ( r->flags & IORESOURCE_MEM )
248 if ( (r->start >> PAGE_SHIFT) == pfn
249 || (r->end >> PAGE_SHIFT) == pfn
250 || ((r->start >> PAGE_SHIFT < pfn)
251 && (r->end >> PAGE_SHIFT > pfn)) )
252 ret = 1;
253 }
255 if ( ret != 0 ) break;
256 }
258 spin_unlock(&p->pcidev_lock);
260 VERBOSE_INFO("Domain %u %s mapping of pfn %08lx\n",
261 p->id, ret ? "allowed" : "disallowed", pfn);
263 return ret;
264 }
266 /* check if a domain has general access to a device */
267 inline static int check_dev_acc (struct domain *p,
268 int bus, int dev, int func,
269 phys_dev_t **pdev)
270 {
271 struct pci_dev *target_dev;
272 phys_dev_t *target_pdev;
273 unsigned int target_devfn;
275 *pdev = NULL;
277 if ( !IS_CAPABLE_PHYSDEV(p) )
278 return -EPERM; /* no pci access permission */
280 if ( bus > PCI_BUSMAX || dev > PCI_DEVMAX || func > PCI_FUNCMAX )
281 return -EINVAL;
283 VERBOSE_INFO("b=%x d=%x f=%x ", bus, dev, func);
285 /* check target device */
286 target_devfn = PCI_DEVFN(dev, func);
287 target_dev = pci_find_slot(bus, target_devfn);
288 if ( !target_dev )
289 {
290 VERBOSE_INFO("target does not exist\n");
291 return -ENODEV;
292 }
294 /* check access */
295 target_pdev = find_pdev(p, target_dev);
296 if ( !target_pdev )
297 {
298 VERBOSE_INFO("dom has no access to target\n");
299 return -EPERM;
300 }
302 *pdev = target_pdev;
303 return 0;
304 }
306 #ifndef SLOPPY_CHECKING
307 /*
308 * Base address registers contain the base address for IO regions.
309 * The length can be determined by writing all 1s to the register and
310 * reading the value again. The device will zero the lower unused bits.
311 *
312 * to work out the length of the io region a device probe typically does:
313 * 1) a = read_base_addr_reg()
314 * 2) write_base_addr_reg(0xffffffff)
315 * 3) b = read_base_addr_reg() [device zeros lower bits]
316 * 4) write_base_addr_reg(a) [restore original value]
317 * this function fakes out step 2-4. *no* writes are made to the device.
318 *
319 * phys_dev_t contains a bit field (a bit for each base address register).
320 * if the bit for a register is set the guest had writen all 1s to the
321 * register and subsequent read request need to fake out the b.
322 * if the guest restores the original value (step 4 above) the bit is
323 * cleared again. If the guest attempts to "restores" a wrong value an
324 * error is flagged.
325 */
326 static int do_base_address_access(phys_dev_t *pdev, int acc, int idx,
327 int len, u32 *val)
328 {
329 int st_bit, reg = PCI_BASE_ADDRESS_0 + (idx*4), ret = -EINVAL;
330 struct pci_dev *dev = pdev->dev;
331 u32 orig_val, sz;
332 struct resource *res;
334 if ( len != sizeof(u32) )
335 {
336 /* This isn't illegal, but there doesn't seem to be a very good reason
337 * to do it for normal devices (bridges are another matter). Since it
338 * would complicate the code below, we don't support this for now. */
340 /* We could set *val to some value but the guest may well be in trouble
341 * anyway if this write fails. Hopefully the printk will give us a
342 * clue what went wrong. */
343 INFO("Guest %u attempting sub-dword %s to BASE_ADDRESS %d\n",
344 pdev->owner->id, (acc == ACC_READ) ? "read" : "write", idx);
346 return -EPERM;
347 }
349 st_bit = idx + ST_BASE_ADDRESS;
350 res = &(pdev->dev->resource[idx]);
352 if ( acc == ACC_WRITE )
353 {
354 if ( (*val == 0xffffffff) ||
355 ((res->flags & IORESOURCE_IO) && (*val == 0xffff)) )
356 {
357 /* Set bit and return. */
358 set_bit(st_bit, &pdev->state);
359 ret = 0;
360 }
361 else
362 {
363 /* Assume guest wants to set the base address. */
364 clear_bit(st_bit, &pdev->state);
366 /* check if guest tries to restore orig value */
367 ret = pci_read_config_dword(dev, reg, &orig_val);
368 if ( (ret == 0) && (*val != orig_val) )
369 {
370 INFO("Guest attempting update to BASE_ADDRESS %d\n", idx);
371 ret = -EPERM;
372 }
373 }
374 VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
375 " val=0x%08x %x\n",
376 dev->bus->number, PCI_SLOT(dev->devfn),
377 PCI_FUNC(dev->devfn), reg, len, *val, pdev->state);
378 }
379 else if ( acc == ACC_READ )
380 {
381 ret = pci_read_config_dword(dev, reg, val);
382 if ( (ret == 0) && test_bit(st_bit, &pdev->state) )
383 {
384 /* Cook the value. */
385 sz = res->end - res->start;
386 if ( res->flags & IORESOURCE_MEM )
387 {
388 /* this is written out explicitly for clarity */
389 *val = 0xffffffff;
390 /* bit 0 = 0 */
391 /* bit 21 = memory type */
392 /* bit 3 = prefetchable */
393 /* bit 4-31 width */
394 sz = sz >> 4; /* size in blocks of 16 byte */
395 sz = ~sz; /* invert */
396 *val = *val & (sz << 4); /* and in the size */
397 /* use read values for low 4 bits */
398 *val = *val | (orig_val & 0xf);
399 }
400 else if ( res->flags & IORESOURCE_IO )
401 {
402 *val = 0x0000ffff;
403 /* bit 10 = 01 */
404 /* bit 2-31 width */
405 sz = sz >> 2; /* size in dwords */
406 sz = ~sz & 0x0000ffff;
407 *val = *val & (sz << 2);
408 *val = *val | 0x1;
409 }
410 }
411 VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x"
412 " val=0x%08x %x\n",
413 dev->bus->number, PCI_SLOT(dev->devfn),
414 PCI_FUNC(dev->devfn), reg, len, *val, pdev->state);
415 }
417 return ret;
418 }
421 static int do_rom_address_access(phys_dev_t *pdev, int acc, int len, u32 *val)
422 {
423 int st_bit, ret = -EINVAL;
424 struct pci_dev *dev = pdev->dev;
425 u32 orig_val, sz;
426 struct resource *res;
428 if ( len != sizeof(u32) )
429 {
430 INFO("Guest attempting sub-dword %s to ROM_ADDRESS\n",
431 (acc == ACC_READ) ? "read" : "write");
432 return -EPERM;
433 }
435 st_bit = ST_ROM_ADDRESS;
436 res = &(pdev->dev->resource[PCI_ROM_RESOURCE]);
438 if ( acc == ACC_WRITE )
439 {
440 if ( (*val == 0xffffffff) || (*val == 0xfffffffe) )
441 {
442 /* NB. 0xffffffff would be unusual, but we trap it anyway. */
443 set_bit(st_bit, &pdev->state);
444 ret = 0;
445 }
446 else
447 {
448 /* Assume guest wants simply to set the base address. */
449 clear_bit(st_bit, &pdev->state);
451 /* Check if guest tries to restore the original value. */
452 ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, &orig_val);
453 if ( (ret == 0) && (*val != orig_val) )
454 {
455 if ( (*val != 0x00000000) )
456 {
457 INFO("caution: guest tried to change rom address.\n");
458 ret = -EPERM;
459 }
460 else
461 {
462 INFO("guest disabled rom access for %02x:%02x:%02x\n",
463 dev->bus->number, PCI_SLOT(dev->devfn),
464 PCI_FUNC(dev->devfn));
465 }
466 }
467 }
468 VERBOSE_INFO("fixed pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x"
469 " val=0x%08x %x\n",
470 dev->bus->number, PCI_SLOT(dev->devfn),
471 PCI_FUNC(dev->devfn), PCI_ROM_ADDRESS, len, *val, pdev->state);
472 }
473 else if ( acc == ACC_READ )
474 {
475 ret = pci_read_config_dword(dev, PCI_ROM_ADDRESS, val);
476 if ( (ret == 0) && test_bit(st_bit, &pdev->state) )
477 {
478 /* Cook the value. */
479 sz = res->end - res->start;
480 *val = 0xffffffff;
481 /* leave bit 0 untouched */
482 /* bit 1-10 reserved, harwired to 0 */
483 sz = sz >> 11; /* size is in 2KB blocks */
484 sz = ~sz;
485 *val = *val & (sz << 11);
486 *val = *val | (orig_val & 0x1);
487 }
488 VERBOSE_INFO("fixed pci read: %02x:%02x:%02x reg=0x%02x len=0x%02x"
489 " val=0x%08x %x\n",
490 dev->bus->number, PCI_SLOT(dev->devfn),
491 PCI_FUNC(dev->devfn), PCI_ROM_ADDRESS, len, *val, pdev->state);
492 }
494 return ret;
496 }
497 #endif /* SLOPPY_CHECKING */
499 /*
500 * Handle a PCI config space read access if the domain has access privileges.
501 */
502 static long pci_cfgreg_read(int bus, int dev, int func, int reg,
503 int len, u32 *val)
504 {
505 int ret;
506 phys_dev_t *pdev;
508 if ( (ret = check_dev_acc(current->domain, bus, dev, func, &pdev)) != 0 )
509 {
510 /* PCI spec states that reads from non-existent devices should return
511 * all 1s. In this case the domain has no read access, which should
512 * also look like the device is non-existent. */
513 *val = 0xFFFFFFFF;
514 return ret;
515 }
517 /* Fake out read requests for some registers. */
518 switch ( reg )
519 {
520 #ifndef SLOPPY_CHECKING
521 case PCI_BASE_ADDRESS_0:
522 ret = do_base_address_access(pdev, ACC_READ, 0, len, val);
523 break;
525 case PCI_BASE_ADDRESS_1:
526 ret = do_base_address_access(pdev, ACC_READ, 1, len, val);
527 break;
529 case PCI_BASE_ADDRESS_2:
530 ret = do_base_address_access(pdev, ACC_READ, 2, len, val);
531 break;
533 case PCI_BASE_ADDRESS_3:
534 ret = do_base_address_access(pdev, ACC_READ, 3, len, val);
535 break;
537 case PCI_BASE_ADDRESS_4:
538 ret = do_base_address_access(pdev, ACC_READ, 4, len, val);
539 break;
541 case PCI_BASE_ADDRESS_5:
542 ret = do_base_address_access(pdev, ACC_READ, 5, len, val);
543 break;
545 case PCI_ROM_ADDRESS:
546 ret = do_rom_address_access(pdev, ACC_READ, len, val);
547 break;
548 #endif
550 case PCI_INTERRUPT_LINE:
551 *val = pdev->dev->irq;
552 ret = 0;
553 break;
555 default:
556 ret = pci_config_read(0, bus, dev, func, reg, len, val);
557 VERBOSE_INFO("pci read : %02x:%02x:%02x reg=0x%02x len=0x%02x "
558 "val=0x%08x\n", bus, dev, func, reg, len, *val);
559 break;
560 }
562 return ret;
563 }
566 /*
567 * Handle a PCI config space write access if the domain has access privileges.
568 */
569 static long pci_cfgreg_write(int bus, int dev, int func, int reg,
570 int len, u32 val)
571 {
572 int ret;
573 phys_dev_t *pdev;
575 if ( (ret = check_dev_acc(current->domain, bus, dev, func, &pdev)) != 0 )
576 return ret;
578 /* special treatment for some registers */
579 switch (reg)
580 {
581 #ifndef SLOPPY_CHECKING
582 case PCI_BASE_ADDRESS_0:
583 ret = do_base_address_access(pdev, ACC_WRITE, 0, len, &val);
584 break;
586 case PCI_BASE_ADDRESS_1:
587 ret = do_base_address_access(pdev, ACC_WRITE, 1, len, &val);
588 break;
590 case PCI_BASE_ADDRESS_2:
591 ret = do_base_address_access(pdev, ACC_WRITE, 2, len, &val);
592 break;
594 case PCI_BASE_ADDRESS_3:
595 ret = do_base_address_access(pdev, ACC_WRITE, 3, len, &val);
596 break;
598 case PCI_BASE_ADDRESS_4:
599 ret = do_base_address_access(pdev, ACC_WRITE, 4, len, &val);
600 break;
602 case PCI_BASE_ADDRESS_5:
603 ret = do_base_address_access(pdev, ACC_WRITE, 5, len, &val);
604 break;
606 case PCI_ROM_ADDRESS:
607 ret = do_rom_address_access(pdev, ACC_WRITE, len, &val);
608 break;
609 #endif
611 default:
612 if ( pdev->flags != ACC_WRITE )
613 {
614 INFO("pci write not allowed %02x:%02x:%02x: "
615 "reg=0x%02x len=0x%02x val=0x%08x\n",
616 bus, dev, func, reg, len, val);
617 ret = -EPERM;
618 }
619 else
620 {
621 ret = pci_config_write(0, bus, dev, func, reg, len, val);
622 VERBOSE_INFO("pci write: %02x:%02x:%02x reg=0x%02x len=0x%02x "
623 "val=0x%08x\n", bus, dev, func, reg, len, val);
624 }
625 break;
626 }
628 return ret;
629 }
632 static long pci_probe_root_buses(u32 *busmask)
633 {
634 phys_dev_t *pdev;
636 memset(busmask, 0, 256/8);
638 list_for_each_entry ( pdev, &current->domain->pcidev_list, node )
639 set_bit(pdev->dev->bus->number, busmask);
641 return 0;
642 }
645 /*
646 * Demuxing hypercall.
647 */
648 long do_physdev_op(physdev_op_t *uop)
649 {
650 phys_dev_t *pdev;
651 physdev_op_t op;
652 long ret;
653 int irq;
655 if ( unlikely(copy_from_user(&op, uop, sizeof(op)) != 0) )
656 return -EFAULT;
658 switch ( op.cmd )
659 {
660 case PHYSDEVOP_PCI_CFGREG_READ:
661 ret = pci_cfgreg_read(op.u.pci_cfgreg_read.bus,
662 op.u.pci_cfgreg_read.dev,
663 op.u.pci_cfgreg_read.func,
664 op.u.pci_cfgreg_read.reg,
665 op.u.pci_cfgreg_read.len,
666 &op.u.pci_cfgreg_read.value);
667 break;
669 case PHYSDEVOP_PCI_CFGREG_WRITE:
670 ret = pci_cfgreg_write(op.u.pci_cfgreg_write.bus,
671 op.u.pci_cfgreg_write.dev,
672 op.u.pci_cfgreg_write.func,
673 op.u.pci_cfgreg_write.reg,
674 op.u.pci_cfgreg_write.len,
675 op.u.pci_cfgreg_write.value);
676 break;
678 case PHYSDEVOP_PCI_INITIALISE_DEVICE:
679 if ( (ret = check_dev_acc(current->domain,
680 op.u.pci_initialise_device.bus,
681 op.u.pci_initialise_device.dev,
682 op.u.pci_initialise_device.func,
683 &pdev)) == 0 )
684 pcibios_enable_irq(pdev->dev);
685 break;
687 case PHYSDEVOP_PCI_PROBE_ROOT_BUSES:
688 ret = pci_probe_root_buses(op.u.pci_probe_root_buses.busmask);
689 break;
691 case PHYSDEVOP_IRQ_UNMASK_NOTIFY:
692 ret = pirq_guest_unmask(current->domain);
693 break;
695 case PHYSDEVOP_IRQ_STATUS_QUERY:
696 irq = op.u.irq_status_query.irq;
697 ret = -EINVAL;
698 if ( (irq < 0) || (irq >= NR_IRQS) )
699 break;
700 op.u.irq_status_query.flags = 0;
701 /* Edge-triggered interrupts don't need an explicit unmask downcall. */
702 if ( strstr(irq_desc[irq].handler->typename, "edge") == NULL )
703 op.u.irq_status_query.flags |= PHYSDEVOP_IRQ_NEEDS_UNMASK_NOTIFY;
704 ret = 0;
705 break;
707 default:
708 ret = -EINVAL;
709 break;
710 }
712 copy_to_user(uop, &op, sizeof(op));
713 return ret;
714 }
/* opt_physdev_dom0_hide: list of PCI slots to hide from domain 0. */
/* Format is '(%02x:%02x.%1x)(%02x:%02x.%1x)' and so on. */
/* Registered below under the parameter name "physdev_dom0_hide". */
static char opt_physdev_dom0_hide[200] = "";
string_param("physdev_dom0_hide", opt_physdev_dom0_hide);
721 /* Test if boot params specify this device should NOT be visible to DOM0
722 * (e.g. so that another domain can control it instead) */
723 int pcidev_dom0_hidden(struct pci_dev *dev)
724 {
725 char cmp[10] = "(.......)";
727 strncpy(&cmp[1], dev->slot_name, 7);
729 if ( strstr(opt_physdev_dom0_hide, dev->slot_name) == NULL )
730 return 0;
732 return 1;
733 }
736 /* Domain 0 has read access to all devices. */
737 void physdev_init_dom0(struct domain *p)
738 {
739 struct pci_dev *dev;
740 phys_dev_t *pdev;
742 INFO("Give DOM0 read access to all PCI devices\n");
744 pci_for_each_dev(dev)
745 {
746 if ( pcidev_dom0_hidden(dev) )
747 {
748 printk("Hiding PCI device %s from DOM0\n", dev->slot_name);
749 continue;
750 }
752 /* Skip bridges and other peculiarities for now.
753 *
754 * Note that this can prevent the guest from detecting devices
755 * with fn>0 on slots where the fn=0 device is a bridge. We
756 * can identify such slots by looking at the multifunction bit
757 * (top bit of hdr_type, masked out in dev->hdr_type).
758 *
759 * In Linux2.4 we find all devices because the detection code
760 * scans all functions if the read of the fn=0 device's header
761 * type fails.
762 *
763 * In Linux2.6 we set pcibios_scan_all_fns().
764 */
765 if ( (dev->hdr_type != PCI_HEADER_TYPE_NORMAL) &&
766 (dev->hdr_type != PCI_HEADER_TYPE_CARDBUS) )
767 continue;
768 pdev = xmalloc(phys_dev_t);
769 pdev->dev = dev;
770 pdev->flags = ACC_WRITE;
771 pdev->state = 0;
772 pdev->owner = p;
773 list_add(&pdev->node, &p->pcidev_list);
774 }
776 set_bit(DF_PHYSDEV, &p->d_flags);
777 }