debuggers.hg

view tools/libxl/libxl_pci.c @ 22855:1d1eec7e1fb4

xl: Perform minimal validation of virtual disk file while parsing config file

This patch performs some very basic validation on the virtual disk
file passed through the config file. This validation ensures that we
don't go too far with the initialization like spawn qemu and more
while there could be some potentially fundamental issues.

[ Patch fixed up to work with PHYSTYPE_EMPTY 22808:6ec61438713a -iwj ]

Signed-off-by: Kamala Narasimhan <kamala.narasimhan@citrix.com>
Acked-by: Ian Jackson <ian.jackson@eu.citrix.com>
Signed-off-by: Ian Jackson <ian.jackson@eu.citrix.com>
Committed-by: Ian Jackson <ian.jackson@eu.citrix.com>
author Kamala Narasimhan <kamala.narasimhan@gmail.com>
date Tue Jan 25 18:09:49 2011 +0000 (2011-01-25)
parents 5b8034ce8b8c
children 5429204f3c06
line source
1 /*
2 * Copyright (C) 2009 Citrix Ltd.
3 * Author Vincent Hanquez <vincent.hanquez@eu.citrix.com>
4 * Author Stefano Stabellini <stefano.stabellini@eu.citrix.com>
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License as published
8 * by the Free Software Foundation; version 2.1 only. with the special
9 * exception on linking described in file LICENSE.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 */
17 #include "libxl_osdeps.h"
19 #include <stdio.h>
20 #include <string.h>
21 #include <stdlib.h>
22 #include <sys/types.h>
23 #include <fcntl.h>
24 #include <sys/select.h>
25 #include <sys/mman.h>
26 #include <sys/wait.h>
27 #include <sys/stat.h>
28 #include <signal.h>
29 #include <unistd.h> /* for write, unlink and close */
30 #include <stdint.h>
31 #include <inttypes.h>
32 #include <dirent.h>
33 #include <assert.h>
35 #include "libxl.h"
36 #include "libxl_utils.h"
37 #include "libxl_internal.h"
38 #include "flexarray.h"
/* printf/scanf format for a full PCI address: domain:bus:dev.func, all hex. */
40 #define PCI_BDF "%04x:%02x:%02x.%01x"
/* Three-field variant without the domain (presumably bus:dev.func); not used
 * by any code visible in this file view. */
41 #define PCI_BDF_SHORT "%02x:%02x.%01x"
/* Full address followed by '@' and the virtual devfn; used when handing a
 * device with an explicit vdevfn to the device model. */
42 #define PCI_BDF_VDEVFN "%04x:%02x:%02x.%01x@%02x"
44 static unsigned int pcidev_value(libxl_device_pci *pcidev)
45 {
46 union {
47 unsigned int value;
48 struct {
49 unsigned int reserved1:2;
50 unsigned int reg:6;
51 unsigned int func:3;
52 unsigned int dev:5;
53 unsigned int bus:8;
54 unsigned int reserved2:7;
55 unsigned int enable:1;
56 }fields;
57 }u;
59 u.value = 0;
60 u.fields.reg = pcidev->reg;
61 u.fields.func = pcidev->func;
62 u.fields.dev = pcidev->dev;
63 u.fields.bus = pcidev->bus;
64 u.fields.enable = pcidev->enable;
66 return u.value;
67 }
69 static int pcidev_init(libxl_device_pci *pcidev, unsigned int domain,
70 unsigned int bus, unsigned int dev,
71 unsigned int func, unsigned int vdevfn)
72 {
73 pcidev->domain = domain;
74 pcidev->bus = bus;
75 pcidev->dev = dev;
76 pcidev->func = func;
77 pcidev->vdevfn = vdevfn;
78 return 0;
79 }
/*
 * Parse the whole of str as a hexadecimal number and store it in *val.
 *
 * str  - NUL-terminated text; the entire string must be consumed.
 * val  - receives the parsed value on success; untouched on failure.
 * mask - bits the value is allowed to have set.
 *
 * Returns 0 on success, -1 if str is empty, has trailing characters, or the
 * value has any bit set outside mask.
 */
static int hex_convert(const char *str, unsigned int *val, unsigned int mask)
{
    unsigned long ret;
    char *end;

    ret = strtoul(str, &end, 16);
    if ( end == str || *end != '\0' )
        return -1;
    /*
     * Widen mask BEFORE inverting it.  ~mask evaluated as unsigned int is
     * zero-extended when combined with an unsigned long, which would leave
     * the upper bits of ret unchecked where long is wider than int.
     */
    if ( ret & ~(unsigned long)mask )
        return -1;
    *val = (unsigned int)ret & mask;
    return 0;
}
95 #define STATE_DOMAIN 0
96 #define STATE_BUS 1
97 #define STATE_DEV 2
98 #define STATE_FUNC 3
99 #define STATE_VSLOT 4
100 #define STATE_OPTIONS_K 6
101 #define STATE_OPTIONS_V 7
102 #define STATE_TERMINAL 8
103 int libxl_device_pci_parse_bdf(libxl_ctx *ctx, libxl_device_pci *pcidev, const char *str)
104 {
105 unsigned state = STATE_DOMAIN;
106 unsigned dom, bus, dev, func, vslot = 0;
107 char *buf2, *tok, *ptr, *end, *optkey = NULL;
109 if ( NULL == (buf2 = ptr = strdup(str)) )
110 return ERROR_NOMEM;
112 for(tok = ptr, end = ptr + strlen(ptr) + 1; ptr < end; ptr++) {
113 switch(state) {
114 case STATE_DOMAIN:
115 if ( *ptr == ':' ) {
116 state = STATE_BUS;
117 *ptr = '\0';
118 if ( hex_convert(tok, &dom, 0xffff) )
119 goto parse_error;
120 tok = ptr + 1;
121 }
122 break;
123 case STATE_BUS:
124 if ( *ptr == ':' ) {
125 state = STATE_DEV;
126 *ptr = '\0';
127 if ( hex_convert(tok, &bus, 0xff) )
128 goto parse_error;
129 tok = ptr + 1;
130 }else if ( *ptr == '.' ) {
131 state = STATE_FUNC;
132 *ptr = '\0';
133 if ( dom & ~0xff )
134 goto parse_error;
135 bus = dom;
136 dom = 0;
137 if ( hex_convert(tok, &dev, 0xff) )
138 goto parse_error;
139 tok = ptr + 1;
140 }
141 break;
142 case STATE_DEV:
143 if ( *ptr == '.' ) {
144 state = STATE_FUNC;
145 *ptr = '\0';
146 if ( hex_convert(tok, &dev, 0xff) )
147 goto parse_error;
148 tok = ptr + 1;
149 }
150 break;
151 case STATE_FUNC:
152 if ( *ptr == '\0' || *ptr == '@' || *ptr == ',' ) {
153 switch( *ptr ) {
154 case '\0':
155 state = STATE_TERMINAL;
156 break;
157 case '@':
158 state = STATE_VSLOT;
159 break;
160 case ',':
161 state = STATE_OPTIONS_K;
162 break;
163 }
164 *ptr = '\0';
165 if ( !strcmp(tok, "*") ) {
166 pcidev->vfunc_mask = LIBXL_PCI_FUNC_ALL;
167 }else{
168 if ( hex_convert(tok, &func, 0x7) )
169 goto parse_error;
170 pcidev->vfunc_mask = (1 << 0);
171 }
172 tok = ptr + 1;
173 }
174 break;
175 case STATE_VSLOT:
176 if ( *ptr == '\0' || *ptr == ',' ) {
177 state = ( *ptr == ',' ) ? STATE_OPTIONS_K : STATE_TERMINAL;
178 *ptr = '\0';
179 if ( hex_convert(tok, &vslot, 0xff) )
180 goto parse_error;
181 tok = ptr + 1;
182 }
183 break;
184 case STATE_OPTIONS_K:
185 if ( *ptr == '=' ) {
186 state = STATE_OPTIONS_V;
187 *ptr = '\0';
188 optkey = tok;
189 tok = ptr + 1;
190 }
191 break;
192 case STATE_OPTIONS_V:
193 if ( *ptr == ',' || *ptr == '\0' ) {
194 state = (*ptr == ',') ? STATE_OPTIONS_K : STATE_TERMINAL;
195 *ptr = '\0';
196 if ( !strcmp(optkey, "msitranslate") ) {
197 pcidev->msitranslate = atoi(tok);
198 }else if ( !strcmp(optkey, "power_mgmt") ) {
199 pcidev->power_mgmt = atoi(tok);
200 }else{
201 LIBXL__LOG(ctx, LIBXL__LOG_WARNING,
202 "Unknown PCI BDF option: %s", optkey);
203 }
204 tok = ptr + 1;
205 }
206 default:
207 break;
208 }
209 }
211 free(buf2);
213 if ( tok != ptr || state != STATE_TERMINAL )
214 goto parse_error;
216 pcidev_init(pcidev, dom, bus, dev, func, vslot << 3);
218 return 0;
220 parse_error:
221 return ERROR_INVAL;
222 }
224 static void libxl_create_pci_backend_device(libxl__gc *gc, flexarray_t *back, int num, libxl_device_pci *pcidev)
225 {
226 flexarray_append(back, libxl__sprintf(gc, "key-%d", num));
227 flexarray_append(back, libxl__sprintf(gc, PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func));
228 flexarray_append(back, libxl__sprintf(gc, "dev-%d", num));
229 flexarray_append(back, libxl__sprintf(gc, PCI_BDF, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func));
230 if (pcidev->vdevfn)
231 flexarray_vappend(back, libxl__sprintf(gc, "vdevfn-%d", num), libxl__sprintf(gc, "%x", pcidev->vdevfn), NULL);
232 flexarray_append(back, libxl__sprintf(gc, "opts-%d", num));
233 flexarray_append(back, libxl__sprintf(gc, "msitranslate=%d,power_mgmt=%d", pcidev->msitranslate, pcidev->power_mgmt));
234 flexarray_vappend(back, libxl__sprintf(gc, "state-%d", num), libxl__sprintf(gc, "%d", 1), NULL);
235 }
237 static int libxl_create_pci_backend(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev, int num)
238 {
239 libxl_ctx *ctx = libxl__gc_owner(gc);
240 flexarray_t *front = NULL;
241 flexarray_t *back = NULL;
242 libxl__device device;
243 int ret = ERROR_NOMEM, i;
245 front = flexarray_make(16, 1);
246 if (!front)
247 goto out;
248 back = flexarray_make(16, 1);
249 if (!back)
250 goto out;
252 ret = 0;
254 LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Creating pci backend");
256 /* add pci device */
257 device.backend_devid = 0;
258 device.backend_domid = 0;
259 device.backend_kind = DEVICE_PCI;
260 device.devid = 0;
261 device.domid = domid;
262 device.kind = DEVICE_PCI;
264 flexarray_vappend(back, "frontend-id", libxl__sprintf(gc, "%d", domid),
265 "online", "1", "state", libxl__sprintf(gc, "%d", 1),
266 "domain", libxl__domid_to_name(gc, domid), NULL);
268 for (i = 0; i < num; i++, pcidev++)
269 libxl_create_pci_backend_device(gc, back, i, pcidev);
271 flexarray_vappend(back, "num_devs", libxl__sprintf(gc, "%d", num));
273 flexarray_vappend(front,
274 "backend-id", libxl__sprintf(gc, "%d", 0),
275 "state", libxl__sprintf(gc, "%d", 1), NULL);
277 libxl__device_generic_add(ctx, &device,
278 libxl__xs_kvs_of_flexarray(gc, back, back->count),
279 libxl__xs_kvs_of_flexarray(gc, front, front->count));
281 out:
282 if (back)
283 flexarray_free(back);
284 if (front)
285 flexarray_free(front);
286 return 0;
287 }
289 static int libxl_device_pci_add_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev)
290 {
291 libxl_ctx *ctx = libxl__gc_owner(gc);
292 flexarray_t *back;
293 char *num_devs, *be_path;
294 int num = 0;
295 xs_transaction_t t;
297 be_path = libxl__sprintf(gc, "%s/backend/pci/%d/0", libxl__xs_get_dompath(gc, 0), domid);
298 num_devs = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/num_devs", be_path));
299 if (!num_devs)
300 return libxl_create_pci_backend(gc, domid, pcidev, 1);
302 if (!libxl__domain_is_hvm(ctx, domid)) {
303 if (libxl__wait_for_backend(ctx, be_path, "4") < 0)
304 return ERROR_FAIL;
305 }
307 back = flexarray_make(16, 1);
308 if (!back)
309 return ERROR_NOMEM;
311 LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "Adding new pci device to xenstore");
312 num = atoi(num_devs);
313 libxl_create_pci_backend_device(gc, back, num, pcidev);
314 flexarray_vappend(back, "num_devs", libxl__sprintf(gc, "%d", num + 1), NULL);
315 flexarray_vappend(back, "state", libxl__sprintf(gc, "%d", 7), NULL);
317 retry_transaction:
318 t = xs_transaction_start(ctx->xsh);
319 libxl__xs_writev(gc, t, be_path,
320 libxl__xs_kvs_of_flexarray(gc, back, back->count));
321 if (!xs_transaction_end(ctx->xsh, t, 0))
322 if (errno == EAGAIN)
323 goto retry_transaction;
325 flexarray_free(back);
326 return 0;
327 }
329 static int libxl_device_pci_remove_xenstore(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev)
330 {
331 libxl_ctx *ctx = libxl__gc_owner(gc);
332 char *be_path, *num_devs_path, *num_devs, *xsdev, *tmp, *tmppath;
333 int num, i, j;
334 xs_transaction_t t;
335 unsigned int domain = 0, bus = 0, dev = 0, func = 0;
337 be_path = libxl__sprintf(gc, "%s/backend/pci/%d/0", libxl__xs_get_dompath(gc, 0), domid);
338 num_devs_path = libxl__sprintf(gc, "%s/num_devs", be_path);
339 num_devs = libxl__xs_read(gc, XBT_NULL, num_devs_path);
340 if (!num_devs)
341 return ERROR_INVAL;
342 num = atoi(num_devs);
344 if (!libxl__domain_is_hvm(ctx, domid)) {
345 if (libxl__wait_for_backend(ctx, be_path, "4") < 0) {
346 LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "pci backend at %s is not ready", be_path);
347 return ERROR_FAIL;
348 }
349 }
351 for (i = 0; i < num; i++) {
352 xsdev = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/dev-%d", be_path, i));
353 sscanf(xsdev, PCI_BDF, &domain, &bus, &dev, &func);
354 if (domain == pcidev->domain && bus == pcidev->bus &&
355 pcidev->dev == dev && pcidev->func == func) {
356 break;
357 }
358 }
359 if (i == num) {
360 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Couldn't find the device on xenstore");
361 return ERROR_INVAL;
362 }
364 retry_transaction:
365 t = xs_transaction_start(ctx->xsh);
366 xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/state-%d", be_path, i), "5", strlen("5"));
367 xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/state", be_path), "7", strlen("7"));
368 if (!xs_transaction_end(ctx->xsh, t, 0))
369 if (errno == EAGAIN)
370 goto retry_transaction;
372 if (!libxl__domain_is_hvm(ctx, domid)) {
373 if (libxl__wait_for_backend(ctx, be_path, "4") < 0) {
374 LIBXL__LOG(ctx, LIBXL__LOG_DEBUG, "pci backend at %s is not ready", be_path);
375 return ERROR_FAIL;
376 }
377 }
379 retry_transaction2:
380 t = xs_transaction_start(ctx->xsh);
381 xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/state-%d", be_path, i));
382 xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/key-%d", be_path, i));
383 xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/dev-%d", be_path, i));
384 xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/vdev-%d", be_path, i));
385 xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/opts-%d", be_path, i));
386 xs_rm(ctx->xsh, t, libxl__sprintf(gc, "%s/vdevfn-%d", be_path, i));
387 libxl__xs_write(gc, t, num_devs_path, "%d", num - 1);
388 for (j = i + 1; j < num; j++) {
389 tmppath = libxl__sprintf(gc, "%s/state-%d", be_path, j);
390 tmp = libxl__xs_read(gc, t, tmppath);
391 xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/state-%d", be_path, j - 1), tmp, strlen(tmp));
392 xs_rm(ctx->xsh, t, tmppath);
393 tmppath = libxl__sprintf(gc, "%s/dev-%d", be_path, j);
394 tmp = libxl__xs_read(gc, t, tmppath);
395 xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/dev-%d", be_path, j - 1), tmp, strlen(tmp));
396 xs_rm(ctx->xsh, t, tmppath);
397 tmppath = libxl__sprintf(gc, "%s/key-%d", be_path, j);
398 tmp = libxl__xs_read(gc, t, tmppath);
399 xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/key-%d", be_path, j - 1), tmp, strlen(tmp));
400 xs_rm(ctx->xsh, t, tmppath);
401 tmppath = libxl__sprintf(gc, "%s/vdev-%d", be_path, j);
402 tmp = libxl__xs_read(gc, t, tmppath);
403 if (tmp) {
404 xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/vdev-%d", be_path, j - 1), tmp, strlen(tmp));
405 xs_rm(ctx->xsh, t, tmppath);
406 }
407 tmppath = libxl__sprintf(gc, "%s/opts-%d", be_path, j);
408 tmp = libxl__xs_read(gc, t, tmppath);
409 if (tmp) {
410 xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/opts-%d", be_path, j - 1), tmp, strlen(tmp));
411 xs_rm(ctx->xsh, t, tmppath);
412 }
413 tmppath = libxl__sprintf(gc, "%s/vdevfn-%d", be_path, j);
414 tmp = libxl__xs_read(gc, t, tmppath);
415 if (tmp) {
416 xs_write(ctx->xsh, t, libxl__sprintf(gc, "%s/vdevfn-%d", be_path, j - 1), tmp, strlen(tmp));
417 xs_rm(ctx->xsh, t, tmppath);
418 }
419 }
420 if (!xs_transaction_end(ctx->xsh, t, 0))
421 if (errno == EAGAIN)
422 goto retry_transaction2;
424 if (num == 1) {
425 char *fe_path = libxl__xs_read(gc, XBT_NULL, libxl__sprintf(gc, "%s/frontend", be_path));
426 libxl__device_destroy(ctx, be_path, 1);
427 xs_rm(ctx->xsh, XBT_NULL, be_path);
428 xs_rm(ctx->xsh, XBT_NULL, fe_path);
429 return 0;
430 }
432 return 0;
433 }
435 static int get_all_assigned_devices(libxl__gc *gc, libxl_device_pci **list, int *num)
436 {
437 libxl_device_pci *pcidevs = NULL;
438 char **domlist;
439 unsigned int nd = 0, i;
441 *list = NULL;
442 *num = 0;
444 domlist = libxl__xs_directory(gc, XBT_NULL, "/local/domain", &nd);
445 for(i = 0; i < nd; i++) {
446 char *path, *num_devs;
448 path = libxl__sprintf(gc, "/local/domain/0/backend/pci/%s/0/num_devs", domlist[i]);
449 num_devs = libxl__xs_read(gc, XBT_NULL, path);
450 if ( num_devs ) {
451 int ndev = atoi(num_devs), j;
452 char *devpath, *bdf;
454 pcidevs = libxl__calloc(gc, sizeof(*pcidevs), ndev);
455 for(j = (pcidevs) ? 0 : ndev; j < ndev; j++) {
456 devpath = libxl__sprintf(gc, "/local/domain/0/backend/pci/%s/0/dev-%u",
457 domlist[i], j);
458 bdf = libxl__xs_read(gc, XBT_NULL, devpath);
459 if ( bdf ) {
460 unsigned dom, bus, dev, func;
461 if ( sscanf(bdf, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
462 continue;
464 pcidev_init(pcidevs + *num, dom, bus, dev, func, 0);
465 (*num)++;
466 }
467 }
468 }
469 }
471 if ( 0 == *num ) {
472 free(pcidevs);
473 pcidevs = NULL;
474 }else{
475 *list = pcidevs;
476 }
478 return 0;
479 }
481 static int is_assigned(libxl_device_pci *assigned, int num_assigned,
482 int dom, int bus, int dev, int func)
483 {
484 int i;
486 for(i = 0; i < num_assigned; i++) {
487 if ( assigned[i].domain != dom )
488 continue;
489 if ( assigned[i].bus != bus )
490 continue;
491 if ( assigned[i].dev != dev )
492 continue;
493 if ( assigned[i].func != func )
494 continue;
495 return 1;
496 }
498 return 0;
499 }
501 int libxl_device_pci_list_assignable(libxl_ctx *ctx, libxl_device_pci **list, int *num)
502 {
503 libxl__gc gc = LIBXL_INIT_GC(ctx);
504 libxl_device_pci *pcidevs = NULL, *new, *assigned;
505 struct dirent *de;
506 DIR *dir;
507 int rc, num_assigned;
509 *num = 0;
510 *list = NULL;
512 rc = get_all_assigned_devices(&gc, &assigned, &num_assigned);
513 if ( rc ) {
514 libxl__free_all(&gc);
515 return rc;
516 }
518 dir = opendir(SYSFS_PCIBACK_DRIVER);
519 if ( NULL == dir ) {
520 if ( errno == ENOENT ) {
521 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Looks like pciback driver not loaded");
522 }else{
523 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", SYSFS_PCIBACK_DRIVER);
524 }
525 libxl__free_all(&gc);
526 return ERROR_FAIL;
527 }
529 while( (de = readdir(dir)) ) {
530 unsigned dom, bus, dev, func;
531 if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
532 continue;
534 if ( is_assigned(assigned, num_assigned, dom, bus, dev, func) )
535 continue;
537 new = realloc(pcidevs, ((*num) + 1) * sizeof(*new));
538 if ( NULL == new )
539 continue;
541 pcidevs = new;
542 new = pcidevs + *num;
544 memset(new, 0, sizeof(*new));
545 pcidev_init(new, dom, bus, dev, func, 0);
546 (*num)++;
547 }
549 closedir(dir);
550 *list = pcidevs;
551 libxl__free_all(&gc);
552 return 0;
553 }
555 /*
556 * This function checks that all functions of a device are bound to pciback
557 * driver. It also initialises a bit-mask of which function numbers are present
558 * on that device.
559 */
560 static int pci_multifunction_check(libxl__gc *gc, libxl_device_pci *pcidev, unsigned int *func_mask)
561 {
562 libxl_ctx *ctx = libxl__gc_owner(gc);
563 struct dirent *de;
564 DIR *dir;
566 *func_mask = 0;
568 dir = opendir(SYSFS_PCI_DEV);
569 if ( NULL == dir ) {
570 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", SYSFS_PCI_DEV);
571 return -1;
572 }
574 while( (de = readdir(dir)) ) {
575 unsigned dom, bus, dev, func;
576 struct stat st;
577 char *path;
579 if ( sscanf(de->d_name, PCI_BDF, &dom, &bus, &dev, &func) != 4 )
580 continue;
581 if ( pcidev->domain != dom )
582 continue;
583 if ( pcidev->bus != bus )
584 continue;
585 if ( pcidev->dev != dev )
586 continue;
588 path = libxl__sprintf(gc, "%s/" PCI_BDF, SYSFS_PCIBACK_DRIVER, dom, bus, dev, func);
589 if ( lstat(path, &st) ) {
590 if ( errno == ENOENT )
591 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, PCI_BDF " is not assigned to pciback driver",
592 dom, bus, dev, func);
593 else
594 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't lstat %s", path);
595 closedir(dir);
596 return -1;
597 }
598 (*func_mask) |= (1 << func);
599 }
601 closedir(dir);
602 return 0;
603 }
605 static int pci_ins_check(libxl_ctx *ctx, uint32_t domid, const char *state, void *priv)
606 {
607 char *orig_state = priv;
609 if ( !strcmp(state, "pci-insert-failed") )
610 return -1;
611 if ( !strcmp(state, "pci-inserted") )
612 return 0;
613 if ( !strcmp(state, orig_state) )
614 return 1;
616 return 1;
617 }
619 static int do_pci_add(libxl__gc *gc, uint32_t domid, libxl_device_pci *pcidev)
620 {
621 libxl_ctx *ctx = libxl__gc_owner(gc);
622 char *path;
623 char *state, *vdevfn;
624 int rc, hvm;
626 hvm = libxl__domain_is_hvm(ctx, domid);
627 if (hvm) {
628 if (libxl__wait_for_device_model(ctx, domid, "running", NULL, NULL) < 0) {
629 return ERROR_FAIL;
630 }
631 path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
632 state = libxl__xs_read(gc, XBT_NULL, path);
633 path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter", domid);
634 if (pcidev->vdevfn)
635 libxl__xs_write(gc, XBT_NULL, path, PCI_BDF_VDEVFN, pcidev->domain,
636 pcidev->bus, pcidev->dev, pcidev->func, pcidev->vdevfn);
637 else
638 libxl__xs_write(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
639 pcidev->bus, pcidev->dev, pcidev->func);
640 path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", domid);
641 xs_write(ctx->xsh, XBT_NULL, path, "pci-ins", strlen("pci-ins"));
642 rc = libxl__wait_for_device_model(ctx, domid, NULL, pci_ins_check, state);
643 path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter", domid);
644 vdevfn = libxl__xs_read(gc, XBT_NULL, path);
645 path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
646 if ( rc < 0 )
647 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "qemu refused to add device: %s", vdevfn);
648 else if ( sscanf(vdevfn, "0x%x", &pcidev->vdevfn) != 1 )
649 rc = -1;
650 xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));
651 if ( rc )
652 return ERROR_FAIL;
653 } else {
654 char *sysfs_path = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
655 pcidev->bus, pcidev->dev, pcidev->func);
656 FILE *f = fopen(sysfs_path, "r");
657 unsigned long long start = 0, end = 0, flags = 0, size = 0;
658 int irq = 0;
659 int i;
661 if (f == NULL) {
662 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", sysfs_path);
663 return ERROR_FAIL;
664 }
665 for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
666 if (fscanf(f, "0x%llx 0x%llx 0x%llx\n", &start, &end, &flags) != 3)
667 continue;
668 size = end - start + 1;
669 if (start) {
670 if (flags & PCI_BAR_IO) {
671 rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 1);
672 if (rc < 0) {
673 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "Error: xc_domain_ioport_permission error 0x%llx/0x%llx", start, size);
674 fclose(f);
675 return ERROR_FAIL;
676 }
677 } else {
678 rc = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
679 (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 1);
680 if (rc < 0) {
681 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "Error: xc_domain_iomem_permission error 0x%llx/0x%llx", start, size);
682 fclose(f);
683 return ERROR_FAIL;
684 }
685 }
686 }
687 }
688 fclose(f);
689 sysfs_path = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
690 pcidev->bus, pcidev->dev, pcidev->func);
691 f = fopen(sysfs_path, "r");
692 if (f == NULL) {
693 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", sysfs_path);
694 goto out;
695 }
696 if ((fscanf(f, "%u", &irq) == 1) && irq) {
697 rc = xc_physdev_map_pirq(ctx->xch, domid, irq, &irq);
698 if (rc < 0) {
699 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "Error: xc_physdev_map_pirq irq=%d", irq);
700 fclose(f);
701 return ERROR_FAIL;
702 }
703 rc = xc_domain_irq_permission(ctx->xch, domid, irq, 1);
704 if (rc < 0) {
705 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "Error: xc_domain_irq_permission irq=%d", irq);
706 fclose(f);
707 return ERROR_FAIL;
708 }
709 }
710 fclose(f);
711 }
712 out:
713 if (!libxl_is_stubdom(ctx, domid, NULL)) {
714 rc = xc_assign_device(ctx->xch, domid, pcidev_value(pcidev));
715 if (rc < 0 && (hvm || errno != ENOSYS)) {
716 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_assign_device failed");
717 return ERROR_FAIL;
718 }
719 }
721 libxl_device_pci_add_xenstore(gc, domid, pcidev);
722 return 0;
723 }
725 static int libxl_device_pci_reset(libxl__gc *gc, unsigned int domain, unsigned int bus,
726 unsigned int dev, unsigned int func)
727 {
728 libxl_ctx *ctx = libxl__gc_owner(gc);
729 char *reset;
730 int fd, rc;
732 reset = libxl__sprintf(gc, "%s/pciback/do_flr", SYSFS_PCI_DEV);
733 fd = open(reset, O_WRONLY);
734 if (fd > 0) {
735 char *buf = libxl__sprintf(gc, PCI_BDF, domain, bus, dev, func);
736 rc = write(fd, buf, strlen(buf));
737 if (rc < 0)
738 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "write to %s returned %d", reset, rc);
739 close(fd);
740 return rc < 0 ? rc : 0;
741 }
742 if (errno != ENOENT)
743 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Failed to access pciback path %s", reset);
744 reset = libxl__sprintf(gc, "%s/"PCI_BDF"/reset", SYSFS_PCI_DEV, domain, bus, dev, func);
745 fd = open(reset, O_WRONLY);
746 if (fd > 0) {
747 rc = write(fd, "1", 1);
748 if (rc < 0)
749 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "write to %s returned %d", reset, rc);
750 close(fd);
751 return rc < 0 ? rc : 0;
752 }
753 if (errno == ENOENT) {
754 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "The kernel doesn't support reset from sysfs for PCI device "PCI_BDF, domain, bus, dev, func);
755 } else {
756 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Failed to access reset path %s", reset);
757 }
758 return -1;
759 }
761 int libxl_device_pci_add(libxl_ctx *ctx, uint32_t domid, libxl_device_pci *pcidev)
762 {
763 libxl__gc gc = LIBXL_INIT_GC(ctx);
764 unsigned int orig_vdev, pfunc_mask;
765 libxl_device_pci *assigned;
766 int num_assigned, i, rc;
767 int stubdomid = 0;
769 rc = get_all_assigned_devices(&gc, &assigned, &num_assigned);
770 if ( rc ) {
771 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "cannot determine if device is assigned, refusing to continue");
772 goto out;
773 }
774 if ( is_assigned(assigned, num_assigned, pcidev->domain,
775 pcidev->bus, pcidev->dev, pcidev->func) ) {
776 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "PCI device already attached to a domain");
777 rc = ERROR_FAIL;
778 goto out;
779 }
781 libxl_device_pci_reset(&gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
783 stubdomid = libxl_get_stubdom_id(ctx, domid);
784 if (stubdomid != 0) {
785 libxl_device_pci pcidev_s = *pcidev;
786 rc = do_pci_add(&gc, stubdomid, &pcidev_s);
787 if ( rc )
788 goto out;
789 }
791 orig_vdev = pcidev->vdevfn & ~7U;
793 if ( pcidev->vfunc_mask == LIBXL_PCI_FUNC_ALL ) {
794 if ( !(pcidev->vdevfn >> 3) ) {
795 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Must specify a v-slot for multi-function devices");
796 rc = ERROR_INVAL;
797 goto out;
798 }
799 if ( pci_multifunction_check(&gc, pcidev, &pfunc_mask) ) {
800 rc = ERROR_FAIL;
801 goto out;
802 }
803 pcidev->vfunc_mask &= pfunc_mask;
804 /* so now vfunc_mask == pfunc_mask */
805 }else{
806 pfunc_mask = (1 << pcidev->func);
807 }
809 for(rc = 0, i = 7; i >= 0; --i) {
810 if ( (1 << i) & pfunc_mask ) {
811 if ( pcidev->vfunc_mask == pfunc_mask ) {
812 pcidev->func = i;
813 pcidev->vdevfn = orig_vdev | i;
814 }else{
815 /* if not passing through multiple devices in a block make
816 * sure that virtual function number 0 is always used otherwise
817 * guest won't see the device
818 */
819 pcidev->vdevfn = orig_vdev;
820 }
821 if ( do_pci_add(&gc, domid, pcidev) )
822 rc = ERROR_FAIL;
823 }
824 }
826 out:
827 libxl__free_all(&gc);
828 return rc;
829 }
831 static int do_pci_remove(libxl__gc *gc, uint32_t domid,
832 libxl_device_pci *pcidev, int force)
833 {
834 libxl_ctx *ctx = libxl__gc_owner(gc);
835 libxl_device_pci *assigned;
836 char *path;
837 char *state;
838 int hvm, rc, num;
839 int stubdomid = 0;
841 if ( !libxl_device_pci_list_assigned(ctx, &assigned, domid, &num) ) {
842 if ( !is_assigned(assigned, num, pcidev->domain,
843 pcidev->bus, pcidev->dev, pcidev->func) ) {
844 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "PCI device not attached to this domain");
845 return ERROR_INVAL;
846 }
847 }
849 hvm = libxl__domain_is_hvm(ctx, domid);
850 if (hvm) {
851 if (libxl__wait_for_device_model(ctx, domid, "running", NULL, NULL) < 0) {
852 return ERROR_FAIL;
853 }
854 path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
855 state = libxl__xs_read(gc, XBT_NULL, path);
856 path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/parameter", domid);
857 libxl__xs_write(gc, XBT_NULL, path, PCI_BDF, pcidev->domain,
858 pcidev->bus, pcidev->dev, pcidev->func);
859 path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/command", domid);
861 /* Remove all functions at once atomically by only signalling
862 * device-model for function 0 */
863 if ( (pcidev->vdevfn & 0x7) == 0 ) {
864 xs_write(ctx->xsh, XBT_NULL, path, "pci-rem", strlen("pci-rem"));
865 if (libxl__wait_for_device_model(ctx, domid, "pci-removed", NULL, NULL) < 0) {
866 LIBXL__LOG(ctx, LIBXL__LOG_ERROR, "Device Model didn't respond in time");
867 /* This depends on guest operating system acknowledging the
868 * SCI, if it doesn't respond in time then we may wish to
869 * force the removal.
870 */
871 if ( !force )
872 return ERROR_FAIL;
873 }
874 }
875 path = libxl__sprintf(gc, "/local/domain/0/device-model/%d/state", domid);
876 xs_write(ctx->xsh, XBT_NULL, path, state, strlen(state));
877 } else {
878 char *sysfs_path = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF"/resource", pcidev->domain,
879 pcidev->bus, pcidev->dev, pcidev->func);
880 FILE *f = fopen(sysfs_path, "r");
881 unsigned int start = 0, end = 0, flags = 0, size = 0;
882 int irq = 0;
883 int i;
885 if (f == NULL) {
886 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", sysfs_path);
887 goto skip1;
888 }
889 for (i = 0; i < PROC_PCI_NUM_RESOURCES; i++) {
890 if (fscanf(f, "0x%x 0x%x 0x%x\n", &start, &end, &flags) != 3)
891 continue;
892 size = end - start + 1;
893 if (start) {
894 if (flags & PCI_BAR_IO) {
895 rc = xc_domain_ioport_permission(ctx->xch, domid, start, size, 0);
896 if (rc < 0)
897 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_ioport_permission error 0x%x/0x%x", start, size);
898 } else {
899 rc = xc_domain_iomem_permission(ctx->xch, domid, start>>XC_PAGE_SHIFT,
900 (size+(XC_PAGE_SIZE-1))>>XC_PAGE_SHIFT, 0);
901 if (rc < 0)
902 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_iomem_permission error 0x%x/0x%x", start, size);
903 }
904 }
905 }
906 fclose(f);
907 skip1:
908 sysfs_path = libxl__sprintf(gc, SYSFS_PCI_DEV"/"PCI_BDF"/irq", pcidev->domain,
909 pcidev->bus, pcidev->dev, pcidev->func);
910 f = fopen(sysfs_path, "r");
911 if (f == NULL) {
912 LIBXL__LOG_ERRNO(ctx, LIBXL__LOG_ERROR, "Couldn't open %s", sysfs_path);
913 goto out;
914 }
915 if ((fscanf(f, "%u", &irq) == 1) && irq) {
916 rc = xc_physdev_unmap_pirq(ctx->xch, domid, irq);
917 if (rc < 0) {
918 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_physdev_unmap_pirq irq=%d", irq);
919 }
920 rc = xc_domain_irq_permission(ctx->xch, domid, irq, 0);
921 if (rc < 0) {
922 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_domain_irq_permission irq=%d", irq);
923 }
924 }
925 fclose(f);
926 }
927 out:
928 /* don't do multiple resets while some functions are still passed through */
929 if ( (pcidev->vdevfn & 0x7) == 0 ) {
930 libxl_device_pci_reset(gc, pcidev->domain, pcidev->bus, pcidev->dev, pcidev->func);
931 }
933 if (!libxl_is_stubdom(ctx, domid, NULL)) {
934 rc = xc_deassign_device(ctx->xch, domid, pcidev_value(pcidev));
935 if (rc < 0 && (hvm || errno != ENOSYS))
936 LIBXL__LOG_ERRNOVAL(ctx, LIBXL__LOG_ERROR, rc, "xc_deassign_device failed");
937 }
939 stubdomid = libxl_get_stubdom_id(ctx, domid);
940 if (stubdomid != 0) {
941 libxl_device_pci pcidev_s = *pcidev;
942 libxl_device_pci_remove(ctx, stubdomid, &pcidev_s, force);
943 }
945 libxl_device_pci_remove_xenstore(gc, domid, pcidev);
947 return 0;
948 }
950 int libxl_device_pci_remove(libxl_ctx *ctx, uint32_t domid,
951 libxl_device_pci *pcidev, int force)
952 {
953 libxl__gc gc = LIBXL_INIT_GC(ctx);
954 unsigned int orig_vdev, pfunc_mask;
955 int i, rc;
957 orig_vdev = pcidev->vdevfn & ~7U;
959 if ( pcidev->vfunc_mask == LIBXL_PCI_FUNC_ALL ) {
960 if ( pci_multifunction_check(&gc, pcidev, &pfunc_mask) ) {
961 rc = ERROR_FAIL;
962 goto out;
963 }
964 pcidev->vfunc_mask &= pfunc_mask;
965 }else{
966 pfunc_mask = (1 << pcidev->func);
967 }
969 for(rc = 0, i = 7; i >= 0; --i) {
970 if ( (1 << i) & pfunc_mask ) {
971 if ( pcidev->vfunc_mask == pfunc_mask ) {
972 pcidev->func = i;
973 pcidev->vdevfn = orig_vdev | i;
974 }else{
975 pcidev->vdevfn = orig_vdev;
976 }
977 if ( do_pci_remove(&gc, domid, pcidev, force) )
978 rc = ERROR_FAIL;
979 }
980 }
982 out:
983 libxl__free_all(&gc);
984 return rc;
985 }
987 int libxl_device_pci_list_assigned(libxl_ctx *ctx, libxl_device_pci **list, uint32_t domid, int *num)
988 {
989 libxl__gc gc = LIBXL_INIT_GC(ctx);
990 char *be_path, *num_devs, *xsdev, *xsvdevfn, *xsopts;
991 int n, i;
992 unsigned int domain = 0, bus = 0, dev = 0, func = 0, vdevfn = 0;
993 libxl_device_pci *pcidevs;
995 be_path = libxl__sprintf(&gc, "%s/backend/pci/%d/0", libxl__xs_get_dompath(&gc, 0), domid);
996 num_devs = libxl__xs_read(&gc, XBT_NULL, libxl__sprintf(&gc, "%s/num_devs", be_path));
997 if (!num_devs) {
998 *num = 0;
999 *list = NULL;
1000 libxl__free_all(&gc);
1001 return 0;
1003 n = atoi(num_devs);
1004 pcidevs = calloc(n, sizeof(libxl_device_pci));
1005 *num = n;
1007 for (i = 0; i < n; i++) {
1008 xsdev = libxl__xs_read(&gc, XBT_NULL, libxl__sprintf(&gc, "%s/dev-%d", be_path, i));
1009 sscanf(xsdev, PCI_BDF, &domain, &bus, &dev, &func);
1010 xsvdevfn = libxl__xs_read(&gc, XBT_NULL, libxl__sprintf(&gc, "%s/vdevfn-%d", be_path, i));
1011 if (xsvdevfn)
1012 vdevfn = strtol(xsvdevfn, (char **) NULL, 16);
1013 pcidev_init(pcidevs + i, domain, bus, dev, func, vdevfn);
1014 xsopts = libxl__xs_read(&gc, XBT_NULL, libxl__sprintf(&gc, "%s/opts-%d", be_path, i));
1015 if (xsopts) {
1016 char *saveptr;
1017 char *p = strtok_r(xsopts, ",=", &saveptr);
1018 do {
1019 while (*p == ' ')
1020 p++;
1021 if (!strcmp(p, "msitranslate")) {
1022 p = strtok_r(NULL, ",=", &saveptr);
1023 pcidevs[i].msitranslate = atoi(p);
1024 } else if (!strcmp(p, "power_mgmt")) {
1025 p = strtok_r(NULL, ",=", &saveptr);
1026 pcidevs[i].power_mgmt = atoi(p);
1028 } while ((p = strtok_r(NULL, ",=", &saveptr)) != NULL);
1031 if ( *num )
1032 *list = pcidevs;
1033 libxl__free_all(&gc);
1034 return 0;
1037 int libxl_device_pci_shutdown(libxl_ctx *ctx, uint32_t domid)
1039 libxl_device_pci *pcidevs;
1040 int num, i, rc;
1042 rc = libxl_device_pci_list_assigned(ctx, &pcidevs, domid, &num);
1043 if ( rc )
1044 return rc;
1045 for (i = 0; i < num; i++) {
1046 /* Force remove on shutdown since, on HVM, qemu will not always
1047 * respond to SCI interrupt because the guest kernel has shut down the
1048 * devices by the time we even get here!
1049 */
1050 if (libxl_device_pci_remove(ctx, domid, pcidevs + i, 1) < 0)
1051 return ERROR_FAIL;
1053 free(pcidevs);
1054 return 0;