debuggers.hg

view tools/blktap2/drivers/tapdisk-vbd.c @ 22848:6341fe0f4e5a

Added tag 4.1.0-rc2 for changeset 9dca60d88c63
author Keir Fraser <keir@xen.org>
date Tue Jan 25 14:06:55 2011 +0000 (2011-01-25)
parents feee0abed6aa
children
line source
1 /*
2 * Copyright (c) 2008, XenSource Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of XenSource Inc. nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28 #include <stdio.h>
29 #include <errno.h>
30 #include <fcntl.h>
31 #include <regex.h>
32 #include <unistd.h>
33 #include <stdlib.h>
34 #include <libgen.h>
35 #include <sys/mman.h>
36 #include <sys/ioctl.h>
37 #ifdef MEMSHR
38 #include <memshr.h>
39 #endif
41 #include "tapdisk-image.h"
42 #include "tapdisk-driver.h"
43 #include "tapdisk-server.h"
44 #include "tapdisk-interface.h"
45 #include "tapdisk-disktype.h"
46 #include "tapdisk-vbd.h"
47 #include "blktap2.h"
/*
 * Logging shorthands: pass a level (DBG) or an error code (ERR) plus a
 * printf-style format and its arguments through to tlog_write() /
 * tlog_error().
 */
#define DBG(_level, _f, ...) tlog_write(_level, _f, ##__VA_ARGS__)
#define ERR(_err, _f, ...)   tlog_error(_err, _f, ##__VA_ARGS__)
#if 1
/*
 * ASSERT(p): if p is false, log the failed expression together with its
 * source location and terminate the process.
 *
 * abort() is used rather than a store through a null pointer: the latter
 * is undefined behaviour, so an optimising compiler is free to drop it and
 * let execution continue past a failed assertion.
 */
#define ASSERT(p)							\
	do {								\
		if (!(p)) {						\
			DPRINTF("Assertion '%s' failed, line %d, "	\
				"file %s", #p, __LINE__, __FILE__);	\
			abort();					\
		}							\
	} while (0)
#else
#define ASSERT(p) ((void)0)
#endif
/* Number of attempts made to open an image chain while the open keeps
 * failing with -EIO. */
#define TD_VBD_EIO_RETRIES 10
/* Argument passed to sleep() between two such attempts (seconds). */
#define TD_VBD_EIO_SLEEP 1
/* Seconds (difference of tv_sec values) pending requests may sit without
 * activity before the watchdog dumps the debug log. */
#define TD_VBD_WATCHDOG_TIMEOUT 10
/* Forward declarations: both are referenced (as event handler and as the
 * default completion callback) before their definitions appear. */
static void tapdisk_vbd_ring_event(event_id_t, char, void *);
static void tapdisk_vbd_callback(void *, blkif_response_t *);
73 /*
74 * initialization
75 */
77 static inline void
78 tapdisk_vbd_initialize_vreq(td_vbd_request_t *vreq)
79 {
80 memset(vreq, 0, sizeof(td_vbd_request_t));
81 INIT_LIST_HEAD(&vreq->next);
82 }
84 void
85 tapdisk_vbd_free(td_vbd_t *vbd)
86 {
87 if (vbd) {
88 tapdisk_vbd_free_stack(vbd);
89 list_del_init(&vbd->next);
90 free(vbd->name);
91 free(vbd);
92 }
93 }
95 td_vbd_t*
96 tapdisk_vbd_create(uint16_t uuid)
97 {
98 td_vbd_t *vbd;
99 int i;
101 vbd = calloc(1, sizeof(td_vbd_t));
102 if (!vbd) {
103 EPRINTF("failed to allocate tapdisk state\n");
104 return NULL;
105 }
107 vbd->uuid = uuid;
108 vbd->minor = -1;
109 vbd->ring.fd = -1;
111 /* default blktap ring completion */
112 vbd->callback = tapdisk_vbd_callback;
113 vbd->argument = vbd;
115 #ifdef MEMSHR
116 memshr_vbd_initialize();
117 #endif
119 INIT_LIST_HEAD(&vbd->driver_stack);
120 INIT_LIST_HEAD(&vbd->images);
121 INIT_LIST_HEAD(&vbd->new_requests);
122 INIT_LIST_HEAD(&vbd->pending_requests);
123 INIT_LIST_HEAD(&vbd->failed_requests);
124 INIT_LIST_HEAD(&vbd->completed_requests);
125 INIT_LIST_HEAD(&vbd->next);
126 gettimeofday(&vbd->ts, NULL);
128 for (i = 0; i < MAX_REQUESTS; i++)
129 tapdisk_vbd_initialize_vreq(vbd->request_list + i);
131 return vbd;
132 }
134 int
135 tapdisk_vbd_initialize(uint16_t uuid)
136 {
137 td_vbd_t *vbd;
139 vbd = tapdisk_server_get_vbd(uuid);
140 if (vbd) {
141 EPRINTF("duplicate vbds! %u\n", uuid);
142 return -EEXIST;
143 }
145 vbd = tapdisk_vbd_create(uuid);
147 tapdisk_server_add_vbd(vbd);
149 return 0;
150 }
152 void
153 tapdisk_vbd_set_callback(td_vbd_t *vbd, td_vbd_cb_t callback, void *argument)
154 {
155 vbd->callback = callback;
156 vbd->argument = argument;
157 }
159 static int
160 tapdisk_vbd_validate_chain(td_vbd_t *vbd)
161 {
162 int err;
163 td_image_t *image, *parent, *tmp;
165 DPRINTF("VBD CHAIN:\n");
167 tapdisk_vbd_for_each_image(vbd, image, tmp) {
168 DPRINTF("%s: %d\n", image->name, image->type);
170 if (tapdisk_vbd_is_last_image(vbd, image))
171 break;
173 parent = tapdisk_vbd_next_image(image);
174 err = td_validate_parent(image, parent);
175 if (err)
176 return err;
177 }
179 return 0;
180 }
182 void
183 tapdisk_vbd_close_vdi(td_vbd_t *vbd)
184 {
185 td_image_t *image, *tmp;
187 tapdisk_vbd_for_each_image(vbd, image, tmp) {
188 td_close(image);
189 tapdisk_image_free(image);
190 }
192 INIT_LIST_HEAD(&vbd->images);
193 td_flag_set(vbd->state, TD_VBD_CLOSED);
195 tapdisk_vbd_free_stack(vbd);
196 }
198 static int
199 tapdisk_vbd_add_block_cache(td_vbd_t *vbd)
200 {
201 int err;
202 td_driver_t *driver;
203 td_image_t *cache, *image, *target, *tmp;
205 target = NULL;
207 tapdisk_vbd_for_each_image(vbd, image, tmp)
208 if (td_flag_test(image->flags, TD_OPEN_RDONLY) &&
209 td_flag_test(image->flags, TD_OPEN_SHAREABLE)) {
210 target = image;
211 break;
212 }
214 if (!target)
215 return 0;
217 cache = tapdisk_image_allocate(target->name,
218 DISK_TYPE_BLOCK_CACHE,
219 target->storage,
220 target->flags,
221 target->private);
222 if (!cache)
223 return -ENOMEM;
225 /* try to load existing cache */
226 err = td_load(cache);
227 if (!err)
228 goto done;
230 /* hack driver to send open() correct image size */
231 if (!target->driver) {
232 err = -ENODEV;
233 goto fail;
234 }
236 cache->driver = tapdisk_driver_allocate(cache->type,
237 cache->name,
238 cache->flags,
239 cache->storage);
240 if (!cache->driver) {
241 err = -ENOMEM;
242 goto fail;
243 }
245 cache->driver->info = target->driver->info;
247 /* try to open new cache */
248 err = td_open(cache);
249 if (!err)
250 goto done;
252 fail:
253 /* give up */
254 tapdisk_image_free(target);
255 return err;
257 done:
258 /* insert cache before image */
259 list_add(&cache->next, target->next.prev);
260 return 0;
261 }
263 static int
264 tapdisk_vbd_add_dirty_log(td_vbd_t *vbd)
265 {
266 int err;
267 td_driver_t *driver;
268 td_image_t *log, *parent;
270 driver = NULL;
271 log = NULL;
273 parent = tapdisk_vbd_first_image(vbd);
275 log = tapdisk_image_allocate(parent->name,
276 DISK_TYPE_LOG,
277 parent->storage,
278 parent->flags,
279 vbd);
280 if (!log)
281 return -ENOMEM;
283 driver = tapdisk_driver_allocate(log->type,
284 log->name,
285 log->flags,
286 log->storage);
287 if (!driver) {
288 err = -ENOMEM;
289 goto fail;
290 }
292 driver->info = parent->driver->info;
293 log->driver = driver;
295 err = td_open(log);
296 if (err)
297 goto fail;
299 list_add(&log->next, &vbd->images);
300 return 0;
302 fail:
303 tapdisk_image_free(log);
304 return err;
305 }
307 static int
308 tapdisk_vbd_open_level(td_vbd_t *vbd, struct list_head *head,
309 const char *params, int driver_type,
310 td_disk_info_t *driver_info, td_flag_t flags)
311 {
312 const char *name;
313 int type, err;
314 td_image_t *image;
315 td_disk_id_t id;
316 td_driver_t *driver;
318 name = params;
319 id.name = NULL;
320 type = driver_type;
321 INIT_LIST_HEAD(head);
323 for (;;) {
324 err = -ENOMEM;
325 image = tapdisk_image_allocate(name, type,
326 vbd->storage, flags, vbd);
328 free(id.name);
330 if (!image)
331 goto out;
334 /* this breaks if a driver modifies its info within a layer */
335 err = __td_open(image, driver_info);
336 if (err)
337 goto out;
339 /* TODO: non-sink drivers that don't care about their child
340 * currently return EINVAL. Could return TD_PARENT_OK or
341 * TD_ANY_PARENT */
343 err = td_get_parent_id(image, &id);
344 if (err && (err != TD_NO_PARENT && err != -EINVAL)) {
345 td_close(image);
346 goto out;
347 }
349 /* add this image to the end of the list */
350 list_add_tail(&image->next, head);
351 image = NULL;
353 /* if the image does not have a parent we return the
354 * list of images generated by this level of the stack */
355 if (err == TD_NO_PARENT || err == -EINVAL) {
356 err = 0;
357 goto out;
358 }
360 name = id.name;
361 type = id.drivertype;
363 flags |= (TD_OPEN_RDONLY | TD_OPEN_SHAREABLE);
364 }
366 out:
367 if (err) {
368 if (image) {
369 td_close(image);
370 tapdisk_image_free(image);
371 }
372 while (!list_empty(head)) {
373 image = list_entry(&head->next, td_image_t, next);
374 td_close(image);
375 tapdisk_image_free(image);
376 }
377 }
379 return err;
380 }
382 static int
383 __tapdisk_vbd_open_vdi(td_vbd_t *vbd, td_flag_t extra_flags)
384 {
385 int err;
386 td_flag_t flags;
387 td_image_t *tmp;
388 td_vbd_driver_info_t *driver_info;
389 struct list_head *images;
390 td_disk_info_t *parent_info = NULL;
392 if (list_empty(&vbd->driver_stack))
393 return -ENOENT;
395 flags = (vbd->flags & ~TD_OPEN_SHAREABLE) | extra_flags;
397 /* loop on each user specified driver.
398 * NOTE: driver_info is in reverse order. That is, the first
399 * item is the 'parent' or 'sink' driver */
400 list_for_each_entry(driver_info, &vbd->driver_stack, next) {
401 LIST_HEAD(images);
403 err = tapdisk_vbd_open_level(vbd, &images,
404 driver_info->params,
405 driver_info->type,
406 parent_info, flags);
407 if (err)
408 goto fail;
410 /* after each loop,
411 * append the created stack to the result stack */
412 list_splice(&images, &vbd->images);
414 /* set the parent_info to the first diskinfo on the stack */
415 tmp = tapdisk_vbd_first_image(vbd);
416 parent_info = &tmp->info;
417 }
419 if (td_flag_test(vbd->flags, TD_OPEN_LOG_DIRTY)) {
420 err = tapdisk_vbd_add_dirty_log(vbd);
421 if (err)
422 goto fail;
423 }
425 if (td_flag_test(vbd->flags, TD_OPEN_ADD_CACHE)) {
426 err = tapdisk_vbd_add_block_cache(vbd);
427 if (err)
428 goto fail;
429 }
431 err = tapdisk_vbd_validate_chain(vbd);
432 if (err)
433 goto fail;
435 td_flag_clear(vbd->state, TD_VBD_CLOSED);
437 return 0;
439 fail:
440 tapdisk_vbd_close_vdi(vbd);
441 return err;
442 }
444 /* this populates a vbd type based on path */
445 int
446 tapdisk_vbd_parse_stack(td_vbd_t *vbd, const char *path)
447 {
448 int err;
449 char *params, *driver_str;
450 td_vbd_driver_info_t *driver;
452 err = tapdisk_namedup(&params, path);
453 if (err)
454 return err;
456 /* tokenize params based on pipe '|' */
457 driver_str = strtok(params, "|");
458 while (driver_str != NULL) {
459 const char *path;
460 int type;
462 /* parse driver info and add to vbd */
463 driver = calloc(1, sizeof(td_vbd_driver_info_t));
464 if (!driver) {
465 PERROR("malloc");
466 err = -errno;
467 goto out;
468 }
469 INIT_LIST_HEAD(&driver->next);
471 err = tapdisk_parse_disk_type(driver_str, &path, &type);
472 if (err) {
473 free(driver);
474 goto out;
475 }
477 driver->type = type;
478 driver->params = strdup(path);
479 if (!driver->params) {
480 err = -ENOMEM;
481 free(driver);
482 goto out;
483 }
485 /* build the list backwards as the last driver will be the
486 * first driver to open in the stack */
487 list_add(&driver->next, &vbd->driver_stack);
489 /* get next driver string */
490 driver_str = strtok(NULL, "|");
491 }
493 out:
494 free(params);
495 if (err)
496 tapdisk_vbd_free_stack(vbd);
498 return err;
499 }
501 void
502 tapdisk_vbd_free_stack(td_vbd_t *vbd)
503 {
504 td_vbd_driver_info_t *driver;
506 while (!list_empty(&vbd->driver_stack)) {
507 driver = list_entry(vbd->driver_stack.next,
508 td_vbd_driver_info_t, next);
509 list_del(&driver->next);
510 free(driver->params);
511 free(driver);
512 }
513 }
515 /* NOTE: driver type, etc. must be set */
516 int
517 tapdisk_vbd_open_stack(td_vbd_t *vbd, uint16_t storage, td_flag_t flags)
518 {
519 int i, err;
521 vbd->flags = flags;
522 vbd->storage = storage;
524 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
525 err = __tapdisk_vbd_open_vdi(vbd, 0);
526 if (err != -EIO)
527 break;
529 sleep(TD_VBD_EIO_SLEEP);
530 }
531 if (err)
532 goto fail;
534 return 0;
536 fail:
537 return err;
538 }
540 int
541 tapdisk_vbd_open_vdi(td_vbd_t *vbd, const char *path,
542 uint16_t drivertype, uint16_t storage, td_flag_t flags)
543 {
544 int i, err;
545 const struct tap_disk *ops;
547 ops = tapdisk_disk_drivers[drivertype];
548 if (!ops)
549 return -EINVAL;
550 DPRINTF("Loaded %s driver for vbd %u %s 0x%08x\n",
551 ops->disk_type, vbd->uuid, path, flags);
553 err = tapdisk_namedup(&vbd->name, path);
554 if (err)
555 return err;
557 vbd->flags = flags;
558 vbd->storage = storage;
560 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
561 err = __tapdisk_vbd_open_vdi(vbd, 0);
562 if (err != -EIO)
563 break;
565 sleep(TD_VBD_EIO_SLEEP);
566 }
567 if (err)
568 goto fail;
570 return 0;
572 fail:
573 free(vbd->name);
574 vbd->name = NULL;
575 return err;
576 }
578 static int
579 tapdisk_vbd_register_event_watches(td_vbd_t *vbd)
580 {
581 event_id_t id;
583 id = tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
584 vbd->ring.fd, 0,
585 tapdisk_vbd_ring_event, vbd);
586 if (id < 0)
587 return id;
589 vbd->ring_event_id = id;
591 return 0;
592 }
594 static void
595 tapdisk_vbd_unregister_events(td_vbd_t *vbd)
596 {
597 if (vbd->ring_event_id)
598 tapdisk_server_unregister_event(vbd->ring_event_id);
599 }
601 static int
602 tapdisk_vbd_map_device(td_vbd_t *vbd, const char *devname)
603 {
605 int err, psize;
606 td_ring_t *ring;
608 ring = &vbd->ring;
609 psize = getpagesize();
611 ring->fd = open(devname, O_RDWR);
612 if (ring->fd == -1) {
613 err = -errno;
614 EPRINTF("failed to open %s: %d\n", devname, err);
615 goto fail;
616 }
618 ring->mem = mmap(0, psize * BLKTAP_MMAP_REGION_SIZE,
619 PROT_READ | PROT_WRITE, MAP_SHARED, ring->fd, 0);
620 if (ring->mem == MAP_FAILED) {
621 err = -errno;
622 EPRINTF("failed to mmap %s: %d\n", devname, err);
623 goto fail;
624 }
626 ring->sring = (blkif_sring_t *)((unsigned long)ring->mem);
627 BACK_RING_INIT(&ring->fe_ring, ring->sring, psize);
629 ring->vstart =
630 (unsigned long)ring->mem + (BLKTAP_RING_PAGES * psize);
632 ioctl(ring->fd, BLKTAP_IOCTL_SETMODE, BLKTAP_MODE_INTERPOSE);
634 return 0;
636 fail:
637 if (ring->mem && ring->mem != MAP_FAILED)
638 munmap(ring->mem, psize * BLKTAP_MMAP_REGION_SIZE);
639 if (ring->fd != -1)
640 close(ring->fd);
641 ring->fd = -1;
642 ring->mem = NULL;
643 return err;
644 }
646 static int
647 tapdisk_vbd_unmap_device(td_vbd_t *vbd)
648 {
649 int psize;
651 psize = getpagesize();
653 if (vbd->ring.fd != -1)
654 close(vbd->ring.fd);
655 if (vbd->ring.mem > 0)
656 munmap(vbd->ring.mem, psize * BLKTAP_MMAP_REGION_SIZE);
658 return 0;
659 }
661 void
662 tapdisk_vbd_detach(td_vbd_t *vbd)
663 {
664 tapdisk_vbd_unregister_events(vbd);
666 tapdisk_vbd_unmap_device(vbd);
667 vbd->minor = -1;
668 }
671 int
672 tapdisk_vbd_attach(td_vbd_t *vbd, const char *devname, int minor)
673 {
674 int err;
676 err = tapdisk_vbd_map_device(vbd, devname);
677 if (err)
678 goto fail;
680 err = tapdisk_vbd_register_event_watches(vbd);
681 if (err)
682 goto fail;
684 vbd->minor = minor;
686 return 0;
688 fail:
689 tapdisk_vbd_detach(vbd);
691 return err;
692 }
694 int
695 tapdisk_vbd_open(td_vbd_t *vbd, const char *name, uint16_t type,
696 uint16_t storage, int minor, const char *ring, td_flag_t flags)
697 {
698 int err;
700 err = tapdisk_vbd_open_stack(vbd, storage, flags);
701 if (err)
702 goto out;
704 err = tapdisk_vbd_attach(vbd, ring, minor);
705 if (err)
706 goto out;
708 return 0;
710 out:
711 tapdisk_vbd_detach(vbd);
712 tapdisk_vbd_close_vdi(vbd);
713 free(vbd->name);
714 vbd->name = NULL;
715 return err;
716 }
718 static void
719 tapdisk_vbd_queue_count(td_vbd_t *vbd, int *new,
720 int *pending, int *failed, int *completed)
721 {
722 int n, p, f, c;
723 td_vbd_request_t *vreq, *tvreq;
725 n = 0;
726 p = 0;
727 f = 0;
728 c = 0;
730 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->new_requests)
731 n++;
733 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->pending_requests)
734 p++;
736 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->failed_requests)
737 f++;
739 tapdisk_vbd_for_each_request(vreq, tvreq, &vbd->completed_requests)
740 c++;
742 *new = n;
743 *pending = p;
744 *failed = f;
745 *completed = c;
746 }
748 static int
749 tapdisk_vbd_shutdown(td_vbd_t *vbd)
750 {
751 int new, pending, failed, completed;
753 if (!list_empty(&vbd->pending_requests))
754 return -EAGAIN;
756 tapdisk_vbd_kick(vbd);
757 tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed);
759 DPRINTF("%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
760 "failed: 0x%02x, completed: 0x%02x\n",
761 vbd->name, vbd->state, new, pending, failed, completed);
762 DPRINTF("last activity: %010ld.%06lld, errors: 0x%04"PRIx64", "
763 "retries: 0x%04"PRIx64", received: 0x%08"PRIx64", "
764 "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n",
765 vbd->ts.tv_sec, (unsigned long long)vbd->ts.tv_usec,
766 vbd->errors, vbd->retries, vbd->received, vbd->returned,
767 vbd->kicked);
769 tapdisk_vbd_close_vdi(vbd);
770 tapdisk_vbd_detach(vbd);
771 tapdisk_server_remove_vbd(vbd);
772 tapdisk_vbd_free(vbd);
774 tlog_print_errors();
776 return 0;
777 }
779 int
780 tapdisk_vbd_close(td_vbd_t *vbd)
781 {
782 /*
783 * don't close if any requests are pending in the aio layer
784 */
785 if (!list_empty(&vbd->pending_requests))
786 goto fail;
788 /*
789 * if the queue is still active and we have more
790 * requests, try to complete them before closing.
791 */
792 if (tapdisk_vbd_queue_ready(vbd) &&
793 (!list_empty(&vbd->new_requests) ||
794 !list_empty(&vbd->failed_requests) ||
795 !list_empty(&vbd->completed_requests)))
796 goto fail;
798 return tapdisk_vbd_shutdown(vbd);
800 fail:
801 td_flag_set(vbd->state, TD_VBD_SHUTDOWN_REQUESTED);
802 DBG(TLOG_WARN, "%s: requests pending\n", vbd->name);
803 return -EAGAIN;
804 }
806 /*
807 * control operations
808 */
810 void
811 tapdisk_vbd_debug(td_vbd_t *vbd)
812 {
813 td_image_t *image, *tmp;
814 int new, pending, failed, completed;
816 tapdisk_vbd_queue_count(vbd, &new, &pending, &failed, &completed);
818 DBG(TLOG_WARN, "%s: state: 0x%08x, new: 0x%02x, pending: 0x%02x, "
819 "failed: 0x%02x, completed: 0x%02x, last activity: %010ld.%06lld, "
820 "errors: 0x%04"PRIx64", retries: 0x%04"PRIx64", received: 0x%08"PRIx64", "
821 "returned: 0x%08"PRIx64", kicked: 0x%08"PRIx64"\n",
822 vbd->name, vbd->state, new, pending, failed, completed,
823 vbd->ts.tv_sec, (unsigned long long)vbd->ts.tv_usec,
824 vbd->errors, vbd->retries,
825 vbd->received, vbd->returned, vbd->kicked);
827 tapdisk_vbd_for_each_image(vbd, image, tmp)
828 td_debug(image);
829 }
831 static void
832 tapdisk_vbd_drop_log(td_vbd_t *vbd)
833 {
834 if (td_flag_test(vbd->state, TD_VBD_LOG_DROPPED))
835 return;
837 tapdisk_vbd_debug(vbd);
838 tlog_flush();
839 td_flag_set(vbd->state, TD_VBD_LOG_DROPPED);
840 }
842 int
843 tapdisk_vbd_get_image_info(td_vbd_t *vbd, image_t *img)
844 {
845 td_image_t *image;
847 memset(img, 0, sizeof(image_t));
849 if (list_empty(&vbd->images))
850 return -EINVAL;
852 image = tapdisk_vbd_first_image(vbd);
853 img->size = image->info.size;
854 img->secsize = image->info.sector_size;
855 img->info = image->info.info;
857 return 0;
858 }
860 int
861 tapdisk_vbd_queue_ready(td_vbd_t *vbd)
862 {
863 return (!td_flag_test(vbd->state, TD_VBD_DEAD) &&
864 !td_flag_test(vbd->state, TD_VBD_CLOSED) &&
865 !td_flag_test(vbd->state, TD_VBD_QUIESCED) &&
866 !td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED));
867 }
869 int
870 tapdisk_vbd_retry_needed(td_vbd_t *vbd)
871 {
872 return td_flag_test(vbd->state, TD_VBD_RETRY_NEEDED);
873 }
875 int
876 tapdisk_vbd_lock(td_vbd_t *vbd)
877 {
878 return 0;
879 }
881 int
882 tapdisk_vbd_quiesce_queue(td_vbd_t *vbd)
883 {
884 if (!list_empty(&vbd->pending_requests)) {
885 td_flag_set(vbd->state, TD_VBD_QUIESCE_REQUESTED);
886 return -EAGAIN;
887 }
889 td_flag_clear(vbd->state, TD_VBD_QUIESCE_REQUESTED);
890 td_flag_set(vbd->state, TD_VBD_QUIESCED);
891 return 0;
892 }
894 int
895 tapdisk_vbd_start_queue(td_vbd_t *vbd)
896 {
897 td_flag_clear(vbd->state, TD_VBD_QUIESCED);
898 td_flag_clear(vbd->state, TD_VBD_QUIESCE_REQUESTED);
899 return 0;
900 }
902 int
903 tapdisk_vbd_kill_queue(td_vbd_t *vbd)
904 {
905 tapdisk_vbd_quiesce_queue(vbd);
906 td_flag_set(vbd->state, TD_VBD_DEAD);
907 return 0;
908 }
910 static int
911 tapdisk_vbd_open_image(td_vbd_t *vbd, td_image_t *image)
912 {
913 int err;
914 td_image_t *parent;
916 err = td_open(image);
917 if (err)
918 return err;
920 if (!tapdisk_vbd_is_last_image(vbd, image)) {
921 parent = tapdisk_vbd_next_image(image);
922 err = td_validate_parent(image, parent);
923 if (err) {
924 td_close(image);
925 return err;
926 }
927 }
929 return 0;
930 }
932 static int
933 tapdisk_vbd_close_and_reopen_image(td_vbd_t *vbd, td_image_t *image)
934 {
935 int i, err;
937 td_close(image);
939 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
940 err = tapdisk_vbd_open_image(vbd, image);
941 if (err != -EIO)
942 break;
944 sleep(TD_VBD_EIO_SLEEP);
945 }
947 if (err)
948 td_flag_set(vbd->state, TD_VBD_CLOSED);
950 return err;
951 }
953 int
954 tapdisk_vbd_pause(td_vbd_t *vbd)
955 {
956 int err;
958 td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED);
960 err = tapdisk_vbd_quiesce_queue(vbd);
961 if (err)
962 return err;
964 tapdisk_vbd_close_vdi(vbd);
966 td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
967 td_flag_set(vbd->state, TD_VBD_PAUSED);
969 return 0;
970 }
972 int
973 tapdisk_vbd_resume(td_vbd_t *vbd, const char *path, uint16_t drivertype)
974 {
975 int i, err;
977 if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) {
978 EPRINTF("resume request for unpaused vbd %s\n", vbd->name);
979 return -EINVAL;
980 }
982 if (path) {
983 free(vbd->name);
984 vbd->name = strdup(path);
985 if (!vbd->name) {
986 EPRINTF("copying new vbd %s name failed\n", path);
987 return -EINVAL;
988 }
989 }
991 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
992 err = __tapdisk_vbd_open_vdi(vbd, TD_OPEN_STRICT);
993 if (err != -EIO)
994 break;
996 sleep(TD_VBD_EIO_SLEEP);
997 }
999 if (err)
1000 return err;
1002 tapdisk_vbd_start_queue(vbd);
1003 td_flag_clear(vbd->state, TD_VBD_PAUSED);
1004 td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
1005 tapdisk_vbd_check_state(vbd);
1007 return 0;
1010 int
1011 tapdisk_vbd_kick(td_vbd_t *vbd)
1013 int n;
1014 td_ring_t *ring;
1016 tapdisk_vbd_check_state(vbd);
1018 ring = &vbd->ring;
1019 if (!ring->sring)
1020 return 0;
1022 n = (ring->fe_ring.rsp_prod_pvt - ring->fe_ring.sring->rsp_prod);
1023 if (!n)
1024 return 0;
1026 vbd->kicked += n;
1027 RING_PUSH_RESPONSES(&ring->fe_ring);
1028 ioctl(ring->fd, BLKTAP_IOCTL_KICK_FE, 0);
1030 DBG(TLOG_INFO, "kicking %d: rec: 0x%08"PRIx64", ret: 0x%08"PRIx64", kicked: "
1031 "0x%08"PRIx64"\n", n, vbd->received, vbd->returned, vbd->kicked);
1033 return n;
1036 static inline void
1037 tapdisk_vbd_write_response_to_ring(td_vbd_t *vbd, blkif_response_t *rsp)
1039 td_ring_t *ring;
1040 blkif_response_t *rspp;
1042 ring = &vbd->ring;
1043 rspp = RING_GET_RESPONSE(&ring->fe_ring, ring->fe_ring.rsp_prod_pvt);
1044 memcpy(rspp, rsp, sizeof(blkif_response_t));
1045 ring->fe_ring.rsp_prod_pvt++;
1048 static void
1049 tapdisk_vbd_callback(void *arg, blkif_response_t *rsp)
1051 td_vbd_t *vbd = (td_vbd_t *)arg;
1052 tapdisk_vbd_write_response_to_ring(vbd, rsp);
1055 static void
1056 tapdisk_vbd_make_response(td_vbd_t *vbd, td_vbd_request_t *vreq)
1058 blkif_request_t tmp;
1059 blkif_response_t *rsp;
1061 tmp = vreq->req;
1062 rsp = (blkif_response_t *)&vreq->req;
1064 rsp->id = tmp.id;
1065 rsp->operation = tmp.operation;
1066 rsp->status = vreq->status;
1068 DBG(TLOG_DBG, "writing req %d, sec 0x%08"PRIx64", res %d to ring\n",
1069 (int)tmp.id, tmp.sector_number, vreq->status);
1071 if (rsp->status != BLKIF_RSP_OKAY)
1072 ERR(EIO, "returning BLKIF_RSP %d", rsp->status);
1074 vbd->returned++;
1075 vbd->callback(vbd->argument, rsp);
1078 void
1079 tapdisk_vbd_check_state(td_vbd_t *vbd)
1081 td_vbd_request_t *vreq, *tmp;
1083 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests)
1084 if (vreq->num_retries >= TD_VBD_MAX_RETRIES)
1085 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1087 if (!list_empty(&vbd->new_requests) ||
1088 !list_empty(&vbd->failed_requests))
1089 tapdisk_vbd_issue_requests(vbd);
1091 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->completed_requests) {
1092 tapdisk_vbd_make_response(vbd, vreq);
1093 list_del(&vreq->next);
1094 tapdisk_vbd_initialize_vreq(vreq);
1097 if (td_flag_test(vbd->state, TD_VBD_QUIESCE_REQUESTED))
1098 tapdisk_vbd_quiesce_queue(vbd);
1100 if (td_flag_test(vbd->state, TD_VBD_PAUSE_REQUESTED))
1101 tapdisk_vbd_pause(vbd);
1103 if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
1104 tapdisk_vbd_close(vbd);
1107 void
1108 tapdisk_vbd_check_progress(td_vbd_t *vbd)
1110 int diff;
1111 struct timeval now;
1113 if (list_empty(&vbd->pending_requests))
1114 return;
1116 gettimeofday(&now, NULL);
1117 diff = now.tv_sec - vbd->ts.tv_sec;
1119 if (diff >= TD_VBD_WATCHDOG_TIMEOUT) {
1120 DBG(TLOG_WARN, "%s: watchdog timeout: pending requests "
1121 "idle for %d seconds\n", vbd->name, diff);
1122 tapdisk_vbd_drop_log(vbd);
1123 return;
1126 tapdisk_server_set_max_timeout(TD_VBD_WATCHDOG_TIMEOUT - diff);
1129 /*
1130 * request submission
1131 */
1133 static int
1134 tapdisk_vbd_check_queue(td_vbd_t *vbd)
1136 int err;
1137 td_image_t *image;
1139 if (list_empty(&vbd->images))
1140 return -ENOSYS;
1142 if (!tapdisk_vbd_queue_ready(vbd))
1143 return -EAGAIN;
1145 if (!vbd->reopened) {
1146 if (td_flag_test(vbd->state, TD_VBD_LOCKING)) {
1147 err = tapdisk_vbd_lock(vbd);
1148 if (err)
1149 return err;
1152 image = tapdisk_vbd_first_image(vbd);
1153 td_flag_set(image->flags, TD_OPEN_STRICT);
1155 if (tapdisk_vbd_close_and_reopen_image(vbd, image))
1156 EPRINTF("reopening disks failed\n");
1157 else {
1158 DPRINTF("reopening disks succeeded\n");
1159 vbd->reopened = 1;
1163 return 0;
1166 void
1167 tapdisk_vbd_complete_vbd_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
1169 if (!vreq->submitting && !vreq->secs_pending) {
1170 if (vreq->status == BLKIF_RSP_ERROR &&
1171 vreq->num_retries < TD_VBD_MAX_RETRIES &&
1172 !td_flag_test(vbd->state, TD_VBD_DEAD) &&
1173 !td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
1174 tapdisk_vbd_move_request(vreq, &vbd->failed_requests);
1175 else
1176 tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
1180 static uint64_t
1181 tapdisk_vbd_breq_get_sector(blkif_request_t *breq, td_request_t treq)
1183 int seg, nsects;
1184 uint64_t sector_nr = breq->sector_number;
1186 for(seg=0; seg < treq.sidx; seg++) {
1187 nsects = breq->seg[seg].last_sect - breq->seg[seg].first_sect + 1;
1188 sector_nr += nsects;
1191 return sector_nr;
1194 static void
1195 __tapdisk_vbd_complete_td_request(td_vbd_t *vbd, td_vbd_request_t *vreq,
1196 td_request_t treq, int res)
1198 int err;
1199 td_image_t *image = treq.image;
1201 err = (res <= 0 ? res : -res);
1202 vbd->secs_pending -= treq.secs;
1203 vreq->secs_pending -= treq.secs;
1205 vreq->blocked = treq.blocked;
1207 if (err) {
1208 vreq->status = BLKIF_RSP_ERROR;
1209 vreq->error = (vreq->error ? : err);
1210 if (err != -EBUSY) {
1211 vbd->errors++;
1212 ERR(err, "req %"PRIu64": %s 0x%04x secs to "
1213 "0x%08"PRIx64, vreq->req.id,
1214 (treq.op == TD_OP_WRITE ? "write" : "read"),
1215 treq.secs, treq.sec);
1217 } else {
1218 #ifdef MEMSHR
1219 if (treq.op == TD_OP_READ
1220 && td_flag_test(image->flags, TD_OPEN_RDONLY)) {
1221 uint64_t hnd = treq.memshr_hnd;
1222 uint16_t uid = image->memshr_id;
1223 blkif_request_t *breq = &vreq->req;
1224 uint64_t sec = tapdisk_vbd_breq_get_sector(breq, treq);
1225 int secs = breq->seg[treq.sidx].last_sect -
1226 breq->seg[treq.sidx].first_sect + 1;
1228 if (hnd != 0)
1229 memshr_vbd_complete_ro_request(hnd, uid,
1230 sec, secs);
1232 #endif
1235 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1238 static void
1239 __tapdisk_vbd_reissue_td_request(td_vbd_t *vbd,
1240 td_image_t *image, td_request_t treq)
1242 td_image_t *parent;
1243 td_vbd_request_t *vreq;
1245 vreq = (td_vbd_request_t *)treq.private;
1246 gettimeofday(&vreq->last_try, NULL);
1248 vreq->submitting++;
1250 if (tapdisk_vbd_is_last_image(vbd, image)) {
1251 memset(treq.buf, 0, treq.secs << SECTOR_SHIFT);
1252 td_complete_request(treq, 0);
1253 goto done;
1256 parent = tapdisk_vbd_next_image(image);
1257 treq.image = parent;
1259 /* return zeros for requests that extend beyond end of parent image */
1260 if (treq.sec + treq.secs > parent->info.size) {
1261 td_request_t clone = treq;
1263 if (parent->info.size > treq.sec) {
1264 int secs = parent->info.size - treq.sec;
1265 clone.sec += secs;
1266 clone.secs -= secs;
1267 clone.buf += (secs << SECTOR_SHIFT);
1268 treq.secs = secs;
1269 } else
1270 treq.secs = 0;
1272 memset(clone.buf, 0, clone.secs << SECTOR_SHIFT);
1273 td_complete_request(clone, 0);
1275 if (!treq.secs)
1276 goto done;
1279 switch (treq.op) {
1280 case TD_OP_WRITE:
1281 td_queue_write(parent, treq);
1282 break;
1284 case TD_OP_READ:
1285 #ifdef MEMSHR
1286 if(td_flag_test(parent->flags, TD_OPEN_RDONLY)) {
1287 int ret, seg = treq.sidx;
1288 blkif_request_t *breq = &vreq->req;
1290 ret = memshr_vbd_issue_ro_request(treq.buf,
1291 breq->seg[seg].gref,
1292 parent->memshr_id,
1293 treq.sec,
1294 treq.secs,
1295 &treq.memshr_hnd);
1296 if(ret == 0) {
1297 /* Reset memshr handle. This'll prevent
1298 * memshr_vbd_complete_ro_request being called
1299 */
1300 treq.memshr_hnd = 0;
1301 td_complete_request(treq, 0);
1302 } else
1303 td_queue_read(parent, treq);
1304 } else
1305 #endif
1306 td_queue_read(parent, treq);
1307 break;
1310 done:
1311 vreq->submitting--;
1312 if (!vreq->secs_pending)
1313 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1316 void
1317 tapdisk_vbd_forward_request(td_request_t treq)
1319 td_vbd_t *vbd;
1320 td_image_t *image;
1321 td_vbd_request_t *vreq;
1323 image = treq.image;
1324 vbd = (td_vbd_t *)image->private;
1325 vreq = (td_vbd_request_t *)treq.private;
1327 gettimeofday(&vbd->ts, NULL);
1329 if (tapdisk_vbd_queue_ready(vbd))
1330 __tapdisk_vbd_reissue_td_request(vbd, image, treq);
1331 else
1332 __tapdisk_vbd_complete_td_request(vbd, vreq, treq, -EIO);
1335 static void
1336 tapdisk_vbd_complete_td_request(td_request_t treq, int res)
1338 td_vbd_t *vbd;
1339 td_image_t *image;
1340 td_vbd_request_t *vreq;
1342 image = treq.image;
1343 vbd = (td_vbd_t *)image->private;
1344 vreq = (td_vbd_request_t *)treq.private;
1346 gettimeofday(&vbd->ts, NULL);
1347 DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08"PRIx64" "
1348 "secs 0x%04x buf %p op %d res %d\n", image->name,
1349 (int)treq.id, treq.sidx, treq.sec, treq.secs,
1350 treq.buf, (int)vreq->req.operation, res);
1352 __tapdisk_vbd_complete_td_request(vbd, vreq, treq, res);
1355 static int
1356 tapdisk_vbd_issue_request(td_vbd_t *vbd, td_vbd_request_t *vreq)
1358 char *page;
1359 td_ring_t *ring;
1360 td_image_t *image;
1361 td_request_t treq;
1362 uint64_t sector_nr;
1363 blkif_request_t *req;
1364 int i, err, id, nsects;
1366 req = &vreq->req;
1367 id = req->id;
1368 ring = &vbd->ring;
1369 sector_nr = req->sector_number;
1370 image = tapdisk_vbd_first_image(vbd);
1372 vreq->submitting = 1;
1373 gettimeofday(&vbd->ts, NULL);
1374 gettimeofday(&vreq->last_try, NULL);
1375 tapdisk_vbd_move_request(vreq, &vbd->pending_requests);
1377 #if 0
1378 err = tapdisk_vbd_check_queue(vbd);
1379 if (err)
1380 goto fail;
1381 #endif
1383 err = tapdisk_image_check_ring_request(image, req);
1384 if (err)
1385 goto fail;
1387 for (i = 0; i < req->nr_segments; i++) {
1388 nsects = req->seg[i].last_sect - req->seg[i].first_sect + 1;
1389 page = (char *)MMAP_VADDR(ring->vstart,
1390 (unsigned long)req->id, i);
1391 page += (req->seg[i].first_sect << SECTOR_SHIFT);
1393 treq.id = id;
1394 treq.sidx = i;
1395 treq.blocked = 0;
1396 treq.buf = page;
1397 treq.sec = sector_nr;
1398 treq.secs = nsects;
1399 treq.image = image;
1400 treq.cb = tapdisk_vbd_complete_td_request;
1401 treq.cb_data = NULL;
1402 treq.private = vreq;
1404 DBG(TLOG_DBG, "%s: req %d seg %d sec 0x%08"PRIx64" secs 0x%04x "
1405 "buf %p op %d\n", image->name, id, i, treq.sec, treq.secs,
1406 treq.buf, (int)req->operation);
1408 vreq->secs_pending += nsects;
1409 vbd->secs_pending += nsects;
1411 switch (req->operation) {
1412 case BLKIF_OP_WRITE:
1413 treq.op = TD_OP_WRITE;
1414 td_queue_write(image, treq);
1415 break;
1417 case BLKIF_OP_READ:
1418 treq.op = TD_OP_READ;
1419 td_queue_read(image, treq);
1420 break;
1423 sector_nr += nsects;
1426 err = 0;
1428 out:
1429 vreq->submitting--;
1430 if (!vreq->secs_pending) {
1431 err = (err ? : vreq->error);
1432 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1435 return err;
1437 fail:
1438 vreq->status = BLKIF_RSP_ERROR;
1439 goto out;
1442 static int
1443 tapdisk_vbd_reissue_failed_requests(td_vbd_t *vbd)
1445 int err;
1446 struct timeval now;
1447 td_vbd_request_t *vreq, *tmp;
1449 err = 0;
1450 gettimeofday(&now, NULL);
1452 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) {
1453 if (vreq->secs_pending)
1454 continue;
1456 if (td_flag_test(vbd->state, TD_VBD_SHUTDOWN_REQUESTED))
1457 goto fail;
1459 if (vreq->error != -EBUSY &&
1460 now.tv_sec - vreq->last_try.tv_sec < TD_VBD_RETRY_INTERVAL)
1461 continue;
1463 if (vreq->num_retries >= TD_VBD_MAX_RETRIES) {
1464 fail:
1465 DBG(TLOG_INFO, "req %"PRIu64"retried %d times\n",
1466 vreq->req.id, vreq->num_retries);
1467 tapdisk_vbd_complete_vbd_request(vbd, vreq);
1468 continue;
1471 /*
1472 * never fail due to too many retries if we are blocked on a
1473 * dependency
1474 */
1475 if (vreq->blocked) {
1476 vreq->blocked = 0;
1477 } else {
1478 vbd->retries++;
1479 vreq->num_retries++;
1481 vreq->error = 0;
1482 vreq->status = BLKIF_RSP_OKAY;
1483 DBG(TLOG_DBG, "retry #%d of req %"PRIu64", "
1484 "sec 0x%08"PRIx64", nr_segs: %d\n", vreq->num_retries,
1485 vreq->req.id, vreq->req.sector_number,
1486 vreq->req.nr_segments);
1488 err = tapdisk_vbd_issue_request(vbd, vreq);
1489 if (err)
1490 break;
1493 if (list_empty(&vbd->failed_requests))
1494 td_flag_clear(vbd->state, TD_VBD_RETRY_NEEDED);
1495 else
1496 td_flag_set(vbd->state, TD_VBD_RETRY_NEEDED);
1498 return err;
1501 static int
1502 tapdisk_vbd_issue_new_requests(td_vbd_t *vbd)
1504 int err;
1505 td_vbd_request_t *vreq, *tmp;
1507 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) {
1508 err = tapdisk_vbd_issue_request(vbd, vreq);
1509 if (err)
1510 return err;
1513 return 0;
1516 static int
1517 tapdisk_vbd_kill_requests(td_vbd_t *vbd)
1519 td_vbd_request_t *vreq, *tmp;
1521 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->new_requests) {
1522 vreq->status = BLKIF_RSP_ERROR;
1523 tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
1526 tapdisk_vbd_for_each_request(vreq, tmp, &vbd->failed_requests) {
1527 vreq->status = BLKIF_RSP_ERROR;
1528 tapdisk_vbd_move_request(vreq, &vbd->completed_requests);
1531 return 0;
1534 int
1535 tapdisk_vbd_issue_requests(td_vbd_t *vbd)
1537 int err;
1539 if (td_flag_test(vbd->state, TD_VBD_DEAD))
1540 return tapdisk_vbd_kill_requests(vbd);
1542 if (!tapdisk_vbd_queue_ready(vbd))
1543 return -EAGAIN;
1545 err = tapdisk_vbd_reissue_failed_requests(vbd);
1546 if (err)
1547 return err;
1549 return tapdisk_vbd_issue_new_requests(vbd);
1552 static void
1553 tapdisk_vbd_pull_ring_requests(td_vbd_t *vbd)
1555 int idx;
1556 RING_IDX rp, rc;
1557 td_ring_t *ring;
1558 blkif_request_t *req;
1559 td_vbd_request_t *vreq;
1561 ring = &vbd->ring;
1562 if (!ring->sring)
1563 return;
1565 rp = ring->fe_ring.sring->req_prod;
1566 xen_rmb();
1568 for (rc = ring->fe_ring.req_cons; rc != rp; rc++) {
1569 req = RING_GET_REQUEST(&ring->fe_ring, rc);
1570 ++ring->fe_ring.req_cons;
1572 idx = req->id;
1573 vreq = &vbd->request_list[idx];
1575 ASSERT(list_empty(&vreq->next));
1576 ASSERT(vreq->secs_pending == 0);
1578 memcpy(&vreq->req, req, sizeof(blkif_request_t));
1579 vbd->received++;
1580 vreq->vbd = vbd;
1582 tapdisk_vbd_move_request(vreq, &vbd->new_requests);
1584 DBG(TLOG_DBG, "%s: request %d \n", vbd->name, idx);
1588 static int
1589 tapdisk_vbd_pause_ring(td_vbd_t *vbd)
1591 int err;
1593 if (td_flag_test(vbd->state, TD_VBD_PAUSED))
1594 return 0;
1596 td_flag_set(vbd->state, TD_VBD_PAUSE_REQUESTED);
1598 err = tapdisk_vbd_quiesce_queue(vbd);
1599 if (err) {
1600 EPRINTF("%s: ring pause request on active queue\n", vbd->name);
1601 return err;
1604 tapdisk_vbd_close_vdi(vbd);
1606 err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_PAUSE, 0);
1607 if (err)
1608 EPRINTF("%s: pause ioctl failed: %d\n", vbd->name, errno);
1609 else {
1610 td_flag_clear(vbd->state, TD_VBD_PAUSE_REQUESTED);
1611 td_flag_set(vbd->state, TD_VBD_PAUSED);
1614 return err;
1617 static int
1618 tapdisk_vbd_resume_ring(td_vbd_t *vbd)
1620 int i, err, type;
1621 char message[BLKTAP2_MAX_MESSAGE_LEN];
1622 const char *path;
1624 memset(message, 0, sizeof(message));
1626 if (!td_flag_test(vbd->state, TD_VBD_PAUSED)) {
1627 EPRINTF("%s: resume message for unpaused vbd\n", vbd->name);
1628 return -EINVAL;
1631 err = ioctl(vbd->ring.fd, BLKTAP2_IOCTL_REOPEN, &message);
1632 if (err) {
1633 EPRINTF("%s: resume ioctl failed: %d\n", vbd->name, errno);
1634 return err;
1637 err = tapdisk_parse_disk_type(message, &path, &type);
1638 if (err) {
1639 EPRINTF("%s: invalid resume string %s\n", vbd->name, message);
1640 goto out;
1643 free(vbd->name);
1644 vbd->name = strdup(path);
1645 if (!vbd->name) {
1646 EPRINTF("resume malloc failed\n");
1647 err = -ENOMEM;
1648 goto out;
1651 tapdisk_vbd_start_queue(vbd);
1653 for (i = 0; i < TD_VBD_EIO_RETRIES; i++) {
1654 err = __tapdisk_vbd_open_vdi(vbd, TD_OPEN_STRICT);
1655 if (err != -EIO)
1656 break;
1658 sleep(TD_VBD_EIO_SLEEP);
1661 out:
1662 if (!err) {
1663 image_t image;
1664 struct blktap2_params params;
1666 memset(&params, 0, sizeof(params));
1667 tapdisk_vbd_get_image_info(vbd, &image);
1669 params.sector_size = image.secsize;
1670 params.capacity = image.size;
1671 snprintf(params.name, sizeof(params.name) - 1, "%s", message);
1673 ioctl(vbd->ring.fd, BLKTAP2_IOCTL_SET_PARAMS, &params);
1674 td_flag_clear(vbd->state, TD_VBD_PAUSED);
1677 ioctl(vbd->ring.fd, BLKTAP2_IOCTL_RESUME, err);
1678 return err;
1681 static int
1682 tapdisk_vbd_check_ring_message(td_vbd_t *vbd)
1684 if (!vbd->ring.sring)
1685 return -EINVAL;
1687 switch (vbd->ring.sring->private.tapif_user.msg) {
1688 case 0:
1689 return 0;
1691 case BLKTAP2_RING_MESSAGE_PAUSE:
1692 return tapdisk_vbd_pause_ring(vbd);
1694 case BLKTAP2_RING_MESSAGE_RESUME:
1695 return tapdisk_vbd_resume_ring(vbd);
1697 case BLKTAP2_RING_MESSAGE_CLOSE:
1698 return tapdisk_vbd_close(vbd);
1700 default:
1701 return -EINVAL;
1705 static void
1706 tapdisk_vbd_ring_event(event_id_t id, char mode, void *private)
1708 td_vbd_t *vbd;
1710 vbd = (td_vbd_t *)private;
1712 tapdisk_vbd_pull_ring_requests(vbd);
1713 tapdisk_vbd_issue_requests(vbd);
1715 /* vbd may be destroyed after this call */
1716 tapdisk_vbd_check_ring_message(vbd);
1719 td_image_t *
1720 tapdisk_vbd_first_image(td_vbd_t *vbd)
1722 return list_entry(vbd->images.next, td_image_t, next);