debuggers.hg

view tools/blktap2/drivers/block-qcow.c @ 19715:50e048b77ad1

blktap2: fix a compilation error (missing PATH_MAX)

Signed-off-by: KUWAMURA Shin'ya <kuwa@jp.fujitsu.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri May 29 09:28:15 2009 +0100 (2009-05-29)
parents 1c627434605e
children b7f73a7f3078
line source
1 /* block-qcow.c
2 *
3 * Asynchronous Qemu copy-on-write disk implementation.
4 * Code based on the Qemu implementation
5 * (see copyright notice below)
6 *
7 * (c) 2006 Andrew Warfield and Julian Chesterfield
8 *
9 */
11 /*
12 * Block driver for the QCOW format
13 *
14 * Copyright (c) 2004 Fabrice Bellard
15 *
16 * Permission is hereby granted, free of charge, to any person obtaining a copy
17 * of this software and associated documentation files(the "Software"), to deal
18 * in the Software without restriction, including without limitation the rights
19 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
20 * copies of the Software, and to permit persons to whom the Software is
21 * furnished to do so, subject to the following conditions:
22 */
24 #include <errno.h>
25 #include <fcntl.h>
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <unistd.h>
29 #include <sys/statvfs.h>
30 #include <sys/stat.h>
31 #include <sys/ioctl.h>
32 #include <linux/fs.h>
33 #include <string.h>
34 #include <zlib.h>
35 #include <inttypes.h>
36 #include <libaio.h>
37 #include <openssl/md5.h>
38 #include <limits.h>
39 #include "bswap.h"
40 #include "aes.h"
42 #include "tapdisk.h"
43 #include "tapdisk-driver.h"
44 #include "tapdisk-interface.h"
45 #include "qcow.h"
46 #include "blk.h"
47 #include "atomicio.h"
49 /* *BSD has no O_LARGEFILE */
50 #ifndef O_LARGEFILE
51 #define O_LARGEFILE 0
52 #endif
54 #if 1
55 #define ASSERT(_p) \
56 if ( !(_p) ) { DPRINTF("Assertion '%s' failed, line %d, file %s", #_p , \
57 __LINE__, __FILE__); *(int*)0=0; }
58 #else
59 #define ASSERT(_p) ((void)0)
60 #endif
/*
 * Book-keeping for one in-flight asynchronous request.
 * NOTE(review): not referenced anywhere in the visible portion of this
 * file (the AIO pool stores struct qcow_request below) -- confirm it is
 * used elsewhere before removing.
 */
struct pending_aio {
	td_callback_t cb;	/* completion callback */
	int id;			/* request identifier */
	void *private;		/* opaque argument handed back to cb */
	int nb_sectors;		/* request length in sectors */
	char *buf;		/* data buffer */
	uint64_t sector;	/* starting sector of the request */
};
71 #undef IOCB_IDX
72 #define IOCB_IDX(_s, _io) ((_io) - (_s)->iocb_list)
74 #define ZERO_TEST(_b) (_b | 0x00)
/*
 * One slot of the driver's fixed-size AIO request pool: allocated in
 * init_aio_state(), handed out by async_read()/async_write(), and
 * recycled onto the free list by tdqcow_complete().
 */
struct qcow_request {
	td_request_t treq;		/* tapdisk request being serviced */
	struct tiocb tiocb;		/* underlying tapdisk I/O control block */
	struct tdqcow_state *state;	/* owning driver state */
};
82 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset);
84 #ifdef USE_GCRYPT
86 #include <gcrypt.h>
88 uint32_t gen_cksum(char *ptr, int len)
89 {
90 int i;
91 uint32_t md[4];
93 /* Generate checksum */
94 gcry_md_hash_buffer(GCRY_MD_MD5, md, ptr, len);
96 return md[0];
97 }
99 #else /* use libcrypto */
101 #include <openssl/md5.h>
103 uint32_t gen_cksum(char *ptr, int len)
104 {
105 int i;
106 unsigned char *md;
107 uint32_t ret;
109 md = malloc(MD5_DIGEST_LENGTH);
110 if(!md) return 0;
112 /* Generate checksum */
113 if (MD5((unsigned char *)ptr, len, md) != md)
114 ret = 0;
115 else
116 memcpy(&ret, md, sizeof(uint32_t));
118 free(md);
119 return ret;
120 }
122 #endif
125 static void free_aio_state(struct tdqcow_state* s)
126 {
127 free(s->aio_requests);
128 free(s->aio_free_list);
129 }
131 static int init_aio_state(td_driver_t *driver)
132 {
133 int i, ret;
134 td_disk_info_t *bs = &(driver->info);
135 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
137 // A segment (i.e. a page) can span multiple clusters
138 s->max_aio_reqs = ((getpagesize() / s->cluster_size) + 1) *
139 MAX_SEGMENTS_PER_REQ * MAX_REQUESTS;
141 s->aio_free_count = s->max_aio_reqs;
143 if (!(s->aio_requests = calloc(s->max_aio_reqs, sizeof(struct qcow_request))) ||
144 !(s->aio_free_list = calloc(s->max_aio_reqs, sizeof(struct qcow_request)))) {
145 DPRINTF("Failed to allocate AIO structs (max_aio_reqs = %d)\n",
146 s->max_aio_reqs);
147 goto fail;
148 }
150 for (i = 0; i < s->max_aio_reqs; i++)
151 s->aio_free_list[i] = &s->aio_requests[i];
153 DPRINTF("AIO state initialised\n");
155 return 0;
156 fail:
157 return -1;
158 }
160 int get_filesize(char *filename, uint64_t *size, struct stat *st)
161 {
162 int fd;
163 QCowHeader header;
165 /*Set to the backing file size*/
166 fd = open(filename, O_RDONLY);
167 if (fd < 0)
168 return -1;
169 if (read(fd, &header, sizeof(header)) < sizeof(header)) {
170 close(fd);
171 return -1;
172 }
173 close(fd);
175 be32_to_cpus(&header.magic);
176 be64_to_cpus(&header.size);
177 if (header.magic == QCOW_MAGIC) {
178 *size = header.size >> SECTOR_SHIFT;
179 return 0;
180 }
182 if(S_ISBLK(st->st_mode)) {
183 fd = open(filename, O_RDONLY);
184 if (fd < 0)
185 return -1;
186 if (blk_getimagesize(fd, size) != 0) {
187 printf("Unable to get Block device size\n");
188 close(fd);
189 return -1;
190 }
191 close(fd);
192 } else *size = (st->st_size >> SECTOR_SHIFT);
193 return 0;
194 }
196 static int qcow_set_key(struct tdqcow_state *s, const char *key)
197 {
198 uint8_t keybuf[16];
199 int len, i;
201 memset(keybuf, 0, 16);
202 len = strlen(key);
203 if (len > 16)
204 len = 16;
205 /* XXX: we could compress the chars to 7 bits to increase
206 entropy */
207 for (i = 0; i < len; i++) {
208 keybuf[i] = key[i];
209 }
210 s->crypt_method = s->crypt_method_header;
212 if (AES_set_encrypt_key(keybuf, 128, &s->aes_encrypt_key) != 0)
213 return -1;
214 if (AES_set_decrypt_key(keybuf, 128, &s->aes_decrypt_key) != 0)
215 return -1;
216 #if 0
217 /* test */
218 {
219 uint8_t in[16];
220 uint8_t out[16];
221 uint8_t tmp[16];
222 for (i=0; i<16; i++)
223 in[i] = i;
224 AES_encrypt(in, tmp, &s->aes_encrypt_key);
225 AES_decrypt(tmp, out, &s->aes_decrypt_key);
226 for (i = 0; i < 16; i++)
227 DPRINTF(" %02x", tmp[i]);
228 DPRINTF("\n");
229 for (i = 0; i < 16; i++)
230 DPRINTF(" %02x", out[i]);
231 DPRINTF("\n");
232 }
233 #endif
234 return 0;
235 }
237 void tdqcow_complete(void *arg, struct tiocb *tiocb, int err)
238 {
239 struct qcow_request *aio = (struct qcow_request *)arg;
240 struct tdqcow_state *s = aio->state;
242 td_complete_request(aio->treq, err);
244 s->aio_free_list[s->aio_free_count++] = aio;
245 }
247 static void async_read(td_driver_t *driver, td_request_t treq)
248 {
249 int size;
250 uint64_t offset;
251 struct qcow_request *aio;
252 struct tdqcow_state *prv;
254 prv = (struct tdqcow_state *)driver->data;
255 size = treq.secs * driver->info.sector_size;
256 offset = treq.sec * (uint64_t)driver->info.sector_size;
258 if (prv->aio_free_count == 0)
259 goto fail;
261 aio = prv->aio_free_list[--prv->aio_free_count];
262 aio->treq = treq;
263 aio->state = prv;
265 td_prep_read(&aio->tiocb, prv->fd, treq.buf,
266 size, offset, tdqcow_complete, aio);
267 td_queue_tiocb(driver, &aio->tiocb);
269 return;
271 fail:
272 td_complete_request(treq, -EBUSY);
273 }
275 static void async_write(td_driver_t *driver, td_request_t treq)
276 {
277 int size;
278 uint64_t offset;
279 struct qcow_request *aio;
280 struct tdqcow_state *prv;
282 prv = (struct tdqcow_state *)driver->data;
283 size = treq.secs * driver->info.sector_size;
284 offset = treq.sec * (uint64_t)driver->info.sector_size;
286 if (prv->aio_free_count == 0)
287 goto fail;
289 aio = prv->aio_free_list[--prv->aio_free_count];
290 aio->treq = treq;
291 aio->state = prv;
293 td_prep_write(&aio->tiocb, prv->fd, treq.buf,
294 size, offset, tdqcow_complete, aio);
295 td_queue_tiocb(driver, &aio->tiocb);
297 return;
299 fail:
300 td_complete_request(treq, -EBUSY);
301 }
303 /*
304 * The crypt function is compatible with the linux cryptoloop
305 * algorithm for < 4 GB images. NOTE: out_buf == in_buf is
306 * supported .
307 */
308 static void encrypt_sectors(struct tdqcow_state *s, int64_t sector_num,
309 uint8_t *out_buf, const uint8_t *in_buf,
310 int nb_sectors, int enc,
311 const AES_KEY *key)
312 {
313 union {
314 uint64_t ll[2];
315 uint8_t b[16];
316 } ivec;
317 int i;
319 for (i = 0; i < nb_sectors; i++) {
320 ivec.ll[0] = cpu_to_le64(sector_num);
321 ivec.ll[1] = 0;
322 AES_cbc_encrypt(in_buf, out_buf, 512, key,
323 ivec.b, enc);
324 sector_num++;
325 in_buf += 512;
326 out_buf += 512;
327 }
328 }
/*
 * Resize the file behind 'fd' to 'length' bytes, rounded up to a whole
 * number of DEFAULT_SECTOR_SIZE sectors.
 *
 * - Block devices are left untouched (returns 0 immediately).
 * - Growing: zeroes are written synchronously out to the new size.
 * - Shrinking: performed only when 'sparse' is set; a non-sparse
 *   shrink is a silent no-op.
 *
 * Returns 0 on success, -1 on error.
 */
int qtruncate(int fd, off_t length, int sparse)
{
	int ret, i;
	int current = 0, rem = 0;	/* current size in sectors / tail bytes */
	uint64_t sectors;		/* requested size in sectors */
	struct stat st;
	char *buf;

	/* If length is greater than the current file len
	 * we synchronously write zeroes to the end of the
	 * file, otherwise we truncate the length down
	 */
	ret = fstat(fd, &st);
	if (ret == -1)
		return -1;
	if (S_ISBLK(st.st_mode))
		return 0;

	sectors = (length + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
	current = (st.st_size + DEFAULT_SECTOR_SIZE - 1)/DEFAULT_SECTOR_SIZE;
	rem     = st.st_size % DEFAULT_SECTOR_SIZE;

	/* If we are extending this file, we write zeros to the end --
	 * this tries to ensure that the extents allocated wind up being
	 * contiguous on disk.
	 */
	if(st.st_size < sectors * DEFAULT_SECTOR_SIZE) {
		/*We are extending the file*/
		if ((ret = posix_memalign((void **)&buf,
					  512, DEFAULT_SECTOR_SIZE))) {
			DPRINTF("posix_memalign failed: %d\n", ret);
			return -1;
		}
		memset(buf, 0x00, DEFAULT_SECTOR_SIZE);
		if (lseek(fd, 0, SEEK_END)==-1) {
			DPRINTF("Lseek EOF failed (%d), internal error\n",
				errno);
			free(buf);
			return -1;
		}
		/* NOTE(review): when the old size is not sector-aligned this
		 * appends 'rem' zero bytes at EOF rather than padding the
		 * partial sector up to a boundary -- confirm intent. */
		if (rem) {
			ret = write(fd, buf, rem);
			if (ret != rem) {
				DPRINTF("write failed: ret = %d, err = %s\n",
					ret, strerror(errno));
				free(buf);
				return -1;
			}
		}
		/* append whole zeroed sectors up to the target size */
		for (i = current; i < sectors; i++ ) {
			ret = write(fd, buf, DEFAULT_SECTOR_SIZE);
			if (ret != DEFAULT_SECTOR_SIZE) {
				DPRINTF("write failed: ret = %d, err = %s\n",
					ret, strerror(errno));
				free(buf);
				return -1;
			}
		}
		free(buf);
	} else if(sparse && (st.st_size > sectors * DEFAULT_SECTOR_SIZE))
		if (ftruncate(fd, (off_t)sectors * DEFAULT_SECTOR_SIZE)==-1) {
			DPRINTF("Ftruncate failed (%s)\n", strerror(errno));
			return -1;
		}
	return 0;
}
397 /* 'allocate' is:
398 *
399 * 0 to not allocate.
400 *
401 * 1 to allocate a normal cluster (for sector indexes 'n_start' to
402 * 'n_end')
403 *
404 * 2 to allocate a compressed cluster of size
405 * 'compressed_size'. 'compressed_size' must be > 0 and <
406 * cluster_size
407 *
408 * return 0 if not allocated.
409 */
410 static uint64_t get_cluster_offset(struct tdqcow_state *s,
411 uint64_t offset, int allocate,
412 int compressed_size,
413 int n_start, int n_end)
414 {
415 int min_index, i, j, l1_index, l2_index, l2_sector, l1_sector;
416 char *tmp_ptr2, *l2_ptr, *l1_ptr;
417 uint64_t *tmp_ptr;
418 uint64_t l2_offset, *l2_table, cluster_offset, tmp;
419 uint32_t min_count;
420 int new_l2_table;
422 /*Check L1 table for the extent offset*/
423 l1_index = offset >> (s->l2_bits + s->cluster_bits);
424 l2_offset = s->l1_table[l1_index];
425 new_l2_table = 0;
426 if (!l2_offset) {
427 if (!allocate)
428 return 0;
429 /*
430 * allocating a new l2 entry + extent
431 * at the end of the file, we must also
432 * update the L1 entry safely.
433 */
434 l2_offset = s->fd_end;
436 /* round to cluster size */
437 l2_offset = (l2_offset + s->cluster_size - 1)
438 & ~(s->cluster_size - 1);
440 /* update the L1 entry */
441 s->l1_table[l1_index] = l2_offset;
443 /*Truncate file for L2 table
444 *(initialised to zero in case we crash)*/
445 if (qtruncate(s->fd,
446 l2_offset + (s->l2_size * sizeof(uint64_t)),
447 s->sparse) != 0) {
448 DPRINTF("ERROR truncating file\n");
449 return 0;
450 }
451 s->fd_end = l2_offset + (s->l2_size * sizeof(uint64_t));
453 /*Update the L1 table entry on disk
454 * (for O_DIRECT we write 4KByte blocks)*/
455 l1_sector = (l1_index * sizeof(uint64_t)) >> 12;
456 l1_ptr = (char *)s->l1_table + (l1_sector << 12);
458 if (posix_memalign((void **)&tmp_ptr, 4096, 4096) != 0) {
459 DPRINTF("ERROR allocating memory for L1 table\n");
460 }
461 memcpy(tmp_ptr, l1_ptr, 4096);
463 /* Convert block to write to big endian */
464 for(i = 0; i < 4096 / sizeof(uint64_t); i++) {
465 cpu_to_be64s(&tmp_ptr[i]);
466 }
468 /*
469 * Issue non-asynchronous L1 write.
470 * For safety, we must ensure that
471 * entry is written before blocks.
472 */
473 lseek(s->fd, s->l1_table_offset + (l1_sector << 12), SEEK_SET);
474 if (write(s->fd, tmp_ptr, 4096) != 4096) {
475 free(tmp_ptr);
476 return 0;
477 }
478 free(tmp_ptr);
480 new_l2_table = 1;
481 goto cache_miss;
482 } else if (s->min_cluster_alloc == s->l2_size) {
483 /*Fast-track the request*/
484 cluster_offset = l2_offset + (s->l2_size * sizeof(uint64_t));
485 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
486 return cluster_offset + (l2_index * s->cluster_size);
487 }
489 /*Check to see if L2 entry is already cached*/
490 for (i = 0; i < L2_CACHE_SIZE; i++) {
491 if (l2_offset == s->l2_cache_offsets[i]) {
492 /* increment the hit count */
493 if (++s->l2_cache_counts[i] == 0xffffffff) {
494 for (j = 0; j < L2_CACHE_SIZE; j++) {
495 s->l2_cache_counts[j] >>= 1;
496 }
497 }
498 l2_table = s->l2_cache + (i << s->l2_bits);
499 goto found;
500 }
501 }
503 cache_miss:
504 /* not found: load a new entry in the least used one */
505 min_index = 0;
506 min_count = 0xffffffff;
507 for (i = 0; i < L2_CACHE_SIZE; i++) {
508 if (s->l2_cache_counts[i] < min_count) {
509 min_count = s->l2_cache_counts[i];
510 min_index = i;
511 }
512 }
513 l2_table = s->l2_cache + (min_index << s->l2_bits);
515 /*If extent pre-allocated, read table from disk,
516 *otherwise write new table to disk*/
517 if (new_l2_table) {
518 /*Should we allocate the whole extent? Adjustable parameter.*/
519 if (s->cluster_alloc == s->l2_size) {
520 cluster_offset = l2_offset +
521 (s->l2_size * sizeof(uint64_t));
522 cluster_offset = (cluster_offset + s->cluster_size - 1)
523 & ~(s->cluster_size - 1);
524 if (qtruncate(s->fd, cluster_offset +
525 (s->cluster_size * s->l2_size),
526 s->sparse) != 0) {
527 DPRINTF("ERROR truncating file\n");
528 return 0;
529 }
530 s->fd_end = cluster_offset +
531 (s->cluster_size * s->l2_size);
532 for (i = 0; i < s->l2_size; i++) {
533 l2_table[i] = cpu_to_be64(cluster_offset +
534 (i*s->cluster_size));
535 }
536 } else memset(l2_table, 0, s->l2_size * sizeof(uint64_t));
538 lseek(s->fd, l2_offset, SEEK_SET);
539 if (write(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
540 s->l2_size * sizeof(uint64_t))
541 return 0;
542 } else {
543 lseek(s->fd, l2_offset, SEEK_SET);
544 if (read(s->fd, l2_table, s->l2_size * sizeof(uint64_t)) !=
545 s->l2_size * sizeof(uint64_t))
546 return 0;
547 }
549 /*Update the cache entries*/
550 s->l2_cache_offsets[min_index] = l2_offset;
551 s->l2_cache_counts[min_index] = 1;
553 found:
554 /*The extent is split into 's->l2_size' blocks of
555 *size 's->cluster_size'*/
556 l2_index = (offset >> s->cluster_bits) & (s->l2_size - 1);
557 cluster_offset = be64_to_cpu(l2_table[l2_index]);
559 if (!cluster_offset ||
560 ((cluster_offset & QCOW_OFLAG_COMPRESSED) && allocate == 1) ) {
561 if (!allocate)
562 return 0;
564 if ((cluster_offset & QCOW_OFLAG_COMPRESSED) &&
565 (n_end - n_start) < s->cluster_sectors) {
566 /* cluster is already allocated but compressed, we must
567 decompress it in the case it is not completely
568 overwritten */
569 if (decompress_cluster(s, cluster_offset) < 0)
570 return 0;
571 cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
572 cluster_offset = (cluster_offset + s->cluster_size - 1)
573 & ~(s->cluster_size - 1);
574 /* write the cluster content - not asynchronous */
575 lseek(s->fd, cluster_offset, SEEK_SET);
576 if (write(s->fd, s->cluster_cache, s->cluster_size) !=
577 s->cluster_size)
578 return -1;
579 } else {
580 /* allocate a new cluster */
581 cluster_offset = lseek(s->fd, s->fd_end, SEEK_SET);
582 if (allocate == 1) {
583 /* round to cluster size */
584 cluster_offset =
585 (cluster_offset + s->cluster_size - 1)
586 & ~(s->cluster_size - 1);
587 if (qtruncate(s->fd, cluster_offset +
588 s->cluster_size, s->sparse)!=0) {
589 DPRINTF("ERROR truncating file\n");
590 return 0;
591 }
592 s->fd_end = (cluster_offset + s->cluster_size);
593 /* if encrypted, we must initialize the cluster
594 content which won't be written */
595 if (s->crypt_method &&
596 (n_end - n_start) < s->cluster_sectors) {
597 uint64_t start_sect;
598 start_sect = (offset &
599 ~(s->cluster_size - 1))
600 >> 9;
601 memset(s->cluster_data + 512,
602 0xaa, 512);
603 for (i = 0; i < s->cluster_sectors;i++)
604 {
605 if (i < n_start || i >= n_end)
606 {
607 encrypt_sectors(s, start_sect + i,
608 s->cluster_data,
609 s->cluster_data + 512, 1, 1,
610 &s->aes_encrypt_key);
611 lseek(s->fd, cluster_offset + i * 512, SEEK_SET);
612 if (write(s->fd, s->cluster_data, 512) != 512)
613 return -1;
614 }
615 }
616 }
617 } else {
618 cluster_offset |= QCOW_OFLAG_COMPRESSED |
619 (uint64_t)compressed_size
620 << (63 - s->cluster_bits);
621 }
622 }
623 /* update L2 table */
624 tmp = cpu_to_be64(cluster_offset);
625 l2_table[l2_index] = tmp;
627 /*For IO_DIRECT we write 4KByte blocks*/
628 l2_sector = (l2_index * sizeof(uint64_t)) >> 12;
629 l2_ptr = (char *)l2_table + (l2_sector << 12);
631 if (posix_memalign((void **)&tmp_ptr2, 4096, 4096) != 0) {
632 DPRINTF("ERROR allocating memory for L1 table\n");
633 }
634 memcpy(tmp_ptr2, l2_ptr, 4096);
635 lseek(s->fd, l2_offset + (l2_sector << 12), SEEK_SET);
636 if (write(s->fd, tmp_ptr2, 4096) != 4096) {
637 free(tmp_ptr2);
638 return -1;
639 }
640 free(tmp_ptr2);
641 }
642 return cluster_offset;
643 }
645 static int qcow_is_allocated(struct tdqcow_state *s, int64_t sector_num,
646 int nb_sectors, int *pnum)
647 {
648 int index_in_cluster, n;
649 uint64_t cluster_offset;
651 cluster_offset = get_cluster_offset(s, sector_num << 9, 0, 0, 0, 0);
652 index_in_cluster = sector_num & (s->cluster_sectors - 1);
653 n = s->cluster_sectors - index_in_cluster;
654 if (n > nb_sectors)
655 n = nb_sectors;
656 *pnum = n;
657 return (cluster_offset != 0);
658 }
/*
 * Inflate a raw-deflate buffer (windowBits -12: no zlib header, 4 KB
 * window) of 'buf_size' bytes into out_buf.  Succeeds only when the
 * output is exactly 'out_buf_size' bytes.
 *
 * Returns 0 on success, -1 on any zlib error or size mismatch.
 */
static int decompress_buffer(uint8_t *out_buf, int out_buf_size,
                             const uint8_t *buf, int buf_size)
{
	z_stream zs;
	int status, produced, ok;

	memset(&zs, 0, sizeof(zs));
	zs.next_in   = (uint8_t *)buf;
	zs.avail_in  = buf_size;
	zs.next_out  = out_buf;
	zs.avail_out = out_buf_size;

	if (inflateInit2(&zs, -12) != Z_OK)
		return -1;

	status = inflate(&zs, Z_FINISH);
	produced = zs.next_out - out_buf;
	/* Z_BUF_ERROR is tolerated as long as the full output was produced */
	ok = (status == Z_STREAM_END || status == Z_BUF_ERROR) &&
		produced == out_buf_size;

	inflateEnd(&zs);
	return ok ? 0 : -1;
}
687 static int decompress_cluster(struct tdqcow_state *s, uint64_t cluster_offset)
688 {
689 int ret, csize;
690 uint64_t coffset;
692 coffset = cluster_offset & s->cluster_offset_mask;
693 if (s->cluster_cache_offset != coffset) {
694 csize = cluster_offset >> (63 - s->cluster_bits);
695 csize &= (s->cluster_size - 1);
696 lseek(s->fd, coffset, SEEK_SET);
697 ret = read(s->fd, s->cluster_data, csize);
698 if (ret != csize)
699 return -1;
700 if (decompress_buffer(s->cluster_cache, s->cluster_size,
701 s->cluster_data, csize) < 0) {
702 return -1;
703 }
704 s->cluster_cache_offset = coffset;
705 }
706 return 0;
707 }
/*
 * Read the QCOW header from the start of 'fd' into *header and convert
 * the multi-byte fields listed below from big endian to host order.
 *
 * The image is opened O_DIRECT by the caller, so the read goes through
 * a 512-byte-aligned bounce buffer of the header size rounded up to a
 * sector multiple; a file shorter than that is allowed to return fewer
 * bytes (short images are rejected later by the magic check).
 *
 * Returns 0 on success, a negative errno (or positive posix_memalign
 * code) on failure.
 */
static int
tdqcow_read_header(int fd, QCowHeader *header)
{
	int err;
	char *buf;
	struct stat st;
	size_t size, expected;

	memset(header, 0, sizeof(*header));

	err = fstat(fd, &st);
	if (err)
		return -errno;

	err = lseek(fd, 0, SEEK_SET);
	if (err == (off_t)-1)
		return -errno;

	/* O_DIRECT needs a sector-multiple transfer size and aligned buffer */
	size = (sizeof(*header) + 511) & ~511;
	err = posix_memalign((void **)&buf, 512, size);
	if (err)
		return err;

	/* a file shorter than 'size' legitimately yields a short read */
	expected = size;
	if (st.st_size < size)
		expected = st.st_size;

	errno = 0;
	err = read(fd, buf, size);
	if (err != expected) {
		err = (errno ? -errno : -EIO);
		goto out;
	}

	memcpy(header, buf, sizeof(*header));
	/* on-disk format is big endian; convert to host order */
	be32_to_cpus(&header->magic);
	be32_to_cpus(&header->version);
	be64_to_cpus(&header->backing_file_offset);
	be32_to_cpus(&header->backing_file_size);
	be32_to_cpus(&header->mtime);
	be64_to_cpus(&header->size);
	be32_to_cpus(&header->crypt_method);
	be64_to_cpus(&header->l1_table_offset);

	err = 0;

out:
	free(buf);
	return err;
}
760 static int
761 tdqcow_load_l1_table(struct tdqcow_state *s, QCowHeader *header)
762 {
763 char *buf;
764 struct stat st;
765 size_t expected;
766 int i, err, shift;
767 QCowHeader_ext *exthdr;
768 uint32_t l1_table_bytes, l1_table_block, l1_table_size;
770 buf = NULL;
771 s->l1_table = NULL;
773 shift = s->cluster_bits + s->l2_bits;
775 s->l1_size = (header->size + (1LL << shift) - 1) >> shift;
776 s->l1_table_offset = header->l1_table_offset;
778 s->min_cluster_alloc = 1; /* default */
780 l1_table_bytes = s->l1_size * sizeof(uint64_t);
781 l1_table_size = (l1_table_bytes + 4095) & ~4095;
782 l1_table_block = (l1_table_bytes + s->l1_table_offset + 4095) & ~4095;
784 DPRINTF("L1 Table offset detected: %"PRIu64", size %d (%d)\n",
785 (uint64_t)s->l1_table_offset,
786 (int) (s->l1_size * sizeof(uint64_t)),
787 l1_table_size);
789 err = fstat(s->fd, &st);
790 if (err) {
791 err = -errno;
792 goto out;
793 }
795 err = lseek(s->fd, 0, SEEK_SET);
796 if (err == (off_t)-1) {
797 err = -errno;
798 goto out;
799 }
801 err = posix_memalign((void **)&buf, 512, l1_table_block);
802 if (err) {
803 buf = NULL;
804 goto out;
805 }
807 err = posix_memalign((void **)&s->l1_table, 4096, l1_table_size);
808 if (err) {
809 s->l1_table = NULL;
810 goto out;
811 }
813 memset(buf, 0, l1_table_block);
814 memset(s->l1_table, 0, l1_table_size);
816 expected = l1_table_block;
817 if (st.st_size < l1_table_block)
818 expected = st.st_size;
820 errno = 0;
821 err = read(s->fd, buf, l1_table_block);
822 if (err != expected) {
823 err = (errno ? -errno : -EIO);
824 goto out;
825 }
827 memcpy(s->l1_table, buf + s->l1_table_offset, l1_table_size);
828 exthdr = (QCowHeader_ext *)(buf + sizeof(QCowHeader));
830 /* check for xen extended header */
831 if (s->l1_table_offset % 4096 == 0 &&
832 be32_to_cpu(exthdr->xmagic) == XEN_MAGIC) {
833 uint32_t flags = be32_to_cpu(exthdr->flags);
834 uint32_t cksum = be32_to_cpu(exthdr->cksum);
836 /*
837 * Try to detect old tapdisk images. They have to be fixed
838 * because they use big endian rather than native endian for
839 * the L1 table. After this block, the l1 table will
840 * definitely be in BIG endian.
841 */
842 if (!(flags & EXTHDR_L1_BIG_ENDIAN)) {
843 DPRINTF("qcow: converting to big endian L1 table\n");
845 /* convert to big endian */
846 for (i = 0; i < s->l1_size; i++)
847 cpu_to_be64s(&s->l1_table[i]);
849 flags |= EXTHDR_L1_BIG_ENDIAN;
850 exthdr->flags = cpu_to_be32(flags);
852 memcpy(buf + s->l1_table_offset,
853 s->l1_table, l1_table_size);
855 err = lseek(s->fd, 0, SEEK_SET);
856 if (err == (off_t)-1) {
857 err = -errno;
858 goto out;
859 }
861 err = atomicio(vwrite, s->fd, buf, l1_table_block);
862 if (err != l1_table_block) {
863 err = -errno;
864 goto out;
865 }
866 }
868 /* check the L1 table checksum */
869 if (cksum != gen_cksum((char *)s->l1_table,
870 s->l1_size * sizeof(uint64_t)))
871 DPRINTF("qcow: bad L1 checksum\n");
872 else {
873 s->extended = 1;
874 s->sparse = (be32_to_cpu(exthdr->flags) & SPARSE_FILE);
875 s->min_cluster_alloc =
876 be32_to_cpu(exthdr->min_cluster_alloc);
877 }
878 }
880 /* convert L1 table to native endian for operation */
881 for (i = 0; i < s->l1_size; i++)
882 be64_to_cpus(&s->l1_table[i]);
884 err = 0;
886 out:
887 if (err) {
888 free(buf);
889 free(s->l1_table);
890 s->l1_table = NULL;
891 }
892 return err;
893 }
895 /* Open the disk file and initialize qcow state. */
896 int tdqcow_open (td_driver_t *driver, const char *name, td_flag_t flags)
897 {
898 int fd, len, i, ret, size, o_flags;
899 td_disk_info_t *bs = &(driver->info);
900 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
901 QCowHeader header;
902 uint64_t final_cluster = 0;
904 DPRINTF("QCOW: Opening %s\n", name);
906 o_flags = O_DIRECT | O_LARGEFILE |
907 ((flags == TD_OPEN_RDONLY) ? O_RDONLY : O_RDWR);
908 fd = open(name, o_flags);
909 if (fd < 0) {
910 DPRINTF("Unable to open %s (%d)\n", name, -errno);
911 return -1;
912 }
914 s->fd = fd;
915 s->name = strdup(name);
916 if (!s->name)
917 goto fail;
919 if (tdqcow_read_header(fd, &header))
920 goto fail;
922 if (header.magic != QCOW_MAGIC)
923 goto fail;
925 switch (header.version) {
926 case QCOW_VERSION:
927 break;
928 case 2:
929 //TODO: Port qcow2 to new blktap framework.
930 // close(fd);
931 // dd->drv = &tapdisk_qcow2;
932 // return dd->drv->td_open(dd, name, flags);
933 goto fail;
934 default:
935 goto fail;
936 }
938 if (header.size <= 1 || header.cluster_bits < 9)
939 goto fail;
940 if (header.crypt_method > QCOW_CRYPT_AES)
941 goto fail;
942 s->crypt_method_header = header.crypt_method;
943 if (s->crypt_method_header)
944 s->encrypted = 1;
945 s->cluster_bits = header.cluster_bits;
946 s->cluster_size = 1 << s->cluster_bits;
947 s->cluster_sectors = 1 << (s->cluster_bits - 9);
948 s->l2_bits = header.l2_bits;
949 s->l2_size = 1 << s->l2_bits;
950 s->cluster_alloc = s->l2_size;
951 bs->size = header.size / 512;
952 s->cluster_offset_mask = (1LL << (63 - s->cluster_bits)) - 1;
953 s->backing_file_offset = header.backing_file_offset;
954 s->backing_file_size = header.backing_file_size;
956 /* allocate and load l1 table */
957 if (tdqcow_load_l1_table(s, &header))
958 goto fail;
960 /* alloc L2 cache */
961 size = s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t);
962 ret = posix_memalign((void **)&s->l2_cache, 4096, size);
963 if(ret != 0) goto fail;
965 size = s->cluster_size;
966 ret = posix_memalign((void **)&s->cluster_cache, 4096, size);
967 if(ret != 0) goto fail;
969 ret = posix_memalign((void **)&s->cluster_data, 4096, size);
970 if(ret != 0) goto fail;
971 s->cluster_cache_offset = -1;
973 if (s->backing_file_offset != 0)
974 s->cluster_alloc = 1; /*Cannot use pre-alloc*/
976 bs->sector_size = 512;
977 bs->info = 0;
979 for(i = 0; i < s->l1_size; i++)
980 if (s->l1_table[i] > final_cluster)
981 final_cluster = s->l1_table[i];
983 if (init_aio_state(driver)!=0) {
984 DPRINTF("Unable to initialise AIO state\n");
985 free_aio_state(s);
986 goto fail;
987 }
989 if (!final_cluster)
990 s->fd_end = s->l1_table_offset +
991 ((s->l1_size * sizeof(uint64_t) + 4095) & ~4095);
992 else {
993 s->fd_end = lseek64(fd, 0, SEEK_END);
994 if (s->fd_end == (off64_t)-1)
995 goto fail;
996 }
998 return 0;
1000 fail:
1001 DPRINTF("QCOW Open failed\n");
1003 free_aio_state(s);
1004 free(s->l1_table);
1005 free(s->l2_cache);
1006 free(s->cluster_cache);
1007 free(s->cluster_data);
1008 close(fd);
1009 return -1;
1012 void tdqcow_queue_read(td_driver_t *driver, td_request_t treq)
1014 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
1015 int ret = 0, index_in_cluster, n, i;
1016 uint64_t cluster_offset, sector, nb_sectors;
1017 struct qcow_prv* prv;
1018 td_request_t clone = treq;
1019 char* buf = treq.buf;
1021 sector = treq.sec;
1022 nb_sectors = treq.secs;
1024 /*We store a local record of the request*/
1025 while (nb_sectors > 0) {
1026 cluster_offset =
1027 get_cluster_offset(s, sector << 9, 0, 0, 0, 0);
1028 index_in_cluster = sector & (s->cluster_sectors - 1);
1029 n = s->cluster_sectors - index_in_cluster;
1030 if (n > nb_sectors)
1031 n = nb_sectors;
1033 if (s->aio_free_count == 0) {
1034 td_complete_request(treq, -EBUSY);
1035 return;
1038 if(!cluster_offset) {
1039 treq.buf = buf;
1040 treq.sec = sector;
1041 treq.secs = n;
1042 td_forward_request(treq);
1044 } else if (cluster_offset & QCOW_OFLAG_COMPRESSED) {
1045 if (decompress_cluster(s, cluster_offset) < 0) {
1046 td_complete_request(treq, -EIO);
1047 goto done;
1049 memcpy(buf, s->cluster_cache + index_in_cluster * 512,
1050 512 * n);
1052 treq.buf = buf;
1053 treq.sec = sector;
1054 treq.secs = n;
1055 td_complete_request(treq, 0);
1056 } else {
1057 clone.buf = buf;
1058 clone.sec = (cluster_offset>>9)+index_in_cluster;
1059 clone.secs = n;
1060 async_read(driver, clone);
1062 nb_sectors -= n;
1063 sector += n;
1064 buf += n * 512;
1066 done:
1067 return;
1070 void tdqcow_queue_write(td_driver_t *driver, td_request_t treq)
1072 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
1073 int ret = 0, index_in_cluster, n, i;
1074 uint64_t cluster_offset, sector, nb_sectors;
1075 td_callback_t cb;
1076 struct qcow_prv* prv;
1077 char* buf = treq.buf;
1078 td_request_t clone=treq;
1080 sector = treq.sec;
1081 nb_sectors = treq.secs;
1083 /*We store a local record of the request*/
1084 while (nb_sectors > 0) {
1085 index_in_cluster = sector & (s->cluster_sectors - 1);
1086 n = s->cluster_sectors - index_in_cluster;
1087 if (n > nb_sectors)
1088 n = nb_sectors;
1090 if (s->aio_free_count == 0) {
1091 td_complete_request(treq, -EBUSY);
1092 return;
1095 cluster_offset = get_cluster_offset(s, sector << 9, 1, 0,
1096 index_in_cluster,
1097 index_in_cluster+n);
1098 if (!cluster_offset) {
1099 DPRINTF("Ooops, no write cluster offset!\n");
1100 td_complete_request(treq, -EIO);
1101 return;
1104 if (s->crypt_method) {
1105 encrypt_sectors(s, sector, s->cluster_data,
1106 (unsigned char *)buf, n, 1,
1107 &s->aes_encrypt_key);
1109 clone.buf = buf;
1110 clone.sec = (cluster_offset>>9) + index_in_cluster;
1111 clone.secs = n;
1112 async_write(driver, clone);
1113 } else {
1114 clone.buf = buf;
1115 clone.sec = (cluster_offset>>9) + index_in_cluster;
1116 clone.secs = n;
1118 async_write(driver, clone);
1121 nb_sectors -= n;
1122 sector += n;
1123 buf += n * 512;
1125 s->cluster_cache_offset = -1; /* disable compressed cache */
1127 return;
1130 static int
1131 tdqcow_update_checksum(struct tdqcow_state *s)
1133 int i, fd, err;
1134 uint32_t offset, cksum, out;
1136 if (!s->extended)
1137 return 0;
1139 fd = open(s->name, O_WRONLY | O_LARGEFILE); /* open without O_DIRECT */
1140 if (fd == -1) {
1141 err = errno;
1142 goto out;
1145 offset = sizeof(QCowHeader) + offsetof(QCowHeader_ext, cksum);
1146 if (lseek(fd, offset, SEEK_SET) == (off_t)-1) {
1147 err = errno;
1148 goto out;
1151 /* convert to big endian for checksum */
1152 for (i = 0; i < s->l1_size; i++)
1153 cpu_to_be64s(&s->l1_table[i]);
1155 cksum = gen_cksum((char *)s->l1_table, s->l1_size * sizeof(uint64_t));
1157 /* and back again... */
1158 for (i = 0; i < s->l1_size; i++)
1159 be64_to_cpus(&s->l1_table[i]);
1161 DPRINTF("Writing cksum: %d", cksum);
1163 out = cpu_to_be32(cksum);
1164 if (write(fd, &out, sizeof(out)) != sizeof(out)) {
1165 err = errno;
1166 goto out;
1169 err = 0;
1171 out:
1172 if (err)
1173 DPRINTF("failed to update checksum: %d\n", err);
1174 if (fd != -1)
1175 close(fd);
1176 return err;
1179 int tdqcow_close(td_driver_t *driver)
1181 struct tdqcow_state *s = (struct tdqcow_state *)driver->data;
1183 /*Update the hdr cksum*/
1184 tdqcow_update_checksum(s);
1186 free_aio_state(s);
1187 free(s->name);
1188 free(s->l1_table);
1189 free(s->l2_cache);
1190 free(s->cluster_cache);
1191 free(s->cluster_data);
1192 close(s->fd);
1193 return 0;
1196 int qcow_create(const char *filename, uint64_t total_size,
1197 const char *backing_file, int sparse)
1199 int fd, header_size, backing_filename_len, l1_size, i;
1200 int shift, length, adjust, flags = 0, ret = 0;
1201 QCowHeader header;
1202 QCowHeader_ext exthdr;
1203 char backing_filename[PATH_MAX], *ptr;
1204 uint64_t tmp, size, total_length;
1205 struct stat st;
1207 DPRINTF("Qcow_create: size %"PRIu64"\n",total_size);
1209 fd = open(filename,
1210 O_WRONLY | O_CREAT | O_TRUNC | O_BINARY | O_LARGEFILE,
1211 0644);
1212 if (fd < 0)
1213 return -1;
1215 memset(&header, 0, sizeof(header));
1216 header.magic = cpu_to_be32(QCOW_MAGIC);
1217 header.version = cpu_to_be32(QCOW_VERSION);
1219 /*Create extended header fields*/
1220 exthdr.xmagic = cpu_to_be32(XEN_MAGIC);
1222 header_size = sizeof(header) + sizeof(QCowHeader_ext);
1223 backing_filename_len = 0;
1224 size = (total_size >> SECTOR_SHIFT);
1225 if (backing_file) {
1226 if (strcmp(backing_file, "fat:")) {
1227 const char *p;
1228 /* XXX: this is a hack: we do not attempt to
1229 *check for URL like syntax */
1230 p = strchr(backing_file, ':');
1231 if (p && (p - backing_file) >= 2) {
1232 /* URL like but exclude "c:" like filenames */
1233 strncpy(backing_filename, backing_file,
1234 sizeof(backing_filename));
1235 } else {
1236 if (realpath(backing_file, backing_filename) == NULL ||
1237 stat(backing_filename, &st) != 0) {
1238 return -1;
1241 header.backing_file_offset = cpu_to_be64(header_size);
1242 backing_filename_len = strlen(backing_filename);
1243 header.backing_file_size = cpu_to_be32(
1244 backing_filename_len);
1245 header_size += backing_filename_len;
1247 /*Set to the backing file size*/
1248 if(get_filesize(backing_filename, &size, &st)) {
1249 return -1;
1251 DPRINTF("Backing file size detected: %"PRId64" sectors"
1252 "(total %"PRId64" [%"PRId64" MB])\n",
1253 size,
1254 (uint64_t)(size << SECTOR_SHIFT),
1255 (uint64_t)(size >> 11));
1256 } else {
1257 backing_file = NULL;
1258 DPRINTF("Setting file size: %"PRId64" (total %"PRId64")\n",
1259 total_size,
1260 (uint64_t) (total_size << SECTOR_SHIFT));
1262 header.mtime = cpu_to_be32(st.st_mtime);
1263 header.cluster_bits = 9; /* 512 byte cluster to avoid copying
1264 unmodifyed sectors */
1265 header.l2_bits = 12; /* 32 KB L2 tables */
1266 exthdr.min_cluster_alloc = cpu_to_be32(1);
1267 } else {
1268 DPRINTF("Setting file size: %"PRId64" sectors"
1269 "(total %"PRId64" [%"PRId64" MB])\n",
1270 size,
1271 (uint64_t) (size << SECTOR_SHIFT),
1272 (uint64_t) (size >> 11));
1273 header.cluster_bits = 12; /* 4 KB clusters */
1274 header.l2_bits = 9; /* 4 KB L2 tables */
1275 exthdr.min_cluster_alloc = cpu_to_be32(1 << 9);
1277 /*Set the header size value*/
1278 header.size = cpu_to_be64(size * 512);
1280 header_size = (header_size + 7) & ~7;
1281 if (header_size % 4096 > 0) {
1282 header_size = ((header_size >> 12) + 1) << 12;
1285 shift = header.cluster_bits + header.l2_bits;
1286 l1_size = ((size * 512) + (1LL << shift) - 1) >> shift;
1288 header.l1_table_offset = cpu_to_be64(header_size);
1289 DPRINTF("L1 Table offset: %d, size %d\n",
1290 header_size,
1291 (int)(l1_size * sizeof(uint64_t)));
1292 header.crypt_method = cpu_to_be32(QCOW_CRYPT_NONE);
1294 ptr = calloc(1, l1_size * sizeof(uint64_t));
1295 exthdr.cksum = cpu_to_be32(gen_cksum(ptr, l1_size * sizeof(uint64_t)));
1296 printf("Created cksum: %d\n",exthdr.cksum);
1297 free(ptr);
1299 /*adjust file length to system page size boundary*/
1300 length = ROUNDUP(header_size + (l1_size * sizeof(uint64_t)),
1301 getpagesize());
1302 if (qtruncate(fd, length, 0)!=0) {
1303 DPRINTF("ERROR truncating file\n");
1304 return -1;
1307 if (sparse == 0) {
1308 /*Filesize is length+l1_size*(1 << s->l2_bits)+(size*512)*/
1309 total_length = length + (l1_size * (1 << 9)) + (size * 512);
1310 if (qtruncate(fd, total_length, 0)!=0) {
1311 DPRINTF("ERROR truncating file\n");
1312 return -1;
1314 printf("File truncated to length %"PRIu64"\n",total_length);
1315 } else
1316 flags = SPARSE_FILE;
1318 flags |= EXTHDR_L1_BIG_ENDIAN;
1319 exthdr.flags = cpu_to_be32(flags);
1321 /* write all the data */
1322 lseek(fd, 0, SEEK_SET);
1323 ret += write(fd, &header, sizeof(header));
1324 ret += write(fd, &exthdr, sizeof(exthdr));
1325 if (backing_file)
1326 ret += write(fd, backing_filename, backing_filename_len);
1328 lseek(fd, header_size, SEEK_SET);
1329 tmp = 0;
1330 for (i = 0;i < l1_size; i++) {
1331 ret += write(fd, &tmp, sizeof(tmp));
1334 close(fd);
1336 return 0;
1339 static int qcow_make_empty(struct tdqcow_state *s)
1341 uint32_t l1_length = s->l1_size * sizeof(uint64_t);
1343 memset(s->l1_table, 0, l1_length);
1344 lseek(s->fd, s->l1_table_offset, SEEK_SET);
1345 if (write(s->fd, s->l1_table, l1_length) < 0)
1346 return -1;
1347 if (qtruncate(s->fd, s->l1_table_offset + l1_length, s->sparse)!=0) {
1348 DPRINTF("ERROR truncating file\n");
1349 return -1;
1352 memset(s->l2_cache, 0, s->l2_size * L2_CACHE_SIZE * sizeof(uint64_t));
1353 memset(s->l2_cache_offsets, 0, L2_CACHE_SIZE * sizeof(uint64_t));
1354 memset(s->l2_cache_counts, 0, L2_CACHE_SIZE * sizeof(uint32_t));
1356 return 0;
/* Return the image's cluster size in bytes, as cached in the open state. */
static int qcow_get_cluster_size(struct tdqcow_state *s)
{
	return s->cluster_size;
}
/* XXX: put compressed sectors first, then all the cluster aligned
   tables to avoid losing bytes in alignment */
/*
 * Try to deflate one cluster starting at @sector_num and, if the
 * compressed form is strictly smaller than a raw cluster, store it at
 * a freshly allocated compressed-cluster offset.  An incompressible
 * cluster is simply skipped (still returns 0).  Returns -1 on
 * allocation, zlib, or write failure.
 */
static int qcow_compress_cluster(struct tdqcow_state *s, int64_t sector_num,
                          const uint8_t *buf)
{
	z_stream zs;
	uint8_t *zbuf;
	uint64_t cluster_offset;
	int rc, zlen;

	/* worst-case deflate output: input + 0.1% + 128 bytes */
	zbuf = malloc(s->cluster_size + (s->cluster_size / 1000) + 128);
	if (!zbuf)
		return -1;

	/* best compression, small window, no zlib header */
	memset(&zs, 0, sizeof(zs));
	rc = deflateInit2(&zs, Z_DEFAULT_COMPRESSION,
			  Z_DEFLATED, -12,
			  9, Z_DEFAULT_STRATEGY);
	if (rc != 0) {
		free(zbuf);
		return -1;
	}

	zs.next_in   = (uint8_t *)buf;
	zs.avail_in  = s->cluster_size;
	zs.next_out  = zbuf;
	/* only allow one cluster's worth of output — more means "store raw" */
	zs.avail_out = s->cluster_size;

	rc = deflate(&zs, Z_FINISH);
	if (rc != Z_STREAM_END && rc != Z_OK) {
		deflateEnd(&zs);
		free(zbuf);
		return -1;
	}
	zlen = zs.next_out - zbuf;

	deflateEnd(&zs);

	if (rc == Z_STREAM_END && zlen < s->cluster_size) {
		/* compressed form fits: allocate a compressed cluster and
		 * write the deflated bytes there */
		cluster_offset = get_cluster_offset(s, sector_num << 9, 2,
						    zlen, 0, 0);
		cluster_offset &= s->cluster_offset_mask;
		lseek(s->fd, cluster_offset, SEEK_SET);
		if (write(s->fd, zbuf, zlen) != zlen) {
			free(zbuf);
			return -1;
		}
	}
	/* else: did not compress — caller falls back to a normal write */

	free(zbuf);
	return 0;
}
1421 static int
1422 tdqcow_get_image_type(const char *file, int *type)
1424 int fd;
1425 size_t size;
1426 QCowHeader header;
1428 fd = open(file, O_RDONLY);
1429 if (fd == -1)
1430 return -errno;
1432 size = read(fd, &header, sizeof(header));
1433 close(fd);
1434 if (size != sizeof(header))
1435 return (errno ? -errno : -EIO);
1437 be32_to_cpus(&header.magic);
1438 if (header.magic == QCOW_MAGIC)
1439 *type = DISK_TYPE_QCOW;
1440 else
1441 *type = DISK_TYPE_AIO;
1443 return 0;
1446 int tdqcow_get_parent_id(td_driver_t *driver, td_disk_id_t *id)
1448 off_t off;
1449 char *buf, *filename;
1450 int len, secs, type, err = -EINVAL;
1451 struct tdqcow_state *child = (struct tdqcow_state *)driver->data;
1453 if (!child->backing_file_offset)
1454 return TD_NO_PARENT;
1456 /* read the backing file name */
1457 len = child->backing_file_size;
1458 off = child->backing_file_offset - (child->backing_file_offset % 512);
1459 secs = (len + (child->backing_file_offset - off) + 511) >> 9;
1461 if (posix_memalign((void **)&buf, 512, secs << 9))
1462 return -1;
1464 if (lseek(child->fd, off, SEEK_SET) == (off_t)-1)
1465 goto out;
1467 if (read(child->fd, buf, secs << 9) != secs << 9)
1468 goto out;
1469 filename = buf + (child->backing_file_offset - off);
1470 filename[len] = '\0';
1472 if (tdqcow_get_image_type(filename, &type))
1473 goto out;
1475 id->name = strdup(filename);
1476 id->drivertype = type;
1477 err = 0;
1478 out:
1479 free(buf);
1480 return err;
1483 int tdqcow_validate_parent(td_driver_t *driver,
1484 td_driver_t *pdriver, td_flag_t flags)
1486 struct stat stats;
1487 uint64_t psize, csize;
1488 struct tdqcow_state *c = (struct tdqcow_state *)driver->data;
1489 struct tdqcow_state *p = (struct tdqcow_state *)pdriver->data;
1491 if (stat(p->name, &stats))
1492 return -EINVAL;
1493 if (get_filesize(p->name, &psize, &stats))
1494 return -EINVAL;
1496 if (stat(c->name, &stats))
1497 return -EINVAL;
1498 if (get_filesize(c->name, &csize, &stats))
1499 return -EINVAL;
1501 if (csize != psize)
1502 return -EINVAL;
1504 return 0;
/*
 * Dispatch table registering the qcow format with tapdisk.  Per-image
 * state is a struct tdqcow_state allocated by the framework; no debug
 * hook is provided.
 */
struct tap_disk tapdisk_qcow = {
	.disk_type           = "tapdisk_qcow",
	.flags               = 0,
	.private_data_size   = sizeof(struct tdqcow_state),
	.td_open             = tdqcow_open,
	.td_close            = tdqcow_close,
	.td_queue_read       = tdqcow_queue_read,
	.td_queue_write      = tdqcow_queue_write,
	.td_get_parent_id    = tdqcow_get_parent_id,
	.td_validate_parent  = tdqcow_validate_parent,
	.td_debug            = NULL,
};