debuggers.hg

view tools/blktap/drivers/block-vmdk.c @ 0:7d21f7218375

Exact replica of unstable on 051908 + README-this
author Mukesh Rathor
date Mon May 19 15:34:57 2008 -0700 (2008-05-19)

/* block-vmdk.c
 *
 * VMware Disk format implementation.
 *
 * (c) 2006 Andrew Warfield and Julian Chesterfield
 *
 * This is largely the same as the vmdk driver in Qemu, I've just twisted it
 * to match our interfaces. The original (BSDish) Copyright message appears
 * below:
 */

/*
 * Block driver for the VMDK format
 *
 * Copyright (c) 2004 Fabrice Bellard
 * Copyright (c) 2005 Filip Navara
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/statvfs.h>
#include <sys/stat.h>
#include <sys/ioctl.h>
#include <string.h>
#include "tapdisk.h"
#include "bswap.h"

/* *BSD has no O_LARGEFILE */
#ifndef O_LARGEFILE
#define O_LARGEFILE 0
#endif

#define safer_free(_x)                          \
        do {                                    \
                if (NULL != (_x)) {             \
                        free(_x);               \
                        (_x) = NULL;            \
                }                               \
        } while (0)
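
/*
 * On-disk magic numbers: a VMDK3/COWD image begins with the ASCII bytes
 * "COWD", a VMDK4 image with "KDMV".  tdvmdk_open() reads the first four
 * bytes and converts them with be32_to_cpu() before comparing.
 */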
#define VMDK3_MAGIC (('C' << 24) | ('O' << 16) | ('W' << 8) | 'D')
#define VMDK4_MAGIC (('K' << 24) | ('D' << 16) | ('M' << 8) | 'V')

typedef struct {
        uint32_t version;
        uint32_t flags;
        uint32_t disk_sectors;
        uint32_t granularity;
        uint32_t l1dir_offset;
        uint32_t l1dir_size;
        uint32_t file_sectors;
        uint32_t cylinders;
        uint32_t heads;
        uint32_t sectors_per_track;
} VMDK3Header;

typedef struct {
        uint32_t version;
        uint32_t flags;
        int64_t capacity;
        int64_t granularity;
        int64_t desc_offset;
        int64_t desc_size;
        int32_t num_gtes_per_gte;
        int64_t rgd_offset;
        int64_t gd_offset;
        int64_t grain_offset;
        char filler[1];
        char check_bytes[4];
} __attribute__((packed)) VMDK4Header;

#define L2_CACHE_SIZE 16
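
/*
 * A sparse VMDK addresses data through two levels of tables: each entry in
 * the L1 table (grain directory) is the sector offset of an L2 table (grain
 * table), and each L2 entry is the sector offset of a grain of
 * cluster_sectors sectors; a zero entry means the grain is unallocated.
 * One L1 entry therefore covers l1_entry_sectors = l2_size * cluster_sectors
 * sectors of virtual disk.  A small cache of L2 tables is kept in memory.
 */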
struct tdvmdk_state {
        int fd;
        int poll_pipe[2];       /* dummy fd for polling on */

        unsigned int l1_size;
        int64_t l1_table_offset;
        int64_t l1_backup_table_offset;
        uint32_t l1_entry_sectors;
        unsigned int l2_size;

        uint32_t *l1_table;
        uint32_t *l1_backup_table;
        uint32_t *l2_cache;
        uint32_t l2_cache_offsets[L2_CACHE_SIZE];
        uint32_t l2_cache_counts[L2_CACHE_SIZE];

        unsigned int cluster_sectors;
};

static inline void init_fds(struct disk_driver *dd)
{
        int i;
        struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;

        for (i = 0; i < MAX_IOFD; i++)
                dd->io_fd[i] = 0;

        dd->io_fd[0] = prv->poll_pipe[0];
}

/* Open the disk file and initialize aio state. */
static int tdvmdk_open (struct disk_driver *dd,
                        const char *name, td_flag_t flags)
{
        int ret, fd;
        int l1_size, i, o_flags;
        uint32_t magic;
        struct td_state *s = dd->td_state;
        struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;

        /* set up a pipe so that we can hand back a poll fd that won't fire.*/
        ret = pipe(prv->poll_pipe);
        if (ret != 0)
                return -1;

        /* Open the file */
        o_flags = O_DIRECT | O_LARGEFILE |
                ((flags == TD_RDONLY) ? O_RDONLY : O_RDWR);
        fd = open(name, o_flags);

        if ( (fd == -1) && (errno == EINVAL) ) {

                /* Maybe O_DIRECT isn't supported. */
                o_flags &= ~O_DIRECT;
                fd = open(name, o_flags);
                if (fd != -1) DPRINTF("WARNING: Accessing image without "
                                      "O_DIRECT! (%s)\n", name);

        } else if (fd != -1) DPRINTF("open(%s) with O_DIRECT\n", name);

        if (fd == -1) {
                DPRINTF("Unable to open [%s]!\n", name);
                ret = 0 - errno;
                return -1;
        }

        prv->fd = fd;

        /* Grok the vmdk header. */
        if ((ret = read(fd, &magic, sizeof(magic))) != sizeof(magic))
                goto fail;
        magic = be32_to_cpu(magic);
        if (magic == VMDK3_MAGIC) {
                VMDK3Header header;
                if (read(fd, &header, sizeof(header)) != sizeof(header))
                        goto fail;
                prv->cluster_sectors = le32_to_cpu(header.granularity);
                prv->l2_size = 1 << 9;
                prv->l1_size = 1 << 6;
                s->size = le32_to_cpu(header.disk_sectors);
                prv->l1_table_offset = le32_to_cpu(header.l1dir_offset) << 9;
                prv->l1_backup_table_offset = 0;
                prv->l1_entry_sectors = prv->l2_size * prv->cluster_sectors;
        } else if (magic == VMDK4_MAGIC) {
                VMDK4Header header;

                if (read(fd, &header, sizeof(header)) != sizeof(header))
                        goto fail;
                s->size = le64_to_cpu(header.capacity);
                prv->cluster_sectors = le64_to_cpu(header.granularity);
                prv->l2_size = le32_to_cpu(header.num_gtes_per_gte);
                prv->l1_entry_sectors = prv->l2_size * prv->cluster_sectors;
                if (prv->l1_entry_sectors <= 0)
                        goto fail;
                prv->l1_size = (s->size + prv->l1_entry_sectors - 1)
                        / prv->l1_entry_sectors;
                prv->l1_table_offset = le64_to_cpu(header.rgd_offset) << 9;
                prv->l1_backup_table_offset =
                        le64_to_cpu(header.gd_offset) << 9;
        } else {
                goto fail;
        }
        /* read the L1 table */
        l1_size = prv->l1_size * sizeof(uint32_t);
        prv->l1_table = malloc(l1_size);
        if (!prv->l1_table)
                goto fail;
        if (lseek(fd, prv->l1_table_offset, SEEK_SET) == -1)
                goto fail;
        if (read(fd, prv->l1_table, l1_size) != l1_size)
                goto fail;
        for (i = 0; i < prv->l1_size; i++) {
                le32_to_cpus(&prv->l1_table[i]);
        }

        if (prv->l1_backup_table_offset) {
                prv->l1_backup_table = malloc(l1_size);
                if (!prv->l1_backup_table)
                        goto fail;
                if (lseek(fd, prv->l1_backup_table_offset, SEEK_SET) == -1)
                        goto fail;
                if (read(fd, prv->l1_backup_table, l1_size) != l1_size)
                        goto fail;
                for (i = 0; i < prv->l1_size; i++) {
                        le32_to_cpus(&prv->l1_backup_table[i]);
                }
        }

        prv->l2_cache = malloc(prv->l2_size * L2_CACHE_SIZE * sizeof(uint32_t));
        if (!prv->l2_cache)
                goto fail;
        prv->fd = fd;
        init_fds(dd);
        DPRINTF("VMDK File opened successfully\n");
        return 0;

fail:
        DPRINTF("VMDK File open failed.\n");
        safer_free(prv->l1_backup_table);
        free(prv->l1_table);
        free(prv->l2_cache);
        close(fd);
        return -1;
}
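
/*
 * Map a byte offset into the virtual disk to the byte offset of its grain
 * in the image file.  The relevant L2 table is looked up through the small
 * in-memory cache (the least-used slot is evicted on a miss).  If the grain
 * is unallocated, return 0 unless 'allocate' is set, in which case a new
 * grain is created by extending the file and the L2 entry (and its backup,
 * if present) is written back to disk.
 */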
static uint64_t get_cluster_offset(struct tdvmdk_state *prv,
                                   uint64_t offset, int allocate)
{
        unsigned int l1_index, l2_offset, l2_index;
        int min_index, i, j;
        uint32_t min_count, *l2_table, tmp;
        uint64_t cluster_offset;

        l1_index = (offset >> 9) / prv->l1_entry_sectors;
        if (l1_index >= prv->l1_size)
                return 0;
        l2_offset = prv->l1_table[l1_index];
        if (!l2_offset)
                return 0;
        for (i = 0; i < L2_CACHE_SIZE; i++) {
                if (l2_offset == prv->l2_cache_offsets[i]) {
                        /* increment the hit count */
                        if (++prv->l2_cache_counts[i] == 0xffffffff) {
                                for (j = 0; j < L2_CACHE_SIZE; j++) {
                                        prv->l2_cache_counts[j] >>= 1;
                                }
                        }
                        l2_table = prv->l2_cache + (i * prv->l2_size);
                        goto found;
                }
        }
        /* not found: load a new entry in the least used one */
        min_index = 0;
        min_count = 0xffffffff;
        for (i = 0; i < L2_CACHE_SIZE; i++) {
                if (prv->l2_cache_counts[i] < min_count) {
                        min_count = prv->l2_cache_counts[i];
                        min_index = i;
                }
        }
        l2_table = prv->l2_cache + (min_index * prv->l2_size);
        lseek(prv->fd, (int64_t)l2_offset * 512, SEEK_SET);
        if (read(prv->fd, l2_table, prv->l2_size * sizeof(uint32_t)) !=
            prv->l2_size * sizeof(uint32_t))
                return 0;
        prv->l2_cache_offsets[min_index] = l2_offset;
        prv->l2_cache_counts[min_index] = 1;
found:
        l2_index = ((offset >> 9) / prv->cluster_sectors) % prv->l2_size;
        cluster_offset = le32_to_cpu(l2_table[l2_index]);
        if (!cluster_offset) {
                if (!allocate)
                        return 0;
                cluster_offset = lseek(prv->fd, 0, SEEK_END);
                if (ftruncate(prv->fd, cluster_offset +
                              (prv->cluster_sectors << 9)))
                        return 0;
                cluster_offset >>= 9;
                /* update L2 table */
                tmp = cpu_to_le32(cluster_offset);
                l2_table[l2_index] = tmp;
                lseek(prv->fd, ((int64_t)l2_offset * 512) +
                      (l2_index * sizeof(tmp)), SEEK_SET);
                if (write(prv->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
                        return 0;
                /* update backup L2 table */
                if (prv->l1_backup_table_offset != 0) {
                        l2_offset = prv->l1_backup_table[l1_index];
                        lseek(prv->fd, ((int64_t)l2_offset * 512) +
                              (l2_index * sizeof(tmp)), SEEK_SET);
                        if (write(prv->fd, &tmp, sizeof(tmp)) != sizeof(tmp))
                                return 0;
                }
        }
        cluster_offset <<= 9;
        return cluster_offset;
}
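
/*
 * Synchronous read path: each request is broken into grain-sized pieces,
 * unallocated grains read back as zeroes, and completion is reported to
 * the callback before returning.
 */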
static int tdvmdk_queue_read(struct disk_driver *dd, uint64_t sector,
                             int nb_sectors, char *buf, td_callback_t cb,
                             int id, void *private)
{
        struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
        int index_in_cluster, n;
        uint64_t cluster_offset;
        int ret = 0;

        while (nb_sectors > 0) {
                cluster_offset = get_cluster_offset(prv, sector << 9, 0);
                index_in_cluster = sector % prv->cluster_sectors;
                n = prv->cluster_sectors - index_in_cluster;
                if (n > nb_sectors)
                        n = nb_sectors;
                if (!cluster_offset) {
                        memset(buf, 0, 512 * n);
                } else {
                        lseek(prv->fd, cluster_offset + index_in_cluster * 512,
                              SEEK_SET);
                        ret = read(prv->fd, buf, n * 512);
                        if (ret != n * 512) {
                                ret = -1;
                                goto done;
                        }
                }
                nb_sectors -= n;
                sector += n;
                buf += n * 512;
        }
done:
        return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private);
}
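
/*
 * Synchronous write path: grains are allocated on demand by calling
 * get_cluster_offset() with allocate set, so writing to a hole extends
 * the image file.
 */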
static int tdvmdk_queue_write(struct disk_driver *dd, uint64_t sector,
                              int nb_sectors, char *buf, td_callback_t cb,
                              int id, void *private)
{
        struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;
        int index_in_cluster, n;
        uint64_t cluster_offset;
        int ret = 0;

        while (nb_sectors > 0) {
                index_in_cluster = sector & (prv->cluster_sectors - 1);
                n = prv->cluster_sectors - index_in_cluster;
                if (n > nb_sectors)
                        n = nb_sectors;
                cluster_offset = get_cluster_offset(prv, sector << 9, 1);
                if (!cluster_offset) {
                        ret = -1;
                        goto done;
                }
                lseek(prv->fd, cluster_offset + index_in_cluster * 512,
                      SEEK_SET);
                ret = write(prv->fd, buf, n * 512);
                if (ret != n * 512) {
                        ret = -1;
                        goto done;
                }
                nb_sectors -= n;
                sector += n;
                buf += n * 512;
        }
done:
        return cb(dd, ret == -1 ? -1 : 0, sector, nb_sectors, id, private);
}
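
/*
 * All I/O above completes synchronously inside the queue functions, so
 * there is nothing left to submit here; tdvmdk_do_callbacks() below simply
 * keeps asking tapdisk for another kick.
 */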
static int tdvmdk_submit(struct disk_driver *dd)
{
        return 0;
}

static int tdvmdk_close(struct disk_driver *dd)
{
        struct tdvmdk_state *prv = (struct tdvmdk_state *)dd->private;

        safer_free(prv->l1_table);
        safer_free(prv->l1_backup_table);
        safer_free(prv->l2_cache);
        close(prv->fd);
        close(prv->poll_pipe[0]);
        close(prv->poll_pipe[1]);
        return 0;
}

static int tdvmdk_do_callbacks(struct disk_driver *dd, int sid)
{
        /* always ask for a kick */
        return 1;
}

static int tdvmdk_get_parent_id(struct disk_driver *dd, struct disk_id *id)
{
        return TD_NO_PARENT;
}

static int tdvmdk_validate_parent(struct disk_driver *dd,
                                  struct disk_driver *parent, td_flag_t flags)
{
        return -EINVAL;
}
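
/*
 * Entry points exported to tapdisk.  This driver stands alone: it reports
 * no parent image and refuses parent validation, so snapshot chaining is
 * not supported.
 */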
struct tap_disk tapdisk_vmdk = {
        .disk_type = "tapdisk_vmdk",
        .private_data_size = sizeof(struct tdvmdk_state),
        .td_open = tdvmdk_open,
        .td_queue_read = tdvmdk_queue_read,
        .td_queue_write = tdvmdk_queue_write,
        .td_submit = tdvmdk_submit,
        .td_close = tdvmdk_close,
        .td_do_callbacks = tdvmdk_do_callbacks,
        .td_get_parent_id = tdvmdk_get_parent_id,
        .td_validate_parent = tdvmdk_validate_parent
};