debuggers.hg

view tools/blktap2/drivers/io-optimize.c @ 20916:f2ef85551a09

blktap2: Sort out tapdisk AIO init.

Move event callbacks registration into tapdisk-queue. This should also
obsoletes the dummy pollfd pipe in the synchronous I/O case.

Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jan 29 08:54:22 2010 +0000 (2010-01-29)
parents 1c627434605e
children
line source
1 /*
2 * Copyright (c) 2008, XenSource Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions are met:
7 * * Redistributions of source code must retain the above copyright
8 * notice, this list of conditions and the following disclaimer.
9 * * Redistributions in binary form must reproduce the above copyright
10 * notice, this list of conditions and the following disclaimer in the
11 * documentation and/or other materials provided with the distribution.
12 * * Neither the name of XenSource Inc. nor the names of its contributors
13 * may be used to endorse or promote products derived from this software
14 * without specific prior written permission.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
20 * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
21 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
22 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
23 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
24 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
25 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
26 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
29 #include <time.h>
30 #include <stdio.h>
31 #include <errno.h>
32 #include <stdlib.h>
33 #include <unistd.h>
34 #include <inttypes.h>
36 #include "io-optimize.h"
37 #include "tapdisk-log.h"
39 #if (!defined(TEST) && defined(DEBUG))
40 #define DBG(ctx, f, a...) tlog_write(TLOG_DBG, f, ##a)
41 #elif defined(TEST)
42 #define DBG(ctx, f, a...) printf(f, ##a)
43 #else
44 #define DBG(ctx, f, a...) ((void)0)
45 #endif
47 static void print_merged_iocbs(struct opioctx *ctx,
48 struct iocb **iocbs, int num_iocbs);
50 void
51 opio_free(struct opioctx *ctx)
52 {
53 free(ctx->opios);
54 ctx->opios = NULL;
56 free(ctx->free_opios);
57 ctx->free_opios = NULL;
59 free(ctx->iocb_queue);
60 ctx->iocb_queue = NULL;
62 free(ctx->event_queue);
63 ctx->event_queue = NULL;
64 }
66 int
67 opio_init(struct opioctx *ctx, int num_iocbs)
68 {
69 int i;
71 memset(ctx, 0, sizeof(struct opioctx));
73 ctx->num_opios = num_iocbs;
74 ctx->free_opio_cnt = num_iocbs;
75 ctx->opios = calloc(1, sizeof(struct opio) * num_iocbs);
76 ctx->free_opios = calloc(1, sizeof(struct opio *) * num_iocbs);
77 ctx->iocb_queue = calloc(1, sizeof(struct iocb *) * num_iocbs);
78 ctx->event_queue = calloc(1, sizeof(struct io_event) * num_iocbs);
80 if (!ctx->opios || !ctx->free_opios ||
81 !ctx->iocb_queue || !ctx->event_queue)
82 goto fail;
84 for (i = 0; i < num_iocbs; i++)
85 ctx->free_opios[i] = &ctx->opios[i];
87 return 0;
89 fail:
90 opio_free(ctx);
91 return -ENOMEM;
92 }
94 static inline struct opio *
95 alloc_opio(struct opioctx *ctx)
96 {
97 if (ctx->free_opio_cnt <= 0)
98 return NULL;
99 return ctx->free_opios[--ctx->free_opio_cnt];
100 }
102 static inline void
103 free_opio(struct opioctx *ctx, struct opio *op)
104 {
105 memset(op, 0, sizeof(struct opio));
106 ctx->free_opios[ctx->free_opio_cnt++] = op;
107 }
109 static inline void
110 restore_iocb(struct opio *op)
111 {
112 struct iocb *io = op->iocb;
114 io->data = op->data;
115 io->u.c.buf = op->buf;
116 io->u.c.nbytes = op->nbytes;
117 }
119 static inline int
120 iocb_optimized(struct opioctx *ctx, struct iocb *io)
121 {
122 unsigned long iop = (unsigned long)io->data;
123 unsigned long start = (unsigned long)ctx->opios;
124 unsigned long end = start + (ctx->num_opios * sizeof(struct opio));
126 return (iop >= start && iop < end);
127 }
129 static inline int
130 contiguous_sectors(struct iocb *l, struct iocb *r)
131 {
132 return (l->u.c.offset + l->u.c.nbytes == r->u.c.offset);
133 }
135 static inline int
136 contiguous_buffers(struct iocb *l, struct iocb *r)
137 {
138 return (l->u.c.buf + l->u.c.nbytes == r->u.c.buf);
139 }
141 static inline int
142 contiguous_iocbs(struct iocb *l, struct iocb *r)
143 {
144 return ((l->aio_fildes == r->aio_fildes) &&
145 contiguous_sectors(l, r) &&
146 contiguous_buffers(l, r));
147 }
149 static inline void
150 init_opio_list(struct opio *op)
151 {
152 op->list.head = op->list.tail = op;
153 }
155 static struct opio *
156 opio_iocb_init(struct opioctx *ctx, struct iocb *io)
157 {
158 struct opio *op;
160 op = alloc_opio(ctx);
161 if (!op)
162 return NULL;
164 op->buf = io->u.c.buf;
165 op->nbytes = io->u.c.nbytes;
166 op->offset = io->u.c.offset;
167 op->data = io->data;
168 op->iocb = io;
169 io->data = op;
171 init_opio_list(op);
173 return op;
174 }
176 static inline struct opio *
177 opio_get(struct opioctx *ctx, struct iocb *io)
178 {
179 if (iocb_optimized(ctx, io))
180 return (struct opio *)io->data;
181 else
182 return opio_iocb_init(ctx, io);
183 }
185 static int
186 merge_tail(struct opioctx *ctx, struct iocb *head, struct iocb *io)
187 {
188 struct opio *ophead, *opio;
190 ophead = opio_get(ctx, head);
191 if (!ophead)
192 return -ENOMEM;
194 opio = opio_get(ctx, io);
195 if (!opio)
196 return -ENOMEM;
198 opio->head = ophead;
199 head->u.c.nbytes += io->u.c.nbytes;
200 ophead->list.tail = ophead->list.tail->next = opio;
202 return 0;
203 }
205 static int
206 merge(struct opioctx *ctx, struct iocb *head, struct iocb *io)
207 {
208 if (head->aio_lio_opcode != io->aio_lio_opcode)
209 return -EINVAL;
211 if (!contiguous_iocbs(head, io))
212 return -EINVAL;
214 return merge_tail(ctx, head, io);
215 }
217 int
218 io_merge(struct opioctx *ctx, struct iocb **queue, int num)
219 {
220 int i, on_queue;
221 struct iocb *io, **q;
222 struct opio *ophead;
224 if (!num)
225 return 0;
227 on_queue = 0;
228 q = ctx->iocb_queue;
229 memcpy(q, queue, num * sizeof(struct iocb *));
231 for (i = 1; i < num; i++) {
232 io = q[i];
233 if (merge(ctx, queue[on_queue], io) != 0)
234 queue[++on_queue] = io;
235 }
237 #if (defined(TEST) || defined(DEBUG))
238 print_merged_iocbs(ctx, queue, on_queue + 1);
239 #endif
241 return ++on_queue;
242 }
244 static int
245 expand_iocb(struct opioctx *ctx, struct iocb **queue, struct iocb *io)
246 {
247 int idx;
248 struct opio *op, *next;
250 idx = 0;
251 op = (struct opio *)io->data;
252 while (op) {
253 next = op->next;
254 restore_iocb(op);
255 queue[idx++] = op->iocb;
256 free_opio(ctx, op);
257 op = next;
258 }
260 return idx;
261 }
263 int
264 io_expand_iocbs(struct opioctx *ctx, struct iocb **queue, int idx, int num)
265 {
266 int i, on_queue;
267 struct iocb *io, **q;
269 if (!num)
270 return 0;
272 on_queue = 0;
273 q = ctx->iocb_queue;
274 memcpy(q, queue, num * sizeof(struct iocb *));
276 for (i = idx; i < num; i++) {
277 io = q[i];
278 if (!iocb_optimized(ctx, io))
279 queue[on_queue++] = io;
280 else
281 on_queue += expand_iocb(ctx, queue + on_queue, io);
282 }
284 return on_queue;
285 }
287 static int
288 expand_event(struct opioctx *ctx,
289 struct io_event *event, struct io_event *queue, int idx)
290 {
291 int err;
292 struct iocb *io;
293 struct io_event *ep;
294 struct opio *ophead, *op, *next;
296 io = event->obj;
297 ophead = (struct opio *)io->data;
298 op = ophead;
300 if (event->res == io->u.c.nbytes)
301 err = 0;
302 else if ((int)event->res < 0)
303 err = (int)event->res;
304 else
305 err = -EIO;
307 while (op) {
308 next = op->next;
309 ep = &queue[idx++];
310 ep->obj = op->iocb;
311 ep->res = (err ? err : op->nbytes);
312 restore_iocb(op);
313 free_opio(ctx, op);
314 op = next;
315 }
317 return idx;
318 }
320 int
321 io_split(struct opioctx *ctx, struct io_event *events, int num)
322 {
323 int on_queue;
324 struct iocb *io;
325 struct io_event *ep, *q;
327 if (!num)
328 return 0;
330 on_queue = 0;
331 q = ctx->event_queue;
332 memcpy(q, events, num * sizeof(struct io_event));
334 for (ep = q; num-- > 0; ep++) {
335 io = ep->obj;
336 if (!iocb_optimized(ctx, io))
337 events[on_queue++] = *ep;
338 else
339 on_queue = expand_event(ctx, ep, events, on_queue);
340 }
342 return on_queue;
343 }
345 /******************************************************************************
346 debug print functions
347 ******************************************************************************/
348 static inline void
349 __print_iocb(struct opioctx *ctx, struct iocb *io, char *prefix)
350 {
351 char *type;
353 type = (io->aio_lio_opcode == IO_CMD_PREAD ? "read" : "write");
355 DBG(ctx, "%soff: %08llx, nbytes: %04lx, buf: %p, type: %s, data: %08lx,"
356 " optimized: %d\n", prefix, io->u.c.offset, io->u.c.nbytes,
357 io->u.c.buf, type, (unsigned long)io->data,
358 iocb_optimized(ctx, io));
359 }
361 static char *null_prefix = "";
362 #define print_iocb(ctx, io) __print_iocb(ctx, io, null_prefix)
364 static void
365 print_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs)
366 {
367 int i;
368 char pref[10];
369 struct iocb *io;
371 DBG(ctx, "iocbs:\n");
372 for (i = 0; i < num_iocbs; i++) {
373 io = iocbs[i];
374 snprintf(pref, 10, "%d: ", i);
375 __print_iocb(ctx, io, pref);
376 }
377 }
379 static void
380 print_optimized_iocbs(struct opioctx *ctx, struct opio *op, int *cnt)
381 {
382 char pref[10];
384 while (op) {
385 snprintf(pref, 10, " %d: ", (*cnt)++);
386 __print_iocb(ctx, op->iocb, pref);
387 op = op->next;
388 }
389 }
391 static void
392 print_merged_iocbs(struct opioctx *ctx, struct iocb **iocbs, int num_iocbs)
393 {
394 int i, cnt;
395 char pref[10];
396 struct iocb *io;
397 struct opio *op;
399 DBG(ctx, "merged iocbs:\n");
400 for (i = 0, cnt = 0; i < num_iocbs; i++) {
401 io = iocbs[i];
402 snprintf(pref, 10, "%d: ", cnt++);
403 __print_iocb(ctx, io, pref);
405 if (iocb_optimized(ctx, io)) {
406 op = (struct opio *)io->data;
407 print_optimized_iocbs(ctx, op->next, &cnt);
408 }
409 }
410 }
412 static void
413 print_events(struct opioctx *ctx, struct io_event *events, int num_events)
414 {
415 int i;
416 struct iocb *io;
418 for (i = 0; i < num_events; i++) {
419 io = events[i].obj;
420 print_iocb(ctx, io);
421 }
422 }
423 /******************************************************************************
424 end debug print functions
425 ******************************************************************************/
427 #if defined(TEST)
429 #define hmask 0x80000000UL
430 #define smask 0x40000000UL
431 #define make_data(idx, is_head, sparse) \
432 (void *)((idx) | ((is_head) ? hmask : 0) | ((sparse) ? smask : 0))
433 #define data_idx(data) (int)((unsigned long)(data) & (0x0fffffff))
434 #define data_is_head(data) (((unsigned long)(data) & hmask) ? 1 : 0)
435 #define data_is_sparse(data) (((unsigned long)(data) & smask) ? 1 : 0)
437 static void
438 usage(void)
439 {
440 fprintf(stderr, "usage: io_optimize [-n num_runs] "
441 "[-i num_iocbs] [-s num_secs] [-r random_seed]\n");
442 exit(-1);
443 }
445 static int xalloc_cnt, xfree_cnt;
446 static inline char *
447 xalloc(int size)
448 {
449 char *buf = malloc(size);
450 if (!buf) {
451 fprintf(stderr, "xalloc failed\n");
452 exit(ENOMEM);
453 }
454 xalloc_cnt++;
455 return buf;
456 }
458 static inline void
459 xfree(void *buf)
460 {
461 free(buf);
462 xfree_cnt++;
463 }
465 static void
466 randomize_iocbs(struct iocb **iocbs, int num_iocbs, int num_secs)
467 {
468 int i, j;
470 i = 0;
471 while (i < num_iocbs) {
472 char *buf;
473 short type;
474 int segs, sparse_mem;
475 uint64_t offset, nbytes;
477 type = (random() % 10 < 5 ? IO_CMD_PREAD : IO_CMD_PWRITE);
478 offset = ((random() % num_secs) << 9);
480 if (random() % 10 < 4) {
481 segs = 1;
482 nbytes = (((random() % 7) + 1) << 9);
483 } else {
484 segs = (random() % 10) + 1;
485 nbytes = 4096;
486 }
488 if (i + segs > num_iocbs)
489 segs = (num_iocbs - i);
491 sparse_mem = (random() % 10 < 2 ? 1 : 0);
493 if (sparse_mem)
494 buf = xalloc(nbytes);
495 else
496 buf = xalloc(segs * nbytes);
498 for (j = 0; j < segs; j++) {
499 struct iocb *io = iocbs[i + j];
500 io->aio_lio_opcode = type;
501 io->u.c.nbytes = nbytes;
502 io->u.c.offset = offset;
503 io->u.c.buf = buf;
504 offset += nbytes;
506 io->data = make_data(i + j, (j == 0), sparse_mem);
508 if (j + 1 < segs && sparse_mem)
509 buf = xalloc(nbytes);
510 else
511 buf += nbytes;
512 }
514 i += segs;
515 }
516 }
518 static int
519 simulate_io(struct iocb **iocbs, struct io_event *events, int num_iocbs)
520 {
521 int i, done;
522 struct iocb *io;
523 struct io_event *ep;
525 if (num_iocbs > 1)
526 done = (random() % (num_iocbs - 1)) + 1;
527 else
528 done = num_iocbs;
530 for (i = 0; i < done; i++) {
531 io = iocbs[i];
532 ep = &events[i];
533 ep->obj = io;
534 ep->res = (random() % 10 < 8 ? io->u.c.nbytes : 0);
535 }
537 return done;
538 }
540 static inline void
541 process_events(struct opioctx *ctx,
542 struct iocb *iocb_list, struct io_event *events, int num)
543 {
544 int i;
545 struct iocb *io;
547 for (i = 0; i < num; i++) {
548 io = events[i].obj;
549 print_iocb(ctx, io);
550 if (data_idx(io->data) != (io - iocb_list)) {
551 printf("corrupt data! data_idx = %d, io = %d\n",
552 data_idx(io->data), (io - iocb_list));
553 exit(-1);
554 }
555 if (data_is_head(io->data) || data_is_sparse(io->data))
556 xfree(io->u.c.buf);
557 memset(io, 0, sizeof(struct iocb));
558 }
559 }
561 static inline void
562 init_optest(struct iocb *iocb_list,
563 struct iocb **iocbs, struct io_event *events, int num)
564 {
565 int i;
567 memset(iocb_list, 0, num * sizeof(struct iocb));
568 memset(events, 0, num * sizeof(struct io_event));
570 for (i = 0; i < num; i++)
571 iocbs[i] = &iocb_list[i];
572 }
574 int
575 main(int argc, char **argv)
576 {
577 uint64_t num_secs;
578 struct opioctx ctx;
579 struct io_event *events;
580 int i, c, num_runs, num_iocbs, seed;
581 struct iocb *iocb_list, **iocbs, **ioqueue;
583 num_runs = 1;
584 num_iocbs = 300;
585 seed = time(NULL);
586 num_secs = ((4ULL << 20) >> 9); /* 4GB disk */
588 while ((c = getopt(argc, argv, "n:i:s:r:h")) != -1) {
589 switch (c) {
590 case 'n':
591 num_runs = atoi(optarg);
592 break;
593 case 'i':
594 num_iocbs = atoi(optarg);
595 break;
596 case 's':
597 num_secs = strtoull(optarg, NULL, 10);
598 break;
599 case 'r':
600 seed = atoi(optarg);
601 break;
602 case 'h':
603 usage();
604 case '?':
605 fprintf(stderr, "Unrecognized option: -%c\n", optopt);
606 usage();
607 }
608 }
610 printf("Running %d tests with %d iocbs on %llu sectors, seed = %d\n",
611 num_runs, num_iocbs, num_secs, seed);
613 srand(seed);
615 iocb_list = malloc(num_iocbs * sizeof(struct iocb));
616 iocbs = malloc(num_iocbs * sizeof(struct iocb *));
617 events = malloc(num_iocbs * sizeof(struct io_event));
619 if (!iocb_list || !iocbs || !events || opio_init(&ctx, num_iocbs)) {
620 fprintf(stderr, "initialization failed\n");
621 exit(ENOMEM);
622 }
624 for (i = 0; i < num_runs; i++) {
625 int op_rem, op_done, num_split, num_events, num_done;
627 ioqueue = iocbs;
628 init_optest(iocb_list, ioqueue, events, num_iocbs);
629 randomize_iocbs(ioqueue, num_iocbs, num_secs);
630 print_iocbs(&ctx, ioqueue, num_iocbs);
632 op_done = 0;
633 num_done = 0;
634 op_rem = io_merge(&ctx, ioqueue, num_iocbs);
635 print_iocbs(&ctx, ioqueue, op_rem);
636 print_merged_iocbs(&ctx, ioqueue, op_rem);
638 while (num_done < num_iocbs) {
639 DBG(&ctx, "optimized remaining: %d\n", op_rem);
641 DBG(&ctx, "simulating\n");
642 num_events = simulate_io(ioqueue + op_done, events, op_rem);
643 print_events(&ctx, events, num_events);
645 DBG(&ctx, "splitting %d\n", num_events);
646 num_split = io_split(&ctx, events, num_events);
647 print_events(&ctx, events, num_split);
649 DBG(&ctx, "processing %d\n", num_split);
650 process_events(&ctx, iocb_list, events, num_split);
652 op_rem -= num_events;
653 op_done += num_events;
654 num_done += num_split;
655 }
657 DBG(&ctx, "run %d: processed: %d, xallocs: %d, xfrees: %d\n",
658 i, num_done, xalloc_cnt, xfree_cnt);
659 if (xalloc_cnt != xfree_cnt)
660 exit(-1);
661 xalloc_cnt = xfree_cnt = 0;
662 }
664 free(iocbs);
665 free(events);
666 free(iocb_list);
667 opio_free(&ctx);
669 return 0;
670 }
671 #endif