debuggers.hg

changeset 20918:b0ffb4912c46

blktap2: Prefer AIO eventfd support on kernels >= 2.6.22

Mainline kernel support for eventfd(2) in Linux AIO was added between
2.6.21 and 2.6.22. Libaio 0.3.107 updated its header file accordingly,
but presently few systems ship that version. Nor do we rely on an
up-to-date libc6 to provide sys/eventfd.h.

Instead, this patch adds a header which defines a custom iocb_common
struct, and works around a potentially missing sys/eventfd.h.

Signed-off-by: Daniel Stodden <daniel.stodden@citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Fri Jan 29 08:55:27 2010 +0000 (2010-01-29)
parents 218026df8d5f
children 857d7b2dd8c7
files tools/blktap2/drivers/block-aio.c tools/blktap2/drivers/libaio-compat.h tools/blktap2/drivers/tapdisk-queue.c tools/blktap2/drivers/tapdisk-utils.c tools/blktap2/drivers/tapdisk-utils.h
line diff
     1.1 --- a/tools/blktap2/drivers/block-aio.c	Fri Jan 29 08:54:51 2010 +0000
     1.2 +++ b/tools/blktap2/drivers/block-aio.c	Fri Jan 29 08:55:27 2010 +0000
     1.3 @@ -28,7 +28,6 @@
     1.4  
     1.5  
     1.6  #include <errno.h>
     1.7 -#include <libaio.h>
     1.8  #include <fcntl.h>
     1.9  #include <stdio.h>
    1.10  #include <stdlib.h>
     2.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     2.2 +++ b/tools/blktap2/drivers/libaio-compat.h	Fri Jan 29 08:55:27 2010 +0000
     2.3 @@ -0,0 +1,92 @@
     2.4 +/*
     2.5 + * Copyright (c) 2010, XenSource Inc.
     2.6 + * All rights reserved.
     2.7 + *
     2.8 + * This  library is  free  software; you  can  redistribute it  and/or
     2.9 + * modify it under the terms  of the GNU Lesser General Public License
    2.10 + * as published by  the Free Software Foundation; either  version 2 of
    2.11 + * the License, or (at your option) any later version.
    2.12 + *
    2.13 + * This library is distributed in the hope that it will be useful, but
    2.14 + * WITHOUT  ANY  WARRANTY;  without   even  the  implied  warranty  of
    2.15 + * MERCHANTABILITY or  FITNESS FOR A PARTICULAR PURPOSE.   See the GNU
    2.16 + * Lesser General Public License for more details.
    2.17 + *
    2.18 + * You should  have received a copy  of the GNU  Lesser General Public
    2.19 + * License along with this library; if not, write to the Free Software
    2.20 + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
    2.21 + * USA
    2.22 + */
    2.23 +
    2.24 +/*
    2.25 + * kernel 2.6.21 added eventfd(2) support, kernel 2.6.22 eventfds for
    2.26 + * aio. libaio 0.3.107 updated the header file, but few systems have
    2.27 + * it. define a custom iocb_common struct instead, and work around a
    2.28 + * potentially missing sys/eventfd.h. this header should vanish over
    2.29 + * time.
    2.30 + */
    2.31 +
    2.32 +#ifndef __LIBAIO_COMPAT
    2.33 +#define __LIBAIO_COMPAT
    2.34 +
    2.35 +#include <libaio.h>
    2.36 +#include <unistd.h>
    2.37 +#include <sys/syscall.h>
    2.38 +
    2.39 +struct __compat_io_iocb_common {
    2.40 +	char             __pad_buf[8];
    2.41 +	char             __pad_nbytes[8];
    2.42 +	long long	offset;
    2.43 +	long long	__pad3;
    2.44 +	unsigned	flags;
    2.45 +	unsigned	resfd;
    2.46 +};
    2.47 +
    2.48 +static inline void __io_set_eventfd(struct iocb *iocb, int eventfd)
    2.49 +{
    2.50 +	struct __compat_io_iocb_common *c;
    2.51 +	c = (struct __compat_io_iocb_common*)&iocb->u.c;
    2.52 +	c->flags |= (1 << 0);
    2.53 +	c->resfd = eventfd;
    2.54 +}
    2.55 +
    2.56 +#ifndef SYS_eventfd
    2.57 +#ifndef __NR_eventfd
    2.58 +# if defined(__alpha__)
    2.59 +#  define __NR_eventfd		478
    2.60 +# elif defined(__arm__)
    2.61 +#  define __NR_eventfd		(__NR_SYSCALL_BASE+351)
    2.62 +# elif defined(__ia64__)
    2.63 +#  define __NR_eventfd		1309
    2.64 +# elif defined(__i386__)
    2.65 +#  define __NR_eventfd		323
    2.66 +# elif defined(__m68k__)
    2.67 +#  define __NR_eventfd		319
    2.68 +# elif 0 && defined(__mips__)
    2.69 +#  error __NR_eventfd?
    2.70 +#  define __NR_eventfd		(__NR_Linux + 319)
    2.71 +#  define __NR_eventfd		(__NR_Linux + 278)
    2.72 +#  define __NR_eventfd		(__NR_Linux + 282)
    2.73 +# elif defined(__hppa__)
    2.74 +#  define __NR_eventfd		(__NR_Linux + 304)
    2.75 +# elif defined(__PPC__) || defined(__powerpc64__)
    2.76 +#  define __NR_eventfd		307
    2.77 +# elif defined(__s390__) || defined(__s390x__)
    2.78 +#  define __NR_eventfd		318
    2.79 +# elif defined(__sparc__)
    2.80 +#  define __NR_eventfd		313
    2.81 +# elif defined(__x86_64__)
    2.82 +#  define __NR_eventfd		284
    2.83 +# endif
    2.84 +#else
    2.85 +# error __NR_eventfd?
    2.86 +#endif
    2.87 +#define SYS_eventfd __NR_eventfd
    2.88 +#endif
    2.89 +
    2.90 +static inline int tapdisk_sys_eventfd(int initval)
    2.91 +{
    2.92 +	return syscall(SYS_eventfd, initval, 0);
    2.93 +}
    2.94 +
    2.95 +#endif /* __LIBAIO_COMPAT */
     3.1 --- a/tools/blktap2/drivers/tapdisk-queue.c	Fri Jan 29 08:54:51 2010 +0000
     3.2 +++ b/tools/blktap2/drivers/tapdisk-queue.c	Fri Jan 29 08:55:27 2010 +0000
     3.3 @@ -30,12 +30,18 @@
     3.4  #include <stdlib.h>
     3.5  #include <unistd.h>
     3.6  #include <libaio.h>
     3.7 +#ifdef __linux__
     3.8 +#include <linux/version.h>
     3.9 +#endif
    3.10  
    3.11  #include "tapdisk.h"
    3.12  #include "tapdisk-log.h"
    3.13  #include "tapdisk-queue.h"
    3.14  #include "tapdisk-filter.h"
    3.15  #include "tapdisk-server.h"
    3.16 +#include "tapdisk-utils.h"
    3.17 +
    3.18 +#include "libaio-compat.h"
    3.19  #include "atomicio.h"
    3.20  
    3.21  #define WARN(_f, _a...) tlog_write(TLOG_WARN, _f, ##_a)
    3.22 @@ -270,10 +276,122 @@ struct lio {
    3.23  	io_context_t     aio_ctx;
    3.24  	struct io_event *aio_events;
    3.25  
    3.26 -	int              poll_fd;
    3.27 +	int              event_fd;
    3.28  	int              event_id;
    3.29 +
    3.30 +	int              flags;
    3.31  };
    3.32  
    3.33 +#define LIO_FLAG_EVENTFD        (1<<0)
    3.34 +
    3.35 +static int
    3.36 +tapdisk_lio_check_resfd(void)
    3.37 +{
    3.38 +	return tapdisk_linux_version() >= KERNEL_VERSION(2, 6, 22);
    3.39 +}
    3.40 +
    3.41 +static void
    3.42 +tapdisk_lio_destroy_aio(struct tqueue *queue)
    3.43 +{
    3.44 +	struct lio *lio = queue->tio_data;
    3.45 +
    3.46 +	if (lio->event_fd >= 0) {
    3.47 +		close(lio->event_fd);
    3.48 +		lio->event_fd = -1;
    3.49 +	}
    3.50 +
    3.51 +	if (lio->aio_ctx) {
    3.52 +		io_destroy(lio->aio_ctx);
    3.53 +		lio->aio_ctx = 0;
    3.54 +	}
    3.55 +}
    3.56 +
    3.57 +static int
    3.58 +__lio_setup_aio_poll(struct tqueue *queue, int qlen)
    3.59 +{
    3.60 +	struct lio *lio = queue->tio_data;
    3.61 +	int err, fd;
    3.62 +
    3.63 +	lio->aio_ctx = REQUEST_ASYNC_FD;
    3.64 +
    3.65 +	fd = io_setup(qlen, &lio->aio_ctx);
    3.66 +	if (fd < 0) {
    3.67 +		lio->aio_ctx = 0;
    3.68 +		err = -errno;
    3.69 +
    3.70 +		if (err == -EINVAL)
    3.71 +			goto fail_fd;
    3.72 +
    3.73 +		goto fail;
    3.74 +	}
    3.75 +
    3.76 +	lio->event_fd = fd;
    3.77 +
    3.78 +	return 0;
    3.79 +
    3.80 +fail_fd:
    3.81 +	DPRINTF("Couldn't get fd for AIO poll support. This is probably "
    3.82 +		"because your kernel does not have the aio-poll patch "
    3.83 +		"applied.\n");
    3.84 +fail:
    3.85 +	return err;
    3.86 +}
    3.87 +
    3.88 +static int
    3.89 +__lio_setup_aio_eventfd(struct tqueue *queue, int qlen)
    3.90 +{
    3.91 +	struct lio *lio = queue->tio_data;
    3.92 +	int err;
    3.93 +
    3.94 +	err = io_setup(qlen, &lio->aio_ctx);
    3.95 +	if (err < 0) {
    3.96 +		lio->aio_ctx = 0;
    3.97 +		return err;
    3.98 +	}
    3.99 +
   3.100 +	lio->event_fd = tapdisk_sys_eventfd(0);
   3.101 +	if (lio->event_fd < 0)
   3.102 +		return  -errno;
   3.103 +
   3.104 +	lio->flags |= LIO_FLAG_EVENTFD;
   3.105 +
   3.106 +	return 0;
   3.107 +}
   3.108 +
   3.109 +static int
   3.110 +tapdisk_lio_setup_aio(struct tqueue *queue, int qlen)
   3.111 +{
   3.112 +	struct lio *lio = queue->tio_data;
   3.113 +	int err;
   3.114 +
   3.115 +	lio->aio_ctx  =  0;
   3.116 +	lio->event_fd = -1;
   3.117 +
   3.118 +	/*
   3.119 +	 * prefer the mainline eventfd(2) api, if available.
   3.120 +	 * if not, fall back to the poll fd patch.
   3.121 +	 */
   3.122 +
   3.123 +	err = !tapdisk_lio_check_resfd();
   3.124 +	if (!err)
   3.125 +		err = __lio_setup_aio_eventfd(queue, qlen);
   3.126 +	if (err)
   3.127 +		err = __lio_setup_aio_poll(queue, qlen);
   3.128 +
   3.129 +	if (err == -EAGAIN)
   3.130 +		goto fail_rsv;
   3.131 +fail:
   3.132 +	return err;
   3.133 +
   3.134 +fail_rsv:
   3.135 +	DPRINTF("Couldn't setup AIO context. If you are trying to "
   3.136 +		"concurrently use a large number of blktap-based disks, you may "
   3.137 +		"need to increase the system-wide aio request limit. "
   3.138 +		"(e.g. 'echo 1048576 > /proc/sys/fs/aio-max-nr')\n");
   3.139 +	goto fail;
   3.140 +}
   3.141 +
   3.142 +
   3.143  static void
   3.144  tapdisk_lio_destroy(struct tqueue *queue)
   3.145  {
   3.146 @@ -287,10 +405,7 @@ tapdisk_lio_destroy(struct tqueue *queue
   3.147  		lio->event_id = -1;
   3.148  	}
   3.149  
   3.150 -	if (lio->aio_ctx) {
   3.151 -		io_destroy(lio->aio_ctx);
   3.152 -		lio->aio_ctx = NULL;
   3.153 -	}
   3.154 +	tapdisk_lio_destroy_aio(queue);
   3.155  
   3.156  	if (lio->aio_events) {
   3.157  		free(lio->aio_events);
   3.158 @@ -299,6 +414,27 @@ tapdisk_lio_destroy(struct tqueue *queue
   3.159  }
   3.160  
   3.161  static void
   3.162 +tapdisk_lio_set_eventfd(struct tqueue *queue, int n, struct iocb **iocbs)
   3.163 +{
   3.164 +	struct lio *lio = queue->tio_data;
   3.165 +	int i;
   3.166 +
   3.167 +	if (lio->flags & LIO_FLAG_EVENTFD)
   3.168 +		for (i = 0; i < n; ++i)
   3.169 +			__io_set_eventfd(iocbs[i], lio->event_fd);
   3.170 +}
   3.171 +
   3.172 +static void
   3.173 +tapdisk_lio_ack_event(struct tqueue *queue)
   3.174 +{
   3.175 +	struct lio *lio = queue->tio_data;
   3.176 +	uint64_t val;
   3.177 +
   3.178 +	if (lio->flags & LIO_FLAG_EVENTFD)
   3.179 +		read(lio->event_fd, &val, sizeof(val));
   3.180 +}
   3.181 +
   3.182 +static void
   3.183  tapdisk_lio_event(event_id_t id, char mode, void *private)
   3.184  {
   3.185  	struct tqueue *queue = private;
   3.186 @@ -308,6 +444,8 @@ tapdisk_lio_event(event_id_t id, char mo
   3.187  	struct tiocb *tiocb;
   3.188  	struct io_event *ep;
   3.189  
   3.190 +	tapdisk_lio_ack_event(queue);
   3.191 +
   3.192  	lio   = queue->tio_data;
   3.193  	ret   = io_getevents(lio->aio_ctx, 0,
   3.194  			     queue->size, lio->aio_events, NULL);
   3.195 @@ -336,22 +474,14 @@ tapdisk_lio_setup(struct tqueue *queue, 
   3.196  	int err;
   3.197  
   3.198  	lio->event_id = -1;
   3.199 -	lio->aio_ctx  = REQUEST_ASYNC_FD;
   3.200  
   3.201 -	lio->poll_fd = io_setup(qlen, &lio->aio_ctx);
   3.202 -	err = lio->poll_fd;
   3.203 -	if (err < 0) {
   3.204 -		lio->aio_ctx = NULL;
   3.205 -
   3.206 -		if (err == -EAGAIN)
   3.207 -			goto fail_rsv;
   3.208 -
   3.209 -		goto fail_fd;
   3.210 -	}
   3.211 +	err = tapdisk_lio_setup_aio(queue, qlen);
   3.212 +	if (err)
   3.213 +		goto fail;
   3.214  
   3.215  	lio->event_id =
   3.216  		tapdisk_server_register_event(SCHEDULER_POLL_READ_FD,
   3.217 -					      lio->poll_fd, 0,
   3.218 +					      lio->event_fd, 0,
   3.219  					      tapdisk_lio_event,
   3.220  					      queue);
   3.221  	err = lio->event_id;
   3.222 @@ -369,19 +499,6 @@ tapdisk_lio_setup(struct tqueue *queue, 
   3.223  fail:
   3.224  	tapdisk_lio_destroy(queue);
   3.225  	return err;
   3.226 -
   3.227 -fail_rsv:
   3.228 -	DPRINTF("Couldn't setup AIO context. If you are trying to "
   3.229 -		"concurrently use a large number of blktap-based disks, you may "
   3.230 -		"need to increase the system-wide aio request limit. "
   3.231 -		"(e.g. 'echo 1048576 > /proc/sys/fs/aio-max-nr')\n");
   3.232 -	goto fail;
   3.233 -
   3.234 -fail_fd:
   3.235 -	DPRINTF("Couldn't get fd for AIO poll support. This is probably "
   3.236 -		"because your kernel does not have  the aio-poll patch "
   3.237 -		"applied.\n");
   3.238 -	goto fail;
   3.239  }
   3.240  
   3.241  static int
   3.242 @@ -395,6 +512,7 @@ tapdisk_lio_submit(struct tqueue *queue)
   3.243  
   3.244  	tapdisk_filter_iocbs(queue->filter, queue->iocbs, queue->queued);
   3.245  	merged    = io_merge(&queue->opioctx, queue->iocbs, queue->queued);
   3.246 +	tapdisk_lio_set_eventfd(queue, merged, queue->iocbs);
   3.247  	submitted = io_submit(lio->aio_ctx, merged, queue->iocbs);
   3.248  
   3.249  	DBG("queued: %d, merged: %d, submitted: %d\n",
     4.1 --- a/tools/blktap2/drivers/tapdisk-utils.c	Fri Jan 29 08:54:51 2010 +0000
     4.2 +++ b/tools/blktap2/drivers/tapdisk-utils.c	Fri Jan 29 08:55:27 2010 +0000
     4.3 @@ -33,6 +33,10 @@
     4.4  #include <sys/mman.h>
     4.5  #include <sys/ioctl.h>
     4.6  #include <sys/resource.h>
     4.7 +#include <sys/utsname.h>
     4.8 +#ifdef __linux__
     4.9 +#include <linux/version.h>
    4.10 +#endif
    4.11  
    4.12  #include "blk.h"
    4.13  #include "tapdisk.h"
    4.14 @@ -183,3 +187,31 @@ tapdisk_get_image_size(int fd, uint64_t 
    4.15  
    4.16  	return 0;
    4.17  }
    4.18 +
    4.19 +#ifdef __linux__
    4.20 +
    4.21 +int tapdisk_linux_version(void)
    4.22 +{
    4.23 +	struct utsname uts;
    4.24 +	unsigned int version, patchlevel, sublevel;
    4.25 +	int n, err;
    4.26 +
    4.27 +	err = uname(&uts);
    4.28 +	if (err)
    4.29 +		return -errno;
    4.30 +
    4.31 +	n = sscanf(uts.release, "%u.%u.%u", &version, &patchlevel, &sublevel);
    4.32 +	if (n != 3)
    4.33 +		return -ENOSYS;
    4.34 +
    4.35 +	return KERNEL_VERSION(version, patchlevel, sublevel);
    4.36 +}
    4.37 +
    4.38 +#else
    4.39 +
    4.40 +int tapdisk_linux_version(void)
    4.41 +{
    4.42 +	return -ENOSYS;
    4.43 +}
    4.44 +
    4.45 +#endif
     5.1 --- a/tools/blktap2/drivers/tapdisk-utils.h	Fri Jan 29 08:54:51 2010 +0000
     5.2 +++ b/tools/blktap2/drivers/tapdisk-utils.h	Fri Jan 29 08:55:27 2010 +0000
     5.3 @@ -38,5 +38,6 @@ int tapdisk_set_resource_limits(void);
     5.4  int tapdisk_namedup(char **, const char *);
     5.5  int tapdisk_parse_disk_type(const char *, char **, int *);
     5.6  int tapdisk_get_image_size(int, uint64_t *, uint32_t *);
     5.7 +int tapdisk_linux_version(void);
     5.8  
     5.9  #endif