debuggers.hg

view tools/xenstore/xenstored_core.c @ 6676:d6d77aa96aa1

Make xenstored listen to domain exception virqs.
The virq triggers a scan for domains which have gone away, and then
removes those domains.
Signed-off-by: Christian Limpach <Christian.Limpach@cl.cam.ac.uk>
author cl349@firebug.cl.cam.ac.uk
date Tue Sep 06 16:59:14 2005 +0000 (2005-09-06)
parents aeaa3c83f6e5
children d4d69c509371
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored.h"
48 #include "xenstored_core.h"
49 #include "xenstored_watch.h"
50 #include "xenstored_transaction.h"
51 #include "xenstored_domain.h"
52 #include "xenctrl.h"
53 #include "xen/io/domain_controller.h"
54 #include "xcs_proto.h"
/* Gates the xprintf() diagnostics in write_message() and consider_message(). */
56 static bool verbose;
/* List head of connection objects; initialize_set() walks it and
 * destroy_conn() unlinks entries from it. */
57 LIST_HEAD(connections);
/* Descriptor the trace helpers write to; they return early while it is
 * negative. */
58 static int tracefd = -1;
60 #ifdef TESTING
/* True while fault injection is active; the test_*() wrappers only fail
 * when it is set. */
static bool failtest = false;

/*
 * We override talloc's malloc with this in TESTING builds.
 *
 * While failtest is set, one call in 32 (on average) fails by returning
 * NULL; every other call is forwarded to malloc().
 */
void *test_malloc(size_t size)
{
	bool inject_failure = failtest && (random() % 32) == 0;

	return inject_failure ? NULL : malloc(size);
}
72 static void stop_failtest(int signum __attribute__((unused)))
73 {
74 failtest = false;
75 }
77 /* Need these before we #define away write_all/mkdir in testing.h */
78 bool test_write_all(int fd, void *contents, unsigned int len);
79 bool test_write_all(int fd, void *contents, unsigned int len)
80 {
81 if (failtest && (random() % 8) == 0) {
82 if (len)
83 len = random() % len;
84 write(fd, contents, len);
85 errno = ENOSPC;
86 return false;
87 }
88 return xs_write_all(fd, contents, len);
89 }
91 int test_mkdir(const char *dir, int perms);
92 int test_mkdir(const char *dir, int perms)
93 {
94 if (failtest && (random() % 8) == 0) {
95 errno = ENOSPC;
96 return -1;
97 }
98 return mkdir(dir, perms);
99 }
100 #endif /* TESTING */
102 #include "xenstored_test.h"
104 /* FIXME: Ideally, this should never be called. Some can be eliminated. */
105 /* Something is horribly wrong: shutdown immediately. */
106 void __attribute__((noreturn)) corrupt(struct connection *conn,
107 const char *fmt, ...)
108 {
109 va_list arglist;
110 char *str;
111 int saved_errno = errno;
113 va_start(arglist, fmt);
114 str = talloc_vasprintf(NULL, fmt, arglist);
115 va_end(arglist);
117 trace("xenstored corruption: connection id %i: err %s: %s",
118 conn ? (int)conn->id : -1, strerror(saved_errno), str);
119 eprintf("xenstored corruption: connection id %i: err %s: %s",
120 conn ? (int)conn->id : -1, strerror(saved_errno), str);
121 #ifdef TESTING
122 /* Allow them to attach debugger. */
123 sleep(30);
124 #endif
125 syslog(LOG_DAEMON,
126 "xenstored corruption: connection id %i: err %s: %s",
127 conn ? (int)conn->id : -1, strerror(saved_errno), str);
128 _exit(2);
129 }
131 static char *sockmsg_string(enum xsd_sockmsg_type type)
132 {
133 switch (type) {
134 case XS_DEBUG: return "DEBUG";
135 case XS_SHUTDOWN: return "SHUTDOWN";
136 case XS_DIRECTORY: return "DIRECTORY";
137 case XS_READ: return "READ";
138 case XS_GET_PERMS: return "GET_PERMS";
139 case XS_WATCH: return "WATCH";
140 case XS_WATCH_ACK: return "WATCH_ACK";
141 case XS_UNWATCH: return "UNWATCH";
142 case XS_TRANSACTION_START: return "TRANSACTION_START";
143 case XS_TRANSACTION_END: return "TRANSACTION_END";
144 case XS_INTRODUCE: return "INTRODUCE";
145 case XS_RELEASE: return "RELEASE";
146 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
147 case XS_WRITE: return "WRITE";
148 case XS_MKDIR: return "MKDIR";
149 case XS_RM: return "RM";
150 case XS_SET_PERMS: return "SET_PERMS";
151 case XS_WATCH_EVENT: return "WATCH_EVENT";
152 case XS_ERROR: return "ERROR";
153 default:
154 return "**UNKNOWN**";
155 }
156 }
158 static void trace_io(const struct connection *conn,
159 const char *prefix,
160 const struct buffered_data *data)
161 {
162 char string[64];
163 unsigned int i;
165 if (tracefd < 0)
166 return;
168 write(tracefd, prefix, strlen(prefix));
169 sprintf(string, " %p ", conn);
170 write(tracefd, string, strlen(string));
171 write(tracefd, sockmsg_string(data->hdr.msg.type),
172 strlen(sockmsg_string(data->hdr.msg.type)));
173 write(tracefd, " (", 2);
174 for (i = 0; i < data->hdr.msg.len; i++) {
175 if (data->buffer[i] == '\0')
176 write(tracefd, " ", 1);
177 else
178 write(tracefd, data->buffer + i, 1);
179 }
180 write(tracefd, ")\n", 2);
181 }
183 void trace_create(const void *data, const char *type)
184 {
185 char string[64];
186 if (tracefd < 0)
187 return;
189 write(tracefd, "CREATE ", strlen("CREATE "));
190 write(tracefd, type, strlen(type));
191 sprintf(string, " %p\n", data);
192 write(tracefd, string, strlen(string));
193 }
195 void trace_destroy(const void *data, const char *type)
196 {
197 char string[64];
198 if (tracefd < 0)
199 return;
201 write(tracefd, "DESTROY ", strlen("DESTROY "));
202 write(tracefd, type, strlen(type));
203 sprintf(string, " %p\n", data);
204 write(tracefd, string, strlen(string));
205 }
207 void trace_watch_timeout(const struct connection *conn, const char *node, const char *token)
208 {
209 char string[64];
210 if (tracefd < 0)
211 return;
212 write(tracefd, "WATCH_TIMEOUT ", strlen("WATCH_TIMEOUT "));
213 sprintf(string, " %p ", conn);
214 write(tracefd, string, strlen(string));
215 write(tracefd, " (", 2);
216 write(tracefd, node, strlen(node));
217 write(tracefd, " ", 1);
218 write(tracefd, token, strlen(token));
219 write(tracefd, ")\n", 2);
220 }
222 static void trace_blocked(const struct connection *conn,
223 const struct buffered_data *data)
224 {
225 char string[64];
227 if (tracefd < 0)
228 return;
230 write(tracefd, "BLOCKED", strlen("BLOCKED"));
231 sprintf(string, " %p (", conn);
232 write(tracefd, string, strlen(string));
233 write(tracefd, sockmsg_string(data->hdr.msg.type),
234 strlen(sockmsg_string(data->hdr.msg.type)));
235 write(tracefd, ")\n", 2);
236 }
238 void trace(const char *fmt, ...)
239 {
240 va_list arglist;
241 char *str;
243 if (tracefd < 0)
244 return;
246 va_start(arglist, fmt);
247 str = talloc_vasprintf(NULL, fmt, arglist);
248 va_end(arglist);
249 write(tracefd, str, strlen(str));
250 talloc_free(str);
251 }
253 static bool write_message(struct connection *conn)
254 {
255 int ret;
256 struct buffered_data *out = conn->out;
258 assert(conn->state != BLOCKED);
259 if (out->inhdr) {
260 if (verbose)
261 xprintf("Writing msg %s (%s) out to %p\n",
262 sockmsg_string(out->hdr.msg.type),
263 out->buffer, conn);
264 ret = conn->write(conn, out->hdr.raw + out->used,
265 sizeof(out->hdr) - out->used);
266 if (ret < 0)
267 return false;
269 out->used += ret;
270 if (out->used < sizeof(out->hdr))
271 return true;
273 out->inhdr = false;
274 out->used = 0;
276 /* Second write might block if non-zero. */
277 if (out->hdr.msg.len && !conn->domain)
278 return true;
279 }
281 ret = conn->write(conn, out->buffer + out->used,
282 out->hdr.msg.len - out->used);
284 if (ret < 0)
285 return false;
287 out->used += ret;
288 if (out->used != out->hdr.msg.len)
289 return true;
291 trace_io(conn, "OUT", out);
292 conn->out = NULL;
293 talloc_free(out);
295 queue_next_event(conn);
297 /* No longer busy? */
298 if (!conn->out)
299 conn->state = OK;
300 return true;
301 }
303 static int destroy_conn(void *_conn)
304 {
305 struct connection *conn = _conn;
307 /* Flush outgoing if possible, but don't block. */
308 if (!conn->domain) {
309 fd_set set;
310 struct timeval none;
312 FD_ZERO(&set);
313 FD_SET(conn->fd, &set);
314 none.tv_sec = none.tv_usec = 0;
316 while (conn->out
317 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
318 if (!write_message(conn))
319 break;
320 close(conn->fd);
321 }
322 list_del(&conn->list);
323 trace_destroy(conn, "connection");
324 return 0;
325 }
327 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
328 int event_fd, int xcs_fd)
329 {
330 struct connection *i;
331 int max;
333 FD_ZERO(inset);
334 FD_ZERO(outset);
335 FD_SET(sock, inset);
336 max = sock;
337 FD_SET(ro_sock, inset);
338 if (ro_sock > max)
339 max = ro_sock;
340 FD_SET(event_fd, inset);
341 if (event_fd > max)
342 max = event_fd;
343 FD_SET(xcs_fd, inset);
344 if (xcs_fd > max)
345 max = xcs_fd;
346 list_for_each_entry(i, &connections, list) {
347 if (i->domain)
348 continue;
349 if (i->state == OK)
350 FD_SET(i->fd, inset);
351 if (i->out)
352 FD_SET(i->fd, outset);
353 if (i->fd > max)
354 max = i->fd;
355 }
356 return max;
357 }
/*
 * Read everything from a talloc_open'ed fd.
 *
 * The data is collected in a buffer allocated as a talloc child of fd,
 * starting at 4 bytes and doubling whenever it fills up.  *size receives
 * the number of bytes read.  Returns the buffer, or NULL if read()
 * reported an error.
 */
void *read_all(int *fd, unsigned int *size)
{
	unsigned int capacity = 4;
	void *buf = talloc_size(fd, capacity);
	int got;

	*size = 0;
	for (;;) {
		got = read(*fd, (char *)buf + *size, capacity - *size);
		if (got <= 0)
			break;

		*size += got;
		if (*size == capacity) {
			capacity *= 2;
			buf = talloc_realloc_size(fd, buf, capacity);
		}
	}

	return got < 0 ? NULL : buf;
}
/* Destructor installed by talloc_open(): closes the descriptor stored at
 * _fd.  Always returns 0. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
384 /* Return a pointer to an fd, self-closing and attached to this pathname. */
385 int *talloc_open(const char *pathname, int flags, int mode)
386 {
387 int *fd;
389 fd = talloc(pathname, int);
390 *fd = open(pathname, flags, mode);
391 if (*fd < 0) {
392 int saved_errno = errno;
393 talloc_free(fd);
394 errno = saved_errno;
395 return NULL;
396 }
397 talloc_set_destructor(fd, destroy_fd);
398 return fd;
399 }
/*
 * Is child a subnode of parent, or equal?
 *
 * A parent of "/" matches every child (/ should really be "" for the
 * prefix test to work, but that's a usability nightmare).  Otherwise
 * child must begin with parent and the character right after that prefix
 * must be '/' or the end of the string.
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen = strlen(parent);
	char next;

	if (streq(parent, "/"))
		return true;

	if (strncmp(child, parent, plen) != 0)
		return false;

	next = child[plen];
	return next == '/' || next == '\0';
}
/*
 * Build the path for node by prefixing it with xs_daemon_store(); for the
 * root node "/" the result is xs_daemon_store() alone.  Answer never ends
 * in /.  The string is allocated as a talloc child of node.
 */
char *node_dir_outside_transaction(const char *node)
{
	const char *store = xs_daemon_store();

	if (streq(node, "/"))
		return talloc_strdup(node, store);
	return talloc_asprintf(node, "%s%s", store, node);
}
/*
 * Pick the directory for node: node_dir_inside_transaction() when trans
 * is non-NULL and within_transaction() accepts the node, otherwise
 * node_dir_outside_transaction().
 */
static char *node_dir(struct transaction *trans, const char *node)
{
	if (trans && within_transaction(trans, node))
		return node_dir_inside_transaction(trans, node);
	return node_dir_outside_transaction(node);
}
/* Path of the ".data" file inside dir, allocated as a talloc child of dir. */
static char *datafile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.data", dir);

	return path;
}
/* Path of the ".data" file for node, resolved through node_dir() for the
 * given transaction (which may be NULL). */
static char *node_datafile(struct transaction *trans, const char *node)
{
	char *dir = node_dir(trans, node);

	return datafile(dir);
}
/* Path of the ".perms" file inside dir, allocated as a talloc child of dir. */
static char *permfile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.perms", dir);

	return path;
}
/* Path of the ".perms" file for node, resolved through node_dir() for the
 * given transaction (which may be NULL). */
static char *node_permfile(struct transaction *trans, const char *node)
{
	char *dir = node_dir(trans, node);

	return permfile(dir);
}
452 struct buffered_data *new_buffer(void *ctx)
453 {
454 struct buffered_data *data;
456 data = talloc(ctx, struct buffered_data);
457 data->inhdr = true;
458 data->used = 0;
459 data->buffer = NULL;
461 return data;
462 }
464 /* Return length of string (including nul) at this offset. */
465 unsigned int get_string(const struct buffered_data *data, unsigned int offset)
466 {
467 const char *nul;
469 if (offset >= data->used)
470 return 0;
472 nul = memchr(data->buffer + offset, 0, data->used - offset);
473 if (!nul)
474 return 0;
476 return nul - (data->buffer + offset) + 1;
477 }
479 /* Break input into vectors, return the number, fill in up to num of them. */
480 unsigned int get_strings(struct buffered_data *data,
481 char *vec[], unsigned int num)
482 {
483 unsigned int off, i, len;
485 off = i = 0;
486 while ((len = get_string(data, off)) != 0) {
487 if (i < num)
488 vec[i] = data->buffer + off;
489 i++;
490 off += len;
491 }
492 return i;
493 }
495 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
496 const void *data, unsigned int len)
497 {
498 struct buffered_data *bdata;
500 /* When data gets freed, we want list entry is destroyed (so
501 * list entry is a child). */
502 bdata = new_buffer(conn);
503 bdata->buffer = talloc_array(bdata, char, len);
505 bdata->hdr.msg.type = type;
506 bdata->hdr.msg.len = len;
507 memcpy(bdata->buffer, data, len);
509 /* There might be an event going out now. Queue behind it. */
510 if (conn->out) {
511 assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
512 assert(!conn->waiting_reply);
513 conn->waiting_reply = bdata;
514 } else
515 conn->out = bdata;
516 assert(conn->state != BLOCKED);
517 conn->state = BUSY;
518 }
520 /* Some routines (write, mkdir, etc) just need a non-error return */
521 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
522 {
523 send_reply(conn, type, "OK", sizeof("OK"));
524 }
526 void send_error(struct connection *conn, int error)
527 {
528 unsigned int i;
530 for (i = 0; error != xsd_errors[i].errnum; i++) {
531 if (i == ARRAY_SIZE(xsd_errors) - 1) {
532 eprintf("xenstored: error %i untranslatable", error);
533 i = 0; /* EINVAL */
534 break;
535 }
536 }
537 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
538 strlen(xsd_errors[i].errstring) + 1);
539 }
/*
 * True when node consists solely of ASCII letters, digits and the
 * characters '-', '/', '_' and '@'.  The empty string passes.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix spans the whole string iff it ends at the NUL. */
	return node[strspn(node, allowed)] == '\0';
}
/*
 * Check a node name: it must start with '/', must not end in '/' unless
 * it is exactly "/", must not contain "//", and must pass valid_chars().
 */
bool is_valid_nodename(const char *node)
{
	bool is_root = streq(node, "/");

	/* Must start in /. */
	if (!strstarts(node, "/"))
		return false;

	/* Cannot end in / (unless it's just "/"). */
	if (!is_root && strends(node, "/"))
		return false;

	/* No double //. */
	if (strstr(node, "//") != NULL)
		return false;

	return valid_chars(node);
}
567 /* We expect one arg in the input: return NULL otherwise. */
568 static const char *onearg(struct buffered_data *in)
569 {
570 if (!in->used || get_string(in, 0) != in->used)
571 return NULL;
572 return in->buffer;
573 }
575 /* If it fails, returns NULL and sets errno. */
576 static struct xs_permissions *get_perms(const char *dir, unsigned int *num)
577 {
578 unsigned int size;
579 char *strings;
580 struct xs_permissions *ret;
581 int *fd;
583 fd = talloc_open(permfile(dir), O_RDONLY, 0);
584 if (!fd)
585 return NULL;
586 strings = read_all(fd, &size);
587 if (!strings)
588 return NULL;
590 *num = xs_count_strings(strings, size);
591 ret = talloc_array(dir, struct xs_permissions, *num);
592 if (!xs_strings_to_perms(ret, *num, strings))
593 corrupt(NULL, "Permissions corrupt for %s", dir);
595 return ret;
596 }
598 static char *perms_to_strings(const void *ctx,
599 struct xs_permissions *perms, unsigned int num,
600 unsigned int *len)
601 {
602 unsigned int i;
603 char *strings = NULL;
604 char buffer[MAX_STRLEN(domid_t) + 1];
606 for (*len = 0, i = 0; i < num; i++) {
607 if (!xs_perm_to_string(&perms[i], buffer))
608 return NULL;
610 strings = talloc_realloc(ctx, strings, char,
611 *len + strlen(buffer) + 1);
612 strcpy(strings + *len, buffer);
613 *len += strlen(buffer) + 1;
614 }
615 return strings;
616 }
/*
 * Destroy this, and its children, and its children's children.
 *
 * If path cannot be opened as a directory it is unlinked instead; a path
 * that is already gone (ENOENT) counts as success.  Otherwise every entry
 * other than "." and ".." is destroyed recursively and the directory is
 * removed.  An unlink or rmdir failure ends in corrupt().  Returns 0.
 */
int destroy_path(void *path)
{
	const char *base = path;
	struct dirent *entry;
	DIR *dir;

	dir = opendir(base);
	if (!dir) {
		if (unlink(base) == 0 || errno == ENOENT)
			return 0;
		corrupt(NULL, "Destroying path %s", path);
	}

	while ((entry = readdir(dir)) != NULL) {
		char child[strlen(base) + 1 + strlen(entry->d_name) + 1];

		if (streq(entry->d_name, ".") || streq(entry->d_name, ".."))
			continue;

		sprintf(child, "%s/%s", base, entry->d_name);
		destroy_path(child);
	}
	closedir(dir);

	if (rmdir(base) != 0)
		corrupt(NULL, "Destroying directory %s", path);
	return 0;
}
643 /* Create a self-destructing temporary path */
644 static char *temppath(const char *path)
645 {
646 char *tmppath = talloc_asprintf(path, "%s.tmp", path);
647 talloc_set_destructor(tmppath, destroy_path);
648 return tmppath;
649 }
/*
 * Create a self-destructing temporary file.
 *
 * Exclusively creates "<path>.tmp" (mode 0640) and writes len bytes of
 * contents to it.  Returns the temporary path, or NULL if the file cannot
 * be created or the write fails.
 */
static char *tempfile(const char *path, void *contents, unsigned int len)
{
	char *tmp = temppath(path);
	int *fd = talloc_open(tmp, O_WRONLY|O_CREAT|O_EXCL, 0640);

	if (fd && xs_write_all(*fd, contents, len))
		return tmp;
	return NULL;
}
/* Destructor installed by talloc_opendir(): closes the DIR stored at _dir.
 * Always returns 0. */
static int destroy_opendir(void *_dir)
{
	closedir(*(DIR **)_dir);
	return 0;
}
673 /* Return a pointer to a DIR*, self-closing and attached to this pathname. */
674 DIR **talloc_opendir(const char *pathname)
675 {
676 DIR **dir;
678 dir = talloc(pathname, DIR *);
679 *dir = opendir(pathname);
680 if (!*dir) {
681 int saved_errno = errno;
682 talloc_free(dir);
683 errno = saved_errno;
684 return NULL;
685 }
686 talloc_set_destructor(dir, destroy_opendir);
687 return dir;
688 }
/*
 * Move a temporary file into place: path is renamed to itself minus
 * everything from its last '.' onwards, and its destructor is cleared so
 * the committed file is not removed later.
 *
 * We assume rename() doesn't fail on moves in same dir; if it does, that
 * is reported through corrupt().
 */
static void commit_tempfile(const char *path)
{
	unsigned int stem = strrchr(path, '.') - path;
	char final[strlen(path) + 1];

	memcpy(final, path, stem);
	final[stem] = '\0';

	if (rename(path, final) != 0)
		corrupt(NULL, "Committing %s", final);
	talloc_set_destructor(path, NULL);
}
/*
 * Replace node's permissions file with the given permissions.
 *
 * The entries are serialised, written to a temporary file next to the
 * real one and then moved into place.  Returns false if serialisation or
 * creation of the temporary file fails, true otherwise.
 */
static bool set_perms(struct transaction *transaction,
		      const char *node,
		      struct xs_permissions *perms, unsigned int num)
{
	unsigned int bytes;
	char *encoded, *tmp;

	encoded = perms_to_strings(node, perms, num, &bytes);
	if (!encoded)
		return false;

	/* Create then move. */
	tmp = tempfile(node_permfile(transaction, node), encoded, bytes);
	if (!tmp)
		return false;

	commit_tempfile(tmp);
	return true;
}
/*
 * Return the parent of node as a new string allocated under node: the
 * text before the last '/' found after the first character, or "/" when
 * there is no such slash.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);
	return talloc_strdup(node, "/");
}
731 static enum xs_perm_type perm_for_id(domid_t id,
732 struct xs_permissions *perms,
733 unsigned int num)
734 {
735 unsigned int i;
737 /* Owners and tools get it all... */
738 if (!id || perms[0].id == id)
739 return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
741 for (i = 1; i < num; i++)
742 if (perms[i].id == id)
743 return perms[i].perms;
745 return perms[0].perms;
746 }
748 /* What do parents say? */
749 static enum xs_perm_type ask_parents(struct connection *conn,
750 const char *node)
751 {
752 struct xs_permissions *perms;
753 unsigned int num;
755 do {
756 node = get_parent(node);
757 perms = get_perms(node_dir(conn->transaction, node), &num);
758 if (perms)
759 break;
760 } while (!streq(node, "/"));
762 /* No permission at root? We're in trouble. */
763 if (!perms)
764 corrupt(conn, "No permissions file at root");
766 return perm_for_id(conn->id, perms, num);
767 }
769 /* We have a weird permissions system. You can allow someone into a
770 * specific node without allowing it in the parents. If it's going to
771 * fail, however, we don't want the errno to indicate any information
772 * about the node. */
773 static int errno_from_parents(struct connection *conn, const char *node,
774 int errnum)
775 {
776 /* We always tell them about memory failures. */
777 if (errnum == ENOMEM)
778 return errnum;
780 if (ask_parents(conn, node) & XS_PERM_READ)
781 return errnum;
782 return EACCES;
783 }
/*
 * Turn a relative node name into "<implicit path>/<node>".
 *
 * NULL, names already starting with '/', and names on connections for
 * which get_implicit_path() returns NULL are handed back unchanged.  A
 * newly built name is allocated as a talloc child of node.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *prefix;

	if (!node || strstarts(node, "/"))
		return (char *)node;

	prefix = get_implicit_path(conn);
	if (!prefix)
		return (char *)node;

	return talloc_asprintf(node, "%s/%s", prefix, node);
}
797 bool check_node_perms(struct connection *conn, const char *node,
798 enum xs_perm_type perm)
799 {
800 struct xs_permissions *perms;
801 unsigned int num;
803 if (!node || !is_valid_nodename(node)) {
804 errno = EINVAL;
805 return false;
806 }
808 if (!conn->can_write && (perm & XS_PERM_WRITE)) {
809 errno = EROFS;
810 return false;
811 }
813 perms = get_perms(node_dir(conn->transaction, node), &num);
815 if (perms) {
816 if (perm_for_id(conn->id, perms, num) & perm)
817 return true;
818 errno = EACCES;
819 return false;
820 }
822 /* If it's OK not to exist, we consult parents. */
823 if (errno == ENOENT && (perm & XS_PERM_ENOENT_OK)) {
824 if (ask_parents(conn, node) & perm)
825 return true;
826 /* Parents say they should not know. */
827 errno = EACCES;
828 return false;
829 }
831 /* They might not have permission to even *see* this node, in
832 * which case we return EACCES even if it's ENOENT or EIO. */
833 errno = errno_from_parents(conn, node, errno);
834 return false;
835 }
/*
 * Accept node only if it is non-NULL and starts with '@'.  On rejection
 * errno is set to EINVAL and false is returned.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
846 static void send_directory(struct connection *conn, const char *node)
847 {
848 char *path, *reply;
849 unsigned int reply_len = 0;
850 DIR **dir;
851 struct dirent *dirent;
853 node = canonicalize(conn, node);
854 if (!check_node_perms(conn, node, XS_PERM_READ)) {
855 send_error(conn, errno);
856 return;
857 }
859 path = node_dir(conn->transaction, node);
860 dir = talloc_opendir(path);
861 if (!dir) {
862 send_error(conn, errno);
863 return;
864 }
866 reply = talloc_strdup(node, "");
867 while ((dirent = readdir(*dir)) != NULL) {
868 int len = strlen(dirent->d_name) + 1;
870 if (!valid_chars(dirent->d_name))
871 continue;
873 reply = talloc_realloc(path, reply, char, reply_len + len);
874 strcpy(reply + reply_len, dirent->d_name);
875 reply_len += len;
876 }
878 send_reply(conn, XS_DIRECTORY, reply, reply_len);
879 }
881 static void do_read(struct connection *conn, const char *node)
882 {
883 char *value;
884 unsigned int size;
885 int *fd;
887 node = canonicalize(conn, node);
888 if (!check_node_perms(conn, node, XS_PERM_READ)) {
889 send_error(conn, errno);
890 return;
891 }
893 fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0);
894 if (!fd) {
895 /* Data file doesn't exist? We call that a directory */
896 if (errno == ENOENT)
897 errno = EISDIR;
898 send_error(conn, errno);
899 return;
900 }
902 value = read_all(fd, &size);
903 if (!value)
904 send_error(conn, errno);
905 else
906 send_reply(conn, XS_READ, value, size);
907 }
/*
 * Commit this directory, eg. committing a/b.tmp/c causes a/b.tmp -> a/b.
 *
 * dir is cut short (in place) at the first '/' following its last '.',
 * and what remains is renamed to the text preceding that '.'.  Returns
 * true if the rename succeeded.
 */
static bool commit_dir(char *dir)
{
	char *suffix = strrchr(dir, '.');
	char *tail = strchr(suffix, '/');
	char *target;

	if (tail)
		*tail = '\0';

	target = talloc_asprintf(dir, "%.*s", (int)(suffix - dir), dir);
	return rename(dir, target) == 0;
}
923 /* Create a temporary directory. Put data in it (if data != NULL) */
924 static char *tempdir(struct connection *conn,
925 const char *node, void *data, unsigned int datalen)
926 {
927 struct xs_permissions *perms;
928 char *permstr;
929 unsigned int num, len;
930 int *fd;
931 char *dir;
933 dir = temppath(node_dir(conn->transaction, node));
934 if (mkdir(dir, 0750) != 0) {
935 if (errno != ENOENT)
936 return NULL;
938 dir = tempdir(conn, get_parent(node), NULL, 0);
939 if (!dir)
940 return NULL;
942 dir = talloc_asprintf(dir, "%s%s", dir, strrchr(node, '/'));
943 if (mkdir(dir, 0750) != 0)
944 return NULL;
945 talloc_set_destructor(dir, destroy_path);
946 }
948 perms = get_perms(get_parent(dir), &num);
949 assert(perms);
950 /* Domains own what they create. */
951 if (conn->id)
952 perms->id = conn->id;
954 permstr = perms_to_strings(dir, perms, num, &len);
955 fd = talloc_open(permfile(dir), O_WRONLY|O_CREAT|O_EXCL, 0640);
956 if (!fd || !xs_write_all(*fd, permstr, len))
957 return NULL;
959 if (data) {
960 char *datapath = datafile(dir);
962 fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
963 if (!fd || !xs_write_all(*fd, data, datalen))
964 return NULL;
965 }
966 return dir;
967 }
969 /* path, flags, data... */
970 static void do_write(struct connection *conn, struct buffered_data *in)
971 {
972 unsigned int offset, datalen;
973 char *vec[2];
974 char *node, *tmppath;
975 enum xs_perm_type mode;
976 struct stat st;
978 /* Extra "strings" can be created by binary data. */
979 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
980 send_error(conn, EINVAL);
981 return;
982 }
984 node = canonicalize(conn, vec[0]);
985 if (!within_transaction(conn->transaction, node)) {
986 send_error(conn, EROFS);
987 return;
988 }
990 if (transaction_block(conn, node))
991 return;
993 offset = strlen(vec[0]) + strlen(vec[1]) + 2;
994 datalen = in->used - offset;
996 if (streq(vec[1], XS_WRITE_NONE))
997 mode = XS_PERM_WRITE;
998 else if (streq(vec[1], XS_WRITE_CREATE))
999 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
1000 else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
1001 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
1002 else {
1003 send_error(conn, EINVAL);
1004 return;
1007 if (!check_node_perms(conn, node, mode)) {
1008 send_error(conn, errno);
1009 return;
1012 if (lstat(node_dir(conn->transaction, node), &st) != 0) {
1013 char *dir;
1015 /* Does not exist... */
1016 if (errno != ENOENT) {
1017 send_error(conn, errno);
1018 return;
1021 /* Not going to create it? */
1022 if (streq(vec[1], XS_WRITE_NONE)) {
1023 send_error(conn, ENOENT);
1024 return;
1027 dir = tempdir(conn, node, in->buffer + offset, datalen);
1028 if (!dir || !commit_dir(dir)) {
1029 send_error(conn, errno);
1030 return;
1033 } else {
1034 /* Exists... */
1035 if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
1036 send_error(conn, EEXIST);
1037 return;
1040 tmppath = tempfile(node_datafile(conn->transaction, node),
1041 in->buffer + offset, datalen);
1042 if (!tmppath) {
1043 send_error(conn, errno);
1044 return;
1047 commit_tempfile(tmppath);
1050 add_change_node(conn->transaction, node, false);
1051 fire_watches(conn, node, false);
1052 send_ack(conn, XS_WRITE);
1055 static void do_mkdir(struct connection *conn, const char *node)
1057 char *dir;
1058 struct stat st;
1060 node = canonicalize(conn, node);
1061 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
1062 send_error(conn, errno);
1063 return;
1066 if (!within_transaction(conn->transaction, node)) {
1067 send_error(conn, EROFS);
1068 return;
1071 if (transaction_block(conn, node))
1072 return;
1074 /* Must not already exist. */
1075 if (lstat(node_dir(conn->transaction, node), &st) == 0) {
1076 send_error(conn, EEXIST);
1077 return;
1080 dir = tempdir(conn, node, NULL, 0);
1081 if (!dir || !commit_dir(dir)) {
1082 send_error(conn, errno);
1083 return;
1086 add_change_node(conn->transaction, node, false);
1087 fire_watches(conn, node, false);
1088 send_ack(conn, XS_MKDIR);
1091 static void do_rm(struct connection *conn, const char *node)
1093 char *tmppath, *path;
1095 node = canonicalize(conn, node);
1096 if (!check_node_perms(conn, node, XS_PERM_WRITE)) {
1097 send_error(conn, errno);
1098 return;
1101 if (!within_transaction(conn->transaction, node)) {
1102 send_error(conn, EROFS);
1103 return;
1106 if (transaction_block(conn, node))
1107 return;
1109 if (streq(node, "/")) {
1110 send_error(conn, EINVAL);
1111 return;
1114 /* We move the directory to temporary name, destructor cleans up. */
1115 path = node_dir(conn->transaction, node);
1116 tmppath = talloc_asprintf(node, "%s.tmp", path);
1117 talloc_set_destructor(tmppath, destroy_path);
1119 if (rename(path, tmppath) != 0) {
1120 send_error(conn, errno);
1121 return;
1124 add_change_node(conn->transaction, node, true);
1125 fire_watches(conn, node, true);
1126 send_ack(conn, XS_RM);
1129 static void do_get_perms(struct connection *conn, const char *node)
1131 struct xs_permissions *perms;
1132 char *strings;
1133 unsigned int len, num;
1135 node = canonicalize(conn, node);
1136 if (!check_node_perms(conn, node, XS_PERM_READ)) {
1137 send_error(conn, errno);
1138 return;
1141 perms = get_perms(node_dir(conn->transaction, node), &num);
1142 if (!perms) {
1143 send_error(conn, errno);
1144 return;
1147 strings = perms_to_strings(node, perms, num, &len);
1148 if (!strings)
1149 send_error(conn, errno);
1150 else
1151 send_reply(conn, XS_GET_PERMS, strings, len);
1154 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1156 unsigned int num;
1157 char *node, *permstr;
1158 struct xs_permissions *perms;
1160 num = xs_count_strings(in->buffer, in->used);
1161 if (num < 2) {
1162 send_error(conn, EINVAL);
1163 return;
1166 /* First arg is node name. */
1167 node = canonicalize(conn, in->buffer);
1168 permstr = in->buffer + strlen(in->buffer) + 1;
1169 num--;
1171 if (!within_transaction(conn->transaction, node)) {
1172 send_error(conn, EROFS);
1173 return;
1176 if (transaction_block(conn, node))
1177 return;
1179 /* We must own node to do this (tools can do this too). */
1180 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) {
1181 send_error(conn, errno);
1182 return;
1185 perms = talloc_array(node, struct xs_permissions, num);
1186 if (!xs_strings_to_perms(perms, num, permstr)) {
1187 send_error(conn, errno);
1188 return;
1191 if (!set_perms(conn->transaction, node, perms, num)) {
1192 send_error(conn, errno);
1193 return;
1196 add_change_node(conn->transaction, node, false);
1197 fire_watches(conn, node, false);
1198 send_ack(conn, XS_SET_PERMS);
1201 /* Process "in" for conn: "in" will vanish after this conversation, so
1202 * we can talloc off it for temporary variables. May free "conn".
1203 */
1204 static void process_message(struct connection *conn, struct buffered_data *in)
1206 switch (in->hdr.msg.type) {
1207 case XS_DIRECTORY:
1208 send_directory(conn, onearg(in));
1209 break;
1211 case XS_READ:
1212 do_read(conn, onearg(in));
1213 break;
1215 case XS_WRITE:
1216 do_write(conn, in);
1217 break;
1219 case XS_MKDIR:
1220 do_mkdir(conn, onearg(in));
1221 break;
1223 case XS_RM:
1224 do_rm(conn, onearg(in));
1225 break;
1227 case XS_GET_PERMS:
1228 do_get_perms(conn, onearg(in));
1229 break;
1231 case XS_SET_PERMS:
1232 do_set_perms(conn, in);
1233 break;
1235 case XS_SHUTDOWN:
1236 /* FIXME: Implement gentle shutdown too. */
1237 /* Only tools can do this. */
1238 if (conn->id != 0) {
1239 send_error(conn, EACCES);
1240 break;
1242 if (!conn->can_write) {
1243 send_error(conn, EROFS);
1244 break;
1246 send_ack(conn, XS_SHUTDOWN);
1247 /* Everything hangs off auto-free context, freed at exit. */
1248 exit(0);
1250 case XS_DEBUG:
1251 if (streq(in->buffer, "print"))
1252 xprintf("debug: %s", in->buffer + get_string(in, 0));
1253 #ifdef TESTING
1254 /* For testing, we allow them to set id. */
1255 if (streq(in->buffer, "setid")) {
1256 conn->id = atoi(in->buffer + get_string(in, 0));
1257 send_ack(conn, XS_DEBUG);
1258 } else if (streq(in->buffer, "failtest")) {
1259 if (get_string(in, 0) < in->used)
1260 srandom(atoi(in->buffer + get_string(in, 0)));
1261 send_ack(conn, XS_DEBUG);
1262 failtest = true;
1264 #endif /* TESTING */
1265 break;
1267 case XS_WATCH:
1268 do_watch(conn, in);
1269 break;
1271 case XS_WATCH_ACK:
1272 do_watch_ack(conn, onearg(in));
1273 break;
1275 case XS_UNWATCH:
1276 do_unwatch(conn, in);
1277 break;
1279 case XS_TRANSACTION_START:
1280 do_transaction_start(conn, onearg(in));
1281 break;
1283 case XS_TRANSACTION_END:
1284 do_transaction_end(conn, onearg(in));
1285 break;
1287 case XS_INTRODUCE:
1288 do_introduce(conn, in);
1289 break;
1291 case XS_RELEASE:
1292 do_release(conn, onearg(in));
1293 break;
1295 case XS_GET_DOMAIN_PATH:
1296 do_get_domain_path(conn, onearg(in));
1297 break;
1299 case XS_WATCH_EVENT:
1300 default:
1301 eprintf("Client unknown operation %i", in->hdr.msg.type);
1302 send_error(conn, ENOSYS);
/*
 * talloc failure handler: "data" is the jmp_buf registered alongside
 * this handler.  Jumps back to it with value 1; never returns.
 */
static int out_of_mem(void *data)
{
	jmp_buf *recovery = data;

	longjmp(*recovery, 1);
}
1311 static void consider_message(struct connection *conn)
1313 struct buffered_data *in = NULL;
1314 enum xsd_sockmsg_type type = conn->in->hdr.msg.type;
1315 jmp_buf talloc_fail;
1317 assert(conn->state == OK);
1319 /* For simplicity, we kill the connection on OOM. */
1320 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1321 if (setjmp(talloc_fail)) {
1322 talloc_free(conn);
1323 goto end;
1326 if (verbose)
1327 xprintf("Got message %s len %i from %p\n",
1328 sockmsg_string(type), conn->in->hdr.msg.len, conn);
1330 /* We might get a command while waiting for an ack: this means
1331 * the other end discarded it: we will re-transmit. */
1332 if (type != XS_WATCH_ACK)
1333 conn->waiting_for_ack = NULL;
1335 /* Careful: process_message may free connection. We detach
1336 * "in" beforehand and allocate the new buffer to avoid
1337 * touching conn after process_message.
1338 */
1339 in = talloc_steal(talloc_autofree_context(), conn->in);
1340 conn->in = new_buffer(conn);
1341 process_message(conn, in);
1343 if (conn->state == BLOCKED) {
1344 /* Blocked by transaction: queue for re-xmit. */
1345 talloc_free(conn->in);
1346 conn->in = in;
1347 in = NULL;
1348 trace_blocked(conn, conn->in);
1351 end:
1352 talloc_free(in);
1353 talloc_set_fail_handler(NULL, NULL);
1354 if (talloc_total_blocks(NULL)
1355 != talloc_total_blocks(talloc_autofree_context()) + 1) {
1356 talloc_report_full(NULL, stderr);
1357 abort();
1361 /* Errors in reading or allocating here mean we get out of sync, so we
1362 * drop the whole client connection. */
1363 void handle_input(struct connection *conn)
1365 int bytes;
1366 struct buffered_data *in;
1368 assert(conn->state == OK);
1369 in = conn->in;
1371 /* Not finished header yet? */
1372 if (in->inhdr) {
1373 bytes = conn->read(conn, in->hdr.raw + in->used,
1374 sizeof(in->hdr) - in->used);
1375 if (bytes <= 0)
1376 goto bad_client;
1377 in->used += bytes;
1378 if (in->used != sizeof(in->hdr))
1379 return;
1381 if (in->hdr.msg.len > PATH_MAX) {
1382 #ifndef TESTING
1383 syslog(LOG_DAEMON, "Client tried to feed us %i",
1384 in->hdr.msg.len);
1385 #endif
1386 goto bad_client;
1389 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1390 if (!in->buffer)
1391 goto bad_client;
1392 in->used = 0;
1393 in->inhdr = false;
1394 return;
1397 bytes = conn->read(conn, in->buffer + in->used,
1398 in->hdr.msg.len - in->used);
1399 if (bytes < 0)
1400 goto bad_client;
1402 in->used += bytes;
1403 if (in->used != in->hdr.msg.len)
1404 return;
1406 trace_io(conn, "IN ", in);
1407 consider_message(conn);
1408 return;
1410 bad_client:
1411 /* Kill it. */
1412 talloc_free(conn);
/* Push pending output to the connection; drop the connection if
 * write_message() reports failure. */
void handle_output(struct connection *conn)
{
	if (write_message(conn))
		return;

	talloc_free(conn);
}
1421 /* If a transaction has ended, see if we can unblock any connections. */
1422 static void unblock_connections(void)
1424 struct connection *i, *tmp;
1426 list_for_each_entry_safe(i, tmp, &connections, list) {
1427 switch (i->state) {
1428 case BLOCKED:
1429 if (!transaction_covering_node(i->blocked_by)) {
1430 talloc_free(i->blocked_by);
1431 i->blocked_by = NULL;
1432 i->state = OK;
1433 consider_message(i);
1435 break;
1436 case BUSY:
1437 case OK:
1438 break;
1442 /* To balance bias, move first entry to end. */
1443 if (!list_empty(&connections)) {
1444 i = list_top(&connections, struct connection, list);
1445 list_del(&i->list);
1446 list_add_tail(&i->list, &connections);
1450 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1452 struct connection *new;
1453 jmp_buf talloc_fail;
1455 new = talloc(talloc_autofree_context(), struct connection);
1456 if (!new)
1457 return NULL;
1459 new->state = OK;
1460 new->blocked_by = NULL;
1461 new->out = new->waiting_reply = NULL;
1462 new->waiting_for_ack = NULL;
1463 new->fd = -1;
1464 new->id = 0;
1465 new->domain = NULL;
1466 new->transaction = NULL;
1467 new->write = write;
1468 new->read = read;
1469 new->can_write = true;
1470 INIT_LIST_HEAD(&new->watches);
1472 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1473 if (setjmp(talloc_fail)) {
1474 talloc_free(new);
1475 return NULL;
1477 new->in = new_buffer(new);
1478 talloc_set_fail_handler(NULL, NULL);
1480 list_add_tail(&new->list, &connections);
1481 talloc_set_destructor(new, destroy_conn);
1482 trace_create(new, "connection");
1483 return new;
1486 static int writefd(struct connection *conn, const void *data, unsigned int len)
1488 return write(conn->fd, data, len);
1491 static int readfd(struct connection *conn, void *data, unsigned int len)
1493 return read(conn->fd, data, len);
1496 static void accept_connection(int sock, bool canwrite)
1498 int fd;
1499 struct connection *conn;
1501 fd = accept(sock, NULL, NULL);
1502 if (fd < 0)
1503 return;
1505 conn = new_connection(writefd, readfd);
1506 if (conn) {
1507 conn->fd = fd;
1508 conn->can_write = canwrite;
1509 } else
1510 close(fd);
/* Calc timespan from now to absolute time: *tv is rewritten in place,
 * and becomes zero if the absolute time has already passed. */
static void time_relative_to_now(struct timeval *tv)
{
	struct timeval current;

	gettimeofday(&current, NULL);

	/* Deadline strictly behind us: nothing left to wait for. */
	if (current.tv_sec == tv->tv_sec
	    ? current.tv_usec > tv->tv_usec
	    : current.tv_sec > tv->tv_sec) {
		tv->tv_sec = 0;
		tv->tv_usec = 0;
		return;
	}

	tv->tv_sec -= current.tv_sec;
	if (current.tv_usec > tv->tv_usec) {
		/* Borrow a second so the subtraction stays non-negative. */
		tv->tv_sec--;
		tv->tv_usec += 1000000;
	}
	tv->tv_usec -= current.tv_usec;
}
#ifdef TESTING
/* Useful for running under debugger: print every connection's state,
 * partial input, queued output and watches to stdout. */
void dump_connection(void)
{
	struct connection *c;

	list_for_each_entry(c, &connections, list) {
		const char *state_name;

		switch (c->state) {
		case OK:
			state_name = "OK";
			break;
		case BLOCKED:
			state_name = "BLOCKED";
			break;
		case BUSY:
			state_name = "BUSY";
			break;
		default:
			state_name = "INVALID";
			break;
		}

		printf("Connection %p:\n", c);
		printf(" state = %s\n", state_name);
		if (c->id)
			printf(" id = %i\n", c->id);
		if (c->blocked_by)
			printf(" blocked on = %s\n", c->blocked_by);

		/* Only mention input when something has been read. */
		if (!c->in->inhdr || c->in->used)
			printf(" got %i bytes of %s\n",
			       c->in->used,
			       c->in->inhdr ? "header" : "data");
		if (c->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(c->out->hdr.msg.type),
			       c->out->buffer);
		if (c->waiting_reply)
			printf(" ... and behind is queued %s (%s)\n",
			       sockmsg_string(c->waiting_reply->hdr.msg.type),
			       c->waiting_reply->buffer);
#if 0
		if (c->transaction)
			dump_transaction(c);
		if (c->domain)
			dump_domain(c);
#endif
		dump_watches(c);
	}
}
#endif
1570 static void setup_structure(void)
1572 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_READ };
1573 char *root, *dir, *permfile;
1575 /* Create root directory, with permissions. */
1576 if (mkdir(xs_daemon_store(), 0750) != 0) {
1577 if (errno != EEXIST)
1578 barf_perror("Could not create root %s",
1579 xs_daemon_store());
1580 return;
1582 root = talloc_strdup(talloc_autofree_context(), "/");
1583 if (!set_perms(NULL, root, &perms, 1))
1584 barf_perror("Could not create permissions in root");
1586 /* Create tool directory, with xenstored subdir. */
1587 dir = talloc_asprintf(root, "%s/%s", xs_daemon_store(), "tool");
1588 if (mkdir(dir, 0750) != 0)
1589 barf_perror("Making dir %s", dir);
1591 permfile = talloc_strdup(root, "/tool");
1592 if (!set_perms(NULL, permfile, &perms, 1))
1593 barf_perror("Could not create permissions on %s", permfile);
1595 dir = talloc_asprintf(root, "%s/%s", dir, "xenstored");
1596 if (mkdir(dir, 0750) != 0)
1597 barf_perror("Making dir %s", dir);
1599 permfile = talloc_strdup(root, "/tool/xenstored");
1600 if (!set_perms(NULL, permfile, &perms, 1))
1601 barf_perror("Could not create permissions on %s", permfile);
1602 talloc_free(root);
1603 if (mkdir(xs_daemon_transactions(), 0750) != 0)
1604 barf_perror("Could not create transaction dir %s",
1605 xs_daemon_transactions());
1608 static void write_pidfile(const char *pidfile)
1610 char buf[100];
1611 int len;
1612 int fd;
1614 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1615 if (fd == -1)
1616 barf_perror("Opening pid file %s", pidfile);
1618 /* We exit silently if daemon already running. */
1619 if (lockf(fd, F_TLOCK, 0) == -1)
1620 exit(0);
1622 len = sprintf(buf, "%d\n", getpid());
1623 write(fd, buf, len);
/* Stevens-style daemonization: the parent exits, and the child becomes
 * a session leader, moves to "/" and clears its umask. */
static void daemonize(void)
{
	/* Separate from our parent via fork, so init inherits us. */
	const pid_t child = fork();

	if (child < 0)
		barf_perror("Failed to fork daemon");
	if (child != 0)
		exit(0);

	/* Session leader so ^C doesn't whack us. */
	setsid();

	/* Move off any mount points we might be in. */
	chdir("/");

	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/*
 * Connect a stream socket to the Unix-domain socket at "path".
 *
 * Returns the connected descriptor, or -1 with errno set.  A path that
 * does not fit in sun_path is rejected with ENAMETOOLONG rather than
 * overflowing the address structure.
 */
static int open_domain_socket(const char *path)
{
	struct sockaddr_un addr;
	size_t path_len = strlen(path);
	int saved_errno;
	int sock;

	if (path_len >= sizeof(addr.sun_path)) {
		errno = ENAMETOOLONG;
		goto out;
	}

	if ((sock = socket(PF_UNIX, SOCK_STREAM, 0)) == -1)
		goto out;

	/* Zero the whole address so the full structure can be passed
	 * to connect(), whatever path was given. */
	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	memcpy(addr.sun_path, path, path_len + 1);

	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1)
		goto out_close_sock;

	return sock;

out_close_sock:
	/* Keep connect()'s errno for the caller's error report. */
	saved_errno = errno;
	close(sock);
	errno = saved_errno;
out:
	return -1;
}
/*
 * Transfer exactly "size" bytes between "fd" and "data", reading when
 * do_read is true and writing otherwise.
 *
 * EAGAIN and EINTR are retried immediately.  Any other error, or a
 * zero-length transfer (e.g. end of file on read), gives up.
 * Returns true once all "size" bytes have been transferred.
 */
bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
{
	/* Arithmetic on void * is a GNU extension; step through the
	 * buffer as bytes instead. */
	char *bytes = data;
	size_t offset = 0;

	while (offset < size) {
		ssize_t len;

		if (do_read)
			len = read(fd, bytes + offset, size - offset);
		else
			len = write(fd, bytes + offset, size - offset);

		if (len > 0) {
			offset += (size_t)len;
			continue;
		}

		/* Transient failure: try the same transfer again. */
		if (len == -1 && (errno == EAGAIN || errno == EINTR))
			continue;

		return false;
	}

	return true;
}
/* Blocking whole-buffer read / write wrappers around _read_write_sync(). */
#define read_sync(fd, buffer, size) \
	_read_write_sync((fd), (buffer), (size), true)
#define write_sync(fd, buffer, size) \
	_read_write_sync((fd), (buffer), (size), false)
1700 /* synchronized send/recv strictly for setting up xcs */
1701 /* always use asychronize callbacks any other time */
1702 static bool xcs_send_recv(int fd, xcs_msg_t *msg)
1704 bool ret = false;
1706 if (!write_sync(fd, msg, sizeof(*msg))) {
1707 eprintf("Write failed at %s:%s():L%d? Possible bug.",
1708 __FILE__, __FUNCTION__, __LINE__);
1709 goto out;
1712 if (!read_sync(fd, msg, sizeof(*msg))) {
1713 eprintf("Read failed at %s:%s():L%d? Possible bug.",
1714 __FILE__, __FUNCTION__, __LINE__);
1715 goto out;
1718 ret = true;
1720 out:
1721 return ret;
1724 static void handle_xcs(int xcs_fd)
1726 xcs_msg_t msg;
1728 if (!read_sync(xcs_fd, &msg, sizeof(msg)))
1729 barf_perror("read from xcs failed!");
1731 domain_cleanup();
1734 static int xcs_init(void)
1736 int ctrl_fd, data_fd;
1737 xcs_msg_t msg;
1739 ctrl_fd = open_domain_socket(XCS_SUN_PATH);
1740 if (ctrl_fd == -1)
1741 barf_perror("Failed to contact xcs. Is it running?");
1743 data_fd = open_domain_socket(XCS_SUN_PATH);
1744 if (data_fd == -1)
1745 barf_perror("Failed to contact xcs. Is it running?");
1747 memset(&msg, 0, sizeof(msg));
1748 msg.type = XCS_CONNECT_CTRL;
1749 if (!xcs_send_recv(ctrl_fd, &msg) || msg.result != XCS_RSLT_OK)
1750 barf_perror("xcs control connect failed.");
1752 msg.type = XCS_CONNECT_DATA;
1753 if (!xcs_send_recv(data_fd, &msg) || msg.result != XCS_RSLT_OK)
1754 barf_perror("xcs data connect failed.");
1756 msg.type = XCS_VIRQ_BIND;
1757 msg.u.virq.virq = VIRQ_DOM_EXC;
1758 if (!xcs_send_recv(ctrl_fd, &msg) || msg.result != XCS_RSLT_OK)
1759 barf_perror("xcs virq bind failed.");
1761 return data_fd;
/* Long command-line options; each maps to the short option letter
 * handled in main(). */
static struct option options[] = {
	{ "pid-file",   required_argument, NULL, 'F' },
	{ "no-fork",    no_argument,       NULL, 'N' },
	{ "output-pid", no_argument,       NULL, 'P' },
	{ "trace-file", required_argument, NULL, 'T' },
	{ "verbose",    no_argument,       NULL, 'V' },
	{ NULL,         0,                 NULL, 0   }
};
1773 int main(int argc, char *argv[])
1775 int opt, *sock, *ro_sock, event_fd, xcs_fd, max;
1776 struct sockaddr_un addr;
1777 fd_set inset, outset;
1778 bool dofork = true;
1779 bool outputpid = false;
1780 const char *pidfile = NULL;
1782 while ((opt = getopt_long(argc, argv, "F:NPT:V", options,
1783 NULL)) != -1) {
1784 switch (opt) {
1785 case 'F':
1786 pidfile = optarg;
1787 break;
1788 case 'N':
1789 dofork = false;
1790 break;
1791 case 'P':
1792 outputpid = true;
1793 break;
1794 case 'T':
1795 tracefd = open(optarg, O_WRONLY|O_CREAT|O_APPEND, 0600);
1796 if (tracefd < 0)
1797 barf_perror("Could not open tracefile %s",
1798 optarg);
1799 write(tracefd, "\n***\n", strlen("\n***\n"));
1800 break;
1801 case 'V':
1802 verbose = true;
1803 break;
1806 if (optind != argc)
1807 barf("%s: No arguments desired", argv[0]);
1809 if (dofork) {
1810 openlog("xenstored", 0, LOG_DAEMON);
1811 daemonize();
1813 if (pidfile)
1814 write_pidfile(pidfile);
1816 talloc_enable_leak_report_full();
1818 /* Create sockets for them to listen to. */
1819 sock = talloc(talloc_autofree_context(), int);
1820 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1821 if (*sock < 0)
1822 barf_perror("Could not create socket");
1823 ro_sock = talloc(talloc_autofree_context(), int);
1824 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1825 if (*ro_sock < 0)
1826 barf_perror("Could not create socket");
1827 talloc_set_destructor(sock, destroy_fd);
1828 talloc_set_destructor(ro_sock, destroy_fd);
1830 /* Don't kill us with SIGPIPE. */
1831 signal(SIGPIPE, SIG_IGN);
1833 /* FIXME: Be more sophisticated, don't mug running daemon. */
1834 unlink(xs_daemon_socket());
1835 unlink(xs_daemon_socket_ro());
1837 addr.sun_family = AF_UNIX;
1838 strcpy(addr.sun_path, xs_daemon_socket());
1839 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1840 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1841 strcpy(addr.sun_path, xs_daemon_socket_ro());
1842 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1843 barf_perror("Could not bind socket to %s",
1844 xs_daemon_socket_ro());
1845 if (chmod(xs_daemon_socket(), 0600) != 0
1846 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1847 barf_perror("Could not chmod sockets");
1849 if (listen(*sock, 1) != 0
1850 || listen(*ro_sock, 1) != 0)
1851 barf_perror("Could not listen on sockets");
1853 /* If we're the first, create .perms file for root. */
1854 setup_structure();
1856 /* Listen to hypervisor. */
1857 event_fd = domain_init();
1859 /* Listen to hypervisor - more. */
1860 xcs_fd = xcs_init();
1862 /* Restore existing connections. */
1863 restore_existing_connections();
1865 if (outputpid) {
1866 printf("%i\n", getpid());
1867 fflush(stdout);
1870 /* close stdin/stdout now we're ready to accept connections */
1871 if (dofork) {
1872 close(STDIN_FILENO);
1873 close(STDOUT_FILENO);
1874 close(STDERR_FILENO);
1877 #ifdef TESTING
1878 signal(SIGUSR1, stop_failtest);
1879 #endif
1881 /* Get ready to listen to the tools. */
1882 max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd,
1883 xcs_fd);
1885 /* Main loop. */
1886 /* FIXME: Rewrite so noone can starve. */
1887 for (;;) {
1888 struct connection *i;
1889 struct timeval *tvp = NULL, tv;
1891 timerclear(&tv);
1892 shortest_transaction_timeout(&tv);
1893 shortest_watch_ack_timeout(&tv);
1894 if (timerisset(&tv)) {
1895 time_relative_to_now(&tv);
1896 tvp = &tv;
1899 if (select(max+1, &inset, &outset, NULL, tvp) < 0) {
1900 if (errno == EINTR)
1901 continue;
1902 barf_perror("Select failed");
1905 if (FD_ISSET(*sock, &inset))
1906 accept_connection(*sock, true);
1908 if (FD_ISSET(*ro_sock, &inset))
1909 accept_connection(*ro_sock, false);
1911 if (FD_ISSET(event_fd, &inset))
1912 handle_event(event_fd);
1914 if (FD_ISSET(xcs_fd, &inset))
1915 handle_xcs(xcs_fd);
1917 list_for_each_entry(i, &connections, list) {
1918 if (i->domain)
1919 continue;
1921 /* Operations can delete themselves or others
1922 * (xs_release): list is not safe after input,
1923 * so break. */
1924 if (FD_ISSET(i->fd, &inset)) {
1925 handle_input(i);
1926 break;
1928 if (FD_ISSET(i->fd, &outset)) {
1929 handle_output(i);
1930 break;
1934 /* Handle all possible I/O for domain connections. */
1935 more:
1936 list_for_each_entry(i, &connections, list) {
1937 if (!i->domain)
1938 continue;
1940 if (domain_can_read(i)) {
1941 handle_input(i);
1942 goto more;
1945 if (domain_can_write(i)) {
1946 handle_output(i);
1947 goto more;
1951 if (tvp) {
1952 check_transaction_timeout();
1953 check_watch_ack_timeout();
1956 /* If transactions ended, we might be able to do more work. */
1957 unblock_connections();
1959 max = initialize_set(&inset, &outset, *sock, *ro_sock,
1960 event_fd, xcs_fd);