debuggers.hg

view tools/xenstore/xenstored_core.c @ 6677:d4d69c509371

merge?
author cl349@firebug.cl.cam.ac.uk
date Tue Sep 06 17:00:25 2005 +0000 (2005-09-06)
parents d6d77aa96aa1 158d23cbd2e6
children acde14d25398
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored.h"
48 #include "xenstored_core.h"
49 #include "xenstored_watch.h"
50 #include "xenstored_transaction.h"
51 #include "xenstored_domain.h"
52 #include "xenctrl.h"
53 #include "xen/io/domain_controller.h"
54 #include "xcs_proto.h"
56 static bool verbose; /* When set, message traffic is reported through xprintf(). */
57 LIST_HEAD(connections); /* Head of the connection list; entries are linked through their "list" member. */
58 static int tracefd = -1; /* Descriptor the trace_*() helpers write to; they do nothing while it is negative. */
60 #ifdef TESTING
61 static bool failtest = false; /* TESTING only: enables random failure injection in the test_*() wrappers. */
63 /* We override talloc's malloc. */
64 void *test_malloc(size_t size)
65 {
66 /* 1 in 20 means only about 50% of connections establish. */
67 if (failtest && (random() % 32) == 0)
68 return NULL;
69 return malloc(size);
70 }
72 static void stop_failtest(int signum __attribute__((unused)))
73 {
74 failtest = false;
75 }
77 /* Need these before we #define away write_all/mkdir in testing.h */
78 bool test_write_all(int fd, void *contents, unsigned int len);
79 bool test_write_all(int fd, void *contents, unsigned int len)
80 {
81 if (failtest && (random() % 8) == 0) {
82 if (len)
83 len = random() % len;
84 write(fd, contents, len);
85 errno = ENOSPC;
86 return false;
87 }
88 return xs_write_all(fd, contents, len);
89 }
91 int test_mkdir(const char *dir, int perms);
92 int test_mkdir(const char *dir, int perms)
93 {
94 if (failtest && (random() % 8) == 0) {
95 errno = ENOSPC;
96 return -1;
97 }
98 return mkdir(dir, perms);
99 }
100 #endif /* TESTING */
102 #include "xenstored_test.h"
104 /* FIXME: Ideally, this should never be called. Some can be eliminated. */
105 /* Something is horribly wrong: shutdown immediately. */
106 void __attribute__((noreturn)) corrupt(struct connection *conn,
107 const char *fmt, ...)
108 {
109 va_list arglist;
110 char *str;
111 int saved_errno = errno;
113 va_start(arglist, fmt);
114 str = talloc_vasprintf(NULL, fmt, arglist);
115 va_end(arglist);
117 trace("xenstored corruption: connection id %i: err %s: %s",
118 conn ? (int)conn->id : -1, strerror(saved_errno), str);
119 eprintf("xenstored corruption: connection id %i: err %s: %s",
120 conn ? (int)conn->id : -1, strerror(saved_errno), str);
121 #ifdef TESTING
122 /* Allow them to attach debugger. */
123 sleep(30);
124 #endif
125 syslog(LOG_DAEMON,
126 "xenstored corruption: connection id %i: err %s: %s",
127 conn ? (int)conn->id : -1, strerror(saved_errno), str);
128 _exit(2);
129 }
131 static char *sockmsg_string(enum xsd_sockmsg_type type)
132 {
133 switch (type) {
134 case XS_DEBUG: return "DEBUG";
135 case XS_SHUTDOWN: return "SHUTDOWN";
136 case XS_DIRECTORY: return "DIRECTORY";
137 case XS_READ: return "READ";
138 case XS_GET_PERMS: return "GET_PERMS";
139 case XS_WATCH: return "WATCH";
140 case XS_WATCH_ACK: return "WATCH_ACK";
141 case XS_UNWATCH: return "UNWATCH";
142 case XS_TRANSACTION_START: return "TRANSACTION_START";
143 case XS_TRANSACTION_END: return "TRANSACTION_END";
144 case XS_INTRODUCE: return "INTRODUCE";
145 case XS_RELEASE: return "RELEASE";
146 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
147 case XS_WRITE: return "WRITE";
148 case XS_MKDIR: return "MKDIR";
149 case XS_RM: return "RM";
150 case XS_SET_PERMS: return "SET_PERMS";
151 case XS_WATCH_EVENT: return "WATCH_EVENT";
152 case XS_ERROR: return "ERROR";
153 default:
154 return "**UNKNOWN**";
155 }
156 }
158 static void trace_io(const struct connection *conn,
159 const char *prefix,
160 const struct buffered_data *data)
161 {
162 char string[64];
163 unsigned int i;
165 if (tracefd < 0)
166 return;
168 write(tracefd, prefix, strlen(prefix));
169 sprintf(string, " %p ", conn);
170 write(tracefd, string, strlen(string));
171 write(tracefd, sockmsg_string(data->hdr.msg.type),
172 strlen(sockmsg_string(data->hdr.msg.type)));
173 write(tracefd, " (", 2);
174 for (i = 0; i < data->hdr.msg.len; i++) {
175 if (data->buffer[i] == '\0')
176 write(tracefd, " ", 1);
177 else
178 write(tracefd, data->buffer + i, 1);
179 }
180 write(tracefd, ")\n", 2);
181 }
183 void trace_create(const void *data, const char *type)
184 {
185 char string[64];
186 if (tracefd < 0)
187 return;
189 write(tracefd, "CREATE ", strlen("CREATE "));
190 write(tracefd, type, strlen(type));
191 sprintf(string, " %p\n", data);
192 write(tracefd, string, strlen(string));
193 }
195 void trace_destroy(const void *data, const char *type)
196 {
197 char string[64];
198 if (tracefd < 0)
199 return;
201 write(tracefd, "DESTROY ", strlen("DESTROY "));
202 write(tracefd, type, strlen(type));
203 sprintf(string, " %p\n", data);
204 write(tracefd, string, strlen(string));
205 }
207 void trace_watch_timeout(const struct connection *conn, const char *node, const char *token)
208 {
209 char string[64];
210 if (tracefd < 0)
211 return;
212 write(tracefd, "WATCH_TIMEOUT ", strlen("WATCH_TIMEOUT "));
213 sprintf(string, " %p ", conn);
214 write(tracefd, string, strlen(string));
215 write(tracefd, " (", 2);
216 write(tracefd, node, strlen(node));
217 write(tracefd, " ", 1);
218 write(tracefd, token, strlen(token));
219 write(tracefd, ")\n", 2);
220 }
222 static void trace_blocked(const struct connection *conn,
223 const struct buffered_data *data)
224 {
225 char string[64];
227 if (tracefd < 0)
228 return;
230 write(tracefd, "BLOCKED", strlen("BLOCKED"));
231 sprintf(string, " %p (", conn);
232 write(tracefd, string, strlen(string));
233 write(tracefd, sockmsg_string(data->hdr.msg.type),
234 strlen(sockmsg_string(data->hdr.msg.type)));
235 write(tracefd, ")\n", 2);
236 }
238 void trace(const char *fmt, ...)
239 {
240 va_list arglist;
241 char *str;
243 if (tracefd < 0)
244 return;
246 va_start(arglist, fmt);
247 str = talloc_vasprintf(NULL, fmt, arglist);
248 va_end(arglist);
249 write(tracefd, str, strlen(str));
250 talloc_free(str);
251 }
253 static bool write_message(struct connection *conn)
254 {
255 int ret;
256 struct buffered_data *out = conn->out;
258 assert(conn->state != BLOCKED);
259 if (out->inhdr) {
260 if (verbose)
261 xprintf("Writing msg %s (%s) out to %p\n",
262 sockmsg_string(out->hdr.msg.type),
263 out->buffer, conn);
264 ret = conn->write(conn, out->hdr.raw + out->used,
265 sizeof(out->hdr) - out->used);
266 if (ret < 0)
267 return false;
269 out->used += ret;
270 if (out->used < sizeof(out->hdr))
271 return true;
273 out->inhdr = false;
274 out->used = 0;
276 /* Second write might block if non-zero. */
277 if (out->hdr.msg.len && !conn->domain)
278 return true;
279 }
281 ret = conn->write(conn, out->buffer + out->used,
282 out->hdr.msg.len - out->used);
284 if (ret < 0)
285 return false;
287 out->used += ret;
288 if (out->used != out->hdr.msg.len)
289 return true;
291 trace_io(conn, "OUT", out);
292 conn->out = NULL;
293 talloc_free(out);
295 queue_next_event(conn);
297 /* No longer busy? */
298 if (!conn->out)
299 conn->state = OK;
300 return true;
301 }
303 static int destroy_conn(void *_conn)
304 {
305 struct connection *conn = _conn;
307 /* Flush outgoing if possible, but don't block. */
308 if (!conn->domain) {
309 fd_set set;
310 struct timeval none;
312 FD_ZERO(&set);
313 FD_SET(conn->fd, &set);
314 none.tv_sec = none.tv_usec = 0;
316 while (conn->out
317 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
318 if (!write_message(conn))
319 break;
320 close(conn->fd);
321 }
322 list_del(&conn->list);
323 trace_destroy(conn, "connection");
324 return 0;
325 }
327 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
328 int event_fd, int xcs_fd)
329 {
330 struct connection *i;
331 int max;
333 FD_ZERO(inset);
334 FD_ZERO(outset);
335 FD_SET(sock, inset);
336 max = sock;
337 FD_SET(ro_sock, inset);
338 if (ro_sock > max)
339 max = ro_sock;
340 FD_SET(event_fd, inset);
341 if (event_fd > max)
342 max = event_fd;
343 FD_SET(xcs_fd, inset);
344 if (xcs_fd > max)
345 max = xcs_fd;
346 list_for_each_entry(i, &connections, list) {
347 if (i->domain)
348 continue;
349 if (i->state == OK)
350 FD_SET(i->fd, inset);
351 if (i->out)
352 FD_SET(i->fd, outset);
353 if (i->fd > max)
354 max = i->fd;
355 }
356 return max;
357 }
/*
 * Read everything from a talloc_open'ed fd.
 *
 * The buffer is allocated off fd and doubled whenever it fills up.
 * On success the buffer is returned and *size holds the byte count;
 * if read() reports an error, NULL is returned.
 */
void *read_all(int *fd, unsigned int *size)
{
	unsigned int capacity = 4;
	void *buffer = talloc_size(fd, capacity);
	int got;

	*size = 0;
	for (;;) {
		got = read(*fd, (char *)buffer + *size, capacity - *size);
		if (got <= 0)
			break;

		*size += got;
		if (*size == capacity) {
			capacity *= 2;
			buffer = talloc_realloc_size(fd, buffer, capacity);
		}
	}

	return got < 0 ? NULL : buffer;
}
/* Destructor for talloc_open(): closes the stored descriptor. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
384 /* Return a pointer to an fd, self-closing and attached to this pathname. */
385 int *talloc_open(const char *pathname, int flags, int mode)
386 {
387 int *fd;
389 fd = talloc(pathname, int);
390 *fd = open(pathname, flags, mode);
391 if (*fd < 0) {
392 int saved_errno = errno;
393 talloc_free(fd);
394 errno = saved_errno;
395 return NULL;
396 }
397 talloc_set_destructor(fd, destroy_fd);
398 return fd;
399 }
/*
 * Is child a subnode of parent, or equal?
 *
 * A parent of "/" matches everything.  Otherwise child must start with
 * parent and the match must end on a path boundary, so "/ab" is not a
 * child of "/a".
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (strcmp(parent, "/") == 0)
		return true;

	plen = strlen(parent);
	if (strncmp(child, parent, plen) != 0)
		return false;

	switch (child[plen]) {
	case '/':
	case '\0':
		return true;
	default:
		return false;
	}
}
/*
 * Build the directory path for a node outside any transaction: the
 * xs_daemon_store() path followed by the node name, or just the store
 * path for "/".  Answer never ends in /.  Allocated off node.
 */
char *node_dir_outside_transaction(const char *node)
{
	const char *store = xs_daemon_store();

	if (streq(node, "/"))
		return talloc_strdup(node, store);
	return talloc_asprintf(node, "%s%s", store, node);
}
/*
 * Directory path for a node: the transaction's own copy when a
 * transaction is given and the node lies within it, otherwise the path
 * outside any transaction.
 */
static char *node_dir(struct transaction *trans, const char *node)
{
	if (trans && within_transaction(trans, node))
		return node_dir_inside_transaction(trans, node);
	return node_dir_outside_transaction(node);
}
/* Path of the ".data" file inside dir; allocated off dir. */
static char *datafile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.data", dir);

	return path;
}
/* Path of the ".data" file for a node, honouring the transaction. */
static char *node_datafile(struct transaction *trans, const char *node)
{
	char *dir = node_dir(trans, node);

	return datafile(dir);
}
/* Path of the ".perms" file inside dir; allocated off dir. */
static char *permfile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.perms", dir);

	return path;
}
/* Path of the ".perms" file for a node, honouring the transaction. */
static char *node_permfile(struct transaction *trans, const char *node)
{
	char *dir = node_dir(trans, node);

	return permfile(dir);
}
452 struct buffered_data *new_buffer(void *ctx)
453 {
454 struct buffered_data *data;
456 data = talloc(ctx, struct buffered_data);
457 data->inhdr = true;
458 data->used = 0;
459 data->buffer = NULL;
461 return data;
462 }
464 /* Return length of string (including nul) at this offset. */
465 unsigned int get_string(const struct buffered_data *data, unsigned int offset)
466 {
467 const char *nul;
469 if (offset >= data->used)
470 return 0;
472 nul = memchr(data->buffer + offset, 0, data->used - offset);
473 if (!nul)
474 return 0;
476 return nul - (data->buffer + offset) + 1;
477 }
479 /* Break input into vectors, return the number, fill in up to num of them. */
480 unsigned int get_strings(struct buffered_data *data,
481 char *vec[], unsigned int num)
482 {
483 unsigned int off, i, len;
485 off = i = 0;
486 while ((len = get_string(data, off)) != 0) {
487 if (i < num)
488 vec[i] = data->buffer + off;
489 i++;
490 off += len;
491 }
492 return i;
493 }
495 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
496 const void *data, unsigned int len)
497 {
498 struct buffered_data *bdata;
500 /* When data gets freed, we want list entry is destroyed (so
501 * list entry is a child). */
502 bdata = new_buffer(conn);
503 bdata->buffer = talloc_array(bdata, char, len);
505 bdata->hdr.msg.type = type;
506 bdata->hdr.msg.len = len;
507 memcpy(bdata->buffer, data, len);
509 /* There might be an event going out now. Queue behind it. */
510 if (conn->out) {
511 assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
512 assert(!conn->waiting_reply);
513 conn->waiting_reply = bdata;
514 } else
515 conn->out = bdata;
516 assert(conn->state != BLOCKED);
517 conn->state = BUSY;
518 }
520 /* Some routines (write, mkdir, etc) just need a non-error return */
521 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
522 {
523 send_reply(conn, type, "OK", sizeof("OK"));
524 }
526 void send_error(struct connection *conn, int error)
527 {
528 unsigned int i;
530 for (i = 0; error != xsd_errors[i].errnum; i++) {
531 if (i == ARRAY_SIZE(xsd_errors) - 1) {
532 eprintf("xenstored: error %i untranslatable", error);
533 i = 0; /* EINVAL */
534 break;
535 }
536 }
537 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
538 strlen(xsd_errors[i].errstring) + 1);
539 }
/*
 * True when node consists solely of ASCII letters, digits and the
 * characters '-', '/', '_' and '@'.  The empty string qualifies.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix must run all the way to the terminator. */
	return node[strspn(node, allowed)] == '\0';
}
/*
 * Check a node name: it must start with '/', must not end with '/'
 * unless it is exactly "/", must not contain "//", and must pass
 * valid_chars().
 */
bool is_valid_nodename(const char *node)
{
	bool is_root;

	/* Must start in /. */
	if (!strstarts(node, "/"))
		return false;

	/* Cannot end in / (unless it's just "/"). */
	is_root = streq(node, "/");
	if (!is_root && strends(node, "/"))
		return false;

	/* No double //. */
	if (strstr(node, "//") != NULL)
		return false;

	return valid_chars(node);
}
567 /* We expect one arg in the input: return NULL otherwise. */
568 static const char *onearg(struct buffered_data *in)
569 {
570 if (!in->used || get_string(in, 0) != in->used)
571 return NULL;
572 return in->buffer;
573 }
575 /* If it fails, returns NULL and sets errno. */
576 static struct xs_permissions *get_perms(const char *dir, unsigned int *num)
577 {
578 unsigned int size;
579 char *strings;
580 struct xs_permissions *ret;
581 int *fd;
583 fd = talloc_open(permfile(dir), O_RDONLY, 0);
584 if (!fd)
585 return NULL;
586 strings = read_all(fd, &size);
587 if (!strings)
588 return NULL;
590 *num = xs_count_strings(strings, size);
591 ret = talloc_array(dir, struct xs_permissions, *num);
592 if (!xs_strings_to_perms(ret, *num, strings))
593 corrupt(NULL, "Permissions corrupt for %s", dir);
595 return ret;
596 }
598 static char *perms_to_strings(const void *ctx,
599 struct xs_permissions *perms, unsigned int num,
600 unsigned int *len)
601 {
602 unsigned int i;
603 char *strings = NULL;
604 char buffer[MAX_STRLEN(domid_t) + 1];
606 for (*len = 0, i = 0; i < num; i++) {
607 if (!xs_perm_to_string(&perms[i], buffer))
608 return NULL;
610 strings = talloc_realloc(ctx, strings, char,
611 *len + strlen(buffer) + 1);
612 strcpy(strings + *len, buffer);
613 *len += strlen(buffer) + 1;
614 }
615 return strings;
616 }
/*
 * Destroy this, and its children, and its children's children.
 *
 * If path cannot be opened as a directory it is unlinked instead (a
 * path that is already gone is fine).  Otherwise every entry except
 * "." and ".." is destroyed recursively and the directory removed.
 * Failures are treated as corruption.  Always returns 0.
 */
int destroy_path(void *path)
{
	struct dirent *entry;
	DIR *dir = opendir(path);

	if (!dir) {
		if (unlink(path) != 0 && errno != ENOENT)
			corrupt(NULL, "Destroying path %s", path);
		return 0;
	}

	while ((entry = readdir(dir)) != NULL) {
		const char *name = entry->d_name;
		char child[strlen(path) + 1 + strlen(name) + 1];

		sprintf(child, "%s/%s", (char *)path, name);
		if (streq(name, ".") || streq(name, ".."))
			continue;
		destroy_path(child);
	}
	closedir(dir);

	if (rmdir(path) != 0)
		corrupt(NULL, "Destroying directory %s", path);
	return 0;
}
643 /* Create a self-destructing temporary path */
644 static char *temppath(const char *path)
645 {
646 char *tmppath = talloc_asprintf(path, "%s.tmp", path);
647 talloc_set_destructor(tmppath, destroy_path);
648 return tmppath;
649 }
/*
 * Create a self-destructing temporary file "<path>.tmp" holding len
 * bytes of contents.  The file is created exclusively; returns its
 * name, or NULL if it could not be created or fully written.
 */
static char *tempfile(const char *path, void *contents, unsigned int len)
{
	char *tmppath = temppath(path);
	int *fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640);

	if (fd && xs_write_all(*fd, contents, len))
		return tmppath;
	return NULL;
}
/* Destructor for talloc_opendir(): closes the stored directory stream. */
static int destroy_opendir(void *_dir)
{
	closedir(*(DIR **)_dir);
	return 0;
}
673 /* Return a pointer to a DIR*, self-closing and attached to this pathname. */
674 DIR **talloc_opendir(const char *pathname)
675 {
676 DIR **dir;
678 dir = talloc(pathname, DIR *);
679 *dir = opendir(pathname);
680 if (!*dir) {
681 int saved_errno = errno;
682 talloc_free(dir);
683 errno = saved_errno;
684 return NULL;
685 }
686 talloc_set_destructor(dir, destroy_opendir);
687 return dir;
688 }
/*
 * Move a temporary file into place by renaming it to its own name with
 * everything from the last '.' onwards removed, then cancel its
 * destructor.
 *
 * We assume rename() doesn't fail on moves in same dir; a failure is
 * treated as corruption.
 */
static void commit_tempfile(const char *path)
{
	const char *suffix = strrchr(path, '.');
	unsigned int stem_len = suffix - path;
	char realname[strlen(path) + 1];

	memcpy(realname, path, stem_len);
	realname[stem_len] = '\0';

	if (rename(path, realname) != 0)
		corrupt(NULL, "Committing %s", realname);
	talloc_set_destructor(path, NULL);
}
/*
 * Replace a node's permissions file: the entries are serialised, written
 * to a temporary file and then moved into place.  Returns false if the
 * entries cannot be serialised or the temporary file cannot be written.
 */
static bool set_perms(struct transaction *transaction,
		      const char *node,
		      struct xs_permissions *perms, unsigned int num)
{
	unsigned int len;
	char *strings = perms_to_strings(node, perms, num, &len);
	char *staged;

	if (!strings)
		return false;

	/* Create then move. */
	staged = tempfile(node_permfile(transaction, node), strings, len);
	if (staged)
		commit_tempfile(staged);
	return staged != NULL;
}
/*
 * Return the parent of a path, allocated off node: everything before
 * the last '/' found after the first character, or "/" when there is
 * no such slash.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);
	return talloc_strdup(node, "/");
}
731 static enum xs_perm_type perm_for_id(domid_t id,
732 struct xs_permissions *perms,
733 unsigned int num)
734 {
735 unsigned int i;
737 /* Owners and tools get it all... */
738 if (!id || perms[0].id == id)
739 return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
741 for (i = 1; i < num; i++)
742 if (perms[i].id == id)
743 return perms[i].perms;
745 return perms[0].perms;
746 }
748 /* What do parents say? */
749 static enum xs_perm_type ask_parents(struct connection *conn,
750 const char *node)
751 {
752 struct xs_permissions *perms;
753 unsigned int num;
755 do {
756 node = get_parent(node);
757 perms = get_perms(node_dir(conn->transaction, node), &num);
758 if (perms)
759 break;
760 } while (!streq(node, "/"));
762 /* No permission at root? We're in trouble. */
763 if (!perms)
764 corrupt(conn, "No permissions file at root");
766 return perm_for_id(conn->id, perms, num);
767 }
769 /* We have a weird permissions system. You can allow someone into a
770 * specific node without allowing it in the parents. If it's going to
771 * fail, however, we don't want the errno to indicate any information
772 * about the node. */
773 static int errno_from_parents(struct connection *conn, const char *node,
774 int errnum)
775 {
776 /* We always tell them about memory failures. */
777 if (errnum == ENOMEM)
778 return errnum;
780 if (ask_parents(conn, node) & XS_PERM_READ)
781 return errnum;
782 return EACCES;
783 }
/*
 * Turn a relative node name into "<implicit path>/<node>", allocated
 * off node.  NULL, names starting with '/', and names on a connection
 * without an implicit path are returned unchanged.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *prefix;

	if (!node || strstarts(node, "/"))
		return (char *)node;

	prefix = get_implicit_path(conn);
	if (!prefix)
		return (char *)node;

	return talloc_asprintf(node, "%s/%s", prefix, node);
}
797 bool check_node_perms(struct connection *conn, const char *node,
798 enum xs_perm_type perm)
799 {
800 struct xs_permissions *perms;
801 unsigned int num;
803 if (!node || !is_valid_nodename(node)) {
804 errno = EINVAL;
805 return false;
806 }
808 if (!conn->can_write && (perm & XS_PERM_WRITE)) {
809 errno = EROFS;
810 return false;
811 }
813 perms = get_perms(node_dir(conn->transaction, node), &num);
815 if (perms) {
816 if (perm_for_id(conn->id, perms, num) & perm)
817 return true;
818 errno = EACCES;
819 return false;
820 }
822 /* If it's OK not to exist, we consult parents. */
823 if (errno == ENOENT && (perm & XS_PERM_ENOENT_OK)) {
824 if (ask_parents(conn, node) & perm)
825 return true;
826 /* Parents say they should not know. */
827 errno = EACCES;
828 return false;
829 }
831 /* They might not have permission to even *see* this node, in
832 * which case we return EACCES even if it's ENOENT or EIO. */
833 errno = errno_from_parents(conn, node, errno);
834 return false;
835 }
/*
 * True when node is non-NULL and starts with '@'; otherwise sets errno
 * to EINVAL and returns false.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
846 static void send_directory(struct connection *conn, const char *node)
847 {
848 char *path, *reply;
849 unsigned int reply_len = 0;
850 DIR **dir;
851 struct dirent *dirent;
853 node = canonicalize(conn, node);
854 if (!check_node_perms(conn, node, XS_PERM_READ)) {
855 send_error(conn, errno);
856 return;
857 }
859 path = node_dir(conn->transaction, node);
860 dir = talloc_opendir(path);
861 if (!dir) {
862 send_error(conn, errno);
863 return;
864 }
866 reply = talloc_strdup(node, "");
867 while ((dirent = readdir(*dir)) != NULL) {
868 int len = strlen(dirent->d_name) + 1;
870 if (!valid_chars(dirent->d_name))
871 continue;
873 reply = talloc_realloc(path, reply, char, reply_len + len);
874 strcpy(reply + reply_len, dirent->d_name);
875 reply_len += len;
876 }
878 send_reply(conn, XS_DIRECTORY, reply, reply_len);
879 }
881 static void do_read(struct connection *conn, const char *node)
882 {
883 char *value;
884 unsigned int size;
885 int *fd;
887 node = canonicalize(conn, node);
888 if (!check_node_perms(conn, node, XS_PERM_READ)) {
889 send_error(conn, errno);
890 return;
891 }
893 fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0);
894 if (!fd) {
895 /* Data file doesn't exist? We call that a directory */
896 if (errno == ENOENT)
897 errno = EISDIR;
898 send_error(conn, errno);
899 return;
900 }
902 value = read_all(fd, &size);
903 if (!value)
904 send_error(conn, errno);
905 else
906 send_reply(conn, XS_READ, value, size);
907 }
/*
 * Commit this directory, eg. committing a/b.tmp/c causes a/b.tmp -> a/b.
 *
 * The path is cut (in place) at the first '/' following its last '.',
 * and the result is renamed to the same name with everything from that
 * '.' onwards removed.  Returns true if rename() succeeds.
 */
static bool commit_dir(char *dir)
{
	char *suffix = strrchr(dir, '.');
	char *tail = strchr(suffix, '/');
	char *target;

	if (tail)
		*tail = '\0';

	target = talloc_asprintf(dir, "%.*s", (int)(suffix - dir), dir);
	return rename(dir, target) == 0;
}
923 /* Create a temporary directory. Put data in it (if data != NULL) */
924 static char *tempdir(struct connection *conn,
925 const char *node, void *data, unsigned int datalen)
926 {
927 struct xs_permissions *perms;
928 char *permstr;
929 unsigned int num, len;
930 int *fd;
931 char *dir;
933 dir = temppath(node_dir(conn->transaction, node));
934 if (mkdir(dir, 0750) != 0) {
935 if (errno != ENOENT)
936 return NULL;
938 dir = tempdir(conn, get_parent(node), NULL, 0);
939 if (!dir)
940 return NULL;
942 dir = talloc_asprintf(dir, "%s%s", dir, strrchr(node, '/'));
943 if (mkdir(dir, 0750) != 0)
944 return NULL;
945 talloc_set_destructor(dir, destroy_path);
946 }
948 perms = get_perms(get_parent(dir), &num);
949 assert(perms);
950 /* Domains own what they create. */
951 if (conn->id)
952 perms->id = conn->id;
954 permstr = perms_to_strings(dir, perms, num, &len);
955 fd = talloc_open(permfile(dir), O_WRONLY|O_CREAT|O_EXCL, 0640);
956 if (!fd || !xs_write_all(*fd, permstr, len))
957 return NULL;
959 if (data) {
960 char *datapath = datafile(dir);
962 fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
963 if (!fd || !xs_write_all(*fd, data, datalen))
964 return NULL;
965 }
966 return dir;
967 }
969 /* path, flags, data... */
970 static void do_write(struct connection *conn, struct buffered_data *in)
971 {
972 unsigned int offset, datalen;
973 char *vec[2];
974 char *node, *tmppath;
975 enum xs_perm_type mode;
976 struct stat st;
978 /* Extra "strings" can be created by binary data. */
979 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
980 send_error(conn, EINVAL);
981 return;
982 }
984 node = canonicalize(conn, vec[0]);
985 if (!within_transaction(conn->transaction, node)) {
986 send_error(conn, EROFS);
987 return;
988 }
990 if (transaction_block(conn, node))
991 return;
993 offset = strlen(vec[0]) + strlen(vec[1]) + 2;
994 datalen = in->used - offset;
996 if (streq(vec[1], XS_WRITE_NONE))
997 mode = XS_PERM_WRITE;
998 else if (streq(vec[1], XS_WRITE_CREATE))
999 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
1000 else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
1001 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
1002 else {
1003 send_error(conn, EINVAL);
1004 return;
1007 if (!check_node_perms(conn, node, mode)) {
1008 send_error(conn, errno);
1009 return;
1012 if (lstat(node_dir(conn->transaction, node), &st) != 0) {
1013 char *dir;
1015 /* Does not exist... */
1016 if (errno != ENOENT) {
1017 send_error(conn, errno);
1018 return;
1021 /* Not going to create it? */
1022 if (streq(vec[1], XS_WRITE_NONE)) {
1023 send_error(conn, ENOENT);
1024 return;
1027 dir = tempdir(conn, node, in->buffer + offset, datalen);
1028 if (!dir || !commit_dir(dir)) {
1029 send_error(conn, errno);
1030 return;
1033 } else {
1034 /* Exists... */
1035 if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
1036 send_error(conn, EEXIST);
1037 return;
1040 tmppath = tempfile(node_datafile(conn->transaction, node),
1041 in->buffer + offset, datalen);
1042 if (!tmppath) {
1043 send_error(conn, errno);
1044 return;
1047 commit_tempfile(tmppath);
1050 add_change_node(conn->transaction, node, false);
1051 fire_watches(conn, node, false);
1052 send_ack(conn, XS_WRITE);
1055 static void do_mkdir(struct connection *conn, const char *node)
1057 char *dir;
1058 struct stat st;
1060 node = canonicalize(conn, node);
1061 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
1062 send_error(conn, errno);
1063 return;
1066 if (!within_transaction(conn->transaction, node)) {
1067 send_error(conn, EROFS);
1068 return;
1071 if (transaction_block(conn, node))
1072 return;
1074 /* Must not already exist. */
1075 if (lstat(node_dir(conn->transaction, node), &st) == 0) {
1076 send_error(conn, EEXIST);
1077 return;
1080 dir = tempdir(conn, node, NULL, 0);
1081 if (!dir || !commit_dir(dir)) {
1082 send_error(conn, errno);
1083 return;
1086 add_change_node(conn->transaction, node, false);
1087 fire_watches(conn, node, false);
1088 send_ack(conn, XS_MKDIR);
1091 static void do_rm(struct connection *conn, const char *node)
1093 char *tmppath, *path;
1095 node = canonicalize(conn, node);
1096 if (!check_node_perms(conn, node, XS_PERM_WRITE)) {
1097 send_error(conn, errno);
1098 return;
1101 if (!within_transaction(conn->transaction, node)) {
1102 send_error(conn, EROFS);
1103 return;
1106 if (transaction_block(conn, node))
1107 return;
1109 if (streq(node, "/")) {
1110 send_error(conn, EINVAL);
1111 return;
1114 /* We move the directory to temporary name, destructor cleans up. */
1115 path = node_dir(conn->transaction, node);
1116 tmppath = talloc_asprintf(node, "%s.tmp", path);
1117 talloc_set_destructor(tmppath, destroy_path);
1119 if (rename(path, tmppath) != 0) {
1120 send_error(conn, errno);
1121 return;
1124 add_change_node(conn->transaction, node, true);
1125 fire_watches(conn, node, true);
1126 send_ack(conn, XS_RM);
1129 static void do_get_perms(struct connection *conn, const char *node)
1131 struct xs_permissions *perms;
1132 char *strings;
1133 unsigned int len, num;
1135 node = canonicalize(conn, node);
1136 if (!check_node_perms(conn, node, XS_PERM_READ)) {
1137 send_error(conn, errno);
1138 return;
1141 perms = get_perms(node_dir(conn->transaction, node), &num);
1142 if (!perms) {
1143 send_error(conn, errno);
1144 return;
1147 strings = perms_to_strings(node, perms, num, &len);
1148 if (!strings)
1149 send_error(conn, errno);
1150 else
1151 send_reply(conn, XS_GET_PERMS, strings, len);
1154 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1156 unsigned int num;
1157 char *node, *permstr;
1158 struct xs_permissions *perms;
1160 num = xs_count_strings(in->buffer, in->used);
1161 if (num < 2) {
1162 send_error(conn, EINVAL);
1163 return;
1166 /* First arg is node name. */
1167 node = canonicalize(conn, in->buffer);
1168 permstr = in->buffer + strlen(in->buffer) + 1;
1169 num--;
1171 if (!within_transaction(conn->transaction, node)) {
1172 send_error(conn, EROFS);
1173 return;
1176 if (transaction_block(conn, node))
1177 return;
1179 /* We must own node to do this (tools can do this too). */
1180 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) {
1181 send_error(conn, errno);
1182 return;
1185 perms = talloc_array(node, struct xs_permissions, num);
1186 if (!xs_strings_to_perms(perms, num, permstr)) {
1187 send_error(conn, errno);
1188 return;
1191 if (!set_perms(conn->transaction, node, perms, num)) {
1192 send_error(conn, errno);
1193 return;
1196 add_change_node(conn->transaction, node, false);
1197 fire_watches(conn, node, false);
1198 send_ack(conn, XS_SET_PERMS);
1201 /* Process "in" for conn: "in" will vanish after this conversation, so
1202 * we can talloc off it for temporary variables. May free "conn".
1203 */
1204 static void process_message(struct connection *conn, struct buffered_data *in)
1206 switch (in->hdr.msg.type) {
1207 case XS_DIRECTORY:
1208 send_directory(conn, onearg(in));
1209 break;
1211 case XS_READ:
1212 do_read(conn, onearg(in));
1213 break;
1215 case XS_WRITE:
1216 do_write(conn, in);
1217 break;
1219 case XS_MKDIR:
1220 do_mkdir(conn, onearg(in));
1221 break;
1223 case XS_RM:
1224 do_rm(conn, onearg(in));
1225 break;
1227 case XS_GET_PERMS:
1228 do_get_perms(conn, onearg(in));
1229 break;
1231 case XS_SET_PERMS:
1232 do_set_perms(conn, in);
1233 break;
1235 case XS_SHUTDOWN:
1236 /* FIXME: Implement gentle shutdown too. */
1237 /* Only tools can do this. */
1238 if (conn->id != 0) {
1239 send_error(conn, EACCES);
1240 break;
1242 if (!conn->can_write) {
1243 send_error(conn, EROFS);
1244 break;
1246 send_ack(conn, XS_SHUTDOWN);
1247 /* Everything hangs off auto-free context, freed at exit. */
1248 exit(0);
1250 case XS_DEBUG:
1251 if (streq(in->buffer, "print"))
1252 xprintf("debug: %s", in->buffer + get_string(in, 0));
1253 #ifdef TESTING
1254 /* For testing, we allow them to set id. */
1255 if (streq(in->buffer, "setid")) {
1256 conn->id = atoi(in->buffer + get_string(in, 0));
1257 send_ack(conn, XS_DEBUG);
1258 } else if (streq(in->buffer, "failtest")) {
1259 if (get_string(in, 0) < in->used)
1260 srandom(atoi(in->buffer + get_string(in, 0)));
1261 send_ack(conn, XS_DEBUG);
1262 failtest = true;
1264 #endif /* TESTING */
1265 break;
1267 case XS_WATCH:
1268 do_watch(conn, in);
1269 break;
1271 case XS_WATCH_ACK:
1272 do_watch_ack(conn, onearg(in));
1273 break;
1275 case XS_UNWATCH:
1276 do_unwatch(conn, in);
1277 break;
1279 case XS_TRANSACTION_START:
1280 do_transaction_start(conn, onearg(in));
1281 break;
1283 case XS_TRANSACTION_END:
1284 do_transaction_end(conn, onearg(in));
1285 break;
1287 case XS_INTRODUCE:
1288 do_introduce(conn, in);
1289 break;
1291 case XS_RELEASE:
1292 do_release(conn, onearg(in));
1293 break;
1295 case XS_GET_DOMAIN_PATH:
1296 do_get_domain_path(conn, onearg(in));
1297 break;
1299 case XS_WATCH_EVENT:
1300 default:
1301 eprintf("Client unknown operation %i", in->hdr.msg.type);
1302 send_error(conn, ENOSYS);
/* talloc failure handler: "data" is the jmp_buf registered together with
 * this handler.  Jumps back to the matching setjmp() with value 1 and
 * therefore never returns to talloc.
 */
static int out_of_mem(void *data)
{
	jmp_buf *env = data;

	longjmp(*env, 1);
}
1311 static void consider_message(struct connection *conn)
1313 /*
1314 * 'volatile' qualifier prevents register allocation which fixes:
1315 * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
1316 */
1317 struct buffered_data *volatile in = NULL;
1318 enum xsd_sockmsg_type volatile type = conn->in->hdr.msg.type;
1319 jmp_buf talloc_fail;
1321 assert(conn->state == OK);
1323 /* For simplicity, we kill the connection on OOM. */
1324 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1325 if (setjmp(talloc_fail)) {
1326 talloc_free(conn);
1327 goto end;
1330 if (verbose)
1331 xprintf("Got message %s len %i from %p\n",
1332 sockmsg_string(type), conn->in->hdr.msg.len, conn);
1334 /* We might get a command while waiting for an ack: this means
1335 * the other end discarded it: we will re-transmit. */
1336 if (type != XS_WATCH_ACK)
1337 conn->waiting_for_ack = NULL;
1339 /* Careful: process_message may free connection. We detach
1340 * "in" beforehand and allocate the new buffer to avoid
1341 * touching conn after process_message.
1342 */
1343 in = talloc_steal(talloc_autofree_context(), conn->in);
1344 conn->in = new_buffer(conn);
1345 process_message(conn, in);
1347 if (conn->state == BLOCKED) {
1348 /* Blocked by transaction: queue for re-xmit. */
1349 talloc_free(conn->in);
1350 conn->in = in;
1351 in = NULL;
1352 trace_blocked(conn, conn->in);
1355 end:
1356 talloc_free(in);
1357 talloc_set_fail_handler(NULL, NULL);
1358 if (talloc_total_blocks(NULL)
1359 != talloc_total_blocks(talloc_autofree_context()) + 1) {
1360 talloc_report_full(NULL, stderr);
1361 abort();
1365 /* Errors in reading or allocating here mean we get out of sync, so we
1366 * drop the whole client connection. */
1367 void handle_input(struct connection *conn)
1369 int bytes;
1370 struct buffered_data *in;
1372 assert(conn->state == OK);
1373 in = conn->in;
1375 /* Not finished header yet? */
1376 if (in->inhdr) {
1377 bytes = conn->read(conn, in->hdr.raw + in->used,
1378 sizeof(in->hdr) - in->used);
1379 if (bytes <= 0)
1380 goto bad_client;
1381 in->used += bytes;
1382 if (in->used != sizeof(in->hdr))
1383 return;
1385 if (in->hdr.msg.len > PATH_MAX) {
1386 #ifndef TESTING
1387 syslog(LOG_DAEMON, "Client tried to feed us %i",
1388 in->hdr.msg.len);
1389 #endif
1390 goto bad_client;
1393 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1394 if (!in->buffer)
1395 goto bad_client;
1396 in->used = 0;
1397 in->inhdr = false;
1398 return;
1401 bytes = conn->read(conn, in->buffer + in->used,
1402 in->hdr.msg.len - in->used);
1403 if (bytes < 0)
1404 goto bad_client;
1406 in->used += bytes;
1407 if (in->used != in->hdr.msg.len)
1408 return;
1410 trace_io(conn, "IN ", in);
1411 consider_message(conn);
1412 return;
1414 bad_client:
1415 /* Kill it. */
1416 talloc_free(conn);
/* Push pending output for "conn"; the connection is destroyed when
 * write_message() reports failure.
 */
void handle_output(struct connection *conn)
{
	bool written = write_message(conn);

	if (written)
		return;
	talloc_free(conn);
}
1425 /* If a transaction has ended, see if we can unblock any connections. */
1426 static void unblock_connections(void)
1428 struct connection *i, *tmp;
1430 list_for_each_entry_safe(i, tmp, &connections, list) {
1431 switch (i->state) {
1432 case BLOCKED:
1433 if (!transaction_covering_node(i->blocked_by)) {
1434 talloc_free(i->blocked_by);
1435 i->blocked_by = NULL;
1436 i->state = OK;
1437 consider_message(i);
1439 break;
1440 case BUSY:
1441 case OK:
1442 break;
1446 /* To balance bias, move first entry to end. */
1447 if (!list_empty(&connections)) {
1448 i = list_top(&connections, struct connection, list);
1449 list_del(&i->list);
1450 list_add_tail(&i->list, &connections);
1454 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1456 /*
1457 * 'volatile' qualifier prevents register allocation which fixes:
1458 * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
1459 */
1460 struct connection *volatile new;
1461 jmp_buf talloc_fail;
1463 new = talloc(talloc_autofree_context(), struct connection);
1464 if (!new)
1465 return NULL;
1467 new->state = OK;
1468 new->blocked_by = NULL;
1469 new->out = new->waiting_reply = NULL;
1470 new->waiting_for_ack = NULL;
1471 new->fd = -1;
1472 new->id = 0;
1473 new->domain = NULL;
1474 new->transaction = NULL;
1475 new->write = write;
1476 new->read = read;
1477 new->can_write = true;
1478 INIT_LIST_HEAD(&new->watches);
1480 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1481 if (setjmp(talloc_fail)) {
1482 talloc_free(new);
1483 return NULL;
1485 new->in = new_buffer(new);
1486 talloc_set_fail_handler(NULL, NULL);
1488 list_add_tail(&new->list, &connections);
1489 talloc_set_destructor(new, destroy_conn);
1490 trace_create(new, "connection");
1491 return new;
1494 static int writefd(struct connection *conn, const void *data, unsigned int len)
1496 return write(conn->fd, data, len);
1499 static int readfd(struct connection *conn, void *data, unsigned int len)
1501 return read(conn->fd, data, len);
1504 static void accept_connection(int sock, bool canwrite)
1506 int fd;
1507 struct connection *conn;
1509 fd = accept(sock, NULL, NULL);
1510 if (fd < 0)
1511 return;
1513 conn = new_connection(writefd, readfd);
1514 if (conn) {
1515 conn->fd = fd;
1516 conn->can_write = canwrite;
1517 } else
1518 close(fd);
/* Calc timespan from now to absolute time.
 *
 * On entry *tv is an absolute time; on return it holds the interval from
 * the current time until then, or zero if that moment has already passed.
 */
static void time_relative_to_now(struct timeval *tv)
{
	struct timeval now;
	bool expired;

	gettimeofday(&now, NULL);

	/* Strictly later than the deadline?  Compare seconds first. */
	if (now.tv_sec == tv->tv_sec)
		expired = now.tv_usec > tv->tv_usec;
	else
		expired = now.tv_sec > tv->tv_sec;

	if (expired) {
		tv->tv_sec = 0;
		tv->tv_usec = 0;
		return;
	}

	/* Borrow a second when the microsecond field would go negative. */
	if (now.tv_usec > tv->tv_usec) {
		tv->tv_sec -= 1;
		tv->tv_usec += 1000000;
	}
	tv->tv_sec -= now.tv_sec;
	tv->tv_usec -= now.tv_usec;
}
#ifdef TESTING
/* Useful for running under debugger.
 *
 * Prints every entry of the global connection list to stdout: its state,
 * id (when non-zero), what it is blocked on, input progress, queued output
 * messages, and finally its watches via dump_watches().
 */
void dump_connection(void)
{
	struct connection *c;

	list_for_each_entry(c, &connections, list) {
		const char *state_name;

		switch (c->state) {
		case OK:
			state_name = "OK";
			break;
		case BLOCKED:
			state_name = "BLOCKED";
			break;
		case BUSY:
			state_name = "BUSY";
			break;
		default:
			state_name = "INVALID";
			break;
		}

		printf("Connection %p:\n", c);
		printf(" state = %s\n", state_name);
		if (c->id)
			printf(" id = %i\n", c->id);
		if (c->blocked_by)
			printf(" blocked on = %s\n", c->blocked_by);
		/* Skip only a completely untouched header. */
		if (!c->in->inhdr || c->in->used) {
			const char *phase = c->in->inhdr ? "header" : "data";

			printf(" got %i bytes of %s\n", c->in->used, phase);
		}
		if (c->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(c->out->hdr.msg.type),
			       c->out->buffer);
		if (c->waiting_reply)
			printf(" ... and behind is queued %s (%s)\n",
			       sockmsg_string(c->waiting_reply->hdr.msg.type),
			       c->waiting_reply->buffer);
#if 0
		if (c->transaction)
			dump_transaction(c);
		if (c->domain)
			dump_domain(c);
#endif
		dump_watches(c);
	}
}
#endif
1578 static void setup_structure(void)
1580 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_READ };
1581 char *root, *dir, *permfile;
1583 /* Create root directory, with permissions. */
1584 if (mkdir(xs_daemon_store(), 0750) != 0) {
1585 if (errno != EEXIST)
1586 barf_perror("Could not create root %s",
1587 xs_daemon_store());
1588 return;
1590 root = talloc_strdup(talloc_autofree_context(), "/");
1591 if (!set_perms(NULL, root, &perms, 1))
1592 barf_perror("Could not create permissions in root");
1594 /* Create tool directory, with xenstored subdir. */
1595 dir = talloc_asprintf(root, "%s/%s", xs_daemon_store(), "tool");
1596 if (mkdir(dir, 0750) != 0)
1597 barf_perror("Making dir %s", dir);
1599 permfile = talloc_strdup(root, "/tool");
1600 if (!set_perms(NULL, permfile, &perms, 1))
1601 barf_perror("Could not create permissions on %s", permfile);
1603 dir = talloc_asprintf(root, "%s/%s", dir, "xenstored");
1604 if (mkdir(dir, 0750) != 0)
1605 barf_perror("Making dir %s", dir);
1607 permfile = talloc_strdup(root, "/tool/xenstored");
1608 if (!set_perms(NULL, permfile, &perms, 1))
1609 barf_perror("Could not create permissions on %s", permfile);
1610 talloc_free(root);
1611 if (mkdir(xs_daemon_transactions(), 0750) != 0)
1612 barf_perror("Could not create transaction dir %s",
1613 xs_daemon_transactions());
1616 static void write_pidfile(const char *pidfile)
1618 char buf[100];
1619 int len;
1620 int fd;
1622 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1623 if (fd == -1)
1624 barf_perror("Opening pid file %s", pidfile);
1626 /* We exit silently if daemon already running. */
1627 if (lockf(fd, F_TLOCK, 0) == -1)
1628 exit(0);
1630 len = sprintf(buf, "%d\n", getpid());
1631 write(fd, buf, len);
/* Detach from the invoking terminal and parent (after Stevens).
 *
 * The parent exits with status 0 right after the fork; the child becomes a
 * session leader, changes directory to "/" and clears its umask.
 */
static void daemonize(void)
{
	/* Separate from our parent via fork, so init inherits us. */
	pid_t child = fork();

	if (child < 0)
		barf_perror("Failed to fork daemon");
	if (child != 0)
		exit(0);	/* parent is done */

	/* Session leader so ^C doesn't whack us. */
	setsid();

	/* Move off any mount points we might be in. */
	chdir("/");

	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/* Connect a new UNIX stream socket to the server listening at "path".
 *
 * Returns the connected descriptor, or -1 with errno set.  A path that
 * does not fit in sockaddr_un.sun_path is rejected with ENAMETOOLONG
 * before any socket is created.
 */
static int open_domain_socket(const char *path)
{
	struct sockaddr_un addr;
	size_t path_len = strlen(path);
	int sock;

	/* Need room for the terminating NUL as well. */
	if (path_len >= sizeof(addr.sun_path)) {
		errno = ENAMETOOLONG;
		return -1;
	}

	sock = socket(PF_UNIX, SOCK_STREAM, 0);
	if (sock == -1)
		return -1;

	/* Zero first so the whole structure can be passed as the address;
	 * the length is derived from this socket's own path. */
	memset(&addr, 0, sizeof(addr));
	addr.sun_family = AF_UNIX;
	memcpy(addr.sun_path, path, path_len + 1);

	if (connect(sock, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
		/* Keep connect()'s errno for the caller's error report. */
		int saved_errno = errno;

		close(sock);
		errno = saved_errno;
		return -1;
	}

	return sock;
}
/* Transfer exactly "size" bytes between "fd" and "data", looping over
 * short reads/writes.
 *
 * do_read selects the direction: true reads from fd into data, false
 * writes data to fd.  EAGAIN and EINTR are retried immediately.  Returns
 * true once all bytes were transferred (trivially for size 0), false on
 * any other error or when read()/write() transfers zero bytes (e.g. EOF).
 */
bool _read_write_sync(int fd, void *data, size_t size, bool do_read)
{
	/* Arithmetic on void * is a GNU extension; step through a char *. */
	char *cursor = data;
	size_t offset = 0;

	while (offset < size) {
		ssize_t len;

		if (do_read)
			len = read(fd, cursor + offset, size - offset);
		else
			len = write(fd, cursor + offset, size - offset);

		if (len > 0) {
			offset += len;
			continue;
		}
		if (len == -1 && (errno == EAGAIN || errno == EINTR))
			continue;
		return false;
	}

	return true;
}

#define read_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, true)
#define write_sync(fd, buffer, size) _read_write_sync(fd, buffer, size, false)
1708 /* synchronized send/recv strictly for setting up xcs */
1709 /* always use asychronize callbacks any other time */
1710 static bool xcs_send_recv(int fd, xcs_msg_t *msg)
1712 bool ret = false;
1714 if (!write_sync(fd, msg, sizeof(*msg))) {
1715 eprintf("Write failed at %s:%s():L%d? Possible bug.",
1716 __FILE__, __FUNCTION__, __LINE__);
1717 goto out;
1720 if (!read_sync(fd, msg, sizeof(*msg))) {
1721 eprintf("Read failed at %s:%s():L%d? Possible bug.",
1722 __FILE__, __FUNCTION__, __LINE__);
1723 goto out;
1726 ret = true;
1728 out:
1729 return ret;
1732 static void handle_xcs(int xcs_fd)
1734 xcs_msg_t msg;
1736 if (!read_sync(xcs_fd, &msg, sizeof(msg)))
1737 barf_perror("read from xcs failed!");
1739 domain_cleanup();
1742 static int xcs_init(void)
1744 int ctrl_fd, data_fd;
1745 xcs_msg_t msg;
1747 ctrl_fd = open_domain_socket(XCS_SUN_PATH);
1748 if (ctrl_fd == -1)
1749 barf_perror("Failed to contact xcs. Is it running?");
1751 data_fd = open_domain_socket(XCS_SUN_PATH);
1752 if (data_fd == -1)
1753 barf_perror("Failed to contact xcs. Is it running?");
1755 memset(&msg, 0, sizeof(msg));
1756 msg.type = XCS_CONNECT_CTRL;
1757 if (!xcs_send_recv(ctrl_fd, &msg) || msg.result != XCS_RSLT_OK)
1758 barf_perror("xcs control connect failed.");
1760 msg.type = XCS_CONNECT_DATA;
1761 if (!xcs_send_recv(data_fd, &msg) || msg.result != XCS_RSLT_OK)
1762 barf_perror("xcs data connect failed.");
1764 msg.type = XCS_VIRQ_BIND;
1765 msg.u.virq.virq = VIRQ_DOM_EXC;
1766 if (!xcs_send_recv(ctrl_fd, &msg) || msg.result != XCS_RSLT_OK)
1767 barf_perror("xcs virq bind failed.");
1769 return data_fd;
/* Long command-line options; each maps onto the short option letter
 * handled in main()'s getopt_long() loop. */
static struct option options[] = {
	{ "pid-file",   required_argument, NULL, 'F' },
	{ "no-fork",    no_argument,       NULL, 'N' },
	{ "output-pid", no_argument,       NULL, 'P' },
	{ "trace-file", required_argument, NULL, 'T' },
	{ "verbose",    no_argument,       NULL, 'V' },
	{ NULL,         0,                 NULL, 0   },
};
1781 int main(int argc, char *argv[])
1783 int opt, *sock, *ro_sock, event_fd, xcs_fd, max;
1784 struct sockaddr_un addr;
1785 fd_set inset, outset;
1786 bool dofork = true;
1787 bool outputpid = false;
1788 const char *pidfile = NULL;
1790 while ((opt = getopt_long(argc, argv, "F:NPT:V", options,
1791 NULL)) != -1) {
1792 switch (opt) {
1793 case 'F':
1794 pidfile = optarg;
1795 break;
1796 case 'N':
1797 dofork = false;
1798 break;
1799 case 'P':
1800 outputpid = true;
1801 break;
1802 case 'T':
1803 tracefd = open(optarg, O_WRONLY|O_CREAT|O_APPEND, 0600);
1804 if (tracefd < 0)
1805 barf_perror("Could not open tracefile %s",
1806 optarg);
1807 write(tracefd, "\n***\n", strlen("\n***\n"));
1808 break;
1809 case 'V':
1810 verbose = true;
1811 break;
1814 if (optind != argc)
1815 barf("%s: No arguments desired", argv[0]);
1817 if (dofork) {
1818 openlog("xenstored", 0, LOG_DAEMON);
1819 daemonize();
1821 if (pidfile)
1822 write_pidfile(pidfile);
1824 talloc_enable_leak_report_full();
1826 /* Create sockets for them to listen to. */
1827 sock = talloc(talloc_autofree_context(), int);
1828 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1829 if (*sock < 0)
1830 barf_perror("Could not create socket");
1831 ro_sock = talloc(talloc_autofree_context(), int);
1832 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1833 if (*ro_sock < 0)
1834 barf_perror("Could not create socket");
1835 talloc_set_destructor(sock, destroy_fd);
1836 talloc_set_destructor(ro_sock, destroy_fd);
1838 /* Don't kill us with SIGPIPE. */
1839 signal(SIGPIPE, SIG_IGN);
1841 /* FIXME: Be more sophisticated, don't mug running daemon. */
1842 unlink(xs_daemon_socket());
1843 unlink(xs_daemon_socket_ro());
1845 addr.sun_family = AF_UNIX;
1846 strcpy(addr.sun_path, xs_daemon_socket());
1847 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1848 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1849 strcpy(addr.sun_path, xs_daemon_socket_ro());
1850 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1851 barf_perror("Could not bind socket to %s",
1852 xs_daemon_socket_ro());
1853 if (chmod(xs_daemon_socket(), 0600) != 0
1854 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1855 barf_perror("Could not chmod sockets");
1857 if (listen(*sock, 1) != 0
1858 || listen(*ro_sock, 1) != 0)
1859 barf_perror("Could not listen on sockets");
1861 /* If we're the first, create .perms file for root. */
1862 setup_structure();
1864 /* Listen to hypervisor. */
1865 event_fd = domain_init();
1867 /* Listen to hypervisor - more. */
1868 xcs_fd = xcs_init();
1870 /* Restore existing connections. */
1871 restore_existing_connections();
1873 if (outputpid) {
1874 printf("%i\n", getpid());
1875 fflush(stdout);
1878 /* close stdin/stdout now we're ready to accept connections */
1879 if (dofork) {
1880 close(STDIN_FILENO);
1881 close(STDOUT_FILENO);
1882 close(STDERR_FILENO);
1885 #ifdef TESTING
1886 signal(SIGUSR1, stop_failtest);
1887 #endif
1889 /* Get ready to listen to the tools. */
1890 max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd,
1891 xcs_fd);
1893 /* Main loop. */
1894 /* FIXME: Rewrite so noone can starve. */
1895 for (;;) {
1896 struct connection *i;
1897 struct timeval *tvp = NULL, tv;
1899 timerclear(&tv);
1900 shortest_transaction_timeout(&tv);
1901 shortest_watch_ack_timeout(&tv);
1902 if (timerisset(&tv)) {
1903 time_relative_to_now(&tv);
1904 tvp = &tv;
1907 if (select(max+1, &inset, &outset, NULL, tvp) < 0) {
1908 if (errno == EINTR)
1909 continue;
1910 barf_perror("Select failed");
1913 if (FD_ISSET(*sock, &inset))
1914 accept_connection(*sock, true);
1916 if (FD_ISSET(*ro_sock, &inset))
1917 accept_connection(*ro_sock, false);
1919 if (FD_ISSET(event_fd, &inset))
1920 handle_event(event_fd);
1922 if (FD_ISSET(xcs_fd, &inset))
1923 handle_xcs(xcs_fd);
1925 list_for_each_entry(i, &connections, list) {
1926 if (i->domain)
1927 continue;
1929 /* Operations can delete themselves or others
1930 * (xs_release): list is not safe after input,
1931 * so break. */
1932 if (FD_ISSET(i->fd, &inset)) {
1933 handle_input(i);
1934 break;
1936 if (FD_ISSET(i->fd, &outset)) {
1937 handle_output(i);
1938 break;
1942 /* Handle all possible I/O for domain connections. */
1943 more:
1944 list_for_each_entry(i, &connections, list) {
1945 if (!i->domain)
1946 continue;
1948 if (domain_can_read(i)) {
1949 handle_input(i);
1950 goto more;
1953 if (domain_can_write(i)) {
1954 handle_output(i);
1955 goto more;
1959 if (tvp) {
1960 check_transaction_timeout();
1961 check_watch_ack_timeout();
1964 /* If transactions ended, we might be able to do more work. */
1965 unblock_connections();
1967 max = initialize_set(&inset, &outset, *sock, *ro_sock,
1968 event_fd, xcs_fd);