xen-vtx-unstable

view tools/xenstore/xenstored_core.c @ 6774:4d899a738d59

merge?
author cl349@firebug.cl.cam.ac.uk
date Tue Sep 13 15:05:49 2005 +0000 (2005-09-13)
parents f804b28871ba acde14d25398
children e7c7196fa329 8ca0f98ba8e2
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 //#define DEBUG
43 #include "utils.h"
44 #include "list.h"
45 #include "talloc.h"
46 #include "xs_lib.h"
47 #include "xenstored.h"
48 #include "xenstored_core.h"
49 #include "xenstored_watch.h"
50 #include "xenstored_transaction.h"
51 #include "xenstored_domain.h"
52 #include "xenctrl.h"
53 #include "xen/io/domain_controller.h"
/* Descriptor the trace_*() helpers write to; negative means tracing is off. */
static int tracefd = -1;
/* List head that connections are linked on (walked by initialize_set). */
LIST_HEAD(connections);
/* When set, message traffic is additionally reported through xprintf(). */
static bool verbose;
59 #ifdef TESTING
/* While true, the test_*() wrappers below randomly inject failures. */
static bool failtest;
62 /* We override talloc's malloc. */
63 void *test_malloc(size_t size)
64 {
65 /* 1 in 20 means only about 50% of connections establish. */
66 if (failtest && (random() % 32) == 0)
67 return NULL;
68 return malloc(size);
69 }
71 static void stop_failtest(int signum __attribute__((unused)))
72 {
73 failtest = false;
74 }
76 /* Need these before we #define away write_all/mkdir in testing.h */
77 bool test_write_all(int fd, void *contents, unsigned int len);
78 bool test_write_all(int fd, void *contents, unsigned int len)
79 {
80 if (failtest && (random() % 8) == 0) {
81 if (len)
82 len = random() % len;
83 write(fd, contents, len);
84 errno = ENOSPC;
85 return false;
86 }
87 return xs_write_all(fd, contents, len);
88 }
90 int test_mkdir(const char *dir, int perms);
91 int test_mkdir(const char *dir, int perms)
92 {
93 if (failtest && (random() % 8) == 0) {
94 errno = ENOSPC;
95 return -1;
96 }
97 return mkdir(dir, perms);
98 }
99 #endif /* TESTING */
101 #include "xenstored_test.h"
103 /* FIXME: Ideally, this should never be called. Some can be eliminated. */
104 /* Something is horribly wrong: shutdown immediately. */
105 void __attribute__((noreturn)) corrupt(struct connection *conn,
106 const char *fmt, ...)
107 {
108 va_list arglist;
109 char *str;
110 int saved_errno = errno;
112 va_start(arglist, fmt);
113 str = talloc_vasprintf(NULL, fmt, arglist);
114 va_end(arglist);
116 trace("xenstored corruption: connection id %i: err %s: %s",
117 conn ? (int)conn->id : -1, strerror(saved_errno), str);
118 eprintf("xenstored corruption: connection id %i: err %s: %s",
119 conn ? (int)conn->id : -1, strerror(saved_errno), str);
120 #ifdef TESTING
121 /* Allow them to attach debugger. */
122 sleep(30);
123 #endif
124 syslog(LOG_DAEMON,
125 "xenstored corruption: connection id %i: err %s: %s",
126 conn ? (int)conn->id : -1, strerror(saved_errno), str);
127 _exit(2);
128 }
130 static char *sockmsg_string(enum xsd_sockmsg_type type)
131 {
132 switch (type) {
133 case XS_DEBUG: return "DEBUG";
134 case XS_SHUTDOWN: return "SHUTDOWN";
135 case XS_DIRECTORY: return "DIRECTORY";
136 case XS_READ: return "READ";
137 case XS_GET_PERMS: return "GET_PERMS";
138 case XS_WATCH: return "WATCH";
139 case XS_WATCH_ACK: return "WATCH_ACK";
140 case XS_UNWATCH: return "UNWATCH";
141 case XS_TRANSACTION_START: return "TRANSACTION_START";
142 case XS_TRANSACTION_END: return "TRANSACTION_END";
143 case XS_INTRODUCE: return "INTRODUCE";
144 case XS_RELEASE: return "RELEASE";
145 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
146 case XS_WRITE: return "WRITE";
147 case XS_MKDIR: return "MKDIR";
148 case XS_RM: return "RM";
149 case XS_SET_PERMS: return "SET_PERMS";
150 case XS_WATCH_EVENT: return "WATCH_EVENT";
151 case XS_ERROR: return "ERROR";
152 default:
153 return "**UNKNOWN**";
154 }
155 }
157 static void trace_io(const struct connection *conn,
158 const char *prefix,
159 const struct buffered_data *data)
160 {
161 char string[64];
162 unsigned int i;
164 if (tracefd < 0)
165 return;
167 write(tracefd, prefix, strlen(prefix));
168 sprintf(string, " %p ", conn);
169 write(tracefd, string, strlen(string));
170 write(tracefd, sockmsg_string(data->hdr.msg.type),
171 strlen(sockmsg_string(data->hdr.msg.type)));
172 write(tracefd, " (", 2);
173 for (i = 0; i < data->hdr.msg.len; i++) {
174 if (data->buffer[i] == '\0')
175 write(tracefd, " ", 1);
176 else
177 write(tracefd, data->buffer + i, 1);
178 }
179 write(tracefd, ")\n", 2);
180 }
182 void trace_create(const void *data, const char *type)
183 {
184 char string[64];
185 if (tracefd < 0)
186 return;
188 write(tracefd, "CREATE ", strlen("CREATE "));
189 write(tracefd, type, strlen(type));
190 sprintf(string, " %p\n", data);
191 write(tracefd, string, strlen(string));
192 }
194 void trace_destroy(const void *data, const char *type)
195 {
196 char string[64];
197 if (tracefd < 0)
198 return;
200 write(tracefd, "DESTROY ", strlen("DESTROY "));
201 write(tracefd, type, strlen(type));
202 sprintf(string, " %p\n", data);
203 write(tracefd, string, strlen(string));
204 }
206 void trace_watch_timeout(const struct connection *conn, const char *node, const char *token)
207 {
208 char string[64];
209 if (tracefd < 0)
210 return;
211 write(tracefd, "WATCH_TIMEOUT ", strlen("WATCH_TIMEOUT "));
212 sprintf(string, " %p ", conn);
213 write(tracefd, string, strlen(string));
214 write(tracefd, " (", 2);
215 write(tracefd, node, strlen(node));
216 write(tracefd, " ", 1);
217 write(tracefd, token, strlen(token));
218 write(tracefd, ")\n", 2);
219 }
221 static void trace_blocked(const struct connection *conn,
222 const struct buffered_data *data)
223 {
224 char string[64];
226 if (tracefd < 0)
227 return;
229 write(tracefd, "BLOCKED", strlen("BLOCKED"));
230 sprintf(string, " %p (", conn);
231 write(tracefd, string, strlen(string));
232 write(tracefd, sockmsg_string(data->hdr.msg.type),
233 strlen(sockmsg_string(data->hdr.msg.type)));
234 write(tracefd, ")\n", 2);
235 }
237 void trace(const char *fmt, ...)
238 {
239 va_list arglist;
240 char *str;
242 if (tracefd < 0)
243 return;
245 va_start(arglist, fmt);
246 str = talloc_vasprintf(NULL, fmt, arglist);
247 va_end(arglist);
248 write(tracefd, str, strlen(str));
249 talloc_free(str);
250 }
252 static bool write_message(struct connection *conn)
253 {
254 int ret;
255 struct buffered_data *out = conn->out;
257 assert(conn->state != BLOCKED);
258 if (out->inhdr) {
259 if (verbose)
260 xprintf("Writing msg %s (%s) out to %p\n",
261 sockmsg_string(out->hdr.msg.type),
262 out->buffer, conn);
263 ret = conn->write(conn, out->hdr.raw + out->used,
264 sizeof(out->hdr) - out->used);
265 if (ret < 0)
266 return false;
268 out->used += ret;
269 if (out->used < sizeof(out->hdr))
270 return true;
272 out->inhdr = false;
273 out->used = 0;
275 /* Second write might block if non-zero. */
276 if (out->hdr.msg.len && !conn->domain)
277 return true;
278 }
280 ret = conn->write(conn, out->buffer + out->used,
281 out->hdr.msg.len - out->used);
283 if (ret < 0)
284 return false;
286 out->used += ret;
287 if (out->used != out->hdr.msg.len)
288 return true;
290 trace_io(conn, "OUT", out);
291 conn->out = NULL;
292 talloc_free(out);
294 queue_next_event(conn);
296 /* No longer busy? */
297 if (!conn->out)
298 conn->state = OK;
299 return true;
300 }
302 static int destroy_conn(void *_conn)
303 {
304 struct connection *conn = _conn;
306 /* Flush outgoing if possible, but don't block. */
307 if (!conn->domain) {
308 fd_set set;
309 struct timeval none;
311 FD_ZERO(&set);
312 FD_SET(conn->fd, &set);
313 none.tv_sec = none.tv_usec = 0;
315 while (conn->out
316 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
317 if (!write_message(conn))
318 break;
319 close(conn->fd);
320 }
321 list_del(&conn->list);
322 trace_destroy(conn, "connection");
323 return 0;
324 }
326 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
327 int event_fd)
328 {
329 struct connection *i;
330 int max;
332 FD_ZERO(inset);
333 FD_ZERO(outset);
334 FD_SET(sock, inset);
335 max = sock;
336 FD_SET(ro_sock, inset);
337 if (ro_sock > max)
338 max = ro_sock;
339 FD_SET(event_fd, inset);
340 if (event_fd > max)
341 max = event_fd;
342 list_for_each_entry(i, &connections, list) {
343 if (i->domain)
344 continue;
345 if (i->state == OK)
346 FD_SET(i->fd, inset);
347 if (i->out)
348 FD_SET(i->fd, outset);
349 if (i->fd > max)
350 max = i->fd;
351 }
352 return max;
353 }
/*
 * Read everything from a talloc_open'ed fd.
 *
 * The buffer is allocated as a talloc child of fd and doubled whenever it
 * fills up.  *size receives the number of bytes read.  Returns the buffer,
 * or NULL if read() reported an error.
 */
void *read_all(int *fd, unsigned int *size)
{
	unsigned int capacity = 4;
	char *buffer = talloc_size(fd, capacity);
	int got;

	*size = 0;
	for (;;) {
		got = read(*fd, buffer + *size, capacity - *size);
		if (got <= 0)
			break;

		*size += got;
		if (*size == capacity) {
			capacity *= 2;
			buffer = talloc_realloc_size(fd, buffer, capacity);
		}
	}

	return got < 0 ? NULL : buffer;
}
/* talloc destructor for talloc_open(): close the wrapped descriptor. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
380 /* Return a pointer to an fd, self-closing and attached to this pathname. */
381 int *talloc_open(const char *pathname, int flags, int mode)
382 {
383 int *fd;
385 fd = talloc(pathname, int);
386 *fd = open(pathname, flags, mode);
387 if (*fd < 0) {
388 int saved_errno = errno;
389 talloc_free(fd);
390 errno = saved_errno;
391 return NULL;
392 }
393 talloc_set_destructor(fd, destroy_fd);
394 return fd;
395 }
/*
 * Is child a subnode of parent, or equal?
 *
 * Everything counts as a child of "/".  Otherwise child must begin with
 * parent and continue with either '/' or the end of the string.
 */
bool is_child(const char *child, const char *parent)
{
	unsigned int plen = strlen(parent);
	char next;

	/* / should really be "" for this algorithm to work, but that's a
	 * usability nightmare. */
	if (strcmp(parent, "/") == 0)
		return true;

	if (strncmp(child, parent, plen) != 0)
		return false;

	next = child[plen];
	return next == '/' || next == '\0';
}
/*
 * Build the path for a node under xs_daemon_store().
 * Answer never ends in /: the root node "/" maps to the store path itself.
 * The result is a talloc child of node.
 */
char *node_dir_outside_transaction(const char *node)
{
	if (!streq(node, "/"))
		return talloc_asprintf(node, "%s%s", xs_daemon_store(), node);
	return talloc_strdup(node, xs_daemon_store());
}
/*
 * Directory path for a node: the in-transaction path when a transaction
 * is given and the node lies within it, otherwise the regular store path.
 */
static char *node_dir(struct transaction *trans, const char *node)
{
	if (trans && within_transaction(trans, node))
		return node_dir_inside_transaction(trans, node);
	return node_dir_outside_transaction(node);
}
/* Path of the ".data" file inside dir (talloc child of dir). */
static char *datafile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.data", dir);

	return path;
}
/* Path of the ".data" file for a node, honouring the transaction. */
static char *node_datafile(struct transaction *trans, const char *node)
{
	char *dir = node_dir(trans, node);

	return datafile(dir);
}
/* Path of the ".perms" file inside dir (talloc child of dir). */
static char *permfile(const char *dir)
{
	char *path = talloc_asprintf(dir, "%s/.perms", dir);

	return path;
}
/* Path of the ".perms" file for a node, honouring the transaction. */
static char *node_permfile(struct transaction *trans, const char *node)
{
	char *dir = node_dir(trans, node);

	return permfile(dir);
}
448 struct buffered_data *new_buffer(void *ctx)
449 {
450 struct buffered_data *data;
452 data = talloc(ctx, struct buffered_data);
453 data->inhdr = true;
454 data->used = 0;
455 data->buffer = NULL;
457 return data;
458 }
460 /* Return length of string (including nul) at this offset. */
461 unsigned int get_string(const struct buffered_data *data, unsigned int offset)
462 {
463 const char *nul;
465 if (offset >= data->used)
466 return 0;
468 nul = memchr(data->buffer + offset, 0, data->used - offset);
469 if (!nul)
470 return 0;
472 return nul - (data->buffer + offset) + 1;
473 }
475 /* Break input into vectors, return the number, fill in up to num of them. */
476 unsigned int get_strings(struct buffered_data *data,
477 char *vec[], unsigned int num)
478 {
479 unsigned int off, i, len;
481 off = i = 0;
482 while ((len = get_string(data, off)) != 0) {
483 if (i < num)
484 vec[i] = data->buffer + off;
485 i++;
486 off += len;
487 }
488 return i;
489 }
491 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
492 const void *data, unsigned int len)
493 {
494 struct buffered_data *bdata;
496 /* When data gets freed, we want list entry is destroyed (so
497 * list entry is a child). */
498 bdata = new_buffer(conn);
499 bdata->buffer = talloc_array(bdata, char, len);
501 bdata->hdr.msg.type = type;
502 bdata->hdr.msg.len = len;
503 memcpy(bdata->buffer, data, len);
505 /* There might be an event going out now. Queue behind it. */
506 if (conn->out) {
507 assert(conn->out->hdr.msg.type == XS_WATCH_EVENT);
508 assert(!conn->waiting_reply);
509 conn->waiting_reply = bdata;
510 } else
511 conn->out = bdata;
512 assert(conn->state != BLOCKED);
513 conn->state = BUSY;
514 }
516 /* Some routines (write, mkdir, etc) just need a non-error return */
517 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
518 {
519 send_reply(conn, type, "OK", sizeof("OK"));
520 }
522 void send_error(struct connection *conn, int error)
523 {
524 unsigned int i;
526 for (i = 0; error != xsd_errors[i].errnum; i++) {
527 if (i == ARRAY_SIZE(xsd_errors) - 1) {
528 eprintf("xenstored: error %i untranslatable", error);
529 i = 0; /* EINVAL */
530 break;
531 }
532 }
533 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
534 strlen(xsd_errors[i].errstring) + 1);
535 }
/*
 * True when node consists only of letters, digits and "-/_@".
 * An empty string passes.
 */
static bool valid_chars(const char *node)
{
	/* Nodes can have lots of crap. */
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";
	size_t good = strspn(node, allowed);

	/* Valid iff the allowed prefix runs up to the terminator. */
	return node[good] == '\0';
}
/*
 * Check the syntax of a node name: it must start with '/', must not end
 * with '/' unless it is exactly "/", must not contain "//", and must
 * consist only of the characters valid_chars() accepts.
 */
bool is_valid_nodename(const char *node)
{
	bool is_root;

	/* Must start in /. */
	if (!strstarts(node, "/"))
		return false;

	/* Cannot end in / (unless it's just "/"). */
	if (strends(node, "/")) {
		is_root = streq(node, "/");
		if (!is_root)
			return false;
	}

	/* No double //. */
	if (strstr(node, "//") != NULL)
		return false;

	return valid_chars(node);
}
563 /* We expect one arg in the input: return NULL otherwise. */
564 static const char *onearg(struct buffered_data *in)
565 {
566 if (!in->used || get_string(in, 0) != in->used)
567 return NULL;
568 return in->buffer;
569 }
571 /* If it fails, returns NULL and sets errno. */
572 static struct xs_permissions *get_perms(const char *dir, unsigned int *num)
573 {
574 unsigned int size;
575 char *strings;
576 struct xs_permissions *ret;
577 int *fd;
579 fd = talloc_open(permfile(dir), O_RDONLY, 0);
580 if (!fd)
581 return NULL;
582 strings = read_all(fd, &size);
583 if (!strings)
584 return NULL;
586 *num = xs_count_strings(strings, size);
587 ret = talloc_array(dir, struct xs_permissions, *num);
588 if (!xs_strings_to_perms(ret, *num, strings))
589 corrupt(NULL, "Permissions corrupt for %s", dir);
591 return ret;
592 }
594 static char *perms_to_strings(const void *ctx,
595 struct xs_permissions *perms, unsigned int num,
596 unsigned int *len)
597 {
598 unsigned int i;
599 char *strings = NULL;
600 char buffer[MAX_STRLEN(domid_t) + 1];
602 for (*len = 0, i = 0; i < num; i++) {
603 if (!xs_perm_to_string(&perms[i], buffer))
604 return NULL;
606 strings = talloc_realloc(ctx, strings, char,
607 *len + strlen(buffer) + 1);
608 strcpy(strings + *len, buffer);
609 *len += strlen(buffer) + 1;
610 }
611 return strings;
612 }
/*
 * Destroy this, and its children, and its children's children.
 *
 * If path cannot be opened as a directory it is unlinked instead (a
 * missing path is fine).  Any other failure is treated as corruption.
 * Always returns 0, which lets it double as a talloc destructor.
 */
int destroy_path(void *path)
{
	struct dirent *entry;
	DIR *dir = opendir(path);

	if (!dir) {
		if (unlink(path) == 0 || errno == ENOENT)
			return 0;
		corrupt(NULL, "Destroying path %s", path);
	}

	while ((entry = readdir(dir)) != NULL) {
		const char *name = entry->d_name;
		char fullpath[strlen(path) + 1 + strlen(name) + 1];

		sprintf(fullpath, "%s/%s", (char *)path, name);
		/* Never recurse into the directory itself or its parent. */
		if (streq(name, ".") || streq(name, ".."))
			continue;
		destroy_path(fullpath);
	}
	closedir(dir);

	if (rmdir(path) != 0)
		corrupt(NULL, "Destroying directory %s", path);
	return 0;
}
639 /* Create a self-destructing temporary path */
640 static char *temppath(const char *path)
641 {
642 char *tmppath = talloc_asprintf(path, "%s.tmp", path);
643 talloc_set_destructor(tmppath, destroy_path);
644 return tmppath;
645 }
/*
 * Create a self-destructing temporary file "<path>.tmp" holding
 * contents[0..len).  Returns its name, or NULL if the file could not be
 * created exclusively or the write failed.
 */
static char *tempfile(const char *path, void *contents, unsigned int len)
{
	char *tmppath = temppath(path);
	int *fd = talloc_open(tmppath, O_WRONLY|O_CREAT|O_EXCL, 0640);

	if (fd && xs_write_all(*fd, contents, len))
		return tmppath;
	return NULL;
}
/* talloc destructor for talloc_opendir(): close the wrapped DIR. */
static int destroy_opendir(void *_dir)
{
	closedir(*(DIR **)_dir);
	return 0;
}
669 /* Return a pointer to a DIR*, self-closing and attached to this pathname. */
670 DIR **talloc_opendir(const char *pathname)
671 {
672 DIR **dir;
674 dir = talloc(pathname, DIR *);
675 *dir = opendir(pathname);
676 if (!*dir) {
677 int saved_errno = errno;
678 talloc_free(dir);
679 errno = saved_errno;
680 return NULL;
681 }
682 talloc_set_destructor(dir, destroy_opendir);
683 return dir;
684 }
/*
 * Move a temporary file into place: path is renamed to itself minus
 * everything from its last '.' onwards, and its destructor is cleared so
 * the committed file is not removed later.
 *
 * We assume rename() doesn't fail on moves in same dir; a failure is
 * treated as corruption.
 */
static void commit_tempfile(const char *path)
{
	const char *dot = strrchr(path, '.');
	unsigned int stem = dot - path;
	char realname[strlen(path) + 1];

	memcpy(realname, path, stem);
	realname[stem] = '\0';

	if (rename(path, realname) != 0)
		corrupt(NULL, "Committing %s", realname);
	talloc_set_destructor(path, NULL);
}
/*
 * Replace the permissions of a node: the serialised list is written to a
 * temporary file which is then renamed over the node's ".perms" file.
 * Returns false if serialising or writing the temporary file fails.
 */
static bool set_perms(struct transaction *transaction,
		      const char *node,
		      struct xs_permissions *perms, unsigned int num)
{
	unsigned int len;
	char *strings, *tmp;

	strings = perms_to_strings(node, perms, num, &len);
	if (strings == NULL)
		return false;

	/* Create then move. */
	tmp = tempfile(node_permfile(transaction, node), strings, len);
	if (tmp == NULL)
		return false;

	commit_tempfile(tmp);
	return true;
}
/*
 * Name of the parent node: everything before the last '/', or "/" when
 * no '/' occurs after the first character.  Talloc child of node.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);
	return talloc_strdup(node, "/");
}
727 static enum xs_perm_type perm_for_id(domid_t id,
728 struct xs_permissions *perms,
729 unsigned int num)
730 {
731 unsigned int i;
733 /* Owners and tools get it all... */
734 if (!id || perms[0].id == id)
735 return XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
737 for (i = 1; i < num; i++)
738 if (perms[i].id == id)
739 return perms[i].perms;
741 return perms[0].perms;
742 }
744 /* What do parents say? */
745 static enum xs_perm_type ask_parents(struct connection *conn,
746 const char *node)
747 {
748 struct xs_permissions *perms;
749 unsigned int num;
751 do {
752 node = get_parent(node);
753 perms = get_perms(node_dir(conn->transaction, node), &num);
754 if (perms)
755 break;
756 } while (!streq(node, "/"));
758 /* No permission at root? We're in trouble. */
759 if (!perms)
760 corrupt(conn, "No permissions file at root");
762 return perm_for_id(conn->id, perms, num);
763 }
765 /* We have a weird permissions system. You can allow someone into a
766 * specific node without allowing it in the parents. If it's going to
767 * fail, however, we don't want the errno to indicate any information
768 * about the node. */
769 static int errno_from_parents(struct connection *conn, const char *node,
770 int errnum)
771 {
772 /* We always tell them about memory failures. */
773 if (errnum == ENOMEM)
774 return errnum;
776 if (ask_parents(conn, node) & XS_PERM_READ)
777 return errnum;
778 return EACCES;
779 }
/*
 * Turn a relative node name into "<implicit path>/<node>" when the
 * connection has an implicit path.  NULL, names starting with '/', and
 * names on connections without an implicit path are returned unchanged.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	if (node && !strstarts(node, "/")) {
		const char *prefix = get_implicit_path(conn);

		if (prefix)
			return talloc_asprintf(node, "%s/%s", prefix, node);
	}
	return (char *)node;
}
793 bool check_node_perms(struct connection *conn, const char *node,
794 enum xs_perm_type perm)
795 {
796 struct xs_permissions *perms;
797 unsigned int num;
799 if (!node || !is_valid_nodename(node)) {
800 errno = EINVAL;
801 return false;
802 }
804 if (!conn->can_write && (perm & XS_PERM_WRITE)) {
805 errno = EROFS;
806 return false;
807 }
809 perms = get_perms(node_dir(conn->transaction, node), &num);
811 if (perms) {
812 if (perm_for_id(conn->id, perms, num) & perm)
813 return true;
814 errno = EACCES;
815 return false;
816 }
818 /* If it's OK not to exist, we consult parents. */
819 if (errno == ENOENT && (perm & XS_PERM_ENOENT_OK)) {
820 if (ask_parents(conn, node) & perm)
821 return true;
822 /* Parents say they should not know. */
823 errno = EACCES;
824 return false;
825 }
827 /* They might not have permission to even *see* this node, in
828 * which case we return EACCES even if it's ENOENT or EIO. */
829 errno = errno_from_parents(conn, node, errno);
830 return false;
831 }
/*
 * Event node names must be non-NULL and start with '@'.
 * Returns false with errno set to EINVAL otherwise.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
842 static void send_directory(struct connection *conn, const char *node)
843 {
844 char *path, *reply;
845 unsigned int reply_len = 0;
846 DIR **dir;
847 struct dirent *dirent;
849 node = canonicalize(conn, node);
850 if (!check_node_perms(conn, node, XS_PERM_READ)) {
851 send_error(conn, errno);
852 return;
853 }
855 path = node_dir(conn->transaction, node);
856 dir = talloc_opendir(path);
857 if (!dir) {
858 send_error(conn, errno);
859 return;
860 }
862 reply = talloc_strdup(node, "");
863 while ((dirent = readdir(*dir)) != NULL) {
864 int len = strlen(dirent->d_name) + 1;
866 if (!valid_chars(dirent->d_name))
867 continue;
869 reply = talloc_realloc(path, reply, char, reply_len + len);
870 strcpy(reply + reply_len, dirent->d_name);
871 reply_len += len;
872 }
874 send_reply(conn, XS_DIRECTORY, reply, reply_len);
875 }
877 static void do_read(struct connection *conn, const char *node)
878 {
879 char *value;
880 unsigned int size;
881 int *fd;
883 node = canonicalize(conn, node);
884 if (!check_node_perms(conn, node, XS_PERM_READ)) {
885 send_error(conn, errno);
886 return;
887 }
889 fd = talloc_open(node_datafile(conn->transaction, node), O_RDONLY, 0);
890 if (!fd) {
891 /* Data file doesn't exist? We call that a directory */
892 if (errno == ENOENT)
893 errno = EISDIR;
894 send_error(conn, errno);
895 return;
896 }
898 value = read_all(fd, &size);
899 if (!value)
900 send_error(conn, errno);
901 else
902 send_reply(conn, XS_READ, value, size);
903 }
/*
 * Commit this directory, eg. committing a/b.tmp/c causes a/b.tmp -> a/b.
 *
 * dir is cut at the first '/' following its last '.', and what remains
 * is renamed to the same name without the ".suffix".  Note that dir is
 * modified in place.  Returns true if rename() succeeded.
 */
static bool commit_dir(char *dir)
{
	char *dot = strrchr(dir, '.');
	char *slash = strchr(dot, '/');
	char *dest;

	if (slash != NULL)
		*slash = '\0';

	dest = talloc_asprintf(dir, "%.*s", (int)(dot - dir), dir);
	return rename(dir, dest) == 0;
}
919 /* Create a temporary directory. Put data in it (if data != NULL) */
920 static char *tempdir(struct connection *conn,
921 const char *node, void *data, unsigned int datalen)
922 {
923 struct xs_permissions *perms;
924 char *permstr;
925 unsigned int num, len;
926 int *fd;
927 char *dir;
929 dir = temppath(node_dir(conn->transaction, node));
930 if (mkdir(dir, 0750) != 0) {
931 if (errno != ENOENT)
932 return NULL;
934 dir = tempdir(conn, get_parent(node), NULL, 0);
935 if (!dir)
936 return NULL;
938 dir = talloc_asprintf(dir, "%s%s", dir, strrchr(node, '/'));
939 if (mkdir(dir, 0750) != 0)
940 return NULL;
941 talloc_set_destructor(dir, destroy_path);
942 }
944 perms = get_perms(get_parent(dir), &num);
945 assert(perms);
946 /* Domains own what they create. */
947 if (conn->id)
948 perms->id = conn->id;
950 permstr = perms_to_strings(dir, perms, num, &len);
951 fd = talloc_open(permfile(dir), O_WRONLY|O_CREAT|O_EXCL, 0640);
952 if (!fd || !xs_write_all(*fd, permstr, len))
953 return NULL;
955 if (data) {
956 char *datapath = datafile(dir);
958 fd = talloc_open(datapath, O_WRONLY|O_CREAT|O_EXCL, 0640);
959 if (!fd || !xs_write_all(*fd, data, datalen))
960 return NULL;
961 }
962 return dir;
963 }
965 /* path, flags, data... */
966 static void do_write(struct connection *conn, struct buffered_data *in)
967 {
968 unsigned int offset, datalen;
969 char *vec[2];
970 char *node, *tmppath;
971 enum xs_perm_type mode;
972 struct stat st;
974 /* Extra "strings" can be created by binary data. */
975 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
976 send_error(conn, EINVAL);
977 return;
978 }
980 node = canonicalize(conn, vec[0]);
981 if (!within_transaction(conn->transaction, node)) {
982 send_error(conn, EROFS);
983 return;
984 }
986 if (transaction_block(conn, node))
987 return;
989 offset = strlen(vec[0]) + strlen(vec[1]) + 2;
990 datalen = in->used - offset;
992 if (streq(vec[1], XS_WRITE_NONE))
993 mode = XS_PERM_WRITE;
994 else if (streq(vec[1], XS_WRITE_CREATE))
995 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
996 else if (streq(vec[1], XS_WRITE_CREATE_EXCL))
997 mode = XS_PERM_WRITE|XS_PERM_ENOENT_OK;
998 else {
999 send_error(conn, EINVAL);
1000 return;
1003 if (!check_node_perms(conn, node, mode)) {
1004 send_error(conn, errno);
1005 return;
1008 if (lstat(node_dir(conn->transaction, node), &st) != 0) {
1009 char *dir;
1011 /* Does not exist... */
1012 if (errno != ENOENT) {
1013 send_error(conn, errno);
1014 return;
1017 /* Not going to create it? */
1018 if (streq(vec[1], XS_WRITE_NONE)) {
1019 send_error(conn, ENOENT);
1020 return;
1023 dir = tempdir(conn, node, in->buffer + offset, datalen);
1024 if (!dir || !commit_dir(dir)) {
1025 send_error(conn, errno);
1026 return;
1029 } else {
1030 /* Exists... */
1031 if (streq(vec[1], XS_WRITE_CREATE_EXCL)) {
1032 send_error(conn, EEXIST);
1033 return;
1036 tmppath = tempfile(node_datafile(conn->transaction, node),
1037 in->buffer + offset, datalen);
1038 if (!tmppath) {
1039 send_error(conn, errno);
1040 return;
1043 commit_tempfile(tmppath);
1046 add_change_node(conn->transaction, node, false);
1047 fire_watches(conn, node, false);
1048 send_ack(conn, XS_WRITE);
1051 static void do_mkdir(struct connection *conn, const char *node)
1053 char *dir;
1054 struct stat st;
1056 node = canonicalize(conn, node);
1057 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_ENOENT_OK)) {
1058 send_error(conn, errno);
1059 return;
1062 if (!within_transaction(conn->transaction, node)) {
1063 send_error(conn, EROFS);
1064 return;
1067 if (transaction_block(conn, node))
1068 return;
1070 /* Must not already exist. */
1071 if (lstat(node_dir(conn->transaction, node), &st) == 0) {
1072 send_error(conn, EEXIST);
1073 return;
1076 dir = tempdir(conn, node, NULL, 0);
1077 if (!dir || !commit_dir(dir)) {
1078 send_error(conn, errno);
1079 return;
1082 add_change_node(conn->transaction, node, false);
1083 fire_watches(conn, node, false);
1084 send_ack(conn, XS_MKDIR);
1087 static void do_rm(struct connection *conn, const char *node)
1089 char *tmppath, *path;
1091 node = canonicalize(conn, node);
1092 if (!check_node_perms(conn, node, XS_PERM_WRITE)) {
1093 send_error(conn, errno);
1094 return;
1097 if (!within_transaction(conn->transaction, node)) {
1098 send_error(conn, EROFS);
1099 return;
1102 if (transaction_block(conn, node))
1103 return;
1105 if (streq(node, "/")) {
1106 send_error(conn, EINVAL);
1107 return;
1110 /* We move the directory to temporary name, destructor cleans up. */
1111 path = node_dir(conn->transaction, node);
1112 tmppath = talloc_asprintf(node, "%s.tmp", path);
1113 talloc_set_destructor(tmppath, destroy_path);
1115 if (rename(path, tmppath) != 0) {
1116 send_error(conn, errno);
1117 return;
1120 add_change_node(conn->transaction, node, true);
1121 fire_watches(conn, node, true);
1122 send_ack(conn, XS_RM);
1125 static void do_get_perms(struct connection *conn, const char *node)
1127 struct xs_permissions *perms;
1128 char *strings;
1129 unsigned int len, num;
1131 node = canonicalize(conn, node);
1132 if (!check_node_perms(conn, node, XS_PERM_READ)) {
1133 send_error(conn, errno);
1134 return;
1137 perms = get_perms(node_dir(conn->transaction, node), &num);
1138 if (!perms) {
1139 send_error(conn, errno);
1140 return;
1143 strings = perms_to_strings(node, perms, num, &len);
1144 if (!strings)
1145 send_error(conn, errno);
1146 else
1147 send_reply(conn, XS_GET_PERMS, strings, len);
1150 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1152 unsigned int num;
1153 char *node, *permstr;
1154 struct xs_permissions *perms;
1156 num = xs_count_strings(in->buffer, in->used);
1157 if (num < 2) {
1158 send_error(conn, EINVAL);
1159 return;
1162 /* First arg is node name. */
1163 node = canonicalize(conn, in->buffer);
1164 permstr = in->buffer + strlen(in->buffer) + 1;
1165 num--;
1167 if (!within_transaction(conn->transaction, node)) {
1168 send_error(conn, EROFS);
1169 return;
1172 if (transaction_block(conn, node))
1173 return;
1175 /* We must own node to do this (tools can do this too). */
1176 if (!check_node_perms(conn, node, XS_PERM_WRITE|XS_PERM_OWNER)) {
1177 send_error(conn, errno);
1178 return;
1181 perms = talloc_array(node, struct xs_permissions, num);
1182 if (!xs_strings_to_perms(perms, num, permstr)) {
1183 send_error(conn, errno);
1184 return;
1187 if (!set_perms(conn->transaction, node, perms, num)) {
1188 send_error(conn, errno);
1189 return;
1192 add_change_node(conn->transaction, node, false);
1193 fire_watches(conn, node, false);
1194 send_ack(conn, XS_SET_PERMS);
1197 /* Process "in" for conn: "in" will vanish after this conversation, so
1198 * we can talloc off it for temporary variables. May free "conn".
1199 */
1200 static void process_message(struct connection *conn, struct buffered_data *in)
1202 switch (in->hdr.msg.type) {
1203 case XS_DIRECTORY:
1204 send_directory(conn, onearg(in));
1205 break;
1207 case XS_READ:
1208 do_read(conn, onearg(in));
1209 break;
1211 case XS_WRITE:
1212 do_write(conn, in);
1213 break;
1215 case XS_MKDIR:
1216 do_mkdir(conn, onearg(in));
1217 break;
1219 case XS_RM:
1220 do_rm(conn, onearg(in));
1221 break;
1223 case XS_GET_PERMS:
1224 do_get_perms(conn, onearg(in));
1225 break;
1227 case XS_SET_PERMS:
1228 do_set_perms(conn, in);
1229 break;
1231 case XS_SHUTDOWN:
1232 /* FIXME: Implement gentle shutdown too. */
1233 /* Only tools can do this. */
1234 if (conn->id != 0) {
1235 send_error(conn, EACCES);
1236 break;
1238 if (!conn->can_write) {
1239 send_error(conn, EROFS);
1240 break;
1242 send_ack(conn, XS_SHUTDOWN);
1243 /* Everything hangs off auto-free context, freed at exit. */
1244 exit(0);
1246 case XS_DEBUG:
1247 if (streq(in->buffer, "print"))
1248 xprintf("debug: %s", in->buffer + get_string(in, 0));
1249 #ifdef TESTING
1250 /* For testing, we allow them to set id. */
1251 if (streq(in->buffer, "setid")) {
1252 conn->id = atoi(in->buffer + get_string(in, 0));
1253 send_ack(conn, XS_DEBUG);
1254 } else if (streq(in->buffer, "failtest")) {
1255 if (get_string(in, 0) < in->used)
1256 srandom(atoi(in->buffer + get_string(in, 0)));
1257 send_ack(conn, XS_DEBUG);
1258 failtest = true;
1260 #endif /* TESTING */
1261 break;
1263 case XS_WATCH:
1264 do_watch(conn, in);
1265 break;
1267 case XS_WATCH_ACK:
1268 do_watch_ack(conn, onearg(in));
1269 break;
1271 case XS_UNWATCH:
1272 do_unwatch(conn, in);
1273 break;
1275 case XS_TRANSACTION_START:
1276 do_transaction_start(conn, onearg(in));
1277 break;
1279 case XS_TRANSACTION_END:
1280 do_transaction_end(conn, onearg(in));
1281 break;
1283 case XS_INTRODUCE:
1284 do_introduce(conn, in);
1285 break;
1287 case XS_RELEASE:
1288 do_release(conn, onearg(in));
1289 break;
1291 case XS_GET_DOMAIN_PATH:
1292 do_get_domain_path(conn, onearg(in));
1293 break;
1295 case XS_WATCH_EVENT:
1296 default:
1297 eprintf("Client unknown operation %i", in->hdr.msg.type);
1298 send_error(conn, ENOSYS);
/* talloc failure handler: "data" is the jmp_buf registered alongside this
 * handler; jump back to its setjmp() point, which then sees the value 1.
 * Never returns.
 */
static int out_of_mem(void *data)
{
	jmp_buf *env = data;

	longjmp(*env, 1);
}
1307 static void consider_message(struct connection *conn)
1309 /*
1310 * 'volatile' qualifier prevents register allocation which fixes:
1311 * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
1312 */
1313 struct buffered_data *volatile in = NULL;
1314 enum xsd_sockmsg_type volatile type = conn->in->hdr.msg.type;
1315 jmp_buf talloc_fail;
1317 assert(conn->state == OK);
1319 /* For simplicity, we kill the connection on OOM. */
1320 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1321 if (setjmp(talloc_fail)) {
1322 talloc_free(conn);
1323 goto end;
1326 if (verbose)
1327 xprintf("Got message %s len %i from %p\n",
1328 sockmsg_string(type), conn->in->hdr.msg.len, conn);
1330 /* We might get a command while waiting for an ack: this means
1331 * the other end discarded it: we will re-transmit. */
1332 if (type != XS_WATCH_ACK)
1333 conn->waiting_for_ack = NULL;
1335 /* Careful: process_message may free connection. We detach
1336 * "in" beforehand and allocate the new buffer to avoid
1337 * touching conn after process_message.
1338 */
1339 in = talloc_steal(talloc_autofree_context(), conn->in);
1340 conn->in = new_buffer(conn);
1341 process_message(conn, in);
1343 if (conn->state == BLOCKED) {
1344 /* Blocked by transaction: queue for re-xmit. */
1345 talloc_free(conn->in);
1346 conn->in = in;
1347 in = NULL;
1348 trace_blocked(conn, conn->in);
1351 end:
1352 talloc_free(in);
1353 talloc_set_fail_handler(NULL, NULL);
1354 if (talloc_total_blocks(NULL)
1355 != talloc_total_blocks(talloc_autofree_context()) + 1) {
1356 talloc_report_full(NULL, stderr);
1357 abort();
1361 /* Errors in reading or allocating here mean we get out of sync, so we
1362 * drop the whole client connection. */
1363 void handle_input(struct connection *conn)
1365 int bytes;
1366 struct buffered_data *in;
1368 assert(conn->state == OK);
1369 in = conn->in;
1371 /* Not finished header yet? */
1372 if (in->inhdr) {
1373 bytes = conn->read(conn, in->hdr.raw + in->used,
1374 sizeof(in->hdr) - in->used);
1375 if (bytes <= 0)
1376 goto bad_client;
1377 in->used += bytes;
1378 if (in->used != sizeof(in->hdr))
1379 return;
1381 if (in->hdr.msg.len > PATH_MAX) {
1382 #ifndef TESTING
1383 syslog(LOG_DAEMON, "Client tried to feed us %i",
1384 in->hdr.msg.len);
1385 #endif
1386 goto bad_client;
1389 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1390 if (!in->buffer)
1391 goto bad_client;
1392 in->used = 0;
1393 in->inhdr = false;
1394 return;
1397 bytes = conn->read(conn, in->buffer + in->used,
1398 in->hdr.msg.len - in->used);
1399 if (bytes < 0)
1400 goto bad_client;
1402 in->used += bytes;
1403 if (in->used != in->hdr.msg.len)
1404 return;
1406 trace_io(conn, "IN ", in);
1407 consider_message(conn);
1408 return;
1410 bad_client:
1411 /* Kill it. */
1412 talloc_free(conn);
/* Push pending output for "conn" via write_message(); if that reports
 * failure the connection is freed.
 */
void handle_output(struct connection *conn)
{
	if (write_message(conn))
		return;

	talloc_free(conn);
}
1421 /* If a transaction has ended, see if we can unblock any connections. */
1422 static void unblock_connections(void)
1424 struct connection *i, *tmp;
1426 list_for_each_entry_safe(i, tmp, &connections, list) {
1427 switch (i->state) {
1428 case BLOCKED:
1429 if (!transaction_covering_node(i->blocked_by)) {
1430 talloc_free(i->blocked_by);
1431 i->blocked_by = NULL;
1432 i->state = OK;
1433 consider_message(i);
1435 break;
1436 case BUSY:
1437 case OK:
1438 break;
1442 /* To balance bias, move first entry to end. */
1443 if (!list_empty(&connections)) {
1444 i = list_top(&connections, struct connection, list);
1445 list_del(&i->list);
1446 list_add_tail(&i->list, &connections);
1450 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1452 /*
1453 * 'volatile' qualifier prevents register allocation which fixes:
1454 * warning: variable 'xxx' might be clobbered by 'longjmp' or 'vfork'
1455 */
1456 struct connection *volatile new;
1457 jmp_buf talloc_fail;
1459 new = talloc(talloc_autofree_context(), struct connection);
1460 if (!new)
1461 return NULL;
1463 new->state = OK;
1464 new->blocked_by = NULL;
1465 new->out = new->waiting_reply = NULL;
1466 new->waiting_for_ack = NULL;
1467 new->fd = -1;
1468 new->id = 0;
1469 new->domain = NULL;
1470 new->transaction = NULL;
1471 new->write = write;
1472 new->read = read;
1473 new->can_write = true;
1474 INIT_LIST_HEAD(&new->watches);
1476 talloc_set_fail_handler(out_of_mem, &talloc_fail);
1477 if (setjmp(talloc_fail)) {
1478 talloc_free(new);
1479 return NULL;
1481 new->in = new_buffer(new);
1482 talloc_set_fail_handler(NULL, NULL);
1484 list_add_tail(&new->list, &connections);
1485 talloc_set_destructor(new, destroy_conn);
1486 trace_create(new, "connection");
1487 return new;
1490 static int writefd(struct connection *conn, const void *data, unsigned int len)
1492 return write(conn->fd, data, len);
1495 static int readfd(struct connection *conn, void *data, unsigned int len)
1497 return read(conn->fd, data, len);
1500 static void accept_connection(int sock, bool canwrite)
1502 int fd;
1503 struct connection *conn;
1505 fd = accept(sock, NULL, NULL);
1506 if (fd < 0)
1507 return;
1509 conn = new_connection(writefd, readfd);
1510 if (conn) {
1511 conn->fd = fd;
1512 conn->can_write = canwrite;
1513 } else
1514 close(fd);
/* Convert the absolute time in *tv into a timespan measured from now.
 * A time that is already in the past becomes a zero timespan.
 */
static void time_relative_to_now(struct timeval *tv)
{
	struct timeval now;
	bool expired;

	gettimeofday(&now, NULL);

	/* Is "now" strictly later than *tv?  Compare seconds first and fall
	 * back to microseconds only on a tie. */
	if (now.tv_sec != tv->tv_sec)
		expired = now.tv_sec > tv->tv_sec;
	else
		expired = now.tv_usec > tv->tv_usec;

	if (expired) {
		tv->tv_sec = 0;
		tv->tv_usec = 0;
		return;
	}

	tv->tv_sec -= now.tv_sec;
	if (tv->tv_usec < now.tv_usec) {
		/* Borrow one second so the microseconds stay non-negative. */
		tv->tv_sec--;
		tv->tv_usec += 1000000;
	}
	tv->tv_usec -= now.tv_usec;
}
#ifdef TESTING
/* Debugging aid (handy from a debugger): print the state, pending input
 * and output, and watches of every connection to stdout.
 */
void dump_connection(void)
{
	struct connection *i;

	list_for_each_entry(i, &connections, list) {
		const char *state;

		switch (i->state) {
		case OK:
			state = "OK";
			break;
		case BLOCKED:
			state = "BLOCKED";
			break;
		case BUSY:
			state = "BUSY";
			break;
		default:
			state = "INVALID";
			break;
		}

		printf("Connection %p:\n", i);
		printf(" state = %s\n", state);
		if (i->id)
			printf(" id = %i\n", i->id);
		if (i->blocked_by)
			printf(" blocked on = %s\n", i->blocked_by);

		/* Only report input once some of it has arrived. */
		if (!i->in->inhdr || i->in->used)
			printf(" got %i bytes of %s\n",
			       i->in->used, i->in->inhdr ? "header" : "data");

		if (i->out)
			printf(" sending message %s (%s) out\n",
			       sockmsg_string(i->out->hdr.msg.type),
			       i->out->buffer);
		if (i->waiting_reply)
			printf(" ... and behind is queued %s (%s)\n",
			       sockmsg_string(i->waiting_reply->hdr.msg.type),
			       i->waiting_reply->buffer);
#if 0
		if (i->transaction)
			dump_transaction(i);
		if (i->domain)
			dump_domain(i);
#endif
		dump_watches(i);
	}
}
#endif
1574 static void setup_structure(void)
1576 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_READ };
1577 char *root, *dir, *permfile;
1579 /* Create root directory, with permissions. */
1580 if (mkdir(xs_daemon_store(), 0750) != 0) {
1581 if (errno != EEXIST)
1582 barf_perror("Could not create root %s",
1583 xs_daemon_store());
1584 return;
1586 root = talloc_strdup(talloc_autofree_context(), "/");
1587 if (!set_perms(NULL, root, &perms, 1))
1588 barf_perror("Could not create permissions in root");
1590 /* Create tool directory, with xenstored subdir. */
1591 dir = talloc_asprintf(root, "%s/%s", xs_daemon_store(), "tool");
1592 if (mkdir(dir, 0750) != 0)
1593 barf_perror("Making dir %s", dir);
1595 permfile = talloc_strdup(root, "/tool");
1596 if (!set_perms(NULL, permfile, &perms, 1))
1597 barf_perror("Could not create permissions on %s", permfile);
1599 dir = talloc_asprintf(root, "%s/%s", dir, "xenstored");
1600 if (mkdir(dir, 0750) != 0)
1601 barf_perror("Making dir %s", dir);
1603 permfile = talloc_strdup(root, "/tool/xenstored");
1604 if (!set_perms(NULL, permfile, &perms, 1))
1605 barf_perror("Could not create permissions on %s", permfile);
1606 talloc_free(root);
1607 if (mkdir(xs_daemon_transactions(), 0750) != 0)
1608 barf_perror("Could not create transaction dir %s",
1609 xs_daemon_transactions());
1612 static void write_pidfile(const char *pidfile)
1614 char buf[100];
1615 int len;
1616 int fd;
1618 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1619 if (fd == -1)
1620 barf_perror("Opening pid file %s", pidfile);
1622 /* We exit silently if daemon already running. */
1623 if (lockf(fd, F_TLOCK, 0) == -1)
1624 exit(0);
1626 len = sprintf(buf, "%d\n", getpid());
1627 write(fd, buf, len);
/* Detach from the invoking process, following Stevens: fork (the parent
 * exits), start a new session, leave the current directory (except in
 * TESTING builds) and clear the umask.
 */
static void daemonize(void)
{
	pid_t pid;

	/* Separate from our parent via fork, so init inherits us. */
	pid = fork();
	if (pid < 0)
		barf_perror("Failed to fork daemon");
	if (pid != 0)
		exit(0);

	/* Session leader so ^C doesn't whack us. */
	setsid();

#ifndef TESTING	/* Relative paths for socket names */
	/* Move off any mount points we might be in. */
	chdir("/");
#endif

	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/* Long options accepted by main(); each maps onto the short option given
 * in the last column.
 */
static struct option options[] = {
	{ "pid-file",   required_argument, NULL, 'F' },
	{ "no-fork",    no_argument,       NULL, 'N' },
	{ "output-pid", no_argument,       NULL, 'P' },
	{ "trace-file", required_argument, NULL, 'T' },
	{ "verbose",    no_argument,       NULL, 'V' },
	{ NULL,         0,                 NULL, 0 }
};
1660 int main(int argc, char *argv[])
1662 int opt, *sock, *ro_sock, event_fd, max;
1663 struct sockaddr_un addr;
1664 fd_set inset, outset;
1665 bool dofork = true;
1666 bool outputpid = false;
1667 const char *pidfile = NULL;
1669 while ((opt = getopt_long(argc, argv, "F:NPT:V", options,
1670 NULL)) != -1) {
1671 switch (opt) {
1672 case 'F':
1673 pidfile = optarg;
1674 break;
1675 case 'N':
1676 dofork = false;
1677 break;
1678 case 'P':
1679 outputpid = true;
1680 break;
1681 case 'T':
1682 tracefd = open(optarg, O_WRONLY|O_CREAT|O_APPEND, 0600);
1683 if (tracefd < 0)
1684 barf_perror("Could not open tracefile %s",
1685 optarg);
1686 write(tracefd, "\n***\n", strlen("\n***\n"));
1687 break;
1688 case 'V':
1689 verbose = true;
1690 break;
1693 if (optind != argc)
1694 barf("%s: No arguments desired", argv[0]);
1696 if (dofork) {
1697 openlog("xenstored", 0, LOG_DAEMON);
1698 daemonize();
1700 if (pidfile)
1701 write_pidfile(pidfile);
1703 talloc_enable_leak_report_full();
1705 /* Create sockets for them to listen to. */
1706 sock = talloc(talloc_autofree_context(), int);
1707 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1708 if (*sock < 0)
1709 barf_perror("Could not create socket");
1710 ro_sock = talloc(talloc_autofree_context(), int);
1711 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1712 if (*ro_sock < 0)
1713 barf_perror("Could not create socket");
1714 talloc_set_destructor(sock, destroy_fd);
1715 talloc_set_destructor(ro_sock, destroy_fd);
1717 /* Don't kill us with SIGPIPE. */
1718 signal(SIGPIPE, SIG_IGN);
1720 /* FIXME: Be more sophisticated, don't mug running daemon. */
1721 unlink(xs_daemon_socket());
1722 unlink(xs_daemon_socket_ro());
1724 addr.sun_family = AF_UNIX;
1725 strcpy(addr.sun_path, xs_daemon_socket());
1726 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1727 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1728 strcpy(addr.sun_path, xs_daemon_socket_ro());
1729 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1730 barf_perror("Could not bind socket to %s",
1731 xs_daemon_socket_ro());
1732 if (chmod(xs_daemon_socket(), 0600) != 0
1733 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1734 barf_perror("Could not chmod sockets");
1736 if (listen(*sock, 1) != 0
1737 || listen(*ro_sock, 1) != 0)
1738 barf_perror("Could not listen on sockets");
1740 /* If we're the first, create .perms file for root. */
1741 setup_structure();
1743 /* Listen to hypervisor. */
1744 event_fd = domain_init();
1746 /* Restore existing connections. */
1747 restore_existing_connections();
1749 if (outputpid) {
1750 printf("%i\n", getpid());
1751 fflush(stdout);
1754 /* close stdin/stdout now we're ready to accept connections */
1755 if (dofork) {
1756 close(STDIN_FILENO);
1757 close(STDOUT_FILENO);
1758 close(STDERR_FILENO);
1761 #ifdef TESTING
1762 signal(SIGUSR1, stop_failtest);
1763 #endif
1765 /* Get ready to listen to the tools. */
1766 max = initialize_set(&inset, &outset, *sock, *ro_sock, event_fd);
1768 /* Main loop. */
1769 /* FIXME: Rewrite so noone can starve. */
1770 for (;;) {
1771 struct connection *i;
1772 struct timeval *tvp = NULL, tv;
1774 timerclear(&tv);
1775 shortest_transaction_timeout(&tv);
1776 shortest_watch_ack_timeout(&tv);
1777 if (timerisset(&tv)) {
1778 time_relative_to_now(&tv);
1779 tvp = &tv;
1782 if (select(max+1, &inset, &outset, NULL, tvp) < 0) {
1783 if (errno == EINTR)
1784 continue;
1785 barf_perror("Select failed");
1788 if (FD_ISSET(*sock, &inset))
1789 accept_connection(*sock, true);
1791 if (FD_ISSET(*ro_sock, &inset))
1792 accept_connection(*ro_sock, false);
1794 if (FD_ISSET(event_fd, &inset))
1795 handle_event(event_fd);
1797 list_for_each_entry(i, &connections, list) {
1798 if (i->domain)
1799 continue;
1801 /* Operations can delete themselves or others
1802 * (xs_release): list is not safe after input,
1803 * so break. */
1804 if (FD_ISSET(i->fd, &inset)) {
1805 handle_input(i);
1806 break;
1808 if (FD_ISSET(i->fd, &outset)) {
1809 handle_output(i);
1810 break;
1814 /* Handle all possible I/O for domain connections. */
1815 more:
1816 list_for_each_entry(i, &connections, list) {
1817 if (!i->domain)
1818 continue;
1820 if (domain_can_read(i)) {
1821 handle_input(i);
1822 goto more;
1825 if (domain_can_write(i)) {
1826 handle_output(i);
1827 goto more;
1831 if (tvp) {
1832 check_transaction_timeout();
1833 check_watch_ack_timeout();
1836 /* If transactions ended, we might be able to do more work. */
1837 unblock_connections();
1839 max = initialize_set(&inset, &outset, *sock, *ro_sock,
1840 event_fd);