debuggers.hg

view tools/xenstore/xenstored_core.c @ 16986:a6c037d8cba3

Add DTrace support to xenstored

Add USDT probes for significant xenstore operations to allow dynamic
tracing.

Signed-off-by: John Levon <john.levon@sun.com>
author Keir Fraser <keir.fraser@citrix.com>
date Thu Jan 31 09:13:27 2008 +0000 (2008-01-31)
parents 26fc953a89bb
children 21d9575c669e
line source
1 /*
2 Simple prototype Xen Store Daemon providing simple tree-like database.
3 Copyright (C) 2005 Rusty Russell IBM Corporation
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2 of the License, or
8 (at your option) any later version.
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 */
20 #include <sys/types.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/select.h>
24 #include <sys/un.h>
25 #include <sys/time.h>
26 #include <time.h>
27 #include <unistd.h>
28 #include <fcntl.h>
29 #include <stdbool.h>
30 #include <stdio.h>
31 #include <stdarg.h>
32 #include <stdlib.h>
33 #include <syslog.h>
34 #include <string.h>
35 #include <errno.h>
36 #include <dirent.h>
37 #include <getopt.h>
38 #include <signal.h>
39 #include <assert.h>
40 #include <setjmp.h>
42 #include "utils.h"
43 #include "list.h"
44 #include "talloc.h"
45 #include "xs_lib.h"
46 #include "xenstored_core.h"
47 #include "xenstored_watch.h"
48 #include "xenstored_transaction.h"
49 #include "xenstored_domain.h"
50 #include "xenctrl.h"
51 #include "tdb.h"
53 #include "hashtable.h"
55 extern int xce_handle; /* in xenstored_domain.c */
57 static bool verbose = false;
58 LIST_HEAD(connections);
59 static int tracefd = -1;
60 static bool recovery = true;
61 static bool remove_local = true;
62 static int reopen_log_pipe[2];
63 static char *tracefile = NULL;
64 static TDB_CONTEXT *tdb_ctx;
66 static void corrupt(struct connection *conn, const char *fmt, ...);
67 static void check_store(void);
/*
 * Format a message once and send it to both the trace file (with a
 * trailing newline) and syslog at LOG_ERR.  The temporary string is a
 * top-level talloc allocation and is released before the macro ends.
 */
#define log(...)							\
	do {								\
		char *log_text_ = talloc_asprintf(NULL, __VA_ARGS__);	\
		trace("%s\n", log_text_);				\
		syslog(LOG_ERR, "%s", log_text_);			\
		talloc_free(log_text_);					\
	} while (0)
78 int quota_nb_entry_per_domain = 1000;
79 int quota_nb_watch_per_domain = 128;
80 int quota_max_entry_size = 2048; /* 2K */
81 int quota_max_transaction = 10;
83 TDB_CONTEXT *tdb_context(struct connection *conn)
84 {
85 /* conn = NULL used in manual_node at setup. */
86 if (!conn || !conn->transaction)
87 return tdb_ctx;
88 return tdb_transaction_context(conn->transaction);
89 }
91 bool replace_tdb(const char *newname, TDB_CONTEXT *newtdb)
92 {
93 if (rename(newname, xs_daemon_tdb()) != 0)
94 return false;
95 tdb_close(tdb_ctx);
96 tdb_ctx = talloc_steal(talloc_autofree_context(), newtdb);
97 return true;
98 }
100 static char *sockmsg_string(enum xsd_sockmsg_type type)
101 {
102 switch (type) {
103 case XS_DEBUG: return "DEBUG";
104 case XS_DIRECTORY: return "DIRECTORY";
105 case XS_READ: return "READ";
106 case XS_GET_PERMS: return "GET_PERMS";
107 case XS_WATCH: return "WATCH";
108 case XS_UNWATCH: return "UNWATCH";
109 case XS_TRANSACTION_START: return "TRANSACTION_START";
110 case XS_TRANSACTION_END: return "TRANSACTION_END";
111 case XS_INTRODUCE: return "INTRODUCE";
112 case XS_RELEASE: return "RELEASE";
113 case XS_GET_DOMAIN_PATH: return "GET_DOMAIN_PATH";
114 case XS_WRITE: return "WRITE";
115 case XS_MKDIR: return "MKDIR";
116 case XS_RM: return "RM";
117 case XS_SET_PERMS: return "SET_PERMS";
118 case XS_WATCH_EVENT: return "WATCH_EVENT";
119 case XS_ERROR: return "ERROR";
120 case XS_IS_DOMAIN_INTRODUCED: return "XS_IS_DOMAIN_INTRODUCED";
121 case XS_RESUME: return "RESUME";
122 case XS_SET_TARGET: return "SET_TARGET";
123 default:
124 return "**UNKNOWN**";
125 }
126 }
128 void trace(const char *fmt, ...)
129 {
130 va_list arglist;
131 char *str;
132 char sbuf[1024];
133 int ret, dummy;
135 if (tracefd < 0)
136 return;
138 /* try to use a static buffer */
139 va_start(arglist, fmt);
140 ret = vsnprintf(sbuf, 1024, fmt, arglist);
141 va_end(arglist);
143 if (ret <= 1024) {
144 dummy = write(tracefd, sbuf, ret);
145 return;
146 }
148 /* fail back to dynamic allocation */
149 va_start(arglist, fmt);
150 str = talloc_vasprintf(NULL, fmt, arglist);
151 va_end(arglist);
152 dummy = write(tracefd, str, strlen(str));
153 talloc_free(str);
154 }
156 static void trace_io(const struct connection *conn,
157 const struct buffered_data *data,
158 int out)
159 {
160 unsigned int i;
161 time_t now;
162 struct tm *tm;
164 #ifdef HAVE_DTRACE
165 dtrace_io(conn, data, out);
166 #endif
168 if (tracefd < 0)
169 return;
171 now = time(NULL);
172 tm = localtime(&now);
174 trace("%s %p %04d%02d%02d %02d:%02d:%02d %s (",
175 out ? "OUT" : "IN", conn,
176 tm->tm_year + 1900, tm->tm_mon + 1,
177 tm->tm_mday, tm->tm_hour, tm->tm_min, tm->tm_sec,
178 sockmsg_string(data->hdr.msg.type));
180 for (i = 0; i < data->hdr.msg.len; i++)
181 trace("%c", (data->buffer[i] != '\0') ? data->buffer[i] : ' ');
182 trace(")\n");
183 }
/* Write a "CREATE <type> <address>" line to the trace log for `data`. */
185 void trace_create(const void *data, const char *type)
186 {
187 trace("CREATE %s %p\n", type, data);
188 }
/* Write a "DESTROY <type> <address>" line to the trace log for `data`. */
190 void trace_destroy(const void *data, const char *type)
191 {
192 trace("DESTROY %s %p\n", type, data);
193 }
195 /**
196 * Signal handler for SIGHUP, which requests that the trace log is reopened
197 * (in the main loop). A single byte is written to reopen_log_pipe, to awaken
198 * the select() in the main loop.
199 */
200 static void trigger_reopen_log(int signal __attribute__((unused)))
201 {
202 char c = 'A';
203 int dummy;
204 dummy = write(reopen_log_pipe[1], &c, 1);
205 }
208 static void reopen_log(void)
209 {
210 if (tracefile) {
211 if (tracefd > 0)
212 close(tracefd);
214 tracefd = open(tracefile, O_WRONLY|O_CREAT|O_APPEND, 0600);
216 if (tracefd < 0)
217 perror("Could not open tracefile");
218 else
219 trace("\n***\n");
220 }
221 }
/*
 * Try to make progress sending the first queued outgoing message on conn.
 * Returns false only when conn->write() reports an error (negative
 * return); returns true otherwise, including when nothing is queued or
 * when the message was only partly written.  A message is traced,
 * unlinked and freed once its whole payload has been written.
 */
224 static bool write_messages(struct connection *conn)
225 {
226 int ret;
227 struct buffered_data *out;
229 out = list_top(&conn->out_list, struct buffered_data, list);
230 if (out == NULL)
231 return true;
/* Header phase: out->used counts header bytes already sent. */
233 if (out->inhdr) {
234 if (verbose)
235 xprintf("Writing msg %s (%.*s) out to %p\n",
236 sockmsg_string(out->hdr.msg.type),
237 out->hdr.msg.len,
238 out->buffer, conn);
239 ret = conn->write(conn, out->hdr.raw + out->used,
240 sizeof(out->hdr) - out->used);
241 if (ret < 0)
242 return false;
244 out->used += ret;
245 if (out->used < sizeof(out->hdr))
246 return true;
/* Header complete: switch to the payload phase and restart the count. */
248 out->inhdr = false;
249 out->used = 0;
251 /* Second write might block if non-zero. */
252 if (out->hdr.msg.len && !conn->domain)
253 return true;
254 }
/* Payload phase: out->used counts payload bytes already sent. */
256 ret = conn->write(conn, out->buffer + out->used,
257 out->hdr.msg.len - out->used);
258 if (ret < 0)
259 return false;
261 out->used += ret;
262 if (out->used != out->hdr.msg.len)
263 return true;
265 trace_io(conn, out, 1);
267 list_del(&out->list);
268 talloc_free(out);
270 return true;
271 }
/*
 * Tear-down hook for a struct connection; always returns 0.
 * NOTE(review): presumably installed as the connection's talloc
 * destructor -- the registration is not visible in this part of the file.
 * For connections without a domain, queued output is flushed while
 * select() reports the fd writable with a zero timeout, and the fd is
 * then closed.  The connection is removed from the global list and its
 * destruction is traced.
 */
273 static int destroy_conn(void *_conn)
274 {
275 struct connection *conn = _conn;
277 /* Flush outgoing if possible, but don't block. */
278 if (!conn->domain) {
279 fd_set set;
280 struct timeval none;
282 FD_ZERO(&set);
283 FD_SET(conn->fd, &set);
284 none.tv_sec = none.tv_usec = 0;
286 while (!list_empty(&conn->out_list)
287 && select(conn->fd+1, NULL, &set, NULL, &none) == 1)
288 if (!write_messages(conn))
289 break;
290 close(conn->fd);
291 }
/* Drop this connection's reference on its target, if it has one. */
292 if (conn->target)
293 talloc_unlink(conn, conn->target);
294 list_del(&conn->list);
295 trace_destroy(conn, "connection");
296 return 0;
297 }
/*
 * Add fd to `set` and raise *max to fd if fd is larger.  Negative
 * descriptors are ignored.
 */
static void set_fd(int fd, fd_set *set, int *max)
{
	if (fd >= 0) {
		FD_SET(fd, set);
		*max = (fd > *max) ? fd : *max;
	}
}
310 static int initialize_set(fd_set *inset, fd_set *outset, int sock, int ro_sock,
311 struct timeval **ptimeout)
312 {
313 static struct timeval zero_timeout = { 0 };
314 struct connection *conn;
315 int max = -1;
317 *ptimeout = NULL;
319 FD_ZERO(inset);
320 FD_ZERO(outset);
322 set_fd(sock, inset, &max);
323 set_fd(ro_sock, inset, &max);
324 set_fd(reopen_log_pipe[0], inset, &max);
326 if (xce_handle != -1)
327 set_fd(xc_evtchn_fd(xce_handle), inset, &max);
329 list_for_each_entry(conn, &connections, list) {
330 if (conn->domain) {
331 if (domain_can_read(conn) ||
332 (domain_can_write(conn) &&
333 !list_empty(&conn->out_list)))
334 *ptimeout = &zero_timeout;
335 } else {
336 set_fd(conn->fd, inset, &max);
337 if (!list_empty(&conn->out_list))
338 FD_SET(conn->fd, outset);
339 }
340 }
342 return max;
343 }
/* talloc destructor for an int holding a descriptor: close it.  Always 0. */
static int destroy_fd(void *_fd)
{
	close(*(int *)_fd);
	return 0;
}
352 /* Return a pointer to an fd, self-closing and attached to this pathname. */
353 int *talloc_open(const char *pathname, int flags, int mode)
354 {
355 int *fd;
357 fd = talloc(pathname, int);
358 *fd = open(pathname, flags, mode);
359 if (*fd < 0) {
360 int saved_errno = errno;
361 talloc_free(fd);
362 errno = saved_errno;
363 return NULL;
364 }
365 talloc_set_destructor(fd, destroy_fd);
366 return fd;
367 }
/*
 * Is `child` equal to `parent` or somewhere beneath it?
 *
 * "/" is treated as the ancestor of everything; strictly it ought to be
 * "" for the prefix test below to work, but that is unfriendly to use.
 */
bool is_child(const char *child, const char *parent)
{
	size_t plen;

	if (strcmp(parent, "/") == 0)
		return true;

	plen = strlen(parent);
	if (strncmp(child, parent, plen) != 0)
		return false;

	/* The prefix must end on a path-component boundary. */
	switch (child[plen]) {
	case '/':
	case '\0':
		return true;
	default:
		return false;
	}
}
385 /* If it fails, returns NULL and sets errno. */
386 static struct node *read_node(struct connection *conn, const char *name)
387 {
388 TDB_DATA key, data;
389 uint32_t *p;
390 struct node *node;
391 TDB_CONTEXT * context = tdb_context(conn);
393 key.dptr = (void *)name;
394 key.dsize = strlen(name);
395 data = tdb_fetch(context, key);
397 if (data.dptr == NULL) {
398 if (tdb_error(context) == TDB_ERR_NOEXIST)
399 errno = ENOENT;
400 else {
401 log("TDB error on read: %s", tdb_errorstr(context));
402 errno = EIO;
403 }
404 return NULL;
405 }
407 node = talloc(name, struct node);
408 node->name = talloc_strdup(node, name);
409 node->parent = NULL;
410 node->tdb = tdb_context(conn);
411 talloc_steal(node, data.dptr);
413 /* Datalen, childlen, number of permissions */
414 p = (uint32_t *)data.dptr;
415 node->num_perms = p[0];
416 node->datalen = p[1];
417 node->childlen = p[2];
419 /* Permissions are struct xs_permissions. */
420 node->perms = (void *)&p[3];
421 /* Data is binary blob (usually ascii, no nul). */
422 node->data = node->perms + node->num_perms;
423 /* Children is strings, nul separated. */
424 node->children = node->data + node->datalen;
426 return node;
427 }
429 static bool write_node(struct connection *conn, const struct node *node)
430 {
431 /*
432 * conn will be null when this is called from manual_node.
433 * tdb_context copes with this.
434 */
436 TDB_DATA key, data;
437 void *p;
439 key.dptr = (void *)node->name;
440 key.dsize = strlen(node->name);
442 data.dsize = 3*sizeof(uint32_t)
443 + node->num_perms*sizeof(node->perms[0])
444 + node->datalen + node->childlen;
446 if (domain_is_unprivileged(conn) && data.dsize >= quota_max_entry_size)
447 goto error;
449 data.dptr = talloc_size(node, data.dsize);
450 ((uint32_t *)data.dptr)[0] = node->num_perms;
451 ((uint32_t *)data.dptr)[1] = node->datalen;
452 ((uint32_t *)data.dptr)[2] = node->childlen;
453 p = data.dptr + 3 * sizeof(uint32_t);
455 memcpy(p, node->perms, node->num_perms*sizeof(node->perms[0]));
456 p += node->num_perms*sizeof(node->perms[0]);
457 memcpy(p, node->data, node->datalen);
458 p += node->datalen;
459 memcpy(p, node->children, node->childlen);
461 /* TDB should set errno, but doesn't even set ecode AFAICT. */
462 if (tdb_store(tdb_context(conn), key, data, TDB_REPLACE) != 0) {
463 corrupt(conn, "Write of %s failed", key.dptr);
464 goto error;
465 }
466 return true;
467 error:
468 errno = ENOSPC;
469 return false;
470 }
472 static enum xs_perm_type perm_for_conn(struct connection *conn,
473 struct xs_permissions *perms,
474 unsigned int num)
475 {
476 unsigned int i;
477 enum xs_perm_type mask = XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER;
479 if (!conn->can_write)
480 mask &= ~XS_PERM_WRITE;
482 /* Owners and tools get it all... */
483 if (!conn->id || perms[0].id == conn->id
484 || (conn->target && perms[0].id == conn->target->id))
485 return (XS_PERM_READ|XS_PERM_WRITE|XS_PERM_OWNER) & mask;
487 for (i = 1; i < num; i++)
488 if (perms[i].id == conn->id
489 || (conn->target && perms[i].id == conn->target->id))
490 return perms[i].perms & mask;
492 return perms[0].perms & mask;
493 }
/*
 * Return the parent path of `node` as a new string that is a talloc
 * child of `node`.  A path with no '/' after its first character has
 * "/" as its parent.
 */
static char *get_parent(const char *node)
{
	const char *last = strrchr(node + 1, '/');

	if (last)
		return talloc_asprintf(node, "%.*s", (int)(last - node), node);

	return talloc_strdup(node, "/");
}
503 /* What do parents say? */
504 static enum xs_perm_type ask_parents(struct connection *conn, const char *name)
505 {
506 struct node *node;
508 do {
509 name = get_parent(name);
510 node = read_node(conn, name);
511 if (node)
512 break;
513 } while (!streq(name, "/"));
515 /* No permission at root? We're in trouble. */
516 if (!node)
517 corrupt(conn, "No permissions file at root");
519 return perm_for_conn(conn, node->perms, node->num_perms);
520 }
522 /* We have a weird permissions system. You can allow someone into a
523 * specific node without allowing it in the parents. If it's going to
524 * fail, however, we don't want the errno to indicate any information
525 * about the node. */
526 static int errno_from_parents(struct connection *conn, const char *node,
527 int errnum, enum xs_perm_type perm)
528 {
529 /* We always tell them about memory failures. */
530 if (errnum == ENOMEM)
531 return errnum;
533 if (ask_parents(conn, node) & perm)
534 return errnum;
535 return EACCES;
536 }
538 /* If it fails, returns NULL and sets errno. */
539 struct node *get_node(struct connection *conn,
540 const char *name,
541 enum xs_perm_type perm)
542 {
543 struct node *node;
545 if (!name || !is_valid_nodename(name)) {
546 errno = EINVAL;
547 return NULL;
548 }
549 node = read_node(conn, name);
550 /* If we don't have permission, we don't have node. */
551 if (node) {
552 if ((perm_for_conn(conn, node->perms, node->num_perms) & perm)
553 != perm) {
554 errno = EACCES;
555 node = NULL;
556 }
557 }
558 /* Clean up errno if they weren't supposed to know. */
559 if (!node)
560 errno = errno_from_parents(conn, name, errno, perm);
561 return node;
562 }
564 static struct buffered_data *new_buffer(void *ctx)
565 {
566 struct buffered_data *data;
568 data = talloc_zero(ctx, struct buffered_data);
569 if (data == NULL)
570 return NULL;
572 data->inhdr = true;
573 return data;
574 }
576 /* Return length of string (including nul) at this offset.
577 * If there is no nul, returns 0 for failure.
578 */
579 static unsigned int get_string(const struct buffered_data *data,
580 unsigned int offset)
581 {
582 const char *nul;
584 if (offset >= data->used)
585 return 0;
587 nul = memchr(data->buffer + offset, 0, data->used - offset);
588 if (!nul)
589 return 0;
591 return nul - (data->buffer + offset) + 1;
592 }
594 /* Break input into vectors, return the number, fill in up to num of them.
595 * Always returns the actual number of nuls in the input. Stores the
596 * positions of the starts of the nul-terminated strings in vec.
597 * Callers who use this and then rely only on vec[] will
598 * ignore any data after the final nul.
599 */
600 unsigned int get_strings(struct buffered_data *data,
601 char *vec[], unsigned int num)
602 {
603 unsigned int off, i, len;
605 off = i = 0;
606 while ((len = get_string(data, off)) != 0) {
607 if (i < num)
608 vec[i] = data->buffer + off;
609 i++;
610 off += len;
611 }
612 return i;
613 }
615 void send_reply(struct connection *conn, enum xsd_sockmsg_type type,
616 const void *data, unsigned int len)
617 {
618 struct buffered_data *bdata;
620 /* Message is a child of the connection context for auto-cleanup. */
621 bdata = new_buffer(conn);
622 bdata->buffer = talloc_array(bdata, char, len);
624 /* Echo request header in reply unless this is an async watch event. */
625 if (type != XS_WATCH_EVENT) {
626 memcpy(&bdata->hdr.msg, &conn->in->hdr.msg,
627 sizeof(struct xsd_sockmsg));
628 } else {
629 memset(&bdata->hdr.msg, 0, sizeof(struct xsd_sockmsg));
630 }
632 /* Update relevant header fields and fill in the message body. */
633 bdata->hdr.msg.type = type;
634 bdata->hdr.msg.len = len;
635 memcpy(bdata->buffer, data, len);
637 /* Queue for later transmission. */
638 list_add_tail(&bdata->list, &conn->out_list);
639 }
641 /* Some routines (write, mkdir, etc) just need a non-error return */
/* Reply to conn with message `type` and the payload "OK"; sizeof("OK")
 * means the terminating nul is sent as part of the payload. */
642 void send_ack(struct connection *conn, enum xsd_sockmsg_type type)
643 {
644 send_reply(conn, type, "OK", sizeof("OK"));
645 }
647 void send_error(struct connection *conn, int error)
648 {
649 unsigned int i;
651 for (i = 0; error != xsd_errors[i].errnum; i++) {
652 if (i == ARRAY_SIZE(xsd_errors) - 1) {
653 eprintf("xenstored: error %i untranslatable", error);
654 i = 0; /* EINVAL */
655 break;
656 }
657 }
658 send_reply(conn, XS_ERROR, xsd_errors[i].errstring,
659 strlen(xsd_errors[i].errstring) + 1);
660 }
/*
 * True if `node` consists solely of characters allowed in node names:
 * ASCII letters, digits and "-/_@".  The empty string passes.
 */
static bool valid_chars(const char *node)
{
	static const char allowed[] =
		"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
		"abcdefghijklmnopqrstuvwxyz"
		"0123456789-/_@";

	/* The allowed prefix must reach all the way to the terminator. */
	return node[strspn(node, allowed)] == '\0';
}
671 bool is_valid_nodename(const char *node)
672 {
673 /* Must start in /. */
674 if (!strstarts(node, "/"))
675 return false;
677 /* Cannot end in / (unless it's just "/"). */
678 if (strends(node, "/") && !streq(node, "/"))
679 return false;
681 /* No double //. */
682 if (strstr(node, "//"))
683 return false;
685 if (strlen(node) > XENSTORE_ABS_PATH_MAX)
686 return false;
688 return valid_chars(node);
689 }
691 /* We expect one arg in the input: return NULL otherwise.
692 * The payload must contain exactly one nul, at the end.
693 */
694 static const char *onearg(struct buffered_data *in)
695 {
696 if (!in->used || get_string(in, 0) != in->used)
697 return NULL;
698 return in->buffer;
699 }
701 static char *perms_to_strings(const void *ctx,
702 struct xs_permissions *perms, unsigned int num,
703 unsigned int *len)
704 {
705 unsigned int i;
706 char *strings = NULL;
707 char buffer[MAX_STRLEN(unsigned int) + 1];
709 for (*len = 0, i = 0; i < num; i++) {
710 if (!xs_perm_to_string(&perms[i], buffer, sizeof(buffer)))
711 return NULL;
713 strings = talloc_realloc(ctx, strings, char,
714 *len + strlen(buffer) + 1);
715 strcpy(strings + *len, buffer);
716 *len += strlen(buffer) + 1;
717 }
718 return strings;
719 }
/*
 * Turn a possibly relative node name into an absolute one.  NULL and
 * names already starting with '/' are returned unchanged.  Otherwise
 * the connection's implicit path, if it has one, is prepended; the
 * result is then a new string that is a talloc child of `node`.
 */
char *canonicalize(struct connection *conn, const char *node)
{
	const char *prefix;

	if (node && !strstarts(node, "/")) {
		prefix = get_implicit_path(conn);
		if (prefix)
			return talloc_asprintf(node, "%s/%s", prefix, node);
	}

	return (char *)node;
}
/*
 * True if `node` names a special event node, i.e. is non-NULL and
 * starts with '@'.  Otherwise sets errno to EINVAL and returns false.
 */
bool check_event_node(const char *node)
{
	if (node && strstarts(node, "@"))
		return true;

	errno = EINVAL;
	return false;
}
742 static void send_directory(struct connection *conn, const char *name)
743 {
744 struct node *node;
746 name = canonicalize(conn, name);
747 node = get_node(conn, name, XS_PERM_READ);
748 if (!node) {
749 send_error(conn, errno);
750 return;
751 }
753 send_reply(conn, XS_DIRECTORY, node->children, node->childlen);
754 }
756 static void do_read(struct connection *conn, const char *name)
757 {
758 struct node *node;
760 name = canonicalize(conn, name);
761 node = get_node(conn, name, XS_PERM_READ);
762 if (!node) {
763 send_error(conn, errno);
764 return;
765 }
767 send_reply(conn, XS_READ, node->data, node->datalen);
768 }
770 static void delete_node_single(struct connection *conn, struct node *node)
771 {
772 TDB_DATA key;
774 key.dptr = (void *)node->name;
775 key.dsize = strlen(node->name);
777 if (tdb_delete(tdb_context(conn), key) != 0) {
778 corrupt(conn, "Could not delete '%s'", node->name);
779 return;
780 }
781 domain_entry_dec(conn, node);
782 }
/*
 * Return the final path component of `name`: the text after its last
 * '/'.  `name` must contain a '/' and must not be "/" itself.
 */
static char *basename(const char *name)
{
	char *last_slash = strrchr(name, '/');

	return last_slash + 1;
}
790 static struct node *construct_node(struct connection *conn, const char *name)
791 {
792 const char *base;
793 unsigned int baselen;
794 struct node *parent, *node;
795 char *children, *parentname = get_parent(name);
797 /* If parent doesn't exist, create it. */
798 parent = read_node(conn, parentname);
799 if (!parent)
800 parent = construct_node(conn, parentname);
801 if (!parent)
802 return NULL;
804 if (domain_entry(conn) >= quota_nb_entry_per_domain)
805 return NULL;
807 /* Add child to parent. */
808 base = basename(name);
809 baselen = strlen(base) + 1;
810 children = talloc_array(name, char, parent->childlen + baselen);
811 memcpy(children, parent->children, parent->childlen);
812 memcpy(children + parent->childlen, base, baselen);
813 parent->children = children;
814 parent->childlen += baselen;
816 /* Allocate node */
817 node = talloc(name, struct node);
818 node->tdb = tdb_context(conn);
819 node->name = talloc_strdup(node, name);
821 /* Inherit permissions, except domains own what they create */
822 node->num_perms = parent->num_perms;
823 node->perms = talloc_memdup(node, parent->perms,
824 node->num_perms * sizeof(node->perms[0]));
825 if (conn && conn->id)
826 node->perms[0].id = conn->id;
828 /* No children, no data */
829 node->children = node->data = NULL;
830 node->childlen = node->datalen = 0;
831 node->parent = parent;
832 domain_entry_inc(conn, node);
833 return node;
834 }
836 static int destroy_node(void *_node)
837 {
838 struct node *node = _node;
839 TDB_DATA key;
841 if (streq(node->name, "/"))
842 corrupt(NULL, "Destroying root node!");
844 key.dptr = (void *)node->name;
845 key.dsize = strlen(node->name);
847 tdb_delete(node->tdb, key);
848 return 0;
849 }
851 static struct node *create_node(struct connection *conn,
852 const char *name,
853 void *data, unsigned int datalen)
854 {
855 struct node *node, *i;
857 node = construct_node(conn, name);
858 if (!node)
859 return NULL;
861 node->data = data;
862 node->datalen = datalen;
864 /* We write out the nodes down, setting destructor in case
865 * something goes wrong. */
866 for (i = node; i; i = i->parent) {
867 if (!write_node(conn, i)) {
868 domain_entry_dec(conn, i);
869 return NULL;
870 }
871 talloc_set_destructor(i, destroy_node);
872 }
874 /* OK, now remove destructors so they stay around */
875 for (i = node; i; i = i->parent)
876 talloc_set_destructor(i, NULL);
877 return node;
878 }
880 /* path, data... */
881 static void do_write(struct connection *conn, struct buffered_data *in)
882 {
883 unsigned int offset, datalen;
884 struct node *node;
885 char *vec[1] = { NULL }; /* gcc4 + -W + -Werror fucks code. */
886 char *name;
888 /* Extra "strings" can be created by binary data. */
889 if (get_strings(in, vec, ARRAY_SIZE(vec)) < ARRAY_SIZE(vec)) {
890 send_error(conn, EINVAL);
891 return;
892 }
894 offset = strlen(vec[0]) + 1;
895 datalen = in->used - offset;
897 name = canonicalize(conn, vec[0]);
898 node = get_node(conn, name, XS_PERM_WRITE);
899 if (!node) {
900 /* No permissions, invalid input? */
901 if (errno != ENOENT) {
902 send_error(conn, errno);
903 return;
904 }
905 node = create_node(conn, name, in->buffer + offset, datalen);
906 if (!node) {
907 send_error(conn, errno);
908 return;
909 }
910 } else {
911 node->data = in->buffer + offset;
912 node->datalen = datalen;
913 if (!write_node(conn, node)){
914 send_error(conn, errno);
915 return;
916 }
917 }
919 add_change_node(conn->transaction, name, false);
920 fire_watches(conn, name, false);
921 send_ack(conn, XS_WRITE);
922 }
924 static void do_mkdir(struct connection *conn, const char *name)
925 {
926 struct node *node;
928 name = canonicalize(conn, name);
929 node = get_node(conn, name, XS_PERM_WRITE);
931 /* If it already exists, fine. */
932 if (!node) {
933 /* No permissions? */
934 if (errno != ENOENT) {
935 send_error(conn, errno);
936 return;
937 }
938 node = create_node(conn, name, NULL, 0);
939 if (!node) {
940 send_error(conn, errno);
941 return;
942 }
943 add_change_node(conn->transaction, name, false);
944 fire_watches(conn, name, false);
945 }
946 send_ack(conn, XS_MKDIR);
947 }
949 static void delete_node(struct connection *conn, struct node *node)
950 {
951 unsigned int i;
953 /* Delete self, then delete children. If we crash, then the worst
954 that can happen is the children will continue to take up space, but
955 will otherwise be unreachable. */
956 delete_node_single(conn, node);
958 /* Delete children, too. */
959 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
960 struct node *child;
962 child = read_node(conn,
963 talloc_asprintf(node, "%s/%s", node->name,
964 node->children + i));
965 if (child) {
966 delete_node(conn, child);
967 }
968 else {
969 trace("delete_node: No child '%s/%s' found!\n",
970 node->name, node->children + i);
971 /* Skip it, we've already deleted the parent. */
972 }
973 }
974 }
/*
 * Remove `len` bytes at offset `off` from a buffer holding `total`
 * bytes by moving the tail down over them.  The buffer is not resized;
 * the last `len` bytes are left with stale contents.
 */
static void memdel(void *mem, unsigned off, unsigned len, unsigned total)
{
	/* Arithmetic on void * is a GNU extension; use a byte pointer. */
	char *bytes = mem;

	memmove(bytes + off, bytes + off + len, total - off - len);
}
984 static bool remove_child_entry(struct connection *conn, struct node *node,
985 size_t offset)
986 {
987 size_t childlen = strlen(node->children + offset);
988 memdel(node->children, offset, childlen + 1, node->childlen);
989 node->childlen -= childlen + 1;
990 return write_node(conn, node);
991 }
/*
 * Find `childname` in node's nul-separated child list and remove it via
 * remove_child_entry(), returning that call's result.  If the name is
 * not listed, the inconsistency is reported through corrupt() and false
 * is returned.
 */
994 static bool delete_child(struct connection *conn,
995 struct node *node, const char *childname)
996 {
997 unsigned int i;
999 for (i = 0; i < node->childlen; i += strlen(node->children+i) + 1) {
1000 if (streq(node->children+i, childname)) {
1001 return remove_child_entry(conn, node, i);
1004 corrupt(conn, "Can't find child '%s' in %s", childname, node->name);
1005 return false;
/*
 * Remove `node` (whose path is `name`) and its subtree.  Returns 1 on
 * success.  Returns 0, after sending EINVAL to conn, if the parent
 * cannot be read or the child entry cannot be removed from it.
 */
1009 static int _rm(struct connection *conn, struct node *node, const char *name)
1011 /* Delete from parent first, then if we crash, the worst that can
1012 happen is the child will continue to take up space, but will
1013 otherwise be unreachable. */
1014 struct node *parent = read_node(conn, get_parent(name));
1015 if (!parent) {
1016 send_error(conn, EINVAL);
1017 return 0;
1020 if (!delete_child(conn, parent, basename(name))) {
1021 send_error(conn, EINVAL);
1022 return 0;
1025 delete_node(conn, node);
1026 return 1;
/*
 * Remove `name` from the global store without a client connection
 * (conn is NULL throughout).  The name is copied into a top-level
 * talloc string so read_node() and _rm() have a talloc context to
 * allocate from; node and copy are freed afterwards.  A missing node
 * is silently ignored.
 */
1030 static void internal_rm(const char *name)
1032 char *tname = talloc_strdup(NULL, name);
1033 struct node *node = read_node(NULL, tname);
1034 if (node)
1035 _rm(NULL, node, tname);
1036 talloc_free(node);
1037 talloc_free(tname);
/*
 * XS_RM handler.  Removing a node that does not exist is acknowledged
 * as success provided its parent can be read; other lookup failures are
 * reported to the client.  Removing "/" is rejected with EINVAL.  After
 * a successful _rm() the change is recorded for the transaction,
 * watches are fired and an ack is sent.
 */
1041 static void do_rm(struct connection *conn, const char *name)
1043 struct node *node;
1045 name = canonicalize(conn, name);
1046 node = get_node(conn, name, XS_PERM_WRITE);
1047 if (!node) {
1048 /* Didn't exist already? Fine, if parent exists. */
1049 if (errno == ENOENT) {
1050 node = read_node(conn, get_parent(name));
1051 if (node) {
1052 send_ack(conn, XS_RM);
1053 return;
1055 /* Restore errno, just in case. */
1056 errno = ENOENT;
1058 send_error(conn, errno);
1059 return;
1062 if (streq(name, "/")) {
1063 send_error(conn, EINVAL);
1064 return;
1067 if (_rm(conn, node, name)) {
1068 add_change_node(conn->transaction, name, true);
1069 fire_watches(conn, name, true);
1070 send_ack(conn, XS_RM);
/*
 * XS_GET_PERMS handler: reply with the node's permissions rendered by
 * perms_to_strings(), or with an error if the node cannot be read or
 * the conversion fails.
 */
1075 static void do_get_perms(struct connection *conn, const char *name)
1077 struct node *node;
1078 char *strings;
1079 unsigned int len;
1081 name = canonicalize(conn, name);
1082 node = get_node(conn, name, XS_PERM_READ);
1083 if (!node) {
1084 send_error(conn, errno);
1085 return;
1088 strings = perms_to_strings(node, node->perms, node->num_perms, &len);
1089 if (!strings)
1090 send_error(conn, errno);
1091 else
1092 send_reply(conn, XS_GET_PERMS, strings, len);
/*
 * XS_SET_PERMS handler.  The payload is the node name followed by one
 * or more permission strings; fewer than two strings in total is
 * EINVAL.  The caller needs both write and owner permission on the
 * node, and an unprivileged domain may not change the owner id (EPERM).
 * On success the node is rewritten with the new permissions, the change
 * is recorded, watches are fired and an ack is sent.
 */
1095 static void do_set_perms(struct connection *conn, struct buffered_data *in)
1097 unsigned int num;
1098 struct xs_permissions *perms;
1099 char *name, *permstr;
1100 struct node *node;
1102 num = xs_count_strings(in->buffer, in->used);
1103 if (num < 2) {
1104 send_error(conn, EINVAL);
1105 return;
1108 /* First arg is node name. */
1109 name = canonicalize(conn, in->buffer);
1110 permstr = in->buffer + strlen(in->buffer) + 1;
1111 num--;
1113 /* We must own node to do this (tools can do this too). */
1114 node = get_node(conn, name, XS_PERM_WRITE|XS_PERM_OWNER);
1115 if (!node) {
1116 send_error(conn, errno);
1117 return;
1120 perms = talloc_array(node, struct xs_permissions, num);
1121 if (!xs_strings_to_perms(perms, num, permstr)) {
1122 send_error(conn, errno);
1123 return;
1126 /* Unprivileged domains may not change the owner. */
1127 if (domain_is_unprivileged(conn) &&
1128 perms[0].id != node->perms[0].id) {
1129 send_error(conn, EPERM);
1130 return;
/* The dec/inc pair brackets the permission swap.
 * NOTE(review): presumably this moves the entry count between owners -- confirm in xenstored_domain.c. */
1133 domain_entry_dec(conn, node);
1134 node->perms = perms;
1135 node->num_perms = num;
1136 domain_entry_inc(conn, node);
1138 if (!write_node(conn, node)) {
1139 send_error(conn, errno);
1140 return;
1143 add_change_node(conn->transaction, name, false);
1144 fire_watches(conn, name, false);
1145 send_ack(conn, XS_SET_PERMS);
/*
 * XS_DEBUG handler, refused with EACCES unless the connection id is 0.
 * "print" followed by a second string prints that string through
 * xprintf() (EINVAL if the second string is missing); "check" runs
 * check_store().  An ack is sent afterwards.
 * NOTE(review): in->buffer is compared with streq() without first
 * checking that the payload contains a nul -- confirm an empty or
 * unterminated payload cannot reach here.
 */
1148 static void do_debug(struct connection *conn, struct buffered_data *in)
1150 int num;
1152 if (conn->id != 0) {
1153 send_error(conn, EACCES);
1154 return;
1157 num = xs_count_strings(in->buffer, in->used);
1159 if (streq(in->buffer, "print")) {
1160 if (num < 2) {
1161 send_error(conn, EINVAL);
1162 return;
1164 xprintf("debug: %s", in->buffer + get_string(in, 0));
1167 if (streq(in->buffer, "check"))
1168 check_store();
1170 send_ack(conn, XS_DEBUG);
1173 /* Process "in" for conn: "in" will vanish after this conversation, so
1174 * we can talloc off it for temporary variables. May free "conn".
1175 */
/*
 * Dispatch one complete request.  The transaction named by the header's
 * tx_id is looked up first; a failed lookup is reported to the client
 * and nothing else happens.  Otherwise conn->transaction is set for the
 * duration of the handler and cleared again afterwards.  Handlers that
 * take a single string argument receive onearg(in), which is NULL for a
 * malformed payload.  Unknown message types get ENOSYS.
 */
1176 static void process_message(struct connection *conn, struct buffered_data *in)
1178 struct transaction *trans;
1180 trans = transaction_lookup(conn, in->hdr.msg.tx_id);
1181 if (IS_ERR(trans)) {
1182 send_error(conn, -PTR_ERR(trans));
1183 return;
1186 assert(conn->transaction == NULL);
1187 conn->transaction = trans;
1189 switch (in->hdr.msg.type) {
1190 case XS_DIRECTORY:
1191 send_directory(conn, onearg(in));
1192 break;
1194 case XS_READ:
1195 do_read(conn, onearg(in));
1196 break;
1198 case XS_WRITE:
1199 do_write(conn, in);
1200 break;
1202 case XS_MKDIR:
1203 do_mkdir(conn, onearg(in));
1204 break;
1206 case XS_RM:
1207 do_rm(conn, onearg(in));
1208 break;
1210 case XS_GET_PERMS:
1211 do_get_perms(conn, onearg(in));
1212 break;
1214 case XS_SET_PERMS:
1215 do_set_perms(conn, in);
1216 break;
1218 case XS_DEBUG:
1219 do_debug(conn, in);
1220 break;
1222 case XS_WATCH:
1223 do_watch(conn, in);
1224 break;
1226 case XS_UNWATCH:
1227 do_unwatch(conn, in);
1228 break;
1230 case XS_TRANSACTION_START:
1231 do_transaction_start(conn, in);
1232 break;
1234 case XS_TRANSACTION_END:
1235 do_transaction_end(conn, onearg(in));
1236 break;
1238 case XS_INTRODUCE:
1239 do_introduce(conn, in);
1240 break;
1242 case XS_IS_DOMAIN_INTRODUCED:
1243 do_is_domain_introduced(conn, onearg(in));
1244 break;
1246 case XS_RELEASE:
1247 do_release(conn, onearg(in));
1248 break;
1250 case XS_GET_DOMAIN_PATH:
1251 do_get_domain_path(conn, onearg(in));
1252 break;
1254 case XS_RESUME:
1255 do_resume(conn, onearg(in));
1256 break;
1258 case XS_SET_TARGET:
1259 do_set_target(conn, in);
1260 break;
1262 default:
1263 eprintf("Client unknown operation %i", in->hdr.msg.type);
1264 send_error(conn, ENOSYS);
1265 break;
1268 conn->transaction = NULL;
/*
 * Handle the fully received request in conn->in: optionally log it,
 * process it, then free it and install a fresh input buffer for the
 * next request.
 * NOTE(review): the result of new_buffer() is not checked here, so
 * conn->in can be left NULL if the allocation fails.
 */
1271 static void consider_message(struct connection *conn)
1273 if (verbose)
1274 xprintf("Got message %s len %i from %p\n",
1275 sockmsg_string(conn->in->hdr.msg.type),
1276 conn->in->hdr.msg.len, conn);
1278 process_message(conn, conn->in);
1280 talloc_free(conn->in);
1281 conn->in = new_buffer(conn);
1284 /* Errors in reading or allocating here mean we get out of sync, so we
1285 * drop the whole client connection. */
1286 static void handle_input(struct connection *conn)
1288 int bytes;
1289 struct buffered_data *in = conn->in;
1291 /* Not finished header yet? */
1292 if (in->inhdr) {
1293 bytes = conn->read(conn, in->hdr.raw + in->used,
1294 sizeof(in->hdr) - in->used);
1295 if (bytes < 0)
1296 goto bad_client;
1297 in->used += bytes;
1298 if (in->used != sizeof(in->hdr))
1299 return;
1301 if (in->hdr.msg.len > XENSTORE_PAYLOAD_MAX) {
1302 syslog(LOG_ERR, "Client tried to feed us %i",
1303 in->hdr.msg.len);
1304 goto bad_client;
1307 in->buffer = talloc_array(in, char, in->hdr.msg.len);
1308 if (!in->buffer)
1309 goto bad_client;
1310 in->used = 0;
1311 in->inhdr = false;
1312 return;
1315 bytes = conn->read(conn, in->buffer + in->used,
1316 in->hdr.msg.len - in->used);
1317 if (bytes < 0)
1318 goto bad_client;
1320 in->used += bytes;
1321 if (in->used != in->hdr.msg.len)
1322 return;
1324 trace_io(conn, in, 0);
1325 consider_message(conn);
1326 return;
1328 bad_client:
1329 /* Kill it. */
1330 talloc_free(conn);
/* Flush queued replies; drop the connection if write_messages() reports failure. */
static void handle_output(struct connection *conn)
{
	if (write_messages(conn))
		return;

	talloc_free(conn);
}
1339 struct connection *new_connection(connwritefn_t *write, connreadfn_t *read)
1341 struct connection *new;
1343 new = talloc_zero(talloc_autofree_context(), struct connection);
1344 if (!new)
1345 return NULL;
1347 new->fd = -1;
1348 new->write = write;
1349 new->read = read;
1350 new->can_write = true;
1351 new->transaction_started = 0;
1352 INIT_LIST_HEAD(&new->out_list);
1353 INIT_LIST_HEAD(&new->watches);
1354 INIT_LIST_HEAD(&new->transaction_list);
1356 new->in = new_buffer(new);
1357 if (new->in == NULL) {
1358 talloc_free(new);
1359 return NULL;
1362 list_add_tail(&new->list, &connections);
1363 talloc_set_destructor(new, destroy_conn);
1364 trace_create(new, "connection");
1365 return new;
1368 static int writefd(struct connection *conn, const void *data, unsigned int len)
1370 int rc;
1372 while ((rc = write(conn->fd, data, len)) < 0) {
1373 if (errno == EAGAIN) {
1374 rc = 0;
1375 break;
1377 if (errno != EINTR)
1378 break;
1381 return rc;
1384 static int readfd(struct connection *conn, void *data, unsigned int len)
1386 int rc;
1388 while ((rc = read(conn->fd, data, len)) < 0) {
1389 if (errno == EAGAIN) {
1390 rc = 0;
1391 break;
1393 if (errno != EINTR)
1394 break;
1397 /* Reading zero length means we're done with this connection. */
1398 if ((rc == 0) && (len != 0)) {
1399 errno = EBADF;
1400 rc = -1;
1403 return rc;
1406 static void accept_connection(int sock, bool canwrite)
1408 int fd;
1409 struct connection *conn;
1411 fd = accept(sock, NULL, NULL);
1412 if (fd < 0)
1413 return;
1415 conn = new_connection(writefd, readfd);
1416 if (conn) {
1417 conn->fd = fd;
1418 conn->can_write = canwrite;
1419 } else
1420 close(fd);
1423 #define TDB_FLAGS 0
1425 /* We create initial nodes manually. */
1426 static void manual_node(const char *name, const char *child)
1428 struct node *node;
1429 struct xs_permissions perms = { .id = 0, .perms = XS_PERM_NONE };
1431 node = talloc_zero(NULL, struct node);
1432 node->name = name;
1433 node->perms = &perms;
1434 node->num_perms = 1;
1435 node->children = (char *)child;
1436 if (child)
1437 node->childlen = strlen(child) + 1;
1439 if (!write_node(NULL, node))
1440 barf_perror("Could not create initial node %s", name);
1441 talloc_free(node);
1444 static void setup_structure(void)
1446 char *tdbname;
1447 tdbname = talloc_strdup(talloc_autofree_context(), xs_daemon_tdb());
1448 tdb_ctx = tdb_open(tdbname, 0, TDB_FLAGS, O_RDWR, 0);
1450 if (tdb_ctx) {
1451 /* XXX When we make xenstored able to restart, this will have
1452 to become cleverer, checking for existing domains and not
1453 removing the corresponding entries, but for now xenstored
1454 cannot be restarted without losing all the registered
1455 watches, which breaks all the backend drivers anyway. We
1456 can therefore get away with just clearing /local and
1457 expecting Xend to put the appropriate entries back in.
1459 When this change is made it is important to note that
1460 dom0's entries must be cleaned up on reboot _before_ this
1461 daemon starts, otherwise the backend drivers and dom0's
1462 balloon driver will pick up stale entries. In the case of
1463 the balloon driver, this can be fatal.
1464 */
1465 char *tlocal = talloc_strdup(NULL, "/local");
1467 check_store();
1469 if (remove_local) {
1470 internal_rm("/local");
1471 create_node(NULL, tlocal, NULL, 0);
1473 check_store();
1476 talloc_free(tlocal);
1478 else {
1479 tdb_ctx = tdb_open(tdbname, 7919, TDB_FLAGS, O_RDWR|O_CREAT,
1480 0640);
1481 if (!tdb_ctx)
1482 barf_perror("Could not create tdb file %s", tdbname);
1484 manual_node("/", "tool");
1485 manual_node("/tool", "xenstored");
1486 manual_node("/tool/xenstored", NULL);
1488 check_store();
/*
 * Hash a NUL-terminated string key: start from 5381 and, for each
 * character, multiply the running value by 33 and add the character.
 */
static unsigned int hash_from_key_fn(void *k)
{
	const char *p;
	unsigned int h = 5381;

	for (p = k; *p != '\0'; p++) {
		char c = *p;

		h = h * 33u + (unsigned int)c;
	}

	return h;
}
/* Key comparison for the string hashtables: 1 if the strings match, else 0. */
static int keys_equal_fn(void *key1, void *key2)
{
	const char *lhs = key1;
	const char *rhs = key2;

	return strcmp(lhs, rhs) == 0;
}
/*
 * Build the path of child "s2" under parent "s1" as a new talloc string
 * with no parent context.  The root ("/") is special-cased so the result
 * does not start with a double slash.
 */
static char *child_name(const char *s1, const char *s2)
{
	if (strcmp(s1, "/") == 0)
		return talloc_asprintf(NULL, "/%s", s2);

	return talloc_asprintf(NULL, "%s/%s", s1, s2);
}
/*
 * Record "str" in "hash" by inserting a malloc'ed copy as the key, with
 * the dummy value (void *)1.
 *
 * Running out of memory is fatal: previously the copy went through a
 * NULL pointer, and an entry silently missing from the table would make
 * the store checker treat a node as unseen.
 */
static void remember_string(struct hashtable *hash, const char *str)
{
	size_t len = strlen(str) + 1;
	char *k = malloc(len);

	if (!k)
		barf_perror("Could not allocate memory to remember '%s'", str);

	memcpy(k, str, len);
	/* NOTE(review): hashtable_insert()'s result is still ignored, as
	 * before; confirm its failure convention and handle it. */
	hashtable_insert(hash, k, (void *)1);
}
1531 /**
1532 * A node has a children field that names the children of the node, separated
1533 * by NULs. We check whether there are entries in there that are duplicated
1534 * (and if so, delete the second one), and whether there are any that do not
1535 * have a corresponding child node (and if so, delete them). Each valid child
1536 * is then recursively checked.
1538 * No deleting is performed if the recovery flag is cleared (i.e. -R was
1539 * passed on the command line).
1541 * As we go, we record each node in the given reachable hashtable. These
1542 * entries will be used later in clean_store.
1543 */
1544 static void check_store_(const char *name, struct hashtable *reachable)
1546 struct node *node = read_node(NULL, name);
1548 if (node) {
1549 size_t i = 0;
1551 struct hashtable * children =
1552 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1554 remember_string(reachable, name);
1556 while (i < node->childlen) {
1557 size_t childlen = strlen(node->children + i);
1558 char * childname = child_name(node->name,
1559 node->children + i);
1560 struct node *childnode = read_node(NULL, childname);
1562 if (childnode) {
1563 if (hashtable_search(children, childname)) {
1564 log("check_store: '%s' is duplicated!",
1565 childname);
1567 if (recovery) {
1568 remove_child_entry(NULL, node,
1569 i);
1570 i -= childlen + 1;
1573 else {
1574 remember_string(children, childname);
1575 check_store_(childname, reachable);
1578 else {
1579 log("check_store: No child '%s' found!\n",
1580 childname);
1582 if (recovery) {
1583 remove_child_entry(NULL, node, i);
1584 i -= childlen + 1;
1588 talloc_free(childnode);
1589 talloc_free(childname);
1590 i += childlen + 1;
1593 hashtable_destroy(children, 0 /* Don't free values (they are
1594 all (void *)1) */);
1595 talloc_free(node);
1597 else {
1598 /* Impossible, because no database should ever be without the
1599 root, and otherwise, we've just checked in our caller
1600 (which made a recursive call to get here). */
1602 log("check_store: No child '%s' found: impossible!", name);
1607 /**
1608 * Helper to clean_store below.
1609 */
1610 static int clean_store_(TDB_CONTEXT *tdb, TDB_DATA key, TDB_DATA val,
1611 void *private)
1613 struct hashtable *reachable = private;
1614 char * name = talloc_strndup(NULL, key.dptr, key.dsize);
1616 if (!hashtable_search(reachable, name)) {
1617 log("clean_store: '%s' is orphaned!", name);
1618 if (recovery) {
1619 tdb_delete(tdb, key);
1623 talloc_free(name);
1625 return 0;
1629 /**
1630 * Given the list of reachable nodes, iterate over the whole store, and
1631 * remove any that were not reached.
1632 */
1633 static void clean_store(struct hashtable *reachable)
1635 tdb_traverse(tdb_ctx, &clean_store_, reachable);
1639 static void check_store(void)
1641 char * root = talloc_strdup(NULL, "/");
1642 struct hashtable * reachable =
1643 create_hashtable(16, hash_from_key_fn, keys_equal_fn);
1645 log("Checking store ...");
1646 check_store_(root, reachable);
1647 clean_store(reachable);
1648 log("Checking store complete.");
1650 hashtable_destroy(reachable, 0 /* Don't free values (they are all
1651 (void *)1) */);
1652 talloc_free(root);
1656 /* Something is horribly wrong: check the store. */
1657 static void corrupt(struct connection *conn, const char *fmt, ...)
1659 va_list arglist;
1660 char *str;
1661 int saved_errno = errno;
1663 va_start(arglist, fmt);
1664 str = talloc_vasprintf(NULL, fmt, arglist);
1665 va_end(arglist);
1667 log("corruption detected by connection %i: err %s: %s",
1668 conn ? (int)conn->id : -1, strerror(saved_errno), str);
1670 check_store();
1674 static void write_pidfile(const char *pidfile)
1676 char buf[100];
1677 int len;
1678 int fd;
1680 fd = open(pidfile, O_RDWR | O_CREAT, 0600);
1681 if (fd == -1)
1682 barf_perror("Opening pid file %s", pidfile);
1684 /* We exit silently if daemon already running. */
1685 if (lockf(fd, F_TLOCK, 0) == -1)
1686 exit(0);
1688 len = snprintf(buf, sizeof(buf), "%ld\n", (long)getpid());
1689 if (write(fd, buf, len) != len)
1690 barf_perror("Writing pid file %s", pidfile);
/*
 * Detach from the invoking process: fork, start a new session, fork
 * again, chdir to "/" and clear the umask.  (After Stevens.)
 */
static void daemonize(void)
{
	pid_t child;

	/* First fork: the parent exits, so init inherits us. */
	child = fork();
	if (child < 0)
		barf_perror("Failed to fork daemon");
	if (child != 0)
		exit(0);

	/* Become session leader so ^C doesn't whack us. */
	setsid();

	/* Second fork: the session leader exits, so the surviving child
	 * cannot regain a controlling terminal. */
	child = fork();
	if (child < 0)
		barf_perror("Failed to fork daemon");
	if (child != 0)
		exit(0);

	/* Move off any mount points we might be in. */
	if (chdir("/") == -1)
		barf_perror("Failed to chdir");

	/* Discard our parent's old-fashioned umask prejudices. */
	umask(0);
}
/*
 * Print the command-line help to stderr.
 *
 * The watch quota is listed as --watch-nb, which is the name registered
 * in the long-option table; the text previously advertised a
 * non-existent --entry-watch.
 */
static void usage(void)
{
	fprintf(stderr,
"Usage:\n"
"\n"
" xenstored <options>\n"
"\n"
"where options may include:\n"
"\n"
" --no-domain-init to state that xenstored should not initialise dom0,\n"
" --pid-file <file> giving a file for the daemon's pid to be written,\n"
" --help to output this message,\n"
" --no-fork to request that the daemon does not fork,\n"
" --output-pid to request that the pid of the daemon is output,\n"
" --trace-file <file> giving the file for logging, and\n"
" --entry-nb <nb> limit the number of entries per domain,\n"
" --entry-size <size> limit the size of entry per domain, and\n"
" --watch-nb <nb> limit the number of watches per domain,\n"
" --transaction <nb> limit the number of transaction allowed per domain,\n"
" --no-recovery to request that no recovery should be attempted when\n"
" the store is corrupted (debug only),\n"
" --preserve-local to request that /local is preserved on start-up,\n"
" --verbose to request verbose execution.\n");
}
/*
 * Long options accepted by main().  Each entry maps to the short-option
 * character that main()'s switch handles; the flag pointer is NULL
 * throughout, so getopt_long() returns that character.
 */
static struct option options[] = {
	{ "no-domain-init", no_argument,       NULL, 'D' },
	{ "entry-nb",       required_argument, NULL, 'E' },
	{ "pid-file",       required_argument, NULL, 'F' },
	{ "help",           no_argument,       NULL, 'H' },
	{ "no-fork",        no_argument,       NULL, 'N' },
	{ "output-pid",     no_argument,       NULL, 'P' },
	{ "entry-size",     required_argument, NULL, 'S' },
	{ "trace-file",     required_argument, NULL, 'T' },
	{ "transaction",    required_argument, NULL, 't' },
	{ "no-recovery",    no_argument,       NULL, 'R' },
	{ "preserve-local", no_argument,       NULL, 'L' },
	{ "verbose",        no_argument,       NULL, 'V' },
	{ "watch-nb",       required_argument, NULL, 'W' },
	{ NULL,             0,                 NULL, 0 }	/* terminator */
};
1764 extern void dump_conn(struct connection *conn);
1766 int main(int argc, char *argv[])
1768 int opt, *sock, *ro_sock, max;
1769 struct sockaddr_un addr;
1770 fd_set inset, outset;
1771 bool dofork = true;
1772 bool outputpid = false;
1773 bool no_domain_init = false;
1774 const char *pidfile = NULL;
1775 int evtchn_fd = -1;
1776 struct timeval *timeout;
1778 while ((opt = getopt_long(argc, argv, "DE:F:HNPS:t:T:RLVW:", options,
1779 NULL)) != -1) {
1780 switch (opt) {
1781 case 'D':
1782 no_domain_init = true;
1783 break;
1784 case 'E':
1785 quota_nb_entry_per_domain = strtol(optarg, NULL, 10);
1786 break;
1787 case 'F':
1788 pidfile = optarg;
1789 break;
1790 case 'H':
1791 usage();
1792 return 0;
1793 case 'N':
1794 dofork = false;
1795 break;
1796 case 'P':
1797 outputpid = true;
1798 break;
1799 case 'R':
1800 recovery = false;
1801 break;
1802 case 'L':
1803 remove_local = false;
1804 break;
1805 case 'S':
1806 quota_max_entry_size = strtol(optarg, NULL, 10);
1807 break;
1808 case 't':
1809 quota_max_transaction = strtol(optarg, NULL, 10);
1810 break;
1811 case 'T':
1812 tracefile = optarg;
1813 break;
1814 case 'V':
1815 verbose = true;
1816 break;
1817 case 'W':
1818 quota_nb_watch_per_domain = strtol(optarg, NULL, 10);
1819 break;
1822 if (optind != argc)
1823 barf("%s: No arguments desired", argv[0]);
1825 reopen_log();
1827 /* make sure xenstored directory exists */
1828 if (mkdir(xs_daemon_rundir(), 0755)) {
1829 if (errno != EEXIST) {
1830 perror("error: mkdir daemon rundir");
1831 exit(-1);
1835 if (mkdir(xs_daemon_rootdir(), 0755)) {
1836 if (errno != EEXIST) {
1837 perror("error: mkdir daemon rootdir");
1838 exit(-1);
1842 if (dofork) {
1843 openlog("xenstored", 0, LOG_DAEMON);
1844 daemonize();
1846 if (pidfile)
1847 write_pidfile(pidfile);
1849 /* Talloc leak reports go to stderr, which is closed if we fork. */
1850 if (!dofork)
1851 talloc_enable_leak_report_full();
1853 /* Create sockets for them to listen to. */
1854 sock = talloc(talloc_autofree_context(), int);
1855 *sock = socket(PF_UNIX, SOCK_STREAM, 0);
1856 if (*sock < 0)
1857 barf_perror("Could not create socket");
1858 ro_sock = talloc(talloc_autofree_context(), int);
1859 *ro_sock = socket(PF_UNIX, SOCK_STREAM, 0);
1860 if (*ro_sock < 0)
1861 barf_perror("Could not create socket");
1862 talloc_set_destructor(sock, destroy_fd);
1863 talloc_set_destructor(ro_sock, destroy_fd);
1865 /* Don't kill us with SIGPIPE. */
1866 signal(SIGPIPE, SIG_IGN);
1868 /* FIXME: Be more sophisticated, don't mug running daemon. */
1869 unlink(xs_daemon_socket());
1870 unlink(xs_daemon_socket_ro());
1872 addr.sun_family = AF_UNIX;
1873 strcpy(addr.sun_path, xs_daemon_socket());
1874 if (bind(*sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1875 barf_perror("Could not bind socket to %s", xs_daemon_socket());
1876 strcpy(addr.sun_path, xs_daemon_socket_ro());
1877 if (bind(*ro_sock, (struct sockaddr *)&addr, sizeof(addr)) != 0)
1878 barf_perror("Could not bind socket to %s",
1879 xs_daemon_socket_ro());
1880 if (chmod(xs_daemon_socket(), 0600) != 0
1881 || chmod(xs_daemon_socket_ro(), 0660) != 0)
1882 barf_perror("Could not chmod sockets");
1884 if (listen(*sock, 1) != 0
1885 || listen(*ro_sock, 1) != 0)
1886 barf_perror("Could not listen on sockets");
1888 if (pipe(reopen_log_pipe)) {
1889 barf_perror("pipe");
1892 /* Setup the database */
1893 setup_structure();
1895 /* Listen to hypervisor. */
1896 if (!no_domain_init)
1897 domain_init();
1899 /* Restore existing connections. */
1900 restore_existing_connections();
1902 if (outputpid) {
1903 printf("%ld\n", (long)getpid());
1904 fflush(stdout);
1907 /* redirect to /dev/null now we're ready to accept connections */
1908 if (dofork) {
1909 int devnull = open("/dev/null", O_RDWR);
1910 if (devnull == -1)
1911 barf_perror("Could not open /dev/null\n");
1912 dup2(devnull, STDIN_FILENO);
1913 dup2(devnull, STDOUT_FILENO);
1914 dup2(devnull, STDERR_FILENO);
1915 close(devnull);
1916 xprintf = trace;
1919 signal(SIGHUP, trigger_reopen_log);
1921 if (xce_handle != -1)
1922 evtchn_fd = xc_evtchn_fd(xce_handle);
1924 /* Get ready to listen to the tools. */
1925 max = initialize_set(&inset, &outset, *sock, *ro_sock, &timeout);
1927 /* Tell the kernel we're up and running. */
1928 xenbus_notify_running();
1930 /* Main loop. */
1931 for (;;) {
1932 struct connection *conn, *old_conn;
1934 if (select(max+1, &inset, &outset, NULL, timeout) < 0) {
1935 if (errno == EINTR)
1936 continue;
1937 barf_perror("Select failed");
1940 if (FD_ISSET(reopen_log_pipe[0], &inset)) {
1941 char c;
1942 if (read(reopen_log_pipe[0], &c, 1) != 1)
1943 barf_perror("read failed");
1944 reopen_log();
1947 if (FD_ISSET(*sock, &inset))
1948 accept_connection(*sock, true);
1950 if (FD_ISSET(*ro_sock, &inset))
1951 accept_connection(*ro_sock, false);
1953 if (evtchn_fd != -1 && FD_ISSET(evtchn_fd, &inset))
1954 handle_event();
1956 conn = list_entry(connections.next, typeof(*conn), list);
1957 while (&conn->list != &connections) {
1958 talloc_increase_ref_count(conn);
1960 if (conn->domain) {
1961 if (domain_can_read(conn))
1962 handle_input(conn);
1963 if (domain_can_write(conn) &&
1964 !list_empty(&conn->out_list))
1965 handle_output(conn);
1966 } else {
1967 if (FD_ISSET(conn->fd, &inset))
1968 handle_input(conn);
1969 if (FD_ISSET(conn->fd, &outset))
1970 handle_output(conn);
1973 old_conn = conn;
1974 conn = list_entry(old_conn->list.next,
1975 typeof(*conn), list);
1976 talloc_free(old_conn);
1979 max = initialize_set(&inset, &outset, *sock, *ro_sock,
1980 &timeout);
1984 /*
1985 * Local variables:
1986 * c-file-style: "linux"
1987 * indent-tabs-mode: t
1988 * c-indent-level: 8
1989 * c-basic-offset: 8
1990 * tab-width: 8
1991 * End:
1992 */