debuggers.hg

changeset 19948:5c40f649a6a4

fs-back: better error handling in fs-backend

Currently most of the error checking in fs-backend is done with
asserts, which terminate the whole daemon when a single error occurs
on a single request. This patch replaces the asserts with debugging
messages and terminates the connection on which the error occurred.
With this patch applied I was able to successfully complete over 1000
live migrations with stubdoms.

Signed-off-by: Stefano Stabellini <stefano.stabellini@eu.citrix.com>
author Keir Fraser <keir.fraser@citrix.com>
date Wed Jul 08 10:58:09 2009 +0100 (2009-07-08)
parents 797006dadc38
children e7d4fd334626
files tools/fs-back/fs-backend.c tools/fs-back/fs-backend.h tools/fs-back/fs-ops.c tools/fs-back/fs-xenbus.c
line diff
     1.1 --- a/tools/fs-back/fs-backend.c	Wed Jul 08 10:51:00 2009 +0100
     1.2 +++ b/tools/fs-back/fs-backend.c	Wed Jul 08 10:58:09 2009 +0100
     1.3 @@ -144,7 +144,8 @@ moretodo:
     1.4          xc_evtchn_notify(mount->evth, mount->local_evtchn);
     1.5  }
     1.6  
     1.7 -static void terminate_mount_request(struct fs_mount *mount) {
     1.8 +void terminate_mount_request(struct fs_mount *mount)
     1.9 +{
    1.10      int count = 0, i;
    1.11  
    1.12      FS_DEBUG("terminate_mount_request %s\n", mount->frontend);
    1.13 @@ -158,7 +159,13 @@ static void terminate_mount_request(stru
    1.14          }
    1.15      mount->nr_entries = count;
    1.16  
    1.17 -    while (!xenbus_frontend_state_changed(mount, STATE_CLOSING));
    1.18 +    /* wait for the frontend to shut down but don't wait more than 3
    1.19 +     * seconds */
    1.20 +    i = 0;
    1.21 +    while (!xenbus_frontend_state_changed(mount, STATE_CLOSING) && i < 3) {
    1.22 +        sleep(1);
    1.23 +        i++;
    1.24 +    }
    1.25      xenbus_write_backend_state(mount, STATE_CLOSED);
    1.26  
    1.27      xc_gnttab_munmap(mount->gnth, mount->ring.sring, mount->shared_ring_size);
    1.28 @@ -183,7 +190,7 @@ static void handle_connection(int fronte
    1.29  {
    1.30      struct fs_mount *mount;
    1.31      struct fs_export *export;
    1.32 -    struct fsif_sring *sring;
    1.33 +    struct fsif_sring *sring = NULL;
    1.34      uint32_t dom_ids[MAX_RING_SIZE];
    1.35      int i;
    1.36  
    1.37 @@ -204,24 +211,38 @@ static void handle_connection(int fronte
    1.38      }
    1.39  
    1.40      mount = (struct fs_mount*)malloc(sizeof(struct fs_mount));
    1.41 +    memset(mount, 0, sizeof(struct fs_mount));
    1.42      mount->dom_id = frontend_dom_id;
    1.43      mount->export = export;
    1.44      mount->mount_id = mount_id++;
    1.45 -    xenbus_read_mount_request(mount, frontend);
    1.46 +    if (xenbus_read_mount_request(mount, frontend) < 0)
    1.47 +        goto error;
    1.48      FS_DEBUG("Frontend found at: %s (gref=%d, evtchn=%d)\n", 
    1.49              mount->frontend, mount->grefs[0], mount->remote_evtchn);
    1.50 -    xenbus_write_backend_node(mount);
    1.51 +    if (!xenbus_write_backend_node(mount)) {
    1.52 +        FS_DEBUG("ERROR: failed to write backend node on xenbus\n");
    1.53 +        goto error;
    1.54 +    }
    1.55      mount->evth = -1;
    1.56      mount->evth = xc_evtchn_open(); 
    1.57 -    assert(mount->evth != -1);
    1.58 +    if (mount->evth < 0) {
    1.59 +        FS_DEBUG("ERROR: Couldn't open evtchn!\n");
    1.60 +        goto error;
    1.61 +    }
    1.62      mount->local_evtchn = -1;
    1.63      mount->local_evtchn = xc_evtchn_bind_interdomain(mount->evth, 
    1.64                                                       mount->dom_id, 
    1.65                                                       mount->remote_evtchn);
    1.66 -    assert(mount->local_evtchn != -1);
    1.67 +    if (mount->local_evtchn < 0) {
    1.68 +        FS_DEBUG("ERROR: Couldn't bind evtchn!\n");
    1.69 +        goto error;
    1.70 +    }
    1.71      mount->gnth = -1;
    1.72      mount->gnth = xc_gnttab_open(); 
    1.73 -    assert(mount->gnth != -1);
    1.74 +    if (mount->gnth < 0) {
    1.75 +        FS_DEBUG("ERROR: Couldn't open gnttab!\n");
    1.76 +        goto error;
    1.77 +    }
    1.78      for(i=0; i<mount->shared_ring_size; i++)
    1.79          dom_ids[i] = mount->dom_id;
    1.80      sring = xc_gnttab_map_grant_refs(mount->gnth,
    1.81 @@ -230,16 +251,40 @@ static void handle_connection(int fronte
    1.82                                       mount->grefs,
    1.83                                       PROT_READ | PROT_WRITE);
    1.84  
    1.85 +    if (!sring) {
    1.86 +        FS_DEBUG("ERROR: Couldn't amp grant refs!\n");
    1.87 +        goto error;
    1.88 +    }
    1.89 +
    1.90      BACK_RING_INIT(&mount->ring, sring, mount->shared_ring_size * XC_PAGE_SIZE);
    1.91      mount->nr_entries = mount->ring.nr_ents; 
    1.92      for (i = 0; i < MAX_FDS; i++)
    1.93          mount->fds[i] = -1;
    1.94  
    1.95      LIST_INSERT_HEAD(&mount_requests_head, mount, entries);
    1.96 -    xenbus_watch_frontend_state(mount);
    1.97 -    xenbus_write_backend_state(mount, STATE_READY);
    1.98 -    
    1.99 +    if (!xenbus_watch_frontend_state(mount)) {
   1.100 +        FS_DEBUG("ERROR: failed to watch frontend state on xenbus\n");
   1.101 +        goto error;
   1.102 +    }
   1.103 +    if (!xenbus_write_backend_state(mount, STATE_READY)) {
   1.104 +        FS_DEBUG("ERROR: failed to write backend state to xenbus\n");
   1.105 +        goto error;
   1.106 +    }
   1.107 +
   1.108      allocate_request_array(mount);
   1.109 +
   1.110 +    return;
   1.111 +
   1.112 +error:
   1.113 +    xenbus_write_backend_state(mount, STATE_CLOSED);
   1.114 +    if (sring)
   1.115 +        xc_gnttab_munmap(mount->gnth, mount->ring.sring, mount->shared_ring_size);
   1.116 +    if (mount->gnth > 0)
   1.117 +        xc_gnttab_close(mount->gnth);
   1.118 +    if (mount->local_evtchn > 0)
   1.119 +        xc_evtchn_unbind(mount->evth, mount->local_evtchn);
   1.120 +    if (mount->evth > 0)
   1.121 +        xc_evtchn_close(mount->evth);
   1.122  }
   1.123  
   1.124  static void await_connections(void)
     2.1 --- a/tools/fs-back/fs-backend.h	Wed Jul 08 10:51:00 2009 +0100
     2.2 +++ b/tools/fs-back/fs-backend.h	Wed Jul 08 10:58:09 2009 +0100
     2.3 @@ -56,6 +56,7 @@ struct fs_mount
     2.4      LIST_ENTRY(fs_mount) entries;
     2.5  };
     2.6  
     2.7 +void terminate_mount_request(struct fs_mount *mount);
     2.8  
     2.9  /* Handle to XenStore driver */
    2.10  extern struct xs_handle *xsh;
    2.11 @@ -63,12 +64,12 @@ extern struct xs_handle *xsh;
    2.12  bool xenbus_create_request_node(void);
    2.13  int xenbus_register_export(struct fs_export *export);
    2.14  int xenbus_get_watch_fd(void);
    2.15 -void xenbus_read_mount_request(struct fs_mount *mount, char *frontend);
    2.16 -void xenbus_write_backend_node(struct fs_mount *mount);
    2.17 -void xenbus_write_backend_state(struct fs_mount *mount, const char *state);
    2.18 +int xenbus_read_mount_request(struct fs_mount *mount, char *frontend);
    2.19 +bool xenbus_write_backend_node(struct fs_mount *mount);
    2.20 +bool xenbus_write_backend_state(struct fs_mount *mount, const char *state);
    2.21  int xenbus_frontend_state_changed(struct fs_mount *mount, const char *oldstate);
    2.22 -void xenbus_watch_frontend_state(struct fs_mount *mount);
    2.23 -void xenbus_unwatch_frontend_state(struct fs_mount *mount);
    2.24 +bool xenbus_watch_frontend_state(struct fs_mount *mount);
    2.25 +bool xenbus_unwatch_frontend_state(struct fs_mount *mount);
    2.26  char* xenbus_read_frontend_state(struct fs_mount *mount);
    2.27  
    2.28  /* File operations, implemented in fs-ops.c */
     3.1 --- a/tools/fs-back/fs-ops.c	Wed Jul 08 10:51:00 2009 +0100
     3.2 +++ b/tools/fs-back/fs-ops.c	Wed Jul 08 10:58:09 2009 +0100
     3.3 @@ -99,7 +99,10 @@ static void dispatch_file_open(struct fs
     3.4          }
     3.5      }
     3.6  out:
     3.7 -    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
     3.8 +    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
     3.9 +        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
    3.10 +        terminate_mount_request(mount);
    3.11 +    }
    3.12      /* We can advance the request consumer index, from here on, the request
    3.13       * should not be used (it may be overrinden by a response) */
    3.14      mount->ring.req_cons++;
    3.15 @@ -187,7 +190,11 @@ static void dispatch_file_read(struct fs
    3.16      priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
    3.17      priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
    3.18      priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
    3.19 -    assert(aio_read(&priv_req->aiocb) >= 0);
    3.20 +    if (aio_read(&priv_req->aiocb) < 0) {
    3.21 +        FS_DEBUG("ERROR: aio_read failed errno=%d\n", errno);
    3.22 +        xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count);
    3.23 +        terminate_mount_request(mount);
    3.24 +    }
    3.25  
    3.26      /* We can advance the request consumer index, from here on, the request
    3.27       * should not be used (it may be overrinden by a response) */
    3.28 @@ -201,9 +208,10 @@ static void end_file_read(struct fs_moun
    3.29      uint16_t req_id;
    3.30  
    3.31      /* Release the grant */
    3.32 -    assert(xc_gnttab_munmap(mount->gnth, 
    3.33 -                            priv_req->page, 
    3.34 -                            priv_req->count) == 0);
    3.35 +    if (xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count) != 0) {
    3.36 +        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
    3.37 +        terminate_mount_request(mount);
    3.38 +    }
    3.39  
    3.40      /* Get a response from the ring */
    3.41      rsp_idx = mount->ring.rsp_prod_pvt++;
    3.42 @@ -257,7 +265,11 @@ static void dispatch_file_write(struct f
    3.43      priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
    3.44      priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
    3.45      priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
    3.46 -    assert(aio_write(&priv_req->aiocb) >= 0);
    3.47 +    if (aio_write(&priv_req->aiocb) < 0) {
    3.48 +        FS_DEBUG("ERROR: aio_write failed errno=%d\n", errno);
    3.49 +        xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count);
    3.50 +        terminate_mount_request(mount);
    3.51 +    }
    3.52  
    3.53       
    3.54      /* We can advance the request consumer index, from here on, the request
    3.55 @@ -272,9 +284,10 @@ static void end_file_write(struct fs_mou
    3.56      uint16_t req_id;
    3.57  
    3.58      /* Release the grant */
    3.59 -    assert(xc_gnttab_munmap(mount->gnth, 
    3.60 -                            priv_req->page, 
    3.61 -                            priv_req->count) == 0);
    3.62 +    if (xc_gnttab_munmap(mount->gnth, priv_req->page, priv_req->count) != 0) {
    3.63 +        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
    3.64 +        terminate_mount_request(mount);
    3.65 +    }
    3.66      
    3.67      /* Get a response from the ring */
    3.68      rsp_idx = mount->ring.rsp_prod_pvt++;
    3.69 @@ -392,7 +405,10 @@ static void dispatch_remove(struct fs_mo
    3.70          ret = remove(file_name);
    3.71      }
    3.72      FS_DEBUG("Got ret: %d\n", ret);
    3.73 -    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
    3.74 +    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
    3.75 +        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
    3.76 +        terminate_mount_request(mount);
    3.77 +    }
    3.78      /* We can advance the request consumer index, from here on, the request
    3.79       * should not be used (it may be overrinden by a response) */
    3.80      mount->ring.req_cons++;
    3.81 @@ -435,7 +451,10 @@ static void dispatch_rename(struct fs_mo
    3.82          ret = rename(old_file_name, new_file_name);
    3.83      }
    3.84      FS_DEBUG("Got ret: %d\n", ret);
    3.85 -    assert(xc_gnttab_munmap(mount->gnth, buf, 1) == 0);
    3.86 +    if (xc_gnttab_munmap(mount->gnth, buf, 1) != 0) {
    3.87 +        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
    3.88 +        terminate_mount_request(mount);
    3.89 +    }
    3.90      /* We can advance the request consumer index, from here on, the request
    3.91       * should not be used (it may be overrinden by a response) */
    3.92      mount->ring.req_cons++;
    3.93 @@ -500,7 +519,10 @@ static void dispatch_create(struct fs_mo
    3.94          }
    3.95      }
    3.96  out:
    3.97 -    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
    3.98 +    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
    3.99 +        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
   3.100 +        terminate_mount_request(mount);
   3.101 +    }
   3.102      FS_DEBUG("Got ret %d (errno=%d)\n", ret, errno);
   3.103  
   3.104      /* Get a response from the ring */
   3.105 @@ -556,7 +578,8 @@ static void dispatch_list(struct fs_moun
   3.106      /* If there was any error with reading the directory, errno will be set */
   3.107      error_code = errno;
   3.108      /* Copy file names of the remaining non-NULL dirents into buf */
   3.109 -    assert(NAME_MAX < XC_PAGE_SIZE >> 1);
   3.110 +    if (NAME_MAX >= XC_PAGE_SIZE >> 1)
   3.111 +        goto error_out;
   3.112      while(dirent != NULL && 
   3.113              (XC_PAGE_SIZE - ((unsigned long)buf & XC_PAGE_MASK) > NAME_MAX))
   3.114      {
   3.115 @@ -572,8 +595,11 @@ error_out:
   3.116      ret_val = ((nr_files << NR_FILES_SHIFT) & NR_FILES_MASK) | 
   3.117                ((error_code << ERROR_SHIFT) & ERROR_MASK) | 
   3.118                (dirent != NULL ? HAS_MORE_FLAG : 0);
   3.119 -    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
   3.120 -    
   3.121 +    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
   3.122 +        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
   3.123 +        terminate_mount_request(mount);
   3.124 +    }
   3.125 +
   3.126      /* Get a response from the ring */
   3.127      rsp_idx = mount->ring.rsp_prod_pvt++;
   3.128      FS_DEBUG("Writing response at: idx=%d, id=%d\n", rsp_idx, req_id);
   3.129 @@ -640,7 +666,10 @@ static void dispatch_fs_space(struct fs_
   3.130      if(ret >= 0)
   3.131          ret = stat.f_bsize * stat.f_bfree;
   3.132  
   3.133 -    assert(xc_gnttab_munmap(mount->gnth, file_name, 1) == 0);
   3.134 +    if (xc_gnttab_munmap(mount->gnth, file_name, 1) != 0) {
   3.135 +        FS_DEBUG("ERROR: xc_gnttab_munmap failed errno=%d\n", errno);
   3.136 +        terminate_mount_request(mount);
   3.137 +    }
   3.138      /* We can advance the request consumer index, from here on, the request
   3.139       * should not be used (it may be overrinden by a response) */
   3.140      mount->ring.req_cons++;
   3.141 @@ -680,9 +709,11 @@ static void dispatch_file_sync(struct fs
   3.142      priv_req->aiocb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
   3.143      priv_req->aiocb.aio_sigevent.sigev_signo = SIGUSR2;
   3.144      priv_req->aiocb.aio_sigevent.sigev_value.sival_ptr = priv_req;
   3.145 -    assert(aio_fsync(O_SYNC, &priv_req->aiocb) >= 0);
   3.146 +    if (aio_fsync(O_SYNC, &priv_req->aiocb) < 0) {
   3.147 +        FS_DEBUG("ERROR: aio_fsync failed errno=%d\n", errno);
   3.148 +        terminate_mount_request(mount);
   3.149 +    }
   3.150  
   3.151 -     
   3.152      /* We can advance the request consumer index, from here on, the request
   3.153       * should not be used (it may be overrinden by a response) */
   3.154      mount->ring.req_cons++;
     4.1 --- a/tools/fs-back/fs-xenbus.c	Wed Jul 08 10:51:00 2009 +0100
     4.2 +++ b/tools/fs-back/fs-xenbus.c	Wed Jul 08 10:58:09 2009 +0100
     4.3 @@ -107,11 +107,15 @@ int xenbus_get_watch_fd(void)
     4.4      int res;
     4.5      assert(xsh != NULL);
     4.6      res = xs_watch(xsh, WATCH_NODE, "conn-watch");
     4.7 -    assert(res);
     4.8 +    if (!res) {
     4.9 +        FS_DEBUG("ERROR: xs_watch %s failed ret=%d errno=%d\n",
    4.10 +                 WATCH_NODE, res, errno);
    4.11 +        return -1;
    4.12 +    }
    4.13      return xs_fileno(xsh); 
    4.14  }
    4.15  
    4.16 -void xenbus_read_mount_request(struct fs_mount *mount, char *frontend)
    4.17 +int xenbus_read_mount_request(struct fs_mount *mount, char *frontend)
    4.18  {
    4.19      char node[1024];
    4.20      char *s;
    4.21 @@ -126,12 +130,18 @@ void xenbus_read_mount_request(struct fs
    4.22      mount->frontend = frontend;
    4.23      snprintf(node, sizeof(node), "%s/state", frontend);
    4.24      s = xs_read(xsh, XBT_NULL, node, NULL);
    4.25 -    assert(strcmp(s, STATE_READY) == 0);
    4.26 +    if (strcmp(s, STATE_READY) != 0) {
    4.27 +        FS_DEBUG("ERROR: frontend not read\n");
    4.28 +        goto error;
    4.29 +    }
    4.30      free(s);
    4.31      snprintf(node, sizeof(node), "%s/ring-size", frontend);
    4.32      s = xs_read(xsh, XBT_NULL, node, NULL);
    4.33      mount->shared_ring_size = atoi(s);
    4.34 -    assert(mount->shared_ring_size <= MAX_RING_SIZE);
    4.35 +    if (mount->shared_ring_size > MAX_RING_SIZE) {
    4.36 +        FS_DEBUG("ERROR: shared_ring_size (%d) > MAX_RING_SIZE\n", mount->shared_ring_size);
    4.37 +        goto error;
    4.38 +    }
    4.39      free(s);
    4.40      for(i=0; i<mount->shared_ring_size; i++)
    4.41      {
    4.42 @@ -144,6 +154,11 @@ void xenbus_read_mount_request(struct fs
    4.43      s = xs_read(xsh, XBT_NULL, node, NULL);
    4.44      mount->remote_evtchn = atoi(s);
    4.45      free(s);
    4.46 +    return 0;
    4.47 +
    4.48 +error:
    4.49 +    free(s);
    4.50 +    return -1;
    4.51  }
    4.52  
    4.53  /* Small utility function to figure out our domain id */
    4.54 @@ -161,7 +176,7 @@ static int get_self_id(void)
    4.55  } 
    4.56  
    4.57  
    4.58 -void xenbus_write_backend_node(struct fs_mount *mount)
    4.59 +bool xenbus_write_backend_node(struct fs_mount *mount)
    4.60  {
    4.61      char node[1024], backend_node[1024];
    4.62      int self_id;
    4.63 @@ -175,10 +190,10 @@ void xenbus_write_backend_node(struct fs
    4.64      xs_write(xsh, XBT_NULL, node, backend_node, strlen(backend_node));
    4.65  
    4.66      snprintf(node, sizeof(node), ROOT_NODE"/%d/state", mount->mount_id);
    4.67 -    xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED));
    4.68 +    return xs_write(xsh, XBT_NULL, node, STATE_INITIALISED, strlen(STATE_INITIALISED));
    4.69  }
    4.70  
    4.71 -void xenbus_write_backend_state(struct fs_mount *mount, const char *state)
    4.72 +bool xenbus_write_backend_state(struct fs_mount *mount, const char *state)
    4.73  {
    4.74      char node[1024];
    4.75      int self_id;
    4.76 @@ -186,29 +201,25 @@ void xenbus_write_backend_state(struct f
    4.77      assert(xsh != NULL);
    4.78      self_id = get_self_id();
    4.79      snprintf(node, sizeof(node), ROOT_NODE"/%d/state", mount->mount_id);
    4.80 -    xs_write(xsh, XBT_NULL, node, state, strlen(state));
    4.81 +    return xs_write(xsh, XBT_NULL, node, state, strlen(state));
    4.82  }
    4.83  
    4.84 -void xenbus_watch_frontend_state(struct fs_mount *mount)
    4.85 +bool xenbus_watch_frontend_state(struct fs_mount *mount)
    4.86  {
    4.87 -    int res;
    4.88      char statepath[1024];
    4.89  
    4.90      assert(xsh != NULL);
    4.91      snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
    4.92 -    res = xs_watch(xsh, statepath, "frontend-state");
    4.93 -    assert(res);
    4.94 +    return xs_watch(xsh, statepath, "frontend-state");
    4.95  }
    4.96  
    4.97 -void xenbus_unwatch_frontend_state(struct fs_mount *mount)
    4.98 +bool xenbus_unwatch_frontend_state(struct fs_mount *mount)
    4.99  {
   4.100 -    int res;
   4.101      char statepath[1024];
   4.102  
   4.103      assert(xsh != NULL);
   4.104      snprintf(statepath, sizeof(statepath), "%s/state", mount->frontend);
   4.105 -    res = xs_unwatch(xsh, statepath, "frontend-state");
   4.106 -    assert(res);
   4.107 +    return xs_unwatch(xsh, statepath, "frontend-state");
   4.108  }
   4.109  
   4.110  int xenbus_frontend_state_changed(struct fs_mount *mount, const char *oldstate)