xen-vtx-unstable

annotate linux-2.6-xen-sparse/drivers/xen/blkfront/blkfront.c @ 6776:e7c7196fa329

merge?
author cl349@firebug.cl.cam.ac.uk
date Tue Sep 13 15:46:49 2005 +0000 (2005-09-13)
parents 4d899a738d59 cdfa7dd00c44
children 72e4e2aab342
rev   line source
cl349@4087 1 /******************************************************************************
cl349@4087 2 * blkfront.c
cl349@4087 3 *
cl349@4087 4 * XenLinux virtual block-device driver.
cl349@4087 5 *
cl349@4087 6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
cl349@4087 7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
cl349@4087 8 * Copyright (c) 2004, Christian Limpach
cl349@4112 9 * Copyright (c) 2004, Andrew Warfield
cwc22@4461 10 * Copyright (c) 2005, Christopher Clark
cl349@4087 11 *
cl349@4087 12 * This file may be distributed separately from the Linux kernel, or
cl349@4087 13 * incorporated into other software packages, subject to the following license:
cl349@4087 14 *
cl349@4087 15 * Permission is hereby granted, free of charge, to any person obtaining a copy
cl349@4087 16 * of this source file (the "Software"), to deal in the Software without
cl349@4087 17 * restriction, including without limitation the rights to use, copy, modify,
cl349@4087 18 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
cl349@4087 19 * and to permit persons to whom the Software is furnished to do so, subject to
cl349@4087 20 * the following conditions:
cl349@4087 21 *
cl349@4087 22 * The above copyright notice and this permission notice shall be included in
cl349@4087 23 * all copies or substantial portions of the Software.
cl349@4087 24 *
cl349@4087 25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
cl349@4087 26 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
cl349@4087 27 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
cl349@4087 28 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
cl349@4087 29 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
cl349@4087 30 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
cl349@4087 31 * IN THE SOFTWARE.
cl349@4087 32 */
cl349@4087 33
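/*
 * Crude debug assertion: on failure, log the condition and its location,
 * then force a fatal fault by writing through a NULL pointer.
 */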
cwc22@4461 34 #if 1
kaf24@6456 35 #define ASSERT(p) \
kaf24@6456 36 do { if (!(p)) { printk("Assertion '%s' failed, line %d, file %s\n", \
kaf24@6456 37 #p, __LINE__, __FILE__); *(int*)0 = 0; } } while (0)
cwc22@4461 38 #else
cwc22@4461 39 #define ASSERT(_p)
cwc22@4461 40 #endif
cwc22@4461 41
cl349@4087 42 #include <linux/version.h>
cl349@4087 43 #include "block.h"
cl349@4087 44 #include <linux/cdrom.h>
cl349@4087 45 #include <linux/sched.h>
cl349@4087 46 #include <linux/interrupt.h>
cl349@4087 47 #include <scsi/scsi.h>
cl349@4112 48 #include <asm-xen/evtchn.h>
cl349@6259 49 #include <asm-xen/xenbus.h>
cwc22@4461 50 #include <asm-xen/xen-public/grant_table.h>
cwc22@4461 51 #include <asm-xen/gnttab.h>
cl349@4087 52
cl349@6341 53 #define BLKIF_STATE_DISCONNECTED 0
cl349@6341 54 #define BLKIF_STATE_CONNECTED 1
cl349@4087 55
cl349@6341 56 static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED;
cl349@4087 57
cwc22@4461 58 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
cwc22@4461 59 (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
kaf24@4611 60 #define GRANTREF_INVALID (1<<15)
cl349@6616 61 #define GRANT_INVALID_REF (0xFFFF)
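/*
 * GRANTREF_INVALID is a flag bit carried in a shadow request's
 * frame_and_sects entries: blkif_recover() re-grants any segment that has
 * it set (the bit is presumably set by the suspend path outside this file).
 * GRANT_INVALID_REF marks an unused ring_ref.
 */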
cl349@4087 62
kaf24@4611 63 static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
cl349@4087 64
cl349@6341 65 static void kick_pending_request_queues(struct blkfront_info *info);
cl349@4087 66
kaf24@4611 67 static void blkif_completion(struct blk_shadow *s);
cl349@4087 68
kaf24@6459 69 static inline int GET_ID_FROM_FREELIST(
kaf24@6459 70 struct blkfront_info *info)
kaf24@4611 71 {
kaf24@6459 72 unsigned long free = info->shadow_free;
kaf24@6456 73 BUG_ON(free > BLK_RING_SIZE);
kaf24@6459 74 info->shadow_free = info->shadow[free].req.id;
kaf24@6459 75 info->shadow[free].req.id = 0x0fffffee; /* debug */
kaf24@6456 76 return free;
cl349@4087 77 }
cl349@4087 78
kaf24@6459 79 static inline void ADD_ID_TO_FREELIST(
kaf24@6459 80 struct blkfront_info *info, unsigned long id)
cl349@4087 81 {
kaf24@6459 82 info->shadow[id].req.id = info->shadow_free;
kaf24@6459 83 info->shadow[id].request = 0;
kaf24@6459 84 info->shadow_free = id;
cl349@4087 85 }
cl349@4087 86
kaf24@4611 87 static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
cl349@4087 88 {
kaf24@6456 90 s->req = *r;
cl349@4087 91 }
cl349@4087 92
kaf24@4611 93 static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
cl349@4087 94 {
kaf24@6456 96 *r = s->req;
cl349@4087 97 }
cl349@4087 98
cl349@6338 99 static inline void flush_requests(struct blkfront_info *info)
cl349@4087 100 {
kaf24@6456 101 RING_PUSH_REQUESTS(&info->ring);
kaf24@6456 102 notify_via_evtchn(info->evtchn);
cl349@4087 103 }
cl349@4087 104
cl349@6341 105 static void kick_pending_request_queues(struct blkfront_info *info)
cl349@4087 106 {
cl349@6341 107 if (!RING_FULL(&info->ring)) {
cl349@6341 108 /* Re-enable calldowns. */
cl349@6341 109 blk_start_queue(info->rq);
cl349@6341 110 /* Kick things off immediately. */
cl349@6341 111 do_blkif_request(info->rq);
cl349@6341 112 }
cl349@6341 113 }
cl349@6341 114
cl349@6341 115 static void blkif_restart_queue(void *arg)
cl349@6341 116 {
cl349@6341 117 struct blkfront_info *info = (struct blkfront_info *)arg;
cl349@6341 118 spin_lock_irq(&blkif_io_lock);
cl349@6341 119 kick_pending_request_queues(info);
cl349@6341 120 spin_unlock_irq(&blkif_io_lock);
cl349@4087 121 }
cl349@4087 122
cl349@6342 123 static void blkif_restart_queue_callback(void *arg)
cl349@6342 124 {
cl349@6342 125 struct blkfront_info *info = (struct blkfront_info *)arg;
cl349@6342 126 schedule_work(&info->work);
cl349@4087 127 }
cl349@4087 128
cl349@4087 129 int blkif_open(struct inode *inode, struct file *filep)
cl349@4087 130 {
cl349@6341 131 return 0;
cl349@4087 132 }
cl349@4087 133
cl349@4087 134
cl349@4087 135 int blkif_release(struct inode *inode, struct file *filep)
cl349@4087 136 {
kaf24@6456 137 return 0;
cl349@4087 138 }
cl349@4087 139
cl349@4087 140
cl349@4087 141 int blkif_ioctl(struct inode *inode, struct file *filep,
cl349@4087 142 unsigned command, unsigned long argument)
cl349@4087 143 {
kaf24@6456 144 int i;
cl349@4087 145
kaf24@6456 146 DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
kaf24@6456 147 command, (long)argument, inode->i_rdev);
cl349@6341 148
kaf24@6456 149 switch ( command )
kaf24@6456 150 {
kaf24@6456 151 case HDIO_GETGEO:
kaf24@6456 152 /* return ENOSYS to use defaults */
kaf24@6456 153 return -ENOSYS;
cl349@4087 154
kaf24@6456 155 case CDROMMULTISESSION:
kaf24@6456 156 DPRINTK("FIXME: support multisession CDs later\n");
kaf24@6456 157 for (i = 0; i < sizeof(struct cdrom_multisession); i++)
kaf24@6456 158 if (put_user(0, (char *)(argument + i)))
kaf24@6456 159 return -EFAULT;
kaf24@6456 160 return 0;
cl349@4087 161
kaf24@6456 162 default:
kaf24@6456 163 /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
kaf24@6456 164 command);*/
kaf24@6456 165 return -EINVAL; /* same return as native Linux */
kaf24@6456 166 }
cl349@4087 167
kaf24@6456 168 return 0;
cl349@4087 169 }
cl349@4087 170
cl349@4087 171
cl349@4087 172 /*
cl349@4087 173 * blkif_queue_request
cl349@4087 174 *
cl349@6341 175 * Request block I/O.
cl349@4087 176 *
cl349@4087 177 * id: for guest use only.
cl349@4087 178 * operation: BLKIF_OP_{READ,WRITE,PROBE}
cl349@4087 179 * buffer: buffer to read/write into. this should be a
cl349@4087 180 * virtual address in the guest os.
cl349@4087 181 */
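/*
 * Each bio segment is granted to the backend individually; the grant
 * reference and the first/last 512-byte sectors of the page are packed
 * into a single frame_and_sects entry via blkif_fas_from_gref().
 */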
cl349@4087 182 static int blkif_queue_request(struct request *req)
cl349@4087 183 {
kaf24@6456 184 struct blkfront_info *info = req->rq_disk->private_data;
iap10@6681 185 unsigned long buffer_mfn;
kaf24@6456 186 blkif_request_t *ring_req;
kaf24@6456 187 struct bio *bio;
kaf24@6456 188 struct bio_vec *bvec;
kaf24@6456 189 int idx;
kaf24@6456 190 unsigned long id;
kaf24@6456 191 unsigned int fsect, lsect;
kaf24@6456 192 int ref;
kaf24@6456 193 grant_ref_t gref_head;
cl349@4087 194
kaf24@6456 195 if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
kaf24@6456 196 return 1;
cl349@4087 197
kaf24@6456 198 if (gnttab_alloc_grant_references(
kaf24@6456 199 BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
kaf24@6456 200 gnttab_request_free_callback(
kaf24@6456 201 &info->callback,
kaf24@6456 202 blkif_restart_queue_callback,
kaf24@6456 203 info,
kaf24@6456 204 BLKIF_MAX_SEGMENTS_PER_REQUEST);
kaf24@6456 205 return 1;
kaf24@6456 206 }
cl349@6341 207
kaf24@6456 208 /* Fill out a communications ring structure. */
kaf24@6456 209 ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
kaf24@6459 210 id = GET_ID_FROM_FREELIST(info);
kaf24@6459 211 info->shadow[id].request = (unsigned long)req;
cl349@4087 212
kaf24@6456 213 ring_req->id = id;
kaf24@6456 214 ring_req->operation = rq_data_dir(req) ?
kaf24@6456 215 BLKIF_OP_WRITE : BLKIF_OP_READ;
kaf24@6456 216 ring_req->sector_number = (blkif_sector_t)req->sector;
kaf24@6456 217 ring_req->handle = info->handle;
cl349@4087 218
kaf24@6456 219 ring_req->nr_segments = 0;
kaf24@6456 220 rq_for_each_bio (bio, req) {
kaf24@6456 221 bio_for_each_segment (bvec, bio, idx) {
kaf24@6456 222 BUG_ON(ring_req->nr_segments
kaf24@6456 223 == BLKIF_MAX_SEGMENTS_PER_REQUEST);
iap10@6681 224 buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
kaf24@6456 225 fsect = bvec->bv_offset >> 9;
kaf24@6456 226 lsect = fsect + (bvec->bv_len >> 9) - 1;
kaf24@6456 227 /* install a grant reference. */
kaf24@6456 228 ref = gnttab_claim_grant_reference(&gref_head);
kaf24@6456 229 ASSERT(ref != -ENOSPC);
kaf24@6456 230
kaf24@6456 231 gnttab_grant_foreign_access_ref(
kaf24@6456 232 ref,
kaf24@6456 233 info->backend_id,
iap10@6681 234 buffer_mfn,
kaf24@6456 235 rq_data_dir(req) );
cwc22@4461 236
kaf24@6459 237 info->shadow[id].frame[ring_req->nr_segments] =
iap10@6681 238 buffer_mfn;
cwc22@4461 239
kaf24@6456 240 ring_req->frame_and_sects[ring_req->nr_segments] =
kaf24@6456 241 blkif_fas_from_gref(ref, fsect, lsect);
cl349@6341 242
kaf24@6456 243 ring_req->nr_segments++;
kaf24@6456 244 }
kaf24@6456 245 }
cl349@4087 246
kaf24@6456 247 info->ring.req_prod_pvt++;
cl349@6341 248
kaf24@6456 249 /* Keep a private copy so we can reissue requests when recovering. */
kaf24@6459 250 pickle_request(&info->shadow[id], ring_req);
cl349@4087 251
kaf24@6456 252 gnttab_free_grant_references(gref_head);
cl349@6341 253
kaf24@6456 254 return 0;
cl349@4087 255 }
cl349@4087 256
cl349@4087 257 /*
cl349@4087 258 * do_blkif_request
cl349@4087 259 * read a block; request is in a request queue
cl349@4087 260 */
cl349@4087 261 void do_blkif_request(request_queue_t *rq)
cl349@4087 262 {
kaf24@6456 263 struct blkfront_info *info = NULL;
kaf24@6456 264 struct request *req;
kaf24@6456 265 int queued;
cl349@4087 266
kaf24@6456 267 DPRINTK("Entered do_blkif_request\n");
cl349@4087 268
kaf24@6456 269 queued = 0;
cl349@4087 270
kaf24@6456 271 while ((req = elv_next_request(rq)) != NULL) {
kaf24@6456 272 info = req->rq_disk->private_data;
cl349@6338 273
kaf24@6456 274 if (!blk_fs_request(req)) {
kaf24@6456 275 end_request(req, 0);
kaf24@6456 276 continue;
kaf24@6456 277 }
kaf24@6456 278
kaf24@6456 279 if (RING_FULL(&info->ring))
kaf24@6456 280 goto wait;
cl349@4087 281
kaf24@6456 282 DPRINTK("do_blk_req %p: cmd %p, sec %lx, "
kaf24@6456 283 "(%u/%li) buffer:%p [%s]\n",
kaf24@6456 284 req, req->cmd, req->sector, req->current_nr_sectors,
kaf24@6456 285 req->nr_sectors, req->buffer,
kaf24@6456 286 rq_data_dir(req) ? "write" : "read");
kaf24@4611 287
kaf24@6456 288 blkdev_dequeue_request(req);
kaf24@6456 289 if (blkif_queue_request(req)) {
kaf24@6456 290 blk_requeue_request(rq, req);
kaf24@6456 291 wait:
kaf24@6456 292 /* Avoid pointless unplugs. */
kaf24@6456 293 blk_stop_queue(rq);
kaf24@6456 294 break;
kaf24@6456 295 }
kaf24@4611 296
kaf24@6456 297 queued++;
kaf24@6456 298 }
cl349@4087 299
kaf24@6456 300 if (queued != 0)
kaf24@6456 301 flush_requests(info);
cl349@4087 302 }
cl349@4087 303
cl349@4087 304
cl349@4087 305 static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
cl349@4087 306 {
kaf24@6456 307 struct request *req;
kaf24@6456 308 blkif_response_t *bret;
kaf24@6456 309 RING_IDX i, rp;
kaf24@6456 310 unsigned long flags;
kaf24@6456 311 struct blkfront_info *info = (struct blkfront_info *)dev_id;
cl349@4087 312
kaf24@6456 313 spin_lock_irqsave(&blkif_io_lock, flags);
cl349@4087 314
kaf24@6456 315 if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) {
kaf24@6456 316 spin_unlock_irqrestore(&blkif_io_lock, flags);
kaf24@6456 317 return IRQ_HANDLED;
kaf24@6456 318 }
cl349@4087 319
kaf24@6456 320 rp = info->ring.sring->rsp_prod;
kaf24@6456 321 rmb(); /* Ensure we see queued responses up to 'rp'. */
cl349@4087 322
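/*
 * Walk the response ring from rsp_cons up to the producer index we just
 * read, completing the Linux request behind each response and returning
 * its shadow slot to the free list.
 */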
kaf24@6456 323 for (i = info->ring.rsp_cons; i != rp; i++) {
kaf24@6456 324 unsigned long id;
cl349@4087 325
kaf24@6456 326 bret = RING_GET_RESPONSE(&info->ring, i);
kaf24@6456 327 id = bret->id;
kaf24@6459 328 req = (struct request *)info->shadow[id].request;
cl349@4087 329
kaf24@6459 330 blkif_completion(&info->shadow[id]);
cl349@4087 331
kaf24@6459 332 ADD_ID_TO_FREELIST(info, id);
cl349@4087 333
kaf24@6456 334 switch (bret->operation) {
kaf24@6456 335 case BLKIF_OP_READ:
kaf24@6456 336 case BLKIF_OP_WRITE:
kaf24@6456 337 if (unlikely(bret->status != BLKIF_RSP_OKAY))
kaf24@6456 338 DPRINTK("Bad return from blkdev data "
kaf24@6456 339 "request: %x\n", bret->status);
cl349@4087 340
kaf24@6456 341 BUG_ON(end_that_request_first(
kaf24@6456 342 req, (bret->status == BLKIF_RSP_OKAY),
kaf24@6456 343 req->hard_nr_sectors));
kaf24@6456 344 end_that_request_last(req);
kaf24@6456 345 break;
kaf24@6456 346 default:
kaf24@6456 347 BUG();
kaf24@6456 348 }
kaf24@6456 349 }
cl349@6341 350
kaf24@6456 351 info->ring.rsp_cons = i;
cl349@4087 352
kaf24@6456 353 kick_pending_request_queues(info);
cl349@4087 354
kaf24@6456 355 spin_unlock_irqrestore(&blkif_io_lock, flags);
cl349@4087 356
kaf24@6456 357 return IRQ_HANDLED;
cl349@4087 358 }
cl349@4087 359
cl349@6338 360 static void blkif_free(struct blkfront_info *info)
cl349@4087 361 {
kaf24@6456 362 /* Prevent new requests being issued until we fix things up. */
kaf24@6456 363 spin_lock_irq(&blkif_io_lock);
kaf24@6456 364 info->connected = BLKIF_STATE_DISCONNECTED;
kaf24@6456 365 spin_unlock_irq(&blkif_io_lock);
cl349@4087 366
kaf24@6456 367 /* Free resources associated with old device channel. */
kaf24@6456 368 if (info->ring.sring != NULL) {
kaf24@6456 369 free_page((unsigned long)info->ring.sring);
kaf24@6456 370 info->ring.sring = NULL;
kaf24@6456 371 }
cl349@6616 372 if (info->ring_ref != GRANT_INVALID_REF)
cl349@6616 373 gnttab_end_foreign_access(info->ring_ref, 0);
cl349@6616 374 info->ring_ref = GRANT_INVALID_REF;
shand@6465 375 unbind_evtchn_from_irqhandler(info->evtchn, info);
kaf24@6456 376 info->evtchn = 0;
cl349@4087 377 }
cl349@4087 378
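/*
 * After suspend/resume the shared ring contents are gone. Rebuild the
 * shadow free list and re-inject every request that was still outstanding,
 * re-granting any references invalidated while we were suspended.
 */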
cl349@6338 379 static void blkif_recover(struct blkfront_info *info)
cl349@4087 380 {
kaf24@6456 381 int i;
kaf24@6456 382 blkif_request_t *req;
kaf24@6456 383 struct blk_shadow *copy;
kaf24@6456 384 int j;
cl349@4087 385
kaf24@6456 386 /* Stage 1: Make a safe copy of the shadow state. */
kaf24@6459 387 copy = (struct blk_shadow *)kmalloc(sizeof(info->shadow), GFP_KERNEL);
kaf24@6456 388 BUG_ON(copy == NULL);
kaf24@6459 389 memcpy(copy, info->shadow, sizeof(info->shadow));
cl349@4087 390
kaf24@6456 391 /* Stage 2: Set up free list. */
kaf24@6459 392 memset(&info->shadow, 0, sizeof(info->shadow));
kaf24@6456 393 for (i = 0; i < BLK_RING_SIZE; i++)
kaf24@6459 394 info->shadow[i].req.id = i+1;
kaf24@6459 395 info->shadow_free = info->ring.req_prod_pvt;
kaf24@6459 396 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
kaf24@4611 397
kaf24@6456 398 /* Stage 3: Find pending requests and requeue them. */
kaf24@6456 399 for (i = 0; i < BLK_RING_SIZE; i++) {
kaf24@6456 400 /* Not in use? */
kaf24@6456 401 if (copy[i].request == 0)
kaf24@6456 402 continue;
kaf24@4611 403
kaf24@6456 404 /* Grab a request slot and unpickle shadow state into it. */
kaf24@6456 405 req = RING_GET_REQUEST(
kaf24@6456 406 &info->ring, info->ring.req_prod_pvt);
kaf24@6456 407 unpickle_request(req, &copy[i]);
kaf24@4611 408
kaf24@6456 409 /* We get a new request id, and must reset the shadow state. */
kaf24@6459 410 req->id = GET_ID_FROM_FREELIST(info);
kaf24@6459 411 memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));
kaf24@4611 412
kaf24@6456 413 /* Rewrite any grant references invalidated by susp/resume. */
kaf24@6456 414 for (j = 0; j < req->nr_segments; j++) {
kaf24@6456 415 if ( req->frame_and_sects[j] & GRANTREF_INVALID )
kaf24@6456 416 gnttab_grant_foreign_access_ref(
kaf24@6456 417 blkif_gref_from_fas(
kaf24@6456 418 req->frame_and_sects[j]),
kaf24@6456 419 info->backend_id,
kaf24@6459 420 info->shadow[req->id].frame[j],
kaf24@6456 421 rq_data_dir(
kaf24@6456 422 (struct request *)
kaf24@6459 423 info->shadow[req->id].request));
kaf24@6456 424 req->frame_and_sects[j] &= ~GRANTREF_INVALID;
kaf24@6456 425 }
kaf24@6459 426 info->shadow[req->id].req = *req;
kaf24@4611 427
kaf24@6456 428 info->ring.req_prod_pvt++;
kaf24@6456 429 }
cl349@4087 430
kaf24@6456 431 kfree(copy);
cl349@4087 432
kaf24@6456 433 recovery = 0;
cl349@4087 434
kaf24@6456 435 /* info->ring->req_prod will be set when we flush_requests(). */
kaf24@6456 436 wmb();
cl349@4087 437
kaf24@6456 438 /* Kicks things back into life. */
kaf24@6456 439 flush_requests(info);
cl349@4087 440
kaf24@6456 441 /* Now safe to let other people use the interface. */
kaf24@6456 442 info->connected = BLKIF_STATE_CONNECTED;
cl349@4087 443 }
cl349@4087 444
cl349@6338 445 static void blkif_connect(struct blkfront_info *info, u16 evtchn)
cl349@4087 446 {
kaf24@6456 447 int err = 0;
cl349@4087 448
kaf24@6456 449 info->evtchn = evtchn;
cl349@4087 450
kaf24@6456 451 err = bind_evtchn_to_irqhandler(
kaf24@6456 452 info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
kaf24@6456 453 if (err != 0) {
kaf24@6456 454 WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
kaf24@6456 455 return;
kaf24@6456 456 }
cl349@4087 457 }
cl349@4087 458
cl349@4087 459
cl349@6259 460 static struct xenbus_device_id blkfront_ids[] = {
cl349@6259 461 { "vbd" },
cl349@6259 462 { "" }
cl349@6259 463 };
cl349@6259 464
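/*
 * xenbus watch callback on the backend directory: once the backend has
 * published sectors, info and sector-size, create the virtual block device
 * and mark the connection as live.
 */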
cl349@6259 465 static void watch_for_status(struct xenbus_watch *watch, const char *node)
cl349@4087 466 {
cl349@6259 467 struct blkfront_info *info;
cl349@6259 468 unsigned int binfo;
cl349@6259 469 unsigned long sectors, sector_size;
cl349@6259 470 int err;
cl349@6259 471
cl349@6259 472 info = container_of(watch, struct blkfront_info, watch);
cl349@6259 473 node += strlen(watch->node);
cl349@6259 474
cl349@6259 475 /* FIXME: clean up when error on the other end. */
cl349@6341 476 if (info->connected == BLKIF_STATE_CONNECTED)
cl349@6259 477 return;
cl349@6259 478
cl349@6341 479 err = xenbus_gather(watch->node,
cl349@6259 480 "sectors", "%lu", &sectors,
cl349@6259 481 "info", "%u", &binfo,
cl349@6259 482 "sector-size", "%lu", &sector_size,
cl349@6259 483 NULL);
cl349@6283 484 if (err) {
cl349@6403 485 xenbus_dev_error(info->xbdev, err,
cl349@6403 486 "reading backend fields at %s", watch->node);
cl349@6283 487 return;
cl349@6283 488 }
cl349@6283 489
cl349@6341 490 xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
cl349@6341 491 info->connected = BLKIF_STATE_CONNECTED;
cl349@6259 492
cl349@6341 493 blkif_state = BLKIF_STATE_CONNECTED;
cl349@4087 494
cl349@6341 495 xenbus_dev_ok(info->xbdev);
cl349@4087 496
cl349@6283 497 /* Kick pending requests. */
cl349@6283 498 spin_lock_irq(&blkif_io_lock);
cl349@6341 499 kick_pending_request_queues(info);
cl349@6283 500 spin_unlock_irq(&blkif_io_lock);
cl349@6259 501 }
cl349@6259 502
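/*
 * Allocate the shared ring page, grant the backend access to it, and
 * create an unbound event channel for the backend to bind to.
 */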
cl349@6338 503 static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
cl349@6259 504 {
cl349@6259 505 blkif_sring_t *sring;
cl349@6259 506 evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
cl349@6259 507 int err;
cl349@6259 508
cl349@6616 509 info->ring_ref = GRANT_INVALID_REF;
cl349@6616 510
cl349@6259 511 sring = (void *)__get_free_page(GFP_KERNEL);
cl349@6259 512 if (!sring) {
cl349@6259 513 xenbus_dev_error(dev, -ENOMEM, "allocating shared ring");
cl349@6259 514 return -ENOMEM;
cl349@6259 515 }
cl349@6259 516 SHARED_RING_INIT(sring);
cl349@6338 517 FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);
cl349@6259 518
cl349@6341 519 err = gnttab_grant_foreign_access(info->backend_id,
cl349@6341 520 virt_to_mfn(info->ring.sring), 0);
cl349@6341 521 if (err == -ENOSPC) {
cl349@6341 522 free_page((unsigned long)info->ring.sring);
cl349@6341 523 info->ring.sring = 0;
cl349@6341 524 xenbus_dev_error(dev, err, "granting access to ring page");
cl349@6341 525 return err;
cl349@6341 526 }
cl349@6387 527 info->ring_ref = err;
cl349@6260 528
cl349@6338 529 op.u.alloc_unbound.dom = info->backend_id;
cl349@6259 530 err = HYPERVISOR_event_channel_op(&op);
cl349@6259 531 if (err) {
cl349@6387 532 gnttab_end_foreign_access(info->ring_ref, 0);
cl349@6616 533 info->ring_ref = GRANT_INVALID_REF;
cl349@6338 534 free_page((unsigned long)info->ring.sring);
cl349@6338 535 info->ring.sring = 0;
cl349@6259 536 xenbus_dev_error(dev, err, "allocating event channel");
cl349@6259 537 return err;
cl349@6259 538 }
cl349@6338 539 blkif_connect(info, op.u.alloc_unbound.port);
cl349@6259 540 return 0;
cl349@4087 541 }
cl349@4087 542
cl349@6259 543 /* Common code used when first setting up, and when resuming. */
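/*
 * Reads the backend details from the store, sets up the ring and event
 * channel, then (inside a transaction) advertises ring-ref and
 * event-channel under our own node and registers a watch on the backend.
 */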
cl349@6259 544 static int talk_to_backend(struct xenbus_device *dev,
cl349@6259 545 struct blkfront_info *info)
cl349@6259 546 {
cl349@6259 547 char *backend;
cl349@6259 548 const char *message;
cl349@6338 549 int err;
cl349@6259 550
cl349@6283 551 backend = NULL;
cl349@6283 552 err = xenbus_gather(dev->nodename,
cl349@6338 553 "backend-id", "%i", &info->backend_id,
cl349@6283 554 "backend", NULL, &backend,
cl349@6283 555 NULL);
cl349@6283 556 if (XENBUS_EXIST_ERR(err))
cl349@6283 557 goto out;
cl349@6283 558 if (backend && strlen(backend) == 0) {
cl349@6283 559 err = -ENOENT;
cl349@6283 560 goto out;
cl349@6283 561 }
cl349@6283 562 if (err < 0) {
cl349@6283 563 xenbus_dev_error(dev, err, "reading %s/backend or backend-id",
cl349@6259 564 dev->nodename);
cl349@6259 565 goto out;
cl349@6259 566 }
cl349@6259 567
cl349@6338 568 /* Create shared ring, alloc event channel. */
cl349@6338 569 err = setup_blkring(dev, info);
cl349@6372 570 if (err) {
cl349@6372 571 xenbus_dev_error(dev, err, "setting up block ring");
cl349@6338 572 goto out;
cl349@6372 573 }
cl349@6259 574
cl349@6259 575 err = xenbus_transaction_start(dev->nodename);
cl349@6259 576 if (err) {
cl349@6259 577 xenbus_dev_error(dev, err, "starting transaction");
cl349@6259 578 goto destroy_blkring;
cl349@6259 579 }
cl349@6259 580
cl349@6387 581 err = xenbus_printf(dev->nodename, "ring-ref", "%u", info->ring_ref);
cl349@6260 582 if (err) {
cl349@6387 583 message = "writing ring-ref";
cl349@6260 584 goto abort_transaction;
cl349@6259 585 }
cl349@6259 586 err = xenbus_printf(dev->nodename,
cl349@6338 587 "event-channel", "%u", info->evtchn);
cl349@6259 588 if (err) {
cl349@6259 589 message = "writing event-channel";
cl349@6259 590 goto abort_transaction;
cl349@6259 591 }
cl349@6259 592
cl349@6283 593 info->backend = backend;
cl349@6283 594 backend = NULL;
cl349@6283 595
cl349@6283 596 info->watch.node = info->backend;
cl349@6259 597 info->watch.callback = watch_for_status;
cl349@6259 598 err = register_xenbus_watch(&info->watch);
cl349@6259 599 if (err) {
cl349@6259 600 message = "registering watch on backend";
cl349@6259 601 goto abort_transaction;
cl349@6259 602 }
cl349@6259 603
cl349@6259 604 err = xenbus_transaction_end(0);
cl349@6259 605 if (err) {
cl349@6259 606 xenbus_dev_error(dev, err, "completing transaction");
cl349@6259 607 goto destroy_blkring;
cl349@6259 608 }
cl349@6259 609
cl349@6283 610 out:
cl349@6283 611 if (backend)
cl349@6283 612 kfree(backend);
cl349@6283 613 return err;
cl349@6283 614
cl349@6283 615 abort_transaction:
cl349@6259 616 xenbus_transaction_end(1);
cl349@6259 617 /* Have to do this *outside* transaction. */
cl349@6259 618 xenbus_dev_error(dev, err, "%s", message);
cl349@6283 619 destroy_blkring:
cl349@6338 620 blkif_free(info);
cl349@6283 621 goto out;
cl349@6259 622 }
cl349@6259 623
cl349@6259 624 /* Setup supplies the backend dir and virtual device.
cl349@6259 625
cl349@6259 626 We allocate an event channel and shared ring, advertise them in the store,
cl349@6259 627 and watch the backend so we know when it is ready. */
cl349@6259 628 static int blkfront_probe(struct xenbus_device *dev,
cl349@6259 629 const struct xenbus_device_id *id)
cl349@6259 630 {
kaf24@6459 631 int err, vdevice, i;
cl349@6259 632 struct blkfront_info *info;
cl349@6259 633
cl349@6259 634 /* FIXME: Use dynamic device id if this is not set. */
cl349@6259 635 err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice);
cl349@6283 636 if (XENBUS_EXIST_ERR(err))
cl349@6260 637 return err;
cl349@6259 638 if (err < 0) {
cl349@6259 639 xenbus_dev_error(dev, err, "reading virtual-device");
cl349@6259 640 return err;
cl349@6259 641 }
cl349@6259 642
cl349@6259 643 info = kmalloc(sizeof(*info), GFP_KERNEL);
cl349@6259 644 if (!info) {
cl349@6259 645 xenbus_dev_error(dev, -ENOMEM, "allocating info structure");
cl349@6259 646 return -ENOMEM;
cl349@6259 647 }
cl349@6341 648 info->xbdev = dev;
cl349@6259 649 info->vdevice = vdevice;
cl349@6341 650 info->connected = BLKIF_STATE_DISCONNECTED;
cl349@6341 651 info->mi = NULL;
cl349@6342 652 INIT_WORK(&info->work, blkif_restart_queue, (void *)info);
cl349@6283 653
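/* Chain every shadow entry onto the free list via its req.id field. */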
kaf24@6459 654 info->shadow_free = 0;
kaf24@6459 655 memset(info->shadow, 0, sizeof(info->shadow));
kaf24@6459 656 for (i = 0; i < BLK_RING_SIZE; i++)
kaf24@6459 657 info->shadow[i].req.id = i+1;
kaf24@6459 658 info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
kaf24@6459 659
cl349@6259 660 /* Front end dir is a number, which is used as the id. */
cl349@6259 661 info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
cl349@6259 662 dev->data = info;
cl349@6259 663
cl349@6259 664 err = talk_to_backend(dev, info);
cl349@6259 665 if (err) {
cl349@6259 666 kfree(info);
cl349@6421 667 dev->data = NULL;
cl349@6259 668 return err;
cl349@6259 669 }
cl349@6259 670
cl349@6259 671 /* Call once in case entries already there. */
cl349@6259 672 watch_for_status(&info->watch, info->watch.node);
cl349@6259 673 return 0;
cl349@6259 674 }
cl349@6259 675
cl349@6259 676 static int blkfront_remove(struct xenbus_device *dev)
cl349@6259 677 {
cl349@6259 678 struct blkfront_info *info = dev->data;
cl349@6259 679
cl349@6259 680 if (info->backend)
cl349@6259 681 unregister_xenbus_watch(&info->watch);
cl349@6259 682
cl349@6341 683 if (info->mi)
cl349@6341 684 xlvbd_del(info);
cl349@6338 685
cl349@6338 686 blkif_free(info);
cl349@6338 687
cl349@6259 688 kfree(info->backend);
cl349@6259 689 kfree(info);
cl349@6259 690
cl349@6259 691 return 0;
cl349@6259 692 }
cl349@6259 693
cl349@6259 694 static int blkfront_suspend(struct xenbus_device *dev)
cl349@6259 695 {
cl349@6259 696 struct blkfront_info *info = dev->data;
cl349@6259 697
cl349@6259 698 unregister_xenbus_watch(&info->watch);
cl349@6259 699 kfree(info->backend);
cl349@6259 700 info->backend = NULL;
cl349@6259 701
cl349@6338 702 recovery = 1;
cl349@6338 703 blkif_free(info);
cl349@6259 704
cl349@6259 705 return 0;
cl349@6259 706 }
cl349@6259 707
cl349@6259 708 static int blkfront_resume(struct xenbus_device *dev)
cl349@6259 709 {
cl349@6259 710 struct blkfront_info *info = dev->data;
cl349@6259 711 int err;
cl349@6259 712
cl349@6259 713 /* FIXME: Check geometry hasn't changed here... */
cl349@6259 714 err = talk_to_backend(dev, info);
cl349@6259 715 if (!err) {
cl349@6338 716 blkif_recover(info);
cl349@6259 717 }
cl349@6259 718 return err;
cl349@6259 719 }
cl349@6259 720
cl349@6259 721 static struct xenbus_driver blkfront = {
cl349@6261 722 .name = "vbd",
cl349@6259 723 .owner = THIS_MODULE,
cl349@6259 724 .ids = blkfront_ids,
cl349@6259 725 .probe = blkfront_probe,
cl349@6259 726 .remove = blkfront_remove,
cl349@6259 727 .resume = blkfront_resume,
cl349@6259 728 .suspend = blkfront_suspend,
cl349@6259 729 };
cl349@6259 730
cl349@6259 731 static void __init init_blk_xenbus(void)
cl349@6259 732 {
cl349@6259 733 xenbus_register_device(&blkfront);
cl349@6259 734 }
cl349@6259 735
cl349@6259 736 static int wait_for_blkif(void)
cl349@4087 737 {
kaf24@6456 738 int err = 0;
kaf24@6456 739 int i;
cl349@4087 740
kaf24@6456 741 /*
kaf24@6456 742 * We should figure out how many and which devices we need to
kaf24@6456 743 * proceed and only wait for those. For now, continue once the
kaf24@6456 744 * first device is around.
kaf24@6456 745 */
kaf24@6456 746 for (i = 0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++) {
kaf24@6456 747 set_current_state(TASK_INTERRUPTIBLE);
kaf24@6456 748 schedule_timeout(1);
kaf24@6456 749 }
cl349@4087 750
kaf24@6456 751 if (blkif_state != BLKIF_STATE_CONNECTED) {
kaf24@6456 752 WPRINTK("Timeout connecting to device!\n");
kaf24@6456 753 err = -ENOSYS;
kaf24@6456 754 }
kaf24@6456 755 return err;
cl349@4087 756 }
cl349@4087 757
cl349@6259 758 static int __init xlblk_init(void)
cl349@4087 759 {
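/* Nothing to do if we are the initial domain or host the block backend. */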
cl349@6618 760 if ((xen_start_info->flags & SIF_INITDOMAIN) ||
cl349@6618 761 (xen_start_info->flags & SIF_BLK_BE_DOMAIN) )
kaf24@6456 762 return 0;
cl349@4087 763
kaf24@6456 764 IPRINTK("Initialising virtual block device driver\n");
cl349@4087 765
kaf24@6456 766 init_blk_xenbus();
kaf24@6456 767
kaf24@6456 768 wait_for_blkif();
cl349@4087 769
kaf24@6456 770 return 0;
kaf24@6456 771 }
cl349@4087 772
kaf24@6456 773 module_init(xlblk_init);
cl349@4087 774
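/*
 * Revoke the grant references that covered each segment of a completed
 * request.
 */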
kaf24@4611 775 static void blkif_completion(struct blk_shadow *s)
cl349@4087 776 {
kaf24@6456 777 int i;
kaf24@6456 778 for (i = 0; i < s->req.nr_segments; i++)
kaf24@6695 779 gnttab_end_foreign_access(
kaf24@6695 780 blkif_gref_from_fas(s->req.frame_and_sects[i]), 0);
cl349@4087 781 }
kaf24@6456 782
kaf24@6456 783 /*
kaf24@6456 784 * Local variables:
kaf24@6456 785 * c-file-style: "linux"
kaf24@6456 786 * indent-tabs-mode: t
kaf24@6456 787 * c-indent-level: 8
kaf24@6456 788 * c-basic-offset: 8
kaf24@6456 789 * tab-width: 8
kaf24@6456 790 * End:
kaf24@6456 791 */