debuggers.hg

annotate linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c @ 3647:d49c0626928e

bitkeeper revision 1.1159.212.69 (42009c14_wjFIPRo2s6br4oGS3Ln-w)

Merge labyrinth.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-2.0-testing.bk
into labyrinth.cl.cam.ac.uk:/auto/groups/xeno-xenod/BK/xen-unstable.bk
author iap10@labyrinth.cl.cam.ac.uk
date Wed Feb 02 09:23:32 2005 +0000 (2005-02-02)
parents 2c56c6b39a48 17e50d7ed675
children bbe8541361dd
rev   line source
cl349@3368 1 /******************************************************************************
cl349@3368 2 * blkfront.c
cl349@3368 3 *
cl349@3368 4 * XenLinux virtual block-device driver.
cl349@3368 5 *
cl349@3368 6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
cl349@3368 7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
cl349@3368 8 * Copyright (c) 2004, Christian Limpach
kaf24@3387 9 * Copyright (c) 2004, Andrew Warfield
cl349@3368 10 *
cl349@3368 11 * This file may be distributed separately from the Linux kernel, or
cl349@3368 12 * incorporated into other software packages, subject to the following license:
cl349@3368 13 *
cl349@3368 14 * Permission is hereby granted, free of charge, to any person obtaining a copy
cl349@3368 15 * of this source file (the "Software"), to deal in the Software without
cl349@3368 16 * restriction, including without limitation the rights to use, copy, modify,
cl349@3368 17 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
cl349@3368 18 * and to permit persons to whom the Software is furnished to do so, subject to
cl349@3368 19 * the following conditions:
cl349@3368 20 *
cl349@3368 21 * The above copyright notice and this permission notice shall be included in
cl349@3368 22 * all copies or substantial portions of the Software.
cl349@3368 23 *
cl349@3368 24 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
cl349@3368 25 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
cl349@3368 26 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
cl349@3368 27 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
cl349@3368 28 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
cl349@3368 29 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
cl349@3368 30 * IN THE SOFTWARE.
cl349@3368 31 */
cl349@3368 32
cl349@3368 33 #include <linux/version.h>
cl349@3368 34
cl349@3368 35 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
cl349@3368 36 #include "block.h"
cl349@3368 37 #else
cl349@3368 38 #include "common.h"
cl349@3368 39 #include <linux/blk.h>
cl349@3368 40 #include <linux/tqueue.h>
cl349@3368 41 #endif
cl349@3368 42
cl349@3368 43 #include <linux/cdrom.h>
cl349@3368 44 #include <linux/sched.h>
cl349@3368 45 #include <linux/interrupt.h>
cl349@3368 46 #include <scsi/scsi.h>
cl349@3368 47 #include <asm-xen/ctrl_if.h>
cl349@3369 48 #include <asm-xen/evtchn.h>
cl349@3368 49
cl349@3368 50 typedef unsigned char byte; /* from linux/ide.h */
cl349@3368 51
cl349@3368 52 /* Control whether runtime update of vbds is enabled. */
cl349@3368 53 #define ENABLE_VBD_UPDATE 1
cl349@3368 54
cl349@3368 55 #if ENABLE_VBD_UPDATE
cl349@3368 56 static void vbd_update(void);
cl349@3368 57 #else
cl349@3368 58 static void vbd_update(void){};
cl349@3368 59 #endif
cl349@3368 60
cl349@3368 61 #define BLKIF_STATE_CLOSED 0
cl349@3368 62 #define BLKIF_STATE_DISCONNECTED 1
cl349@3368 63 #define BLKIF_STATE_CONNECTED 2
cl349@3368 64
cl349@3368 65 #define WPRINTK(fmt, args...) printk(KERN_WARNING "xen_blk: " fmt, ##args)
cl349@3368 66
cl349@3368 67 static int blkif_handle = 0;
cl349@3368 68 static unsigned int blkif_state = BLKIF_STATE_CLOSED;
cl349@3368 69 static unsigned int blkif_evtchn = 0;
cl349@3368 70 static unsigned int blkif_irq = 0;
cl349@3368 71
cl349@3368 72 static int blkif_control_rsp_valid;
cl349@3368 73 static blkif_response_t blkif_control_rsp;
cl349@3368 74
kaf24@3387 75 static blkif_front_ring_t blk_ring;
cl349@3368 76
cl349@3368 77 unsigned long rec_ring_free;
kaf24@3387 78 blkif_request_t rec_ring[RING_SIZE(BLKIF_RING, &blk_ring)];
cl349@3368 79
cl349@3368 80 static int recovery = 0; /* "Recovery in progress" flag. Protected
cl349@3368 81 * by the blkif_io_lock */
cl349@3368 82
cl349@3368 83 static void kick_pending_request_queues(void);
cl349@3368 84
cl349@3368 85 int __init xlblk_init(void);
cl349@3368 86
cl349@3368 87 void blkif_completion( blkif_request_t *req );
cl349@3368 88
cl349@3368 89 static inline int GET_ID_FROM_FREELIST( void )
cl349@3368 90 {
cl349@3368 91 unsigned long free = rec_ring_free;
cl349@3368 92
kaf24@3387 93 if ( free > RING_SIZE(BLKIF_RING, &blk_ring) )
cl349@3368 94 BUG();
cl349@3368 95
cl349@3368 96 rec_ring_free = rec_ring[free].id;
cl349@3368 97
cl349@3368 98 rec_ring[free].id = 0x0fffffee; /* debug */
cl349@3368 99
cl349@3368 100 return free;
cl349@3368 101 }
cl349@3368 102
cl349@3368 103 static inline void ADD_ID_TO_FREELIST( unsigned long id )
cl349@3368 104 {
cl349@3368 105 rec_ring[id].id = rec_ring_free;
cl349@3368 106 rec_ring_free = id;
cl349@3368 107 }
cl349@3368 108
cl349@3368 109
cl349@3368 110 /************************ COMMON CODE (inlined) ************************/
cl349@3368 111
cl349@3368 112 /* Kernel-specific definitions used in the common code */
cl349@3368 113 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
cl349@3368 114 #define DISABLE_SCATTERGATHER()
cl349@3368 115 #else
cl349@3368 116 static int sg_operation = -1;
cl349@3368 117 #define DISABLE_SCATTERGATHER() (sg_operation = -1)
cl349@3368 118 #endif
cl349@3368 119
cl349@3368 120 static inline void translate_req_to_pfn(blkif_request_t *xreq,
cl349@3368 121 blkif_request_t *req)
cl349@3368 122 {
cl349@3368 123 int i;
cl349@3368 124
cl349@3368 125 xreq->operation = req->operation;
cl349@3368 126 xreq->nr_segments = req->nr_segments;
cl349@3368 127 xreq->device = req->device;
cl349@3368 128 /* preserve id */
cl349@3368 129 xreq->sector_number = req->sector_number;
cl349@3368 130
cl349@3368 131 for ( i = 0; i < req->nr_segments; i++ )
cl349@3368 132 xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
cl349@3368 133 }
cl349@3368 134
cl349@3368 135 static inline void translate_req_to_mfn(blkif_request_t *xreq,
cl349@3368 136 blkif_request_t *req)
cl349@3368 137 {
cl349@3368 138 int i;
cl349@3368 139
cl349@3368 140 xreq->operation = req->operation;
cl349@3368 141 xreq->nr_segments = req->nr_segments;
cl349@3368 142 xreq->device = req->device;
cl349@3368 143 xreq->id = req->id; /* copy id (unlike above) */
cl349@3368 144 xreq->sector_number = req->sector_number;
cl349@3368 145
cl349@3368 146 for ( i = 0; i < req->nr_segments; i++ )
cl349@3368 147 xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
cl349@3368 148 }
cl349@3368 149
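These two helpers convert the frame references inside a request between machine and pseudo-physical form. The recovery ring (rec_ring) keeps pfn-based copies so that, after a backend reconnect or suspend/resume, blkif_recover() can translate them back to whatever machine frames the pages now occupy and reissue them. A rough, self-contained sketch of such a translation, assuming it simply swaps the frame number through a lookup table while keeping the in-page bits (all names here are illustrative; the real asm-xen macros may differ in detail):

    #include <stdio.h>

    /* Hypothetical stand-ins for the asm-xen translation tables/macros. */
    #define DEMO_PAGE_SHIFT 12
    #define DEMO_NR_PAGES   4

    static unsigned long demo_p2m[DEMO_NR_PAGES] = { 7, 3, 9, 1 }; /* pfn -> mfn */
    static unsigned long demo_m2p[16];                             /* mfn -> pfn */

    static unsigned long demo_phys_to_machine(unsigned long pa)
    {
        return (demo_p2m[pa >> DEMO_PAGE_SHIFT] << DEMO_PAGE_SHIFT) |
               (pa & ((1UL << DEMO_PAGE_SHIFT) - 1));
    }

    static unsigned long demo_machine_to_phys(unsigned long ma)
    {
        return (demo_m2p[ma >> DEMO_PAGE_SHIFT] << DEMO_PAGE_SHIFT) |
               (ma & ((1UL << DEMO_PAGE_SHIFT) - 1));
    }

    int main(void)
    {
        unsigned long pfn, pa = (2UL << DEMO_PAGE_SHIFT) | 0x1a; /* page 2, offset 0x1a */
        for ( pfn = 0; pfn < DEMO_NR_PAGES; pfn++ )
            demo_m2p[demo_p2m[pfn]] = pfn;                       /* build inverse map */
        printf("%lx -> %lx -> %lx\n", pa,
               demo_phys_to_machine(pa),
               demo_machine_to_phys(demo_phys_to_machine(pa)));  /* round-trips to pa */
        return 0;
    }
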
cl349@3368 150
cl349@3368 151 static inline void flush_requests(void)
cl349@3368 152 {
cl349@3368 153 DISABLE_SCATTERGATHER();
kaf24@3387 154 RING_PUSH_REQUESTS(BLKIF_RING, &blk_ring);
cl349@3368 155 notify_via_evtchn(blkif_evtchn);
cl349@3368 156 }
cl349@3368 157
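flush_requests() above is the publish step: requests are built against the private producer index (req_prod_pvt), and RING_PUSH_REQUESTS makes them visible to the backend before the event-channel notification. A minimal single-producer sketch of that publish-then-notify pattern (demo_ring and the helper names are illustrative; __sync_synchronize stands in for the kernel's wmb()):

    /* Illustrative publish-then-notify pattern, not the real ring macros. */
    #define DEMO_RING_SIZE 32

    struct demo_ring {
        volatile unsigned int req_prod;  /* shared with the consumer        */
        unsigned int req_prod_pvt;       /* producer-private count          */
        int entries[DEMO_RING_SIZE];
    };

    static void demo_queue(struct demo_ring *r, int v)
    {
        r->entries[r->req_prod_pvt % DEMO_RING_SIZE] = v; /* fill payload    */
        r->req_prod_pvt++;                                /* not yet visible */
    }

    static void demo_push(struct demo_ring *r)
    {
        __sync_synchronize();            /* payload must be visible first   */
        r->req_prod = r->req_prod_pvt;   /* publish new producer index      */
        /* the driver then does notify_via_evtchn(blkif_evtchn) */
    }

    int main(void)
    {
        static struct demo_ring r;
        demo_queue(&r, 42);
        demo_queue(&r, 43);
        demo_push(&r);                   /* both entries now visible        */
        return 0;
    }
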
cl349@3368 158
cl349@3368 159
cl349@3368 160
cl349@3368 161 /************************** KERNEL VERSION 2.6 **************************/
cl349@3368 162
cl349@3368 163 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
cl349@3368 164
iap10@3484 165 module_init(xlblk_init);
cl349@3368 166
cl349@3368 167 #if ENABLE_VBD_UPDATE
cl349@3368 168 static void vbd_update(void)
cl349@3368 169 {
cl349@3368 170 }
cl349@3368 171 #endif /* ENABLE_VBD_UPDATE */
cl349@3368 172
cl349@3368 173 static void kick_pending_request_queues(void)
cl349@3368 174 {
cl349@3368 175
cl349@3368 176 if ( (xlbd_blk_queue != NULL) &&
cl349@3368 177 test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
cl349@3368 178 {
cl349@3368 179 blk_start_queue(xlbd_blk_queue);
cl349@3368 180 /* XXXcl call to request_fn should not be needed but
cl349@3368 181 * we get stuck without... needs investigating
cl349@3368 182 */
cl349@3368 183 xlbd_blk_queue->request_fn(xlbd_blk_queue);
cl349@3368 184 }
cl349@3368 185
cl349@3368 186 }
cl349@3368 187
cl349@3368 188
cl349@3368 189 int blkif_open(struct inode *inode, struct file *filep)
cl349@3368 190 {
cl349@3368 191 struct gendisk *gd = inode->i_bdev->bd_disk;
cl349@3368 192 struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
cl349@3368 193
cl349@3368 194 /* Update of usage count is protected by per-device semaphore. */
cl349@3368 195 di->mi->usage++;
cl349@3368 196
cl349@3368 197 return 0;
cl349@3368 198 }
cl349@3368 199
cl349@3368 200
cl349@3368 201 int blkif_release(struct inode *inode, struct file *filep)
cl349@3368 202 {
cl349@3368 203 struct gendisk *gd = inode->i_bdev->bd_disk;
cl349@3368 204 struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
cl349@3368 205
cl349@3368 206 /*
cl349@3368 207 * When usage drops to zero it may allow more VBD updates to occur.
cl349@3368 208 * Update of usage count is protected by a per-device semaphore.
cl349@3368 209 */
cl349@3368 210 if (--di->mi->usage == 0) {
cl349@3368 211 vbd_update();
cl349@3368 212 }
cl349@3368 213
cl349@3368 214 return 0;
cl349@3368 215 }
cl349@3368 216
cl349@3368 217
cl349@3368 218 int blkif_ioctl(struct inode *inode, struct file *filep,
cl349@3368 219 unsigned command, unsigned long argument)
cl349@3368 220 {
iap10@3646 221 int i;
cl349@3368 222 /* struct gendisk *gd = inode->i_bdev->bd_disk; */
cl349@3368 223
cl349@3368 224 DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
cl349@3368 225 command, (long)argument, inode->i_rdev);
cl349@3368 226
cl349@3368 227 switch (command) {
cl349@3368 228
cl349@3368 229 case HDIO_GETGEO:
cl349@3368 230 /* return ENOSYS to use defaults */
cl349@3368 231 return -ENOSYS;
cl349@3368 232
iap10@3646 233 case CDROMMULTISESSION:
iap10@3646 234 DPRINTK("FIXME: support multisession CDs later\n");
iap10@3646 235 for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
iap10@3646 236 if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
iap10@3646 237 return 0;
iap10@3646 238
cl349@3368 239 default:
cl349@3368 240 printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
cl349@3368 241 command);
cl349@3368 242 return -ENOSYS;
cl349@3368 243 }
cl349@3368 244
cl349@3368 245 return 0;
cl349@3368 246 }
cl349@3368 247
cl349@3368 248 #if 0
cl349@3368 249 /* check media change: should probably do something here in some cases :-) */
cl349@3368 250 int blkif_check(kdev_t dev)
cl349@3368 251 {
cl349@3368 252 DPRINTK("blkif_check\n");
cl349@3368 253 return 0;
cl349@3368 254 }
cl349@3368 255
cl349@3368 256 int blkif_revalidate(kdev_t dev)
cl349@3368 257 {
cl349@3368 258 struct block_device *bd;
cl349@3368 259 struct gendisk *gd;
cl349@3368 260 xen_block_t *disk;
cl349@3368 261 unsigned long capacity;
cl349@3368 262 int i, rc = 0;
cl349@3368 263
cl349@3368 264 if ( (bd = bdget(dev)) == NULL )
cl349@3368 265 return -EINVAL;
cl349@3368 266
cl349@3368 267 /*
cl349@3368 268 * Update of partition info, and check of usage count, is protected
cl349@3368 269 * by the per-block-device semaphore.
cl349@3368 270 */
cl349@3368 271 down(&bd->bd_sem);
cl349@3368 272
cl349@3368 273 if ( ((gd = get_gendisk(dev)) == NULL) ||
cl349@3368 274 ((disk = xldev_to_xldisk(dev)) == NULL) ||
cl349@3368 275 ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
cl349@3368 276 {
cl349@3368 277 rc = -EINVAL;
cl349@3368 278 goto out;
cl349@3368 279 }
cl349@3368 280
cl349@3368 281 if ( disk->usage > 1 )
cl349@3368 282 {
cl349@3368 283 rc = -EBUSY;
cl349@3368 284 goto out;
cl349@3368 285 }
cl349@3368 286
cl349@3368 287 /* Only reread partition table if VBDs aren't mapped to partitions. */
cl349@3368 288 if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
cl349@3368 289 {
cl349@3368 290 for ( i = gd->max_p - 1; i >= 0; i-- )
cl349@3368 291 {
cl349@3368 292 invalidate_device(dev+i, 1);
cl349@3368 293 gd->part[MINOR(dev+i)].start_sect = 0;
cl349@3368 294 gd->part[MINOR(dev+i)].nr_sects = 0;
cl349@3368 295 gd->sizes[MINOR(dev+i)] = 0;
cl349@3368 296 }
cl349@3368 297
cl349@3368 298 grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
cl349@3368 299 }
cl349@3368 300
cl349@3368 301 out:
cl349@3368 302 up(&bd->bd_sem);
cl349@3368 303 bdput(bd);
cl349@3368 304 return rc;
cl349@3368 305 }
cl349@3368 306 #endif
cl349@3368 307
cl349@3368 308 /*
cl349@3368 309 * blkif_queue_request
cl349@3368 310 *
cl349@3368 311 * request block io
cl349@3368 312 *
cl349@3368 313 * id: for guest use only.
cl349@3368 314 * operation: BLKIF_OP_{READ,WRITE,PROBE}
cl349@3368 315 * buffer: buffer to read/write into. this should be a
cl349@3368 316 * virtual address in the guest os.
cl349@3368 317 */
cl349@3368 318 static int blkif_queue_request(struct request *req)
cl349@3368 319 {
cl349@3368 320 struct xlbd_disk_info *di =
cl349@3368 321 (struct xlbd_disk_info *)req->rq_disk->private_data;
cl349@3368 322 unsigned long buffer_ma;
cl349@3368 323 blkif_request_t *ring_req;
cl349@3368 324 struct bio *bio;
cl349@3368 325 struct bio_vec *bvec;
kaf24@3446 326 int idx;
cl349@3368 327 unsigned long id;
cl349@3368 328 unsigned int fsect, lsect;
cl349@3368 329
kaf24@3446 330 if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
cl349@3368 331 return 1;
cl349@3368 332
cl349@3368 333 /* Fill out a communications ring structure. */
kaf24@3387 334 ring_req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, blk_ring.req_prod_pvt);
cl349@3368 335 id = GET_ID_FROM_FREELIST();
cl349@3368 336 rec_ring[id].id = (unsigned long) req;
cl349@3368 337
cl349@3368 338 ring_req->id = id;
cl349@3368 339 ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
cl349@3368 340 BLKIF_OP_READ;
cl349@3368 341 ring_req->sector_number = (blkif_sector_t)req->sector;
cl349@3368 342 ring_req->device = di->xd_device;
cl349@3368 343
cl349@3368 344 ring_req->nr_segments = 0;
kaf24@3446 345 rq_for_each_bio(bio, req)
kaf24@3446 346 {
kaf24@3446 347 bio_for_each_segment(bvec, bio, idx)
kaf24@3446 348 {
kaf24@3446 349 if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST )
kaf24@3446 350 BUG();
cl349@3368 351 buffer_ma = page_to_phys(bvec->bv_page);
cl349@3368 352 fsect = bvec->bv_offset >> 9;
cl349@3368 353 lsect = fsect + (bvec->bv_len >> 9) - 1;
cl349@3368 354 ring_req->frame_and_sects[ring_req->nr_segments++] =
cl349@3368 355 buffer_ma | (fsect << 3) | lsect;
cl349@3368 356 }
cl349@3368 357 }
cl349@3368 358
kaf24@3387 359 blk_ring.req_prod_pvt++;
kaf24@3387 360
cl349@3368 361 /* Keep a private copy so we can reissue requests when recovering. */
kaf24@3446 362 translate_req_to_pfn(&rec_ring[id], ring_req);
cl349@3368 363
cl349@3368 364 return 0;
cl349@3368 365 }
cl349@3368 366
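Each scatter-gather segment in the loop above is packed into a single frame_and_sects word: the page address (buffer_ma) with the first and last 512-byte sector offsets in the low bits (fsect << 3 | lsect). A small sketch of that encoding, assuming 4KiB pages; the helper names are illustrative and not part of the driver:

    #include <stdio.h>

    #define DEMO_PAGE_MASK (~0xfffUL)    /* 4KiB pages assumed */

    /* Pack a page address plus first/last sector offsets (0..7) the way
     * frame_and_sects entries are built above. */
    static unsigned long pack_seg(unsigned long buffer_ma,
                                  unsigned int fsect, unsigned int lsect)
    {
        return (buffer_ma & DEMO_PAGE_MASK) | (fsect << 3) | lsect;
    }

    static void unpack_seg(unsigned long seg, unsigned long *ma,
                           unsigned int *fsect, unsigned int *lsect)
    {
        *ma    = seg & DEMO_PAGE_MASK;
        *fsect = (seg >> 3) & 7;
        *lsect = seg & 7;
    }

    int main(void)
    {
        unsigned long ma;
        unsigned int f, l;
        unpack_seg(pack_seg(0x12345000UL, 1, 6), &ma, &f, &l);
        printf("%lx %u %u\n", ma, f, l);  /* prints 12345000 1 6 */
        return 0;
    }
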
cl349@3368 367
cl349@3368 368 /*
cl349@3368 369 * do_blkif_request
cl349@3368 370 * read a block; request is in a request queue
cl349@3368 371 */
cl349@3368 372 void do_blkif_request(request_queue_t *rq)
cl349@3368 373 {
cl349@3368 374 struct request *req;
cl349@3368 375 int queued;
cl349@3368 376
cl349@3368 377 DPRINTK("Entered do_blkif_request\n");
cl349@3368 378
cl349@3368 379 queued = 0;
cl349@3368 380
cl349@3368 381 while ((req = elv_next_request(rq)) != NULL) {
cl349@3368 382 if (!blk_fs_request(req)) {
cl349@3368 383 end_request(req, 0);
cl349@3368 384 continue;
cl349@3368 385 }
cl349@3368 386
kaf24@3387 387 if ( RING_FULL(BLKIF_RING, &blk_ring) )
cl349@3368 388 {
cl349@3368 389 blk_stop_queue(rq);
cl349@3368 390 break;
cl349@3368 391 }
cl349@3368 392 DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
cl349@3368 393 req, req->cmd, req->sector, req->current_nr_sectors,
cl349@3368 394 req->nr_sectors, req->buffer,
cl349@3368 395 rq_data_dir(req) ? "write" : "read");
cl349@3368 396 blkdev_dequeue_request(req);
cl349@3368 397 if (blkif_queue_request(req)) {
cl349@3368 398 blk_stop_queue(rq);
cl349@3368 399 break;
cl349@3368 400 }
cl349@3368 401 queued++;
cl349@3368 402 }
cl349@3368 403
cl349@3368 404 if (queued != 0)
cl349@3368 405 flush_requests();
cl349@3368 406 }
cl349@3368 407
cl349@3368 408
cl349@3368 409 static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
cl349@3368 410 {
cl349@3368 411 struct request *req;
cl349@3368 412 blkif_response_t *bret;
kaf24@3387 413 RING_IDX i, rp;
cl349@3368 414 unsigned long flags;
kaf24@3387 415
cl349@3368 416 spin_lock_irqsave(&blkif_io_lock, flags);
cl349@3368 417
cl349@3368 418 if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
cl349@3368 419 unlikely(recovery) )
cl349@3368 420 {
cl349@3368 421 spin_unlock_irqrestore(&blkif_io_lock, flags);
cl349@3368 422 return IRQ_HANDLED;
cl349@3368 423 }
kaf24@3387 424
smh22@3398 425 rp = blk_ring.sring->rsp_prod;
cl349@3368 426 rmb(); /* Ensure we see queued responses up to 'rp'. */
cl349@3368 427
kaf24@3387 428 for ( i = blk_ring.rsp_cons; i != rp; i++ )
cl349@3368 429 {
kaf24@3446 430 unsigned long id;
cl349@3368 431
kaf24@3387 432 bret = RING_GET_RESPONSE(BLKIF_RING, &blk_ring, i);
kaf24@3446 433 id = bret->id;
kaf24@3446 434 req = (struct request *)rec_ring[id].id;
kaf24@3446 435 blkif_completion( &rec_ring[id] );
cl349@3368 436
kaf24@3446 437 ADD_ID_TO_FREELIST(id); /* overwrites req */
cl349@3368 438
cl349@3368 439 switch ( bret->operation )
cl349@3368 440 {
cl349@3368 441 case BLKIF_OP_READ:
cl349@3368 442 case BLKIF_OP_WRITE:
cl349@3368 443 if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
cl349@3368 444 DPRINTK("Bad return from blkdev data request: %x\n",
cl349@3368 445 bret->status);
kaf24@3446 446
cl349@3368 447 if ( unlikely(end_that_request_first
cl349@3368 448 (req,
cl349@3368 449 (bret->status == BLKIF_RSP_OKAY),
cl349@3368 450 req->hard_nr_sectors)) )
cl349@3368 451 BUG();
cl349@3368 452 end_that_request_last(req);
cl349@3368 453
cl349@3368 454 break;
cl349@3368 455 case BLKIF_OP_PROBE:
cl349@3368 456 memcpy(&blkif_control_rsp, bret, sizeof(*bret));
cl349@3368 457 blkif_control_rsp_valid = 1;
cl349@3368 458 break;
cl349@3368 459 default:
cl349@3368 460 BUG();
cl349@3368 461 }
cl349@3368 462 }
kaf24@3387 463
kaf24@3387 464 blk_ring.rsp_cons = i;
cl349@3368 465
cl349@3368 466 kick_pending_request_queues();
cl349@3368 467
cl349@3368 468 spin_unlock_irqrestore(&blkif_io_lock, flags);
cl349@3368 469
cl349@3368 470 return IRQ_HANDLED;
cl349@3368 471 }
cl349@3368 472
cl349@3368 473 #else
cl349@3368 474 /************************** KERNEL VERSION 2.4 **************************/
cl349@3368 475
cl349@3368 476 static kdev_t sg_dev;
cl349@3368 477 static unsigned long sg_next_sect;
cl349@3368 478
cl349@3368 479 /*
cl349@3368 480 * Request queues with outstanding work, but ring is currently full.
cl349@3368 481 * We need no special lock here, as we always access this with the
cl349@3368 482 * blkif_io_lock held. We only need a small maximum list.
cl349@3368 483 */
cl349@3368 484 #define MAX_PENDING 8
cl349@3368 485 static request_queue_t *pending_queues[MAX_PENDING];
cl349@3368 486 static int nr_pending;
cl349@3368 487
cl349@3368 488
cl349@3368 489 #define blkif_io_lock io_request_lock
cl349@3368 490
cl349@3368 491 /*============================================================================*/
cl349@3368 492 #if ENABLE_VBD_UPDATE
cl349@3368 493
cl349@3368 494 /*
cl349@3368 495 * blkif_update_int/update_vbds_task - handle VBD update events.
cl349@3368 496 * Schedule a task for keventd to run, which will update the VBDs and perform
cl349@3368 497 * the corresponding updates to our view of VBD state.
cl349@3368 498 */
cl349@3368 499 static void update_vbds_task(void *unused)
cl349@3368 500 {
cl349@3368 501 xlvbd_update_vbds();
cl349@3368 502 }
cl349@3368 503
cl349@3368 504 static void vbd_update(void)
cl349@3368 505 {
cl349@3368 506 static struct tq_struct update_tq;
cl349@3368 507 update_tq.routine = update_vbds_task;
cl349@3368 508 schedule_task(&update_tq);
cl349@3368 509 }
cl349@3368 510
cl349@3368 511 #endif /* ENABLE_VBD_UPDATE */
cl349@3368 512 /*============================================================================*/
cl349@3368 513
cl349@3368 514 static void kick_pending_request_queues(void)
cl349@3368 515 {
cl349@3368 516 /* We kick pending request queues if the ring is reasonably empty. */
cl349@3368 517 if ( (nr_pending != 0) &&
kaf24@3387 518 (RING_PENDING_REQUESTS(BLKIF_RING, &blk_ring) <
kaf24@3448 519 (RING_SIZE(BLKIF_RING, &blk_ring) >> 1)) )
cl349@3368 520 {
cl349@3368 521 /* Attempt to drain the queue, but bail if the ring becomes full. */
kaf24@3387 522 while ( (nr_pending != 0) && !RING_FULL(BLKIF_RING, &blk_ring) )
cl349@3368 523 do_blkif_request(pending_queues[--nr_pending]);
cl349@3368 524 }
cl349@3368 525 }
cl349@3368 526
cl349@3368 527 int blkif_open(struct inode *inode, struct file *filep)
cl349@3368 528 {
cl349@3368 529 short xldev = inode->i_rdev;
cl349@3368 530 struct gendisk *gd = get_gendisk(xldev);
cl349@3368 531 xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
cl349@3368 532 short minor = MINOR(xldev);
cl349@3368 533
cl349@3368 534 if ( gd->part[minor].nr_sects == 0 )
cl349@3368 535 {
cl349@3368 536 /*
cl349@3368 537 * Device either doesn't exist, or has zero capacity; we use a few
cl349@3368 538 * cheesy heuristics to return the relevant error code
cl349@3368 539 */
cl349@3368 540 if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
cl349@3368 541 ((minor & (gd->max_p - 1)) != 0) )
cl349@3368 542 {
cl349@3368 543 /*
cl349@3368 544 * We have a real device, but no such partition, or we just have a
cl349@3368 545 * partition number, so we guess that this is the problem.
cl349@3368 546 */
cl349@3368 547 return -ENXIO; /* no such device or address */
cl349@3368 548 }
cl349@3368 549 else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
cl349@3368 550 {
cl349@3368 551 /* This is a removable device => assume that media is missing. */
cl349@3368 552 return -ENOMEDIUM; /* media not present (this is a guess) */
cl349@3368 553 }
cl349@3368 554 else
cl349@3368 555 {
cl349@3368 556 /* Just go for the general 'no such device' error. */
cl349@3368 557 return -ENODEV; /* no such device */
cl349@3368 558 }
cl349@3368 559 }
cl349@3368 560
cl349@3368 561 /* Update of usage count is protected by per-device semaphore. */
cl349@3368 562 disk->usage++;
cl349@3368 563
cl349@3368 564 return 0;
cl349@3368 565 }
cl349@3368 566
cl349@3368 567
cl349@3368 568 int blkif_release(struct inode *inode, struct file *filep)
cl349@3368 569 {
cl349@3368 570 xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
cl349@3368 571
cl349@3368 572 /*
cl349@3368 573 * When usage drops to zero it may allow more VBD updates to occur.
cl349@3368 574 * Update of usage count is protected by a per-device semaphore.
cl349@3368 575 */
cl349@3368 576 if ( --disk->usage == 0 ) {
cl349@3368 577 vbd_update();
cl349@3368 578 }
cl349@3368 579
cl349@3368 580 return 0;
cl349@3368 581 }
cl349@3368 582
cl349@3368 583
cl349@3368 584 int blkif_ioctl(struct inode *inode, struct file *filep,
cl349@3368 585 unsigned command, unsigned long argument)
cl349@3368 586 {
cl349@3368 587 kdev_t dev = inode->i_rdev;
cl349@3368 588 struct hd_geometry *geo = (struct hd_geometry *)argument;
cl349@3368 589 struct gendisk *gd;
cl349@3368 590 struct hd_struct *part;
cl349@3368 591 int i;
cl349@3368 592 unsigned short cylinders;
cl349@3368 593 byte heads, sectors;
cl349@3368 594
cl349@3368 595 /* NB. No need to check permissions. That is done for us. */
cl349@3368 596
cl349@3368 597 DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
cl349@3368 598 command, (long) argument, dev);
cl349@3368 599
cl349@3368 600 gd = get_gendisk(dev);
cl349@3368 601 part = &gd->part[MINOR(dev)];
cl349@3368 602
cl349@3368 603 switch ( command )
cl349@3368 604 {
cl349@3368 605 case BLKGETSIZE:
cl349@3368 606 DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
cl349@3368 607 return put_user(part->nr_sects, (unsigned long *) argument);
cl349@3368 608
cl349@3368 609 case BLKGETSIZE64:
cl349@3368 610 DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
cl349@3368 611 (u64)part->nr_sects * 512);
cl349@3368 612 return put_user((u64)part->nr_sects * 512, (u64 *) argument);
cl349@3368 613
cl349@3368 614 case BLKRRPART: /* re-read partition table */
cl349@3368 615 DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
cl349@3368 616 return blkif_revalidate(dev);
cl349@3368 617
cl349@3368 618 case BLKSSZGET:
cl349@3368 619 return hardsect_size[MAJOR(dev)][MINOR(dev)];
cl349@3368 620
cl349@3368 621 case BLKBSZGET: /* get block size */
cl349@3368 622 DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
cl349@3368 623 break;
cl349@3368 624
cl349@3368 625 case BLKBSZSET: /* set block size */
cl349@3368 626 DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET);
cl349@3368 627 break;
cl349@3368 628
cl349@3368 629 case BLKRASET: /* set read-ahead */
cl349@3368 630 DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET);
cl349@3368 631 break;
cl349@3368 632
cl349@3368 633 case BLKRAGET: /* get read-ahead */
cl349@3368 634 DPRINTK_IOCTL(" BLKRAGET: %x\n", BLKRAGET);
cl349@3368 635 break;
cl349@3368 636
cl349@3368 637 case HDIO_GETGEO:
cl349@3368 638 DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO);
cl349@3368 639 if (!argument) return -EINVAL;
cl349@3368 640
cl349@3368 641 /* We don't have real geometry info, but let's at least return
kaf24@3446 642 values consistent with the size of the device */
cl349@3368 643
cl349@3368 644 heads = 0xff;
cl349@3368 645 sectors = 0x3f;
cl349@3368 646 cylinders = part->nr_sects / (heads * sectors);
cl349@3368 647
cl349@3368 648 if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
cl349@3368 649 if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
cl349@3368 650 if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
cl349@3368 651 if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT;
cl349@3368 652
cl349@3368 653 return 0;
cl349@3368 654
cl349@3368 655 case HDIO_GETGEO_BIG:
cl349@3368 656 DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
cl349@3368 657 if (!argument) return -EINVAL;
cl349@3368 658
cl349@3368 659 /* We don't have real geometry info, but let's at least return
kaf24@3446 660 values consistent with the size of the device */
cl349@3368 661
cl349@3368 662 heads = 0xff;
cl349@3368 663 sectors = 0x3f;
cl349@3368 664 cylinders = part->nr_sects / (heads * sectors);
cl349@3368 665
cl349@3368 666 if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
cl349@3368 667 if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
cl349@3368 668 if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
cl349@3368 669 if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT;
cl349@3368 670
cl349@3368 671 return 0;
cl349@3368 672
cl349@3368 673 case CDROMMULTISESSION:
cl349@3368 674 DPRINTK("FIXME: support multisession CDs later\n");
cl349@3368 675 for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
cl349@3368 676 if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
cl349@3368 677 return 0;
cl349@3368 678
cl349@3368 679 case SCSI_IOCTL_GET_BUS_NUMBER:
cl349@3368 680 DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif");
cl349@3368 681 return -ENOSYS;
cl349@3368 682
cl349@3368 683 default:
cl349@3368 684 printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command);
cl349@3368 685 return -ENOSYS;
cl349@3368 686 }
cl349@3368 687
cl349@3368 688 return 0;
cl349@3368 689 }
cl349@3368 690
cl349@3368 691
cl349@3368 692
cl349@3368 693 /* check media change: should probably do something here in some cases :-) */
cl349@3368 694 int blkif_check(kdev_t dev)
cl349@3368 695 {
cl349@3368 696 DPRINTK("blkif_check\n");
cl349@3368 697 return 0;
cl349@3368 698 }
cl349@3368 699
cl349@3368 700 int blkif_revalidate(kdev_t dev)
cl349@3368 701 {
cl349@3368 702 struct block_device *bd;
cl349@3368 703 struct gendisk *gd;
cl349@3368 704 xl_disk_t *disk;
cl349@3368 705 unsigned long capacity;
cl349@3368 706 int i, rc = 0;
cl349@3368 707
cl349@3368 708 if ( (bd = bdget(dev)) == NULL )
cl349@3368 709 return -EINVAL;
cl349@3368 710
cl349@3368 711 /*
cl349@3368 712 * Update of partition info, and check of usage count, is protected
cl349@3368 713 * by the per-block-device semaphore.
cl349@3368 714 */
cl349@3368 715 down(&bd->bd_sem);
cl349@3368 716
cl349@3368 717 if ( ((gd = get_gendisk(dev)) == NULL) ||
cl349@3368 718 ((disk = xldev_to_xldisk(dev)) == NULL) ||
cl349@3368 719 ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
cl349@3368 720 {
cl349@3368 721 rc = -EINVAL;
cl349@3368 722 goto out;
cl349@3368 723 }
cl349@3368 724
cl349@3368 725 if ( disk->usage > 1 )
cl349@3368 726 {
cl349@3368 727 rc = -EBUSY;
cl349@3368 728 goto out;
cl349@3368 729 }
cl349@3368 730
cl349@3368 731 /* Only reread partition table if VBDs aren't mapped to partitions. */
cl349@3368 732 if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
cl349@3368 733 {
cl349@3368 734 for ( i = gd->max_p - 1; i >= 0; i-- )
cl349@3368 735 {
cl349@3368 736 invalidate_device(dev+i, 1);
cl349@3368 737 gd->part[MINOR(dev+i)].start_sect = 0;
cl349@3368 738 gd->part[MINOR(dev+i)].nr_sects = 0;
cl349@3368 739 gd->sizes[MINOR(dev+i)] = 0;
cl349@3368 740 }
cl349@3368 741
cl349@3368 742 grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
cl349@3368 743 }
cl349@3368 744
cl349@3368 745 out:
cl349@3368 746 up(&bd->bd_sem);
cl349@3368 747 bdput(bd);
cl349@3368 748 return rc;
cl349@3368 749 }
cl349@3368 750
cl349@3368 751
cl349@3368 752 /*
cl349@3368 753 * blkif_queue_request
cl349@3368 754 *
cl349@3368 755 * request block io
cl349@3368 756 *
cl349@3368 757 * id: for guest use only.
cl349@3368 758 * operation: BLKIF_OP_{READ,WRITE,PROBE}
cl349@3368 759 * buffer: buffer to read/write into. this should be a
cl349@3368 760 * virtual address in the guest os.
cl349@3368 761 */
cl349@3368 762 static int blkif_queue_request(unsigned long id,
cl349@3368 763 int operation,
cl349@3368 764 char * buffer,
cl349@3368 765 unsigned long sector_number,
cl349@3368 766 unsigned short nr_sectors,
cl349@3368 767 kdev_t device)
cl349@3368 768 {
cl349@3368 769 unsigned long buffer_ma = virt_to_bus(buffer);
cl349@3368 770 unsigned long xid;
cl349@3368 771 struct gendisk *gd;
cl349@3368 772 blkif_request_t *req;
cl349@3368 773 struct buffer_head *bh;
cl349@3368 774 unsigned int fsect, lsect;
cl349@3368 775
cl349@3368 776 fsect = (buffer_ma & ~PAGE_MASK) >> 9;
cl349@3368 777 lsect = fsect + nr_sectors - 1;
cl349@3368 778
cl349@3368 779 /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
cl349@3368 780 if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
cl349@3368 781 BUG();
cl349@3368 782 if ( lsect > 7 )
cl349@3368 783 BUG();
cl349@3368 784
cl349@3368 785 buffer_ma &= PAGE_MASK;
cl349@3368 786
cl349@3368 787 if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
cl349@3368 788 return 1;
cl349@3368 789
cl349@3368 790 switch ( operation )
cl349@3368 791 {
cl349@3368 792
cl349@3368 793 case BLKIF_OP_READ:
cl349@3368 794 case BLKIF_OP_WRITE:
cl349@3368 795 gd = get_gendisk(device);
cl349@3368 796
cl349@3368 797 /*
cl349@3368 798 * Update the sector_number we'll pass down as appropriate; note that
cl349@3368 799 * we could sanity check that the resulting sector will be in this
cl349@3368 800 * partition, but this will happen in the driver backend anyhow.
cl349@3368 801 */
cl349@3368 802 sector_number += gd->part[MINOR(device)].start_sect;
cl349@3368 803
cl349@3368 804 /*
cl349@3368 805 * If this unit doesn't consist of virtual partitions then we clear
cl349@3368 806 * the partn bits from the device number.
cl349@3368 807 */
cl349@3368 808 if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
cl349@3368 809 GENHD_FL_VIRT_PARTNS) )
cl349@3368 810 device &= ~(gd->max_p - 1);
cl349@3368 811
cl349@3368 812 if ( (sg_operation == operation) &&
cl349@3368 813 (sg_dev == device) &&
cl349@3368 814 (sg_next_sect == sector_number) )
cl349@3368 815 {
kaf24@3387 816 req = RING_GET_REQUEST(BLKIF_RING, &blk_ring,
kaf24@3448 817 blk_ring.req_prod_pvt - 1);
cl349@3368 818 bh = (struct buffer_head *)id;
kaf24@3446 819
cl349@3368 820 bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
kaf24@3446 821
cl349@3368 822
kaf24@3446 823 rec_ring[req->id].id = id;
cl349@3368 824
cl349@3368 825 req->frame_and_sects[req->nr_segments] =
cl349@3368 826 buffer_ma | (fsect<<3) | lsect;
cl349@3368 827 if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
cl349@3368 828 sg_next_sect += nr_sectors;
cl349@3368 829 else
cl349@3368 830 DISABLE_SCATTERGATHER();
cl349@3368 831
cl349@3368 832 /* Update the copy of the request in the recovery ring. */
cl349@3368 833 translate_req_to_pfn(&rec_ring[req->id], req );
cl349@3368 834
cl349@3368 835 return 0;
cl349@3368 836 }
kaf24@3387 837 else if ( RING_FULL(BLKIF_RING, &blk_ring) )
cl349@3368 838 {
cl349@3368 839 return 1;
cl349@3368 840 }
cl349@3368 841 else
cl349@3368 842 {
cl349@3368 843 sg_operation = operation;
cl349@3368 844 sg_dev = device;
cl349@3368 845 sg_next_sect = sector_number + nr_sectors;
cl349@3368 846 }
cl349@3368 847 break;
cl349@3368 848
cl349@3368 849 default:
cl349@3368 850 panic("unknown op %d\n", operation);
cl349@3368 851 }
cl349@3368 852
cl349@3368 853 /* Fill out a communications ring structure. */
kaf24@3387 854 req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, blk_ring.req_prod_pvt);
cl349@3368 855
cl349@3368 856 xid = GET_ID_FROM_FREELIST();
cl349@3368 857 rec_ring[xid].id = id;
cl349@3368 858
cl349@3368 859 req->id = xid;
cl349@3368 860 req->operation = operation;
cl349@3368 861 req->sector_number = (blkif_sector_t)sector_number;
cl349@3368 862 req->device = device;
cl349@3368 863 req->nr_segments = 1;
cl349@3368 864 req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
cl349@3368 865
cl349@3368 866 /* Keep a private copy so we can reissue requests when recovering. */
cl349@3368 867 translate_req_to_pfn(&rec_ring[xid], req );
cl349@3368 868
kaf24@3387 869 blk_ring.req_prod_pvt++;
kaf24@3387 870
cl349@3368 871 return 0;
cl349@3368 872 }
cl349@3368 873
cl349@3368 874
cl349@3368 875 /*
cl349@3368 876 * do_blkif_request
cl349@3368 877 * read a block; request is in a request queue
cl349@3368 878 */
cl349@3368 879 void do_blkif_request(request_queue_t *rq)
cl349@3368 880 {
cl349@3368 881 struct request *req;
cl349@3368 882 struct buffer_head *bh, *next_bh;
cl349@3368 883 int rw, nsect, full, queued = 0;
cl349@3368 884
cl349@3368 885 DPRINTK("Entered do_blkif_request\n");
cl349@3368 886
cl349@3368 887 while ( !rq->plugged && !list_empty(&rq->queue_head))
cl349@3368 888 {
cl349@3368 889 if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
cl349@3368 890 goto out;
cl349@3368 891
cl349@3368 892 DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
cl349@3368 893 req, req->cmd, req->sector,
cl349@3368 894 req->current_nr_sectors, req->nr_sectors, req->bh);
cl349@3368 895
cl349@3368 896 rw = req->cmd;
cl349@3368 897 if ( rw == READA )
cl349@3368 898 rw = READ;
cl349@3368 899 if ( unlikely((rw != READ) && (rw != WRITE)) )
cl349@3368 900 panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
cl349@3368 901
cl349@3368 902 req->errors = 0;
cl349@3368 903
cl349@3368 904 bh = req->bh;
cl349@3368 905 while ( bh != NULL )
cl349@3368 906 {
cl349@3368 907 next_bh = bh->b_reqnext;
cl349@3368 908 bh->b_reqnext = NULL;
cl349@3368 909
cl349@3368 910 full = blkif_queue_request(
cl349@3368 911 (unsigned long)bh,
cl349@3368 912 (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
cl349@3368 913 bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
cl349@3368 914
cl349@3368 915 if ( full )
cl349@3368 916 {
cl349@3368 917 bh->b_reqnext = next_bh;
cl349@3368 918 pending_queues[nr_pending++] = rq;
cl349@3368 919 if ( unlikely(nr_pending >= MAX_PENDING) )
cl349@3368 920 BUG();
cl349@3368 921 goto out;
cl349@3368 922 }
cl349@3368 923
cl349@3368 924 queued++;
cl349@3368 925
cl349@3368 926 /* Dequeue the buffer head from the request. */
cl349@3368 927 nsect = bh->b_size >> 9;
cl349@3368 928 bh = req->bh = next_bh;
cl349@3368 929
cl349@3368 930 if ( bh != NULL )
cl349@3368 931 {
cl349@3368 932 /* There's another buffer head to do. Update the request. */
cl349@3368 933 req->hard_sector += nsect;
cl349@3368 934 req->hard_nr_sectors -= nsect;
cl349@3368 935 req->sector = req->hard_sector;
cl349@3368 936 req->nr_sectors = req->hard_nr_sectors;
cl349@3368 937 req->current_nr_sectors = bh->b_size >> 9;
cl349@3368 938 req->buffer = bh->b_data;
cl349@3368 939 }
cl349@3368 940 else
cl349@3368 941 {
cl349@3368 942 /* That was the last buffer head. Finalise the request. */
cl349@3368 943 if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
cl349@3368 944 BUG();
cl349@3368 945 blkdev_dequeue_request(req);
cl349@3368 946 end_that_request_last(req);
cl349@3368 947 }
cl349@3368 948 }
cl349@3368 949 }
cl349@3368 950
cl349@3368 951 out:
cl349@3368 952 if ( queued != 0 )
cl349@3368 953 flush_requests();
cl349@3368 954 }
cl349@3368 955
cl349@3368 956
cl349@3368 957 static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
cl349@3368 958 {
kaf24@3387 959 RING_IDX i, rp;
cl349@3368 960 unsigned long flags;
cl349@3368 961 struct buffer_head *bh, *next_bh;
cl349@3368 962
cl349@3368 963 spin_lock_irqsave(&io_request_lock, flags);
cl349@3368 964
cl349@3368 965 if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) )
cl349@3368 966 {
cl349@3368 967 spin_unlock_irqrestore(&io_request_lock, flags);
cl349@3368 968 return;
cl349@3368 969 }
cl349@3368 970
smh22@3397 971 rp = blk_ring.sring->rsp_prod;
cl349@3368 972 rmb(); /* Ensure we see queued responses up to 'rp'. */
cl349@3368 973
kaf24@3387 974 for ( i = blk_ring.rsp_cons; i != rp; i++ )
cl349@3368 975 {
kaf24@3446 976 unsigned long id;
kaf24@3387 977 blkif_response_t *bret;
kaf24@3387 978
smh22@3397 979 bret = RING_GET_RESPONSE(BLKIF_RING, &blk_ring, i);
kaf24@3446 980 id = bret->id;
kaf24@3446 981 bh = (struct buffer_head *)rec_ring[id].id;
cl349@3368 982
kaf24@3446 983 blkif_completion( &rec_ring[id] );
cl349@3368 984
kaf24@3446 985 ADD_ID_TO_FREELIST(id);
cl349@3368 986
cl349@3368 987 switch ( bret->operation )
cl349@3368 988 {
cl349@3368 989 case BLKIF_OP_READ:
cl349@3368 990 case BLKIF_OP_WRITE:
cl349@3368 991 if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
cl349@3368 992 DPRINTK("Bad return from blkdev data request: %lx\n",
cl349@3368 993 bret->status);
cl349@3368 994 for ( ; bh != NULL; bh = next_bh )
cl349@3368 995 {
cl349@3368 996 next_bh = bh->b_reqnext;
cl349@3368 997 bh->b_reqnext = NULL;
cl349@3368 998 bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
cl349@3368 999 }
cl349@3368 1000
cl349@3368 1001 break;
cl349@3368 1002 case BLKIF_OP_PROBE:
cl349@3368 1003 memcpy(&blkif_control_rsp, bret, sizeof(*bret));
cl349@3368 1004 blkif_control_rsp_valid = 1;
cl349@3368 1005 break;
cl349@3368 1006 default:
cl349@3368 1007 BUG();
cl349@3368 1008 }
kaf24@3387 1009
cl349@3368 1010 }
kaf24@3387 1011 blk_ring.rsp_cons = i;
cl349@3368 1012
cl349@3368 1013 kick_pending_request_queues();
cl349@3368 1014
cl349@3368 1015 spin_unlock_irqrestore(&io_request_lock, flags);
cl349@3368 1016 }
cl349@3368 1017
cl349@3368 1018 #endif
cl349@3368 1019
cl349@3368 1020 /***************************** COMMON CODE *******************************/
cl349@3368 1021
cl349@3368 1022
cl349@3368 1023 void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
cl349@3368 1024 {
cl349@3368 1025 unsigned long flags, id;
kaf24@3387 1026 blkif_request_t *req_d;
cl349@3368 1027
cl349@3368 1028 retry:
kaf24@3387 1029 while ( RING_FULL(BLKIF_RING, &blk_ring) )
cl349@3368 1030 {
cl349@3368 1031 set_current_state(TASK_INTERRUPTIBLE);
cl349@3368 1032 schedule_timeout(1);
cl349@3368 1033 }
cl349@3368 1034
cl349@3368 1035 spin_lock_irqsave(&blkif_io_lock, flags);
kaf24@3387 1036 if ( RING_FULL(BLKIF_RING, &blk_ring) )
cl349@3368 1037 {
cl349@3368 1038 spin_unlock_irqrestore(&blkif_io_lock, flags);
cl349@3368 1039 goto retry;
cl349@3368 1040 }
cl349@3368 1041
cl349@3368 1042 DISABLE_SCATTERGATHER();
kaf24@3387 1043 req_d = RING_GET_REQUEST(BLKIF_RING, &blk_ring, blk_ring.req_prod_pvt);
kaf24@3387 1044 *req_d = *req;
cl349@3368 1045
cl349@3368 1046 id = GET_ID_FROM_FREELIST();
kaf24@3387 1047 req_d->id = id;
cl349@3368 1048 rec_ring[id].id = (unsigned long) req;
cl349@3368 1049
cl349@3368 1050 translate_req_to_pfn( &rec_ring[id], req );
cl349@3368 1051
kaf24@3387 1052 blk_ring.req_prod_pvt++;
cl349@3368 1053 flush_requests();
cl349@3368 1054
cl349@3368 1055 spin_unlock_irqrestore(&blkif_io_lock, flags);
cl349@3368 1056
cl349@3368 1057 while ( !blkif_control_rsp_valid )
cl349@3368 1058 {
cl349@3368 1059 set_current_state(TASK_INTERRUPTIBLE);
cl349@3368 1060 schedule_timeout(1);
cl349@3368 1061 }
cl349@3368 1062
cl349@3368 1063 memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
cl349@3368 1064 blkif_control_rsp_valid = 0;
cl349@3368 1065 }
cl349@3368 1066
cl349@3368 1067
cl349@3368 1068 /* Send a driver status notification to the domain controller. */
cl349@3368 1069 static void send_driver_status(int ok)
cl349@3368 1070 {
cl349@3368 1071 ctrl_msg_t cmsg = {
cl349@3368 1072 .type = CMSG_BLKIF_FE,
cl349@3368 1073 .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
cl349@3368 1074 .length = sizeof(blkif_fe_driver_status_t),
cl349@3368 1075 };
cl349@3368 1076 blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
cl349@3368 1077
cl349@3368 1078 msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
cl349@3368 1079
cl349@3368 1080 ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
cl349@3368 1081 }
cl349@3368 1082
cl349@3368 1083 /* Tell the controller to bring up the interface. */
cl349@3368 1084 static void blkif_send_interface_connect(void)
cl349@3368 1085 {
cl349@3368 1086 ctrl_msg_t cmsg = {
cl349@3368 1087 .type = CMSG_BLKIF_FE,
cl349@3368 1088 .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
cl349@3368 1089 .length = sizeof(blkif_fe_interface_connect_t),
cl349@3368 1090 };
cl349@3368 1091 blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
cl349@3368 1092
cl349@3368 1093 msg->handle = 0;
kaf24@3387 1094 msg->shmem_frame = (virt_to_machine(blk_ring.sring) >> PAGE_SHIFT);
cl349@3368 1095
cl349@3368 1096 ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
cl349@3368 1097 }
cl349@3368 1098
cl349@3368 1099 static void blkif_free(void)
cl349@3368 1100 {
cl349@3368 1101 /* Prevent new requests being issued until we fix things up. */
cl349@3368 1102 spin_lock_irq(&blkif_io_lock);
cl349@3368 1103 recovery = 1;
cl349@3368 1104 blkif_state = BLKIF_STATE_DISCONNECTED;
cl349@3368 1105 spin_unlock_irq(&blkif_io_lock);
cl349@3368 1106
cl349@3368 1107 /* Free resources associated with old device channel. */
kaf24@3387 1108 if ( blk_ring.sring != NULL )
cl349@3368 1109 {
kaf24@3387 1110 free_page((unsigned long)blk_ring.sring);
kaf24@3387 1111 blk_ring.sring = NULL;
cl349@3368 1112 }
cl349@3368 1113 free_irq(blkif_irq, NULL);
cl349@3368 1114 blkif_irq = 0;
cl349@3368 1115
cl349@3368 1116 unbind_evtchn_from_irq(blkif_evtchn);
cl349@3368 1117 blkif_evtchn = 0;
cl349@3368 1118 }
cl349@3368 1119
cl349@3368 1120 static void blkif_close(void)
cl349@3368 1121 {
cl349@3368 1122 }
cl349@3368 1123
cl349@3368 1124 /* Move from CLOSED to DISCONNECTED state. */
cl349@3368 1125 static void blkif_disconnect(void)
cl349@3368 1126 {
kaf24@3387 1127 blkif_sring_t *sring;
kaf24@3387 1128
kaf24@3387 1129 if ( blk_ring.sring != NULL )
kaf24@3387 1130 free_page((unsigned long)blk_ring.sring);
kaf24@3387 1131
kaf24@3387 1132 sring = (blkif_sring_t *)__get_free_page(GFP_KERNEL);
kaf24@3387 1133 SHARED_RING_INIT(BLKIF_RING, sring);
kaf24@3387 1134 FRONT_RING_INIT(BLKIF_RING, &blk_ring, sring);
cl349@3368 1135 blkif_state = BLKIF_STATE_DISCONNECTED;
cl349@3368 1136 blkif_send_interface_connect();
cl349@3368 1137 }
cl349@3368 1138
cl349@3368 1139 static void blkif_reset(void)
cl349@3368 1140 {
cl349@3368 1141 blkif_free();
cl349@3368 1142 blkif_disconnect();
cl349@3368 1143 }
cl349@3368 1144
cl349@3368 1145 static void blkif_recover(void)
cl349@3368 1146 {
cl349@3368 1147 int i;
kaf24@3387 1148 blkif_request_t *req;
cl349@3368 1149
cl349@3368 1150 /* Hmm, requests might be re-ordered when we re-issue them.
cl349@3368 1151 * This will need to be fixed once we have barriers */
cl349@3368 1152
cl349@3368 1153 /* Stage 1 : Find active and move to safety. */
kaf24@3387 1154 for ( i = 0; i < RING_SIZE(BLKIF_RING, &blk_ring); i++ )
cl349@3368 1155 {
cl349@3368 1156 if ( rec_ring[i].id >= PAGE_OFFSET )
cl349@3368 1157 {
kaf24@3387 1158 req = RING_GET_REQUEST(BLKIF_RING, &blk_ring,
kaf24@3448 1159 blk_ring.req_prod_pvt);
kaf24@3387 1160 translate_req_to_mfn(req, &rec_ring[i]);
kaf24@3387 1161 blk_ring.req_prod_pvt++;
cl349@3368 1162 }
cl349@3368 1163 }
cl349@3368 1164
cl349@3368 1165 /* Stage 2 : Set up shadow list. */
kaf24@3387 1166 for ( i = 0; i < blk_ring.req_prod_pvt; i++ )
cl349@3368 1167 {
kaf24@3387 1168 req = RING_GET_REQUEST(BLKIF_RING, &blk_ring, i);
kaf24@3448 1169 rec_ring[i].id = req->id;
kaf24@3387 1170 req->id = i;
kaf24@3387 1171 translate_req_to_pfn(&rec_ring[i], req);
cl349@3368 1172 }
cl349@3368 1173
cl349@3368 1174 /* Stage 3 : Set up free list. */
kaf24@3387 1175 for ( ; i < RING_SIZE(BLKIF_RING, &blk_ring); i++ )
cl349@3368 1176 rec_ring[i].id = i+1;
kaf24@3387 1177 rec_ring_free = blk_ring.req_prod_pvt;
kaf24@3387 1178 rec_ring[RING_SIZE(BLKIF_RING, &blk_ring)-1].id = 0x0fffffff;
cl349@3368 1179
cl349@3368 1180 /* blk_ring.sring->req_prod will be set when we flush_requests(). */
cl349@3368 1181 wmb();
cl349@3368 1182
cl349@3368 1183 /* Switch off recovery mode, using a memory barrier to ensure that
cl349@3368 1184 * it's seen before we flush requests - we don't want to miss any
cl349@3368 1185 * interrupts. */
cl349@3368 1186 recovery = 0;
cl349@3368 1187 wmb();
cl349@3368 1188
cl349@3368 1189 /* Kicks things back into life. */
cl349@3368 1190 flush_requests();
cl349@3368 1191
cl349@3368 1192 /* Now safe to let other people use the interface. */
cl349@3368 1193 blkif_state = BLKIF_STATE_CONNECTED;
cl349@3368 1194 }
cl349@3368 1195
cl349@3368 1196 static void blkif_connect(blkif_fe_interface_status_t *status)
cl349@3368 1197 {
cl349@3368 1198 int err = 0;
cl349@3368 1199
cl349@3368 1200 blkif_evtchn = status->evtchn;
cl349@3368 1201 blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
cl349@3368 1202
cl349@3368 1203 err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
cl349@3368 1204 if ( err )
cl349@3368 1205 {
cl349@3368 1206 printk(KERN_ALERT "xen_blk: request_irq failed (err=%d)\n", err);
cl349@3368 1207 return;
cl349@3368 1208 }
cl349@3368 1209
cl349@3368 1210 if ( recovery )
cl349@3368 1211 {
cl349@3368 1212 blkif_recover();
cl349@3368 1213 }
cl349@3368 1214 else
cl349@3368 1215 {
cl349@3368 1216 /* Transition to connected in case we need to do
cl349@3368 1217 * a partition probe on a whole disk. */
cl349@3368 1218 blkif_state = BLKIF_STATE_CONNECTED;
cl349@3368 1219
cl349@3368 1220 /* Probe for discs attached to the interface. */
cl349@3368 1221 xlvbd_init();
cl349@3368 1222 }
cl349@3368 1223
cl349@3368 1224 /* Kick pending requests. */
cl349@3368 1225 spin_lock_irq(&blkif_io_lock);
cl349@3368 1226 kick_pending_request_queues();
cl349@3368 1227 spin_unlock_irq(&blkif_io_lock);
cl349@3368 1228 }
cl349@3368 1229
cl349@3368 1230 static void unexpected(blkif_fe_interface_status_t *status)
cl349@3368 1231 {
kaf24@3446 1232 DPRINTK(" Unexpected blkif status %u in state %u\n",
kaf24@3446 1233 status->status, blkif_state);
cl349@3368 1234 }
cl349@3368 1235
cl349@3368 1236 static void blkif_status(blkif_fe_interface_status_t *status)
cl349@3368 1237 {
cl349@3368 1238 if ( status->handle != blkif_handle )
cl349@3368 1239 {
cl349@3369 1240 WPRINTK(" Invalid blkif: handle=%u\n", status->handle);
cl349@3369 1241 unexpected(status);
cl349@3368 1242 return;
cl349@3368 1243 }
cl349@3368 1244
cl349@3368 1245 switch ( status->status )
cl349@3368 1246 {
cl349@3368 1247 case BLKIF_INTERFACE_STATUS_CLOSED:
cl349@3368 1248 switch ( blkif_state )
cl349@3368 1249 {
cl349@3368 1250 case BLKIF_STATE_CLOSED:
cl349@3368 1251 unexpected(status);
cl349@3368 1252 break;
cl349@3368 1253 case BLKIF_STATE_DISCONNECTED:
cl349@3368 1254 case BLKIF_STATE_CONNECTED:
cl349@3368 1255 unexpected(status);
cl349@3368 1256 blkif_close();
cl349@3368 1257 break;
cl349@3368 1258 }
cl349@3368 1259 break;
cl349@3368 1260
cl349@3368 1261 case BLKIF_INTERFACE_STATUS_DISCONNECTED:
cl349@3368 1262 switch ( blkif_state )
cl349@3368 1263 {
cl349@3368 1264 case BLKIF_STATE_CLOSED:
cl349@3368 1265 blkif_disconnect();
cl349@3368 1266 break;
cl349@3368 1267 case BLKIF_STATE_DISCONNECTED:
cl349@3368 1268 case BLKIF_STATE_CONNECTED:
cl349@3368 1269 /* unexpected(status); */ /* occurs during suspend/resume */
cl349@3368 1270 blkif_reset();
cl349@3368 1271 break;
cl349@3368 1272 }
cl349@3368 1273 break;
cl349@3368 1274
cl349@3368 1275 case BLKIF_INTERFACE_STATUS_CONNECTED:
cl349@3368 1276 switch ( blkif_state )
cl349@3368 1277 {
cl349@3368 1278 case BLKIF_STATE_CLOSED:
cl349@3368 1279 unexpected(status);
cl349@3368 1280 blkif_disconnect();
cl349@3368 1281 blkif_connect(status);
cl349@3368 1282 break;
cl349@3368 1283 case BLKIF_STATE_DISCONNECTED:
cl349@3368 1284 blkif_connect(status);
cl349@3368 1285 break;
cl349@3368 1286 case BLKIF_STATE_CONNECTED:
cl349@3368 1287 unexpected(status);
cl349@3368 1288 blkif_connect(status);
cl349@3368 1289 break;
cl349@3368 1290 }
cl349@3368 1291 break;
cl349@3368 1292
kaf24@3446 1293 case BLKIF_INTERFACE_STATUS_CHANGED:
cl349@3368 1294 switch ( blkif_state )
cl349@3368 1295 {
cl349@3368 1296 case BLKIF_STATE_CLOSED:
cl349@3368 1297 case BLKIF_STATE_DISCONNECTED:
cl349@3368 1298 unexpected(status);
cl349@3368 1299 break;
cl349@3368 1300 case BLKIF_STATE_CONNECTED:
cl349@3368 1301 vbd_update();
cl349@3368 1302 break;
cl349@3368 1303 }
kaf24@3446 1304 break;
cl349@3368 1305
cl349@3368 1306 default:
cl349@3368 1307 WPRINTK(" Invalid blkif status: %d\n", status->status);
cl349@3368 1308 break;
cl349@3368 1309 }
cl349@3368 1310 }
cl349@3368 1311
cl349@3368 1312
cl349@3368 1313 static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
cl349@3368 1314 {
cl349@3368 1315 switch ( msg->subtype )
cl349@3368 1316 {
cl349@3368 1317 case CMSG_BLKIF_FE_INTERFACE_STATUS:
cl349@3368 1318 if ( msg->length != sizeof(blkif_fe_interface_status_t) )
cl349@3368 1319 goto parse_error;
cl349@3368 1320 blkif_status((blkif_fe_interface_status_t *)
cl349@3368 1321 &msg->msg[0]);
cl349@3368 1322 break;
cl349@3368 1323 default:
cl349@3368 1324 goto parse_error;
cl349@3368 1325 }
cl349@3368 1326
cl349@3368 1327 ctrl_if_send_response(msg);
cl349@3368 1328 return;
cl349@3368 1329
cl349@3368 1330 parse_error:
cl349@3368 1331 msg->length = 0;
cl349@3368 1332 ctrl_if_send_response(msg);
cl349@3368 1333 }
cl349@3368 1334
cl349@3368 1335 int wait_for_blkif(void)
cl349@3368 1336 {
cl349@3368 1337 int err = 0;
cl349@3368 1338 int i;
cl349@3368 1339 send_driver_status(1);
cl349@3368 1340
cl349@3368 1341 /*
cl349@3368 1342 * We should read 'nr_interfaces' from the response message and wait
cl349@3368 1343 * for notifications before proceeding. For now we assume that we
cl349@3368 1344 * will be notified of exactly one interface.
cl349@3368 1345 */
cl349@3368 1346 for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
cl349@3368 1347 {
cl349@3368 1348 set_current_state(TASK_INTERRUPTIBLE);
cl349@3368 1349 schedule_timeout(1);
cl349@3368 1350 }
cl349@3368 1351
cl349@3368 1352 if ( blkif_state != BLKIF_STATE_CONNECTED )
cl349@3368 1353 {
cl349@3368 1354 printk(KERN_INFO "xen_blk: Timeout connecting to device!\n");
cl349@3368 1355 err = -ENOSYS;
cl349@3368 1356 }
cl349@3368 1357 return err;
cl349@3368 1358 }
cl349@3368 1359
cl349@3368 1360 int __init xlblk_init(void)
cl349@3368 1361 {
cl349@3368 1362 int i;
cl349@3368 1363
cl349@3368 1364 if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
cl349@3368 1365 (xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
cl349@3368 1366 return 0;
cl349@3368 1367
cl349@3368 1368 printk(KERN_INFO "xen_blk: Initialising virtual block device driver\n");
cl349@3368 1369
cl349@3368 1370 rec_ring_free = 0;
kaf24@3387 1371 for ( i = 0; i < RING_SIZE(BLKIF_RING, &blk_ring); i++ )
kaf24@3446 1372 rec_ring[i].id = i+1;
kaf24@3387 1373 rec_ring[RING_SIZE(BLKIF_RING, &blk_ring)-1].id = 0x0fffffff;
cl349@3368 1374
cl349@3368 1375 (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
cl349@3368 1376 CALLBACK_IN_BLOCKING_CONTEXT);
cl349@3368 1377
cl349@3368 1378 wait_for_blkif();
cl349@3368 1379
cl349@3368 1380 return 0;
cl349@3368 1381 }
cl349@3368 1382
cl349@3368 1383 void blkdev_suspend(void)
cl349@3368 1384 {
cl349@3368 1385 }
cl349@3368 1386
cl349@3368 1387 void blkdev_resume(void)
cl349@3368 1388 {
cl349@3368 1389 send_driver_status(1);
cl349@3368 1390 }
cl349@3368 1391
cl349@3368 1392 /* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
cl349@3368 1393
cl349@3368 1394 void blkif_completion(blkif_request_t *req)
cl349@3368 1395 {
cl349@3368 1396 int i;
cl349@3368 1397
cl349@3368 1398 switch ( req->operation )
cl349@3368 1399 {
cl349@3368 1400 case BLKIF_OP_READ:
kaf24@3446 1401 for ( i = 0; i < req->nr_segments; i++ )
kaf24@3446 1402 {
kaf24@3446 1403 unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
kaf24@3446 1404 unsigned long mfn = phys_to_machine_mapping[pfn];
kaf24@3446 1405 xen_machphys_update(mfn, pfn);
kaf24@3446 1406 }
kaf24@3446 1407 break;
cl349@3368 1408 }
cl349@3368 1409
cl349@3368 1410 }