
view linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c @ 3646:17e50d7ed675

bitkeeper revision 1.1159.223.57 (42009aa1vckOCJ3XiCz8SXnP4BQsFw)

Small hack to make the CDROMMULTISESSION ioctl return success rather than -ENOSYS, as we did on Xen 1.2.
From: Anthony Liguori aliguori@us.ibm.com
Signed-off-by: ian.pratt@cl.cam.ac.uk
author iap10@labyrinth.cl.cam.ac.uk
date Wed Feb 02 09:17:21 2005 +0000 (2005-02-02)
parents 515888a1b568
children d49c0626928e
/******************************************************************************
 * blkfront.c
 *
 * XenLinux virtual block-device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <linux/version.h>

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
#include "block.h"
#else
#include "common.h"
#include <linux/blk.h>
#include <linux/tqueue.h>
#endif

#include <linux/cdrom.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <scsi/scsi.h>
#include <asm-xen/ctrl_if.h>

typedef unsigned char byte; /* from linux/ide.h */

/* Control whether runtime update of vbds is enabled. */
#define ENABLE_VBD_UPDATE 1

#if ENABLE_VBD_UPDATE
static void vbd_update(void);
#else
static void vbd_update(void){};
#endif

#define BLKIF_STATE_CLOSED       0
#define BLKIF_STATE_DISCONNECTED 1
#define BLKIF_STATE_CONNECTED    2

#define WPRINTK(fmt, args...) printk(KERN_WARNING "xen_blk: " fmt, ##args)

static int blkif_handle = 0;
static unsigned int blkif_state = BLKIF_STATE_CLOSED;
static unsigned int blkif_evtchn = 0;
static unsigned int blkif_irq = 0;

static int blkif_control_rsp_valid;
static blkif_response_t blkif_control_rsp;

static blkif_ring_t *blk_ring = NULL;
static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
static BLKIF_RING_IDX req_prod;  /* Private request producer.         */

unsigned long rec_ring_free;
blkif_request_t rec_ring[BLKIF_RING_SIZE];

static int recovery = 0; /* "Recovery in progress" flag.  Protected
                          * by the blkif_io_lock */

/* We plug the I/O ring if the driver is suspended or if the ring is full. */
#define BLKIF_RING_FULL (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
                         (blkif_state != BLKIF_STATE_CONNECTED))

static void kick_pending_request_queues(void);

int __init xlblk_init(void);

void blkif_completion( blkif_request_t *req );
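/*
 * rec_ring[] doubles as a free list: an unused slot's .id field holds the
 * index of the next free slot, with rec_ring_free pointing at the head.
 * In-use slots record the guest-side request pointer so that outstanding
 * requests can be re-issued after backend recovery.
 */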
static inline int GET_ID_FROM_FREELIST( void )
{
    unsigned long free = rec_ring_free;

    if ( free > BLKIF_RING_SIZE )
        BUG();

    rec_ring_free = rec_ring[free].id;

    rec_ring[free].id = 0x0fffffee; /* debug */

    return free;
}

static inline void ADD_ID_TO_FREELIST( unsigned long id )
{
    rec_ring[id].id = rec_ring_free;
    rec_ring_free = id;
}


/************************  COMMON CODE  (inlined)  ************************/

/* Kernel-specific definitions used in the common code */
#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
#define DISABLE_SCATTERGATHER()
#else
static int sg_operation = -1;
#define DISABLE_SCATTERGATHER() (sg_operation = -1)
#endif
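/*
 * The shadow copy kept in rec_ring[] stores pseudo-physical frame numbers so
 * it remains valid if machine frames change; translate_req_to_pfn() and
 * translate_req_to_mfn() convert between that form and the machine-frame form
 * carried on the shared ring.
 */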
static inline void translate_req_to_pfn(blkif_request_t *xreq,
                                        blkif_request_t *req)
{
    int i;

    xreq->operation     = req->operation;
    xreq->nr_segments   = req->nr_segments;
    xreq->device        = req->device;
    /* preserve id */
    xreq->sector_number = req->sector_number;

    for ( i = 0; i < req->nr_segments; i++ )
        xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
}

static inline void translate_req_to_mfn(blkif_request_t *xreq,
                                        blkif_request_t *req)
{
    int i;

    xreq->operation     = req->operation;
    xreq->nr_segments   = req->nr_segments;
    xreq->device        = req->device;
    xreq->id            = req->id;   /* copy id (unlike above) */
    xreq->sector_number = req->sector_number;

    for ( i = 0; i < req->nr_segments; i++ )
        xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
}


static inline void flush_requests(void)
{
    DISABLE_SCATTERGATHER();
    wmb(); /* Ensure the backend can see the requests before we notify it. */
    blk_ring->req_prod = req_prod;
    notify_via_evtchn(blkif_evtchn);
}
/**************************  KERNEL VERSION 2.6  **************************/

#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)

module_init(xlblk_init);

#if ENABLE_VBD_UPDATE
static void vbd_update(void)
{
}
#endif /* ENABLE_VBD_UPDATE */

static void kick_pending_request_queues(void)
{
    if ( (xlbd_blk_queue != NULL) &&
         test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
    {
        blk_start_queue(xlbd_blk_queue);
        /* XXXcl call to request_fn should not be needed but
         * we get stuck without... needs investigating
         */
        xlbd_blk_queue->request_fn(xlbd_blk_queue);
    }
}

int blkif_open(struct inode *inode, struct file *filep)
{
    struct gendisk *gd = inode->i_bdev->bd_disk;
    struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;

    /* Update of usage count is protected by per-device semaphore. */
    di->mi->usage++;

    return 0;
}

int blkif_release(struct inode *inode, struct file *filep)
{
    struct gendisk *gd = inode->i_bdev->bd_disk;
    struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;

    /*
     * When usage drops to zero it may allow more VBD updates to occur.
     * Update of usage count is protected by a per-device semaphore.
     */
    if (--di->mi->usage == 0) {
        vbd_update();
    }

    return 0;
}

int blkif_ioctl(struct inode *inode, struct file *filep,
                unsigned command, unsigned long argument)
{
    int i;
    /* struct gendisk *gd = inode->i_bdev->bd_disk; */

    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                  command, (long)argument, inode->i_rdev);

    switch (command) {

    case HDIO_GETGEO:
        /* return ENOSYS to use defaults */
        return -ENOSYS;

    case CDROMMULTISESSION:
        DPRINTK("FIXME: support multisession CDs later\n");
        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
        return 0;

    default:
        printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
               command);
        return -ENOSYS;
    }

    return 0;
}
#if 0
/* check media change: should probably do something here in some cases :-) */
int blkif_check(kdev_t dev)
{
    DPRINTK("blkif_check\n");
    return 0;
}

int blkif_revalidate(kdev_t dev)
{
    struct block_device *bd;
    struct gendisk *gd;
    xen_block_t *disk;
    unsigned long capacity;
    int i, rc = 0;

    if ( (bd = bdget(dev)) == NULL )
        return -EINVAL;

    /*
     * Update of partition info, and check of usage count, is protected
     * by the per-block-device semaphore.
     */
    down(&bd->bd_sem);

    if ( ((gd = get_gendisk(dev)) == NULL) ||
         ((disk = xldev_to_xldisk(dev)) == NULL) ||
         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
    {
        rc = -EINVAL;
        goto out;
    }

    if ( disk->usage > 1 )
    {
        rc = -EBUSY;
        goto out;
    }

    /* Only reread partition table if VBDs aren't mapped to partitions. */
    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
    {
        for ( i = gd->max_p - 1; i >= 0; i-- )
        {
            invalidate_device(dev+i, 1);
            gd->part[MINOR(dev+i)].start_sect = 0;
            gd->part[MINOR(dev+i)].nr_sects   = 0;
            gd->sizes[MINOR(dev+i)]           = 0;
        }

        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
    }

 out:
    up(&bd->bd_sem);
    bdput(bd);
    return rc;
}
#endif
/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
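/*
 * Each frame_and_sects entry packs a page's machine address together with the
 * first and last 512-byte sector used within that page:
 * (buffer_ma | fsect << 3 | lsect).
 */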
static int blkif_queue_request(struct request *req)
{
    struct xlbd_disk_info *di =
        (struct xlbd_disk_info *)req->rq_disk->private_data;
    unsigned long buffer_ma;
    blkif_request_t *ring_req;
    struct bio *bio;
    struct bio_vec *bvec;
    int idx;
    unsigned long id;
    unsigned int fsect, lsect;

    if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
        return 1;

    /* Fill out a communications ring structure. */
    ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
    id = GET_ID_FROM_FREELIST();
    rec_ring[id].id = (unsigned long) req;

    ring_req->id = id;
    ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
        BLKIF_OP_READ;
    ring_req->sector_number = (blkif_sector_t)req->sector;
    ring_req->device = di->xd_device;

    ring_req->nr_segments = 0;
    rq_for_each_bio(bio, req)
    {
        bio_for_each_segment(bvec, bio, idx)
        {
            if ( ring_req->nr_segments == BLKIF_MAX_SEGMENTS_PER_REQUEST )
                BUG();
            buffer_ma = page_to_phys(bvec->bv_page);
            fsect = bvec->bv_offset >> 9;
            lsect = fsect + (bvec->bv_len >> 9) - 1;
            ring_req->frame_and_sects[ring_req->nr_segments++] =
                buffer_ma | (fsect << 3) | lsect;
        }
    }

    req_prod++;

    /* Keep a private copy so we can reissue requests when recovering. */
    translate_req_to_pfn(&rec_ring[id], ring_req);

    return 0;
}
/*
 * do_blkif_request
 *  read a block; request is in a request queue
 */
void do_blkif_request(request_queue_t *rq)
{
    struct request *req;
    int queued;

    DPRINTK("Entered do_blkif_request\n");

    queued = 0;

    while ((req = elv_next_request(rq)) != NULL) {
        if (!blk_fs_request(req)) {
            end_request(req, 0);
            continue;
        }

        if ( BLKIF_RING_FULL )
        {
            blk_stop_queue(rq);
            break;
        }
        DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
                req, req->cmd, req->sector, req->current_nr_sectors,
                req->nr_sectors, req->buffer,
                rq_data_dir(req) ? "write" : "read");
        blkdev_dequeue_request(req);
        if (blkif_queue_request(req)) {
            blk_stop_queue(rq);
            break;
        }
        queued++;
    }

    if (queued != 0)
        flush_requests();
}
static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
    struct request *req;
    blkif_response_t *bret;
    BLKIF_RING_IDX i, rp;
    unsigned long flags;

    spin_lock_irqsave(&blkif_io_lock, flags);

    if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
         unlikely(recovery) )
    {
        spin_unlock_irqrestore(&blkif_io_lock, flags);
        return IRQ_HANDLED;
    }

    rp = blk_ring->resp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */

    for ( i = resp_cons; i != rp; i++ )
    {
        unsigned long id;
        bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;

        id = bret->id;
        req = (struct request *)rec_ring[id].id;

        blkif_completion( &rec_ring[id] );

        ADD_ID_TO_FREELIST(id); /* overwrites req */

        switch ( bret->operation )
        {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
            if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
                DPRINTK("Bad return from blkdev data request: %x\n",
                        bret->status);
            if ( unlikely(end_that_request_first
                          (req,
                           (bret->status == BLKIF_RSP_OKAY),
                           req->hard_nr_sectors)) )
                BUG();
            end_that_request_last(req);
            break;
        case BLKIF_OP_PROBE:
            memcpy(&blkif_control_rsp, bret, sizeof(*bret));
            blkif_control_rsp_valid = 1;
            break;
        default:
            BUG();
        }
    }

    resp_cons = i;

    kick_pending_request_queues();

    spin_unlock_irqrestore(&blkif_io_lock, flags);

    return IRQ_HANDLED;
}
#else
/**************************  KERNEL VERSION 2.4  **************************/

static kdev_t        sg_dev;
static unsigned long sg_next_sect;

/*
 * Request queues with outstanding work, but ring is currently full.
 * We need no special lock here, as we always access this with the
 * blkif_io_lock held. We only need a small maximum list.
 */
#define MAX_PENDING 8
static request_queue_t *pending_queues[MAX_PENDING];
static int nr_pending;


#define blkif_io_lock io_request_lock

/*============================================================================*/
#if ENABLE_VBD_UPDATE

/*
 * blkif_update_int/update_vbds_task - handle VBD update events.
 *  Schedule a task for keventd to run, which will update the VBDs and perform
 *  the corresponding updates to our view of VBD state.
 */
static void update_vbds_task(void *unused)
{
    xlvbd_update_vbds();
}

static void vbd_update(void)
{
    static struct tq_struct update_tq;
    update_tq.routine = update_vbds_task;
    schedule_task(&update_tq);
}

#endif /* ENABLE_VBD_UPDATE */
/*============================================================================*/

static void kick_pending_request_queues(void)
{
    /* We kick pending request queues if the ring is reasonably empty. */
    if ( (nr_pending != 0) &&
         ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) )
    {
        /* Attempt to drain the queue, but bail if the ring becomes full. */
        while ( (nr_pending != 0) && !BLKIF_RING_FULL )
            do_blkif_request(pending_queues[--nr_pending]);
    }
}
int blkif_open(struct inode *inode, struct file *filep)
{
    short xldev = inode->i_rdev;
    struct gendisk *gd = get_gendisk(xldev);
    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
    short minor = MINOR(xldev);

    if ( gd->part[minor].nr_sects == 0 )
    {
        /*
         * Device either doesn't exist, or has zero capacity; we use a few
         * cheesy heuristics to return the relevant error code
         */
        if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
             ((minor & (gd->max_p - 1)) != 0) )
        {
            /*
             * We have a real device, but no such partition, or we just have a
             * partition number so guess this is the problem.
             */
            return -ENXIO;     /* no such device or address */
        }
        else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
        {
            /* This is a removable device => assume that media is missing. */
            return -ENOMEDIUM; /* media not present (this is a guess) */
        }
        else
        {
            /* Just go for the general 'no such device' error. */
            return -ENODEV;    /* no such device */
        }
    }

    /* Update of usage count is protected by per-device semaphore. */
    disk->usage++;

    return 0;
}


int blkif_release(struct inode *inode, struct file *filep)
{
    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);

    /*
     * When usage drops to zero it may allow more VBD updates to occur.
     * Update of usage count is protected by a per-device semaphore.
     */
    if ( --disk->usage == 0 ) {
        vbd_update();
    }

    return 0;
}
int blkif_ioctl(struct inode *inode, struct file *filep,
                unsigned command, unsigned long argument)
{
    kdev_t dev = inode->i_rdev;
    struct hd_geometry *geo = (struct hd_geometry *)argument;
    struct gendisk *gd;
    struct hd_struct *part;
    int i;
    unsigned short cylinders;
    byte heads, sectors;

    /* NB. No need to check permissions. That is done for us. */

    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                  command, (long) argument, dev);

    gd = get_gendisk(dev);
    part = &gd->part[MINOR(dev)];

    switch ( command )
    {
    case BLKGETSIZE:
        DPRINTK_IOCTL("   BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
        return put_user(part->nr_sects, (unsigned long *) argument);

    case BLKGETSIZE64:
        DPRINTK_IOCTL("   BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
                      (u64)part->nr_sects * 512);
        return put_user((u64)part->nr_sects * 512, (u64 *) argument);

    case BLKRRPART:                               /* re-read partition table */
        DPRINTK_IOCTL("   BLKRRPART: %x\n", BLKRRPART);
        return blkif_revalidate(dev);

    case BLKSSZGET:
        return hardsect_size[MAJOR(dev)][MINOR(dev)];

    case BLKBSZGET:                                        /* get block size */
        DPRINTK_IOCTL("   BLKBSZGET: %x\n", BLKBSZGET);
        break;

    case BLKBSZSET:                                        /* set block size */
        DPRINTK_IOCTL("   BLKBSZSET: %x\n", BLKBSZSET);
        break;

    case BLKRASET:                                         /* set read-ahead */
        DPRINTK_IOCTL("   BLKRASET: %x\n", BLKRASET);
        break;

    case BLKRAGET:                                         /* get read-ahead */
        DPRINTK_IOCTL("   BLKRAGET: %x\n", BLKRAGET);
        break;

    case HDIO_GETGEO:
        DPRINTK_IOCTL("   HDIO_GETGEO: %x\n", HDIO_GETGEO);
        if (!argument) return -EINVAL;

        /* We don't have real geometry info, but let's at least return
           values consistent with the size of the device */

        heads = 0xff;
        sectors = 0x3f;
        cylinders = part->nr_sects / (heads * sectors);

        if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
        if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
        if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
        if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT;

        return 0;

    case HDIO_GETGEO_BIG:
        DPRINTK_IOCTL("   HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
        if (!argument) return -EINVAL;

        /* We don't have real geometry info, but let's at least return
           values consistent with the size of the device */

        heads = 0xff;
        sectors = 0x3f;
        cylinders = part->nr_sects / (heads * sectors);

        if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
        if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
        if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
        if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT;

        return 0;

    case CDROMMULTISESSION:
        DPRINTK("FIXME: support multisession CDs later\n");
        for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
            if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
        return 0;

    case SCSI_IOCTL_GET_BUS_NUMBER:
        DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif");
        return -ENOSYS;

    default:
        printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command);
        return -ENOSYS;
    }

    return 0;
}
/* check media change: should probably do something here in some cases :-) */
int blkif_check(kdev_t dev)
{
    DPRINTK("blkif_check\n");
    return 0;
}

int blkif_revalidate(kdev_t dev)
{
    struct block_device *bd;
    struct gendisk *gd;
    xl_disk_t *disk;
    unsigned long capacity;
    int i, rc = 0;

    if ( (bd = bdget(dev)) == NULL )
        return -EINVAL;

    /*
     * Update of partition info, and check of usage count, is protected
     * by the per-block-device semaphore.
     */
    down(&bd->bd_sem);

    if ( ((gd = get_gendisk(dev)) == NULL) ||
         ((disk = xldev_to_xldisk(dev)) == NULL) ||
         ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
    {
        rc = -EINVAL;
        goto out;
    }

    if ( disk->usage > 1 )
    {
        rc = -EBUSY;
        goto out;
    }

    /* Only reread partition table if VBDs aren't mapped to partitions. */
    if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
    {
        for ( i = gd->max_p - 1; i >= 0; i-- )
        {
            invalidate_device(dev+i, 1);
            gd->part[MINOR(dev+i)].start_sect = 0;
            gd->part[MINOR(dev+i)].nr_sects   = 0;
            gd->sizes[MINOR(dev+i)]           = 0;
        }

        grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
    }

 out:
    up(&bd->bd_sem);
    bdput(bd);
    return rc;
}
/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
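/*
 * Consecutive buffer heads for the same device and operation are merged into
 * the previous ring entry (tracked via sg_operation/sg_dev/sg_next_sect)
 * until BLKIF_MAX_SEGMENTS_PER_REQUEST segments have been accumulated.
 */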
static int blkif_queue_request(unsigned long   id,
                               int             operation,
                               char *          buffer,
                               unsigned long   sector_number,
                               unsigned short  nr_sectors,
                               kdev_t          device)
{
    unsigned long       buffer_ma = virt_to_bus(buffer);
    unsigned long       xid;
    struct gendisk     *gd;
    blkif_request_t    *req;
    struct buffer_head *bh;
    unsigned int        fsect, lsect;

    fsect = (buffer_ma & ~PAGE_MASK) >> 9;
    lsect = fsect + nr_sectors - 1;

    /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
    if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
        BUG();
    if ( lsect > 7 )
        BUG();

    buffer_ma &= PAGE_MASK;

    if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
        return 1;

    switch ( operation )
    {

    case BLKIF_OP_READ:
    case BLKIF_OP_WRITE:
        gd = get_gendisk(device);

        /*
         * Update the sector_number we'll pass down as appropriate; note that
         * we could sanity check that resulting sector will be in this
         * partition, but this will happen in driver backend anyhow.
         */
        sector_number += gd->part[MINOR(device)].start_sect;

        /*
         * If this unit doesn't consist of virtual partitions then we clear
         * the partn bits from the device number.
         */
        if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
               GENHD_FL_VIRT_PARTNS) )
            device &= ~(gd->max_p - 1);

        if ( (sg_operation == operation) &&
             (sg_dev == device) &&
             (sg_next_sect == sector_number) )
        {
            req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req;
            bh = (struct buffer_head *)id;

            bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;

            rec_ring[req->id].id = id;

            req->frame_and_sects[req->nr_segments] =
                buffer_ma | (fsect<<3) | lsect;
            if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
                sg_next_sect += nr_sectors;
            else
                DISABLE_SCATTERGATHER();

            /* Update the copy of the request in the recovery ring. */
            translate_req_to_pfn(&rec_ring[req->id], req );

            return 0;
        }
        else if ( BLKIF_RING_FULL )
        {
            return 1;
        }
        else
        {
            sg_operation = operation;
            sg_dev       = device;
            sg_next_sect = sector_number + nr_sectors;
        }
        break;

    default:
        panic("unknown op %d\n", operation);
    }

    /* Fill out a communications ring structure. */
    req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;

    xid = GET_ID_FROM_FREELIST();
    rec_ring[xid].id = id;

    req->id            = xid;
    req->operation     = operation;
    req->sector_number = (blkif_sector_t)sector_number;
    req->device        = device;
    req->nr_segments   = 1;
    req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;

    req_prod++;

    /* Keep a private copy so we can reissue requests when recovering. */
    translate_req_to_pfn(&rec_ring[xid], req );

    return 0;
}
/*
 * do_blkif_request
 *  read a block; request is in a request queue
 */
void do_blkif_request(request_queue_t *rq)
{
    struct request *req;
    struct buffer_head *bh, *next_bh;
    int rw, nsect, full, queued = 0;

    DPRINTK("Entered do_blkif_request\n");

    while ( !rq->plugged && !list_empty(&rq->queue_head))
    {
        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
            goto out;

        DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
                req, req->cmd, req->sector,
                req->current_nr_sectors, req->nr_sectors, req->bh);

        rw = req->cmd;
        if ( rw == READA )
            rw = READ;
        if ( unlikely((rw != READ) && (rw != WRITE)) )
            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);

        req->errors = 0;

        bh = req->bh;
        while ( bh != NULL )
        {
            next_bh = bh->b_reqnext;
            bh->b_reqnext = NULL;

            full = blkif_queue_request(
                (unsigned long)bh,
                (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);

            if ( full )
            {
                bh->b_reqnext = next_bh;
                pending_queues[nr_pending++] = rq;
                if ( unlikely(nr_pending >= MAX_PENDING) )
                    BUG();
                goto out;
            }

            queued++;

            /* Dequeue the buffer head from the request. */
            nsect = bh->b_size >> 9;
            bh = req->bh = next_bh;

            if ( bh != NULL )
            {
                /* There's another buffer head to do. Update the request. */
                req->hard_sector += nsect;
                req->hard_nr_sectors -= nsect;
                req->sector = req->hard_sector;
                req->nr_sectors = req->hard_nr_sectors;
                req->current_nr_sectors = bh->b_size >> 9;
                req->buffer = bh->b_data;
            }
            else
            {
                /* That was the last buffer head. Finalise the request. */
                if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
                    BUG();
                blkdev_dequeue_request(req);
                end_that_request_last(req);
            }
        }
    }

 out:
    if ( queued != 0 )
        flush_requests();
}
static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
    BLKIF_RING_IDX i, rp;
    unsigned long flags;
    struct buffer_head *bh, *next_bh;

    spin_lock_irqsave(&io_request_lock, flags);

    if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) )
    {
        spin_unlock_irqrestore(&io_request_lock, flags);
        return;
    }

    rp = blk_ring->resp_prod;
    rmb(); /* Ensure we see queued responses up to 'rp'. */

    for ( i = resp_cons; i != rp; i++ )
    {
        unsigned long id;
        blkif_response_t *bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;

        id = bret->id;
        bh = (struct buffer_head *)rec_ring[id].id;

        blkif_completion( &rec_ring[id] );

        ADD_ID_TO_FREELIST(id);

        switch ( bret->operation )
        {
        case BLKIF_OP_READ:
        case BLKIF_OP_WRITE:
            if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
                DPRINTK("Bad return from blkdev data request: %lx\n",
                        bret->status);
            for ( ; bh != NULL; bh = next_bh )
            {
                next_bh = bh->b_reqnext;
                bh->b_reqnext = NULL;
                bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
            }
            break;
        case BLKIF_OP_PROBE:
            memcpy(&blkif_control_rsp, bret, sizeof(*bret));
            blkif_control_rsp_valid = 1;
            break;
        default:
            BUG();
        }
    }

    resp_cons = i;

    kick_pending_request_queues();

    spin_unlock_irqrestore(&io_request_lock, flags);
}

#endif
/*****************************  COMMON CODE  *******************************/
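/*
 * Issue a control request (e.g. BLKIF_OP_PROBE) on the shared ring and wait,
 * sleeping and retrying while the ring is full, until blkif_int() latches the
 * matching response into blkif_control_rsp.
 */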
void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
{
    unsigned long flags, id;

 retry:
    while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
    {
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(1);
    }

    spin_lock_irqsave(&blkif_io_lock, flags);
    if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
    {
        spin_unlock_irqrestore(&blkif_io_lock, flags);
        goto retry;
    }

    DISABLE_SCATTERGATHER();
    blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req;

    id = GET_ID_FROM_FREELIST();
    blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;
    rec_ring[id].id = (unsigned long) req;

    translate_req_to_pfn( &rec_ring[id], req );

    req_prod++;
    flush_requests();

    spin_unlock_irqrestore(&blkif_io_lock, flags);

    while ( !blkif_control_rsp_valid )
    {
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(1);
    }

    memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
    blkif_control_rsp_valid = 0;
}
/* Send a driver status notification to the domain controller. */
static void send_driver_status(int ok)
{
    ctrl_msg_t cmsg = {
        .type    = CMSG_BLKIF_FE,
        .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
        .length  = sizeof(blkif_fe_driver_status_t),
    };
    blkif_fe_driver_status_t *msg = (void*)cmsg.msg;

    msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);

    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}

/* Tell the controller to bring up the interface. */
static void blkif_send_interface_connect(void)
{
    ctrl_msg_t cmsg = {
        .type    = CMSG_BLKIF_FE,
        .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
        .length  = sizeof(blkif_fe_interface_connect_t),
    };
    blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;

    msg->handle      = 0;
    msg->shmem_frame = (virt_to_machine(blk_ring) >> PAGE_SHIFT);

    ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
}
static void blkif_free(void)
{
    /* Prevent new requests being issued until we fix things up. */
    spin_lock_irq(&blkif_io_lock);
    recovery = 1;
    blkif_state = BLKIF_STATE_DISCONNECTED;
    spin_unlock_irq(&blkif_io_lock);

    /* Free resources associated with old device channel. */
    if ( blk_ring != NULL )
    {
        free_page((unsigned long)blk_ring);
        blk_ring = NULL;
    }
    free_irq(blkif_irq, NULL);
    blkif_irq = 0;

    unbind_evtchn_from_irq(blkif_evtchn);
    blkif_evtchn = 0;
}

static void blkif_close(void)
{
}

/* Move from CLOSED to DISCONNECTED state. */
static void blkif_disconnect(void)
{
    if ( blk_ring != NULL )
        free_page((unsigned long)blk_ring);
    blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
    blkif_state = BLKIF_STATE_DISCONNECTED;
    blkif_send_interface_connect();
}

static void blkif_reset(void)
{
    blkif_free();
    blkif_disconnect();
}
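/*
 * After a backend reconnect, re-issue every request still outstanding in the
 * shadow ring, then rebuild the shadow entries and the free list to match.
 */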
static void blkif_recover(void)
{
    int i;

    /* Hmm, requests might be re-ordered when we re-issue them.
     * This will need to be fixed once we have barriers */

    /* Stage 1 : Find active and move to safety. */
    for ( i = 0; i < BLKIF_RING_SIZE; i++ )
    {
        if ( rec_ring[i].id >= PAGE_OFFSET )
        {
            translate_req_to_mfn(
                &blk_ring->ring[req_prod].req, &rec_ring[i]);
            req_prod++;
        }
    }

    /* Stage 2 : Set up shadow list. */
    for ( i = 0; i < req_prod; i++ )
    {
        rec_ring[i].id = blk_ring->ring[i].req.id;
        blk_ring->ring[i].req.id = i;
        translate_req_to_pfn(&rec_ring[i], &blk_ring->ring[i].req);
    }

    /* Stage 3 : Set up free list. */
    for ( ; i < BLKIF_RING_SIZE; i++ )
        rec_ring[i].id = i+1;
    rec_ring_free = req_prod;
    rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;

    /* blk_ring->req_prod will be set when we flush_requests().*/
    wmb();

    /* Switch off recovery mode, using a memory barrier to ensure that
     * it's seen before we flush requests - we don't want to miss any
     * interrupts. */
    recovery = 0;
    wmb();

    /* Kicks things back into life. */
    flush_requests();

    /* Now safe to let other people use the interface. */
    blkif_state = BLKIF_STATE_CONNECTED;
}
static void blkif_connect(blkif_fe_interface_status_t *status)
{
    int err = 0;

    blkif_evtchn = status->evtchn;
    blkif_irq    = bind_evtchn_to_irq(blkif_evtchn);

    err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
    if ( err )
    {
        printk(KERN_ALERT "xen_blk: request_irq failed (err=%d)\n", err);
        return;
    }

    if ( recovery )
    {
        blkif_recover();
    }
    else
    {
        /* Transition to connected in case we need to do
         * a partition probe on a whole disk. */
        blkif_state = BLKIF_STATE_CONNECTED;

        /* Probe for discs attached to the interface. */
        xlvbd_init();
    }

    /* Kick pending requests. */
    spin_lock_irq(&blkif_io_lock);
    kick_pending_request_queues();
    spin_unlock_irq(&blkif_io_lock);
}
static void unexpected(blkif_fe_interface_status_t *status)
{
    DPRINTK(" Unexpected blkif status %u in state %u\n",
            status->status, blkif_state);
}
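/*
 * Drive the frontend state machine (CLOSED/DISCONNECTED/CONNECTED) from
 * interface-status messages sent by the domain controller.
 */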
static void blkif_status(blkif_fe_interface_status_t *status)
{
    if ( status->handle != blkif_handle )
    {
        WPRINTK(" Invalid blkif: handle=%u", status->handle);
        return;
    }

    switch ( status->status )
    {
    case BLKIF_INTERFACE_STATUS_CLOSED:
        switch ( blkif_state )
        {
        case BLKIF_STATE_CLOSED:
            unexpected(status);
            break;
        case BLKIF_STATE_DISCONNECTED:
        case BLKIF_STATE_CONNECTED:
            unexpected(status);
            blkif_close();
            break;
        }
        break;

    case BLKIF_INTERFACE_STATUS_DISCONNECTED:
        switch ( blkif_state )
        {
        case BLKIF_STATE_CLOSED:
            blkif_disconnect();
            break;
        case BLKIF_STATE_DISCONNECTED:
        case BLKIF_STATE_CONNECTED:
            /* unexpected(status); */ /* occurs during suspend/resume */
            blkif_reset();
            break;
        }
        break;

    case BLKIF_INTERFACE_STATUS_CONNECTED:
        switch ( blkif_state )
        {
        case BLKIF_STATE_CLOSED:
            unexpected(status);
            blkif_disconnect();
            blkif_connect(status);
            break;
        case BLKIF_STATE_DISCONNECTED:
            blkif_connect(status);
            break;
        case BLKIF_STATE_CONNECTED:
            unexpected(status);
            blkif_connect(status);
            break;
        }
        break;

    case BLKIF_INTERFACE_STATUS_CHANGED:
        switch ( blkif_state )
        {
        case BLKIF_STATE_CLOSED:
        case BLKIF_STATE_DISCONNECTED:
            unexpected(status);
            break;
        case BLKIF_STATE_CONNECTED:
            vbd_update();
            break;
        }
        break;

    default:
        WPRINTK(" Invalid blkif status: %d\n", status->status);
        break;
    }
}
static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
{
    switch ( msg->subtype )
    {
    case CMSG_BLKIF_FE_INTERFACE_STATUS:
        if ( msg->length != sizeof(blkif_fe_interface_status_t) )
            goto parse_error;
        blkif_status((blkif_fe_interface_status_t *)
                     &msg->msg[0]);
        break;
    default:
        goto parse_error;
    }

    ctrl_if_send_response(msg);
    return;

 parse_error:
    msg->length = 0;
    ctrl_if_send_response(msg);
}
int wait_for_blkif(void)
{
    int err = 0;
    int i;
    send_driver_status(1);

    /*
     * We should read 'nr_interfaces' from response message and wait
     * for notifications before proceeding. For now we assume that we
     * will be notified of exactly one interface.
     */
    for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
    {
        set_current_state(TASK_INTERRUPTIBLE);
        schedule_timeout(1);
    }

    if ( blkif_state != BLKIF_STATE_CONNECTED )
    {
        printk(KERN_INFO "xen_blk: Timeout connecting to device!\n");
        err = -ENOSYS;
    }
    return err;
}
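/*
 * Module entry point: initialise the shadow-ring free list, register the
 * CMSG_BLKIF_FE control-message handler, then wait for the backend to
 * connect.
 */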
int __init xlblk_init(void)
{
    int i;

    if ( (xen_start_info.flags & SIF_INITDOMAIN) ||
         (xen_start_info.flags & SIF_BLK_BE_DOMAIN) )
        return 0;

    printk(KERN_INFO "xen_blk: Initialising virtual block device driver\n");

    rec_ring_free = 0;
    for ( i = 0; i < BLKIF_RING_SIZE; i++ )
        rec_ring[i].id = i+1;
    rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;

    (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
                                    CALLBACK_IN_BLOCKING_CONTEXT);

    wait_for_blkif();

    return 0;
}
void blkdev_suspend(void)
{
}

void blkdev_resume(void)
{
    send_driver_status(1);
}

/* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */

void blkif_completion(blkif_request_t *req)
{
    int i;

    switch ( req->operation )
    {
    case BLKIF_OP_READ:
        for ( i = 0; i < req->nr_segments; i++ )
        {
            unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
            unsigned long mfn = phys_to_machine_mapping[pfn];
            xen_machphys_update(mfn, pfn);
        }
        break;