
view linux-2.6.8.1-xen-sparse/drivers/xen/blkfront/blkfront.c @ 2615:4ecd18756ef8

bitkeeper revision 1.1159.1.192 (415c7082v4eKZkH-NXLbAR9bU6B8hg)

Merge ssh://srg//auto/groups/xeno/BK/xeno-unstable.bk
into equilibrium.research:/export/scratch/xeno-blkscripts.bk
author mwilli2@equilibrium.research
date Thu Sep 30 20:45:54 2004 +0000 (2004-09-30)
parents 5ba3470963d4 5109394e871c
children 8a3a77314cb5 ff4e7a241335
line source
1 /******************************************************************************
2 * blkfront.c
3 *
4 * XenLinux virtual block-device driver.
5 *
6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
8 * Copyright (c) 2004, Christian Limpach
9 */
11 #include <linux/version.h>
13 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
14 #include "block.h"
15 #else
16 #include "common.h"
17 #include <linux/blk.h>
18 #include <linux/tqueue.h>
19 #endif
21 #include <linux/cdrom.h>
22 #include <linux/sched.h>
23 #include <linux/interrupt.h>
24 #include <scsi/scsi.h>
25 #include <asm-xen/ctrl_if.h>
27 typedef unsigned char byte; /* from linux/ide.h */
29 /* Control whether runtime update of vbds is enabled. */
30 #define ENABLE_VBD_UPDATE 1
32 #if ENABLE_VBD_UPDATE
33 static void vbd_update(void);
34 #else
35 static void vbd_update(void){};
36 #endif
38 #define BLKIF_STATE_CLOSED 0
39 #define BLKIF_STATE_DISCONNECTED 1
40 #define BLKIF_STATE_CONNECTED 2
42 static char *blkif_state_name[] = {
43 [BLKIF_STATE_CLOSED] = "closed",
44 [BLKIF_STATE_DISCONNECTED] = "disconnected",
45 [BLKIF_STATE_CONNECTED] = "connected",
46 };
48 static char * blkif_status_name[] = {
49 [BLKIF_INTERFACE_STATUS_CLOSED] = "closed",
50 [BLKIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
51 [BLKIF_INTERFACE_STATUS_CONNECTED] = "connected",
52 [BLKIF_INTERFACE_STATUS_CHANGED] = "changed",
53 };
55 #if 1
56 #define dprintf(fmt, args...) \
57 printk(KERN_ALERT "[XEN:%s:%s:%d] " fmt, __FUNCTION__, __FILE__, __LINE__, ##args)
58 #endif
60 #define WPRINTK(fmt, args...) printk(KERN_WARNING "[XEN] " fmt, ##args)
62 static int blkif_handle = 0;
63 static unsigned int blkif_state = BLKIF_STATE_CLOSED;
64 static unsigned int blkif_evtchn = 0;
65 static unsigned int blkif_irq = 0;
67 static int blkif_control_rsp_valid;
68 static blkif_response_t blkif_control_rsp;
70 static blkif_ring_t *blk_ring = NULL;
71 static BLKIF_RING_IDX resp_cons; /* Response consumer for comms ring. */
72 static BLKIF_RING_IDX req_prod; /* Private request producer. */
74 unsigned long rec_ring_free;
75 blkif_request_t rec_ring[BLKIF_RING_SIZE];
77 static int recovery = 0; /* "Recovery in progress" flag. Protected
78 * by the blkif_io_lock */
80 /* We plug the I/O ring if the driver is suspended or if the ring is full. */
81 #define BLKIF_RING_FULL (((req_prod - resp_cons) == BLKIF_RING_SIZE) || \
82 (blkif_state != BLKIF_STATE_CONNECTED))
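/*
 * Note that req_prod and resp_cons are free-running counters: they are only
 * reduced modulo the ring size (via MASK_BLKIF_IDX) when indexing blk_ring,
 * so (req_prod - resp_cons) is the number of requests still outstanding and
 * equals BLKIF_RING_SIZE exactly when every slot is in use.
 */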
84 static inline void translate_req_to_mfn(blkif_request_t *xreq,
85 blkif_request_t *req);
87 static inline void translate_req_to_pfn(blkif_request_t *xreq,
88 blkif_request_t *req);
90 static inline void flush_requests(void);
92 static void kick_pending_request_queues(void);
94 int __init xlblk_init(void);
96 void blkif_completion( blkif_request_t *req );
98 static inline int GET_ID_FROM_FREELIST( void )
99 {
100 unsigned long free = rec_ring_free;
102 if ( free > BLKIF_RING_SIZE )
103 BUG();
105 rec_ring_free = rec_ring[free].id;
107 rec_ring[free].id = 0x0fffffee; /* debug */
109 return free;
110 }
112 static inline void ADD_ID_TO_FREELIST( unsigned long id )
113 {
114 rec_ring[id].id = rec_ring_free;
115 rec_ring_free = id;
116 }
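/*
 * The free list is threaded through the 'id' fields of rec_ring itself:
 * rec_ring_free holds the index of the first free slot and each free slot's
 * id holds the index of the next one. For example (illustrative indices
 * only), with slots 2 -> 5 -> 7 free:
 *
 *     rec_ring_free == 2, rec_ring[2].id == 5, rec_ring[5].id == 7
 *
 * GET_ID_FROM_FREELIST() pops slot 2 and leaves rec_ring_free == 5;
 * ADD_ID_TO_FREELIST(2) pushes it back onto the front of the list.
 */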
119 /************************** KERNEL VERSION 2.6 **************************/
121 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
123 #define DISABLE_SCATTERGATHER()
125 __initcall(xlblk_init);
127 #if ENABLE_VBD_UPDATE
128 static void vbd_update()
129 {
130 dprintf(">\n");
131 dprintf("<\n");
132 }
133 #endif /* ENABLE_VBD_UPDATE */
135 static void kick_pending_request_queues(void)
136 {
138 if ( (xlbd_blk_queue != NULL) &&
139 test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
140 {
141 blk_start_queue(xlbd_blk_queue);
142 /* XXXcl call to request_fn should not be needed but
143 * we get stuck without... needs investigating
144 */
145 xlbd_blk_queue->request_fn(xlbd_blk_queue);
146 }
148 }
151 int blkif_open(struct inode *inode, struct file *filep)
152 {
153 struct gendisk *gd = inode->i_bdev->bd_disk;
154 struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
156 /* Update of usage count is protected by per-device semaphore. */
157 di->mi->usage++;
159 return 0;
160 }
163 int blkif_release(struct inode *inode, struct file *filep)
164 {
165 struct gendisk *gd = inode->i_bdev->bd_disk;
166 struct xlbd_disk_info *di = (struct xlbd_disk_info *)gd->private_data;
168 /*
169 * When usage drops to zero it may allow more VBD updates to occur.
170 * Update of usage count is protected by a per-device semaphore.
171 */
172 if (--di->mi->usage == 0) {
173 vbd_update();
174 }
176 return 0;
177 }
180 int blkif_ioctl(struct inode *inode, struct file *filep,
181 unsigned command, unsigned long argument)
182 {
183 /* struct gendisk *gd = inode->i_bdev->bd_disk; */
185 DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
186 command, (long)argument, inode->i_rdev);
188 switch (command) {
190 case HDIO_GETGEO:
191 /* return ENOSYS to use defaults */
192 return -ENOSYS;
194 default:
195 printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
196 command);
197 return -ENOSYS;
198 }
200 return 0;
201 }
203 #if 0
204 /* check media change: should probably do something here in some cases :-) */
205 int blkif_check(kdev_t dev)
206 {
207 DPRINTK("blkif_check\n");
208 return 0;
209 }
211 int blkif_revalidate(kdev_t dev)
212 {
213 struct block_device *bd;
214 struct gendisk *gd;
215 xen_block_t *disk;
216 unsigned long capacity;
217 int i, rc = 0;
219 if ( (bd = bdget(dev)) == NULL )
220 return -EINVAL;
222 /*
223 * Update of partition info, and check of usage count, is protected
224 * by the per-block-device semaphore.
225 */
226 down(&bd->bd_sem);
228 if ( ((gd = get_gendisk(dev)) == NULL) ||
229 ((disk = xldev_to_xldisk(dev)) == NULL) ||
230 ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
231 {
232 rc = -EINVAL;
233 goto out;
234 }
236 if ( disk->usage > 1 )
237 {
238 rc = -EBUSY;
239 goto out;
240 }
242 /* Only reread partition table if VBDs aren't mapped to partitions. */
243 if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
244 {
245 for ( i = gd->max_p - 1; i >= 0; i-- )
246 {
247 invalidate_device(dev+i, 1);
248 gd->part[MINOR(dev+i)].start_sect = 0;
249 gd->part[MINOR(dev+i)].nr_sects = 0;
250 gd->sizes[MINOR(dev+i)] = 0;
251 }
253 grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
254 }
256 out:
257 up(&bd->bd_sem);
258 bdput(bd);
259 return rc;
260 }
261 #endif
263 /*
264 * blkif_queue_request
265 *
266 * request block io
267 *
268 * id: for guest use only.
269 * operation: BLKIF_OP_{READ,WRITE,PROBE}
270 * buffer: buffer to read/write into. this should be a
271 * virtual address in the guest os.
272 */
273 static int blkif_queue_request(struct request *req)
274 {
275 struct xlbd_disk_info *di =
276 (struct xlbd_disk_info *)req->rq_disk->private_data;
277 unsigned long buffer_ma;
278 blkif_request_t *ring_req;
279 struct bio *bio;
280 struct bio_vec *bvec;
281 int idx, s;
282 unsigned long id;
283 unsigned int fsect, lsect;
285 if (unlikely(blkif_state != BLKIF_STATE_CONNECTED))
286 return 1;
288 /* Fill out a communications ring structure. */
289 ring_req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
290 id = GET_ID_FROM_FREELIST();
291 rec_ring[id].id = (unsigned long) req;
293 ring_req->id = id;
294 ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
295 BLKIF_OP_READ;
296 ring_req->sector_number = (blkif_sector_t)req->sector;
297 ring_req->device = di->xd_device;
299 s = 0;
300 ring_req->nr_segments = 0;
301 rq_for_each_bio(bio, req) {
302 bio_for_each_segment(bvec, bio, idx) {
303 buffer_ma = page_to_phys(bvec->bv_page);
304 if (unlikely((buffer_ma & ((1<<9)-1)) != 0))
305 BUG();
307 fsect = bvec->bv_offset >> 9;
308 lsect = fsect + (bvec->bv_len >> 9) - 1;
309 if (unlikely(lsect > 7))
310 BUG();
312 ring_req->frame_and_sects[ring_req->nr_segments++] =
313 buffer_ma | (fsect << 3) | lsect;
314 s += bvec->bv_len >> 9;
315 }
316 }
318 req_prod++;
320 /* Keep a private copy so we can reissue requests when recovering. */
321 translate_req_to_pfn( &rec_ring[id], ring_req);
323 return 0;
324 }
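/*
 * Each frame_and_sects entry packs the page-aligned buffer address together
 * with the first and last 512-byte sector touched within that page. A rough
 * worked example (illustrative values only):
 *
 *     buffer_ma = 0x12345000;                      page-aligned address
 *     fsect = 1; lsect = 4;                        sectors 1..4 of the page
 *     entry = buffer_ma | (fsect << 3) | lsect;    == 0x1234500c
 *
 * Only eight 512-byte sectors fit in a 4kB page, hence the BUG() above when
 * lsect > 7.
 */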
327 /*
328 * do_blkif_request
329 * read a block; request is in a request queue
330 */
331 void do_blkif_request(request_queue_t *rq)
332 {
333 struct request *req;
334 int queued;
336 DPRINTK("Entered do_blkif_request\n");
338 queued = 0;
340 while ((req = elv_next_request(rq)) != NULL) {
341 if (!blk_fs_request(req)) {
342 end_request(req, 0);
343 continue;
344 }
346 if ( BLKIF_RING_FULL )
347 {
348 blk_stop_queue(rq);
349 break;
350 }
351 DPRINTK("do_blkif_request %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
352 req, req->cmd, req->sector, req->current_nr_sectors,
353 req->nr_sectors, req->buffer,
354 rq_data_dir(req) ? "write" : "read");
355 blkdev_dequeue_request(req);
356 if (blkif_queue_request(req)) {
357 blk_stop_queue(rq);
358 break;
359 }
360 queued++;
361 }
363 if (queued != 0)
364 flush_requests();
365 }
368 static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
369 {
370 struct request *req;
371 blkif_response_t *bret;
372 BLKIF_RING_IDX i, rp;
373 unsigned long flags;
375 spin_lock_irqsave(&blkif_io_lock, flags);
377 if ( unlikely(blkif_state == BLKIF_STATE_CLOSED) ||
378 unlikely(recovery) )
379 {
380 spin_unlock_irqrestore(&blkif_io_lock, flags);
381 return IRQ_HANDLED;
382 }
384 rp = blk_ring->resp_prod;
385 rmb(); /* Ensure we see queued responses up to 'rp'. */
387 for ( i = resp_cons; i != rp; i++ )
388 {
389 unsigned long id;
390 bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
392 id = bret->id;
393 req = (struct request *)rec_ring[id].id;
395 blkif_completion( &rec_ring[id] );
397 ADD_ID_TO_FREELIST(id); /* overwrites req */
399 switch ( bret->operation )
400 {
401 case BLKIF_OP_READ:
402 case BLKIF_OP_WRITE:
403 if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
404 DPRINTK("Bad return from blkdev data request: %x\n",
405 bret->status);
407 if ( unlikely(end_that_request_first
408 (req,
409 (bret->status == BLKIF_RSP_OKAY),
410 req->hard_nr_sectors)) )
411 BUG();
412 end_that_request_last(req);
414 break;
415 case BLKIF_OP_PROBE:
416 memcpy(&blkif_control_rsp, bret, sizeof(*bret));
417 blkif_control_rsp_valid = 1;
418 break;
419 default:
420 BUG();
421 }
422 }
424 resp_cons = i;
426 kick_pending_request_queues();
428 spin_unlock_irqrestore(&blkif_io_lock, flags);
430 return IRQ_HANDLED;
431 }
433 #else
434 /************************** KERNEL VERSION 2.4 **************************/
436 static kdev_t sg_dev;
437 static int sg_operation = -1;
438 static unsigned long sg_next_sect;
440 /*
441 * Request queues with outstanding work, but ring is currently full.
442 * We need no special lock here, as we always access this with the
443 * blkif_io_lock held. We only need a small maximum list.
444 */
445 #define MAX_PENDING 8
446 static request_queue_t *pending_queues[MAX_PENDING];
447 static int nr_pending;
450 #define DISABLE_SCATTERGATHER() (sg_operation = -1)
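/*
 * Scatter-gather merging (2.4 only): sg_dev, sg_operation and sg_next_sect
 * remember the device, direction and next expected sector of the request
 * currently being built. A buffer head that continues the same run is folded
 * into the previous ring entry as an extra segment rather than consuming a
 * new slot; DISABLE_SCATTERGATHER() terminates the run.
 */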
452 #define blkif_io_lock io_request_lock
454 /*============================================================================*/
455 #if ENABLE_VBD_UPDATE
457 /*
458 * blkif_update_int/update_vbds_task - handle VBD update events.
459 * Schedule a task for keventd to run, which will update the VBDs and perform
460 * the corresponding updates to our view of VBD state.
461 */
462 static void update_vbds_task(void *unused)
463 {
464 xlvbd_update_vbds();
465 }
467 static void vbd_update(void)
468 {
468 {
469 static struct tq_struct update_tq;
470 dprintf(">\n");
471 update_tq.routine = update_vbds_task;
472 schedule_task(&update_tq);
473 dprintf("<\n");
474 }
476 #endif /* ENABLE_VBD_UPDATE */
477 /*============================================================================*/
480 static void kick_pending_request_queues(void)
481 {
481 {
482 /* We kick pending request queues if the ring is reasonably empty. */
483 if ( (nr_pending != 0) &&
484 ((req_prod - resp_cons) < (BLKIF_RING_SIZE >> 1)) )
485 {
486 /* Attempt to drain the queue, but bail if the ring becomes full. */
487 while ( (nr_pending != 0) && !BLKIF_RING_FULL )
488 do_blkif_request(pending_queues[--nr_pending]);
489 }
490 }
492 int blkif_open(struct inode *inode, struct file *filep)
493 {
494 short xldev = inode->i_rdev;
495 struct gendisk *gd = get_gendisk(xldev);
496 xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
497 short minor = MINOR(xldev);
499 if ( gd->part[minor].nr_sects == 0 )
500 {
501 /*
502 * Device either doesn't exist, or has zero capacity; we use a few
503 * cheesy heuristics to return the relevant error code
504 */
505 if ( (gd->sizes[minor >> gd->minor_shift] != 0) ||
506 ((minor & (gd->max_p - 1)) != 0) )
507 {
508 /*
509 * We have a real device, but no such partition, or we just have a
510 * partition number so guess this is the problem.
511 */
512 return -ENXIO; /* no such device or address */
513 }
514 else if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE )
515 {
516 /* This is a removable device => assume that media is missing. */
517 return -ENOMEDIUM; /* media not present (this is a guess) */
518 }
519 else
520 {
521 /* Just go for the general 'no such device' error. */
522 return -ENODEV; /* no such device */
523 }
524 }
526 /* Update of usage count is protected by per-device semaphore. */
527 disk->usage++;
529 return 0;
530 }
533 int blkif_release(struct inode *inode, struct file *filep)
534 {
534 {
535 xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
537 /*
538 * When usage drops to zero it may allow more VBD updates to occur.
539 * Update of usage count is protected by a per-device semaphore.
540 */
541 if ( --disk->usage == 0 ) {
542 vbd_update();
543 }
545 return 0;
546 }
549 int blkif_ioctl(struct inode *inode, struct file *filep,
550 unsigned command, unsigned long argument)
551 {
552 kdev_t dev = inode->i_rdev;
553 struct hd_geometry *geo = (struct hd_geometry *)argument;
554 struct gendisk *gd;
555 struct hd_struct *part;
556 int i;
557 unsigned short cylinders;
558 byte heads, sectors;
560 /* NB. No need to check permissions. That is done for us. */
562 DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
563 command, (long) argument, dev);
565 gd = get_gendisk(dev);
566 part = &gd->part[MINOR(dev)];
568 switch ( command )
569 {
570 case BLKGETSIZE:
571 DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
572 return put_user(part->nr_sects, (unsigned long *) argument);
574 case BLKGETSIZE64:
575 DPRINTK_IOCTL(" BLKGETSIZE64: %x %llx\n", BLKGETSIZE64,
576 (u64)part->nr_sects * 512);
577 return put_user((u64)part->nr_sects * 512, (u64 *) argument);
579 case BLKRRPART: /* re-read partition table */
580 DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
581 return blkif_revalidate(dev);
583 case BLKSSZGET:
584 return hardsect_size[MAJOR(dev)][MINOR(dev)];
586 case BLKBSZGET: /* get block size */
587 DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
588 break;
590 case BLKBSZSET: /* set block size */
591 DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET);
592 break;
594 case BLKRASET: /* set read-ahead */
595 DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET);
596 break;
598 case BLKRAGET: /* get read-ahead */
599 DPRINTK_IOCTL(" BLKRAGET: %x\n", BLKRAGET);
600 break;
602 case HDIO_GETGEO:
603 DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO);
604 if (!argument) return -EINVAL;
606 /* We don't have real geometry info, but let's at least return
607 values consistent with the size of the device */
609 heads = 0xff;
610 sectors = 0x3f;
611 cylinders = part->nr_sects / (heads * sectors);
613 if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
614 if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
615 if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
616 if (put_user(cylinders, (unsigned short *)&geo->cylinders)) return -EFAULT;
618 return 0;
620 case HDIO_GETGEO_BIG:
621 DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
622 if (!argument) return -EINVAL;
624 /* We don't have real geometry info, but let's at least return
625 values consistent with the size of the device */
627 heads = 0xff;
628 sectors = 0x3f;
629 cylinders = part->nr_sects / (heads * sectors);
631 if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
632 if (put_user(heads, (byte *)&geo->heads)) return -EFAULT;
633 if (put_user(sectors, (byte *)&geo->sectors)) return -EFAULT;
634 if (put_user(cylinders, (unsigned int *) &geo->cylinders)) return -EFAULT;
636 return 0;
638 case CDROMMULTISESSION:
639 DPRINTK("FIXME: support multisession CDs later\n");
640 for ( i = 0; i < sizeof(struct cdrom_multisession); i++ )
641 if ( put_user(0, (byte *)(argument + i)) ) return -EFAULT;
642 return 0;
644 case SCSI_IOCTL_GET_BUS_NUMBER:
645 DPRINTK("FIXME: SCSI_IOCTL_GET_BUS_NUMBER ioctl in XL blkif");
646 return -ENOSYS;
648 default:
649 printk(KERN_ALERT "ioctl %08x not supported by XL blkif\n", command);
650 return -ENOSYS;
651 }
653 return 0;
654 }
658 /* check media change: should probably do something here in some cases :-) */
659 int blkif_check(kdev_t dev)
660 {
661 DPRINTK("blkif_check\n");
662 return 0;
663 }
665 int blkif_revalidate(kdev_t dev)
666 {
667 struct block_device *bd;
668 struct gendisk *gd;
669 xl_disk_t *disk;
670 unsigned long capacity;
671 int i, rc = 0;
673 if ( (bd = bdget(dev)) == NULL )
674 return -EINVAL;
676 /*
677 * Update of partition info, and check of usage count, is protected
678 * by the per-block-device semaphore.
679 */
680 down(&bd->bd_sem);
682 if ( ((gd = get_gendisk(dev)) == NULL) ||
683 ((disk = xldev_to_xldisk(dev)) == NULL) ||
684 ((capacity = gd->part[MINOR(dev)].nr_sects) == 0) )
685 {
686 rc = -EINVAL;
687 goto out;
688 }
690 if ( disk->usage > 1 )
691 {
692 rc = -EBUSY;
693 goto out;
694 }
696 /* Only reread partition table if VBDs aren't mapped to partitions. */
697 if ( !(gd->flags[MINOR(dev) >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) )
698 {
699 for ( i = gd->max_p - 1; i >= 0; i-- )
700 {
701 invalidate_device(dev+i, 1);
702 gd->part[MINOR(dev+i)].start_sect = 0;
703 gd->part[MINOR(dev+i)].nr_sects = 0;
704 gd->sizes[MINOR(dev+i)] = 0;
705 }
707 grok_partitions(gd, MINOR(dev)>>gd->minor_shift, gd->max_p, capacity);
708 }
710 out:
711 up(&bd->bd_sem);
712 bdput(bd);
713 return rc;
714 }
719 /*
720 * blkif_queue_request
721 *
722 * request block io
723 *
724 * id: for guest use only.
725 * operation: BLKIF_OP_{READ,WRITE,PROBE}
726 * buffer: buffer to read/write into. this should be a
727 * virtual address in the guest os.
728 */
729 static int blkif_queue_request(unsigned long id,
730 int operation,
731 char * buffer,
732 unsigned long sector_number,
733 unsigned short nr_sectors,
734 kdev_t device)
735 {
736 unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
737 unsigned long xid;
738 struct gendisk *gd;
739 blkif_request_t *req;
740 struct buffer_head *bh;
741 unsigned int fsect, lsect;
743 fsect = (buffer_ma & ~PAGE_MASK) >> 9;
744 lsect = fsect + nr_sectors - 1;
746 /* Buffer must be sector-aligned. Extent mustn't cross a page boundary. */
747 if ( unlikely((buffer_ma & ((1<<9)-1)) != 0) )
748 BUG();
749 if ( lsect > 7 )
750 BUG();
752 buffer_ma &= PAGE_MASK;
754 if ( unlikely(blkif_state != BLKIF_STATE_CONNECTED) )
755 return 1;
757 switch ( operation )
758 {
760 case BLKIF_OP_READ:
761 case BLKIF_OP_WRITE:
762 gd = get_gendisk(device);
764 /*
765 * Update the sector_number we'll pass down as appropriate; note that
766 * we could sanity check that resulting sector will be in this
767 * partition, but this will happen in driver backend anyhow.
768 */
769 sector_number += gd->part[MINOR(device)].start_sect;
771 /*
772 * If this unit doesn't consist of virtual partitions then we clear
773 * the partn bits from the device number.
774 */
775 if ( !(gd->flags[MINOR(device)>>gd->minor_shift] &
776 GENHD_FL_VIRT_PARTNS) )
777 device &= ~(gd->max_p - 1);
779 if ( (sg_operation == operation) &&
780 (sg_dev == device) &&
781 (sg_next_sect == sector_number) )
782 {
784 req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod-1)].req;
785 bh = (struct buffer_head *)id;
787 bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
790 rec_ring[req->id].id = id;
792 req->frame_and_sects[req->nr_segments] =
793 buffer_ma | (fsect<<3) | lsect;
794 if ( ++req->nr_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST )
795 sg_next_sect += nr_sectors;
796 else
797 DISABLE_SCATTERGATHER();
799 /* Update the copy of the request in the recovery ring. */
800 translate_req_to_pfn(&rec_ring[req->id], req );
802 return 0;
803 }
804 else if ( BLKIF_RING_FULL )
805 {
806 return 1;
807 }
808 else
809 {
810 sg_operation = operation;
811 sg_dev = device;
812 sg_next_sect = sector_number + nr_sectors;
813 }
814 break;
816 default:
817 panic("unknown op %d\n", operation);
818 }
820 /* Fill out a communications ring structure. */
821 req = &blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req;
823 xid = GET_ID_FROM_FREELIST();
824 rec_ring[xid].id = id;
826 req->id = xid;
827 req->operation = operation;
828 req->sector_number = (blkif_sector_t)sector_number;
829 req->device = device;
830 req->nr_segments = 1;
831 req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
833 req_prod++;
835 /* Keep a private copy so we can reissue requests when recovering. */
836 translate_req_to_pfn(&rec_ring[xid], req );
840 return 0;
841 }
844 /*
845 * do_blkif_request
846 * read a block; request is in a request queue
847 */
848 void do_blkif_request(request_queue_t *rq)
849 {
850 struct request *req;
851 struct buffer_head *bh, *next_bh;
852 int rw, nsect, full, queued = 0;
854 DPRINTK("Entered do_blkif_request\n");
856 while ( !rq->plugged && !list_empty(&rq->queue_head))
857 {
858 if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
859 goto out;
861 DPRINTK("do_blkif_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
862 req, req->cmd, req->sector,
863 req->current_nr_sectors, req->nr_sectors, req->bh);
865 rw = req->cmd;
866 if ( rw == READA )
867 rw = READ;
868 if ( unlikely((rw != READ) && (rw != WRITE)) )
869 panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);
871 req->errors = 0;
873 bh = req->bh;
874 while ( bh != NULL )
875 {
876 next_bh = bh->b_reqnext;
877 bh->b_reqnext = NULL;
879 full = blkif_queue_request(
880 (unsigned long)bh,
881 (rw == READ) ? BLKIF_OP_READ : BLKIF_OP_WRITE,
882 bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);
884 if ( full )
885 {
886 bh->b_reqnext = next_bh;
887 pending_queues[nr_pending++] = rq;
888 if ( unlikely(nr_pending >= MAX_PENDING) )
889 BUG();
890 goto out;
891 }
893 queued++;
895 /* Dequeue the buffer head from the request. */
896 nsect = bh->b_size >> 9;
897 bh = req->bh = next_bh;
899 if ( bh != NULL )
900 {
901 /* There's another buffer head to do. Update the request. */
902 req->hard_sector += nsect;
903 req->hard_nr_sectors -= nsect;
904 req->sector = req->hard_sector;
905 req->nr_sectors = req->hard_nr_sectors;
906 req->current_nr_sectors = bh->b_size >> 9;
907 req->buffer = bh->b_data;
908 }
909 else
910 {
911 /* That was the last buffer head. Finalise the request. */
912 if ( unlikely(end_that_request_first(req, 1, "XenBlk")) )
913 BUG();
914 blkdev_dequeue_request(req);
915 end_that_request_last(req);
916 }
917 }
918 }
920 out:
921 if ( queued != 0 )
922 flush_requests();
923 }
926 static void blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
927 {
928 BLKIF_RING_IDX i, rp;
929 unsigned long flags;
930 struct buffer_head *bh, *next_bh;
932 spin_lock_irqsave(&io_request_lock, flags);
934 if ( unlikely(blkif_state == BLKIF_STATE_CLOSED || recovery) )
935 {
936 spin_unlock_irqrestore(&io_request_lock, flags);
937 return;
938 }
940 rp = blk_ring->resp_prod;
941 rmb(); /* Ensure we see queued responses up to 'rp'. */
943 for ( i = resp_cons; i != rp; i++ )
944 {
945 unsigned long id;
946 blkif_response_t *bret = &blk_ring->ring[MASK_BLKIF_IDX(i)].resp;
948 id = bret->id;
949 bh = (struct buffer_head *)rec_ring[id].id;
951 blkif_completion( &rec_ring[id] );
953 ADD_ID_TO_FREELIST(id);
955 switch ( bret->operation )
956 {
957 case BLKIF_OP_READ:
958 case BLKIF_OP_WRITE:
959 if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
960 DPRINTK("Bad return from blkdev data request: %lx\n",
961 bret->status);
962 for ( ; bh != NULL; bh = next_bh )
963 {
964 next_bh = bh->b_reqnext;
965 bh->b_reqnext = NULL;
966 bh->b_end_io(bh, bret->status == BLKIF_RSP_OKAY);
967 }
969 break;
970 case BLKIF_OP_PROBE:
971 memcpy(&blkif_control_rsp, bret, sizeof(*bret));
972 blkif_control_rsp_valid = 1;
973 break;
974 default:
975 BUG();
976 }
977 }
979 resp_cons = i;
981 kick_pending_request_queues();
983 spin_unlock_irqrestore(&io_request_lock, flags);
984 }
986 #endif
988 /***************************** COMMON CODE *******************************/
991 static inline void translate_req_to_pfn(blkif_request_t *xreq,
992 blkif_request_t *req)
993 {
994 int i;
996 xreq->operation = req->operation;
997 xreq->nr_segments = req->nr_segments;
998 xreq->device = req->device;
999 /* preserve id */
1000 xreq->sector_number = req->sector_number;
1002 for ( i = 0; i < req->nr_segments; i++ ){
1003 xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
1004 }
1005 }
1007 static inline void translate_req_to_mfn(blkif_request_t *xreq,
1008 blkif_request_t *req)
1009 {
1010 int i;
1012 xreq->operation = req->operation;
1013 xreq->nr_segments = req->nr_segments;
1014 xreq->device = req->device;
1015 xreq->id = req->id; /* copy id (unlike above) */
1016 xreq->sector_number = req->sector_number;
1018 for ( i = 0; i < req->nr_segments; i++ ){
1019 xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
1020 }
1021 }
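/*
 * The shadow copy kept in rec_ring[] stores pseudo-physical frame numbers,
 * whereas the entry actually placed in blk_ring carries machine frames.
 * Machine frames are not stable across suspend/resume or backend
 * reconnection, so blkif_recover() uses translate_req_to_mfn() to convert
 * each shadow request back to machine frames before reissuing it.
 */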
1025 static inline void flush_requests(void)
1026 {
1027 DISABLE_SCATTERGATHER();
1028 wmb(); /* Ensure that the frontend can see the requests. */
1029 blk_ring->req_prod = req_prod;
1030 notify_via_evtchn(blkif_evtchn);
1031 }
1034 void blkif_control_send(blkif_request_t *req, blkif_response_t *rsp)
1035 {
1036 unsigned long flags, id;
1038 retry:
1039 while ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
1040 {
1041 set_current_state(TASK_INTERRUPTIBLE);
1042 schedule_timeout(1);
1043 }
1045 spin_lock_irqsave(&blkif_io_lock, flags);
1046 if ( (req_prod - resp_cons) == BLKIF_RING_SIZE )
1047 {
1048 spin_unlock_irqrestore(&blkif_io_lock, flags);
1049 goto retry;
1050 }
1052 DISABLE_SCATTERGATHER();
1053 blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req = *req;
1055 id = GET_ID_FROM_FREELIST();
1056 blk_ring->ring[MASK_BLKIF_IDX(req_prod)].req.id = id;
1057 rec_ring[id].id = (unsigned long) req;
1059 translate_req_to_pfn( &rec_ring[id], req );
1061 req_prod++;
1062 flush_requests();
1064 spin_unlock_irqrestore(&blkif_io_lock, flags);
1066 while ( !blkif_control_rsp_valid )
1067 {
1068 set_current_state(TASK_INTERRUPTIBLE);
1069 schedule_timeout(1);
1070 }
1072 memcpy(rsp, &blkif_control_rsp, sizeof(*rsp));
1073 blkif_control_rsp_valid = 0;
1074 }
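/*
 * Control requests (e.g. BLKIF_OP_PROBE) are synchronous: the caller spins
 * until there is ring space, queues the request under blkif_io_lock, then
 * sleeps in one-jiffy steps until blkif_int() copies the matching response
 * into blkif_control_rsp and sets blkif_control_rsp_valid.
 */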
1077 /* Send a driver status notification to the domain controller. */
1078 static void send_driver_status(int ok){
1079 ctrl_msg_t cmsg = {
1080 .type = CMSG_BLKIF_FE,
1081 .subtype = CMSG_BLKIF_FE_DRIVER_STATUS,
1082 .length = sizeof(blkif_fe_driver_status_t),
1083 };
1084 blkif_fe_driver_status_t *msg = (void*)cmsg.msg;
1086 msg->status = (ok ? BLKIF_DRIVER_STATUS_UP : BLKIF_DRIVER_STATUS_DOWN);
1088 ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
1089 }
1091 /* Tell the controller to bring up the interface. */
1092 static void blkif_send_interface_connect(void){
1093 ctrl_msg_t cmsg = {
1094 .type = CMSG_BLKIF_FE,
1095 .subtype = CMSG_BLKIF_FE_INTERFACE_CONNECT,
1096 .length = sizeof(blkif_fe_interface_connect_t),
1097 };
1098 blkif_fe_interface_connect_t *msg = (void*)cmsg.msg;
1100 msg->handle = 0;
1101 msg->shmem_frame = (virt_to_machine(blk_ring) >> PAGE_SHIFT);
1103 ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
1104 }
1106 static void blkif_free(void)
1107 {
1109 printk(KERN_INFO "[XEN] Recovering virtual block device driver\n");
1111 /* Prevent new requests being issued until we fix things up. */
1112 spin_lock_irq(&blkif_io_lock);
1113 recovery = 1;
1114 blkif_state = BLKIF_STATE_DISCONNECTED;
1115 spin_unlock_irq(&blkif_io_lock);
1117 /* Free resources associated with old device channel. */
1118 if(blk_ring){
1119 free_page((unsigned long)blk_ring);
1120 blk_ring = 0;
1121 }
1122 free_irq(blkif_irq, NULL);
1123 blkif_irq = 0;
1125 unbind_evtchn_from_irq(blkif_evtchn);
1126 blkif_evtchn = 0;
1127 }
1129 static void blkif_close(void){
1130 }
1132 /* Move from CLOSED to DISCONNECTED state. */
1133 static void blkif_disconnect(void)
1134 {
1135 if(blk_ring) free_page((unsigned long)blk_ring);
1136 blk_ring = (blkif_ring_t *)__get_free_page(GFP_KERNEL);
1137 blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;
1138 blkif_state = BLKIF_STATE_DISCONNECTED;
1139 blkif_send_interface_connect();
1140 }
1142 static void blkif_reset(void)
1143 {
1144 printk(KERN_INFO "[XEN] Recovering virtual block device driver\n");
1145 blkif_free();
1146 blkif_disconnect();
1147 }
1149 static void blkif_recover(void)
1150 {
1152 int i;
1154 /* Hmm, requests might be re-ordered when we re-issue them.
1155 * This will need to be fixed once we have barriers */
1157 /* Stage 1 : Find active and move to safety. */
1158 for ( i = 0; i < BLKIF_RING_SIZE; i++ ) {
1159 if ( rec_ring[i].id >= PAGE_OFFSET ) {
1160 translate_req_to_mfn(
1161 &blk_ring->ring[req_prod].req, &rec_ring[i]);
1162 req_prod++;
1163 }
1164 }
1166 printk(KERN_ALERT"blkfront: recovered %d descriptors\n",req_prod);
1168 /* Stage 2 : Set up shadow list. */
1169 for ( i = 0; i < req_prod; i++ ) {
1170 rec_ring[i].id = blk_ring->ring[i].req.id;
1171 blk_ring->ring[i].req.id = i;
1172 translate_req_to_pfn(&rec_ring[i], &blk_ring->ring[i].req);
1173 }
1175 /* Stage 3 : Set up free list. */
1176 for ( ; i < BLKIF_RING_SIZE; i++ ){
1177 rec_ring[i].id = i+1;
1178 }
1179 rec_ring_free = req_prod;
1180 rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
1182 /* blk_ring->req_prod will be set when we flush_requests().*/
1183 wmb();
1185 /* Switch off recovery mode, using a memory barrier to ensure that
1186 * it's seen before we flush requests - we don't want to miss any
1187 * interrupts. */
1188 recovery = 0;
1189 wmb();
1191 /* Kicks things back into life. */
1192 flush_requests();
1194 /* Now safe to let other people use the interface. */
1195 blkif_state = BLKIF_STATE_CONNECTED;
1196 }
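/*
 * Recovery sequence, driven from blkif_connect(): outstanding shadow
 * requests are copied back into the new ring (stage 1), relabelled with
 * fresh ids (stage 2), the remaining slots are strung onto the free list
 * (stage 3), and flush_requests() then reissues everything to the
 * reconnected backend.
 */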
1198 static void blkif_connect(blkif_fe_interface_status_t *status)
1199 {
1200 int err = 0;
1202 blkif_evtchn = status->evtchn;
1203 blkif_irq = bind_evtchn_to_irq(blkif_evtchn);
1205 err = request_irq(blkif_irq, blkif_int, SA_SAMPLE_RANDOM, "blkif", NULL);
1206 if(err){
1207 printk(KERN_ALERT "[XEN] blkfront request_irq failed (err=%d)\n", err);
1208 return;
1209 }
1211 if ( recovery ) {
1212 blkif_recover();
1213 } else {
1214 /* Transition to connected in case we need to do
1215 * a partition probe on a whole disk. */
1216 blkif_state = BLKIF_STATE_CONNECTED;
1218 /* Probe for discs attached to the interface. */
1219 xlvbd_init();
1220 }
1222 /* Kick pending requests. */
1223 spin_lock_irq(&blkif_io_lock);
1224 kick_pending_request_queues();
1225 spin_unlock_irq(&blkif_io_lock);
1226 }
1228 static void unexpected(blkif_fe_interface_status_t *status)
1229 {
1230 WPRINTK(" Unexpected blkif status %s in state %s\n",
1231 blkif_status_name[status->status],
1232 blkif_state_name[blkif_state]);
1233 }
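/*
 * Interface status handling, summarised (driven by the switch below):
 *
 *     CLOSED       + DISCONNECTED  -> allocate ring, request connect
 *     DISCONNECTED + CONNECTED     -> bind event channel, probe VBDs
 *     CONNECTED    + CHANGED       -> re-probe VBDs (vbd_update)
 *
 * Any other combination is reported via unexpected() and, where it makes
 * sense, handled by closing or resetting the interface.
 */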
1235 static void blkif_status(blkif_fe_interface_status_t *status)
1236 {
1237 if (status->handle != blkif_handle) {
1238 WPRINTK(" Invalid blkif: handle=%u", status->handle);
1239 return;
1240 }
1242 switch (status->status) {
1244 case BLKIF_INTERFACE_STATUS_CLOSED:
1245 switch(blkif_state){
1246 case BLKIF_STATE_CLOSED:
1247 unexpected(status);
1248 break;
1249 case BLKIF_STATE_DISCONNECTED:
1250 case BLKIF_STATE_CONNECTED:
1251 unexpected(status);
1252 blkif_close();
1253 break;
1254 }
1255 break;
1257 case BLKIF_INTERFACE_STATUS_DISCONNECTED:
1258 switch(blkif_state){
1259 case BLKIF_STATE_CLOSED:
1260 blkif_disconnect();
1261 break;
1262 case BLKIF_STATE_DISCONNECTED:
1263 case BLKIF_STATE_CONNECTED:
1264 unexpected(status);
1265 blkif_reset();
1266 break;
1267 }
1268 break;
1270 case BLKIF_INTERFACE_STATUS_CONNECTED:
1271 switch(blkif_state){
1272 case BLKIF_STATE_CLOSED:
1273 unexpected(status);
1274 blkif_disconnect();
1275 blkif_connect(status);
1276 break;
1277 case BLKIF_STATE_DISCONNECTED:
1278 blkif_connect(status);
1279 break;
1280 case BLKIF_STATE_CONNECTED:
1281 unexpected(status);
1282 blkif_connect(status);
1283 break;
1284 }
1285 break;
1287 case BLKIF_INTERFACE_STATUS_CHANGED:
1288 switch(blkif_state){
1289 case BLKIF_STATE_CLOSED:
1290 case BLKIF_STATE_DISCONNECTED:
1291 unexpected(status);
1292 break;
1293 case BLKIF_STATE_CONNECTED:
1294 vbd_update();
1295 break;
1296 }
1297 break;
1299 default:
1300 WPRINTK(" Invalid blkif status: %d\n", status->status);
1301 break;
1302 }
1303 }
1306 static void blkif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
1307 {
1308 switch ( msg->subtype )
1309 {
1310 case CMSG_BLKIF_FE_INTERFACE_STATUS:
1311 if ( msg->length != sizeof(blkif_fe_interface_status_t) )
1312 goto parse_error;
1313 blkif_status((blkif_fe_interface_status_t *)
1314 &msg->msg[0]);
1315 break;
1316 default:
1317 goto parse_error;
1318 }
1320 ctrl_if_send_response(msg);
1321 return;
1323 parse_error:
1324 msg->length = 0;
1325 ctrl_if_send_response(msg);
1326 }
1328 int wait_for_blkif(void){
1329 int err = 0;
1330 int i;
1331 send_driver_status(1);
1333 /*
1334 * We should read 'nr_interfaces' from response message and wait
1335 * for notifications before proceeding. For now we assume that we
1336 * will be notified of exactly one interface.
1337 */
1338 for ( i=0; (blkif_state != BLKIF_STATE_CONNECTED) && (i < 10*HZ); i++ )
1339 {
1340 set_current_state(TASK_INTERRUPTIBLE);
1341 schedule_timeout(1);
1342 }
1344 if (blkif_state != BLKIF_STATE_CONNECTED){
1345 printk(KERN_INFO "[XEN] Timeout connecting block device driver!\n");
1346 err = -ENOSYS;
1347 }
1348 return err;
1349 }
1351 int __init xlblk_init(void)
1352 {
1353 int i;
1355 if ( (start_info.flags & SIF_INITDOMAIN)
1356 || (start_info.flags & SIF_BLK_BE_DOMAIN) )
1357 return 0;
1359 printk(KERN_INFO "[XEN] Initialising virtual block device driver\n");
1361 rec_ring_free = 0;
1362 for (i=0; i<BLKIF_RING_SIZE; i++)
1363 {
1364 rec_ring[i].id = i+1;
1365 }
1366 rec_ring[BLKIF_RING_SIZE-1].id = 0x0fffffff;
1368 (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
1369 CALLBACK_IN_BLOCKING_CONTEXT);
1371 wait_for_blkif();
1373 return 0;
1374 }
1376 void blkdev_suspend(void)
1377 {
1378 }
1380 void blkdev_resume(void)
1381 {
1382 send_driver_status(1);
1383 }
1385 /* XXXXX THIS IS A TEMPORARY FUNCTION UNTIL WE GET GRANT TABLES */
1387 void blkif_completion(blkif_request_t *req)
1388 {
1389 int i;
1391 switch ( req->operation )
1392 {
1393 case BLKIF_OP_READ:
1394 for ( i = 0; i < req->nr_segments; i++ )
1395 {
1396 unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
1397 unsigned long mfn = phys_to_machine_mapping[pfn];
1398 xen_machphys_update(mfn, pfn);
1399 }
1400 break;