xenolinux-2.4.21-sparse/arch/xeno/drivers/block/xl_block.c @ 669:2aedb293ebd7

bitkeeper revision 1.339.1.14 (3f134181W265apVtHlxDfb4h6aFC7w)

xl_block.c:
    Blkdev access errors from Xen are not logged in the Xenolinux driver but are
    left for higher layers to flag.

author   kaf24@scramble.cl.cam.ac.uk
date     Mon Jul 14 23:49:21 2003 +0000 (2003-07-14)
parents  f36f032527a0
children 63da63184f59
/******************************************************************************
 * xl_block.c
 *
 * Xenolinux virtual block-device driver.
 *
 */

#include "xl_block.h"
#include <linux/blk.h>
#include <linux/cdrom.h>

typedef unsigned char byte; /* from linux/ide.h */

#define XLBLK_RESPONSE_IRQ _EVENT_BLK_RESP
#define DEBUG_IRQ          _EVENT_DEBUG

static blk_ring_t *blk_ring;
static unsigned int resp_cons; /* Response consumer for comms ring. */
static unsigned int req_prod;  /* Private request producer.         */
static xen_disk_info_t xlblk_disk_info;
static int xlblk_control_msg_pending;

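/*
 * The ring is considered full when advancing the private request producer
 * would make it catch up with the response consumer: one slot is always
 * left unused so that a full ring can be told apart from an empty one.
 */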
#define RING_FULL (BLK_RING_INC(req_prod) == resp_cons)

/*
 * Request queues with outstanding work, but ring is currently full.
 * We need no special lock here, as we always access this with the
 * io_request_lock held. We only need a small maximum list.
 */
#define MAX_PENDING 8
static request_queue_t *pending_queues[MAX_PENDING];
static int nr_pending;

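/*
 * Scatter-gather merge state: while successive data requests share the same
 * operation, the same physical device and a contiguous next sector, they are
 * folded into the most recently queued ring entry (see hypervisor_request()).
 * Setting sg_operation to -1 disables further merging.
 */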
static kdev_t        sg_dev;
static int           sg_operation = -1;
static unsigned long sg_next_sect;
#define DISABLE_SCATTERGATHER() (sg_operation = -1)

static inline void signal_requests_to_xen(void)
{
    DISABLE_SCATTERGATHER();
    blk_ring->req_prod = req_prod;
    HYPERVISOR_block_io_op();
}

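/* Convert a Xen-level 'physical' device to the corresponding XenoLinux device. */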
inline kdev_t physdev_to_xldev(unsigned short physdev)
{
    switch ( physdev & XENDEV_TYPE_MASK ) {
    case XENDEV_IDE:
        if ( (physdev & XENDEV_IDX_MASK) < XLIDE_DEVS_PER_MAJOR ) {
            return MKDEV(XLIDE_MAJOR_0,
                         (physdev & XENDEV_IDX_MASK) << XLIDE_PARTN_SHIFT);
        } else if ( (physdev & XENDEV_IDX_MASK) < (XLIDE_DEVS_PER_MAJOR * 2) ) {
            return MKDEV(XLIDE_MAJOR_1,
                         (physdev & XENDEV_IDX_MASK) << XLIDE_PARTN_SHIFT);
        }
        break;
    case XENDEV_SCSI:
        return MKDEV(XLSCSI_MAJOR,
                     (physdev & XENDEV_IDX_MASK) << XLSCSI_PARTN_SHIFT);
    case XENDEV_VIRTUAL:
        return MKDEV(XLVIRT_MAJOR,
                     (physdev & XENDEV_IDX_MASK) << XLVIRT_PARTN_SHIFT);
    }

    return 0;
}

/* Convert from a XenoLinux major device to the Xen-level 'physical' device. */
inline unsigned short xldev_to_physdev(kdev_t xldev)
{
    unsigned short physdev = 0;

    switch ( MAJOR(xldev) )
    {
    case XLIDE_MAJOR_0:
        physdev = XENDEV_IDE + (0*XLIDE_DEVS_PER_MAJOR) +
            (MINOR(xldev) >> XLIDE_PARTN_SHIFT);
        break;

    case XLIDE_MAJOR_1:
        physdev = XENDEV_IDE + (1*XLIDE_DEVS_PER_MAJOR) +
            (MINOR(xldev) >> XLIDE_PARTN_SHIFT);
        break;

    case XLSCSI_MAJOR:
        physdev = XENDEV_SCSI + (MINOR(xldev) >> XLSCSI_PARTN_SHIFT);
        break;

    case XLVIRT_MAJOR:
        physdev = XENDEV_VIRTUAL + (MINOR(xldev) >> XLVIRT_PARTN_SHIFT);
        break;
    }

    return physdev;
}

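/* Map a XenoLinux device to the gendisk registered by the fake IDE, SCSI or
 * virtual-segment subsystem that owns its major number. */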
static inline struct gendisk *xldev_to_gendisk(kdev_t xldev)
{
    struct gendisk *gd = NULL;

    switch ( MAJOR(xldev) )
    {
    case XLIDE_MAJOR_0:
        gd = xlide_gendisk[0];
        break;

    case XLIDE_MAJOR_1:
        gd = xlide_gendisk[1];
        break;

    case XLSCSI_MAJOR:
        gd = xlscsi_gendisk;
        break;

    case XLVIRT_MAJOR:
        gd = xlsegment_gendisk;
        break;
    }

    if ( gd == NULL ) BUG();

    return gd;
}

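/* Look up the per-disk xl_disk_t hanging off the gendisk's real_devices array. */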
static inline xl_disk_t *xldev_to_xldisk(kdev_t xldev)
{
    struct gendisk *gd = xldev_to_gendisk(xldev);
    return (xl_disk_t *)gd->real_devices +
        (MINOR(xldev) >> PARTN_SHIFT(xldev));
}

int xenolinux_block_open(struct inode *inode, struct file *filep)
{
    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
    disk->usage++;
    DPRINTK("xenolinux_block_open\n");
    return 0;
}

int xenolinux_block_release(struct inode *inode, struct file *filep)
{
    xl_disk_t *disk = xldev_to_xldisk(inode->i_rdev);
    disk->usage--;
    DPRINTK("xenolinux_block_release\n");
    return 0;
}

/*
 * handle ioctl calls
 *
 * individual ioctls are defined in /usr/include/linux/fs.h
 */
int xenolinux_block_ioctl(struct inode *inode, struct file *filep,
                          unsigned command, unsigned long argument)
{
    kdev_t dev = inode->i_rdev;
    struct hd_geometry *geo = (struct hd_geometry *)argument;
    struct gendisk *gd;
    struct hd_struct *part;

    DPRINTK("xenolinux_block_ioctl\n");

    /* check permissions */
    if (!capable(CAP_SYS_ADMIN)) return -EPERM;
    if (!inode) return -EINVAL;

    DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                  command, (long) argument, dev);

    gd = xldev_to_gendisk(dev);
    part = &gd->part[MINOR(dev)];

    switch ( command )
    {
    case BLKGETSIZE:
        DPRINTK_IOCTL(" BLKGETSIZE: %x %lx\n", BLKGETSIZE, part->nr_sects);
        return put_user(part->nr_sects, (unsigned long *) argument);

    case BLKRRPART: /* re-read partition table */
        DPRINTK_IOCTL(" BLKRRPART: %x\n", BLKRRPART);
        return xenolinux_block_revalidate(dev);

    case BLKSSZGET:
        switch ( MAJOR(dev) )
        {
        case XLIDE_MAJOR_0:
            DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET,
                          xlide_hwsect(MINOR(dev)));
            return xlide_hwsect(MINOR(dev));

        case XLSCSI_MAJOR:
            DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET,
                          xlscsi_hwsect(MINOR(dev)));
            return xlscsi_hwsect(MINOR(dev));

        case XLVIRT_MAJOR:
            DPRINTK_IOCTL(" BLKSSZGET: %x 0x%x\n", BLKSSZGET,
                          xlsegment_hwsect(MINOR(dev)));
            return xlsegment_hwsect(MINOR(dev));

        default:
            printk(KERN_ALERT "BLKSSZGET ioctl() on bogus disk!\n");
            return 0;
        }

    case BLKBSZGET: /* get block size */
        DPRINTK_IOCTL(" BLKBSZGET: %x\n", BLKBSZGET);
        break;

    case BLKBSZSET: /* set block size */
        DPRINTK_IOCTL(" BLKBSZSET: %x\n", BLKBSZSET);
        break;

    case BLKRASET: /* set read-ahead */
        DPRINTK_IOCTL(" BLKRASET: %x\n", BLKRASET);
        break;

    case BLKRAGET: /* get read-ahead */
        DPRINTK_IOCTL(" BLKRAGET: %x\n", BLKRAGET);
        break;

    case HDIO_GETGEO:
        /* note: these values are complete garbage */
        DPRINTK_IOCTL(" HDIO_GETGEO: %x\n", HDIO_GETGEO);
        if (!argument) return -EINVAL;
        if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
        if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
        if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
        if (put_user(0x106, (unsigned short *)&geo->cylinders)) return -EFAULT;
        return 0;

    case HDIO_GETGEO_BIG:
        /* note: these values are complete garbage */
        DPRINTK_IOCTL(" HDIO_GETGEO_BIG: %x\n", HDIO_GETGEO_BIG);
        if (!argument) return -EINVAL;
        if (put_user(0x00, (unsigned long *) &geo->start)) return -EFAULT;
        if (put_user(0xff, (byte *)&geo->heads)) return -EFAULT;
        if (put_user(0x3f, (byte *)&geo->sectors)) return -EFAULT;
        if (put_user(0x106, (unsigned int *) &geo->cylinders)) return -EFAULT;
        return 0;

    case CDROMMULTISESSION:
        printk("FIXME: support multisession CDs later\n");
        memset((struct cdrom_multisession *)argument, 0,
               sizeof(struct cdrom_multisession));
        return 0;

    default:
        printk("ioctl %08x not supported by xl_block\n", command);
        return -ENOSYS;
    }

    return 0;
}

int xenolinux_block_check(kdev_t dev)
{
    DPRINTK("xenolinux_block_check\n");
    return 0;
}

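/*
 * Re-read the partition table for a unit: refuse while the device is open
 * more than once, invalidate and zero every partition under the unit, then
 * let grok_partitions() rebuild them from the disk's current capacity.
 */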
int xenolinux_block_revalidate(kdev_t dev)
{
    struct gendisk *gd = xldev_to_gendisk(dev);
    xl_disk_t *disk = xldev_to_xldisk(dev);
    unsigned long flags;
    int i, partn_shift = PARTN_SHIFT(dev);
    int xdev = dev & XENDEV_IDX_MASK;

    DPRINTK("xenolinux_block_revalidate: %d %d %d\n",
            dev, xdev, XENDEV_IDX_MASK);

    spin_lock_irqsave(&io_request_lock, flags);
    if ( disk->usage > 1 )
    {
        spin_unlock_irqrestore(&io_request_lock, flags);
        return -EBUSY;
    }
    spin_unlock_irqrestore(&io_request_lock, flags);

    for ( i = 0; i < (1 << partn_shift); i++ )
    {
        invalidate_device(xdev + i, 1);
        gd->part[xdev + i].start_sect = 0;
        gd->part[xdev + i].nr_sects = 0;
    }

    grok_partitions(gd, MINOR(dev) >> partn_shift,
                    1 << partn_shift, disk->capacity);

    return 0;
}

/*
 * hypervisor_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: XEN_BLOCK_{READ,WRITE,PROBE*,SEG*}
 * buffer: buffer to read/write into. this should be a
 *         virtual address in the guest os.
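 *
 * Returns 0 on success, or 1 if the shared ring is full. On a full ring the
 * caller must retry later; do_xlblk_request() does this by parking the queue
 * on pending_queues[] until the response handler drains the ring.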
 */
static int hypervisor_request(unsigned long id,
                              int operation,
                              char *buffer,
                              unsigned long sector_number,
                              unsigned short nr_sectors,
                              kdev_t device)
{
    unsigned long buffer_ma = phys_to_machine(virt_to_phys(buffer));
    kdev_t phys_device = (kdev_t) 0;
    struct gendisk *gd;
    blk_ring_req_entry_t *req;
    struct buffer_head *bh;

    if ( nr_sectors >= (1<<9) ) BUG();
    if ( (buffer_ma & ((1<<9)-1)) != 0 ) BUG();

    switch ( operation )
    {
    case XEN_BLOCK_SEG_CREATE:
    case XEN_BLOCK_SEG_DELETE:
    case XEN_BLOCK_PHYSDEV_GRANT:
    case XEN_BLOCK_PHYSDEV_PROBE:
    case XEN_BLOCK_PROBE_BLK:
    case XEN_BLOCK_PROBE_SEG:
    case XEN_BLOCK_PROBE_SEG_ALL:
        if ( RING_FULL ) return 1;
        phys_device = (kdev_t) 0;
        sector_number = 0;
        DISABLE_SCATTERGATHER();
        break;

    case XEN_BLOCK_READ:
    case XEN_BLOCK_WRITE:
        phys_device = xldev_to_physdev(device);
        gd = xldev_to_gendisk(device);

        sector_number += gd->part[MINOR(device)].start_sect;
        if ( (sg_operation == operation) &&
             (sg_dev == phys_device) &&
             (sg_next_sect == sector_number) )
        {
            req = &blk_ring->ring[(req_prod-1)&(BLK_RING_SIZE-1)].req;
            bh = (struct buffer_head *)id;
            bh->b_reqnext = (struct buffer_head *)req->id;
            req->id = id;
            req->buffer_and_sects[req->nr_segments] = buffer_ma | nr_sectors;
            if ( ++req->nr_segments < MAX_BLK_SEGS )
                sg_next_sect += nr_sectors;
            else
                DISABLE_SCATTERGATHER();
            return 0;
        }
        else if ( RING_FULL )
        {
            return 1;
        }
        else
        {
            sg_operation = operation;
            sg_dev = phys_device;
            sg_next_sect = sector_number + nr_sectors;
        }
        break;

    default:
        panic("unknown op %d\n", operation);
    }

    /* Fill out a communications ring structure. */
    req = &blk_ring->ring[req_prod].req;
    req->id = id;
    req->operation = operation;
    req->sector_number = sector_number;
    req->device = phys_device;
    req->nr_segments = 1;
    req->buffer_and_sects[0] = buffer_ma | nr_sectors;
    req_prod = BLK_RING_INC(req_prod);

    return 0;
}

/*
 * do_xlblk_request
 *  read or write a block; request is in a request queue
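 *
 * Each buffer_head on each request is handed to hypervisor_request(); if the
 * shared ring fills up part-way through, the queue is parked on
 * pending_queues[] and the remaining work is resumed from the response
 * interrupt handler.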
 */
void do_xlblk_request(request_queue_t *rq)
{
    struct request *req;
    struct buffer_head *bh, *next_bh;
    int rw, nsect, full, queued = 0;

    DPRINTK("xlblk.c::do_xlblk_request for '%s'\n", DEVICE_NAME);

    while ( !rq->plugged && !list_empty(&rq->queue_head) )
    {
        if ( (req = blkdev_entry_next_request(&rq->queue_head)) == NULL )
            goto out;

        DPRINTK("do_xlblk_request %p: cmd %i, sec %lx, (%li/%li) bh:%p\n",
                req, req->cmd, req->sector,
                req->current_nr_sectors, req->nr_sectors, req->bh);

        rw = req->cmd;
        if ( rw == READA ) rw = READ;
        if ( (rw != READ) && (rw != WRITE) )
            panic("XenoLinux Virtual Block Device: bad cmd: %d\n", rw);

        req->errors = 0;

        bh = req->bh;
        while ( bh != NULL )
        {
            next_bh = bh->b_reqnext;
            bh->b_reqnext = NULL;

            full = hypervisor_request(
                (unsigned long)bh,
                (rw == READ) ? XEN_BLOCK_READ : XEN_BLOCK_WRITE,
                bh->b_data, bh->b_rsector, bh->b_size>>9, bh->b_rdev);

            if ( full )
            {
                bh->b_reqnext = next_bh;
                pending_queues[nr_pending++] = rq;
                if ( nr_pending >= MAX_PENDING ) BUG();
                goto out;
            }

            queued++;

            /* Dequeue the buffer head from the request. */
            nsect = bh->b_size >> 9;
            bh = req->bh = next_bh;

            if ( bh != NULL )
            {
                /* There's another buffer head to do. Update the request. */
                req->hard_sector += nsect;
                req->hard_nr_sectors -= nsect;
                req->sector = req->hard_sector;
                req->nr_sectors = req->hard_nr_sectors;
                req->current_nr_sectors = bh->b_size >> 9;
                req->buffer = bh->b_data;
            }
            else
            {
                /* That was the last buffer head. Finalise the request. */
                if ( end_that_request_first(req, 1, "XenBlk") ) BUG();
                blkdev_dequeue_request(req);
                end_that_request_last(req);
            }
        }
    }

 out:
    if ( queued != 0 ) signal_requests_to_xen();
}

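/*
 * Response interrupt handler: walk the shared ring from resp_cons up to the
 * resp_prod published by Xen, completing the buffer_head chain attached to
 * each data response and latching the status of any control message. Once
 * the ring has drained below half full, kick any queues parked in
 * pending_queues[].
 */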
static void xlblk_response_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
    int i;
    unsigned long flags;
    struct buffer_head *bh, *next_bh;

    spin_lock_irqsave(&io_request_lock, flags);

    for ( i = resp_cons;
          i != blk_ring->resp_prod;
          i = BLK_RING_INC(i) )
    {
        blk_ring_resp_entry_t *bret = &blk_ring->ring[i].resp;
        switch ( bret->operation )
        {
        case XEN_BLOCK_READ:
        case XEN_BLOCK_WRITE:
            if ( bret->status )
                DPRINTK("Bad return from blkdev data request: %lx\n",
                        bret->status);
            for ( bh = (struct buffer_head *)bret->id;
                  bh != NULL;
                  bh = next_bh )
            {
                next_bh = bh->b_reqnext;
                bh->b_reqnext = NULL;
                bh->b_end_io(bh, !bret->status);
            }
            break;

        case XEN_BLOCK_SEG_CREATE:
        case XEN_BLOCK_SEG_DELETE:
        case XEN_BLOCK_PROBE_SEG:
        case XEN_BLOCK_PROBE_SEG_ALL:
        case XEN_BLOCK_PROBE_BLK:
        case XEN_BLOCK_PHYSDEV_GRANT:
        case XEN_BLOCK_PHYSDEV_PROBE:
            xlblk_control_msg_pending = bret->status;
            break;

        default:
            BUG();
        }
    }

    resp_cons = i;

    /* We kick pending request queues if the ring is reasonably empty. */
    if ( (nr_pending != 0) &&
         (((req_prod - resp_cons) & (BLK_RING_SIZE - 1)) <
          (BLK_RING_SIZE >> 1)) )
    {
        /* Attempt to drain the queue, but bail if the ring becomes full. */
        while ( nr_pending != 0 )
        {
            do_xlblk_request(pending_queues[--nr_pending]);
            if ( RING_FULL ) break;
        }
    }

    spin_unlock_irqrestore(&io_request_lock, flags);
}

/*
 * Send a synchronous message to Xen.
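 *
 * The request is issued through the normal ring and the caller then spins on
 * xlblk_control_msg_pending, which xlblk_response_int() overwrites with the
 * final status when the matching control response arrives.
 */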
int xenolinux_control_msg(int operation, char *buffer, int size)
{
    unsigned long flags;
    char *aligned_buf;

    /* We copy from an aligned buffer, as interface needs sector alignment. */
    aligned_buf = (char *)get_free_page(GFP_KERNEL);
    if ( aligned_buf == NULL ) BUG();
    memcpy(aligned_buf, buffer, size);

    xlblk_control_msg_pending = 2;
    spin_lock_irqsave(&io_request_lock, flags);
    /* Note that size gets rounded up to a sector-sized boundary. */
    if ( hypervisor_request(0, operation, aligned_buf, 0, (size+511)/512, 0) )
    {
        /* Ring full: drop the lock and the bounce page before bailing out. */
        spin_unlock_irqrestore(&io_request_lock, flags);
        free_page((unsigned long)aligned_buf);
        return -EAGAIN;
    }
    signal_requests_to_xen();
    spin_unlock_irqrestore(&io_request_lock, flags);
    while ( xlblk_control_msg_pending == 2 ) barrier();

    memcpy(buffer, aligned_buf, size);
    free_page((unsigned long)aligned_buf);

    return xlblk_control_msg_pending ? -EINVAL : 0;
}

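/*
 * Driver initialisation: attach to the shared block-ring page mapped at boot,
 * register the response interrupt, probe Xen for disk information and hand
 * the result to the fake IDE and SCSI subsystems.
 */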
int __init xlblk_init(void)
{
    int error;

    xlblk_control_msg_pending = 0;
    nr_pending = 0;

    /* This mapping was created early at boot time. */
    blk_ring = (blk_ring_t *)fix_to_virt(FIX_BLKRING_BASE);
    blk_ring->req_prod = blk_ring->resp_prod = resp_cons = req_prod = 0;

    error = request_irq(XLBLK_RESPONSE_IRQ, xlblk_response_int,
                        SA_SAMPLE_RANDOM, "xlblk-response", NULL);
    if ( error )
    {
        printk(KERN_ALERT "Could not allocate receive interrupt\n");
        goto fail;
    }

    /* Probe for disk information. */
    memset(&xlblk_disk_info, 0, sizeof(xlblk_disk_info));
    error = xenolinux_control_msg(XEN_BLOCK_PROBE_BLK,
                                  (char *)&xlblk_disk_info,
                                  sizeof(xen_disk_info_t));
    if ( error )
    {
        printk(KERN_ALERT "Could not probe disks (%d)\n", error);
        free_irq(XLBLK_RESPONSE_IRQ, NULL);
        goto fail;
    }

    /* Pass the information to our fake IDE and SCSI subsystems. */
    xlide_init(&xlblk_disk_info);
    xlscsi_init(&xlblk_disk_info);

    return 0;

 fail:
    return error;
}

static void __exit xlblk_cleanup(void)
{
    xlide_cleanup();
    xlscsi_cleanup();
    free_irq(XLBLK_RESPONSE_IRQ, NULL);
}


#ifdef MODULE
module_init(xlblk_init);
module_exit(xlblk_cleanup);
#endif