debuggers.hg

view linux-2.6.8.1-xen-sparse/drivers/xen/blkfront/vbd.c @ 2620:86f3590030af

bitkeeper revision 1.1159.1.196 (415d2c407zgMBjgq11fXyF4ysEuEkA)

Merge freefall.cl.cam.ac.uk:/auto/groups/xeno/BK/xeno.bk
into freefall.cl.cam.ac.uk:/auto/groups/xeno/users/cl349/BK/xeno.bk-nbsd
author cl349@freefall.cl.cam.ac.uk
date Fri Oct 01 10:06:56 2004 +0000 (2004-10-01)
parents 11be1dfb262b 653fd1eddd8c
children a9128b3b9f45 d0beb68a7ae0 8aa9d487a8dd
line source
1 /******************************************************************************
2 * vbd.c
3 *
4 * XenLinux virtual block-device driver (xvd).
5 *
6 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
7 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
8 * Copyright (c) 2004, Christian Limpach
9 */
11 #include "block.h"
12 #include <linux/blkdev.h>
14 /*
15 * For convenience we distinguish between ide, scsi and 'other' (i.e.
16 * potentially combinations of the two) in the naming scheme and in a few
17 * other places (like default readahead, etc).
18 */
20 #define NUM_IDE_MAJORS 10
21 #define NUM_SCSI_MAJORS 9
22 #define NUM_VBD_MAJORS 1
24 static struct xlbd_type_info xlbd_ide_type = {
25 .partn_shift = 6,
26 // XXXcl todo blksize_size[major] = 1024;
27 .hardsect_size = 512,
28 .max_sectors = 128, /* 'hwif->rqsize' if we knew it */
29 // XXXcl todo read_ahead[major] = 8; /* from drivers/ide/ide-probe.c */
30 .name = "hd",
31 };
33 static struct xlbd_type_info xlbd_scsi_type = {
34 .partn_shift = 4,
35 // XXXcl todo blksize_size[major] = 1024; /* XXX 512; */
36 .hardsect_size = 512,
37 .max_sectors = 128*8, /* XXX 128; */
38 // XXXcl todo read_ahead[major] = 0; /* XXX 8; -- guessing */
39 .name = "sd",
40 };
42 static struct xlbd_type_info xlbd_vbd_type = {
43 .partn_shift = 4,
44 // XXXcl todo blksize_size[major] = 512;
45 .hardsect_size = 512,
46 .max_sectors = 128,
47 // XXXcl todo read_ahead[major] = 8;
48 .name = "xvd",
49 };
51 /* XXXcl handle cciss after finding out why it's "hacked" in */
53 static struct xlbd_major_info *major_info[NUM_IDE_MAJORS + NUM_SCSI_MAJORS +
54 NUM_VBD_MAJORS];
56 /* Information about our VBDs. */
57 #define MAX_VBDS 64
58 static int nr_vbds;
59 static vdisk_t *vbd_info;
61 struct request_queue *xlbd_blk_queue = NULL;
63 #define MAJOR_XEN(dev) ((dev)>>8)
64 #define MINOR_XEN(dev) ((dev) & 0xff)
66 static struct block_device_operations xlvbd_block_fops =
67 {
68 .owner = THIS_MODULE,
69 .open = blkif_open,
70 .release = blkif_release,
71 .ioctl = blkif_ioctl,
72 #if 0
73 check_media_change: blkif_check,
74 revalidate: blkif_revalidate,
75 #endif
76 };
78 spinlock_t blkif_io_lock = SPIN_LOCK_UNLOCKED;
80 static int xlvbd_get_vbd_info(vdisk_t *disk_info)
81 {
82 vdisk_t *buf = (vdisk_t *)__get_free_page(GFP_KERNEL);
83 blkif_request_t req;
84 blkif_response_t rsp;
85 int nr;
87 memset(&req, 0, sizeof(req));
88 req.operation = BLKIF_OP_PROBE;
89 req.nr_segments = 1;
90 req.frame_and_sects[0] = virt_to_machine(buf) | 7;
92 blkif_control_send(&req, &rsp);
94 if ( rsp.status <= 0 )
95 {
96 printk(KERN_ALERT "Could not probe disks (%d)\n", rsp.status);
97 return -1;
98 }
100 if ( (nr = rsp.status) > MAX_VBDS )
101 nr = MAX_VBDS;
102 memcpy(disk_info, buf, nr * sizeof(vdisk_t));
104 free_page((unsigned long)buf);
106 return nr;
107 }
109 static struct xlbd_major_info *xlbd_get_major_info(int xd_device, int *minor)
110 {
111 int mi_idx, new_major;
112 int xd_major = MAJOR_XEN(xd_device);
113 int xd_minor = MINOR_XEN(xd_device);
115 *minor = xd_minor;
117 switch (xd_major) {
118 case IDE0_MAJOR: mi_idx = 0; new_major = IDE0_MAJOR; break;
119 case IDE1_MAJOR: mi_idx = 1; new_major = IDE1_MAJOR; break;
120 case IDE2_MAJOR: mi_idx = 2; new_major = IDE2_MAJOR; break;
121 case IDE3_MAJOR: mi_idx = 3; new_major = IDE3_MAJOR; break;
122 case IDE4_MAJOR: mi_idx = 4; new_major = IDE4_MAJOR; break;
123 case IDE5_MAJOR: mi_idx = 5; new_major = IDE5_MAJOR; break;
124 case IDE6_MAJOR: mi_idx = 6; new_major = IDE6_MAJOR; break;
125 case IDE7_MAJOR: mi_idx = 7; new_major = IDE7_MAJOR; break;
126 case IDE8_MAJOR: mi_idx = 8; new_major = IDE8_MAJOR; break;
127 case IDE9_MAJOR: mi_idx = 9; new_major = IDE9_MAJOR; break;
128 case SCSI_DISK0_MAJOR: mi_idx = 10; new_major = SCSI_DISK0_MAJOR; break;
129 case SCSI_DISK1_MAJOR ... SCSI_DISK7_MAJOR:
130 mi_idx = 11 + xd_major - SCSI_DISK1_MAJOR;
131 new_major = SCSI_DISK1_MAJOR + xd_major - SCSI_DISK1_MAJOR;
132 break;
133 case SCSI_CDROM_MAJOR: mi_idx = 18; new_major = SCSI_CDROM_MAJOR; break;
134 default: mi_idx = 19; new_major = 0;/* XXXcl notyet */ break;
135 }
137 if (major_info[mi_idx])
138 return major_info[mi_idx];
140 major_info[mi_idx] = kmalloc(sizeof(struct xlbd_major_info), GFP_KERNEL);
141 if (major_info[mi_idx] == NULL)
142 return NULL;
144 memset(major_info[mi_idx], 0, sizeof(struct xlbd_major_info));
146 switch (mi_idx) {
147 case 0 ... (NUM_IDE_MAJORS - 1):
148 major_info[mi_idx]->type = &xlbd_ide_type;
149 break;
150 case NUM_IDE_MAJORS ... (NUM_IDE_MAJORS + NUM_SCSI_MAJORS - 1):
151 major_info[mi_idx]->type = &xlbd_scsi_type;
152 break;
153 case (NUM_IDE_MAJORS + NUM_SCSI_MAJORS) ...
154 (NUM_IDE_MAJORS + NUM_SCSI_MAJORS + NUM_VBD_MAJORS - 1):
155 major_info[mi_idx]->type = &xlbd_vbd_type;
156 break;
157 }
158 major_info[mi_idx]->major = new_major;
160 if (register_blkdev(major_info[mi_idx]->major, major_info[mi_idx]->type->name)) {
161 printk(KERN_ALERT "XL VBD: can't get major %d with name %s\n",
162 major_info[mi_idx]->major, major_info[mi_idx]->type->name);
163 goto out;
164 }
166 devfs_mk_dir(major_info[mi_idx]->type->name);
168 return major_info[mi_idx];
170 out:
171 kfree(major_info[mi_idx]);
172 major_info[mi_idx] = NULL;
173 return NULL;
174 }
176 static struct gendisk *xlvbd_get_gendisk(struct xlbd_major_info *mi,
177 int xd_minor, vdisk_t *xd)
178 {
179 struct gendisk *gd;
180 struct xlbd_disk_info *di;
181 int device, partno;
183 device = MKDEV(mi->major, xd_minor);
184 gd = get_gendisk(device, &partno);
185 if (gd)
186 return gd;
188 di = kmalloc(sizeof(struct xlbd_disk_info), GFP_KERNEL);
189 if (di == NULL)
190 return NULL;
191 di->mi = mi;
192 di->xd_device = xd->device;
194 /* Construct an appropriate gendisk structure. */
195 gd = alloc_disk(1);
196 if (gd == NULL)
197 goto out;
199 gd->major = mi->major;
200 gd->first_minor = xd_minor;
201 gd->fops = &xlvbd_block_fops;
202 gd->private_data = di;
203 sprintf(gd->disk_name, "%s%c%d", mi->type->name,
204 'a' + (xd_minor >> mi->type->partn_shift),
205 xd_minor & ((1 << mi->type->partn_shift) - 1));
206 /* sprintf(gd->devfs_name, "%s%s/disc%d", mi->type->name, , ); XXXdevfs */
208 set_capacity(gd, xd->capacity);
210 if (xlbd_blk_queue == NULL) {
211 xlbd_blk_queue = blk_init_queue(do_blkif_request,
212 &blkif_io_lock);
213 if (xlbd_blk_queue == NULL)
214 goto out;
215 elevator_init(xlbd_blk_queue, &elevator_noop);
217 /*
218 * Turn off barking 'headactive' mode. We dequeue
219 * buffer heads as soon as we pass them to back-end
220 * driver.
221 */
222 blk_queue_headactive(xlbd_blk_queue, 0); /* XXXcl: noop according to blkdev.h */
224 blk_queue_hardsect_size(xlbd_blk_queue,
225 mi->type->hardsect_size);
226 blk_queue_max_sectors(xlbd_blk_queue, mi->type->max_sectors); /* 'hwif->rqsize' if we knew it */
228 /* XXXcl: set mask to PAGE_SIZE for now, to improve either use
229 - blk_queue_merge_bvec to merge requests with adjacent ma's
230 - the tags infrastructure
231 - the dma infrastructure
232 */
233 blk_queue_segment_boundary(xlbd_blk_queue, PAGE_SIZE - 1);
235 blk_queue_max_phys_segments(xlbd_blk_queue,
236 BLKIF_MAX_SEGMENTS_PER_REQUEST);
237 blk_queue_max_hw_segments(xlbd_blk_queue,
238 BLKIF_MAX_SEGMENTS_PER_REQUEST); /* XXXcl not needed? */
241 }
242 gd->queue = xlbd_blk_queue;
244 add_disk(gd);
246 return gd;
248 out:
249 if (gd)
250 del_gendisk(gd);
251 kfree(di);
252 return NULL;
253 }
255 /*
256 * xlvbd_init_device - initialise a VBD device
257 * @disk: a vdisk_t describing the VBD
258 *
259 * Takes a vdisk_t * that describes a VBD the domain has access to.
260 * Performs appropriate initialisation and registration of the device.
261 *
262 * Care needs to be taken when making re-entrant calls to ensure that
263 * corruption does not occur. Also, devices that are in use should not have
264 * their details updated. This is the caller's responsibility.
265 */
266 static int xlvbd_init_device(vdisk_t *xd)
267 {
268 struct block_device *bd;
269 struct gendisk *gd;
270 struct xlbd_major_info *mi;
271 int device;
272 int minor;
274 int err = -ENOMEM;
276 mi = xlbd_get_major_info(xd->device, &minor);
277 if (mi == NULL)
278 return -EPERM;
280 device = MKDEV(mi->major, minor);
282 if ((bd = bdget(device)) == NULL)
283 return -EPERM;
285 /*
286 * Update of partition info, and check of usage count, is protected
287 * by the per-block-device semaphore.
288 */
289 down(&bd->bd_sem);
291 gd = xlvbd_get_gendisk(mi, minor, xd);
292 if (mi == NULL) {
293 err = -EPERM;
294 goto out;
295 }
297 if (VDISK_READONLY(xd->info))
298 set_disk_ro(gd, 1);
300 /* Some final fix-ups depending on the device type */
301 switch (VDISK_TYPE(xd->info)) {
302 case VDISK_TYPE_CDROM:
303 gd->flags |= GENHD_FL_REMOVABLE | GENHD_FL_CD;
304 /* FALLTHROUGH */
305 case VDISK_TYPE_FLOPPY:
306 case VDISK_TYPE_TAPE:
307 gd->flags |= GENHD_FL_REMOVABLE;
308 break;
310 case VDISK_TYPE_DISK:
311 break;
313 default:
314 printk(KERN_ALERT "XenLinux: unknown device type %d\n",
315 VDISK_TYPE(xd->info));
316 break;
317 }
319 err = 0;
320 out:
321 up(&bd->bd_sem);
322 bdput(bd);
323 return err;
324 }
326 #if 0
327 /*
328 * xlvbd_remove_device - remove a device node if possible
329 * @device: numeric device ID
330 *
331 * Updates the gendisk structure and invalidates devices.
332 *
333 * This is OK for now but in future, should perhaps consider where this should
334 * deallocate gendisks / unregister devices.
335 */
336 static int xlvbd_remove_device(int device)
337 {
338 int i, rc = 0, minor = MINOR(device);
339 struct gendisk *gd;
340 struct block_device *bd;
341 xen_block_t *disk = NULL;
343 if ( (bd = bdget(device)) == NULL )
344 return -1;
346 /*
347 * Update of partition info, and check of usage count, is protected
348 * by the per-block-device semaphore.
349 */
350 down(&bd->bd_sem);
352 if ( ((gd = get_gendisk(device)) == NULL) ||
353 ((disk = xldev_to_xldisk(device)) == NULL) )
354 BUG();
356 if ( disk->usage != 0 )
357 {
358 printk(KERN_ALERT "VBD removal failed - in use [dev=%x]\n", device);
359 rc = -1;
360 goto out;
361 }
363 if ( (minor & (gd->max_p-1)) != 0 )
364 {
365 /* 1: The VBD is mapped to a partition rather than a whole unit. */
366 invalidate_device(device, 1);
367 gd->part[minor].start_sect = 0;
368 gd->part[minor].nr_sects = 0;
369 gd->sizes[minor] = 0;
371 /* Clear the consists-of-virtual-partitions flag if possible. */
372 gd->flags[minor >> gd->minor_shift] &= ~GENHD_FL_VIRT_PARTNS;
373 for ( i = 1; i < gd->max_p; i++ )
374 if ( gd->sizes[(minor & ~(gd->max_p-1)) + i] != 0 )
375 gd->flags[minor >> gd->minor_shift] |= GENHD_FL_VIRT_PARTNS;
377 /*
378 * If all virtual partitions are now gone, and a 'whole unit' VBD is
379 * present, then we can try to grok the unit's real partition table.
380 */
381 if ( !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS) &&
382 (gd->sizes[minor & ~(gd->max_p-1)] != 0) &&
383 !(gd->flags[minor >> gd->minor_shift] & GENHD_FL_REMOVABLE) )
384 {
385 register_disk(gd,
386 device&~(gd->max_p-1),
387 gd->max_p,
388 &xlvbd_block_fops,
389 gd->part[minor&~(gd->max_p-1)].nr_sects);
390 }
391 }
392 else
393 {
394 /*
395 * 2: The VBD is mapped to an entire 'unit'. Clear all partitions.
396 * NB. The partition entries are only cleared if there are no VBDs
397 * mapped to individual partitions on this unit.
398 */
399 i = gd->max_p - 1; /* Default: clear subpartitions as well. */
400 if ( gd->flags[minor >> gd->minor_shift] & GENHD_FL_VIRT_PARTNS )
401 i = 0; /* 'Virtual' mode: only clear the 'whole unit' entry. */
402 while ( i >= 0 )
403 {
404 invalidate_device(device+i, 1);
405 gd->part[minor+i].start_sect = 0;
406 gd->part[minor+i].nr_sects = 0;
407 gd->sizes[minor+i] = 0;
408 i--;
409 }
410 }
412 out:
413 up(&bd->bd_sem);
414 bdput(bd);
415 return rc;
416 }
418 /*
419 * xlvbd_update_vbds - reprobes the VBD status and performs updates driver
420 * state. The VBDs need to be updated in this way when the domain is
421 * initialised and also each time we receive an XLBLK_UPDATE event.
422 */
423 void xlvbd_update_vbds(void)
424 {
425 int i, j, k, old_nr, new_nr;
426 vdisk_t *old_info, *new_info, *merged_info;
428 old_info = vbd_info;
429 old_nr = nr_vbds;
431 new_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
432 if ( unlikely(new_nr = xlvbd_get_vbd_info(new_info)) < 0 )
433 {
434 kfree(new_info);
435 return;
436 }
438 /*
439 * Final list maximum size is old list + new list. This occurs only when
440 * old list and new list do not overlap at all, and we cannot yet destroy
441 * VBDs in the old list because the usage counts are busy.
442 */
443 merged_info = kmalloc((old_nr + new_nr) * sizeof(vdisk_t), GFP_KERNEL);
445 /* @i tracks old list; @j tracks new list; @k tracks merged list. */
446 i = j = k = 0;
448 while ( (i < old_nr) && (j < new_nr) )
449 {
450 if ( old_info[i].device < new_info[j].device )
451 {
452 if ( xlvbd_remove_device(old_info[i].device) != 0 )
453 memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
454 i++;
455 }
456 else if ( old_info[i].device > new_info[j].device )
457 {
458 if ( xlvbd_init_device(&new_info[j]) == 0 )
459 memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
460 j++;
461 }
462 else
463 {
464 if ( ((old_info[i].capacity == new_info[j].capacity) &&
465 (old_info[i].info == new_info[j].info)) ||
466 (xlvbd_remove_device(old_info[i].device) != 0) )
467 memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
468 else if ( xlvbd_init_device(&new_info[j]) == 0 )
469 memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
470 i++; j++;
471 }
472 }
474 for ( ; i < old_nr; i++ )
475 {
476 if ( xlvbd_remove_device(old_info[i].device) != 0 )
477 memcpy(&merged_info[k++], &old_info[i], sizeof(vdisk_t));
478 }
480 for ( ; j < new_nr; j++ )
481 {
482 if ( xlvbd_init_device(&new_info[j]) == 0 )
483 memcpy(&merged_info[k++], &new_info[j], sizeof(vdisk_t));
484 }
486 vbd_info = merged_info;
487 nr_vbds = k;
489 kfree(old_info);
490 kfree(new_info);
491 }
492 #endif
494 /*
495 * Set up all the linux device goop for the virtual block devices
496 * (vbd's) that we know about. Note that although from the backend
497 * driver's p.o.v. VBDs are addressed simply an opaque 16-bit device
498 * number, the domain creation tools conventionally allocate these
499 * numbers to correspond to those used by 'real' linux -- this is just
500 * for convenience as it means e.g. that the same /etc/fstab can be
501 * used when booting with or without Xen.
502 */
503 int xlvbd_init(void)
504 {
505 int i;
507 /*
508 * If compiled as a module, we don't support unloading yet. We
509 * therefore permanently increment the reference count to
510 * disallow it.
511 */
512 MOD_INC_USE_COUNT;
514 memset(major_info, 0, sizeof(major_info));
516 for (i = 0; i < sizeof(major_info) / sizeof(major_info[0]); i++) {
517 }
519 vbd_info = kmalloc(MAX_VBDS * sizeof(vdisk_t), GFP_KERNEL);
520 nr_vbds = xlvbd_get_vbd_info(vbd_info);
522 if (nr_vbds < 0) {
523 kfree(vbd_info);
524 vbd_info = NULL;
525 nr_vbds = 0;
526 } else {
527 for (i = 0; i < nr_vbds; i++)
528 xlvbd_init_device(&vbd_info[i]);
529 }
531 return 0;
532 }