/******************************************************************************
 * blkfront.c
 *
 * XenLinux virtual block-device driver.
 *
 * Copyright (c) 2003-2004, Keir Fraser & Steve Hand
 * Modifications by Mark A. Williamson are (c) Intel Research Cambridge
 * Copyright (c) 2004, Christian Limpach
 * Copyright (c) 2004, Andrew Warfield
 * Copyright (c) 2005, Christopher Clark
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#if 1
#define ASSERT(p)                                                          \
        if (!(p)) { printk("Assertion '%s' failed, line %d, file %s", #p , \
        __LINE__, __FILE__); *(int*)0=0; }
#else
#define ASSERT(_p)
#endif

#include <linux/version.h>
#include "block.h"
#include <linux/cdrom.h>
#include <linux/sched.h>
#include <linux/interrupt.h>
#include <scsi/scsi.h>
#include <asm-xen/evtchn.h>
#include <asm-xen/xenbus.h>
#include <asm-xen/xen-public/grant_table.h>
#include <asm-xen/gnttab.h>

#define BLKIF_STATE_DISCONNECTED 0
#define BLKIF_STATE_CONNECTED    1

static unsigned int blkif_state = BLKIF_STATE_DISCONNECTED;

#define MAXIMUM_OUTSTANDING_BLOCK_REQS \
        (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
#define GRANTREF_INVALID (1<<15)
#define GRANT_INVALID_REF (0xFFFF)

static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */

static void kick_pending_request_queues(struct blkfront_info *info);

static void blkif_completion(struct blk_shadow *s);

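/*
 * Unused shadow entries form a free list: each spare entry's req.id field
 * holds the index of the next free entry, info->shadow_free holds the head,
 * and 0x0fffffff marks the tail.
 */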
static inline int GET_ID_FROM_FREELIST(
        struct blkfront_info *info)
{
        unsigned long free = info->shadow_free;
        BUG_ON(free > BLK_RING_SIZE);
        info->shadow_free = info->shadow[free].req.id;
        info->shadow[free].req.id = 0x0fffffee; /* debug */
        return free;
}

static inline void ADD_ID_TO_FREELIST(
        struct blkfront_info *info, unsigned long id)
{
        info->shadow[id].req.id  = info->shadow_free;
        info->shadow[id].request = 0;
        info->shadow_free = id;
}

static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
{
        s->req = *r;
}

static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
{
        *r = s->req;
}

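/* Publish queued requests on the shared ring and notify the backend. */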
static inline void flush_requests(struct blkfront_info *info)
{
        RING_PUSH_REQUESTS(&info->ring);
        notify_via_evtchn(info->evtchn);
}

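/* Restart the block-layer queue once the shared ring has space again. */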
static void kick_pending_request_queues(struct blkfront_info *info)
{
        if (!RING_FULL(&info->ring)) {
                /* Re-enable calldowns. */
                blk_start_queue(info->rq);
                /* Kick things off immediately. */
                do_blkif_request(info->rq);
        }
}

static void blkif_restart_queue(void *arg)
{
        struct blkfront_info *info = (struct blkfront_info *)arg;
        spin_lock_irq(&blkif_io_lock);
        kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);
}

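/*
 * Scheduled by the grant-table free callback, which may run in a context
 * where the queue cannot be restarted directly; defer to a workqueue.
 */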
static void blkif_restart_queue_callback(void *arg)
{
        struct blkfront_info *info = (struct blkfront_info *)arg;
        schedule_work(&info->work);
}

int blkif_open(struct inode *inode, struct file *filep)
{
        return 0;
}


int blkif_release(struct inode *inode, struct file *filep)
{
        return 0;
}


int blkif_ioctl(struct inode *inode, struct file *filep,
                unsigned command, unsigned long argument)
{
        int i;

        DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                      command, (long)argument, inode->i_rdev);

        switch ( command )
        {
        case HDIO_GETGEO:
                /* return ENOSYS to use defaults */
                return -ENOSYS;

        case CDROMMULTISESSION:
                DPRINTK("FIXME: support multisession CDs later\n");
                for (i = 0; i < sizeof(struct cdrom_multisession); i++)
                        if (put_user(0, (char *)(argument + i)))
                                return -EFAULT;
                return 0;

        default:
                /*printk(KERN_ALERT "ioctl %08x not supported by Xen blkdev\n",
                  command);*/
                return -EINVAL; /* same return as native Linux */
        }

        return 0;
}


/*
 * blkif_queue_request
 *
 * request block io
 *
 * id: for guest use only.
 * operation: BLKIF_OP_{READ,WRITE,PROBE}
 * buffer: buffer to read/write into. this should be a
 *   virtual address in the guest os.
 */
static int blkif_queue_request(struct request *req)
{
        struct blkfront_info *info = req->rq_disk->private_data;
        unsigned long buffer_mfn;
        blkif_request_t *ring_req;
        struct bio *bio;
        struct bio_vec *bvec;
        int idx;
        unsigned long id;
        unsigned int fsect, lsect;
        int ref;
        grant_ref_t gref_head;

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED))
                return 1;

        if (gnttab_alloc_grant_references(
                BLKIF_MAX_SEGMENTS_PER_REQUEST, &gref_head) < 0) {
                gnttab_request_free_callback(
                        &info->callback,
                        blkif_restart_queue_callback,
                        info,
                        BLKIF_MAX_SEGMENTS_PER_REQUEST);
                return 1;
        }

        /* Fill out a communications ring structure. */
        ring_req = RING_GET_REQUEST(&info->ring, info->ring.req_prod_pvt);
        id = GET_ID_FROM_FREELIST(info);
        info->shadow[id].request = (unsigned long)req;

        ring_req->id = id;
        ring_req->operation = rq_data_dir(req) ?
                BLKIF_OP_WRITE : BLKIF_OP_READ;
        ring_req->sector_number = (blkif_sector_t)req->sector;
        ring_req->handle = info->handle;

        ring_req->nr_segments = 0;
        rq_for_each_bio (bio, req) {
                bio_for_each_segment (bvec, bio, idx) {
                        BUG_ON(ring_req->nr_segments
                               == BLKIF_MAX_SEGMENTS_PER_REQUEST);
                        buffer_mfn = page_to_phys(bvec->bv_page) >> PAGE_SHIFT;
                        fsect = bvec->bv_offset >> 9;
                        lsect = fsect + (bvec->bv_len >> 9) - 1;
                        /* install a grant reference. */
                        ref = gnttab_claim_grant_reference(&gref_head);
                        ASSERT(ref != -ENOSPC);

                        gnttab_grant_foreign_access_ref(
                                ref,
                                info->backend_id,
                                buffer_mfn,
                                rq_data_dir(req) );

                        info->shadow[id].frame[ring_req->nr_segments] =
                                buffer_mfn;

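                        /*
                         * blkif_fas_from_gref() packs the grant reference
                         * and the first/last 512-byte sectors of this
                         * segment into a single descriptor word (see the
                         * blkif interface headers for the exact layout).
                         */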
                        ring_req->frame_and_sects[ring_req->nr_segments] =
                                blkif_fas_from_gref(ref, fsect, lsect);

                        ring_req->nr_segments++;
                }
        }

        info->ring.req_prod_pvt++;

        /* Keep a private copy so we can reissue requests when recovering. */
        pickle_request(&info->shadow[id], ring_req);

        gnttab_free_grant_references(gref_head);

        return 0;
}

/*
 * do_blkif_request
 *  read a block; request is in a request queue
 */
void do_blkif_request(request_queue_t *rq)
{
        struct blkfront_info *info = NULL;
        struct request *req;
        int queued;

        DPRINTK("Entered do_blkif_request\n");

        queued = 0;

        while ((req = elv_next_request(rq)) != NULL) {
                info = req->rq_disk->private_data;

                if (!blk_fs_request(req)) {
                        end_request(req, 0);
                        continue;
                }

                if (RING_FULL(&info->ring))
                        goto wait;

                DPRINTK("do_blk_req %p: cmd %p, sec %lx, "
                        "(%u/%li) buffer:%p [%s]\n",
                        req, req->cmd, req->sector, req->current_nr_sectors,
                        req->nr_sectors, req->buffer,
                        rq_data_dir(req) ? "write" : "read");

                blkdev_dequeue_request(req);
                if (blkif_queue_request(req)) {
                        blk_requeue_request(rq, req);
                wait:
                        /* Avoid pointless unplugs. */
                        blk_stop_queue(rq);
                        break;
                }

                queued++;
        }

        if (queued != 0)
                flush_requests(info);
}


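/*
 * Interrupt handler: consume responses from the shared ring, complete the
 * corresponding block-layer requests, and recycle their shadow entries.
 */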
static irqreturn_t blkif_int(int irq, void *dev_id, struct pt_regs *ptregs)
{
        struct request *req;
        blkif_response_t *bret;
        RING_IDX i, rp;
        unsigned long flags;
        struct blkfront_info *info = (struct blkfront_info *)dev_id;

        spin_lock_irqsave(&blkif_io_lock, flags);

        if (unlikely(info->connected != BLKIF_STATE_CONNECTED || recovery)) {
                spin_unlock_irqrestore(&blkif_io_lock, flags);
                return IRQ_HANDLED;
        }

        rp = info->ring.sring->rsp_prod;
        rmb(); /* Ensure we see queued responses up to 'rp'. */

        for (i = info->ring.rsp_cons; i != rp; i++) {
                unsigned long id;

                bret = RING_GET_RESPONSE(&info->ring, i);
                id   = bret->id;
                req  = (struct request *)info->shadow[id].request;

                blkif_completion(&info->shadow[id]);

                ADD_ID_TO_FREELIST(info, id);

                switch (bret->operation) {
                case BLKIF_OP_READ:
                case BLKIF_OP_WRITE:
                        if (unlikely(bret->status != BLKIF_RSP_OKAY))
                                DPRINTK("Bad return from blkdev data "
                                        "request: %x\n", bret->status);

                        BUG_ON(end_that_request_first(
                                req, (bret->status == BLKIF_RSP_OKAY),
                                req->hard_nr_sectors));
                        end_that_request_last(req);
                        break;
                default:
                        BUG();
                }
        }

        info->ring.rsp_cons = i;

        kick_pending_request_queues(info);

        spin_unlock_irqrestore(&blkif_io_lock, flags);

        return IRQ_HANDLED;
}

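/*
 * Tear down the device channel: stop new I/O, free the shared ring page,
 * revoke its grant reference, and unbind the event-channel handler.
 */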
static void blkif_free(struct blkfront_info *info)
{
        /* Prevent new requests being issued until we fix things up. */
        spin_lock_irq(&blkif_io_lock);
        info->connected = BLKIF_STATE_DISCONNECTED;
        spin_unlock_irq(&blkif_io_lock);

        /* Free resources associated with old device channel. */
        if (info->ring.sring != NULL) {
                free_page((unsigned long)info->ring.sring);
                info->ring.sring = NULL;
        }
        if (info->ring_ref != GRANT_INVALID_REF)
                gnttab_end_foreign_access(info->ring_ref, 0);
        info->ring_ref = GRANT_INVALID_REF;
        unbind_evtchn_from_irqhandler(info->evtchn, info);
        info->evtchn = 0;
}

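/*
 * After suspend/resume, replay any requests that were in flight when the
 * old channel went away, reconstructing them from the shadow copy.
 */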
static void blkif_recover(struct blkfront_info *info)
{
        int i;
        blkif_request_t *req;
        struct blk_shadow *copy;
        int j;

        /* Stage 1: Make a safe copy of the shadow state. */
        copy = (struct blk_shadow *)kmalloc(sizeof(info->shadow), GFP_KERNEL);
        BUG_ON(copy == NULL);
        memcpy(copy, info->shadow, sizeof(info->shadow));

        /* Stage 2: Set up free list. */
        memset(&info->shadow, 0, sizeof(info->shadow));
        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.id = i+1;
        info->shadow_free = info->ring.req_prod_pvt;
        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

        /* Stage 3: Find pending requests and requeue them. */
        for (i = 0; i < BLK_RING_SIZE; i++) {
                /* Not in use? */
                if (copy[i].request == 0)
                        continue;

                /* Grab a request slot and unpickle shadow state into it. */
                req = RING_GET_REQUEST(
                        &info->ring, info->ring.req_prod_pvt);
                unpickle_request(req, &copy[i]);

                /* We get a new request id, and must reset the shadow state. */
                req->id = GET_ID_FROM_FREELIST(info);
                memcpy(&info->shadow[req->id], &copy[i], sizeof(copy[i]));

                /* Rewrite any grant references invalidated by susp/resume. */
                for (j = 0; j < req->nr_segments; j++) {
                        if ( req->frame_and_sects[j] & GRANTREF_INVALID )
                                gnttab_grant_foreign_access_ref(
                                        blkif_gref_from_fas(
                                                req->frame_and_sects[j]),
                                        info->backend_id,
                                        info->shadow[req->id].frame[j],
                                        rq_data_dir(
                                                (struct request *)
                                                info->shadow[req->id].request));
                        req->frame_and_sects[j] &= ~GRANTREF_INVALID;
                }
                info->shadow[req->id].req = *req;

                info->ring.req_prod_pvt++;
        }

        kfree(copy);

        recovery = 0;

        /* info->ring.sring->req_prod will be set when we flush_requests(). */
        wmb();

        /* Kicks things back into life. */
        flush_requests(info);

        /* Now safe to let other people use the interface. */
        info->connected = BLKIF_STATE_CONNECTED;
}

static void blkif_connect(struct blkfront_info *info, u16 evtchn)
{
        int err = 0;

        info->evtchn = evtchn;

        err = bind_evtchn_to_irqhandler(
                info->evtchn, blkif_int, SA_SAMPLE_RANDOM, "blkif", info);
        if (err != 0) {
                WPRINTK("bind_evtchn_to_irqhandler failed (err=%d)\n", err);
                return;
        }
}


static struct xenbus_device_id blkfront_ids[] = {
        { "vbd" },
        { "" }
};

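/*
 * Xenbus watch callback: once the backend has published "sectors", "info"
 * and "sector-size", create the virtual block device and go live.
 */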
static void watch_for_status(struct xenbus_watch *watch, const char *node)
{
        struct blkfront_info *info;
        unsigned int binfo;
        unsigned long sectors, sector_size;
        int err;

        info = container_of(watch, struct blkfront_info, watch);
        node += strlen(watch->node);

        /* FIXME: clean up when error on the other end. */
        if (info->connected == BLKIF_STATE_CONNECTED)
                return;

        err = xenbus_gather(watch->node,
                            "sectors", "%lu", &sectors,
                            "info", "%u", &binfo,
                            "sector-size", "%lu", &sector_size,
                            NULL);
        if (err) {
                xenbus_dev_error(info->xbdev, err,
                                 "reading backend fields at %s", watch->node);
                return;
        }

        xlvbd_add(sectors, info->vdevice, binfo, sector_size, info);
        info->connected = BLKIF_STATE_CONNECTED;

        blkif_state = BLKIF_STATE_CONNECTED;

        xenbus_dev_ok(info->xbdev);

        /* Kick pending requests. */
        spin_lock_irq(&blkif_io_lock);
        kick_pending_request_queues(info);
        spin_unlock_irq(&blkif_io_lock);
}

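/*
 * Allocate the shared ring page, grant the backend access to it, and
 * allocate an unbound event channel for the backend to bind to.
 */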
static int setup_blkring(struct xenbus_device *dev, struct blkfront_info *info)
{
        blkif_sring_t *sring;
        evtchn_op_t op = { .cmd = EVTCHNOP_alloc_unbound };
        int err;

        info->ring_ref = GRANT_INVALID_REF;

        sring = (void *)__get_free_page(GFP_KERNEL);
        if (!sring) {
                xenbus_dev_error(dev, -ENOMEM, "allocating shared ring");
                return -ENOMEM;
        }
        SHARED_RING_INIT(sring);
        FRONT_RING_INIT(&info->ring, sring, PAGE_SIZE);

        err = gnttab_grant_foreign_access(info->backend_id,
                                          virt_to_mfn(info->ring.sring), 0);
        if (err == -ENOSPC) {
                free_page((unsigned long)info->ring.sring);
                info->ring.sring = NULL;
                xenbus_dev_error(dev, err, "granting access to ring page");
                return err;
        }
        info->ring_ref = err;

        op.u.alloc_unbound.dom = info->backend_id;
        err = HYPERVISOR_event_channel_op(&op);
        if (err) {
                gnttab_end_foreign_access(info->ring_ref, 0);
                info->ring_ref = GRANT_INVALID_REF;
                free_page((unsigned long)info->ring.sring);
                info->ring.sring = NULL;
                xenbus_dev_error(dev, err, "allocating event channel");
                return err;
        }
        blkif_connect(info, op.u.alloc_unbound.port);
        return 0;
}

/* Common code used when first setting up, and when resuming. */
static int talk_to_backend(struct xenbus_device *dev,
                           struct blkfront_info *info)
{
        char *backend;
        const char *message;
        int err;

        backend = NULL;
        err = xenbus_gather(dev->nodename,
                            "backend-id", "%i", &info->backend_id,
                            "backend", NULL, &backend,
                            NULL);
        if (XENBUS_EXIST_ERR(err))
                goto out;
        if (backend && strlen(backend) == 0) {
                err = -ENOENT;
                goto out;
        }
        if (err < 0) {
                xenbus_dev_error(dev, err, "reading %s/backend or backend-id",
                                 dev->nodename);
                goto out;
        }

        /* Create shared ring, alloc event channel. */
        err = setup_blkring(dev, info);
        if (err) {
                xenbus_dev_error(dev, err, "setting up block ring");
                goto out;
        }

        err = xenbus_transaction_start(dev->nodename);
        if (err) {
                xenbus_dev_error(dev, err, "starting transaction");
                goto destroy_blkring;
        }

        err = xenbus_printf(dev->nodename, "ring-ref", "%u", info->ring_ref);
        if (err) {
                message = "writing ring-ref";
                goto abort_transaction;
        }
        err = xenbus_printf(dev->nodename,
                            "event-channel", "%u", info->evtchn);
        if (err) {
                message = "writing event-channel";
                goto abort_transaction;
        }

        info->backend = backend;
        backend = NULL;

        info->watch.node = info->backend;
        info->watch.callback = watch_for_status;
        err = register_xenbus_watch(&info->watch);
        if (err) {
                message = "registering watch on backend";
                goto abort_transaction;
        }

        err = xenbus_transaction_end(0);
        if (err) {
                xenbus_dev_error(dev, err, "completing transaction");
                goto destroy_blkring;
        }

 out:
        if (backend)
                kfree(backend);
        return err;

 abort_transaction:
        xenbus_transaction_end(1);
        /* Have to do this *outside* transaction. */
        xenbus_dev_error(dev, err, "%s", message);
 destroy_blkring:
        blkif_free(info);
        goto out;
}

/* Setup supplies the backend dir and virtual device.

   We place an event channel and shared-ring grant reference in the store,
   then watch the backend directory until it signals that it is ready. */
static int blkfront_probe(struct xenbus_device *dev,
                          const struct xenbus_device_id *id)
{
        int err, vdevice, i;
        struct blkfront_info *info;

        /* FIXME: Use dynamic device id if this is not set. */
        err = xenbus_scanf(dev->nodename, "virtual-device", "%i", &vdevice);
        if (XENBUS_EXIST_ERR(err))
                return err;
        if (err < 0) {
                xenbus_dev_error(dev, err, "reading virtual-device");
                return err;
        }

        info = kmalloc(sizeof(*info), GFP_KERNEL);
        if (!info) {
                xenbus_dev_error(dev, -ENOMEM, "allocating info structure");
                return -ENOMEM;
        }
        info->xbdev = dev;
        info->vdevice = vdevice;
        info->connected = BLKIF_STATE_DISCONNECTED;
        info->mi = NULL;
        INIT_WORK(&info->work, blkif_restart_queue, (void *)info);

        info->shadow_free = 0;
        memset(info->shadow, 0, sizeof(info->shadow));
        for (i = 0; i < BLK_RING_SIZE; i++)
                info->shadow[i].req.id = i+1;
        info->shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;

        /* Front end dir is a number, which is used as the id. */
        info->handle = simple_strtoul(strrchr(dev->nodename,'/')+1, NULL, 0);
        dev->data = info;

        err = talk_to_backend(dev, info);
        if (err) {
                kfree(info);
                dev->data = NULL;
                return err;
        }

        /* Call once in case entries already there. */
        watch_for_status(&info->watch, info->watch.node);
        return 0;
}

static int blkfront_remove(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->data;

        if (info->backend)
                unregister_xenbus_watch(&info->watch);

        if (info->mi)
                xlvbd_del(info);

        blkif_free(info);

        kfree(info->backend);
        kfree(info);

        return 0;
}

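/*
 * Suspend tears down the channel and sets the recovery flag so that
 * in-flight requests are replayed by blkif_recover() on resume.
 */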
static int blkfront_suspend(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->data;

        unregister_xenbus_watch(&info->watch);
        kfree(info->backend);
        info->backend = NULL;

        recovery = 1;
        blkif_free(info);

        return 0;
}

static int blkfront_resume(struct xenbus_device *dev)
{
        struct blkfront_info *info = dev->data;
        int err;

        /* FIXME: Check geometry hasn't changed here... */
        err = talk_to_backend(dev, info);
        if (!err) {
                blkif_recover(info);
        }
        return err;
}

static struct xenbus_driver blkfront = {
        .name = "vbd",
        .owner = THIS_MODULE,
        .ids = blkfront_ids,
        .probe = blkfront_probe,
        .remove = blkfront_remove,
        .resume = blkfront_resume,
        .suspend = blkfront_suspend,
};

static void __init init_blk_xenbus(void)
{
        xenbus_register_device(&blkfront);
}

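/* Poll for up to ten seconds for the first device to connect. */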
static int wait_for_blkif(void)
{
        int err = 0;
        int i;

        /*
         * We should figure out how many and which devices we need to
         * proceed and only wait for those.  For now, continue once the
         * first device is around.
         */
        for (i = 0; blkif_state != BLKIF_STATE_CONNECTED && (i < 10*HZ); i++) {
                set_current_state(TASK_INTERRUPTIBLE);
                schedule_timeout(1);
        }

        if (blkif_state != BLKIF_STATE_CONNECTED) {
                WPRINTK("Timeout connecting to device!\n");
                err = -ENOSYS;
        }
        return err;
}

static int __init xlblk_init(void)
{
        if ((xen_start_info->flags & SIF_INITDOMAIN) ||
            (xen_start_info->flags & SIF_BLK_BE_DOMAIN))
                return 0;

        IPRINTK("Initialising virtual block device driver\n");

        init_blk_xenbus();

        wait_for_blkif();

        return 0;
}

module_init(xlblk_init);

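/* Revoke the foreign-access grants on a completed request's segments. */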
static void blkif_completion(struct blk_shadow *s)
{
        int i;
        for (i = 0; i < s->req.nr_segments; i++)
                gnttab_end_foreign_access(
                        blkif_gref_from_fas(s->req.frame_and_sects[i]), 0);
}

/*
 * Local variables:
 * c-file-style: "linux"
 * indent-tabs-mode: t
 * c-indent-level: 8
 * c-basic-offset: 8
 * tab-width: 8
 * End:
 */