# HG changeset patch
# User kaf24@firebug.cl.cam.ac.uk
# Date 1114074852 0
# Node ID ebeac8efe955e82f858bbc8ee2feaac1d115039c
# Parent a1c65fd28aecfb19f81007fc4590db0a0edb5260
bitkeeper revision 1.1350 (42676ee4BkgqwvPiIyB44k55uY8cSA)

Fix blkdev suspend/resume.
Signed-off-by: Keir Fraser

diff -r a1c65fd28aec -r ebeac8efe955 linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c
--- a/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	Thu Apr 21 00:16:35 2005 +0000
+++ b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c	Thu Apr 21 09:14:12 2005 +0000
@@ -330,34 +330,36 @@ int gnttab_resume(void)
     setup.nr_frames  = NR_GRANT_FRAMES;
     setup.frame_list = frames;
 
-    if ( HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0 )
-        BUG();
-    if ( setup.status != 0 )
-        BUG();
+    BUG_ON(HYPERVISOR_grant_table_op(GNTTABOP_setup_table, &setup, 1) != 0);
+    BUG_ON(setup.status != 0);
 
     for ( i = 0; i < NR_GRANT_FRAMES; i++ )
         set_fixmap_ma(FIX_GNTTAB_END - i, frames[i] << PAGE_SHIFT);
 
+    return 0;
+}
+
+int gnttab_suspend(void)
+{
+    int i;
+
+    for ( i = 0; i < NR_GRANT_FRAMES; i++ )
+        clear_fixmap(FIX_GNTTAB_END - i);
+
+    return 0;
+}
+
+static int __init gnttab_init(void)
+{
+    int i;
+
+    BUG_ON(gnttab_resume());
+
     shared = (grant_entry_t *)fix_to_virt(FIX_GNTTAB_END);
 
     for ( i = 0; i < NR_GRANT_ENTRIES; i++ )
         gnttab_free_list[i] = i + 1;
 
-    return 0;
-}
-
-int gnttab_suspend(void)
-{
-    int i;
-    for ( i = 0; i < NR_GRANT_FRAMES; i++ )
-        clear_fixmap(FIX_GNTTAB_END - i);
-    return 0;
-}
-
-static int __init gnttab_init(void)
-{
-    BUG_ON(gnttab_resume());
-
     /*
      * /proc/xen/grant : used by libxc to access grant tables
      */
diff -r a1c65fd28aec -r ebeac8efe955 linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c
--- a/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c	Thu Apr 21 00:16:35 2005 +0000
+++ b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c	Thu Apr 21 09:14:12 2005 +0000
@@ -94,37 +94,38 @@ static domid_t rdomid = 0;
 static grant_ref_t gref_head, gref_terminal;
 #define MAXIMUM_OUTSTANDING_BLOCK_REQS \
     (BLKIF_MAX_SEGMENTS_PER_REQUEST * BLKIF_RING_SIZE)
+#define GRANTREF_INVALID (1<<15)
 #endif
 
-unsigned long rec_ring_free;
-blkif_request_t rec_ring[BLK_RING_SIZE];
+static struct blk_shadow {
+    blkif_request_t req;
+    unsigned long request;
+    unsigned long frame[BLKIF_MAX_SEGMENTS_PER_REQUEST];
+} blk_shadow[BLK_RING_SIZE];
+unsigned long blk_shadow_free;
 
-static int recovery = 0;           /* "Recovery in progress" flag.  Protected
-                                    * by the blkif_io_lock */
+static int recovery = 0; /* Recovery in progress: protected by blkif_io_lock */
 
 static void kick_pending_request_queues(void);
 
 int __init xlblk_init(void);
 
-void blkif_completion( blkif_request_t *req );
-
-static inline int GET_ID_FROM_FREELIST( void )
-{
-    unsigned long free = rec_ring_free;
+static void blkif_completion(struct blk_shadow *s);
+
+static inline int GET_ID_FROM_FREELIST(void)
+{
+    unsigned long free = blk_shadow_free;
     BUG_ON(free > BLK_RING_SIZE);
-
-    rec_ring_free = rec_ring[free].id;
-
-    rec_ring[free].id = 0x0fffffee; /* debug */
-
+    blk_shadow_free = blk_shadow[free].req.id;
+    blk_shadow[free].req.id = 0x0fffffee; /* debug */
     return free;
 }
 
-static inline void ADD_ID_TO_FREELIST( unsigned long id )
+static inline void ADD_ID_TO_FREELIST(unsigned long id)
 {
-    rec_ring[id].id = rec_ring_free;
-    rec_ring_free = id;
+    blk_shadow[id].req.id = blk_shadow_free;
+    blk_shadow[id].request = 0;
+    blk_shadow_free = id;
 }
 
@@ -138,41 +139,31 @@ static int sg_operation = -1;
 #define DISABLE_SCATTERGATHER() (sg_operation = -1)
 #endif
 
-static inline void translate_req_to_pfn(blkif_request_t *xreq,
-                                        blkif_request_t *req)
+static inline void pickle_request(struct blk_shadow *s, blkif_request_t *r)
 {
+#ifndef CONFIG_XEN_BLKDEV_GRANT
     int i;
+#endif
 
-    xreq->operation = req->operation;
-    xreq->nr_segments = req->nr_segments;
-    xreq->device = req->device;
-    /* preserve id */
-    xreq->sector_number = req->sector_number;
+    s->req = *r;
 
-    for ( i = 0; i < req->nr_segments; i++ )
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-        xreq->frame_and_sects[i] = req->frame_and_sects[i];
-#else
-        xreq->frame_and_sects[i] = machine_to_phys(req->frame_and_sects[i]);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+    for ( i = 0; i < r->nr_segments; i++ )
+        s->req.frame_and_sects[i] = machine_to_phys(r->frame_and_sects[i]);
 #endif
 }
 
-static inline void translate_req_to_mfn(blkif_request_t *xreq,
-                                        blkif_request_t *req)
+static inline void unpickle_request(blkif_request_t *r, struct blk_shadow *s)
 {
+#ifndef CONFIG_XEN_BLKDEV_GRANT
     int i;
+#endif
 
-    xreq->operation = req->operation;
-    xreq->nr_segments = req->nr_segments;
-    xreq->device = req->device;
-    xreq->id = req->id; /* copy id (unlike above) */
-    xreq->sector_number = req->sector_number;
+    *r = s->req;
 
-    for ( i = 0; i < req->nr_segments; i++ )
-#ifdef CONFIG_XEN_BLKDEV_GRANT
-        xreq->frame_and_sects[i] = req->frame_and_sects[i];
-#else
-        xreq->frame_and_sects[i] = phys_to_machine(req->frame_and_sects[i]);
+#ifndef CONFIG_XEN_BLKDEV_GRANT
+    for ( i = 0; i < s->req.nr_segments; i++ )
+        r->frame_and_sects[i] = phys_to_machine(s->req.frame_and_sects[i]);
 #endif
 }
 
@@ -185,8 +176,6 @@ static inline void flush_requests(void)
 }
 
-
-
 /************************** KERNEL VERSION 2.6 **************************/
 
 #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0)
 
@@ -208,7 +197,6 @@ static void vbd_update(void)
 
 static void kick_pending_request_queues(void)
 {
-
     if ( (xlbd_blk_queue != NULL) &&
          test_bit(QUEUE_FLAG_STOPPED, &xlbd_blk_queue->queue_flags) )
     {
@@ -218,7 +206,6 @@ static void kick_pending_request_queues(
          */
         xlbd_blk_queue->request_fn(xlbd_blk_queue);
     }
-
 }
 
 
@@ -243,9 +230,8 @@ int blkif_release(struct inode *inode, s
      * When usage drops to zero it may allow more VBD updates to occur.
      * Update of usage count is protected by a per-device semaphore.
      */
-    if (--di->mi->usage == 0) {
+    if ( --di->mi->usage == 0 )
         vbd_update();
-    }
 
     return 0;
 }
@@ -259,8 +245,8 @@ int blkif_ioctl(struct inode *inode, str
     DPRINTK_IOCTL("command: 0x%x, argument: 0x%lx, dev: 0x%04x\n",
                   command, (long)argument, inode->i_rdev);
 
-    switch (command) {
-
+    switch ( command )
+    {
     case HDIO_GETGEO:
         /* return ENOSYS to use defaults */
         return -ENOSYS;
@@ -312,7 +298,7 @@ static int blkif_queue_request(struct re
     /* Fill out a communications ring structure. */
     ring_req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
     id = GET_ID_FROM_FREELIST();
-    rec_ring[id].id = (unsigned long) req;
+    blk_shadow[id].request = (unsigned long)req;
 
     ring_req->id = id;
     ring_req->operation = rq_data_dir(req) ? BLKIF_OP_WRITE :
@@ -341,8 +327,12 @@ static int blkif_queue_request(struct re
                         buffer_ma >> PAGE_SHIFT,
                         rq_data_dir(req) );
 
+            blk_shadow[id].frame[ring_req->nr_segments] =
+                        buffer_ma >> PAGE_SHIFT;
+
             ring_req->frame_and_sects[ring_req->nr_segments++] =
                         (((u32) ref) << 16) | (fsect << 3) | lsect;
+
 #else
             ring_req->frame_and_sects[ring_req->nr_segments++] =
                         buffer_ma | (fsect << 3) | lsect;
@@ -353,7 +343,7 @@ static int blkif_queue_request(struct re
     blk_ring.req_prod_pvt++;
 
     /* Keep a private copy so we can reissue requests when recovering. */
-    translate_req_to_pfn(&rec_ring[id], ring_req);
+    pickle_request(&blk_shadow[id], ring_req);
 
     return 0;
 }
@@ -372,8 +362,10 @@ void do_blkif_request(request_queue_t *r
 
     queued = 0;
 
-    while ((req = elv_next_request(rq)) != NULL) {
-        if (!blk_fs_request(req)) {
+    while ( (req = elv_next_request(rq)) != NULL )
+    {
+        if ( !blk_fs_request(req) )
+        {
             end_request(req, 0);
             continue;
         }
@@ -383,19 +375,23 @@ void do_blkif_request(request_queue_t *r
             blk_stop_queue(rq);
             break;
         }
+
         DPRINTK("do_blk_req %p: cmd %p, sec %lx, (%u/%li) buffer:%p [%s]\n",
                 req, req->cmd, req->sector, req->current_nr_sectors,
                 req->nr_sectors, req->buffer,
                 rq_data_dir(req) ? "write" : "read");
+
         blkdev_dequeue_request(req);
-        if (blkif_queue_request(req)) {
+        if ( blkif_queue_request(req) )
+        {
             blk_stop_queue(rq);
             break;
         }
+
         queued++;
     }
 
-    if (queued != 0)
+    if ( queued != 0 )
         flush_requests();
 }
 
@@ -424,11 +420,12 @@ static irqreturn_t blkif_int(int irq, vo
         unsigned long id;
 
         bret = RING_GET_RESPONSE(&blk_ring, i);
-        id = bret->id;
-        req = (struct request *)rec_ring[id].id;
-        blkif_completion( &rec_ring[id] );
+        id  = bret->id;
+        req = (struct request *)blk_shadow[id].request;
 
-        ADD_ID_TO_FREELIST(id); /* overwrites req */
+        blkif_completion(&blk_shadow[id]);
+
+        ADD_ID_TO_FREELIST(id);
 
         switch ( bret->operation )
         {
@@ -437,7 +434,7 @@ static irqreturn_t blkif_int(int irq, vo
             if ( unlikely(bret->status != BLKIF_RSP_OKAY) )
                 DPRINTK("Bad return from blkdev data request: %x\n",
                         bret->status);
-            
+
             if ( unlikely(end_that_request_first
                           (req,
                            (bret->status == BLKIF_RSP_OKAY),
@@ -813,10 +810,9 @@ static int blkif_queue_request(unsigned
                                    blk_ring.req_prod_pvt - 1);
 
             bh = (struct buffer_head *)id;
-            bh->b_reqnext = (struct buffer_head *)rec_ring[req->id].id;
-
-            rec_ring[req->id].id = id;
-
+            bh->b_reqnext = (struct buffer_head *)blk_shadow[req->id].request;
+            blk_shadow[req->id].request = (unsigned long)id;
+
 #ifdef CONFIG_XEN_BLKDEV_GRANT
             /* install a grant reference. */
             ref = gnttab_claim_grant_reference(&gref_head, gref_terminal);
@@ -828,6 +824,9 @@ static int blkif_queue_request(unsigned
                         buffer_ma >> PAGE_SHIFT,
                         ( operation == BLKIF_OP_WRITE ?
                          1 : 0 ) );
+            blk_shadow[id].frame[req->nr_segments] =
+                        buffer_ma >> PAGE_SHIFT;
+
             req->frame_and_sects[req->nr_segments] =
                         (((u32) ref ) << 16) | (fsect << 3) | lsect;
 #else
@@ -840,7 +839,7 @@ static int blkif_queue_request(unsigned
                 DISABLE_SCATTERGATHER();
 
             /* Update the copy of the request in the recovery ring. */
-            translate_req_to_pfn(&rec_ring[req->id], req );
+            pickle_request(&blk_shadow[req->id], req );
 
             return 0;
         }
@@ -864,7 +863,7 @@ static int blkif_queue_request(unsigned
     req = RING_GET_REQUEST(&blk_ring, blk_ring.req_prod_pvt);
 
     xid = GET_ID_FROM_FREELIST();
-    rec_ring[xid].id = id;
+    blk_shadow[xid].request = (unsigned long)id;
 
     req->id = xid;
     req->operation = operation;
@@ -882,13 +881,15 @@ static int blkif_queue_request(unsigned
                     buffer_ma >> PAGE_SHIFT,
                     ( operation == BLKIF_OP_WRITE ?
                      1 : 0 ) );
 
+    blk_shadow[xid].frame[0] = buffer_ma >> PAGE_SHIFT;
+
     req->frame_and_sects[0] = (((u32) ref)<<16) | (fsect<<3) | lsect;
 #else
     req->frame_and_sects[0] = buffer_ma | (fsect<<3) | lsect;
 #endif
 
     /* Keep a private copy so we can reissue requests when recovering. */
-    translate_req_to_pfn(&rec_ring[xid], req );
+    pickle_request(&blk_shadow[xid], req);
 
     blk_ring.req_prod_pvt++;
 
@@ -1002,9 +1003,9 @@ static void blkif_int(int irq, void *dev
 
         bret = RING_GET_RESPONSE(&blk_ring, i);
         id = bret->id;
-        bh = (struct buffer_head *)rec_ring[id].id;
+        bh = (struct buffer_head *)blk_shadow[id].request;
 
-        blkif_completion( &rec_ring[id] );
+        blkif_completion(&blk_shadow[id]);
 
         ADD_ID_TO_FREELIST(id);
 
@@ -1083,9 +1084,9 @@ void blkif_control_send(blkif_request_t
 
     id = GET_ID_FROM_FREELIST();
     req_d->id = id;
-    rec_ring[id].id = (unsigned long) req;
+    blk_shadow[id].request = (unsigned long)req;
 
-    translate_req_to_pfn( &rec_ring[id], req );
+    pickle_request(&blk_shadow[id], req);
 
     blk_ring.req_prod_pvt++;
     flush_requests();
@@ -1184,50 +1185,69 @@ static void blkif_recover(void)
 {
     int i;
     blkif_request_t *req;
+    struct blk_shadow *copy;
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    int j;
+#endif
 
-    /* Hmm, requests might be re-ordered when we re-issue them.
-     * This will need to be fixed once we have barriers */
+    /* Stage 1: Make a safe copy of the shadow state. */
+    copy = (struct blk_shadow *)kmalloc(sizeof(blk_shadow), GFP_KERNEL);
+    BUG_ON(copy == NULL);
+    memcpy(copy, blk_shadow, sizeof(blk_shadow));
 
-    /* Stage 1 : Find active and move to safety. */
+    /* Stage 2: Set up free list. */
+    memset(&blk_shadow, 0, sizeof(blk_shadow));
+    for ( i = 0; i < BLK_RING_SIZE; i++ )
+        blk_shadow[i].req.id = i+1;
+    blk_shadow_free = blk_ring.req_prod_pvt;
+    blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
+
+    /* Stage 3: Find pending requests and requeue them. */
     for ( i = 0; i < BLK_RING_SIZE; i++ )
     {
-        if ( rec_ring[i].id >= PAGE_OFFSET )
+        /* Not in use? */
+        if ( copy[i].request == 0 )
+            continue;
+
+        /* Grab a request slot and unpickle shadow state into it. */
+        req = RING_GET_REQUEST(
+            &blk_ring, blk_ring.req_prod_pvt);
+        unpickle_request(req, &copy[i]);
+
+        /* We get a new request id, and must reset the shadow state. */
+        req->id = GET_ID_FROM_FREELIST();
+        memcpy(&blk_shadow[req->id], &copy[i], sizeof(copy[i]));
+
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+        /* Rewrite any grant references invalidated by suspend/resume. */
+        for ( j = 0; j < req->nr_segments; j++ )
         {
-            req = RING_GET_REQUEST(&blk_ring,
-                                   blk_ring.req_prod_pvt);
-            translate_req_to_mfn(req, &rec_ring[i]);
-            blk_ring.req_prod_pvt++;
+            if ( req->frame_and_sects[j] & GRANTREF_INVALID )
+                gnttab_grant_foreign_access_ref(
+                    blkif_gref_from_fas(req->frame_and_sects[j]),
+                    rdomid,
+                    blk_shadow[req->id].frame[j],
+                    rq_data_dir((struct request *)
+                                blk_shadow[req->id].request));
+            req->frame_and_sects[j] &= ~GRANTREF_INVALID;
         }
+        blk_shadow[req->id].req = *req;
+#endif
+
+        blk_ring.req_prod_pvt++;
     }
 
-    /* Stage 2 : Set up shadow list. */
-    for ( i = 0; i < blk_ring.req_prod_pvt; i++ )
-    {
-        req = RING_GET_REQUEST(&blk_ring, i);
-        rec_ring[i].id = req->id;
-        req->id = i;
-        translate_req_to_pfn(&rec_ring[i], req);
-    }
+    kfree(copy);
 
-    /* Stage 3 : Set up free list. */
-    for ( ; i < BLK_RING_SIZE; i++ )
-        rec_ring[i].id = i+1;
-    rec_ring_free = blk_ring.req_prod_pvt;
-    rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
+    recovery = 0;
 
     /* blk_ring->req_prod will be set when we flush_requests().*/
     wmb();
 
-    /* Switch off recovery mode, using a memory barrier to ensure that
-     * it's seen before we flush requests - we don't want to miss any
-     * interrupts. */
-    recovery = 0;
-    wmb();
-
     /* Kicks things back into life. */
     flush_requests();
 
-    /* Now safe to left other peope use interface. */
+    /* Now safe to let other people use the interface. */
     blkif_state = BLKIF_STATE_CONNECTED;
 }
 
@@ -1409,10 +1429,11 @@ int __init xlblk_init(void)
 
     printk(KERN_INFO "xen_blk: Initialising virtual block device driver\n");
 
-    rec_ring_free = 0;
+    blk_shadow_free = 0;
+    memset(blk_shadow, 0, sizeof(blk_shadow));
     for ( i = 0; i < BLK_RING_SIZE; i++ )
-        rec_ring[i].id = i+1;
-    rec_ring[BLK_RING_SIZE-1].id = 0x0fffffff;
+        blk_shadow[i].req.id = i+1;
+    blk_shadow[BLK_RING_SIZE-1].req.id = 0x0fffffff;
 
     (void)ctrl_if_register_receiver(CMSG_BLKIF_FE, blkif_ctrlif_rx,
                                     CALLBACK_IN_BLOCKING_CONTEXT);
@@ -1428,32 +1449,32 @@ void blkdev_suspend(void)
 {
 }
 
 void blkdev_resume(void)
 {
+#ifdef CONFIG_XEN_BLKDEV_GRANT
+    int i, j;
+    for ( i = 0; i < BLK_RING_SIZE; i++ )
+        for ( j = 0; j < BLKIF_MAX_SEGMENTS_PER_REQUEST; j++ )
+            blk_shadow[i].req.frame_and_sects[j] |= GRANTREF_INVALID;
+#endif
     send_driver_status(1);
 }
 
-void blkif_completion(blkif_request_t *req)
+static void blkif_completion(struct blk_shadow *s)
 {
     int i;
 #ifdef CONFIG_XEN_BLKDEV_GRANT
-    grant_ref_t gref;
-
-    for ( i = 0; i < req->nr_segments; i++ )
-    {
-        gref = blkif_gref_from_fas(req->frame_and_sects[i]);
-        gnttab_release_grant_reference(&gref_head, gref);
-    }
+    for ( i = 0; i < s->req.nr_segments; i++ )
+        gnttab_release_grant_reference(
+            &gref_head, blkif_gref_from_fas(s->req.frame_and_sects[i]));
 #else
     /* This is a hack to get the dirty logging bits set */
-    switch ( req->operation )
+    if ( s->req.operation == BLKIF_OP_READ )
     {
-    case BLKIF_OP_READ:
-        for ( i = 0; i < req->nr_segments; i++ )
+        for ( i = 0; i < s->req.nr_segments; i++ )
         {
-            unsigned long pfn = req->frame_and_sects[i] >> PAGE_SHIFT;
+            unsigned long pfn = s->req.frame_and_sects[i] >> PAGE_SHIFT;
             unsigned long mfn = phys_to_machine_mapping[pfn];
             xen_machphys_update(mfn, pfn);
         }
-        break;
     }
 #endif
 }