
view linux-2.6.8.1-xen-sparse/drivers/xen/netfront/netfront.c @ 2615:4ecd18756ef8

bitkeeper revision 1.1159.1.192 (415c7082v4eKZkH-NXLbAR9bU6B8hg)

Merge ssh://srg//auto/groups/xeno/BK/xeno-unstable.bk
into equilibrium.research:/export/scratch/xeno-blkscripts.bk
author mwilli2@equilibrium.research
date Thu Sep 30 20:45:54 2004 +0000 (2004-09-30)
parents b68e751add47 e9d6f07d0e52
children 8a3a77314cb5 ff4e7a241335
line source
1 /******************************************************************************
2 * Virtual network driver for conversing with remote driver backends.
3 *
4 * Copyright (c) 2002-2004, K A Fraser
5 */
7 #include <linux/config.h>
8 #include <linux/module.h>
9 #include <linux/version.h>
10 #include <linux/kernel.h>
11 #include <linux/sched.h>
12 #include <linux/slab.h>
13 #include <linux/string.h>
14 #include <linux/errno.h>
15 #include <linux/netdevice.h>
16 #include <linux/inetdevice.h>
17 #include <linux/etherdevice.h>
18 #include <linux/skbuff.h>
19 #include <linux/init.h>
20 #include <linux/bitops.h>
21 #include <net/sock.h>
22 #include <net/pkt_sched.h>
23 #include <asm/io.h>
24 #include <asm-xen/evtchn.h>
25 #include <asm-xen/ctrl_if.h>
26 #include <asm-xen/hypervisor-ifs/io/netif.h>
27 #include <asm/page.h>
29 #include <net/arp.h>
30 #include <net/route.h>
32 #define DEBUG 0
34 #ifndef __GFP_NOWARN
35 #define __GFP_NOWARN 0
36 #endif
37 #define alloc_xen_skb(_l) __dev_alloc_skb((_l), GFP_ATOMIC|__GFP_NOWARN)
39 #define init_skb_shinfo(_skb) \
40 do { \
41 atomic_set(&(skb_shinfo(_skb)->dataref), 1); \
42 skb_shinfo(_skb)->nr_frags = 0; \
43 skb_shinfo(_skb)->frag_list = NULL; \
44 } while ( 0 )
46 /* Allow headroom on each rx pkt for Ethernet header, alignment padding, ... */
47 #define RX_HEADROOM 200
49 /*
50 * If the backend driver is pipelining transmit requests then we can be very
51 * aggressive in avoiding new-packet notifications -- only need to send a
52 * notification if there are no outstanding unreceived responses.
53 * If the backend may be buffering our transmit buffers for any reason then we
54 * are rather more conservative.
55 */
56 #ifdef CONFIG_XEN_NETDEV_FRONTEND_PIPELINED_TRANSMITTER
57 #define TX_TEST_IDX resp_prod /* aggressive: any outstanding responses? */
58 #else
59 #define TX_TEST_IDX req_cons /* conservative: not seen all our requests? */
60 #endif
62 static void network_tx_buf_gc(struct net_device *dev);
63 static void network_alloc_rx_buffers(struct net_device *dev);
65 static unsigned long rx_pfn_array[NETIF_RX_RING_SIZE];
66 static multicall_entry_t rx_mcl[NETIF_RX_RING_SIZE+1];
67 static mmu_update_t rx_mmu[NETIF_RX_RING_SIZE];
69 static struct list_head dev_list;
71 struct net_private
72 {
73 struct list_head list;
74 struct net_device *dev;
76 struct net_device_stats stats;
77 NETIF_RING_IDX rx_resp_cons, tx_resp_cons;
78 unsigned int tx_full;
80 netif_tx_interface_t *tx;
81 netif_rx_interface_t *rx;
83 spinlock_t tx_lock;
84 spinlock_t rx_lock;
86 unsigned int handle;
87 unsigned int evtchn;
88 unsigned int irq;
90 /* What is the status of our connection to the remote backend? */
91 #define BEST_CLOSED 0
92 #define BEST_DISCONNECTED 1
93 #define BEST_CONNECTED 2
94 unsigned int backend_state;
96 /* Is this interface open or closed (down or up)? */
97 #define UST_CLOSED 0
98 #define UST_OPEN 1
99 unsigned int user_state;
101 /* Receive-ring batched refills. */
102 #define RX_MIN_TARGET 8
103 #define RX_MAX_TARGET NETIF_RX_RING_SIZE
104 int rx_target;
105 struct sk_buff_head rx_batch;
107 /*
108 * {tx,rx}_skbs store outstanding skbuffs. The first entry in each
109 * array is an index into a chain of free entries.
110 */
111 struct sk_buff *tx_skbs[NETIF_TX_RING_SIZE+1];
112 struct sk_buff *rx_skbs[NETIF_RX_RING_SIZE+1];
113 };
115 /* Access macros for acquiring and freeing slots in {tx,rx}_skbs[]. */
116 #define ADD_ID_TO_FREELIST(_list, _id) \
117 (_list)[(_id)] = (_list)[0]; \
118 (_list)[0] = (void *)(unsigned long)(_id);
119 #define GET_ID_FROM_FREELIST(_list) \
120 ({ unsigned long _id = (unsigned long)(_list)[0]; \
121 (_list)[0] = (_list)[_id]; \
122 (unsigned short)_id; })
124 static char *status_name[] = {
125 [NETIF_INTERFACE_STATUS_CLOSED] = "closed",
126 [NETIF_INTERFACE_STATUS_DISCONNECTED] = "disconnected",
127 [NETIF_INTERFACE_STATUS_CONNECTED] = "connected",
128 [NETIF_INTERFACE_STATUS_CHANGED] = "changed",
129 };
131 static char *be_state_name[] = {
132 [BEST_CLOSED] = "closed",
133 [BEST_DISCONNECTED] = "disconnected",
134 [BEST_CONNECTED] = "connected",
135 };
137 #if DEBUG
138 #define DPRINTK(fmt, args...) \
139 printk(KERN_ALERT "[XEN] (%s:%d) " fmt, __FUNCTION__, __LINE__, ##args)
140 #else
141 #define DPRINTK(fmt, args...) ((void)0)
142 #endif
143 #define IPRINTK(fmt, args...) \
144 printk(KERN_INFO "[XEN] " fmt, ##args)
145 #define WPRINTK(fmt, args...) \
146 printk(KERN_WARNING "[XEN] " fmt, ##args)
148 static struct net_device *find_dev_by_handle(unsigned int handle)
149 {
150 struct list_head *ent;
151 struct net_private *np;
152 list_for_each ( ent, &dev_list )
153 {
154 np = list_entry(ent, struct net_private, list);
155 if ( np->handle == handle )
156 return np->dev;
157 }
158 return NULL;
159 }
161 /** Network interface info. */
162 struct netif_ctrl {
163 /** Number of interfaces. */
164 int interface_n;
165 /** Number of connected interfaces. */
166 int connected_n;
167 /** Error code. */
168 int err;
169 int up;
170 };
172 static struct netif_ctrl netctrl;
174 static void netctrl_init(void)
175 {
176 memset(&netctrl, 0, sizeof(netctrl));
177 netctrl.up = NETIF_DRIVER_STATUS_DOWN;
178 }
180 /** Get or set a network interface error.
181 */
182 static int netctrl_err(int err)
183 {
184 if ( (err < 0) && !netctrl.err )
185 netctrl.err = err;
186 return netctrl.err;
187 }
189 /** Test if all network interfaces are connected.
190 *
191 * @return 1 if all connected, 0 if not, negative error code otherwise
192 */
193 static int netctrl_connected(void)
194 {
195 int ok;
197 if ( netctrl.err )
198 ok = netctrl.err;
199 else if ( netctrl.up == NETIF_DRIVER_STATUS_UP )
200 ok = (netctrl.connected_n == netctrl.interface_n);
201 else
202 ok = 0;
204 return ok;
205 }
207 /** Count the connected network interfaces.
208 *
209 * @return connected count
210 */
211 static int netctrl_connected_count(void)
212 {
214 struct list_head *ent;
215 struct net_private *np;
216 unsigned int connected;
218 connected = 0;
220 list_for_each(ent, &dev_list) {
221 np = list_entry(ent, struct net_private, list);
222 if (np->backend_state == BEST_CONNECTED)
223 connected++;
224 }
226 netctrl.connected_n = connected;
227 DPRINTK("> connected_n=%d interface_n=%d\n",
228 netctrl.connected_n, netctrl.interface_n);
229 return connected;
230 }
232 /** Send a packet on a net device to encourage switches to learn the
233 * MAC. We send a fake ARP request.
234 *
235 * @param dev device
236 * @return 0 on success, error code otherwise
237 */
238 static int vif_wake(struct net_device *dev)
239 {
240 struct sk_buff *skb;
241 u32 src_ip, dst_ip;
243 dst_ip = INADDR_BROADCAST;
244 src_ip = inet_select_addr(dev, dst_ip, RT_SCOPE_LINK);
246 skb = arp_create(ARPOP_REPLY, ETH_P_ARP,
247 dst_ip, dev, src_ip,
248 /*dst_hw*/ NULL, /*src_hw*/ NULL,
249 /*target_hw*/ dev->dev_addr);
250 if ( skb == NULL )
251 return -ENOMEM;
253 return dev_queue_xmit(skb);
254 }
256 static int network_open(struct net_device *dev)
257 {
258 struct net_private *np = dev->priv;
260 memset(&np->stats, 0, sizeof(np->stats));
262 np->user_state = UST_OPEN;
264 network_alloc_rx_buffers(dev);
265 np->rx->event = np->rx_resp_cons + 1;
267 netif_start_queue(dev);
269 return 0;
270 }
272 static void network_tx_buf_gc(struct net_device *dev)
273 {
274 NETIF_RING_IDX i, prod;
275 unsigned short id;
276 struct net_private *np = dev->priv;
277 struct sk_buff *skb;
279 if ( np->backend_state != BEST_CONNECTED )
280 return;
282 do {
283 prod = np->tx->resp_prod;
284 rmb(); /* Ensure we see responses up to 'prod'. */
286 for ( i = np->tx_resp_cons; i != prod; i++ )
287 {
288 id = np->tx->ring[MASK_NETIF_TX_IDX(i)].resp.id;
289 skb = np->tx_skbs[id];
290 ADD_ID_TO_FREELIST(np->tx_skbs, id);
291 dev_kfree_skb_irq(skb);
292 }
294 np->tx_resp_cons = prod;
296 /*
297 * Set a new event, then check for race with update of tx_cons. Note
298 * that it is essential to schedule a callback, no matter how few
299 * buffers are pending. Even if there is space in the transmit ring,
300 * higher layers may be blocked because too much data is outstanding:
301 * in such cases notification from Xen is likely to be the only kick
302 * that we'll get.
303 */
304 np->tx->event =
305 prod + ((np->tx->req_prod - prod) >> 1) + 1;
306 mb();
307 }
308 while ( prod != np->tx->resp_prod );
310 if ( np->tx_full &&
311 ((np->tx->req_prod - prod) < NETIF_TX_RING_SIZE) )
312 {
313 np->tx_full = 0;
314 if ( np->user_state == UST_OPEN )
315 netif_wake_queue(dev);
316 }
317 }
320 static void network_alloc_rx_buffers(struct net_device *dev)
321 {
322 unsigned short id;
323 struct net_private *np = dev->priv;
324 struct sk_buff *skb;
325 int i, batch_target;
326 NETIF_RING_IDX req_prod = np->rx->req_prod;
328 if ( unlikely(np->backend_state != BEST_CONNECTED) )
329 return;
331 /*
332 * Allocate skbuffs greedily, even though we batch updates to the
333 * receive ring. This creates a less bursty demand on the memory allocator,
334 * so should reduce the chance of failed allocation requests both for
335 ourselves and for other kernel subsystems.
336 */
337 batch_target = np->rx_target - (req_prod - np->rx_resp_cons);
338 for ( i = skb_queue_len(&np->rx_batch); i < batch_target; i++ )
339 {
340 if ( unlikely((skb = alloc_xen_skb(dev->mtu + RX_HEADROOM)) == NULL) )
341 break;
342 __skb_queue_tail(&np->rx_batch, skb);
343 }
345 /* Is the batch large enough to be worthwhile? */
346 if ( i < (np->rx_target/2) )
347 return;
349 for ( i = 0; ; i++ )
350 {
351 if ( (skb = __skb_dequeue(&np->rx_batch)) == NULL )
352 break;
354 skb->dev = dev;
356 id = GET_ID_FROM_FREELIST(np->rx_skbs);
358 np->rx_skbs[id] = skb;
360 np->rx->ring[MASK_NETIF_RX_IDX(req_prod + i)].req.id = id;
362 rx_pfn_array[i] = virt_to_machine(skb->head) >> PAGE_SHIFT;
364 /* Remove this page from pseudo phys map before passing back to Xen. */
365 phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT]
366 = INVALID_P2M_ENTRY;
368 rx_mcl[i].op = __HYPERVISOR_update_va_mapping;
369 rx_mcl[i].args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
370 rx_mcl[i].args[1] = 0;
371 rx_mcl[i].args[2] = 0;
372 }
374 /*
375 * We may have allocated buffers which have entries outstanding in the page
376 * update queue -- make sure we flush those first!
377 */
378 flush_page_update_queue();
380 /* After all PTEs have been zapped we blow away stale TLB entries. */
381 rx_mcl[i-1].args[2] = UVMF_FLUSH_TLB;
383 /* Give away a batch of pages. */
384 rx_mcl[i].op = __HYPERVISOR_dom_mem_op;
385 rx_mcl[i].args[0] = MEMOP_decrease_reservation;
386 rx_mcl[i].args[1] = (unsigned long)rx_pfn_array;
387 rx_mcl[i].args[2] = (unsigned long)i;
388 rx_mcl[i].args[3] = 0;
389 rx_mcl[i].args[4] = DOMID_SELF;
391 /* Zap PTEs and give away pages in one big multicall. */
392 (void)HYPERVISOR_multicall(rx_mcl, i+1);
394 /* Check return status of HYPERVISOR_dom_mem_op(). */
395 if ( unlikely(rx_mcl[i].args[5] != i) )
396 panic("Unable to reduce memory reservation\n");
398 /* Above is a suitable barrier to ensure backend will see requests. */
399 np->rx->req_prod = req_prod + i;
401 /* Adjust our floating fill target if we risked running out of buffers. */
402 if ( ((req_prod - np->rx->resp_prod) < (np->rx_target / 4)) &&
403 ((np->rx_target *= 2) > RX_MAX_TARGET) )
404 np->rx_target = RX_MAX_TARGET;
405 }
408 static int network_start_xmit(struct sk_buff *skb, struct net_device *dev)
409 {
410 unsigned short id;
411 struct net_private *np = (struct net_private *)dev->priv;
412 netif_tx_request_t *tx;
413 NETIF_RING_IDX i;
415 if ( unlikely(np->tx_full) )
416 {
417 printk(KERN_ALERT "%s: full queue wasn't stopped!\n", dev->name);
418 netif_stop_queue(dev);
419 goto drop;
420 }
422 if ( unlikely((((unsigned long)skb->data & ~PAGE_MASK) + skb->len) >=
423 PAGE_SIZE) )
424 {
425 struct sk_buff *nskb;
426 if ( unlikely((nskb = alloc_xen_skb(skb->len)) == NULL) )
427 goto drop;
428 skb_put(nskb, skb->len);
429 memcpy(nskb->data, skb->data, skb->len);
430 nskb->dev = skb->dev;
431 dev_kfree_skb(skb);
432 skb = nskb;
433 }
435 spin_lock_irq(&np->tx_lock);
437 if ( np->backend_state != BEST_CONNECTED )
438 {
439 spin_unlock_irq(&np->tx_lock);
440 goto drop;
441 }
443 i = np->tx->req_prod;
445 id = GET_ID_FROM_FREELIST(np->tx_skbs);
446 np->tx_skbs[id] = skb;
448 tx = &np->tx->ring[MASK_NETIF_TX_IDX(i)].req;
450 tx->id = id;
451 tx->addr = virt_to_machine(skb->data);
452 tx->size = skb->len;
454 wmb(); /* Ensure that backend will see the request. */
455 np->tx->req_prod = i + 1;
457 network_tx_buf_gc(dev);
459 if ( (i - np->tx_resp_cons) == (NETIF_TX_RING_SIZE - 1) )
460 {
461 np->tx_full = 1;
462 netif_stop_queue(dev);
463 }
465 spin_unlock_irq(&np->tx_lock);
467 np->stats.tx_bytes += skb->len;
468 np->stats.tx_packets++;
470 /* Only notify Xen if we really have to. */
471 mb();
472 if ( np->tx->TX_TEST_IDX == i )
473 notify_via_evtchn(np->evtchn);
475 return 0;
477 drop:
478 np->stats.tx_dropped++;
479 dev_kfree_skb(skb);
480 return 0;
481 }
484 static irqreturn_t netif_int(int irq, void *dev_id, struct pt_regs *ptregs)
485 {
486 struct net_device *dev = dev_id;
487 struct net_private *np = dev->priv;
488 unsigned long flags;
490 spin_lock_irqsave(&np->tx_lock, flags);
491 network_tx_buf_gc(dev);
492 spin_unlock_irqrestore(&np->tx_lock, flags);
494 if ( (np->rx_resp_cons != np->rx->resp_prod) &&
495 (np->user_state == UST_OPEN) )
496 netif_rx_schedule(dev);
498 return IRQ_HANDLED;
499 }
502 static int netif_poll(struct net_device *dev, int *pbudget)
503 {
504 struct net_private *np = dev->priv;
505 struct sk_buff *skb, *nskb;
506 netif_rx_response_t *rx;
507 NETIF_RING_IDX i, rp;
508 mmu_update_t *mmu = rx_mmu;
509 multicall_entry_t *mcl = rx_mcl;
510 int work_done, budget, more_to_do = 1;
511 struct sk_buff_head rxq;
512 unsigned long flags;
514 spin_lock(&np->rx_lock);
516 if ( np->backend_state != BEST_CONNECTED )
517 {
518 spin_unlock(&np->rx_lock);
519 return 0;
520 }
522 skb_queue_head_init(&rxq);
524 if ( (budget = *pbudget) > dev->quota )
525 budget = dev->quota;
527 rp = np->rx->resp_prod;
528 rmb(); /* Ensure we see queued responses up to 'rp'. */
530 for ( i = np->rx_resp_cons, work_done = 0;
531 (i != rp) && (work_done < budget);
532 i++, work_done++ )
533 {
534 rx = &np->rx->ring[MASK_NETIF_RX_IDX(i)].resp;
536 /*
537 * An error here is very odd. Usually indicates a backend bug,
538 * low-memory condition, or that we didn't have reservation headroom.
539 * Whatever - print an error and queue the id again straight away.
540 */
541 if ( unlikely(rx->status <= 0) )
542 {
543 printk(KERN_ALERT "bad buffer on RX ring!(%d)\n", rx->status);
544 np->rx->ring[MASK_NETIF_RX_IDX(np->rx->req_prod)].req.id = rx->id;
545 wmb();
546 np->rx->req_prod++;
547 continue;
548 }
550 skb = np->rx_skbs[rx->id];
551 ADD_ID_TO_FREELIST(np->rx_skbs, rx->id);
553 /* NB. We handle skb overflow later. */
554 skb->data = skb->head + (rx->addr & ~PAGE_MASK);
555 skb->len = rx->status;
556 skb->tail = skb->data + skb->len;
558 np->stats.rx_packets++;
559 np->stats.rx_bytes += rx->status;
561 /* Remap the page. */
562 mmu->ptr = (rx->addr & PAGE_MASK) | MMU_MACHPHYS_UPDATE;
563 mmu->val = __pa(skb->head) >> PAGE_SHIFT;
564 mmu++;
565 mcl->op = __HYPERVISOR_update_va_mapping;
566 mcl->args[0] = (unsigned long)skb->head >> PAGE_SHIFT;
567 mcl->args[1] = (rx->addr & PAGE_MASK) | __PAGE_KERNEL;
568 mcl->args[2] = 0;
569 mcl++;
571 phys_to_machine_mapping[__pa(skb->head) >> PAGE_SHIFT] =
572 rx->addr >> PAGE_SHIFT;
574 __skb_queue_tail(&rxq, skb);
575 }
577 /* Do all the remapping work, and M->P updates, in one big hypercall. */
578 if ( likely((mcl - rx_mcl) != 0) )
579 {
580 mcl->op = __HYPERVISOR_mmu_update;
581 mcl->args[0] = (unsigned long)rx_mmu;
582 mcl->args[1] = mmu - rx_mmu;
583 mcl->args[2] = 0;
584 mcl++;
585 (void)HYPERVISOR_multicall(rx_mcl, mcl - rx_mcl);
586 }
588 while ( (skb = __skb_dequeue(&rxq)) != NULL )
589 {
590 /*
591 * Enough room in skbuff for the data we were passed? Also, Linux
592 * expects at least 16 bytes headroom in each receive buffer.
593 */
594 if ( unlikely(skb->tail > skb->end) ||
595 unlikely((skb->data - skb->head) < 16) )
596 {
597 nskb = NULL;
599 /* Only copy the packet if it fits in the current MTU. */
600 if ( skb->len <= (dev->mtu + ETH_HLEN) )
601 {
602 if ( (skb->tail > skb->end) && net_ratelimit() )
603 printk(KERN_INFO "Received packet needs %d bytes more "
604 "headroom.\n", skb->tail - skb->end);
606 if ( (nskb = alloc_xen_skb(skb->len + 2)) != NULL )
607 {
608 skb_reserve(nskb, 2);
609 skb_put(nskb, skb->len);
610 memcpy(nskb->data, skb->data, skb->len);
611 nskb->dev = skb->dev;
612 }
613 }
614 else if ( net_ratelimit() )
615 printk(KERN_INFO "Received packet too big for MTU "
616 "(%d > %d)\n", skb->len - ETH_HLEN, dev->mtu);
618 /* Reinitialise and then destroy the old skbuff. */
619 skb->len = 0;
620 skb->tail = skb->data;
621 init_skb_shinfo(skb);
622 dev_kfree_skb(skb);
624 /* Switch old for new, if we copied the buffer. */
625 if ( (skb = nskb) == NULL )
626 continue;
627 }
629 /* Set the shared-info area, which is hidden behind the real data. */
630 init_skb_shinfo(skb);
632 /* Ethernet-specific work. Delayed to here as it peeks the header. */
633 skb->protocol = eth_type_trans(skb, dev);
635 /* Pass it up. */
636 netif_receive_skb(skb);
637 dev->last_rx = jiffies;
638 }
640 np->rx_resp_cons = i;
642 /* If we get a callback with very few responses, reduce fill target. */
643 /* NB. Note exponential increase, linear decrease. */
644 if ( ((np->rx->req_prod - np->rx->resp_prod) > ((3*np->rx_target) / 4)) &&
645 (--np->rx_target < RX_MIN_TARGET) )
646 np->rx_target = RX_MIN_TARGET;
648 network_alloc_rx_buffers(dev);
650 *pbudget -= work_done;
651 dev->quota -= work_done;
653 if ( work_done < budget )
654 {
655 local_irq_save(flags);
657 np->rx->event = i + 1;
659 /* Deal with hypervisor racing our resetting of rx_event. */
660 mb();
661 if ( np->rx->resp_prod == i )
662 {
663 __netif_rx_complete(dev);
664 more_to_do = 0;
665 }
667 local_irq_restore(flags);
668 }
670 spin_unlock(&np->rx_lock);
672 return more_to_do;
673 }
676 static int network_close(struct net_device *dev)
677 {
678 struct net_private *np = dev->priv;
679 np->user_state = UST_CLOSED;
680 netif_stop_queue(np->dev);
681 return 0;
682 }
685 static struct net_device_stats *network_get_stats(struct net_device *dev)
686 {
687 struct net_private *np = (struct net_private *)dev->priv;
688 return &np->stats;
689 }
692 static void network_connect(struct net_device *dev,
693 netif_fe_interface_status_t *status)
694 {
695 struct net_private *np;
696 int i, requeue_idx;
697 netif_tx_request_t *tx;
699 np = dev->priv;
700 spin_lock_irq(&np->tx_lock);
701 spin_lock(&np->rx_lock);
703 /* Recovery procedure: */
705 /* Step 1: Reinitialise variables. */
706 np->rx_resp_cons = np->tx_resp_cons = np->tx_full = 0;
707 np->rx->event = np->tx->event = 1;
709 /* Step 2: Rebuild the RX and TX ring contents.
710 * NB. We could just free the queued TX packets now but we hope
711 * that sending them out might do some good. We have to rebuild
712 * the RX ring because some of our pages are currently flipped out
713 * so we can't just free the RX skbs.
714 * NB2. Freelist index entries are always going to be less than
715 * __PAGE_OFFSET, whereas pointers to skbs will always be equal or
716 * greater than __PAGE_OFFSET: we use this property to distinguish
717 * them.
718 */
720 /* Rebuild the TX buffer freelist and the TX ring itself.
721 * NB. This reorders packets. We could keep more private state
722 * to avoid this but maybe it doesn't matter so much given the
723 * interface has been down.
724 */
725 for ( requeue_idx = 0, i = 1; i <= NETIF_TX_RING_SIZE; i++ )
726 {
727 if ( (unsigned long)np->tx_skbs[i] >= __PAGE_OFFSET )
728 {
729 struct sk_buff *skb = np->tx_skbs[i];
731 tx = &np->tx->ring[requeue_idx++].req;
733 tx->id = i;
734 tx->addr = virt_to_machine(skb->data);
735 tx->size = skb->len;
737 np->stats.tx_bytes += skb->len;
738 np->stats.tx_packets++;
739 }
740 }
741 wmb();
742 np->tx->req_prod = requeue_idx;
744 /* Rebuild the RX buffer freelist and the RX ring itself. */
745 for ( requeue_idx = 0, i = 1; i <= NETIF_RX_RING_SIZE; i++ )
746 if ( (unsigned long)np->rx_skbs[i] >= __PAGE_OFFSET )
747 np->rx->ring[requeue_idx++].req.id = i;
748 wmb();
749 np->rx->req_prod = requeue_idx;
751 printk(KERN_ALERT "[XEN] Netfront recovered tx=%d rxfree=%d\n",
752 np->tx->req_prod,np->rx->req_prod);
755 /* Step 3: All public and private state should now be sane. Get
756 * ready to start sending and receiving packets and give the driver
757 * domain a kick because we've probably just requeued some
758 * packets.
759 */
760 np->backend_state = BEST_CONNECTED;
761 wmb();
762 notify_via_evtchn(status->evtchn);
763 network_tx_buf_gc(dev);
765 if ( np->user_state == UST_OPEN )
766 netif_start_queue(dev);
768 spin_unlock(&np->rx_lock);
769 spin_unlock_irq(&np->tx_lock);
770 }
772 static void vif_show(struct net_private *np)
773 {
774 #if DEBUG
775 if (np) {
776 IPRINTK("<vif handle=%u %s(%s) evtchn=%u irq=%u tx=%p rx=%p>\n",
777 np->handle,
778 be_state_name[np->backend_state],
779 np->user_state ? "open" : "closed",
780 np->evtchn,
781 np->irq,
782 np->tx,
783 np->rx);
784 } else {
785 IPRINTK("<vif NULL>\n");
786 }
787 #endif
788 }
790 /* Send a connect message to xend to tell it to bring up the interface. */
791 static void send_interface_connect(struct net_private *np)
792 {
793 ctrl_msg_t cmsg = {
794 .type = CMSG_NETIF_FE,
795 .subtype = CMSG_NETIF_FE_INTERFACE_CONNECT,
796 .length = sizeof(netif_fe_interface_connect_t),
797 };
798 netif_fe_interface_connect_t *msg = (void*)cmsg.msg;
800 DPRINTK(">\n"); vif_show(np);
801 msg->handle = np->handle;
802 msg->tx_shmem_frame = (virt_to_machine(np->tx) >> PAGE_SHIFT);
803 msg->rx_shmem_frame = (virt_to_machine(np->rx) >> PAGE_SHIFT);
805 ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
806 DPRINTK("<\n");
807 }
809 /* Send a driver status notification to the domain controller. */
810 static int send_driver_status(int ok)
811 {
812 int err = 0;
813 ctrl_msg_t cmsg = {
814 .type = CMSG_NETIF_FE,
815 .subtype = CMSG_NETIF_FE_DRIVER_STATUS,
816 .length = sizeof(netif_fe_driver_status_t),
817 };
818 netif_fe_driver_status_t *msg = (void*)cmsg.msg;
820 msg->status = (ok ? NETIF_DRIVER_STATUS_UP : NETIF_DRIVER_STATUS_DOWN);
821 err = ctrl_if_send_message_block(&cmsg, NULL, 0, TASK_UNINTERRUPTIBLE);
822 return err;
823 }
825 /* Stop network device and free tx/rx queues and irq.
826 */
827 static void vif_release(struct net_private *np)
828 {
829 /* Stop old i/f to prevent errors whilst we rebuild the state. */
830 spin_lock_irq(&np->tx_lock);
831 spin_lock(&np->rx_lock);
832 netif_stop_queue(np->dev);
833 /* np->backend_state = BEST_DISCONNECTED; */
834 spin_unlock(&np->rx_lock);
835 spin_unlock_irq(&np->tx_lock);
837 /* Free resources. */
838 if(np->tx != NULL){
839 free_irq(np->irq, np->dev);
840 unbind_evtchn_from_irq(np->evtchn);
841 free_page((unsigned long)np->tx);
842 free_page((unsigned long)np->rx);
843 np->irq = 0;
844 np->evtchn = 0;
845 np->tx = NULL;
846 np->rx = NULL;
847 }
848 }
850 /* Release vif resources and close it down completely.
851 */
852 static void vif_close(struct net_private *np)
853 {
854 DPRINTK(">\n"); vif_show(np);
855 WPRINTK("Unexpected netif-CLOSED message in state %s\n",
856 be_state_name[np->backend_state]);
857 vif_release(np);
858 np->backend_state = BEST_CLOSED;
859 /* todo: take dev down and free. */
860 vif_show(np); DPRINTK("<\n");
861 }
863 /* Move the vif into disconnected state.
864 * Allocates tx/rx pages.
865 * Sends connect message to xend.
866 */
867 static void vif_disconnect(struct net_private *np){
868 DPRINTK(">\n");
869 if(np->tx) free_page((unsigned long)np->tx);
870 if(np->rx) free_page((unsigned long)np->rx);
871 // Before this np->tx and np->rx had better be null.
872 np->tx = (netif_tx_interface_t *)__get_free_page(GFP_KERNEL);
873 np->rx = (netif_rx_interface_t *)__get_free_page(GFP_KERNEL);
874 memset(np->tx, 0, PAGE_SIZE);
875 memset(np->rx, 0, PAGE_SIZE);
876 np->backend_state = BEST_DISCONNECTED;
877 send_interface_connect(np);
878 vif_show(np); DPRINTK("<\n");
879 }
881 /* Begin interface recovery.
882 *
883 * NB. Whilst we're recovering, we turn the carrier state off. We
884 * take measures to ensure that this device isn't used for
885 * anything. We also stop the queue for this device. Various
886 * different approaches (e.g. continuing to buffer packets) have
887 * been tested but don't appear to improve the overall impact on
888 * TCP connections.
889 *
890 * TODO: (MAW) Change the Xend<->Guest protocol so that a recovery
891 * is initiated by a special "RESET" message - disconnect could
892 * just mean we're not allowed to use this interface any more.
893 */
894 static void
895 vif_reset(
896 struct net_private *np)
897 {
898 DPRINTK(">\n");
899 IPRINTK("Attempting to reconnect network interface: handle=%u\n",
900 np->handle);
901 vif_release(np);
902 vif_disconnect(np);
903 vif_show(np); DPRINTK("<\n");
904 }
906 /* Move the vif into connected state.
907 * Sets the mac and event channel from the message.
908 * Binds the irq to the event channel.
909 */
910 static void
911 vif_connect(
912 struct net_private *np, netif_fe_interface_status_t *status)
913 {
914 struct net_device *dev = np->dev;
915 DPRINTK(">\n");
916 memcpy(dev->dev_addr, status->mac, ETH_ALEN);
917 network_connect(dev, status);
918 np->evtchn = status->evtchn;
919 np->irq = bind_evtchn_to_irq(np->evtchn);
920 (void)request_irq(np->irq, netif_int, SA_SAMPLE_RANDOM, dev->name, dev);
921 netctrl_connected_count();
922 vif_wake(dev);
923 vif_show(np); DPRINTK("<\n");
924 }
927 /** Create a network device.
928 * @param handle device handle
929 * @param val return parameter for created device
930 * @return 0 on success, error code otherwise
931 */
932 static int create_netdev(int handle, struct net_device **val)
933 {
934 int i, err = 0;
935 struct net_device *dev = NULL;
936 struct net_private *np = NULL;
938 if ( (dev = alloc_etherdev(sizeof(struct net_private))) == NULL )
939 {
940 printk(KERN_WARNING "%s> alloc_etherdev failed.\n", __FUNCTION__);
941 err = -ENOMEM;
942 goto exit;
943 }
945 np = dev->priv;
946 np->backend_state = BEST_CLOSED;
947 np->user_state = UST_CLOSED;
948 np->handle = handle;
950 spin_lock_init(&np->tx_lock);
951 spin_lock_init(&np->rx_lock);
953 skb_queue_head_init(&np->rx_batch);
954 np->rx_target = RX_MIN_TARGET;
956 /* Initialise {tx,rx}_skbs to be a free chain containing every entry. */
957 for ( i = 0; i <= NETIF_TX_RING_SIZE; i++ )
958 np->tx_skbs[i] = (void *)(i+1);
959 for ( i = 0; i <= NETIF_RX_RING_SIZE; i++ )
960 np->rx_skbs[i] = (void *)(i+1);
962 dev->open = network_open;
963 dev->hard_start_xmit = network_start_xmit;
964 dev->stop = network_close;
965 dev->get_stats = network_get_stats;
966 dev->poll = netif_poll;
967 dev->weight = 64;
969 if ( (err = register_netdev(dev)) != 0 )
970 {
971 printk(KERN_WARNING "%s> register_netdev err=%d\n", __FUNCTION__, err);
972 goto exit;
973 }
974 np->dev = dev;
975 list_add(&np->list, &dev_list);
977 exit:
978 if ( (err != 0) && (dev != NULL ) )
979 kfree(dev);
980 else if ( val != NULL )
981 *val = dev;
982 return err;
983 }
985 /* Get the target interface for a status message.
986 * Creates the interface when it makes sense.
987 * The returned interface may be null when there is no error.
988 *
989 * @param status status message
990 * @param np return parameter for interface state
991 * @return 0 on success, error code otherwise
992 */
993 static int
994 target_vif(
995 netif_fe_interface_status_t *status, struct net_private **np)
996 {
997 int err = 0;
998 struct net_device *dev;
1000 DPRINTK("> handle=%d\n", status->handle);
1001 if ( status->handle < 0 )
1002 {
1003 err = -EINVAL;
1004 goto exit;
1005 }
1007 if ( (dev = find_dev_by_handle(status->handle)) != NULL )
1008 goto exit;
1010 if ( status->status == NETIF_INTERFACE_STATUS_CLOSED )
1011 goto exit;
1012 if ( status->status == NETIF_INTERFACE_STATUS_CHANGED )
1013 goto exit;
1015 /* It's a new interface in a good state - create it. */
1016 DPRINTK("> create device...\n");
1017 if ( (err = create_netdev(status->handle, &dev)) != 0 )
1018 goto exit;
1020 netctrl.interface_n++;
1022 exit:
1023 if ( np != NULL )
1024 *np = ((dev && !err) ? dev->priv : NULL);
1025 DPRINTK("< err=%d\n", err);
1026 return err;
1027 }
1029 /* Handle an interface status message. */
1030 static void netif_interface_status(netif_fe_interface_status_t *status)
1031 {
1032 int err = 0;
1033 struct net_private *np = NULL;
1035 DPRINTK(">\n");
1036 DPRINTK("> status=%s handle=%d\n",
1037 status_name[status->status], status->handle);
1039 if ( (err = target_vif(status, &np)) != 0 )
1040 {
1041 WPRINTK("Invalid netif: handle=%u\n", status->handle);
1042 return;
1043 }
1045 if ( np == NULL )
1046 {
1047 DPRINTK("> no vif\n");
1048 return;
1049 }
1051 DPRINTK(">\n"); vif_show(np);
1053 switch ( status->status )
1054 {
1055 case NETIF_INTERFACE_STATUS_CLOSED:
1056 switch ( np->backend_state )
1057 {
1058 case BEST_CLOSED:
1059 case BEST_DISCONNECTED:
1060 case BEST_CONNECTED:
1061 vif_close(np);
1062 break;
1063 }
1064 break;
1066 case NETIF_INTERFACE_STATUS_DISCONNECTED:
1067 switch ( np->backend_state )
1068 {
1069 case BEST_CLOSED:
1070 vif_disconnect(np);
1071 break;
1072 case BEST_DISCONNECTED:
1073 case BEST_CONNECTED:
1074 vif_reset(np);
1075 break;
1076 }
1077 break;
1079 case NETIF_INTERFACE_STATUS_CONNECTED:
1080 switch ( np->backend_state )
1081 {
1082 case BEST_CLOSED:
1083 WPRINTK("Unexpected netif status %s in state %s\n",
1084 status_name[status->status],
1085 be_state_name[np->backend_state]);
1086 vif_disconnect(np);
1087 vif_connect(np, status);
1088 break;
1089 case BEST_DISCONNECTED:
1090 vif_connect(np, status);
1091 break;
1092 }
1093 break;
1095 case NETIF_INTERFACE_STATUS_CHANGED:
1096 /*
1097 * The domain controller is notifying us that a device has been
1098 * added or removed.
1099 */
1100 break;
1102 default:
1103 WPRINTK("Invalid netif status code %d\n", status->status);
1104 break;
1105 }
1106 vif_show(np);
1107 DPRINTK("<\n");
1108 }
1110 /*
1111 * Initialize the network control interface.
1112 */
1113 static void netif_driver_status(netif_fe_driver_status_t *status)
1114 {
1115 DPRINTK("> status=%d\n", status->status);
1116 netctrl.up = status->status;
1117 //netctrl.interface_n = status->max_handle;
1118 //netctrl.connected_n = 0;
1119 netctrl_connected_count();
1120 }
1122 /* Receive handler for control messages. */
1123 static void netif_ctrlif_rx(ctrl_msg_t *msg, unsigned long id)
1124 {
1126 switch ( msg->subtype )
1127 {
1128 case CMSG_NETIF_FE_INTERFACE_STATUS:
1129 if ( msg->length != sizeof(netif_fe_interface_status_t) )
1130 goto error;
1131 netif_interface_status((netif_fe_interface_status_t *)
1132 &msg->msg[0]);
1133 break;
1135 case CMSG_NETIF_FE_DRIVER_STATUS:
1136 if ( msg->length != sizeof(netif_fe_driver_status_t) )
1137 goto error;
1138 netif_driver_status((netif_fe_driver_status_t *)
1139 &msg->msg[0]);
1140 break;
1142 error:
1143 default:
1144 msg->length = 0;
1145 break;
1146 }
1148 ctrl_if_send_response(msg);
1149 }
1152 #if 1
1153 /* Wait for all interfaces to be connected.
1155 * This works OK, but we'd like to use the probing mode (see below).
1156 */
1157 static int probe_interfaces(void)
1158 {
1159 int err = 0, conn = 0;
1160 int wait_i, wait_n = 100;
1162 DPRINTK(">\n");
1164 for ( wait_i = 0; wait_i < wait_n; wait_i++)
1165 {
1166 DPRINTK("> wait_i=%d\n", wait_i);
1167 conn = netctrl_connected();
1168 if(conn) break;
1169 DPRINTK("> schedule_timeout...\n");
1170 set_current_state(TASK_INTERRUPTIBLE);
1171 schedule_timeout(10);
1172 }
1174 DPRINTK("> wait finished...\n");
1175 if ( conn <= 0 )
1176 {
1177 err = netctrl_err(-ENETDOWN);
1178 WPRINTK("Failed to connect all virtual interfaces: err=%d\n", err);
1179 }
1181 DPRINTK("< err=%d\n", err);
1183 return err;
1184 }
1185 #else
1186 /* Probe for interfaces until no more are found.
1188 * This is the mode we'd like to use, but at the moment it panics the kernel.
1189 */
1190 static int probe_interfaces(void)
1191 {
1192 int err = 0;
1193 int wait_i, wait_n = 100;
1194 ctrl_msg_t cmsg = {
1195 .type = CMSG_NETIF_FE,
1196 .subtype = CMSG_NETIF_FE_INTERFACE_STATUS,
1197 .length = sizeof(netif_fe_interface_status_t),
1198 };
1199 netif_fe_interface_status_t msg = {};
1200 ctrl_msg_t rmsg = {};
1201 netif_fe_interface_status_t *reply = (void*)rmsg.msg;
1202 int state = TASK_UNINTERRUPTIBLE;
1203 u32 query = -1;
1205 DPRINTK(">\n");
1207 netctrl.interface_n = 0;
1208 for ( wait_i = 0; wait_i < wait_n; wait_i++ )
1209 {
1210 DPRINTK("> wait_i=%d query=%d\n", wait_i, query);
1211 msg.handle = query;
1212 memcpy(cmsg.msg, &msg, sizeof(msg));
1213 DPRINTK("> set_current_state...\n");
1214 set_current_state(state);
1215 DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
1216 DPRINTK("> sending...\n");
1217 err = ctrl_if_send_message_and_get_response(&cmsg, &rmsg, state);
1218 DPRINTK("> err=%d\n", err);
1219 if(err) goto exit;
1220 DPRINTK("> rmsg=%p msg=%p, reply=%p\n", &rmsg, rmsg.msg, reply);
1221 if((int)reply->handle < 0){
1222 // No more interfaces.
1223 break;
1224 }
1225 query = -reply->handle - 2;
1226 DPRINTK(">netif_interface_status ...\n");
1227 netif_interface_status(reply);
1228 }
1230 exit:
1231 if ( err )
1232 {
1233 err = netctrl_err(-ENETDOWN);
1234 WPRINTK("Connecting virtual network interfaces failed: err=%d\n", err);
1235 }
1237 DPRINTK("< err=%d\n", err);
1238 return err;
1239 }
1241 #endif
1243 static int __init netif_init(void)
1244 {
1245 int err = 0;
1247 if ( (start_info.flags & SIF_INITDOMAIN) ||
1248 (start_info.flags & SIF_NET_BE_DOMAIN) )
1249 return 0;
1251 IPRINTK("Initialising virtual ethernet driver.\n");
1252 INIT_LIST_HEAD(&dev_list);
1253 netctrl_init();
1254 (void)ctrl_if_register_receiver(CMSG_NETIF_FE, netif_ctrlif_rx,
1255 CALLBACK_IN_BLOCKING_CONTEXT);
1256 send_driver_status(1);
1257 err = probe_interfaces();
1258 if ( err )
1259 ctrl_if_unregister_receiver(CMSG_NETIF_FE, netif_ctrlif_rx);
1261 DPRINTK("< err=%d\n", err);
1262 return err;
1263 }
1265 static void vif_suspend(struct net_private *np)
1266 {
1267 // Avoid having tx/rx stuff happen until we're ready.
1268 DPRINTK(">\n");
1269 free_irq(np->irq, np->dev);
1270 unbind_evtchn_from_irq(np->evtchn);
1271 DPRINTK("<\n");
1272 }
1274 static void vif_resume(struct net_private *np)
1275 {
1276 // Connect regardless of whether IFF_UP flag set.
1277 // Stop bad things from happening until we're back up.
1278 DPRINTK(">\n");
1279 np->backend_state = BEST_DISCONNECTED;
1280 memset(np->tx, 0, PAGE_SIZE);
1281 memset(np->rx, 0, PAGE_SIZE);
1283 send_interface_connect(np);
1284 DPRINTK("<\n");
1285 }
1287 void netif_suspend(void)
1288 {
1289 #if 1 /* XXX THIS IS TEMPORARY */
1290 struct list_head *ent;
1291 struct net_private *np;
1293 DPRINTK(">\n");
1294 list_for_each(ent, &dev_list){
1295 np = list_entry(ent, struct net_private, list);
1296 vif_suspend(np);
1297 }
1298 DPRINTK("<\n");
1299 #endif
1300 }
1302 void netif_resume(void)
1303 {
1304 #if 1
1305 /* XXX THIS IS TEMPORARY */
1306 struct list_head *ent;
1307 struct net_private *np;
1309 DPRINTK(">\n");
1310 list_for_each ( ent, &dev_list )
1311 {
1312 np = list_entry(ent, struct net_private, list);
1313 vif_resume(np);
1314 }
1315 DPRINTK("<\n");
1316 #endif
1317 }
1320 __initcall(netif_init);