/****************************************************************************** * netif.h * * Unified network-device I/O interface for Xen guest OSes. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. * * Copyright (c) 2003-2004, Keir Fraser */ #ifndef __XEN_PUBLIC_IO_NETIF_H__ #define __XEN_PUBLIC_IO_NETIF_H__ #include "ring.h" #include "../grant_table.h" /* * Older implementation of Xen network frontend / backend has an * implicit dependency on the MAX_SKB_FRAGS as the maximum number of * ring slots a skb can use. Netfront / netback may not work as * expected when frontend and backend have different MAX_SKB_FRAGS. * * A better approach is to add mechanism for netfront / netback to * negotiate this value. However we cannot fix all possible * frontends, so we need to define a value which states the minimum * slots backend must support. * * The minimum value derives from older Linux kernel's MAX_SKB_FRAGS * (18), which is proved to work with most frontends. 
Any new backend
 * which doesn't negotiate with frontend should expect frontend to
 * send a valid packet using slots up to this value.
 */
#define XEN_NETIF_NR_SLOTS_MIN 18 /* minimum ring slots per packet a backend must accept */

/*
 * Notifications after enqueuing any type of message should be conditional on
 * the appropriate req_event or rsp_event field in the shared ring.
 * If the client sends notification for rx requests then it should specify
 * feature 'feature-rx-notify' via xenbus. Otherwise the backend will assume
 * that it cannot safely queue packets (as it may not be kicked to send them).
 */

/*
 * "feature-split-event-channels" is introduced to separate guest TX
 * and RX notification. Backend either doesn't support this feature or
 * advertises it via xenstore as 0 (disabled) or 1 (enabled).
 *
 * To make use of this feature, frontend should allocate two event
 * channels for TX and RX, advertise them to backend as
 * "event-channel-tx" and "event-channel-rx" respectively. If frontend
 * doesn't want to use this feature, it just writes "event-channel"
 * node as before.
 */

/*
 * Multiple transmit and receive queues:
 * If supported, the backend will write the key "multi-queue-max-queues" to
 * the directory for that vif, and set its value to the maximum supported
 * number of queues.
 * Frontends that are aware of this feature and wish to use it can write the
 * key "multi-queue-num-queues", set to the number they wish to use, which
 * must be greater than zero, and no more than the value reported by the backend
 * in "multi-queue-max-queues".
 *
 * Queues replicate the shared rings and event channels.
 * "feature-split-event-channels" may optionally be used when using
 * multiple queues, but is not mandatory.
 *
 * Each queue consists of one shared ring pair, i.e. there must be the same
 * number of tx and rx rings.
*
 * For frontends requesting just one queue, the usual event-channel and
 * ring-ref keys are written as before, simplifying the backend processing
 * to avoid distinguishing between a frontend that doesn't understand the
 * multi-queue feature, and one that does, but requested only one queue.
 *
 * Frontends requesting two or more queues must not write the toplevel
 * event-channel (or event-channel-{tx,rx}) and {tx,rx}-ring-ref keys,
 * instead writing those keys under sub-keys having the name "queue-N" where
 * N is the integer ID of the queue for which those keys belong. Queues
 * are indexed from zero. For example, a frontend with two queues and split
 * event channels must write the following set of queue-related keys:
 *
 * /local/domain/1/device/vif/0/multi-queue-num-queues = "2"
 * /local/domain/1/device/vif/0/queue-0 = ""
 * /local/domain/1/device/vif/0/queue-0/tx-ring-ref = "<ring-ref-tx0>"
 * /local/domain/1/device/vif/0/queue-0/rx-ring-ref = "<ring-ref-rx0>"
 * /local/domain/1/device/vif/0/queue-0/event-channel-tx = "<evtchn-tx0>"
 * /local/domain/1/device/vif/0/queue-0/event-channel-rx = "<evtchn-rx0>"
 * /local/domain/1/device/vif/0/queue-1 = ""
 * /local/domain/1/device/vif/0/queue-1/tx-ring-ref = "<ring-ref-tx1>"
 * /local/domain/1/device/vif/0/queue-1/rx-ring-ref = "<ring-ref-rx1>"
 * /local/domain/1/device/vif/0/queue-1/event-channel-tx = "<evtchn-tx1>"
 * /local/domain/1/device/vif/0/queue-1/event-channel-rx = "<evtchn-rx1>"
 *
 * If there is any inconsistency in the XenStore data, the backend may
 * choose not to connect any queues, instead treating the request as an
 * error. This includes scenarios where more (or fewer) queues were
 * requested than the frontend provided details for.
 *
 * Mapping of packets to queues is considered to be a function of the
 * transmitting system (backend or frontend) and is not negotiated
 * between the two. Guests are free to transmit packets on any queue
 * they choose, provided it has been set up correctly.
Guests must be
 * prepared to receive packets on any queue they have requested be set up.
 */

/*
 * "feature-no-csum-offload" should be used to turn IPv4 TCP/UDP checksum
 * offload off or on. If it is missing then the feature is assumed to be on.
 * "feature-ipv6-csum-offload" should be used to turn IPv6 TCP/UDP checksum
 * offload on or off. If it is missing then the feature is assumed to be off.
 */

/*
 * "feature-gso-tcpv4" and "feature-gso-tcpv6" advertise the capability to
 * handle large TCP packets (in IPv4 or IPv6 form respectively). Neither
 * frontends nor backends are assumed to be capable unless the flags are
 * present.
 */

/*
 * "feature-multicast-control" advertises the capability to filter ethernet
 * multicast packets in the backend. To enable use of this capability the
 * frontend must set "request-multicast-control" before moving into the
 * connected state.
 *
 * If "request-multicast-control" is set then the backend transmit side should
 * no longer flood multicast packets to the frontend; it should instead drop any
 * multicast packet that does not match in a filter list. The list is
 * amended by the frontend by sending dummy transmit requests containing
 * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL} extra-info fragments as specified below.
 * Once enabled by the frontend, the feature cannot be disabled except by
 * closing and re-connecting to the backend.
 */

/*
 * This is the 'wire' format for packets:
 *  Request 1: netif_tx_request_t -- NETTXF_* (any flags)
 * [Request 2: netif_extra_info_t] (only if request 1 has NETTXF_extra_info)
 * [Request 3: netif_extra_info_t] (only if request 2 has XEN_NETIF_EXTRA_FLAG_MORE)
 *  Request 4: netif_tx_request_t -- NETTXF_more_data
 *  Request 5: netif_tx_request_t -- NETTXF_more_data
 *  ...
 *  Request N: netif_tx_request_t -- 0
 */

/*
 * Guest transmit
 * ==============
 *
 * Ring slot size is 12 octets, however not all request/response
 * structs use the full size.
*
 * tx request data (netif_tx_request_t)
 * ------------------------------------
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | grant ref             | offset    | flags     |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | id        | size      |
 * +-----+-----+-----+-----+
 *
 * grant ref: Reference to buffer page.
 * offset: Offset within buffer page.
 * flags: NETTXF_*.
 * id: request identifier, echoed in response.
 * size: packet size in bytes.
 *
 * tx response (netif_tx_response_t)
 * ---------------------------------
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | id        | status    | unused                |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | unused                |
 * +-----+-----+-----+-----+
 *
 * id: reflects id in transmit request
 * status: NETIF_RSP_*
 *
 * Guest receive
 * =============
 *
 * Ring slot size is 8 octets.
 *
 * rx request (netif_rx_request_t)
 * -------------------------------
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | id        | pad       | gref                  |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * id: request identifier, echoed in response.
 * gref: reference to incoming granted frame.
 *
 * rx response (netif_rx_response_t)
 * ---------------------------------
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | id        | offset    | flags     | status    |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * id: reflects id in receive request
 * offset: offset in page of start of received packet
 * flags: NETRXF_*
 * status: -ve: NETIF_RSP_*; +ve: Rx'ed pkt size.
 *
 * Extra Info
 * ==========
 *
 * Can be present if initial request has NET{T,R}XF_extra_info, or
 * previous extra request has XEN_NETIF_EXTRA_FLAG_MORE.
 *
 * The struct therefore needs to fit into either a tx or rx slot and
 * is therefore limited to 8 octets.
*
 * extra info (netif_extra_info_t)
 * -------------------------------
 *
 * General format:
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * |type |flags| type specific data                |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * | padding for tx        |
 * +-----+-----+-----+-----+
 *
 * type: XEN_NETIF_EXTRA_TYPE_*
 * flags: XEN_NETIF_EXTRA_FLAG_*
 * padding for tx: present only in the tx case due to 8 octet limit
 *     from rx case. Not shown in type specific entries below.
 *
 * XEN_NETIF_EXTRA_TYPE_GSO:
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * |type |flags| size      |type | pad | features  |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * type: Must be XEN_NETIF_EXTRA_TYPE_GSO
 * flags: XEN_NETIF_EXTRA_FLAG_*
 * size: Maximum payload size of each segment.
 * type: XEN_NETIF_GSO_TYPE_*
 * features: XEN_NETIF_GSO_FEAT_*
 *
 * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}:
 *
 *    0     1     2     3     4     5     6     7  octet
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 * |type |flags| addr                              |
 * +-----+-----+-----+-----+-----+-----+-----+-----+
 *
 * type: Must be XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}
 * flags: XEN_NETIF_EXTRA_FLAG_*
 * addr: address to add/remove
 */

/* Protocol checksum field is blank in the packet (hardware offload)? */
#define _NETTXF_csum_blank (0)
#define NETTXF_csum_blank (1U<<_NETTXF_csum_blank)

/* Packet data has been validated against protocol checksum. */
#define _NETTXF_data_validated (1)
#define NETTXF_data_validated (1U<<_NETTXF_data_validated)

/* Packet continues in the next request descriptor. */
#define _NETTXF_more_data (2)
#define NETTXF_more_data (1U<<_NETTXF_more_data)

/* Packet to be followed by extra descriptor(s).
*/ #define _NETTXF_extra_info (3) #define NETTXF_extra_info (1U<<_NETTXF_extra_info) #define XEN_NETIF_MAX_TX_SIZE 0xFFFF struct netif_tx_request { grant_ref_t gref; /* Reference to buffer page */ uint16_t offset; /* Offset within buffer page */ uint16_t flags; /* NETTXF_* */ uint16_t id; /* Echoed in response message. */ uint16_t size; /* Packet size in bytes. */ }; typedef struct netif_tx_request netif_tx_request_t; /* Types of netif_extra_info descriptors. */ #define XEN_NETIF_EXTRA_TYPE_NONE (0) /* Never used - invalid */ #define XEN_NETIF_EXTRA_TYPE_GSO (1) /* u.gso */ #define XEN_NETIF_EXTRA_TYPE_MCAST_ADD (2) /* u.mcast */ #define XEN_NETIF_EXTRA_TYPE_MCAST_DEL (3) /* u.mcast */ #define XEN_NETIF_EXTRA_TYPE_MAX (4) /* netif_extra_info_t flags. */ #define _XEN_NETIF_EXTRA_FLAG_MORE (0) #define XEN_NETIF_EXTRA_FLAG_MORE (1U<<_XEN_NETIF_EXTRA_FLAG_MORE) /* GSO types */ #define XEN_NETIF_GSO_TYPE_NONE (0) #define XEN_NETIF_GSO_TYPE_TCPV4 (1) #define XEN_NETIF_GSO_TYPE_TCPV6 (2) /* * This structure needs to fit within both netif_tx_request_t and * netif_rx_response_t for compatibility. */ struct netif_extra_info { uint8_t type; /* XEN_NETIF_EXTRA_TYPE_* */ uint8_t flags; /* XEN_NETIF_EXTRA_FLAG_* */ union { /* * XEN_NETIF_EXTRA_TYPE_GSO: */ struct { /* * Maximum payload size of each segment. For example, for TCP this * is just the path MSS. */ uint16_t size; /* * GSO type. This determines the protocol of the packet and any * extra features required to segment the packet properly. */ uint8_t type; /* XEN_NETIF_GSO_TYPE_* */ /* Future expansion. */ uint8_t pad; /* * GSO features. This specifies any extra GSO features required * to process this packet, such as ECN support for TCPv4. */ uint16_t features; /* XEN_NETIF_GSO_FEAT_* */ } gso; /* * XEN_NETIF_EXTRA_TYPE_MCAST_{ADD,DEL}: */ struct { uint8_t addr[6]; /* Address to add/remove. 
*/ } mcast; uint16_t pad[3]; } u; }; typedef struct netif_extra_info netif_extra_info_t; struct netif_tx_response { uint16_t id; int16_t status; /* NETIF_RSP_* */ }; typedef struct netif_tx_response netif_tx_response_t; struct netif_rx_request { uint16_t id; /* Echoed in response message. */ uint16_t pad; grant_ref_t gref; /* Reference to incoming granted frame */ }; typedef struct netif_rx_request netif_rx_request_t; /* Packet data has been validated against protocol checksum. */ #define _NETRXF_data_validated (0) #define NETRXF_data_validated (1U<<_NETRXF_data_validated) /* Protocol checksum field is blank in the packet (hardware offload)? */ #define _NETRXF_csum_blank (1) #define NETRXF_csum_blank (1U<<_NETRXF_csum_blank) /* Packet continues in the next request descriptor. */ #define _NETRXF_more_data (2) #define NETRXF_more_data (1U<<_NETRXF_more_data) /* Packet to be followed by extra descriptor(s). */ #define _NETRXF_extra_info (3) #define NETRXF_extra_info (1U<<_NETRXF_extra_info) struct netif_rx_response { uint16_t id; uint16_t offset; /* Offset in page of start of received packet */ uint16_t flags; /* NETRXF_* */ int16_t status; /* -ve: NETIF_RSP_* ; +ve: Rx'ed pkt size. */ }; typedef struct netif_rx_response netif_rx_response_t; /* * Generate netif ring structures and types. */ DEFINE_RING_TYPES(netif_tx, struct netif_tx_request, struct netif_tx_response); DEFINE_RING_TYPES(netif_rx, struct netif_rx_request, struct netif_rx_response); #define NETIF_RSP_DROPPED -2 #define NETIF_RSP_ERROR -1 #define NETIF_RSP_OKAY 0 /* No response: used for auxiliary requests (e.g., netif_extra_info_t). */ #define NETIF_RSP_NULL 1 #endif /* * Local variables: * mode: C * c-file-style: "BSD" * c-basic-offset: 4 * tab-width: 4 * indent-tabs-mode: nil * End: */