aboutsummaryrefslogtreecommitdiffstats
path: root/drivers/net/avp
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/net/avp')
-rw-r--r--drivers/net/avp/Makefile57
-rw-r--r--drivers/net/avp/avp_ethdev.c2312
-rw-r--r--drivers/net/avp/avp_logs.h59
-rw-r--r--drivers/net/avp/rte_avp_common.h432
-rw-r--r--drivers/net/avp/rte_avp_fifo.h169
-rw-r--r--drivers/net/avp/rte_pmd_avp_version.map4
6 files changed, 3033 insertions, 0 deletions
diff --git a/drivers/net/avp/Makefile b/drivers/net/avp/Makefile
new file mode 100644
index 00000000..cd465aac
--- /dev/null
+++ b/drivers/net/avp/Makefile
@@ -0,0 +1,57 @@
+# BSD LICENSE
+#
+# Copyright(c) 2013-2017, Wind River Systems, Inc. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Wind River Systems nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+
+include $(RTE_SDK)/mk/rte.vars.mk
+
+#
+# library name
+#
+LIB = librte_pmd_avp.a
+
+CFLAGS += -O3
+CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR)
+
+EXPORT_MAP := rte_pmd_avp_version.map
+
+LIBABIVER := 1
+
+# install public header files to enable compilation of the hypervisor level
+# dpdk application
+SYMLINK-$(CONFIG_RTE_LIBRTE_AVP_PMD)-include += rte_avp_common.h
+SYMLINK-$(CONFIG_RTE_LIBRTE_AVP_PMD)-include += rte_avp_fifo.h
+
+#
+# all source files are stored in SRCS-y
+#
+SRCS-$(CONFIG_RTE_LIBRTE_AVP_PMD) += avp_ethdev.c
+
+include $(RTE_SDK)/mk/rte.lib.mk
diff --git a/drivers/net/avp/avp_ethdev.c b/drivers/net/avp/avp_ethdev.c
new file mode 100644
index 00000000..fe6849f5
--- /dev/null
+++ b/drivers/net/avp/avp_ethdev.c
@@ -0,0 +1,2312 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2013-2017, Wind River Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2) Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3) Neither the name of Wind River Systems nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdint.h>
+#include <string.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+
+#include <rte_ethdev.h>
+#include <rte_ethdev_pci.h>
+#include <rte_memcpy.h>
+#include <rte_string_fns.h>
+#include <rte_memzone.h>
+#include <rte_malloc.h>
+#include <rte_atomic.h>
+#include <rte_branch_prediction.h>
+#include <rte_pci.h>
+#include <rte_ether.h>
+#include <rte_common.h>
+#include <rte_cycles.h>
+#include <rte_spinlock.h>
+#include <rte_byteorder.h>
+#include <rte_dev.h>
+#include <rte_memory.h>
+#include <rte_eal.h>
+#include <rte_io.h>
+
+#include "rte_avp_common.h"
+#include "rte_avp_fifo.h"
+
+#include "avp_logs.h"
+
+
+static int avp_dev_create(struct rte_pci_device *pci_dev,
+ struct rte_eth_dev *eth_dev);
+
+static int avp_dev_configure(struct rte_eth_dev *dev);
+static int avp_dev_start(struct rte_eth_dev *dev);
+static void avp_dev_stop(struct rte_eth_dev *dev);
+static void avp_dev_close(struct rte_eth_dev *dev);
+static void avp_dev_info_get(struct rte_eth_dev *dev,
+ struct rte_eth_dev_info *dev_info);
+static void avp_vlan_offload_set(struct rte_eth_dev *dev, int mask);
+static int avp_dev_link_update(struct rte_eth_dev *dev,
+ __rte_unused int wait_to_complete);
+static void avp_dev_promiscuous_enable(struct rte_eth_dev *dev);
+static void avp_dev_promiscuous_disable(struct rte_eth_dev *dev);
+
+static int avp_dev_rx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *pool);
+
+static int avp_dev_tx_queue_setup(struct rte_eth_dev *dev,
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf);
+
+static uint16_t avp_recv_scattered_pkts(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
+static uint16_t avp_recv_pkts(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts);
+
+static uint16_t avp_xmit_scattered_pkts(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+
+static uint16_t avp_xmit_pkts(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts);
+
+static void avp_dev_rx_queue_release(void *rxq);
+static void avp_dev_tx_queue_release(void *txq);
+
+static void avp_dev_stats_get(struct rte_eth_dev *dev,
+ struct rte_eth_stats *stats);
+static void avp_dev_stats_reset(struct rte_eth_dev *dev);
+
+
+#define AVP_DEV_TO_PCI(eth_dev) RTE_DEV_TO_PCI((eth_dev)->device)
+
+
+#define AVP_MAX_RX_BURST 64
+#define AVP_MAX_TX_BURST 64
+#define AVP_MAX_MAC_ADDRS 1
+#define AVP_MIN_RX_BUFSIZE ETHER_MIN_LEN
+
+
+/*
+ * Defines the number of microseconds to wait before checking the response
+ * queue for completion.
+ */
+#define AVP_REQUEST_DELAY_USECS (5000)
+
+/*
+ * Defines the number times to check the response queue for completion before
+ * declaring a timeout.
+ */
+#define AVP_MAX_REQUEST_RETRY (100)
+
+/* Defines the current PCI driver version number */
+#define AVP_DPDK_DRIVER_VERSION RTE_AVP_CURRENT_GUEST_VERSION
+
+/*
+ * The set of PCI devices this driver supports
+ */
+static const struct rte_pci_id pci_id_avp_map[] = {
+ { .vendor_id = RTE_AVP_PCI_VENDOR_ID,
+ .device_id = RTE_AVP_PCI_DEVICE_ID,
+ .subsystem_vendor_id = RTE_AVP_PCI_SUB_VENDOR_ID,
+ .subsystem_device_id = RTE_AVP_PCI_SUB_DEVICE_ID,
+ .class_id = RTE_CLASS_ANY_ID,
+ },
+
+ { .vendor_id = 0, /* sentinel */
+ },
+};
+
+/*
+ * dev_ops for avp, bare necessities for basic operation
+ */
+static const struct eth_dev_ops avp_eth_dev_ops = {
+ .dev_configure = avp_dev_configure,
+ .dev_start = avp_dev_start,
+ .dev_stop = avp_dev_stop,
+ .dev_close = avp_dev_close,
+ .dev_infos_get = avp_dev_info_get,
+ .vlan_offload_set = avp_vlan_offload_set,
+ .stats_get = avp_dev_stats_get,
+ .stats_reset = avp_dev_stats_reset,
+ .link_update = avp_dev_link_update,
+ .promiscuous_enable = avp_dev_promiscuous_enable,
+ .promiscuous_disable = avp_dev_promiscuous_disable,
+ .rx_queue_setup = avp_dev_rx_queue_setup,
+ .rx_queue_release = avp_dev_rx_queue_release,
+ .tx_queue_setup = avp_dev_tx_queue_setup,
+ .tx_queue_release = avp_dev_tx_queue_release,
+};
+
+/**@{ AVP device flags */
+#define AVP_F_PROMISC (1 << 1)
+#define AVP_F_CONFIGURED (1 << 2)
+#define AVP_F_LINKUP (1 << 3)
+#define AVP_F_DETACHED (1 << 4)
+/**@} */
+
+/* Ethernet device validation marker */
+#define AVP_ETHDEV_MAGIC 0x92972862
+
+/*
+ * Defines the AVP device attributes which are attached to an RTE ethernet
+ * device
+ */
+struct avp_dev {
+ uint32_t magic; /**< Memory validation marker */
+ uint64_t device_id; /**< Unique system identifier */
+ struct ether_addr ethaddr; /**< Host specified MAC address */
+ struct rte_eth_dev_data *dev_data;
+ /**< Back pointer to ethernet device data */
+ volatile uint32_t flags; /**< Device operational flags */
+ uint8_t port_id; /**< Ethernet port identifier */
+ struct rte_mempool *pool; /**< pkt mbuf mempool */
+ unsigned int guest_mbuf_size; /**< local pool mbuf size */
+ unsigned int host_mbuf_size; /**< host mbuf size */
+ unsigned int max_rx_pkt_len; /**< maximum receive unit */
+ uint32_t host_features; /**< Supported feature bitmap */
+ uint32_t features; /**< Enabled feature bitmap */
+ unsigned int num_tx_queues; /**< Negotiated number of transmit queues */
+ unsigned int max_tx_queues; /**< Maximum number of transmit queues */
+ unsigned int num_rx_queues; /**< Negotiated number of receive queues */
+ unsigned int max_rx_queues; /**< Maximum number of receive queues */
+
+ struct rte_avp_fifo *tx_q[RTE_AVP_MAX_QUEUES]; /**< TX queue */
+ struct rte_avp_fifo *rx_q[RTE_AVP_MAX_QUEUES]; /**< RX queue */
+ struct rte_avp_fifo *alloc_q[RTE_AVP_MAX_QUEUES];
+ /**< Allocated mbufs queue */
+ struct rte_avp_fifo *free_q[RTE_AVP_MAX_QUEUES];
+ /**< To be freed mbufs queue */
+
+ /* mutual exclusion over the 'flag' and 'resp_q/req_q' fields */
+ rte_spinlock_t lock;
+
+ /* For request & response */
+ struct rte_avp_fifo *req_q; /**< Request queue */
+ struct rte_avp_fifo *resp_q; /**< Response queue */
+ void *host_sync_addr; /**< (host) Req/Resp Mem address */
+ void *sync_addr; /**< Req/Resp Mem address */
+ void *host_mbuf_addr; /**< (host) MBUF pool start address */
+ void *mbuf_addr; /**< MBUF pool start address */
+} __rte_cache_aligned;
+
+/* RTE ethernet private data */
+struct avp_adapter {
+ struct avp_dev avp;
+} __rte_cache_aligned;
+
+
+/* 32-bit MMIO register write */
+#define AVP_WRITE32(_value, _addr) rte_write32_relaxed((_value), (_addr))
+
+/* 32-bit MMIO register read */
+#define AVP_READ32(_addr) rte_read32_relaxed((_addr))
+
+/* Macro to cast the ethernet device private data to a AVP object */
+#define AVP_DEV_PRIVATE_TO_HW(adapter) \
+ (&((struct avp_adapter *)adapter)->avp)
+
+/*
+ * Defines the structure of a AVP device queue for the purpose of handling the
+ * receive and transmit burst callback functions
+ */
+struct avp_queue {
+ struct rte_eth_dev_data *dev_data;
+ /**< Backpointer to ethernet device data */
+ struct avp_dev *avp; /**< Backpointer to AVP device */
+ uint16_t queue_id;
+ /**< Queue identifier used for indexing current queue */
+ uint16_t queue_base;
+ /**< Base queue identifier for queue servicing */
+ uint16_t queue_limit;
+ /**< Maximum queue identifier for queue servicing */
+
+ uint64_t packets;
+ uint64_t bytes;
+ uint64_t errors;
+};
+
+/* send a request and wait for a response
+ *
+ * @warning must be called while holding the avp->lock spinlock.
+ */
+static int
+avp_dev_process_request(struct avp_dev *avp, struct rte_avp_request *request)
+{
+ unsigned int retry = AVP_MAX_REQUEST_RETRY;
+ void *resp_addr = NULL;
+ unsigned int count;
+ int ret;
+
+ PMD_DRV_LOG(DEBUG, "Sending request %u to host\n", request->req_id);
+
+ request->result = -ENOTSUP;
+
+ /* Discard any stale responses before starting a new request */
+ while (avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1))
+ PMD_DRV_LOG(DEBUG, "Discarding stale response\n");
+
+ rte_memcpy(avp->sync_addr, request, sizeof(*request));
+ count = avp_fifo_put(avp->req_q, &avp->host_sync_addr, 1);
+ if (count < 1) {
+ PMD_DRV_LOG(ERR, "Cannot send request %u to host\n",
+ request->req_id);
+ ret = -EBUSY;
+ goto done;
+ }
+
+ while (retry--) {
+ /* wait for a response */
+ usleep(AVP_REQUEST_DELAY_USECS);
+
+ count = avp_fifo_count(avp->resp_q);
+ if (count >= 1) {
+ /* response received */
+ break;
+ }
+
+ if ((count < 1) && (retry == 0)) {
+ PMD_DRV_LOG(ERR, "Timeout while waiting for a response for %u\n",
+ request->req_id);
+ ret = -ETIME;
+ goto done;
+ }
+ }
+
+ /* retrieve the response */
+ count = avp_fifo_get(avp->resp_q, (void **)&resp_addr, 1);
+ if ((count != 1) || (resp_addr != avp->host_sync_addr)) {
+ PMD_DRV_LOG(ERR, "Invalid response from host, count=%u resp=%p host_sync_addr=%p\n",
+ count, resp_addr, avp->host_sync_addr);
+ ret = -ENODATA;
+ goto done;
+ }
+
+ /* copy to user buffer */
+ rte_memcpy(request, avp->sync_addr, sizeof(*request));
+ ret = 0;
+
+ PMD_DRV_LOG(DEBUG, "Result %d received for request %u\n",
+ request->result, request->req_id);
+
+done:
+ return ret;
+}
+
+static int
+avp_dev_ctrl_set_link_state(struct rte_eth_dev *eth_dev, unsigned int state)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_avp_request request;
+ int ret;
+
+ /* setup a link state change request */
+ memset(&request, 0, sizeof(request));
+ request.req_id = RTE_AVP_REQ_CFG_NETWORK_IF;
+ request.if_up = state;
+
+ ret = avp_dev_process_request(avp, &request);
+
+ return ret == 0 ? request.result : ret;
+}
+
+static int
+avp_dev_ctrl_set_config(struct rte_eth_dev *eth_dev,
+ struct rte_avp_device_config *config)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_avp_request request;
+ int ret;
+
+ /* setup a configure request */
+ memset(&request, 0, sizeof(request));
+ request.req_id = RTE_AVP_REQ_CFG_DEVICE;
+ memcpy(&request.config, config, sizeof(request.config));
+
+ ret = avp_dev_process_request(avp, &request);
+
+ return ret == 0 ? request.result : ret;
+}
+
+static int
+avp_dev_ctrl_shutdown(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_avp_request request;
+ int ret;
+
+ /* setup a shutdown request */
+ memset(&request, 0, sizeof(request));
+ request.req_id = RTE_AVP_REQ_SHUTDOWN_DEVICE;
+
+ ret = avp_dev_process_request(avp, &request);
+
+ return ret == 0 ? request.result : ret;
+}
+
+/* translate from host mbuf virtual address to guest virtual address */
+static inline void *
+avp_dev_translate_buffer(struct avp_dev *avp, void *host_mbuf_address)
+{
+ return RTE_PTR_ADD(RTE_PTR_SUB(host_mbuf_address,
+ (uintptr_t)avp->host_mbuf_addr),
+ (uintptr_t)avp->mbuf_addr);
+}
+
+/* translate from host physical address to guest virtual address */
+static void *
+avp_dev_translate_address(struct rte_eth_dev *eth_dev,
+ phys_addr_t host_phys_addr)
+{
+ struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ struct rte_mem_resource *resource;
+ struct rte_avp_memmap_info *info;
+ struct rte_avp_memmap *map;
+ off_t offset;
+ void *addr;
+ unsigned int i;
+
+ addr = pci_dev->mem_resource[RTE_AVP_PCI_MEMORY_BAR].addr;
+ resource = &pci_dev->mem_resource[RTE_AVP_PCI_MEMMAP_BAR];
+ info = (struct rte_avp_memmap_info *)resource->addr;
+
+ offset = 0;
+ for (i = 0; i < info->nb_maps; i++) {
+ /* search all segments looking for a matching address */
+ map = &info->maps[i];
+
+ if ((host_phys_addr >= map->phys_addr) &&
+ (host_phys_addr < (map->phys_addr + map->length))) {
+ /* address is within this segment */
+ offset += (host_phys_addr - map->phys_addr);
+ addr = RTE_PTR_ADD(addr, offset);
+
+ PMD_DRV_LOG(DEBUG, "Translating host physical 0x%" PRIx64 " to guest virtual 0x%p\n",
+ host_phys_addr, addr);
+
+ return addr;
+ }
+ offset += map->length;
+ }
+
+ return NULL;
+}
+
+/* verify that the incoming device version is compatible with our version */
+static int
+avp_dev_version_check(uint32_t version)
+{
+ uint32_t driver = RTE_AVP_STRIP_MINOR_VERSION(AVP_DPDK_DRIVER_VERSION);
+ uint32_t device = RTE_AVP_STRIP_MINOR_VERSION(version);
+
+ if (device <= driver) {
+ /* the host driver version is less than or equal to ours */
+ return 0;
+ }
+
+ return 1;
+}
+
+/* verify that memory regions have expected version and validation markers */
+static int
+avp_dev_check_regions(struct rte_eth_dev *eth_dev)
+{
+ struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ struct rte_avp_memmap_info *memmap;
+ struct rte_avp_device_info *info;
+ struct rte_mem_resource *resource;
+ unsigned int i;
+
+ /* Dump resource info for debug */
+ for (i = 0; i < PCI_MAX_RESOURCE; i++) {
+ resource = &pci_dev->mem_resource[i];
+ if ((resource->phys_addr == 0) || (resource->len == 0))
+ continue;
+
+ PMD_DRV_LOG(DEBUG, "resource[%u]: phys=0x%" PRIx64 " len=%" PRIu64 " addr=%p\n",
+ i, resource->phys_addr,
+ resource->len, resource->addr);
+
+ switch (i) {
+ case RTE_AVP_PCI_MEMMAP_BAR:
+ memmap = (struct rte_avp_memmap_info *)resource->addr;
+ if ((memmap->magic != RTE_AVP_MEMMAP_MAGIC) ||
+ (memmap->version != RTE_AVP_MEMMAP_VERSION)) {
+ PMD_DRV_LOG(ERR, "Invalid memmap magic 0x%08x and version %u\n",
+ memmap->magic, memmap->version);
+ return -EINVAL;
+ }
+ break;
+
+ case RTE_AVP_PCI_DEVICE_BAR:
+ info = (struct rte_avp_device_info *)resource->addr;
+ if ((info->magic != RTE_AVP_DEVICE_MAGIC) ||
+ avp_dev_version_check(info->version)) {
+ PMD_DRV_LOG(ERR, "Invalid device info magic 0x%08x or version 0x%08x > 0x%08x\n",
+ info->magic, info->version,
+ AVP_DPDK_DRIVER_VERSION);
+ return -EINVAL;
+ }
+ break;
+
+ case RTE_AVP_PCI_MEMORY_BAR:
+ case RTE_AVP_PCI_MMIO_BAR:
+ if (resource->addr == NULL) {
+ PMD_DRV_LOG(ERR, "Missing address space for BAR%u\n",
+ i);
+ return -EINVAL;
+ }
+ break;
+
+ case RTE_AVP_PCI_MSIX_BAR:
+ default:
+ /* no validation required */
+ break;
+ }
+ }
+
+ return 0;
+}
+
+static int
+avp_dev_detach(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ int ret;
+
+ PMD_DRV_LOG(NOTICE, "Detaching port %u from AVP device 0x%" PRIx64 "\n",
+ eth_dev->data->port_id, avp->device_id);
+
+ rte_spinlock_lock(&avp->lock);
+
+ if (avp->flags & AVP_F_DETACHED) {
+ PMD_DRV_LOG(NOTICE, "port %u already detached\n",
+ eth_dev->data->port_id);
+ ret = 0;
+ goto unlock;
+ }
+
+ /* shutdown the device first so the host stops sending us packets. */
+ ret = avp_dev_ctrl_shutdown(eth_dev);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to send/recv shutdown to host, ret=%d\n",
+ ret);
+ avp->flags &= ~AVP_F_DETACHED;
+ goto unlock;
+ }
+
+ avp->flags |= AVP_F_DETACHED;
+ rte_wmb();
+
+ /* wait for queues to acknowledge the presence of the detach flag */
+ rte_delay_ms(1);
+
+ ret = 0;
+
+unlock:
+ rte_spinlock_unlock(&avp->lock);
+ return ret;
+}
+
+static void
+_avp_set_rx_queue_mappings(struct rte_eth_dev *eth_dev, uint16_t rx_queue_id)
+{
+ struct avp_dev *avp =
+ AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct avp_queue *rxq;
+ uint16_t queue_count;
+ uint16_t remainder;
+
+ rxq = (struct avp_queue *)eth_dev->data->rx_queues[rx_queue_id];
+
+ /*
+ * Must map all AVP fifos as evenly as possible between the configured
+ * device queues. Each device queue will service a subset of the AVP
+ * fifos. If there is an odd number of device queues the first set of
+ * device queues will get the extra AVP fifos.
+ */
+ queue_count = avp->num_rx_queues / eth_dev->data->nb_rx_queues;
+ remainder = avp->num_rx_queues % eth_dev->data->nb_rx_queues;
+ if (rx_queue_id < remainder) {
+ /* these queues must service one extra FIFO */
+ rxq->queue_base = rx_queue_id * (queue_count + 1);
+ rxq->queue_limit = rxq->queue_base + (queue_count + 1) - 1;
+ } else {
+ /* these queues service the regular number of FIFO */
+ rxq->queue_base = ((remainder * (queue_count + 1)) +
+ ((rx_queue_id - remainder) * queue_count));
+ rxq->queue_limit = rxq->queue_base + queue_count - 1;
+ }
+
+ PMD_DRV_LOG(DEBUG, "rxq %u at %p base %u limit %u\n",
+ rx_queue_id, rxq, rxq->queue_base, rxq->queue_limit);
+
+ rxq->queue_id = rxq->queue_base;
+}
+
+static void
+_avp_set_queue_counts(struct rte_eth_dev *eth_dev)
+{
+ struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_avp_device_info *host_info;
+ void *addr;
+
+ addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
+ host_info = (struct rte_avp_device_info *)addr;
+
+ /*
+ * the transmit direction is not negotiated beyond respecting the max
+ * number of queues because the host can handle arbitrary guest tx
+ * queues (host rx queues).
+ */
+ avp->num_tx_queues = eth_dev->data->nb_tx_queues;
+
+ /*
+ * the receive direction is more restrictive. The host requires a
+ * minimum number of guest rx queues (host tx queues) therefore
+ * negotiate a value that is at least as large as the host minimum
+ * requirement. If the host and guest values are not identical then a
+ * mapping will be established in the receive_queue_setup function.
+ */
+ avp->num_rx_queues = RTE_MAX(host_info->min_rx_queues,
+ eth_dev->data->nb_rx_queues);
+
+ PMD_DRV_LOG(DEBUG, "Requesting %u Tx and %u Rx queues from host\n",
+ avp->num_tx_queues, avp->num_rx_queues);
+}
+
+static int
+avp_dev_attach(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_avp_device_config config;
+ unsigned int i;
+ int ret;
+
+ PMD_DRV_LOG(NOTICE, "Attaching port %u to AVP device 0x%" PRIx64 "\n",
+ eth_dev->data->port_id, avp->device_id);
+
+ rte_spinlock_lock(&avp->lock);
+
+ if (!(avp->flags & AVP_F_DETACHED)) {
+ PMD_DRV_LOG(NOTICE, "port %u already attached\n",
+ eth_dev->data->port_id);
+ ret = 0;
+ goto unlock;
+ }
+
+ /*
+ * make sure that the detached flag is set prior to reconfiguring the
+ * queues.
+ */
+ avp->flags |= AVP_F_DETACHED;
+ rte_wmb();
+
+ /*
+ * re-run the device create utility which will parse the new host info
+ * and setup the AVP device queue pointers.
+ */
+ ret = avp_dev_create(AVP_DEV_TO_PCI(eth_dev), eth_dev);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to re-create AVP device, ret=%d\n",
+ ret);
+ goto unlock;
+ }
+
+ if (avp->flags & AVP_F_CONFIGURED) {
+ /*
+ * Update the receive queue mapping to handle cases where the
+ * source and destination hosts have different queue
+ * requirements. As long as the DETACHED flag is asserted the
+ * queue table should not be referenced so it should be safe to
+ * update it.
+ */
+ _avp_set_queue_counts(eth_dev);
+ for (i = 0; i < eth_dev->data->nb_rx_queues; i++)
+ _avp_set_rx_queue_mappings(eth_dev, i);
+
+ /*
+ * Update the host with our config details so that it knows the
+ * device is active.
+ */
+ memset(&config, 0, sizeof(config));
+ config.device_id = avp->device_id;
+ config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
+ config.driver_version = AVP_DPDK_DRIVER_VERSION;
+ config.features = avp->features;
+ config.num_tx_queues = avp->num_tx_queues;
+ config.num_rx_queues = avp->num_rx_queues;
+ config.if_up = !!(avp->flags & AVP_F_LINKUP);
+
+ ret = avp_dev_ctrl_set_config(eth_dev, &config);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
+ ret);
+ goto unlock;
+ }
+ }
+
+ rte_wmb();
+ avp->flags &= ~AVP_F_DETACHED;
+
+ ret = 0;
+
+unlock:
+ rte_spinlock_unlock(&avp->lock);
+ return ret;
+}
+
+static void
+avp_dev_interrupt_handler(void *data)
+{
+ struct rte_eth_dev *eth_dev = data;
+ struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
+ uint32_t status, value;
+ int ret;
+
+ if (registers == NULL)
+ rte_panic("no mapped MMIO register space\n");
+
+ /* read the interrupt status register
+ * note: this register clears on read so all raised interrupts must be
+ * handled or remembered for later processing
+ */
+ status = AVP_READ32(
+ RTE_PTR_ADD(registers,
+ RTE_AVP_INTERRUPT_STATUS_OFFSET));
+
+ if (status & RTE_AVP_MIGRATION_INTERRUPT_MASK) {
+ /* handle interrupt based on current status */
+ value = AVP_READ32(
+ RTE_PTR_ADD(registers,
+ RTE_AVP_MIGRATION_STATUS_OFFSET));
+ switch (value) {
+ case RTE_AVP_MIGRATION_DETACHED:
+ ret = avp_dev_detach(eth_dev);
+ break;
+ case RTE_AVP_MIGRATION_ATTACHED:
+ ret = avp_dev_attach(eth_dev);
+ break;
+ default:
+ PMD_DRV_LOG(ERR, "unexpected migration status, status=%u\n",
+ value);
+ ret = -EINVAL;
+ }
+
+ /* acknowledge the request by writing out our current status */
+ value = (ret == 0 ? value : RTE_AVP_MIGRATION_ERROR);
+ AVP_WRITE32(value,
+ RTE_PTR_ADD(registers,
+ RTE_AVP_MIGRATION_ACK_OFFSET));
+
+ PMD_DRV_LOG(NOTICE, "AVP migration interrupt handled\n");
+ }
+
+ if (status & ~RTE_AVP_MIGRATION_INTERRUPT_MASK)
+ PMD_DRV_LOG(WARNING, "AVP unexpected interrupt, status=0x%08x\n",
+ status);
+
+ /* re-enable UIO interrupt handling */
+ ret = rte_intr_enable(&pci_dev->intr_handle);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to re-enable UIO interrupts, ret=%d\n",
+ ret);
+ /* continue */
+ }
+}
+
+static int
+avp_dev_enable_interrupts(struct rte_eth_dev *eth_dev)
+{
+ struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
+ int ret;
+
+ if (registers == NULL)
+ return -EINVAL;
+
+ /* enable UIO interrupt handling */
+ ret = rte_intr_enable(&pci_dev->intr_handle);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to enable UIO interrupts, ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ /* inform the device that all interrupts are enabled */
+ AVP_WRITE32(RTE_AVP_APP_INTERRUPTS_MASK,
+ RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
+
+ return 0;
+}
+
+static int
+avp_dev_disable_interrupts(struct rte_eth_dev *eth_dev)
+{
+ struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
+ int ret;
+
+ if (registers == NULL)
+ return 0;
+
+ /* inform the device that all interrupts are disabled */
+ AVP_WRITE32(RTE_AVP_NO_INTERRUPTS_MASK,
+ RTE_PTR_ADD(registers, RTE_AVP_INTERRUPT_MASK_OFFSET));
+
+ /* enable UIO interrupt handling */
+ ret = rte_intr_disable(&pci_dev->intr_handle);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to disable UIO interrupts, ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ return 0;
+}
+
+static int
+avp_dev_setup_interrupts(struct rte_eth_dev *eth_dev)
+{
+ struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ int ret;
+
+ /* register a callback handler with UIO for interrupt notifications */
+ ret = rte_intr_callback_register(&pci_dev->intr_handle,
+ avp_dev_interrupt_handler,
+ (void *)eth_dev);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to register UIO interrupt callback, ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ /* enable interrupt processing */
+ return avp_dev_enable_interrupts(eth_dev);
+}
+
+static int
+avp_dev_migration_pending(struct rte_eth_dev *eth_dev)
+{
+ struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ void *registers = pci_dev->mem_resource[RTE_AVP_PCI_MMIO_BAR].addr;
+ uint32_t value;
+
+ if (registers == NULL)
+ return 0;
+
+ value = AVP_READ32(RTE_PTR_ADD(registers,
+ RTE_AVP_MIGRATION_STATUS_OFFSET));
+ if (value == RTE_AVP_MIGRATION_DETACHED) {
+ /* migration is in progress; ack it if we have not already */
+ AVP_WRITE32(value,
+ RTE_PTR_ADD(registers,
+ RTE_AVP_MIGRATION_ACK_OFFSET));
+ return 1;
+ }
+ return 0;
+}
+
+/*
+ * create a AVP device using the supplied device info by first translating it
+ * to guest address space(s).
+ */
+static int
+avp_dev_create(struct rte_pci_device *pci_dev,
+ struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_avp_device_info *host_info;
+ struct rte_mem_resource *resource;
+ unsigned int i;
+
+ resource = &pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR];
+ if (resource->addr == NULL) {
+ PMD_DRV_LOG(ERR, "BAR%u is not mapped\n",
+ RTE_AVP_PCI_DEVICE_BAR);
+ return -EFAULT;
+ }
+ host_info = (struct rte_avp_device_info *)resource->addr;
+
+ if ((host_info->magic != RTE_AVP_DEVICE_MAGIC) ||
+ avp_dev_version_check(host_info->version)) {
+ PMD_DRV_LOG(ERR, "Invalid AVP PCI device, magic 0x%08x version 0x%08x > 0x%08x\n",
+ host_info->magic, host_info->version,
+ AVP_DPDK_DRIVER_VERSION);
+ return -EINVAL;
+ }
+
+ PMD_DRV_LOG(DEBUG, "AVP host device is v%u.%u.%u\n",
+ RTE_AVP_GET_RELEASE_VERSION(host_info->version),
+ RTE_AVP_GET_MAJOR_VERSION(host_info->version),
+ RTE_AVP_GET_MINOR_VERSION(host_info->version));
+
+ PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u TX queue(s)\n",
+ host_info->min_tx_queues, host_info->max_tx_queues);
+ PMD_DRV_LOG(DEBUG, "AVP host supports %u to %u RX queue(s)\n",
+ host_info->min_rx_queues, host_info->max_rx_queues);
+ PMD_DRV_LOG(DEBUG, "AVP host supports features 0x%08x\n",
+ host_info->features);
+
+ if (avp->magic != AVP_ETHDEV_MAGIC) {
+ /*
+ * First time initialization (i.e., not during a VM
+ * migration)
+ */
+ memset(avp, 0, sizeof(*avp));
+ avp->magic = AVP_ETHDEV_MAGIC;
+ avp->dev_data = eth_dev->data;
+ avp->port_id = eth_dev->data->port_id;
+ avp->host_mbuf_size = host_info->mbuf_size;
+ avp->host_features = host_info->features;
+ rte_spinlock_init(&avp->lock);
+ memcpy(&avp->ethaddr.addr_bytes[0],
+ host_info->ethaddr, ETHER_ADDR_LEN);
+ /* adjust max values to not exceed our max */
+ avp->max_tx_queues =
+ RTE_MIN(host_info->max_tx_queues, RTE_AVP_MAX_QUEUES);
+ avp->max_rx_queues =
+ RTE_MIN(host_info->max_rx_queues, RTE_AVP_MAX_QUEUES);
+ } else {
+ /* Re-attaching during migration */
+
+ /* TODO... requires validation of host values */
+ if ((host_info->features & avp->features) != avp->features) {
+ PMD_DRV_LOG(ERR, "AVP host features mismatched; 0x%08x, host=0x%08x\n",
+ avp->features, host_info->features);
+ /* this should not be possible; continue for now */
+ }
+ }
+
+ /* the device id is allowed to change over migrations */
+ avp->device_id = host_info->device_id;
+
+ /* translate incoming host addresses to guest address space */
+ PMD_DRV_LOG(DEBUG, "AVP first host tx queue at 0x%" PRIx64 "\n",
+ host_info->tx_phys);
+ PMD_DRV_LOG(DEBUG, "AVP first host alloc queue at 0x%" PRIx64 "\n",
+ host_info->alloc_phys);
+ for (i = 0; i < avp->max_tx_queues; i++) {
+ avp->tx_q[i] = avp_dev_translate_address(eth_dev,
+ host_info->tx_phys + (i * host_info->tx_size));
+
+ avp->alloc_q[i] = avp_dev_translate_address(eth_dev,
+ host_info->alloc_phys + (i * host_info->alloc_size));
+ }
+
+ PMD_DRV_LOG(DEBUG, "AVP first host rx queue at 0x%" PRIx64 "\n",
+ host_info->rx_phys);
+ PMD_DRV_LOG(DEBUG, "AVP first host free queue at 0x%" PRIx64 "\n",
+ host_info->free_phys);
+ for (i = 0; i < avp->max_rx_queues; i++) {
+ avp->rx_q[i] = avp_dev_translate_address(eth_dev,
+ host_info->rx_phys + (i * host_info->rx_size));
+ avp->free_q[i] = avp_dev_translate_address(eth_dev,
+ host_info->free_phys + (i * host_info->free_size));
+ }
+
+ PMD_DRV_LOG(DEBUG, "AVP host request queue at 0x%" PRIx64 "\n",
+ host_info->req_phys);
+ PMD_DRV_LOG(DEBUG, "AVP host response queue at 0x%" PRIx64 "\n",
+ host_info->resp_phys);
+ PMD_DRV_LOG(DEBUG, "AVP host sync address at 0x%" PRIx64 "\n",
+ host_info->sync_phys);
+ PMD_DRV_LOG(DEBUG, "AVP host mbuf address at 0x%" PRIx64 "\n",
+ host_info->mbuf_phys);
+ avp->req_q = avp_dev_translate_address(eth_dev, host_info->req_phys);
+ avp->resp_q = avp_dev_translate_address(eth_dev, host_info->resp_phys);
+ avp->sync_addr =
+ avp_dev_translate_address(eth_dev, host_info->sync_phys);
+ avp->mbuf_addr =
+ avp_dev_translate_address(eth_dev, host_info->mbuf_phys);
+
+ /*
+ * store the host mbuf virtual address so that we can calculate
+ * relative offsets for each mbuf as they are processed
+ */
+ avp->host_mbuf_addr = host_info->mbuf_va;
+ avp->host_sync_addr = host_info->sync_va;
+
+ /*
+ * store the maximum packet length that is supported by the host.
+ */
+ avp->max_rx_pkt_len = host_info->max_rx_pkt_len;
+ PMD_DRV_LOG(DEBUG, "AVP host max receive packet length is %u\n",
+ host_info->max_rx_pkt_len);
+
+ return 0;
+}
+
+/*
+ * This function is based on probe() function in avp_pci.c
+ * It returns 0 on success.
+ */
+static int
+eth_avp_dev_init(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp =
+ AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_pci_device *pci_dev;
+ int ret;
+
+ pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ eth_dev->dev_ops = &avp_eth_dev_ops;
+ eth_dev->rx_pkt_burst = &avp_recv_pkts;
+ eth_dev->tx_pkt_burst = &avp_xmit_pkts;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
+ /*
+ * no setup required on secondary processes. All data is saved
+ * in dev_private by the primary process. All resource should
+ * be mapped to the same virtual address so all pointers should
+ * be valid.
+ */
+ if (eth_dev->data->scattered_rx) {
+ PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
+ eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
+ eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
+ }
+ return 0;
+ }
+
+ rte_eth_copy_pci_info(eth_dev, pci_dev);
+
+ eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE;
+
+ /* Check current migration status */
+ if (avp_dev_migration_pending(eth_dev)) {
+ PMD_DRV_LOG(ERR, "VM live migration operation in progress\n");
+ return -EBUSY;
+ }
+
+ /* Check BAR resources */
+ ret = avp_dev_check_regions(eth_dev);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to validate BAR resources, ret=%d\n",
+ ret);
+ return ret;
+ }
+
+ /* Enable interrupts */
+ ret = avp_dev_setup_interrupts(eth_dev);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to enable interrupts, ret=%d\n", ret);
+ return ret;
+ }
+
+ /* Handle each subtype */
+ ret = avp_dev_create(pci_dev, eth_dev);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to create device, ret=%d\n", ret);
+ return ret;
+ }
+
+ /* Allocate memory for storing MAC addresses */
+ eth_dev->data->mac_addrs = rte_zmalloc("avp_ethdev", ETHER_ADDR_LEN, 0);
+ if (eth_dev->data->mac_addrs == NULL) {
+ PMD_DRV_LOG(ERR, "Failed to allocate %d bytes needed to store MAC addresses\n",
+ ETHER_ADDR_LEN);
+ return -ENOMEM;
+ }
+
+ /* Get a mac from device config */
+ ether_addr_copy(&avp->ethaddr, &eth_dev->data->mac_addrs[0]);
+
+ return 0;
+}
+
+static int
+eth_avp_dev_uninit(struct rte_eth_dev *eth_dev)
+{
+ int ret;
+
+ if (rte_eal_process_type() != RTE_PROC_PRIMARY)
+ return -EPERM;
+
+ if (eth_dev->data == NULL)
+ return 0;
+
+ ret = avp_dev_disable_interrupts(eth_dev);
+ if (ret != 0) {
+ PMD_DRV_LOG(ERR, "Failed to disable interrupts, ret=%d\n", ret);
+ return ret;
+ }
+
+ if (eth_dev->data->mac_addrs != NULL) {
+ rte_free(eth_dev->data->mac_addrs);
+ eth_dev->data->mac_addrs = NULL;
+ }
+
+ return 0;
+}
+
+static int
+eth_avp_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
+ struct rte_pci_device *pci_dev)
+{
+ struct rte_eth_dev *eth_dev;
+ int ret;
+
+ eth_dev = rte_eth_dev_pci_allocate(pci_dev,
+ sizeof(struct avp_adapter));
+ if (eth_dev == NULL)
+ return -ENOMEM;
+
+ ret = eth_avp_dev_init(eth_dev);
+ if (ret)
+ rte_eth_dev_pci_release(eth_dev);
+
+ return ret;
+}
+
+static int
+eth_avp_pci_remove(struct rte_pci_device *pci_dev)
+{
+ return rte_eth_dev_pci_generic_remove(pci_dev,
+ eth_avp_dev_uninit);
+}
+
+static struct rte_pci_driver rte_avp_pmd = {
+ .id_table = pci_id_avp_map,
+ .drv_flags = RTE_PCI_DRV_NEED_MAPPING,
+ .probe = eth_avp_pci_probe,
+ .remove = eth_avp_pci_remove,
+};
+
+static int
+avp_dev_enable_scattered(struct rte_eth_dev *eth_dev,
+ struct avp_dev *avp)
+{
+ unsigned int max_rx_pkt_len;
+
+ max_rx_pkt_len = eth_dev->data->dev_conf.rxmode.max_rx_pkt_len;
+
+ if ((max_rx_pkt_len > avp->guest_mbuf_size) ||
+ (max_rx_pkt_len > avp->host_mbuf_size)) {
+ /*
+ * If the guest MTU is greater than either the host or guest
+ * buffers then chained mbufs have to be enabled in the TX
+ * direction. It is assumed that the application will not need
+ * to send packets larger than their max_rx_pkt_len (MRU).
+ */
+ return 1;
+ }
+
+ if ((avp->max_rx_pkt_len > avp->guest_mbuf_size) ||
+ (avp->max_rx_pkt_len > avp->host_mbuf_size)) {
+ /*
+ * If the host MRU is greater than its own mbuf size or the
+ * guest mbuf size then chained mbufs have to be enabled in the
+ * RX direction.
+ */
+ return 1;
+ }
+
+ return 0;
+}
+
+static int
+avp_dev_rx_queue_setup(struct rte_eth_dev *eth_dev,
+ uint16_t rx_queue_id,
+ uint16_t nb_rx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_rxconf *rx_conf,
+ struct rte_mempool *pool)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_pktmbuf_pool_private *mbp_priv;
+ struct avp_queue *rxq;
+
+ if (rx_queue_id >= eth_dev->data->nb_rx_queues) {
+ PMD_DRV_LOG(ERR, "RX queue id is out of range: rx_queue_id=%u, nb_rx_queues=%u\n",
+ rx_queue_id, eth_dev->data->nb_rx_queues);
+ return -EINVAL;
+ }
+
+ /* Save mbuf pool pointer */
+ avp->pool = pool;
+
+ /* Save the local mbuf size */
+ mbp_priv = rte_mempool_get_priv(pool);
+ avp->guest_mbuf_size = (uint16_t)(mbp_priv->mbuf_data_room_size);
+ avp->guest_mbuf_size -= RTE_PKTMBUF_HEADROOM;
+
+ if (avp_dev_enable_scattered(eth_dev, avp)) {
+ if (!eth_dev->data->scattered_rx) {
+ PMD_DRV_LOG(NOTICE, "AVP device configured for chained mbufs\n");
+ eth_dev->data->scattered_rx = 1;
+ eth_dev->rx_pkt_burst = avp_recv_scattered_pkts;
+ eth_dev->tx_pkt_burst = avp_xmit_scattered_pkts;
+ }
+ }
+
+ PMD_DRV_LOG(DEBUG, "AVP max_rx_pkt_len=(%u,%u) mbuf_size=(%u,%u)\n",
+ avp->max_rx_pkt_len,
+ eth_dev->data->dev_conf.rxmode.max_rx_pkt_len,
+ avp->host_mbuf_size,
+ avp->guest_mbuf_size);
+
+ /* allocate a queue object */
+ rxq = rte_zmalloc_socket("ethdev RX queue", sizeof(struct avp_queue),
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (rxq == NULL) {
+ PMD_DRV_LOG(ERR, "Failed to allocate new Rx queue object\n");
+ return -ENOMEM;
+ }
+
+ /* save back pointers to AVP and Ethernet devices */
+ rxq->avp = avp;
+ rxq->dev_data = eth_dev->data;
+ eth_dev->data->rx_queues[rx_queue_id] = (void *)rxq;
+
+ /* setup the queue receive mapping for the current queue. */
+ _avp_set_rx_queue_mappings(eth_dev, rx_queue_id);
+
+ PMD_DRV_LOG(DEBUG, "Rx queue %u setup at %p\n", rx_queue_id, rxq);
+
+ (void)nb_rx_desc;
+ (void)rx_conf;
+ return 0;
+}
+
+static int
+avp_dev_tx_queue_setup(struct rte_eth_dev *eth_dev,
+ uint16_t tx_queue_id,
+ uint16_t nb_tx_desc,
+ unsigned int socket_id,
+ const struct rte_eth_txconf *tx_conf)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct avp_queue *txq;
+
+ if (tx_queue_id >= eth_dev->data->nb_tx_queues) {
+ PMD_DRV_LOG(ERR, "TX queue id is out of range: tx_queue_id=%u, nb_tx_queues=%u\n",
+ tx_queue_id, eth_dev->data->nb_tx_queues);
+ return -EINVAL;
+ }
+
+ /* allocate a queue object */
+ txq = rte_zmalloc_socket("ethdev TX queue", sizeof(struct avp_queue),
+ RTE_CACHE_LINE_SIZE, socket_id);
+ if (txq == NULL) {
+ PMD_DRV_LOG(ERR, "Failed to allocate new Tx queue object\n");
+ return -ENOMEM;
+ }
+
+ /* only the configured set of transmit queues are used */
+ txq->queue_id = tx_queue_id;
+ txq->queue_base = tx_queue_id;
+ txq->queue_limit = tx_queue_id;
+
+ /* save back pointers to AVP and Ethernet devices */
+ txq->avp = avp;
+ txq->dev_data = eth_dev->data;
+ eth_dev->data->tx_queues[tx_queue_id] = (void *)txq;
+
+ PMD_DRV_LOG(DEBUG, "Tx queue %u setup at %p\n", tx_queue_id, txq);
+
+ (void)nb_tx_desc;
+ (void)tx_conf;
+ return 0;
+}
+
+static inline int
+_avp_cmp_ether_addr(struct ether_addr *a, struct ether_addr *b)
+{
+ uint16_t *_a = (uint16_t *)&a->addr_bytes[0];
+ uint16_t *_b = (uint16_t *)&b->addr_bytes[0];
+ return (_a[0] ^ _b[0]) | (_a[1] ^ _b[1]) | (_a[2] ^ _b[2]);
+}
+
+static inline int
+_avp_mac_filter(struct avp_dev *avp, struct rte_mbuf *m)
+{
+ struct ether_hdr *eth = rte_pktmbuf_mtod(m, struct ether_hdr *);
+
+ if (likely(_avp_cmp_ether_addr(&avp->ethaddr, &eth->d_addr) == 0)) {
+ /* allow all packets destined to our address */
+ return 0;
+ }
+
+ if (likely(is_broadcast_ether_addr(&eth->d_addr))) {
+ /* allow all broadcast packets */
+ return 0;
+ }
+
+ if (likely(is_multicast_ether_addr(&eth->d_addr))) {
+ /* allow all multicast packets */
+ return 0;
+ }
+
+ if (avp->flags & AVP_F_PROMISC) {
+ /* allow all packets when in promiscuous mode */
+ return 0;
+ }
+
+ return -1;
+}
+
+#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
+static inline void
+__avp_dev_buffer_sanity_check(struct avp_dev *avp, struct rte_avp_desc *buf)
+{
+ struct rte_avp_desc *first_buf;
+ struct rte_avp_desc *pkt_buf;
+ unsigned int pkt_len;
+ unsigned int nb_segs;
+ void *pkt_data;
+ unsigned int i;
+
+ first_buf = avp_dev_translate_buffer(avp, buf);
+
+ i = 0;
+ pkt_len = 0;
+ nb_segs = first_buf->nb_segs;
+ do {
+ /* Adjust pointers for guest addressing */
+ pkt_buf = avp_dev_translate_buffer(avp, buf);
+ if (pkt_buf == NULL)
+ rte_panic("bad buffer: segment %u has an invalid address %p\n",
+ i, buf);
+ pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
+ if (pkt_data == NULL)
+ rte_panic("bad buffer: segment %u has a NULL data pointer\n",
+ i);
+ if (pkt_buf->data_len == 0)
+ rte_panic("bad buffer: segment %u has 0 data length\n",
+ i);
+ pkt_len += pkt_buf->data_len;
+ nb_segs--;
+ i++;
+
+ } while (nb_segs && (buf = pkt_buf->next) != NULL);
+
+ if (nb_segs != 0)
+ rte_panic("bad buffer: expected %u segments found %u\n",
+ first_buf->nb_segs, (first_buf->nb_segs - nb_segs));
+ if (pkt_len != first_buf->pkt_len)
+ rte_panic("bad buffer: expected length %u found %u\n",
+ first_buf->pkt_len, pkt_len);
+}
+
+#define avp_dev_buffer_sanity_check(a, b) \
+ __avp_dev_buffer_sanity_check((a), (b))
+
+#else /* RTE_LIBRTE_AVP_DEBUG_BUFFERS */
+
+#define avp_dev_buffer_sanity_check(a, b) do {} while (0)
+
+#endif
+
+/*
+ * Copy a host buffer chain to a set of mbufs. This function assumes that
+ * there exactly the required number of mbufs to copy all source bytes.
+ */
+static inline struct rte_mbuf *
+avp_dev_copy_from_buffers(struct avp_dev *avp,
+ struct rte_avp_desc *buf,
+ struct rte_mbuf **mbufs,
+ unsigned int count)
+{
+ struct rte_mbuf *m_previous = NULL;
+ struct rte_avp_desc *pkt_buf;
+ unsigned int total_length = 0;
+ unsigned int copy_length;
+ unsigned int src_offset;
+ struct rte_mbuf *m;
+ uint16_t ol_flags;
+ uint16_t vlan_tci;
+ void *pkt_data;
+ unsigned int i;
+
+ avp_dev_buffer_sanity_check(avp, buf);
+
+ /* setup the first source buffer */
+ pkt_buf = avp_dev_translate_buffer(avp, buf);
+ pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
+ total_length = pkt_buf->pkt_len;
+ src_offset = 0;
+
+ if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
+ ol_flags = PKT_RX_VLAN_PKT;
+ vlan_tci = pkt_buf->vlan_tci;
+ } else {
+ ol_flags = 0;
+ vlan_tci = 0;
+ }
+
+ for (i = 0; (i < count) && (buf != NULL); i++) {
+ /* fill each destination buffer */
+ m = mbufs[i];
+
+ if (m_previous != NULL)
+ m_previous->next = m;
+
+ m_previous = m;
+
+ do {
+ /*
+ * Copy as many source buffers as will fit in the
+ * destination buffer.
+ */
+ copy_length = RTE_MIN((avp->guest_mbuf_size -
+ rte_pktmbuf_data_len(m)),
+ (pkt_buf->data_len -
+ src_offset));
+ rte_memcpy(RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
+ rte_pktmbuf_data_len(m)),
+ RTE_PTR_ADD(pkt_data, src_offset),
+ copy_length);
+ rte_pktmbuf_data_len(m) += copy_length;
+ src_offset += copy_length;
+
+ if (likely(src_offset == pkt_buf->data_len)) {
+ /* need a new source buffer */
+ buf = pkt_buf->next;
+ if (buf != NULL) {
+ pkt_buf = avp_dev_translate_buffer(
+ avp, buf);
+ pkt_data = avp_dev_translate_buffer(
+ avp, pkt_buf->data);
+ src_offset = 0;
+ }
+ }
+
+ if (unlikely(rte_pktmbuf_data_len(m) ==
+ avp->guest_mbuf_size)) {
+ /* need a new destination mbuf */
+ break;
+ }
+
+ } while (buf != NULL);
+ }
+
+ m = mbufs[0];
+ m->ol_flags = ol_flags;
+ m->nb_segs = count;
+ rte_pktmbuf_pkt_len(m) = total_length;
+ m->vlan_tci = vlan_tci;
+
+ __rte_mbuf_sanity_check(m, 1);
+
+ return m;
+}
+
+static uint16_t
+avp_recv_scattered_pkts(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct avp_queue *rxq = (struct avp_queue *)rx_queue;
+ struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
+ struct rte_mbuf *mbufs[RTE_AVP_MAX_MBUF_SEGMENTS];
+ struct avp_dev *avp = rxq->avp;
+ struct rte_avp_desc *pkt_buf;
+ struct rte_avp_fifo *free_q;
+ struct rte_avp_fifo *rx_q;
+ struct rte_avp_desc *buf;
+ unsigned int count, avail, n;
+ unsigned int guest_mbuf_size;
+ struct rte_mbuf *m;
+ unsigned int required;
+ unsigned int buf_len;
+ unsigned int port_id;
+ unsigned int i;
+
+ if (unlikely(avp->flags & AVP_F_DETACHED)) {
+ /* VM live migration in progress */
+ return 0;
+ }
+
+ guest_mbuf_size = avp->guest_mbuf_size;
+ port_id = avp->port_id;
+ rx_q = avp->rx_q[rxq->queue_id];
+ free_q = avp->free_q[rxq->queue_id];
+
+ /* setup next queue to service */
+ rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
+ (rxq->queue_id + 1) : rxq->queue_base;
+
+ /* determine how many slots are available in the free queue */
+ count = avp_fifo_free_count(free_q);
+
+ /* determine how many packets are available in the rx queue */
+ avail = avp_fifo_count(rx_q);
+
+ /* determine how many packets can be received */
+ count = RTE_MIN(count, avail);
+ count = RTE_MIN(count, nb_pkts);
+ count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
+
+ if (unlikely(count == 0)) {
+ /* no free buffers, or no buffers on the rx queue */
+ return 0;
+ }
+
+ /* retrieve pending packets */
+ n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
+ PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
+ count, rx_q);
+
+ count = 0;
+ for (i = 0; i < n; i++) {
+ /* prefetch next entry while processing current one */
+ if (i + 1 < n) {
+ pkt_buf = avp_dev_translate_buffer(avp,
+ avp_bufs[i + 1]);
+ rte_prefetch0(pkt_buf);
+ }
+ buf = avp_bufs[i];
+
+ /* Peek into the first buffer to determine the total length */
+ pkt_buf = avp_dev_translate_buffer(avp, buf);
+ buf_len = pkt_buf->pkt_len;
+
+ /* Allocate enough mbufs to receive the entire packet */
+ required = (buf_len + guest_mbuf_size - 1) / guest_mbuf_size;
+ if (rte_pktmbuf_alloc_bulk(avp->pool, mbufs, required)) {
+ rxq->dev_data->rx_mbuf_alloc_failed++;
+ continue;
+ }
+
+ /* Copy the data from the buffers to our mbufs */
+ m = avp_dev_copy_from_buffers(avp, buf, mbufs, required);
+
+ /* finalize mbuf */
+ m->port = port_id;
+
+ if (_avp_mac_filter(avp, m) != 0) {
+ /* silently discard packets not destined to our MAC */
+ rte_pktmbuf_free(m);
+ continue;
+ }
+
+ /* return new mbuf to caller */
+ rx_pkts[count++] = m;
+ rxq->bytes += buf_len;
+ }
+
+ rxq->packets += count;
+
+ /* return the buffers to the free queue */
+ avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
+
+ return count;
+}
+
+
+static uint16_t
+avp_recv_pkts(void *rx_queue,
+ struct rte_mbuf **rx_pkts,
+ uint16_t nb_pkts)
+{
+ struct avp_queue *rxq = (struct avp_queue *)rx_queue;
+ struct rte_avp_desc *avp_bufs[AVP_MAX_RX_BURST];
+ struct avp_dev *avp = rxq->avp;
+ struct rte_avp_desc *pkt_buf;
+ struct rte_avp_fifo *free_q;
+ struct rte_avp_fifo *rx_q;
+ unsigned int count, avail, n;
+ unsigned int pkt_len;
+ struct rte_mbuf *m;
+ char *pkt_data;
+ unsigned int i;
+
+ if (unlikely(avp->flags & AVP_F_DETACHED)) {
+ /* VM live migration in progress */
+ return 0;
+ }
+
+ rx_q = avp->rx_q[rxq->queue_id];
+ free_q = avp->free_q[rxq->queue_id];
+
+ /* setup next queue to service */
+ rxq->queue_id = (rxq->queue_id < rxq->queue_limit) ?
+ (rxq->queue_id + 1) : rxq->queue_base;
+
+ /* determine how many slots are available in the free queue */
+ count = avp_fifo_free_count(free_q);
+
+ /* determine how many packets are available in the rx queue */
+ avail = avp_fifo_count(rx_q);
+
+ /* determine how many packets can be received */
+ count = RTE_MIN(count, avail);
+ count = RTE_MIN(count, nb_pkts);
+ count = RTE_MIN(count, (unsigned int)AVP_MAX_RX_BURST);
+
+ if (unlikely(count == 0)) {
+ /* no free buffers, or no buffers on the rx queue */
+ return 0;
+ }
+
+ /* retrieve pending packets */
+ n = avp_fifo_get(rx_q, (void **)&avp_bufs, count);
+ PMD_RX_LOG(DEBUG, "Receiving %u packets from Rx queue at %p\n",
+ count, rx_q);
+
+ count = 0;
+ for (i = 0; i < n; i++) {
+ /* prefetch next entry while processing current one */
+ if (i < n - 1) {
+ pkt_buf = avp_dev_translate_buffer(avp,
+ avp_bufs[i + 1]);
+ rte_prefetch0(pkt_buf);
+ }
+
+ /* Adjust host pointers for guest addressing */
+ pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
+ pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
+ pkt_len = pkt_buf->pkt_len;
+
+ if (unlikely((pkt_len > avp->guest_mbuf_size) ||
+ (pkt_buf->nb_segs > 1))) {
+ /*
+ * application should be using the scattered receive
+ * function
+ */
+ rxq->errors++;
+ continue;
+ }
+
+ /* process each packet to be transmitted */
+ m = rte_pktmbuf_alloc(avp->pool);
+ if (unlikely(m == NULL)) {
+ rxq->dev_data->rx_mbuf_alloc_failed++;
+ continue;
+ }
+
+ /* copy data out of the host buffer to our buffer */
+ m->data_off = RTE_PKTMBUF_HEADROOM;
+ rte_memcpy(rte_pktmbuf_mtod(m, void *), pkt_data, pkt_len);
+
+ /* initialize the local mbuf */
+ rte_pktmbuf_data_len(m) = pkt_len;
+ rte_pktmbuf_pkt_len(m) = pkt_len;
+ m->port = avp->port_id;
+
+ if (pkt_buf->ol_flags & RTE_AVP_RX_VLAN_PKT) {
+ m->ol_flags = PKT_RX_VLAN_PKT;
+ m->vlan_tci = pkt_buf->vlan_tci;
+ }
+
+ if (_avp_mac_filter(avp, m) != 0) {
+ /* silently discard packets not destined to our MAC */
+ rte_pktmbuf_free(m);
+ continue;
+ }
+
+ /* return new mbuf to caller */
+ rx_pkts[count++] = m;
+ rxq->bytes += pkt_len;
+ }
+
+ rxq->packets += count;
+
+ /* return the buffers to the free queue */
+ avp_fifo_put(free_q, (void **)&avp_bufs[0], n);
+
+ return count;
+}
+
+/*
+ * Copy a chained mbuf to a set of host buffers. This function assumes that
+ * there are sufficient destination buffers to contain the entire source
+ * packet.
+ */
+static inline uint16_t
+avp_dev_copy_to_buffers(struct avp_dev *avp,
+ struct rte_mbuf *mbuf,
+ struct rte_avp_desc **buffers,
+ unsigned int count)
+{
+ struct rte_avp_desc *previous_buf = NULL;
+ struct rte_avp_desc *first_buf = NULL;
+ struct rte_avp_desc *pkt_buf;
+ struct rte_avp_desc *buf;
+ size_t total_length;
+ struct rte_mbuf *m;
+ size_t copy_length;
+ size_t src_offset;
+ char *pkt_data;
+ unsigned int i;
+
+ __rte_mbuf_sanity_check(mbuf, 1);
+
+ m = mbuf;
+ src_offset = 0;
+ total_length = rte_pktmbuf_pkt_len(m);
+ for (i = 0; (i < count) && (m != NULL); i++) {
+ /* fill each destination buffer */
+ buf = buffers[i];
+
+ if (i < count - 1) {
+ /* prefetch next entry while processing this one */
+ pkt_buf = avp_dev_translate_buffer(avp, buffers[i + 1]);
+ rte_prefetch0(pkt_buf);
+ }
+
+ /* Adjust pointers for guest addressing */
+ pkt_buf = avp_dev_translate_buffer(avp, buf);
+ pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
+
+ /* setup the buffer chain */
+ if (previous_buf != NULL)
+ previous_buf->next = buf;
+ else
+ first_buf = pkt_buf;
+
+ previous_buf = pkt_buf;
+
+ do {
+ /*
+ * copy as many source mbuf segments as will fit in the
+ * destination buffer.
+ */
+ copy_length = RTE_MIN((avp->host_mbuf_size -
+ pkt_buf->data_len),
+ (rte_pktmbuf_data_len(m) -
+ src_offset));
+ rte_memcpy(RTE_PTR_ADD(pkt_data, pkt_buf->data_len),
+ RTE_PTR_ADD(rte_pktmbuf_mtod(m, void *),
+ src_offset),
+ copy_length);
+ pkt_buf->data_len += copy_length;
+ src_offset += copy_length;
+
+ if (likely(src_offset == rte_pktmbuf_data_len(m))) {
+ /* need a new source buffer */
+ m = m->next;
+ src_offset = 0;
+ }
+
+ if (unlikely(pkt_buf->data_len ==
+ avp->host_mbuf_size)) {
+ /* need a new destination buffer */
+ break;
+ }
+
+ } while (m != NULL);
+ }
+
+ first_buf->nb_segs = count;
+ first_buf->pkt_len = total_length;
+
+ if (mbuf->ol_flags & PKT_TX_VLAN_PKT) {
+ first_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
+ first_buf->vlan_tci = mbuf->vlan_tci;
+ }
+
+ avp_dev_buffer_sanity_check(avp, buffers[0]);
+
+ return total_length;
+}
+
+
+static uint16_t
+avp_xmit_scattered_pkts(void *tx_queue,
+ struct rte_mbuf **tx_pkts,
+ uint16_t nb_pkts)
+{
+ struct rte_avp_desc *avp_bufs[(AVP_MAX_TX_BURST *
+ RTE_AVP_MAX_MBUF_SEGMENTS)];
+ struct avp_queue *txq = (struct avp_queue *)tx_queue;
+ struct rte_avp_desc *tx_bufs[AVP_MAX_TX_BURST];
+ struct avp_dev *avp = txq->avp;
+ struct rte_avp_fifo *alloc_q;
+ struct rte_avp_fifo *tx_q;
+ unsigned int count, avail, n;
+ unsigned int orig_nb_pkts;
+ struct rte_mbuf *m;
+ unsigned int required;
+ unsigned int segments;
+ unsigned int tx_bytes;
+ unsigned int i;
+
+ orig_nb_pkts = nb_pkts;
+ if (unlikely(avp->flags & AVP_F_DETACHED)) {
+ /* VM live migration in progress */
+ /* TODO ... buffer for X packets then drop? */
+ txq->errors += nb_pkts;
+ return 0;
+ }
+
+ tx_q = avp->tx_q[txq->queue_id];
+ alloc_q = avp->alloc_q[txq->queue_id];
+
+ /* limit the number of transmitted packets to the max burst size */
+ if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
+ nb_pkts = AVP_MAX_TX_BURST;
+
+ /* determine how many buffers are available to copy into */
+ avail = avp_fifo_count(alloc_q);
+ if (unlikely(avail > (AVP_MAX_TX_BURST *
+ RTE_AVP_MAX_MBUF_SEGMENTS)))
+ avail = AVP_MAX_TX_BURST * RTE_AVP_MAX_MBUF_SEGMENTS;
+
+ /* determine how many slots are available in the transmit queue */
+ count = avp_fifo_free_count(tx_q);
+
+ /* determine how many packets can be sent */
+ nb_pkts = RTE_MIN(count, nb_pkts);
+
+ /* determine how many packets will fit in the available buffers */
+ count = 0;
+ segments = 0;
+ for (i = 0; i < nb_pkts; i++) {
+ m = tx_pkts[i];
+ if (likely(i < (unsigned int)nb_pkts - 1)) {
+ /* prefetch next entry while processing this one */
+ rte_prefetch0(tx_pkts[i + 1]);
+ }
+ required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
+ avp->host_mbuf_size;
+
+ if (unlikely((required == 0) ||
+ (required > RTE_AVP_MAX_MBUF_SEGMENTS)))
+ break;
+ else if (unlikely(required + segments > avail))
+ break;
+ segments += required;
+ count++;
+ }
+ nb_pkts = count;
+
+ if (unlikely(nb_pkts == 0)) {
+ /* no available buffers, or no space on the tx queue */
+ txq->errors += orig_nb_pkts;
+ return 0;
+ }
+
+ PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
+ nb_pkts, tx_q);
+
+ /* retrieve sufficient send buffers */
+ n = avp_fifo_get(alloc_q, (void **)&avp_bufs, segments);
+ if (unlikely(n != segments)) {
+ PMD_TX_LOG(DEBUG, "Failed to allocate buffers "
+ "n=%u, segments=%u, orig=%u\n",
+ n, segments, orig_nb_pkts);
+ txq->errors += orig_nb_pkts;
+ return 0;
+ }
+
+ tx_bytes = 0;
+ count = 0;
+ for (i = 0; i < nb_pkts; i++) {
+ /* process each packet to be transmitted */
+ m = tx_pkts[i];
+
+ /* determine how many buffers are required for this packet */
+ required = (rte_pktmbuf_pkt_len(m) + avp->host_mbuf_size - 1) /
+ avp->host_mbuf_size;
+
+ tx_bytes += avp_dev_copy_to_buffers(avp, m,
+ &avp_bufs[count], required);
+ tx_bufs[i] = avp_bufs[count];
+ count += required;
+
+ /* free the original mbuf */
+ rte_pktmbuf_free(m);
+ }
+
+ txq->packets += nb_pkts;
+ txq->bytes += tx_bytes;
+
+#ifdef RTE_LIBRTE_AVP_DEBUG_BUFFERS
+ for (i = 0; i < nb_pkts; i++)
+ avp_dev_buffer_sanity_check(avp, tx_bufs[i]);
+#endif
+
+ /* send the packets */
+ n = avp_fifo_put(tx_q, (void **)&tx_bufs[0], nb_pkts);
+ if (unlikely(n != orig_nb_pkts))
+ txq->errors += (orig_nb_pkts - n);
+
+ return n;
+}
+
+
+static uint16_t
+avp_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+ struct avp_queue *txq = (struct avp_queue *)tx_queue;
+ struct rte_avp_desc *avp_bufs[AVP_MAX_TX_BURST];
+ struct avp_dev *avp = txq->avp;
+ struct rte_avp_desc *pkt_buf;
+ struct rte_avp_fifo *alloc_q;
+ struct rte_avp_fifo *tx_q;
+ unsigned int count, avail, n;
+ struct rte_mbuf *m;
+ unsigned int pkt_len;
+ unsigned int tx_bytes;
+ char *pkt_data;
+ unsigned int i;
+
+ if (unlikely(avp->flags & AVP_F_DETACHED)) {
+ /* VM live migration in progress */
+ /* TODO ... buffer for X packets then drop?! */
+ txq->errors++;
+ return 0;
+ }
+
+ tx_q = avp->tx_q[txq->queue_id];
+ alloc_q = avp->alloc_q[txq->queue_id];
+
+ /* limit the number of transmitted packets to the max burst size */
+ if (unlikely(nb_pkts > AVP_MAX_TX_BURST))
+ nb_pkts = AVP_MAX_TX_BURST;
+
+ /* determine how many buffers are available to copy into */
+ avail = avp_fifo_count(alloc_q);
+
+ /* determine how many slots are available in the transmit queue */
+ count = avp_fifo_free_count(tx_q);
+
+ /* determine how many packets can be sent */
+ count = RTE_MIN(count, avail);
+ count = RTE_MIN(count, nb_pkts);
+
+ if (unlikely(count == 0)) {
+ /* no available buffers, or no space on the tx queue */
+ txq->errors += nb_pkts;
+ return 0;
+ }
+
+ PMD_TX_LOG(DEBUG, "Sending %u packets on Tx queue at %p\n",
+ count, tx_q);
+
+ /* retrieve sufficient send buffers */
+ n = avp_fifo_get(alloc_q, (void **)&avp_bufs, count);
+ if (unlikely(n != count)) {
+ txq->errors++;
+ return 0;
+ }
+
+ tx_bytes = 0;
+ for (i = 0; i < count; i++) {
+ /* prefetch next entry while processing the current one */
+ if (i < count - 1) {
+ pkt_buf = avp_dev_translate_buffer(avp,
+ avp_bufs[i + 1]);
+ rte_prefetch0(pkt_buf);
+ }
+
+ /* process each packet to be transmitted */
+ m = tx_pkts[i];
+
+ /* Adjust pointers for guest addressing */
+ pkt_buf = avp_dev_translate_buffer(avp, avp_bufs[i]);
+ pkt_data = avp_dev_translate_buffer(avp, pkt_buf->data);
+ pkt_len = rte_pktmbuf_pkt_len(m);
+
+ if (unlikely((pkt_len > avp->guest_mbuf_size) ||
+ (pkt_len > avp->host_mbuf_size))) {
+ /*
+ * application should be using the scattered transmit
+ * function; send it truncated to avoid the performance
+ * hit of having to manage returning the already
+ * allocated buffer to the free list. This should not
+ * happen since the application should have set the
+ * max_rx_pkt_len based on its MTU and it should be
+ * policing its own packet sizes.
+ */
+ txq->errors++;
+ pkt_len = RTE_MIN(avp->guest_mbuf_size,
+ avp->host_mbuf_size);
+ }
+
+ /* copy data out of our mbuf and into the AVP buffer */
+ rte_memcpy(pkt_data, rte_pktmbuf_mtod(m, void *), pkt_len);
+ pkt_buf->pkt_len = pkt_len;
+ pkt_buf->data_len = pkt_len;
+ pkt_buf->nb_segs = 1;
+ pkt_buf->next = NULL;
+
+ if (m->ol_flags & PKT_TX_VLAN_PKT) {
+ pkt_buf->ol_flags |= RTE_AVP_TX_VLAN_PKT;
+ pkt_buf->vlan_tci = m->vlan_tci;
+ }
+
+ tx_bytes += pkt_len;
+
+ /* free the original mbuf */
+ rte_pktmbuf_free(m);
+ }
+
+ txq->packets += count;
+ txq->bytes += tx_bytes;
+
+ /* send the packets */
+ n = avp_fifo_put(tx_q, (void **)&avp_bufs[0], count);
+
+ return n;
+}
+
+static void
+avp_dev_rx_queue_release(void *rx_queue)
+{
+ struct avp_queue *rxq = (struct avp_queue *)rx_queue;
+ struct avp_dev *avp = rxq->avp;
+ struct rte_eth_dev_data *data = avp->dev_data;
+ unsigned int i;
+
+ for (i = 0; i < avp->num_rx_queues; i++) {
+ if (data->rx_queues[i] == rxq)
+ data->rx_queues[i] = NULL;
+ }
+}
+
+static void
+avp_dev_tx_queue_release(void *tx_queue)
+{
+ struct avp_queue *txq = (struct avp_queue *)tx_queue;
+ struct avp_dev *avp = txq->avp;
+ struct rte_eth_dev_data *data = avp->dev_data;
+ unsigned int i;
+
+ for (i = 0; i < avp->num_tx_queues; i++) {
+ if (data->tx_queues[i] == txq)
+ data->tx_queues[i] = NULL;
+ }
+}
+
+static int
+avp_dev_configure(struct rte_eth_dev *eth_dev)
+{
+ struct rte_pci_device *pci_dev = AVP_DEV_TO_PCI(eth_dev);
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_avp_device_info *host_info;
+ struct rte_avp_device_config config;
+ int mask = 0;
+ void *addr;
+ int ret;
+
+ rte_spinlock_lock(&avp->lock);
+ if (avp->flags & AVP_F_DETACHED) {
+ PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
+ ret = -ENOTSUP;
+ goto unlock;
+ }
+
+ addr = pci_dev->mem_resource[RTE_AVP_PCI_DEVICE_BAR].addr;
+ host_info = (struct rte_avp_device_info *)addr;
+
+ /* Setup required number of queues */
+ _avp_set_queue_counts(eth_dev);
+
+ mask = (ETH_VLAN_STRIP_MASK |
+ ETH_VLAN_FILTER_MASK |
+ ETH_VLAN_EXTEND_MASK);
+ avp_vlan_offload_set(eth_dev, mask);
+
+ /* update device config */
+ memset(&config, 0, sizeof(config));
+ config.device_id = host_info->device_id;
+ config.driver_type = RTE_AVP_DRIVER_TYPE_DPDK;
+ config.driver_version = AVP_DPDK_DRIVER_VERSION;
+ config.features = avp->features;
+ config.num_tx_queues = avp->num_tx_queues;
+ config.num_rx_queues = avp->num_rx_queues;
+
+ ret = avp_dev_ctrl_set_config(eth_dev, &config);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Config request failed by host, ret=%d\n",
+ ret);
+ goto unlock;
+ }
+
+ avp->flags |= AVP_F_CONFIGURED;
+ ret = 0;
+
+unlock:
+ rte_spinlock_unlock(&avp->lock);
+ return ret;
+}
+
+static int
+avp_dev_start(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ int ret;
+
+ rte_spinlock_lock(&avp->lock);
+ if (avp->flags & AVP_F_DETACHED) {
+ PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
+ ret = -ENOTSUP;
+ goto unlock;
+ }
+
+ /* disable features that we do not support */
+ eth_dev->data->dev_conf.rxmode.hw_ip_checksum = 0;
+ eth_dev->data->dev_conf.rxmode.hw_vlan_filter = 0;
+ eth_dev->data->dev_conf.rxmode.hw_vlan_extend = 0;
+ eth_dev->data->dev_conf.rxmode.hw_strip_crc = 0;
+
+ /* update link state */
+ ret = avp_dev_ctrl_set_link_state(eth_dev, 1);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
+ ret);
+ goto unlock;
+ }
+
+ /* remember current link state */
+ avp->flags |= AVP_F_LINKUP;
+
+ ret = 0;
+
+unlock:
+ rte_spinlock_unlock(&avp->lock);
+ return ret;
+}
+
+static void
+avp_dev_stop(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ int ret;
+
+ rte_spinlock_lock(&avp->lock);
+ if (avp->flags & AVP_F_DETACHED) {
+ PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
+ goto unlock;
+ }
+
+ /* remember current link state */
+ avp->flags &= ~AVP_F_LINKUP;
+
+ /* update link state */
+ ret = avp_dev_ctrl_set_link_state(eth_dev, 0);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Link state change failed by host, ret=%d\n",
+ ret);
+ }
+
+unlock:
+ rte_spinlock_unlock(&avp->lock);
+}
+
+static void
+avp_dev_close(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ int ret;
+
+ rte_spinlock_lock(&avp->lock);
+ if (avp->flags & AVP_F_DETACHED) {
+ PMD_DRV_LOG(ERR, "Operation not supported during VM live migration\n");
+ goto unlock;
+ }
+
+ /* remember current link state */
+ avp->flags &= ~AVP_F_LINKUP;
+ avp->flags &= ~AVP_F_CONFIGURED;
+
+ ret = avp_dev_disable_interrupts(eth_dev);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Failed to disable interrupts\n");
+ /* continue */
+ }
+
+ /* update device state */
+ ret = avp_dev_ctrl_shutdown(eth_dev);
+ if (ret < 0) {
+ PMD_DRV_LOG(ERR, "Device shutdown failed by host, ret=%d\n",
+ ret);
+ /* continue */
+ }
+
+unlock:
+ rte_spinlock_unlock(&avp->lock);
+}
+
+static int
+avp_dev_link_update(struct rte_eth_dev *eth_dev,
+ __rte_unused int wait_to_complete)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ struct rte_eth_link *link = &eth_dev->data->dev_link;
+
+ link->link_speed = ETH_SPEED_NUM_10G;
+ link->link_duplex = ETH_LINK_FULL_DUPLEX;
+ link->link_status = !!(avp->flags & AVP_F_LINKUP);
+
+ return -1;
+}
+
+static void
+avp_dev_promiscuous_enable(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
+ rte_spinlock_lock(&avp->lock);
+ if ((avp->flags & AVP_F_PROMISC) == 0) {
+ avp->flags |= AVP_F_PROMISC;
+ PMD_DRV_LOG(DEBUG, "Promiscuous mode enabled on %u\n",
+ eth_dev->data->port_id);
+ }
+ rte_spinlock_unlock(&avp->lock);
+}
+
+static void
+avp_dev_promiscuous_disable(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
+ rte_spinlock_lock(&avp->lock);
+ if ((avp->flags & AVP_F_PROMISC) != 0) {
+ avp->flags &= ~AVP_F_PROMISC;
+ PMD_DRV_LOG(DEBUG, "Promiscuous mode disabled on %u\n",
+ eth_dev->data->port_id);
+ }
+ rte_spinlock_unlock(&avp->lock);
+}
+
+static void
+avp_dev_info_get(struct rte_eth_dev *eth_dev,
+ struct rte_eth_dev_info *dev_info)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
+ dev_info->driver_name = "rte_avp_pmd";
+ dev_info->pci_dev = RTE_DEV_TO_PCI(eth_dev->device);
+ dev_info->max_rx_queues = avp->max_rx_queues;
+ dev_info->max_tx_queues = avp->max_tx_queues;
+ dev_info->min_rx_bufsize = AVP_MIN_RX_BUFSIZE;
+ dev_info->max_rx_pktlen = avp->max_rx_pkt_len;
+ dev_info->max_mac_addrs = AVP_MAX_MAC_ADDRS;
+ if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
+ dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP;
+ dev_info->tx_offload_capa = DEV_TX_OFFLOAD_VLAN_INSERT;
+ }
+}
+
+static void
+avp_vlan_offload_set(struct rte_eth_dev *eth_dev, int mask)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+
+ if (mask & ETH_VLAN_STRIP_MASK) {
+ if (avp->host_features & RTE_AVP_FEATURE_VLAN_OFFLOAD) {
+ if (eth_dev->data->dev_conf.rxmode.hw_vlan_strip)
+ avp->features |= RTE_AVP_FEATURE_VLAN_OFFLOAD;
+ else
+ avp->features &= ~RTE_AVP_FEATURE_VLAN_OFFLOAD;
+ } else {
+ PMD_DRV_LOG(ERR, "VLAN strip offload not supported\n");
+ }
+ }
+
+ if (mask & ETH_VLAN_FILTER_MASK) {
+ if (eth_dev->data->dev_conf.rxmode.hw_vlan_filter)
+ PMD_DRV_LOG(ERR, "VLAN filter offload not supported\n");
+ }
+
+ if (mask & ETH_VLAN_EXTEND_MASK) {
+ if (eth_dev->data->dev_conf.rxmode.hw_vlan_extend)
+ PMD_DRV_LOG(ERR, "VLAN extend offload not supported\n");
+ }
+}
+
+static void
+avp_dev_stats_get(struct rte_eth_dev *eth_dev, struct rte_eth_stats *stats)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ unsigned int i;
+
+ for (i = 0; i < avp->num_rx_queues; i++) {
+ struct avp_queue *rxq = avp->dev_data->rx_queues[i];
+
+ if (rxq) {
+ stats->ipackets += rxq->packets;
+ stats->ibytes += rxq->bytes;
+ stats->ierrors += rxq->errors;
+
+ stats->q_ipackets[i] += rxq->packets;
+ stats->q_ibytes[i] += rxq->bytes;
+ stats->q_errors[i] += rxq->errors;
+ }
+ }
+
+ for (i = 0; i < avp->num_tx_queues; i++) {
+ struct avp_queue *txq = avp->dev_data->tx_queues[i];
+
+ if (txq) {
+ stats->opackets += txq->packets;
+ stats->obytes += txq->bytes;
+ stats->oerrors += txq->errors;
+
+ stats->q_opackets[i] += txq->packets;
+ stats->q_obytes[i] += txq->bytes;
+ stats->q_errors[i] += txq->errors;
+ }
+ }
+}
+
+static void
+avp_dev_stats_reset(struct rte_eth_dev *eth_dev)
+{
+ struct avp_dev *avp = AVP_DEV_PRIVATE_TO_HW(eth_dev->data->dev_private);
+ unsigned int i;
+
+ for (i = 0; i < avp->num_rx_queues; i++) {
+ struct avp_queue *rxq = avp->dev_data->rx_queues[i];
+
+ if (rxq) {
+ rxq->bytes = 0;
+ rxq->packets = 0;
+ rxq->errors = 0;
+ }
+ }
+
+ for (i = 0; i < avp->num_tx_queues; i++) {
+ struct avp_queue *txq = avp->dev_data->tx_queues[i];
+
+ if (txq) {
+ txq->bytes = 0;
+ txq->packets = 0;
+ txq->errors = 0;
+ }
+ }
+}
+
+RTE_PMD_REGISTER_PCI(net_avp, rte_avp_pmd);
+RTE_PMD_REGISTER_PCI_TABLE(net_avp, pci_id_avp_map);
diff --git a/drivers/net/avp/avp_logs.h b/drivers/net/avp/avp_logs.h
new file mode 100644
index 00000000..252cab7d
--- /dev/null
+++ b/drivers/net/avp/avp_logs.h
@@ -0,0 +1,59 @@
+/*
+ * BSD LICENSE
+ *
+ * Copyright (c) 2013-2015, Wind River Systems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1) Redistributions of source code must retain the above copyright notice,
+ * this list of conditions and the following disclaimer.
+ *
+ * 2) Redistributions in binary form must reproduce the above copyright notice,
+ * this list of conditions and the following disclaimer in the documentation
+ * and/or other materials provided with the distribution.
+ *
+ * 3) Neither the name of Wind River Systems nor the names of its contributors
+ * may be used to endorse or promote products derived from this software
+ * without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef _AVP_LOGS_H_
+#define _AVP_LOGS_H_
+
+#include <rte_log.h>
+
+#ifdef RTE_LIBRTE_AVP_DEBUG_RX
+#define PMD_RX_LOG(level, fmt, args...) \
+ RTE_LOG(level, PMD, "%s() rx: " fmt, __func__, ## args)
+#else
+#define PMD_RX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_AVP_DEBUG_TX
+#define PMD_TX_LOG(level, fmt, args...) \
+ RTE_LOG(level, PMD, "%s() tx: " fmt, __func__, ## args)
+#else
+#define PMD_TX_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#ifdef RTE_LIBRTE_AVP_DEBUG_DRIVER
+#define PMD_DRV_LOG(level, fmt, args...) \
+ RTE_LOG(level, PMD, "%s(): " fmt, __func__, ## args)
+#else
+#define PMD_DRV_LOG(level, fmt, args...) do { } while (0)
+#endif
+
+#endif /* _AVP_LOGS_H_ */
diff --git a/drivers/net/avp/rte_avp_common.h b/drivers/net/avp/rte_avp_common.h
new file mode 100644
index 00000000..488d7216
--- /dev/null
+++ b/drivers/net/avp/rte_avp_common.h
@@ -0,0 +1,432 @@
+/*-
+ * This file is provided under a dual BSD/LGPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * Copyright(c) 2014-2017 Wind River Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * Contact Information:
+ * Wind River Systems, Inc.
+ *
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * Copyright(c) 2014-2017 Wind River Systems, Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_AVP_COMMON_H_
+#define _RTE_AVP_COMMON_H_
+
+#ifdef __KERNEL__
+#include <linux/if.h>
+#define RTE_STD_C11
+#else
+#include <stdint.h>
+#include <rte_common.h>
+#include <rte_memory.h>
+#include <rte_ether.h>
+#include <rte_atomic.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * AVP name is part of network device name.
+ */
+#define RTE_AVP_NAMESIZE 32
+
+/**
+ * AVP alias is a user-defined value used for lookups from secondary
+ * processes. Typically, this is a UUID.
+ */
+#define RTE_AVP_ALIASSIZE 128
+
+/*
+ * Request id.
+ */
+enum rte_avp_req_id {
+ RTE_AVP_REQ_UNKNOWN = 0,
+ RTE_AVP_REQ_CHANGE_MTU,
+ RTE_AVP_REQ_CFG_NETWORK_IF,
+ RTE_AVP_REQ_CFG_DEVICE,
+ RTE_AVP_REQ_SHUTDOWN_DEVICE,
+ RTE_AVP_REQ_MAX,
+};
+
+/**@{ AVP device driver types */
+#define RTE_AVP_DRIVER_TYPE_UNKNOWN 0
+#define RTE_AVP_DRIVER_TYPE_DPDK 1
+#define RTE_AVP_DRIVER_TYPE_KERNEL 2
+#define RTE_AVP_DRIVER_TYPE_QEMU 3
+/**@} */
+
+/**@{ AVP device operational modes */
+#define RTE_AVP_MODE_HOST 0 /**< AVP interface created in host */
+#define RTE_AVP_MODE_GUEST 1 /**< AVP interface created for export to guest */
+#define RTE_AVP_MODE_TRACE 2 /**< AVP interface created for packet tracing */
+/**@} */
+
+/*
+ * Structure for AVP queue configuration query request/result
+ */
+struct rte_avp_device_config {
+ uint64_t device_id; /**< Unique system identifier */
+ uint32_t driver_type; /**< Device Driver type */
+ uint32_t driver_version; /**< Device Driver version */
+ uint32_t features; /**< Negotiated features */
+ uint16_t num_tx_queues; /**< Number of active transmit queues */
+ uint16_t num_rx_queues; /**< Number of active receive queues */
+ uint8_t if_up; /**< 1: interface up, 0: interface down */
+} __attribute__ ((__packed__));
+
+/*
+ * Structure for AVP request.
+ */
+struct rte_avp_request {
+ uint32_t req_id; /**< Request id */
+ RTE_STD_C11
+ union {
+ uint32_t new_mtu; /**< New MTU */
+ uint8_t if_up; /**< 1: interface up, 0: interface down */
+ struct rte_avp_device_config config; /**< Queue configuration */
+ };
+ int32_t result; /**< Result for processing request */
+} __attribute__ ((__packed__));
+
+/*
+ * FIFO struct mapped in a shared memory. It describes a circular buffer FIFO
+ * Write and read should wrap around. FIFO is empty when write == read
+ * Writing should never overwrite the read position
+ */
+struct rte_avp_fifo {
+ volatile unsigned int write; /**< Next position to be written*/
+ volatile unsigned int read; /**< Next position to be read */
+ unsigned int len; /**< Circular buffer length */
+ unsigned int elem_size; /**< Pointer size - for 32/64 bit OS */
+ void *volatile buffer[]; /**< The buffer contains mbuf pointers */
+};
+
+
+/*
+ * AVP packet buffer header used to define the exchange of packet data.
+ */
+struct rte_avp_desc {
+ uint64_t pad0;
+ void *pkt_mbuf; /**< Reference to packet mbuf */
+ uint8_t pad1[14];
+ uint16_t ol_flags; /**< Offload features. */
+ void *next; /**< Reference to next buffer in chain */
+ void *data; /**< Start address of data in segment buffer. */
+ uint16_t data_len; /**< Amount of data in segment buffer. */
+ uint8_t nb_segs; /**< Number of segments */
+ uint8_t pad2;
+ uint16_t pkt_len; /**< Total pkt len: sum of all segment data_len. */
+ uint32_t pad3;
+ uint16_t vlan_tci; /**< VLAN Tag Control Identifier (CPU order). */
+ uint32_t pad4;
+} __attribute__ ((__aligned__(RTE_CACHE_LINE_SIZE), __packed__));
+
+
+/**{ AVP device features */
+#define RTE_AVP_FEATURE_VLAN_OFFLOAD (1 << 0) /**< Emulated HW VLAN offload */
+/**@} */
+
+
+/**@{ Offload feature flags */
+#define RTE_AVP_TX_VLAN_PKT 0x0001 /**< TX packet is a 802.1q VLAN packet. */
+#define RTE_AVP_RX_VLAN_PKT 0x0800 /**< RX packet is a 802.1q VLAN packet. */
+/**@} */
+
+
+/**@{ AVP PCI identifiers */
+#define RTE_AVP_PCI_VENDOR_ID 0x1af4
+#define RTE_AVP_PCI_DEVICE_ID 0x1110
+/**@} */
+
+/**@{ AVP PCI subsystem identifiers */
+#define RTE_AVP_PCI_SUB_VENDOR_ID RTE_AVP_PCI_VENDOR_ID
+#define RTE_AVP_PCI_SUB_DEVICE_ID 0x1104
+/**@} */
+
+/**@{ AVP PCI BAR definitions */
+#define RTE_AVP_PCI_MMIO_BAR 0
+#define RTE_AVP_PCI_MSIX_BAR 1
+#define RTE_AVP_PCI_MEMORY_BAR 2
+#define RTE_AVP_PCI_MEMMAP_BAR 4
+#define RTE_AVP_PCI_DEVICE_BAR 5
+#define RTE_AVP_PCI_MAX_BAR 6
+/**@} */
+
+/**@{ AVP PCI BAR name definitions */
+#define RTE_AVP_MMIO_BAR_NAME "avp-mmio"
+#define RTE_AVP_MSIX_BAR_NAME "avp-msix"
+#define RTE_AVP_MEMORY_BAR_NAME "avp-memory"
+#define RTE_AVP_MEMMAP_BAR_NAME "avp-memmap"
+#define RTE_AVP_DEVICE_BAR_NAME "avp-device"
+/**@} */
+
+/**@{ AVP PCI MSI-X vectors */
+#define RTE_AVP_MIGRATION_MSIX_VECTOR 0 /**< Migration interrupts */
+#define RTE_AVP_MAX_MSIX_VECTORS 1
+/**@} */
+
+/**@} AVP Migration status/ack register values */
+#define RTE_AVP_MIGRATION_NONE 0 /**< Migration never executed */
+#define RTE_AVP_MIGRATION_DETACHED 1 /**< Device attached during migration */
+#define RTE_AVP_MIGRATION_ATTACHED 2 /**< Device reattached during migration */
+#define RTE_AVP_MIGRATION_ERROR 3 /**< Device failed to attach/detach */
+/**@} */
+
+/**@} AVP MMIO Register Offsets */
+#define RTE_AVP_REGISTER_BASE 0
+#define RTE_AVP_INTERRUPT_MASK_OFFSET (RTE_AVP_REGISTER_BASE + 0)
+#define RTE_AVP_INTERRUPT_STATUS_OFFSET (RTE_AVP_REGISTER_BASE + 4)
+#define RTE_AVP_MIGRATION_STATUS_OFFSET (RTE_AVP_REGISTER_BASE + 8)
+#define RTE_AVP_MIGRATION_ACK_OFFSET (RTE_AVP_REGISTER_BASE + 12)
+/**@} */
+
+/**@} AVP Interrupt Status Mask */
+#define RTE_AVP_MIGRATION_INTERRUPT_MASK (1 << 1)
+#define RTE_AVP_APP_INTERRUPTS_MASK 0xFFFFFFFF
+#define RTE_AVP_NO_INTERRUPTS_MASK 0
+/**@} */
+
+/*
+ * Maximum number of memory regions to export
+ */
+#define RTE_AVP_MAX_MAPS 2048
+
+/*
+ * Description of a single memory region
+ */
+struct rte_avp_memmap {
+ void *addr;
+ phys_addr_t phys_addr;
+ uint64_t length;
+};
+
+/*
+ * AVP memory mapping validation marker
+ */
+#define RTE_AVP_MEMMAP_MAGIC 0x20131969
+
+/**@{ AVP memory map versions */
+#define RTE_AVP_MEMMAP_VERSION_1 1
+#define RTE_AVP_MEMMAP_VERSION RTE_AVP_MEMMAP_VERSION_1
+/**@} */
+
+/*
+ * Defines a list of memory regions exported from the host to the guest
+ */
+struct rte_avp_memmap_info {
+ uint32_t magic; /**< Memory validation marker */
+ uint32_t version; /**< Data format version */
+ uint32_t nb_maps;
+ struct rte_avp_memmap maps[RTE_AVP_MAX_MAPS];
+};
+
+/*
+ * AVP device memory validation marker
+ */
+#define RTE_AVP_DEVICE_MAGIC 0x20131975
+
+/**@{ AVP device map versions
+ * WARNING: do not change the format or names of these variables. They are
+ * automatically parsed from the build system to generate the SDK package
+ * name.
+ **/
+#define RTE_AVP_RELEASE_VERSION_1 1
+#define RTE_AVP_RELEASE_VERSION RTE_AVP_RELEASE_VERSION_1
+#define RTE_AVP_MAJOR_VERSION_0 0
+#define RTE_AVP_MAJOR_VERSION_1 1
+#define RTE_AVP_MAJOR_VERSION_2 2
+#define RTE_AVP_MAJOR_VERSION RTE_AVP_MAJOR_VERSION_2
+#define RTE_AVP_MINOR_VERSION_0 0
+#define RTE_AVP_MINOR_VERSION_1 1
+#define RTE_AVP_MINOR_VERSION_13 13
+#define RTE_AVP_MINOR_VERSION RTE_AVP_MINOR_VERSION_13
+/**@} */
+
+
+/**
+ * Generates a 32-bit version number from the specified version number
+ * components
+ */
+#define RTE_AVP_MAKE_VERSION(_release, _major, _minor) \
+((((_release) & 0xffff) << 16) | (((_major) & 0xff) << 8) | ((_minor) & 0xff))
+
+
+/**
+ * Represents the current version of the AVP host driver
+ * WARNING: in the current development branch the host and guest driver
+ * version should always be the same. When patching guest features back to
+ * GA releases the host version number should not be updated unless there was
+ * an actual change made to the host driver.
+ */
+#define RTE_AVP_CURRENT_HOST_VERSION \
+RTE_AVP_MAKE_VERSION(RTE_AVP_RELEASE_VERSION_1, \
+ RTE_AVP_MAJOR_VERSION_0, \
+ RTE_AVP_MINOR_VERSION_1)
+
+
+/**
+ * Represents the current version of the AVP guest drivers
+ */
+#define RTE_AVP_CURRENT_GUEST_VERSION \
+RTE_AVP_MAKE_VERSION(RTE_AVP_RELEASE_VERSION_1, \
+ RTE_AVP_MAJOR_VERSION_2, \
+ RTE_AVP_MINOR_VERSION_13)
+
+/**
+ * Access AVP device version values
+ */
+#define RTE_AVP_GET_RELEASE_VERSION(_version) (((_version) >> 16) & 0xffff)
+#define RTE_AVP_GET_MAJOR_VERSION(_version) (((_version) >> 8) & 0xff)
+#define RTE_AVP_GET_MINOR_VERSION(_version) ((_version) & 0xff)
+/**@}*/
+
+
+/**
+ * Remove the minor version number so that only the release and major versions
+ * are used for comparisons.
+ */
+#define RTE_AVP_STRIP_MINOR_VERSION(_version) ((_version) >> 8)
+
+
+/**
+ * Defines the number of mbuf pools supported per device (1 per socket)
+ */
+#define RTE_AVP_MAX_MEMPOOLS 8
+
+/*
+ * Defines address translation parameters for each support mbuf pool
+ */
+struct rte_avp_mempool_info {
+ void *addr;
+ phys_addr_t phys_addr;
+ uint64_t length;
+};
+
+/*
+ * Struct used to create a AVP device. Passed to the kernel in IOCTL call or
+ * via inter-VM shared memory when used in a guest.
+ */
+struct rte_avp_device_info {
+ uint32_t magic; /**< Memory validation marker */
+ uint32_t version; /**< Data format version */
+
+ char ifname[RTE_AVP_NAMESIZE]; /**< Network device name for AVP */
+
+ phys_addr_t tx_phys;
+ phys_addr_t rx_phys;
+ phys_addr_t alloc_phys;
+ phys_addr_t free_phys;
+
+ uint32_t features; /**< Supported feature bitmap */
+ uint8_t min_rx_queues; /**< Minimum supported receive/free queues */
+ uint8_t num_rx_queues; /**< Recommended number of receive/free queues */
+ uint8_t max_rx_queues; /**< Maximum supported receive/free queues */
+ uint8_t min_tx_queues; /**< Minimum supported transmit/alloc queues */
+ uint8_t num_tx_queues;
+ /**< Recommended number of transmit/alloc queues */
+ uint8_t max_tx_queues; /**< Maximum supported transmit/alloc queues */
+
+ uint32_t tx_size; /**< Size of each transmit queue */
+ uint32_t rx_size; /**< Size of each receive queue */
+ uint32_t alloc_size; /**< Size of each alloc queue */
+ uint32_t free_size; /**< Size of each free queue */
+
+ /* Used by Ethtool */
+ phys_addr_t req_phys;
+ phys_addr_t resp_phys;
+ phys_addr_t sync_phys;
+ void *sync_va;
+
+ /* mbuf mempool (used when a single memory area is supported) */
+ void *mbuf_va;
+ phys_addr_t mbuf_phys;
+
+ /* mbuf mempools */
+ struct rte_avp_mempool_info pool[RTE_AVP_MAX_MEMPOOLS];
+
+#ifdef __KERNEL__
+ /* Ethernet info */
+ char ethaddr[ETH_ALEN];
+#else
+ char ethaddr[ETHER_ADDR_LEN];
+#endif
+
+ uint8_t mode; /**< device mode, i.e guest, host, trace */
+
+ /* mbuf size */
+ unsigned int mbuf_size;
+
+ /*
+ * unique id to differentiate between two instantiations of the same
+ * AVP device (i.e., the guest needs to know if the device has been
+ * deleted and recreated).
+ */
+ uint64_t device_id;
+
+ uint32_t max_rx_pkt_len; /**< Maximum receive unit size */
+};
+
+#define RTE_AVP_MAX_QUEUES 8 /**< Maximum number of queues per device */
+
+/** Maximum number of chained mbufs in a packet */
+#define RTE_AVP_MAX_MBUF_SEGMENTS 5
+
+#define RTE_AVP_DEVICE "avp"
+
+#define RTE_AVP_IOCTL_TEST _IOWR(0, 1, int)
+#define RTE_AVP_IOCTL_CREATE _IOWR(0, 2, struct rte_avp_device_info)
+#define RTE_AVP_IOCTL_RELEASE _IOWR(0, 3, struct rte_avp_device_info)
+#define RTE_AVP_IOCTL_QUERY _IOWR(0, 4, struct rte_avp_device_config)
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_AVP_COMMON_H_ */
diff --git a/drivers/net/avp/rte_avp_fifo.h b/drivers/net/avp/rte_avp_fifo.h
new file mode 100644
index 00000000..803eb80a
--- /dev/null
+++ b/drivers/net/avp/rte_avp_fifo.h
@@ -0,0 +1,169 @@
+/*-
+ * This file is provided under a dual BSD/LGPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GNU LESSER GENERAL PUBLIC LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * Copyright(c) 2014 Wind River Systems, Inc. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2.1 of the GNU Lesser General Public License
+ * as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * Contact Information:
+ * Wind River Systems, Inc.
+ *
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2013 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013-2017 Wind River Systems, Inc. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ */
+
+#ifndef _RTE_AVP_FIFO_H_
+#define _RTE_AVP_FIFO_H_
+
+#include "rte_avp_common.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef __KERNEL__
+/* Write memory barrier for kernel compiles */
+#define AVP_WMB() smp_wmb()
+/* Read memory barrier for kernel compiles */
+#define AVP_RMB() smp_rmb()
+#else
+/* Write memory barrier for userspace compiles */
+#define AVP_WMB() rte_wmb()
+/* Read memory barrier for userspace compiles */
+#define AVP_RMB() rte_rmb()
+#endif
+
+#ifndef __KERNEL__
+#include <rte_debug.h>
+
+/**
+ * Initializes the avp fifo structure
+ */
+static inline void
+avp_fifo_init(struct rte_avp_fifo *fifo, unsigned int size)
+{
+ /* Ensure size is power of 2 */
+ if (size & (size - 1))
+ rte_panic("AVP fifo size must be power of 2\n");
+
+ fifo->write = 0;
+ fifo->read = 0;
+ fifo->len = size;
+ fifo->elem_size = sizeof(void *);
+}
+#endif
+
+/**
+ * Adds num elements into the fifo. Return the number actually written
+ */
+static inline unsigned
+avp_fifo_put(struct rte_avp_fifo *fifo, void **data, unsigned int num)
+{
+ unsigned int i = 0;
+ unsigned int fifo_write = fifo->write;
+ unsigned int fifo_read = fifo->read;
+ unsigned int new_write = fifo_write;
+
+ for (i = 0; i < num; i++) {
+ new_write = (new_write + 1) & (fifo->len - 1);
+
+ if (new_write == fifo_read)
+ break;
+ fifo->buffer[fifo_write] = data[i];
+ fifo_write = new_write;
+ }
+ AVP_WMB();
+ fifo->write = fifo_write;
+ return i;
+}
+
+/**
+ * Get up to num elements from the fifo. Return the number actually read
+ */
+static inline unsigned int
+avp_fifo_get(struct rte_avp_fifo *fifo, void **data, unsigned int num)
+{
+ unsigned int i = 0;
+ unsigned int new_read = fifo->read;
+ unsigned int fifo_write = fifo->write;
+
+ if (new_read == fifo_write)
+ return 0; /* empty */
+
+ for (i = 0; i < num; i++) {
+ if (new_read == fifo_write)
+ break;
+
+ data[i] = fifo->buffer[new_read];
+ new_read = (new_read + 1) & (fifo->len - 1);
+ }
+ AVP_RMB();
+ fifo->read = new_read;
+ return i;
+}
+
+/**
+ * Get the num of elements in the fifo
+ */
+static inline unsigned int
+avp_fifo_count(struct rte_avp_fifo *fifo)
+{
+ return (fifo->len + fifo->write - fifo->read) & (fifo->len - 1);
+}
+
+/**
+ * Get the num of available elements in the fifo
+ */
+static inline unsigned int
+avp_fifo_free_count(struct rte_avp_fifo *fifo)
+{
+ return (fifo->read - fifo->write - 1) & (fifo->len - 1);
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _RTE_AVP_FIFO_H_ */
diff --git a/drivers/net/avp/rte_pmd_avp_version.map b/drivers/net/avp/rte_pmd_avp_version.map
new file mode 100644
index 00000000..af8f3f47
--- /dev/null
+++ b/drivers/net/avp/rte_pmd_avp_version.map
@@ -0,0 +1,4 @@
+DPDK_17.05 {
+
+ local: *;
+};