diff options
Diffstat (limited to 'drivers/net/virtio')
-rw-r--r-- | drivers/net/virtio/Makefile | 63 | ||||
-rw-r--r-- | drivers/net/virtio/rte_pmd_virtio_version.map | 4 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_ethdev.c | 1461 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_ethdev.h | 125 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_logs.h | 70 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_pci.c | 666 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_pci.h | 314 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_ring.h | 163 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_rxtx.c | 940 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_rxtx.h | 41 | ||||
-rw-r--r-- | drivers/net/virtio/virtio_rxtx_simple.c | 418 | ||||
-rw-r--r-- | drivers/net/virtio/virtqueue.c | 72 | ||||
-rw-r--r-- | drivers/net/virtio/virtqueue.h | 344 |
13 files changed, 4681 insertions, 0 deletions
diff --git a/drivers/net/virtio/Makefile b/drivers/net/virtio/Makefile new file mode 100644 index 00000000..ef84f604 --- /dev/null +++ b/drivers/net/virtio/Makefile @@ -0,0 +1,63 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pmd_virtio.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +EXPORT_MAP := rte_pmd_virtio_version.map + +LIBABIVER := 1 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtqueue.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_pci.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx.c +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_ethdev.c + +ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSSE3,$(CFLAGS)),RTE_MACHINE_CPUFLAG_SSSE3) +SRCS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += virtio_rxtx_simple.c +endif + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_eal lib/librte_ether +DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_mempool lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_VIRTIO_PMD) += lib/librte_net + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/virtio/rte_pmd_virtio_version.map b/drivers/net/virtio/rte_pmd_virtio_version.map new file mode 100644 index 00000000..ef353984 --- /dev/null +++ b/drivers/net/virtio/rte_pmd_virtio_version.map @@ -0,0 +1,4 @@ +DPDK_2.0 { + + local: *; +}; diff --git a/drivers/net/virtio/virtio_ethdev.c b/drivers/net/virtio/virtio_ethdev.c new file mode 100644 index 00000000..63a368ac --- /dev/null +++ b/drivers/net/virtio/virtio_ethdev.c @@ -0,0 +1,1461 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> + +#include <rte_ethdev.h> +#include <rte_memcpy.h> +#include <rte_string_fns.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_pci.h> +#include <rte_ether.h> +#include <rte_common.h> +#include <rte_errno.h> + +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_dev.h> + +#include "virtio_ethdev.h" +#include "virtio_pci.h" +#include "virtio_logs.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" + + +static int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); +static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); +static int virtio_dev_configure(struct rte_eth_dev *dev); +static int virtio_dev_start(struct rte_eth_dev *dev); +static void virtio_dev_stop(struct rte_eth_dev *dev); +static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev); +static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev); +static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev); +static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev); +static void virtio_dev_info_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +static int virtio_dev_link_update(struct rte_eth_dev *dev, + __rte_unused int wait_to_complete); + +static void virtio_set_hwaddr(struct virtio_hw *hw); +static void virtio_get_hwaddr(struct virtio_hw *hw); + +static void virtio_dev_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats); +static int virtio_dev_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstats *xstats, unsigned n); +static void virtio_dev_stats_reset(struct rte_eth_dev *dev); +static void virtio_dev_free_mbufs(struct rte_eth_dev *dev); +static int virtio_vlan_filter_set(struct rte_eth_dev *dev, + uint16_t vlan_id, int on); +static void virtio_mac_addr_add(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint32_t index, 
uint32_t vmdq __rte_unused); +static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index); +static void virtio_mac_addr_set(struct rte_eth_dev *dev, + struct ether_addr *mac_addr); + +static int virtio_dev_queue_stats_mapping_set( + __rte_unused struct rte_eth_dev *eth_dev, + __rte_unused uint16_t queue_id, + __rte_unused uint8_t stat_idx, + __rte_unused uint8_t is_rx); + +/* + * The set of PCI devices this driver supports + */ +static const struct rte_pci_id pci_id_virtio_map[] = { + +#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) {RTE_PCI_DEVICE(vend, dev)}, +#include "rte_pci_dev_ids.h" + +{ .vendor_id = 0, /* sentinel */ }, +}; + +struct rte_virtio_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned offset; +}; + +/* [rt]x_qX_ is prepended to the name string here */ +static const struct rte_virtio_xstats_name_off rte_virtio_q_stat_strings[] = { + {"good_packets", offsetof(struct virtqueue, packets)}, + {"good_bytes", offsetof(struct virtqueue, bytes)}, + {"errors", offsetof(struct virtqueue, errors)}, + {"multicast_packets", offsetof(struct virtqueue, multicast)}, + {"broadcast_packets", offsetof(struct virtqueue, broadcast)}, + {"undersize_packets", offsetof(struct virtqueue, size_bins[0])}, + {"size_64_packets", offsetof(struct virtqueue, size_bins[1])}, + {"size_65_127_packets", offsetof(struct virtqueue, size_bins[2])}, + {"size_128_255_packets", offsetof(struct virtqueue, size_bins[3])}, + {"size_256_511_packets", offsetof(struct virtqueue, size_bins[4])}, + {"size_512_1023_packets", offsetof(struct virtqueue, size_bins[5])}, + {"size_1024_1517_packets", offsetof(struct virtqueue, size_bins[6])}, + {"size_1518_max_packets", offsetof(struct virtqueue, size_bins[7])}, +}; + +#define VIRTIO_NB_Q_XSTATS (sizeof(rte_virtio_q_stat_strings) / \ + sizeof(rte_virtio_q_stat_strings[0])) + +static int +virtio_send_command(struct virtqueue *vq, struct virtio_pmd_ctrl *ctrl, + int *dlen, int pkt_num) +{ + uint32_t head, i; + int k, sum = 
0; + virtio_net_ctrl_ack status = ~0; + struct virtio_pmd_ctrl result; + + ctrl->status = status; + + if (!(vq && vq->hw->cvq)) { + PMD_INIT_LOG(ERR, "Control queue is not supported."); + return -1; + } + head = vq->vq_desc_head_idx; + + PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, " + "vq->hw->cvq = %p vq = %p", + vq->vq_desc_head_idx, status, vq->hw->cvq, vq); + + if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) + return -1; + + memcpy(vq->virtio_net_hdr_mz->addr, ctrl, + sizeof(struct virtio_pmd_ctrl)); + + /* + * Format is enforced in qemu code: + * One TX packet for header; + * At least one TX packet per argument; + * One RX packet for ACK. + */ + vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; + vq->vq_ring.desc[head].addr = vq->virtio_net_hdr_mz->phys_addr; + vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); + vq->vq_free_cnt--; + i = vq->vq_ring.desc[head].next; + + for (k = 0; k < pkt_num; k++) { + vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + + sizeof(struct virtio_net_ctrl_hdr) + + sizeof(ctrl->status) + sizeof(uint8_t)*sum; + vq->vq_ring.desc[i].len = dlen[k]; + sum += dlen[k]; + vq->vq_free_cnt--; + i = vq->vq_ring.desc[i].next; + } + + vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; + vq->vq_ring.desc[i].addr = vq->virtio_net_hdr_mz->phys_addr + + sizeof(struct virtio_net_ctrl_hdr); + vq->vq_ring.desc[i].len = sizeof(ctrl->status); + vq->vq_free_cnt--; + + vq->vq_desc_head_idx = vq->vq_ring.desc[i].next; + + vq_update_avail_ring(vq, head); + vq_update_avail_idx(vq); + + PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index); + + virtqueue_notify(vq); + + rte_rmb(); + while (vq->vq_used_cons_idx == vq->vq_ring.used->idx) { + rte_rmb(); + usleep(100); + } + + while (vq->vq_used_cons_idx != vq->vq_ring.used->idx) { + uint32_t idx, desc_idx, used_idx; + struct vring_used_elem *uep; + + used_idx = (uint32_t)(vq->vq_used_cons_idx + 
& (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + idx = (uint32_t) uep->id; + desc_idx = idx; + + while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) { + desc_idx = vq->vq_ring.desc[desc_idx].next; + vq->vq_free_cnt++; + } + + vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx; + vq->vq_desc_head_idx = idx; + + vq->vq_used_cons_idx++; + vq->vq_free_cnt++; + } + + PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d", + vq->vq_free_cnt, vq->vq_desc_head_idx); + + memcpy(&result, vq->virtio_net_hdr_mz->addr, + sizeof(struct virtio_pmd_ctrl)); + + return result.status; +} + +static int +virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + ctrl.hdr.class = VIRTIO_NET_CTRL_MQ; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; + memcpy(ctrl.data, &nb_queues, sizeof(uint16_t)); + + dlen[0] = sizeof(uint16_t); + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) { + PMD_INIT_LOG(ERR, "Multiqueue configured but send command " + "failed, this is too late now..."); + return -EINVAL; + } + + return 0; +} + +void +virtio_dev_queue_release(struct virtqueue *vq) { + struct virtio_hw *hw; + + if (vq) { + hw = vq->hw; + hw->vtpci_ops->del_queue(hw, vq); + + rte_free(vq->sw_ring); + rte_free(vq); + } +} + +int virtio_dev_queue_setup(struct rte_eth_dev *dev, + int queue_type, + uint16_t queue_idx, + uint16_t vtpci_queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + struct virtqueue **pvq) +{ + char vq_name[VIRTQUEUE_MAX_NAME_SZ]; + const struct rte_memzone *mz; + unsigned int vq_size, size; + struct virtio_hw *hw = dev->data->dev_private; + struct virtqueue *vq = NULL; + + PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx); + + /* + * Read the virtqueue size from the Queue Size field + * Always power of 2 and if 0 virtqueue does not exist + */ + vq_size = 
hw->vtpci_ops->get_queue_num(hw, vtpci_queue_idx); + PMD_INIT_LOG(DEBUG, "vq_size: %u nb_desc:%u", vq_size, nb_desc); + if (vq_size == 0) { + PMD_INIT_LOG(ERR, "virtqueue does not exist"); + return -EINVAL; + } + + if (!rte_is_power_of_2(vq_size)) { + PMD_INIT_LOG(ERR, "virtqueue size is not powerof 2"); + return -EINVAL; + } + + if (queue_type == VTNET_RQ) { + snprintf(vq_name, sizeof(vq_name), "port%d_rvq%d", + dev->data->port_id, queue_idx); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + + vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); + /* + * Only allocate the RX software ring when the virtqueue itself was + * allocated; a NULL vq is reported by the "vq == NULL" check below. + */ + if (vq != NULL) + vq->sw_ring = rte_zmalloc_socket("rxq->sw_ring", + (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * + sizeof(vq->sw_ring[0]), RTE_CACHE_LINE_SIZE, socket_id); + } else if (queue_type == VTNET_TQ) { + snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d", + dev->data->port_id, queue_idx); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + + vq_size * sizeof(struct vq_desc_extra), RTE_CACHE_LINE_SIZE); + } else if (queue_type == VTNET_CQ) { + snprintf(vq_name, sizeof(vq_name), "port%d_cvq", + dev->data->port_id); + vq = rte_zmalloc(vq_name, sizeof(struct virtqueue) + + vq_size * sizeof(struct vq_desc_extra), + RTE_CACHE_LINE_SIZE); + } + if (vq == NULL) { + PMD_INIT_LOG(ERR, "Can not allocate virtqueue"); + return -ENOMEM; + } + if (queue_type == VTNET_RQ && vq->sw_ring == NULL) { + PMD_INIT_LOG(ERR, "Can not allocate RX soft ring"); + rte_free(vq); + return -ENOMEM; + } + + vq->hw = hw; + vq->port_id = dev->data->port_id; + vq->queue_id = queue_idx; + vq->vq_queue_index = vtpci_queue_idx; + vq->vq_nentries = vq_size; + + if (nb_desc == 0 || nb_desc > vq_size) + nb_desc = vq_size; + vq->vq_free_cnt = nb_desc; + + /* + * Reserve a memzone for vring elements + */ + size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN); + vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN); + PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", size, vq->vq_ring_size); + + mz = rte_memzone_reserve_aligned(vq_name, 
vq->vq_ring_size, + socket_id, 0, VIRTIO_PCI_VRING_ALIGN); + if (mz == NULL) { + if (rte_errno == EEXIST) + mz = rte_memzone_lookup(vq_name); + if (mz == NULL) { + /* sw_ring is only allocated for RX queues; rte_free() ignores NULL */ + rte_free(vq->sw_ring); + rte_free(vq); + return -ENOMEM; + } + } + + /* + * Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, + * and only accepts 32 bit page frame number. + * Check if the allocated physical memory exceeds 16TB. + */ + if ((mz->phys_addr + vq->vq_ring_size - 1) >> (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + rte_free(vq->sw_ring); + rte_free(vq); + return -ENOMEM; + } + + /* + * Zero the whole vring memzone: sizeof(mz->len) would only clear as + * many bytes as the length field itself occupies. + */ + memset(mz->addr, 0, mz->len); + vq->mz = mz; + vq->vq_ring_mem = mz->phys_addr; + vq->vq_ring_virt_mem = mz->addr; + PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%"PRIx64, (uint64_t)mz->phys_addr); + PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%"PRIx64, (uint64_t)(uintptr_t)mz->addr); + vq->virtio_net_hdr_mz = NULL; + vq->virtio_net_hdr_mem = 0; + + if (queue_type == VTNET_TQ) { + const struct rte_memzone *hdr_mz; + struct virtio_tx_region *txr; + unsigned int i; + + /* + * For each xmit packet, allocate a virtio_net_hdr + * and indirect ring elements + */ + snprintf(vq_name, sizeof(vq_name), "port%d_tvq%d_hdrzone", + dev->data->port_id, queue_idx); + hdr_mz = rte_memzone_reserve_aligned(vq_name, + vq_size * sizeof(*txr), + socket_id, 0, + RTE_CACHE_LINE_SIZE); + if (hdr_mz == NULL) { + if (rte_errno == EEXIST) + hdr_mz = rte_memzone_lookup(vq_name); + if (hdr_mz == NULL) { + rte_free(vq->sw_ring); + rte_free(vq); + return -ENOMEM; + } + } + vq->virtio_net_hdr_mz = hdr_mz; + vq->virtio_net_hdr_mem = hdr_mz->phys_addr; + + txr = hdr_mz->addr; + memset(txr, 0, vq_size * sizeof(*txr)); + for (i = 0; i < vq_size; i++) { + struct vring_desc *start_dp = txr[i].tx_indir; + + vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); + + /* first indirect descriptor is always the tx header */ + start_dp->addr = vq->virtio_net_hdr_mem + + i * sizeof(*txr) + + offsetof(struct virtio_tx_region, tx_hdr); + + start_dp->len = 
vq->hw->vtnet_hdr_size; + start_dp->flags = VRING_DESC_F_NEXT; + } + + } else if (queue_type == VTNET_CQ) { + /* Allocate a page for control vq command, data and status */ + snprintf(vq_name, sizeof(vq_name), "port%d_cvq_hdrzone", + dev->data->port_id); + vq->virtio_net_hdr_mz = rte_memzone_reserve_aligned(vq_name, + PAGE_SIZE, socket_id, 0, RTE_CACHE_LINE_SIZE); + if (vq->virtio_net_hdr_mz == NULL) { + if (rte_errno == EEXIST) + vq->virtio_net_hdr_mz = + rte_memzone_lookup(vq_name); + if (vq->virtio_net_hdr_mz == NULL) { + rte_free(vq); + return -ENOMEM; + } + } + vq->virtio_net_hdr_mem = + vq->virtio_net_hdr_mz->phys_addr; + memset(vq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); + } + + hw->vtpci_ops->setup_queue(hw, vq); + + *pvq = vq; + return 0; +} + +static int +virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx, + uint32_t socket_id) +{ + struct virtqueue *vq; + int ret; + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX, + vtpci_queue_idx, 0, socket_id, &vq); + if (ret < 0) { + PMD_INIT_LOG(ERR, "control vq initialization failed"); + return ret; + } + + hw->cvq = vq; + return 0; +} + +static void +virtio_free_queues(struct rte_eth_dev *dev) +{ + unsigned int i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) + virtio_dev_rx_queue_release(dev->data->rx_queues[i]); + + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) + virtio_dev_tx_queue_release(dev->data->tx_queues[i]); + + dev->data->nb_tx_queues = 0; +} + +static void +virtio_dev_close(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct rte_pci_device *pci_dev = dev->pci_dev; + + PMD_INIT_LOG(DEBUG, "virtio_dev_close"); + + if (hw->started == 1) + virtio_dev_stop(dev); + + /* reset the NIC */ + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); + vtpci_reset(hw); + 
virtio_dev_free_mbufs(dev); + virtio_free_queues(dev); +} + +static void +virtio_dev_promiscuous_enable(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { + PMD_INIT_LOG(INFO, "host does not support rx control\n"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_RX; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC; + ctrl.data[0] = 1; + dlen[0] = 1; + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) + PMD_INIT_LOG(ERR, "Failed to enable promisc"); +} + +static void +virtio_dev_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { + PMD_INIT_LOG(INFO, "host does not support rx control\n"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_RX; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC; + ctrl.data[0] = 0; + dlen[0] = 1; + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) + PMD_INIT_LOG(ERR, "Failed to disable promisc"); +} + +static void +virtio_dev_allmulticast_enable(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { + PMD_INIT_LOG(INFO, "host does not support rx control\n"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_RX; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI; + ctrl.data[0] = 1; + dlen[0] = 1; + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) + PMD_INIT_LOG(ERR, "Failed to enable allmulticast"); +} + +static void +virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { + PMD_INIT_LOG(INFO, "host does not 
support rx control\n"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_RX; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI; + ctrl.data[0] = 0; + dlen[0] = 1; + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) + PMD_INIT_LOG(ERR, "Failed to disable allmulticast"); +} + +/* + * dev_ops for virtio, bare necessities for basic operation + */ +static const struct eth_dev_ops virtio_eth_dev_ops = { + .dev_configure = virtio_dev_configure, + .dev_start = virtio_dev_start, + .dev_stop = virtio_dev_stop, + .dev_close = virtio_dev_close, + .promiscuous_enable = virtio_dev_promiscuous_enable, + .promiscuous_disable = virtio_dev_promiscuous_disable, + .allmulticast_enable = virtio_dev_allmulticast_enable, + .allmulticast_disable = virtio_dev_allmulticast_disable, + + .dev_infos_get = virtio_dev_info_get, + .stats_get = virtio_dev_stats_get, + .xstats_get = virtio_dev_xstats_get, + .stats_reset = virtio_dev_stats_reset, + .xstats_reset = virtio_dev_stats_reset, + .link_update = virtio_dev_link_update, + .rx_queue_setup = virtio_dev_rx_queue_setup, + .rx_queue_release = virtio_dev_rx_queue_release, + .tx_queue_setup = virtio_dev_tx_queue_setup, + .tx_queue_release = virtio_dev_tx_queue_release, + /* collect stats per queue */ + .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set, + .vlan_filter_set = virtio_vlan_filter_set, + .mac_addr_add = virtio_mac_addr_add, + .mac_addr_remove = virtio_mac_addr_remove, + .mac_addr_set = virtio_mac_addr_set, +}; + +static inline int +virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + struct rte_eth_link *dst = link; + struct rte_eth_link *src = &(dev->data->dev_link); + + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, + *(uint64_t *)src) == 0) + return -1; + + return 0; +} + +/** + * Atomically writes the link status information into global + * structure rte_eth_dev. + * + * @param dev + * - Pointer to the structure rte_eth_dev to read from. 
+ * - Pointer to the buffer to be saved with the link status. + * + * @return + * - On success, zero. + * - On failure, negative value. + */ +static inline int +virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + struct rte_eth_link *dst = &(dev->data->dev_link); + struct rte_eth_link *src = link; + + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, + *(uint64_t *)src) == 0) + return -1; + + return 0; +} + +static void +virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + unsigned i; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + const struct virtqueue *txvq = dev->data->tx_queues[i]; + if (txvq == NULL) + continue; + + stats->opackets += txvq->packets; + stats->obytes += txvq->bytes; + stats->oerrors += txvq->errors; + + if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + stats->q_opackets[i] = txvq->packets; + stats->q_obytes[i] = txvq->bytes; + } + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + const struct virtqueue *rxvq = dev->data->rx_queues[i]; + if (rxvq == NULL) + continue; + + stats->ipackets += rxvq->packets; + stats->ibytes += rxvq->bytes; + stats->ierrors += rxvq->errors; + + if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + stats->q_ipackets[i] = rxvq->packets; + stats->q_ibytes[i] = rxvq->bytes; + } + } + + stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; +} + +static int +virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstats *xstats, + unsigned n) +{ + unsigned i; + unsigned count = 0; + + unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_Q_XSTATS + + dev->data->nb_rx_queues * VIRTIO_NB_Q_XSTATS; + + if (n < nstats) + return nstats; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtqueue *rxvq = dev->data->rx_queues[i]; + + if (rxvq == NULL) + continue; + + unsigned t; + + for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) { + snprintf(xstats[count].name, sizeof(xstats[count].name), + "rx_q%u_%s", i, + rte_virtio_q_stat_strings[t].name); + 
xstats[count].value = *(uint64_t *)(((char *)rxvq) + + rte_virtio_q_stat_strings[t].offset); + count++; + } + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtqueue *txvq = dev->data->tx_queues[i]; + + if (txvq == NULL) + continue; + + unsigned t; + + for (t = 0; t < VIRTIO_NB_Q_XSTATS; t++) { + snprintf(xstats[count].name, sizeof(xstats[count].name), + "tx_q%u_%s", i, + rte_virtio_q_stat_strings[t].name); + xstats[count].value = *(uint64_t *)(((char *)txvq) + + rte_virtio_q_stat_strings[t].offset); + count++; + } + } + + return count; +} + +static void +virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + virtio_update_stats(dev, stats); +} + +static void +virtio_dev_stats_reset(struct rte_eth_dev *dev) +{ + unsigned int i; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtqueue *txvq = dev->data->tx_queues[i]; + if (txvq == NULL) + continue; + + txvq->packets = 0; + txvq->bytes = 0; + txvq->errors = 0; + txvq->multicast = 0; + txvq->broadcast = 0; + memset(txvq->size_bins, 0, sizeof(txvq->size_bins[0]) * 8); + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtqueue *rxvq = dev->data->rx_queues[i]; + if (rxvq == NULL) + continue; + + rxvq->packets = 0; + rxvq->bytes = 0; + rxvq->errors = 0; + rxvq->multicast = 0; + rxvq->broadcast = 0; + memset(rxvq->size_bins, 0, sizeof(rxvq->size_bins[0]) * 8); + } +} + +static void +virtio_set_hwaddr(struct virtio_hw *hw) +{ + vtpci_write_dev_config(hw, + offsetof(struct virtio_net_config, mac), + &hw->mac_addr, ETHER_ADDR_LEN); +} + +static void +virtio_get_hwaddr(struct virtio_hw *hw) +{ + if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) { + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, mac), + &hw->mac_addr, ETHER_ADDR_LEN); + } else { + eth_random_addr(&hw->mac_addr[0]); + virtio_set_hwaddr(hw); + } +} + +static void +virtio_mac_table_set(struct virtio_hw *hw, + const struct virtio_net_ctrl_mac *uc, + const struct virtio_net_ctrl_mac 
*mc) +{ + struct virtio_pmd_ctrl ctrl; + int err, len[2]; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { + PMD_DRV_LOG(INFO, "host does not support mac table\n"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_MAC; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; + + len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries); + memcpy(ctrl.data, uc, len[0]); + + len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries); + memcpy(ctrl.data + len[0], mc, len[1]); + + err = virtio_send_command(hw->cvq, &ctrl, len, 2); + if (err != 0) + PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err); +} + +static void +virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr, + uint32_t index, uint32_t vmdq __rte_unused) +{ + struct virtio_hw *hw = dev->data->dev_private; + const struct ether_addr *addrs = dev->data->mac_addrs; + unsigned int i; + struct virtio_net_ctrl_mac *uc, *mc; + + if (index >= VIRTIO_MAX_MAC_ADDRS) { + PMD_DRV_LOG(ERR, "mac address index %u out of range", index); + return; + } + + uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries)); + uc->entries = 0; + mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries)); + mc->entries = 0; + + for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) { + const struct ether_addr *addr + = (i == index) ? mac_addr : addrs + i; + struct virtio_net_ctrl_mac *tbl + = is_multicast_ether_addr(addr) ? 
mc : uc; + + memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN); + } + + virtio_mac_table_set(hw, uc, mc); +} + +static void +virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct ether_addr *addrs = dev->data->mac_addrs; + struct virtio_net_ctrl_mac *uc, *mc; + unsigned int i; + + if (index >= VIRTIO_MAX_MAC_ADDRS) { + PMD_DRV_LOG(ERR, "mac address index %u out of range", index); + return; + } + + uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries)); + uc->entries = 0; + mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries)); + mc->entries = 0; + + for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) { + struct virtio_net_ctrl_mac *tbl; + + if (i == index || is_zero_ether_addr(addrs + i)) + continue; + + tbl = is_multicast_ether_addr(addrs + i) ? mc : uc; + memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN); + } + + virtio_mac_table_set(hw, uc, mc); +} + +static void +virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) +{ + struct virtio_hw *hw = dev->data->dev_private; + + memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN); + + /* Use atomic update if available */ + if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { + struct virtio_pmd_ctrl ctrl; + int len = ETHER_ADDR_LEN; + + ctrl.hdr.class = VIRTIO_NET_CTRL_MAC; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; + + memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN); + virtio_send_command(hw->cvq, &ctrl, &len, 1); + } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) + virtio_set_hwaddr(hw); +} + +static int +virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int len; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) + return -ENOTSUP; + + ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN; + ctrl.hdr.cmd = on ? 
VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; + memcpy(ctrl.data, &vlan_id, sizeof(vlan_id)); + len = sizeof(vlan_id); + + return virtio_send_command(hw->cvq, &ctrl, &len, 1); +} + +static int +virtio_negotiate_features(struct virtio_hw *hw) +{ + uint64_t host_features; + + /* Prepare guest_features: feature that driver wants to support */ + hw->guest_features = VIRTIO_PMD_GUEST_FEATURES; + PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64, + hw->guest_features); + + /* Read device(host) feature bits */ + host_features = hw->vtpci_ops->get_features(hw); + PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64, + host_features); + + /* + * Negotiate features: Subset of device feature bits are written back + * guest feature bits. + */ + hw->guest_features = vtpci_negotiate_features(hw, host_features); + PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64, + hw->guest_features); + + if (hw->modern) { + if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) { + PMD_INIT_LOG(ERR, + "VIRTIO_F_VERSION_1 features is not enabled."); + return -1; + } + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK); + if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) { + PMD_INIT_LOG(ERR, + "failed to set FEATURES_OK status!"); + return -1; + } + } + + return 0; +} + +/* + * Process Virtio Config changed interrupt and call the callback + * if link state changed. 
+ */ +static void +virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle, + void *param) +{ + struct rte_eth_dev *dev = param; + struct virtio_hw *hw = dev->data->dev_private; + uint8_t isr; + + /* Read interrupt status which clears interrupt */ + isr = vtpci_isr(hw); + PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); + + if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) + PMD_DRV_LOG(ERR, "interrupt enable failed"); + + if (isr & VIRTIO_PCI_ISR_CONFIG) { + if (virtio_dev_link_update(dev, 0) == 0) + _rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC); + } + +} + +static void +rx_func_get(struct rte_eth_dev *eth_dev) +{ + struct virtio_hw *hw = eth_dev->data->dev_private; + if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) + eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts; + else + eth_dev->rx_pkt_burst = &virtio_recv_pkts; +} + +/* + * This function is based on probe() function in virtio_pci.c + * It returns 0 on success. + */ +static int +eth_virtio_dev_init(struct rte_eth_dev *eth_dev) +{ + struct virtio_hw *hw = eth_dev->data->dev_private; + struct virtio_net_config *config; + struct virtio_net_config local_config; + struct rte_pci_device *pci_dev; + int ret; + + RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr)); + + eth_dev->dev_ops = &virtio_eth_dev_ops; + eth_dev->tx_pkt_burst = &virtio_xmit_pkts; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + rx_func_get(eth_dev); + return 0; + } + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_INIT_LOG(ERR, + "Failed to allocate %d bytes needed to store MAC addresses", + VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN); + return -ENOMEM; + } + + pci_dev = eth_dev->pci_dev; + + ret = vtpci_init(pci_dev, hw); + if (ret) + return ret; + + /* Reset the device although not necessary at startup */ + vtpci_reset(hw); + + /* Tell the 
host we've noticed this device. */ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK); + + /* Tell the host we've known how to drive the device. */ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER); + if (virtio_negotiate_features(hw) < 0) + return -1; + + /* If host does not support status then disable LSC */ + if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) + pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + + rte_eth_copy_pci_info(eth_dev, pci_dev); + + rx_func_get(eth_dev); + + /* Setting up rx_header size for the device */ + if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) || + vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) + hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); + else + hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr); + + /* Copy the permanent MAC address to: virtio_hw */ + virtio_get_hwaddr(hw); + ether_addr_copy((struct ether_addr *) hw->mac_addr, + ð_dev->data->mac_addrs[0]); + PMD_INIT_LOG(DEBUG, + "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X", + hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2], + hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]); + + if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) { + config = &local_config; + + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, mac), + &config->mac, sizeof(config->mac)); + + if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, status), + &config->status, sizeof(config->status)); + } else { + PMD_INIT_LOG(DEBUG, + "VIRTIO_NET_F_STATUS is not supported"); + config->status = 0; + } + + if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) { + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, max_virtqueue_pairs), + &config->max_virtqueue_pairs, + sizeof(config->max_virtqueue_pairs)); + } else { + PMD_INIT_LOG(DEBUG, + "VIRTIO_NET_F_MQ is not supported"); + config->max_virtqueue_pairs = 1; + } + + hw->max_rx_queues = + (VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ? 
+ VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs; + hw->max_tx_queues = + (VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ? + VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs; + + virtio_dev_cq_queue_setup(eth_dev, + config->max_virtqueue_pairs * 2, + SOCKET_ID_ANY); + + PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d", + config->max_virtqueue_pairs); + PMD_INIT_LOG(DEBUG, "config->status=%d", config->status); + PMD_INIT_LOG(DEBUG, + "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X", + config->mac[0], config->mac[1], + config->mac[2], config->mac[3], + config->mac[4], config->mac[5]); + } else { + hw->max_rx_queues = 1; + hw->max_tx_queues = 1; + } + + PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d", + hw->max_rx_queues, hw->max_tx_queues); + PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", + eth_dev->data->port_id, pci_dev->id.vendor_id, + pci_dev->id.device_id); + + /* Setup interrupt callback */ + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + rte_intr_callback_register(&pci_dev->intr_handle, + virtio_interrupt_handler, eth_dev); + + virtio_dev_cq_start(eth_dev); + + return 0; +} + +static int +eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev; + struct virtio_hw *hw = eth_dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return -EPERM; + + /* Close it anyway since there's no way to know if closed */ + virtio_dev_close(eth_dev); + + pci_dev = eth_dev->pci_dev; + + eth_dev->dev_ops = NULL; + eth_dev->tx_pkt_burst = NULL; + eth_dev->rx_pkt_burst = NULL; + + virtio_dev_queue_release(hw->cvq); + + rte_free(eth_dev->data->mac_addrs); + eth_dev->data->mac_addrs = NULL; + + /* reset interrupt callback */ + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + rte_intr_callback_unregister(&pci_dev->intr_handle, + virtio_interrupt_handler, + eth_dev); + rte_eal_pci_unmap_device(pci_dev); + + PMD_INIT_LOG(DEBUG, "dev_uninit 
completed"); + + return 0; +} + +static struct eth_driver rte_virtio_pmd = { + .pci_drv = { + .name = "rte_virtio_pmd", + .id_table = pci_id_virtio_map, + .drv_flags = RTE_PCI_DRV_DETACHABLE, + }, + .eth_dev_init = eth_virtio_dev_init, + .eth_dev_uninit = eth_virtio_dev_uninit, + .dev_private_size = sizeof(struct virtio_hw), +}; + +/* + * Driver initialization routine. + * Invoked once at EAL init time. + * Register itself as the [Poll Mode] Driver of PCI virtio devices. + * Returns 0 on success. + */ +static int +rte_virtio_pmd_init(const char *name __rte_unused, + const char *param __rte_unused) +{ + if (rte_eal_iopl_init() != 0) { + PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD"); + return -1; + } + + rte_eth_driver_register(&rte_virtio_pmd); + return 0; +} + +/* + * Configure virtio device + * It returns 0 on success. + */ +static int +virtio_dev_configure(struct rte_eth_dev *dev) +{ + const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; + struct virtio_hw *hw = dev->data->dev_private; + struct rte_pci_device *pci_dev = dev->pci_dev; + + PMD_INIT_LOG(DEBUG, "configure"); + + if (rxmode->hw_ip_checksum) { + PMD_DRV_LOG(ERR, "HW IP checksum not supported"); + return -EINVAL; + } + + hw->vlan_strip = rxmode->hw_vlan_strip; + + if (rxmode->hw_vlan_filter + && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) { + PMD_DRV_LOG(NOTICE, + "vlan filtering not available on this host"); + return -ENOTSUP; + } + + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) { + PMD_DRV_LOG(ERR, "failed to set config vector"); + return -EBUSY; + } + + return 0; +} + + +static int +virtio_dev_start(struct rte_eth_dev *dev) +{ + uint16_t nb_queues, i; + struct virtio_hw *hw = dev->data->dev_private; + struct rte_pci_device *pci_dev = dev->pci_dev; + + /* check if lsc interrupt feature is enabled */ + if (dev->data->dev_conf.intr_conf.lsc) { + if (!(pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC)) { 
+ PMD_DRV_LOG(ERR, "link status not supported by host"); + return -ENOTSUP; + } + + if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) { + PMD_DRV_LOG(ERR, "interrupt enable failed"); + return -EIO; + } + } + + /* Initialize Link state */ + virtio_dev_link_update(dev, 0); + + /* On restart after stop do not touch queues */ + if (hw->started) + return 0; + + /* Do final configuration before rx/tx engine starts */ + virtio_dev_rxtx_start(dev); + vtpci_reinit_complete(hw); + + hw->started = 1; + + /*Notify the backend + *Otherwise the tap backend might already stop its queue due to fullness. + *vhost backend will have no chance to be waked up + */ + nb_queues = dev->data->nb_rx_queues; + if (nb_queues > 1) { + if (virtio_set_multiple_queues(dev, nb_queues) != 0) + return -EINVAL; + } + + PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues); + + for (i = 0; i < nb_queues; i++) + virtqueue_notify(dev->data->rx_queues[i]); + + PMD_INIT_LOG(DEBUG, "Notified backend at initialization"); + + for (i = 0; i < dev->data->nb_rx_queues; i++) + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + + for (i = 0; i < dev->data->nb_tx_queues; i++) + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + + return 0; +} + +static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) +{ + struct rte_mbuf *buf; + int i, mbuf_num = 0; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + PMD_INIT_LOG(DEBUG, + "Before freeing rxq[%d] used and unused buf", i); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + + PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p", + i, dev->data->rx_queues[i]); + while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused( + dev->data->rx_queues[i])) != NULL) { + rte_pktmbuf_free(buf); + mbuf_num++; + } + + PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num); + PMD_INIT_LOG(DEBUG, + "After freeing rxq[%d] used and unused buf", i); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + 
PMD_INIT_LOG(DEBUG, + "Before freeing txq[%d] used and unused bufs", + i); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + + mbuf_num = 0; + while ((buf = (struct rte_mbuf *)virtqueue_detatch_unused( + dev->data->tx_queues[i])) != NULL) { + rte_pktmbuf_free(buf); + + mbuf_num++; + } + + PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num); + PMD_INIT_LOG(DEBUG, + "After freeing txq[%d] used and unused buf", i); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + } +} + +/* + * Stop device: disable interrupt and mark link down + */ +static void +virtio_dev_stop(struct rte_eth_dev *dev) +{ + struct rte_eth_link link; + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_LOG(DEBUG, "stop"); + + hw->started = 0; + + if (dev->data->dev_conf.intr_conf.lsc) + rte_intr_disable(&dev->pci_dev->intr_handle); + + memset(&link, 0, sizeof(link)); + virtio_dev_atomic_write_link_status(dev, &link); +} + +static int +virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) +{ + struct rte_eth_link link, old; + uint16_t status; + struct virtio_hw *hw = dev->data->dev_private; + memset(&link, 0, sizeof(link)); + virtio_dev_atomic_read_link_status(dev, &link); + old = link; + link.link_duplex = ETH_LINK_FULL_DUPLEX; + link.link_speed = SPEED_10G; + + if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { + PMD_INIT_LOG(DEBUG, "Get link status from hw"); + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, status), + &status, sizeof(status)); + if ((status & VIRTIO_NET_S_LINK_UP) == 0) { + link.link_status = ETH_LINK_DOWN; + PMD_INIT_LOG(DEBUG, "Port %d is down", + dev->data->port_id); + } else { + link.link_status = ETH_LINK_UP; + PMD_INIT_LOG(DEBUG, "Port %d is up", + dev->data->port_id); + } + } else { + link.link_status = ETH_LINK_UP; + } + virtio_dev_atomic_write_link_status(dev, &link); + + return (old.link_status == link.link_status) ? 
-1 : 0; +} + +static void +virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) +{ + struct virtio_hw *hw = dev->data->dev_private; + + dev_info->driver_name = dev->driver->pci_drv.name; + dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; + dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; + dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; + dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN; + dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS; + dev_info->default_txconf = (struct rte_eth_txconf) { + .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS + }; +} + +/* + * It enables testpmd to collect per queue stats. + */ +static int +virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev, +__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx, +__rte_unused uint8_t is_rx) +{ + return 0; +} + +static struct rte_driver rte_virtio_driver = { + .type = PMD_PDEV, + .init = rte_virtio_pmd_init, +}; + +PMD_REGISTER_DRIVER(rte_virtio_driver); diff --git a/drivers/net/virtio/virtio_ethdev.h b/drivers/net/virtio/virtio_ethdev.h new file mode 100644 index 00000000..66423a07 --- /dev/null +++ b/drivers/net/virtio/virtio_ethdev.h @@ -0,0 +1,125 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_ETHDEV_H_ +#define _VIRTIO_ETHDEV_H_ + +#include <stdint.h> + +#include "virtio_pci.h" + +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_10G 10000 + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#define VIRTIO_MAX_RX_QUEUES 128 +#define VIRTIO_MAX_TX_QUEUES 128 +#define VIRTIO_MAX_MAC_ADDRS 64 +#define VIRTIO_MIN_RX_BUFSIZE 64 +#define VIRTIO_MAX_RX_PKTLEN 9728 + +/* Features desired/implemented by this driver. 
*/ +#define VIRTIO_PMD_GUEST_FEATURES \ + (1u << VIRTIO_NET_F_MAC | \ + 1u << VIRTIO_NET_F_STATUS | \ + 1u << VIRTIO_NET_F_MQ | \ + 1u << VIRTIO_NET_F_CTRL_MAC_ADDR | \ + 1u << VIRTIO_NET_F_CTRL_VQ | \ + 1u << VIRTIO_NET_F_CTRL_RX | \ + 1u << VIRTIO_NET_F_CTRL_VLAN | \ + 1u << VIRTIO_NET_F_MRG_RXBUF | \ + 1ULL << VIRTIO_F_VERSION_1) + +/* + * CQ function prototype + */ +void virtio_dev_cq_start(struct rte_eth_dev *dev); + +/* + * RX/TX function prototypes + */ +void virtio_dev_rxtx_start(struct rte_eth_dev *dev); + +int virtio_dev_queue_setup(struct rte_eth_dev *dev, + int queue_type, + uint16_t queue_idx, + uint16_t vtpci_queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + struct virtqueue **pvq); + +void virtio_dev_queue_release(struct virtqueue *vq); + +int virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); + +void virtio_dev_rx_queue_release(void *rxq); + +int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +void virtio_dev_tx_queue_release(void *txq); + +uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +/* + * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us + * frames larger than 1514 bytes. We do not yet support software LRO + * via tcp_lro_rx(). 
+ */ +#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \ + VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN) + + +#endif /* _VIRTIO_ETHDEV_H_ */ diff --git a/drivers/net/virtio/virtio_logs.h b/drivers/net/virtio/virtio_logs.h new file mode 100644 index 00000000..d6c33f7b --- /dev/null +++ b/drivers/net/virtio/virtio_logs.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _VIRTIO_LOGS_H_ +#define _VIRTIO_LOGS_H_ + +#include <rte_log.h> + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT +#define PMD_INIT_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") +#else +#define PMD_INIT_LOG(level, fmt, args...) do { } while(0) +#define PMD_INIT_FUNC_TRACE() do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() rx: " fmt , __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() tx: " fmt , __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while(0) +#endif + + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER +#define PMD_DRV_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt , __func__, ## args) +#else +#define PMD_DRV_LOG(level, fmt, args...) do { } while(0) +#endif + +#endif /* _VIRTIO_LOGS_H_ */ diff --git a/drivers/net/virtio/virtio_pci.c b/drivers/net/virtio/virtio_pci.c new file mode 100644 index 00000000..c007959f --- /dev/null +++ b/drivers/net/virtio/virtio_pci.c @@ -0,0 +1,666 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdint.h> + +#ifdef RTE_EXEC_ENV_LINUXAPP + #include <dirent.h> + #include <fcntl.h> +#endif + +#include "virtio_pci.h" +#include "virtio_logs.h" +#include "virtqueue.h" + +/* + * Following macros are derived from linux/pci_regs.h, however, + * we can't simply include that header here, as there is no such + * file for non-Linux platform. + */ +#define PCI_CAPABILITY_LIST 0x34 +#define PCI_CAP_ID_VNDR 0x09 + +/* + * The remaining space is defined by each driver as the per-driver + * configuration space. + */ +#define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 
24 : 20) + +static void +legacy_read_dev_config(struct virtio_hw *hw, size_t offset, + void *dst, int length) +{ + rte_eal_pci_ioport_read(&hw->io, dst, length, + VIRTIO_PCI_CONFIG(hw) + offset); +} + +static void +legacy_write_dev_config(struct virtio_hw *hw, size_t offset, + const void *src, int length) +{ + rte_eal_pci_ioport_write(&hw->io, src, length, + VIRTIO_PCI_CONFIG(hw) + offset); +} + +static uint64_t +legacy_get_features(struct virtio_hw *hw) +{ + uint32_t dst; + + rte_eal_pci_ioport_read(&hw->io, &dst, 4, VIRTIO_PCI_HOST_FEATURES); + return dst; +} + +static void +legacy_set_features(struct virtio_hw *hw, uint64_t features) +{ + if ((features >> 32) != 0) { + PMD_DRV_LOG(ERR, + "only 32 bit features are allowed for legacy virtio!"); + return; + } + rte_eal_pci_ioport_write(&hw->io, &features, 4, + VIRTIO_PCI_GUEST_FEATURES); +} + +static uint8_t +legacy_get_status(struct virtio_hw *hw) +{ + uint8_t dst; + + rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_STATUS); + return dst; +} + +static void +legacy_set_status(struct virtio_hw *hw, uint8_t status) +{ + rte_eal_pci_ioport_write(&hw->io, &status, 1, VIRTIO_PCI_STATUS); +} + +static void +legacy_reset(struct virtio_hw *hw) +{ + legacy_set_status(hw, VIRTIO_CONFIG_STATUS_RESET); +} + +static uint8_t +legacy_get_isr(struct virtio_hw *hw) +{ + uint8_t dst; + + rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_ISR); + return dst; +} + +/* Enable one vector (0) for Link State Intrerrupt */ +static uint16_t +legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec) +{ + uint16_t dst; + + rte_eal_pci_ioport_write(&hw->io, &vec, 2, VIRTIO_MSI_CONFIG_VECTOR); + rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_MSI_CONFIG_VECTOR); + return dst; +} + +static uint16_t +legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) +{ + uint16_t dst; + + rte_eal_pci_ioport_write(&hw->io, &queue_id, 2, VIRTIO_PCI_QUEUE_SEL); + rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_PCI_QUEUE_NUM); + return dst; +} + 
+static void +legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + uint32_t src; + + rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_SEL); + src = vq->mz->phys_addr >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); +} + +static void +legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + uint32_t src = 0; + + rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_SEL); + rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); +} + +static void +legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_NOTIFY); +} + +#ifdef RTE_EXEC_ENV_LINUXAPP +static int +legacy_virtio_has_msix(const struct rte_pci_addr *loc) +{ + DIR *d; + char dirname[PATH_MAX]; + + snprintf(dirname, sizeof(dirname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/msi_irqs", + loc->domain, loc->bus, loc->devid, loc->function); + + d = opendir(dirname); + if (d) + closedir(d); + + return d != NULL; +} +#else +static int +legacy_virtio_has_msix(const struct rte_pci_addr *loc __rte_unused) +{ + /* nic_uio does not enable interrupts, return 0 (false). 
*/ + return 0; +} +#endif + +static int +legacy_virtio_resource_init(struct rte_pci_device *pci_dev, + struct virtio_hw *hw) +{ + if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0) + return -1; + + if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN) + pci_dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC; + else + pci_dev->driver->drv_flags &= ~RTE_PCI_DRV_INTR_LSC; + + return 0; +} + +static const struct virtio_pci_ops legacy_ops = { + .read_dev_cfg = legacy_read_dev_config, + .write_dev_cfg = legacy_write_dev_config, + .reset = legacy_reset, + .get_status = legacy_get_status, + .set_status = legacy_set_status, + .get_features = legacy_get_features, + .set_features = legacy_set_features, + .get_isr = legacy_get_isr, + .set_config_irq = legacy_set_config_irq, + .get_queue_num = legacy_get_queue_num, + .setup_queue = legacy_setup_queue, + .del_queue = legacy_del_queue, + .notify_queue = legacy_notify_queue, +}; + + +static inline uint8_t +io_read8(uint8_t *addr) +{ + return *(volatile uint8_t *)addr; +} + +static inline void +io_write8(uint8_t val, uint8_t *addr) +{ + *(volatile uint8_t *)addr = val; +} + +static inline uint16_t +io_read16(uint16_t *addr) +{ + return *(volatile uint16_t *)addr; +} + +static inline void +io_write16(uint16_t val, uint16_t *addr) +{ + *(volatile uint16_t *)addr = val; +} + +static inline uint32_t +io_read32(uint32_t *addr) +{ + return *(volatile uint32_t *)addr; +} + +static inline void +io_write32(uint32_t val, uint32_t *addr) +{ + *(volatile uint32_t *)addr = val; +} + +static inline void +io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi) +{ + io_write32(val & ((1ULL << 32) - 1), lo); + io_write32(val >> 32, hi); +} + +static void +modern_read_dev_config(struct virtio_hw *hw, size_t offset, + void *dst, int length) +{ + int i; + uint8_t *p; + uint8_t old_gen, new_gen; + + do { + old_gen = io_read8(&hw->common_cfg->config_generation); + + p = dst; + for (i = 0; i < length; i++) + *p++ = io_read8((uint8_t *)hw->dev_cfg 
+ offset + i); + + new_gen = io_read8(&hw->common_cfg->config_generation); + } while (old_gen != new_gen); +} + +static void +modern_write_dev_config(struct virtio_hw *hw, size_t offset, + const void *src, int length) +{ + int i; + const uint8_t *p = src; + + for (i = 0; i < length; i++) + io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i); +} + +static uint64_t +modern_get_features(struct virtio_hw *hw) +{ + uint32_t features_lo, features_hi; + + io_write32(0, &hw->common_cfg->device_feature_select); + features_lo = io_read32(&hw->common_cfg->device_feature); + + io_write32(1, &hw->common_cfg->device_feature_select); + features_hi = io_read32(&hw->common_cfg->device_feature); + + return ((uint64_t)features_hi << 32) | features_lo; +} + +static void +modern_set_features(struct virtio_hw *hw, uint64_t features) +{ + io_write32(0, &hw->common_cfg->guest_feature_select); + io_write32(features & ((1ULL << 32) - 1), + &hw->common_cfg->guest_feature); + + io_write32(1, &hw->common_cfg->guest_feature_select); + io_write32(features >> 32, + &hw->common_cfg->guest_feature); +} + +static uint8_t +modern_get_status(struct virtio_hw *hw) +{ + return io_read8(&hw->common_cfg->device_status); +} + +static void +modern_set_status(struct virtio_hw *hw, uint8_t status) +{ + io_write8(status, &hw->common_cfg->device_status); +} + +static void +modern_reset(struct virtio_hw *hw) +{ + modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET); + modern_get_status(hw); +} + +static uint8_t +modern_get_isr(struct virtio_hw *hw) +{ + return io_read8(hw->isr); +} + +static uint16_t +modern_set_config_irq(struct virtio_hw *hw, uint16_t vec) +{ + io_write16(vec, &hw->common_cfg->msix_config); + return io_read16(&hw->common_cfg->msix_config); +} + +static uint16_t +modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) +{ + io_write16(queue_id, &hw->common_cfg->queue_select); + return io_read16(&hw->common_cfg->queue_size); +} + +static void +modern_setup_queue(struct virtio_hw *hw, struct 
virtqueue *vq) +{ + uint64_t desc_addr, avail_addr, used_addr; + uint16_t notify_off; + + desc_addr = vq->mz->phys_addr; + avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); + used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, + ring[vq->vq_nentries]), + VIRTIO_PCI_VRING_ALIGN); + + io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + + io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo, + &hw->common_cfg->queue_desc_hi); + io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo, + &hw->common_cfg->queue_avail_hi); + io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo, + &hw->common_cfg->queue_used_hi); + + notify_off = io_read16(&hw->common_cfg->queue_notify_off); + vq->notify_addr = (void *)((uint8_t *)hw->notify_base + + notify_off * hw->notify_off_multiplier); + + io_write16(1, &hw->common_cfg->queue_enable); + + PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index); + PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr); + PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr); + PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); + PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", + vq->notify_addr, notify_off); +} + +static void +modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + + io_write64_twopart(0, &hw->common_cfg->queue_desc_lo, + &hw->common_cfg->queue_desc_hi); + io_write64_twopart(0, &hw->common_cfg->queue_avail_lo, + &hw->common_cfg->queue_avail_hi); + io_write64_twopart(0, &hw->common_cfg->queue_used_lo, + &hw->common_cfg->queue_used_hi); + + io_write16(0, &hw->common_cfg->queue_enable); +} + +static void +modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq) +{ + io_write16(1, vq->notify_addr); +} + +static const struct virtio_pci_ops modern_ops = { + .read_dev_cfg = modern_read_dev_config, + .write_dev_cfg = modern_write_dev_config, + 
.reset = modern_reset, + .get_status = modern_get_status, + .set_status = modern_set_status, + .get_features = modern_get_features, + .set_features = modern_set_features, + .get_isr = modern_get_isr, + .set_config_irq = modern_set_config_irq, + .get_queue_num = modern_get_queue_num, + .setup_queue = modern_setup_queue, + .del_queue = modern_del_queue, + .notify_queue = modern_notify_queue, +}; + + +void +vtpci_read_dev_config(struct virtio_hw *hw, size_t offset, + void *dst, int length) +{ + hw->vtpci_ops->read_dev_cfg(hw, offset, dst, length); +} + +void +vtpci_write_dev_config(struct virtio_hw *hw, size_t offset, + const void *src, int length) +{ + hw->vtpci_ops->write_dev_cfg(hw, offset, src, length); +} + +uint64_t +vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features) +{ + uint64_t features; + + /* + * Limit negotiated features to what the driver, virtqueue, and + * host all support. + */ + features = host_features & hw->guest_features; + hw->vtpci_ops->set_features(hw, features); + + return features; +} + +void +vtpci_reset(struct virtio_hw *hw) +{ + hw->vtpci_ops->set_status(hw, VIRTIO_CONFIG_STATUS_RESET); + /* flush status write */ + hw->vtpci_ops->get_status(hw); +} + +void +vtpci_reinit_complete(struct virtio_hw *hw) +{ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK); +} + +void +vtpci_set_status(struct virtio_hw *hw, uint8_t status) +{ + if (status != VIRTIO_CONFIG_STATUS_RESET) + status |= hw->vtpci_ops->get_status(hw); + + hw->vtpci_ops->set_status(hw, status); +} + +uint8_t +vtpci_get_status(struct virtio_hw *hw) +{ + return hw->vtpci_ops->get_status(hw); +} + +uint8_t +vtpci_isr(struct virtio_hw *hw) +{ + return hw->vtpci_ops->get_isr(hw); +} + + +/* Enable one vector (0) for Link State Intrerrupt */ +uint16_t +vtpci_irq_config(struct virtio_hw *hw, uint16_t vec) +{ + return hw->vtpci_ops->set_config_irq(hw, vec); +} + +static void * +get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap) +{ + uint8_t bar = 
cap->bar; + uint32_t length = cap->length; + uint32_t offset = cap->offset; + uint8_t *base; + + if (bar > 5) { + PMD_INIT_LOG(ERR, "invalid bar: %u", bar); + return NULL; + } + + if (offset + length < offset) { + PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows", + offset, length); + return NULL; + } + + if (offset + length > dev->mem_resource[bar].len) { + PMD_INIT_LOG(ERR, + "invalid cap: overflows bar space: %u > %" PRIu64, + offset + length, dev->mem_resource[bar].len); + return NULL; + } + + base = dev->mem_resource[bar].addr; + if (base == NULL) { + PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar); + return NULL; + } + + return base + offset; +} + +static int +virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw) +{ + uint8_t pos; + struct virtio_pci_cap cap; + int ret; + + if (rte_eal_pci_map_device(dev)) { + PMD_INIT_LOG(DEBUG, "failed to map pci device!"); + return -1; + } + + ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST); + if (ret < 0) { + PMD_INIT_LOG(DEBUG, "failed to read pci capability list"); + return -1; + } + + while (pos) { + ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos); + if (ret < 0) { + PMD_INIT_LOG(ERR, + "failed to read pci cap at pos: %x", pos); + break; + } + + if (cap.cap_vndr != PCI_CAP_ID_VNDR) { + PMD_INIT_LOG(DEBUG, + "[%2x] skipping non VNDR cap id: %02x", + pos, cap.cap_vndr); + goto next; + } + + PMD_INIT_LOG(DEBUG, + "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u", + pos, cap.cfg_type, cap.bar, cap.offset, cap.length); + + switch (cap.cfg_type) { + case VIRTIO_PCI_CAP_COMMON_CFG: + hw->common_cfg = get_cfg_addr(dev, &cap); + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + rte_eal_pci_read_config(dev, &hw->notify_off_multiplier, + 4, pos + sizeof(cap)); + hw->notify_base = get_cfg_addr(dev, &cap); + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + hw->dev_cfg = get_cfg_addr(dev, &cap); + break; + case VIRTIO_PCI_CAP_ISR_CFG: + hw->isr = get_cfg_addr(dev, &cap); + break; + } + +next: + 
pos = cap.cap_next; + } + + if (hw->common_cfg == NULL || hw->notify_base == NULL || + hw->dev_cfg == NULL || hw->isr == NULL) { + PMD_INIT_LOG(INFO, "no modern virtio pci device found."); + return -1; + } + + PMD_INIT_LOG(INFO, "found modern virtio pci device."); + + PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg); + PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg); + PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr); + PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u", + hw->notify_base, hw->notify_off_multiplier); + + return 0; +} + +/* + * Return -1: + * if there is error mapping with VFIO/UIO. + * if port map error when driver type is KDRV_NONE. + * Return 1 if kernel driver is managing the device. + * Return 0 on success. + */ +int +vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw) +{ + hw->dev = dev; + + /* + * Try if we can succeed reading virtio pci caps, which exists + * only on modern pci device. If failed, we fallback to legacy + * virtio handling. + */ + if (virtio_read_caps(dev, hw) == 0) { + PMD_INIT_LOG(INFO, "modern virtio pci detected."); + hw->vtpci_ops = &modern_ops; + hw->modern = 1; + dev->driver->drv_flags |= RTE_PCI_DRV_INTR_LSC; + return 0; + } + + PMD_INIT_LOG(INFO, "trying with legacy virtio pci."); + if (legacy_virtio_resource_init(dev, hw) < 0) { + if (dev->kdrv == RTE_KDRV_UNKNOWN && + dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) { + PMD_INIT_LOG(INFO, + "skip kernel managed virtio device."); + return 1; + } + return -1; + } + + hw->vtpci_ops = &legacy_ops; + hw->use_msix = legacy_virtio_has_msix(&dev->addr); + hw->modern = 0; + + return 0; +} diff --git a/drivers/net/virtio/virtio_pci.h b/drivers/net/virtio/virtio_pci.h new file mode 100644 index 00000000..b69785ea --- /dev/null +++ b/drivers/net/virtio/virtio_pci.h @@ -0,0 +1,314 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_PCI_H_ +#define _VIRTIO_PCI_H_ + +#include <stdint.h> + +#include <rte_pci.h> +#include <rte_ethdev.h> + +struct virtqueue; + +/* VirtIO PCI vendor/device ID. */ +#define VIRTIO_PCI_VENDORID 0x1AF4 +#define VIRTIO_PCI_DEVICEID_MIN 0x1000 +#define VIRTIO_PCI_DEVICEID_MAX 0x103F + +/* VirtIO ABI version, this must match exactly. */ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* + * VirtIO Header, located in BAR 0. 
+ */ +#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO)*/ +#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ +#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ +#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ +#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ +#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ +#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading + * also clears the register (8, RO) */ +/* Only if MSIX is enabled: */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications + (16, RW) */ + +/* The bit of the ISR which indicates a device has an interrupt. */ +#define VIRTIO_PCI_ISR_INTR 0x1 +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue. */ +#define VIRTIO_MSI_NO_VECTOR 0xFFFF + +/* VirtIO device IDs. */ +#define VIRTIO_ID_NETWORK 0x01 +#define VIRTIO_ID_BLOCK 0x02 +#define VIRTIO_ID_CONSOLE 0x03 +#define VIRTIO_ID_ENTROPY 0x04 +#define VIRTIO_ID_BALLOON 0x05 +#define VIRTIO_ID_IOMEMORY 0x06 +#define VIRTIO_ID_9P 0x09 + +/* Status byte for guest to report progress. */ +#define VIRTIO_CONFIG_STATUS_RESET 0x00 +#define VIRTIO_CONFIG_STATUS_ACK 0x01 +#define VIRTIO_CONFIG_STATUS_DRIVER 0x02 +#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04 +#define VIRTIO_CONFIG_STATUS_FEATURES_OK 0x08 +#define VIRTIO_CONFIG_STATUS_FAILED 0x80 + +/* + * Each virtqueue indirect descriptor list must be physically contiguous. + * To allow us to malloc(9) each list individually, limit the number + * supported to what will fit in one page. With 4KB pages, this is a limit + * of 256 descriptors. 
If there is ever a need for more, we can switch to + * contigmalloc(9) for the larger allocations, similar to what + * bus_dmamem_alloc(9) does. + * + * Note the sizeof(struct vring_desc) is 16 bytes. + */ +#define VIRTIO_MAX_INDIRECT ((int) (PAGE_SIZE / 16)) + +/* The feature bitmap for virtio net */ +#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ +#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ +#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ +#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ +#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */ +#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ +#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ +#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ +#define VIRTIO_NET_F_STATUS 16 /* virtio_net_config.status available */ +#define VIRTIO_NET_F_CTRL_VQ 17 /* Control channel available */ +#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ +#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ +#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce device on the + * network */ +#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow + * Steering */ +#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ + +/* Do we get callbacks when the ring is completely used, even if we've + * suppressed them? */ +#define VIRTIO_F_NOTIFY_ON_EMPTY 24 + +/* Can the device handle any descriptor layout? 
*/ +#define VIRTIO_F_ANY_LAYOUT 27 + +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + +#define VIRTIO_F_VERSION_1 32 + +/* + * Some VirtIO feature bits (currently bits 28 through 31) are + * reserved for the transport being used (eg. virtio_ring), the + * rest are per-device feature bits. + */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 32 + +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + +#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ + +/* + * Maximum number of virtqueues per device. + */ +#define VIRTIO_MAX_VIRTQUEUES 8 + +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR Status */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific configuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 +/* PCI configuration access */ +#define VIRTIO_PCI_CAP_PCI_CFG 5 + +/* This is the PCI capability header: */ +struct virtio_pci_cap { + uint8_t cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + uint8_t cap_next; /* Generic PCI field: next ptr. */ + uint8_t cap_len; /* Generic PCI field: capability length */ + uint8_t cfg_type; /* Identifies the structure. */ + uint8_t bar; /* Where to find it. */ + uint8_t padding[3]; /* Pad to full dword. */ + uint32_t offset; /* Offset within bar. */ + uint32_t length; /* Length of the structure, in bytes. */ +}; + +struct virtio_pci_notify_cap { + struct virtio_pci_cap cap; + uint32_t notify_off_multiplier; /* Multiplier for queue_notify_off. 
*/ +}; + +/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ +struct virtio_pci_common_cfg { + /* About the whole device. */ + uint32_t device_feature_select; /* read-write */ + uint32_t device_feature; /* read-only */ + uint32_t guest_feature_select; /* read-write */ + uint32_t guest_feature; /* read-write */ + uint16_t msix_config; /* read-write */ + uint16_t num_queues; /* read-only */ + uint8_t device_status; /* read-write */ + uint8_t config_generation; /* read-only */ + + /* About a specific virtqueue. */ + uint16_t queue_select; /* read-write */ + uint16_t queue_size; /* read-write, power of 2. */ + uint16_t queue_msix_vector; /* read-write */ + uint16_t queue_enable; /* read-write */ + uint16_t queue_notify_off; /* read-only */ + uint32_t queue_desc_lo; /* read-write */ + uint32_t queue_desc_hi; /* read-write */ + uint32_t queue_avail_lo; /* read-write */ + uint32_t queue_avail_hi; /* read-write */ + uint32_t queue_used_lo; /* read-write */ + uint32_t queue_used_hi; /* read-write */ +}; + +struct virtio_hw; + +struct virtio_pci_ops { + void (*read_dev_cfg)(struct virtio_hw *hw, size_t offset, + void *dst, int len); + void (*write_dev_cfg)(struct virtio_hw *hw, size_t offset, + const void *src, int len); + void (*reset)(struct virtio_hw *hw); + + uint8_t (*get_status)(struct virtio_hw *hw); + void (*set_status)(struct virtio_hw *hw, uint8_t status); + + uint64_t (*get_features)(struct virtio_hw *hw); + void (*set_features)(struct virtio_hw *hw, uint64_t features); + + uint8_t (*get_isr)(struct virtio_hw *hw); + + uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); + + uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); + void (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); + void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); + void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); +}; + +struct virtio_net_config; + +struct virtio_hw { + struct virtqueue *cvq; + struct rte_pci_ioport io; + uint64_t 
guest_features; + uint32_t max_tx_queues; + uint32_t max_rx_queues; + uint16_t vtnet_hdr_size; + uint8_t vlan_strip; + uint8_t use_msix; + uint8_t started; + uint8_t modern; + uint8_t mac_addr[ETHER_ADDR_LEN]; + uint32_t notify_off_multiplier; + uint8_t *isr; + uint16_t *notify_base; + struct rte_pci_device *dev; + struct virtio_pci_common_cfg *common_cfg; + struct virtio_net_config *dev_cfg; + const struct virtio_pci_ops *vtpci_ops; +}; + +/* + * This structure is just a reference to read + * net device specific config space; it is just a shadow structure + * + */ +struct virtio_net_config { + /* The config defining mac address (if VIRTIO_NET_F_MAC) */ + uint8_t mac[ETHER_ADDR_LEN]; + /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ + uint16_t status; + uint16_t max_virtqueue_pairs; +} __attribute__((packed)); + +/* + * How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. + */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. 
*/ +#define VIRTIO_PCI_VRING_ALIGN 4096 + +static inline int +vtpci_with_feature(struct virtio_hw *hw, uint64_t bit) +{ + return (hw->guest_features & (1ULL << bit)) != 0; +} + +/* + * Function declaration from virtio_pci.c + */ +int vtpci_init(struct rte_pci_device *, struct virtio_hw *); +void vtpci_reset(struct virtio_hw *); + +void vtpci_reinit_complete(struct virtio_hw *); + +uint8_t vtpci_get_status(struct virtio_hw *); +void vtpci_set_status(struct virtio_hw *, uint8_t); + +uint64_t vtpci_negotiate_features(struct virtio_hw *, uint64_t); + +void vtpci_write_dev_config(struct virtio_hw *, size_t, const void *, int); + +void vtpci_read_dev_config(struct virtio_hw *, size_t, void *, int); + +uint8_t vtpci_isr(struct virtio_hw *); + +uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t); + +#endif /* _VIRTIO_PCI_H_ */ diff --git a/drivers/net/virtio/virtio_ring.h b/drivers/net/virtio/virtio_ring.h new file mode 100644 index 00000000..447760a8 --- /dev/null +++ b/drivers/net/virtio/virtio_ring.h @@ -0,0 +1,163 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_RING_H_ +#define _VIRTIO_RING_H_ + +#include <stdint.h> + +#include <rte_common.h> + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* The Host uses this in used->flags to advise the Guest: don't kick me + * when you add a buffer. It's unreliable, so it's simply an + * optimization. Guest will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't + * interrupt me when you consume a buffer. It's unreliable, so it's + * simply an optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* VirtIO ring descriptors: 16 bytes. + * These can chain together via "next". */ +struct vring_desc { + uint64_t addr; /* Address (guest-physical). */ + uint32_t len; /* Length. */ + uint16_t flags; /* The flags as indicated above. */ + uint16_t next; /* We chain unused descriptors via this. 
*/ +}; + +struct vring_avail { + uint16_t flags; + uint16_t idx; + uint16_t ring[0]; +}; + +/* id is a 16bit index. uint32_t is used here for ids for padding reasons. */ +struct vring_used_elem { + /* Index of start of used descriptor chain. */ + uint32_t id; + /* Total length of the descriptor chain which was written to. */ + uint32_t len; +}; + +struct vring_used { + uint16_t flags; + uint16_t idx; + struct vring_used_elem ring[0]; +}; + +struct vring { + unsigned int num; + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; +}; + +/* The standard layout for the ring is a continuous chunk of memory which + * looks like this. We assume num is a power of 2. + * + * struct vring { + * // The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * // A ring of available descriptor heads with free-running index. + * __u16 avail_flags; + * __u16 avail_idx; + * __u16 available[num]; + * __u16 used_event_idx; + * + * // Padding to the next align boundary. + * char pad[]; + * + * // A ring of used descriptor heads with free-running index. + * __u16 used_flags; + * __u16 used_idx; + * struct vring_used_elem used[num]; + * __u16 avail_event_idx; + * }; + * + * NOTE: for VirtIO PCI, align is 4096. + */ + +/* + * We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. 
+ */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num]) + +static inline size_t +vring_size(unsigned int num, unsigned long align) +{ + size_t size; + + size = num * sizeof(struct vring_desc); + size += sizeof(struct vring_avail) + (num * sizeof(uint16_t)); + size = RTE_ALIGN_CEIL(size, align); + size += sizeof(struct vring_used) + + (num * sizeof(struct vring_used_elem)); + return size; +} + +static inline void +vring_init(struct vring *vr, unsigned int num, uint8_t *p, + unsigned long align) +{ + vr->num = num; + vr->desc = (struct vring_desc *) p; + vr->avail = (struct vring_avail *) (p + + num * sizeof(struct vring_desc)); + vr->used = (void *) + RTE_ALIGN_CEIL((uintptr_t)(&vr->avail->ring[num]), align); +} + +/* + * The following is used with VIRTIO_RING_F_EVENT_IDX. + * Assuming a given event_idx value from the other side, if we have + * just incremented index from old to new_idx, should we trigger an + * event? + */ +static inline int +vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) +{ + return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); +} + +#endif /* _VIRTIO_RING_H_ */ diff --git a/drivers/net/virtio/virtio_rxtx.c b/drivers/net/virtio/virtio_rxtx.c new file mode 100644 index 00000000..ef21d8e3 --- /dev/null +++ b/drivers/net/virtio/virtio_rxtx.c @@ -0,0 +1,940 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <rte_cycles.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_branch_prediction.h> +#include <rte_mempool.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_prefetch.h> +#include <rte_string_fns.h> +#include <rte_errno.h> +#include <rte_byteorder.h> + +#include "virtio_logs.h" +#include "virtio_ethdev.h" +#include "virtio_pci.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP +#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len) +#else +#define VIRTIO_DUMP_PACKET(m, len) do { } while (0) +#endif + + +#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \ + ETH_TXQ_FLAGS_NOOFFLOADS) + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 +static int use_simple_rxtx; +#endif + +static void +vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) +{ + struct vring_desc *dp, *dp_tail; + struct vq_desc_extra *dxp; + uint16_t desc_idx_last = desc_idx; + + dp = &vq->vq_ring.desc[desc_idx]; + dxp = &vq->vq_descx[desc_idx]; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); + if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { + while (dp->flags & VRING_DESC_F_NEXT) { + desc_idx_last = dp->next; + dp = &vq->vq_ring.desc[dp->next]; + } + } + dxp->ndescs = 0; + + /* + * We must append the existing free chain, if any, to the end of + * newly freed chain. If the virtqueue was completely used, then + * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). 
+ */ + if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) { + vq->vq_desc_head_idx = desc_idx; + } else { + dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx]; + dp_tail->next = desc_idx; + } + + vq->vq_desc_tail_idx = desc_idx_last; + dp->next = VQ_RING_DESC_CHAIN_END; +} + +static uint16_t +virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, + uint32_t *len, uint16_t num) +{ + struct vring_used_elem *uep; + struct rte_mbuf *cookie; + uint16_t used_idx, desc_idx; + uint16_t i; + + /* Caller does the check */ + for (i = 0; i < num ; i++) { + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + desc_idx = (uint16_t) uep->id; + len[i] = uep->len; + cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; + + if (unlikely(cookie == NULL)) { + PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", + vq->vq_used_cons_idx); + break; + } + + rte_prefetch0(cookie); + rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *)); + rx_pkts[i] = cookie; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); + vq->vq_descx[desc_idx].cookie = NULL; + } + + return i; +} + +#ifndef DEFAULT_TX_FREE_THRESH +#define DEFAULT_TX_FREE_THRESH 32 +#endif + +/* Cleanup from completed transmits. 
*/ +static void +virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num) +{ + uint16_t i, used_idx, desc_idx; + for (i = 0; i < num; i++) { + struct vring_used_elem *uep; + struct vq_desc_extra *dxp; + + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + + desc_idx = (uint16_t) uep->id; + dxp = &vq->vq_descx[desc_idx]; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); + + if (dxp->cookie != NULL) { + rte_pktmbuf_free(dxp->cookie); + dxp->cookie = NULL; + } + } +} + + +static inline int +virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) +{ + struct vq_desc_extra *dxp; + struct virtio_hw *hw = vq->hw; + struct vring_desc *start_dp; + uint16_t needed = 1; + uint16_t head_idx, idx; + + if (unlikely(vq->vq_free_cnt == 0)) + return -ENOSPC; + if (unlikely(vq->vq_free_cnt < needed)) + return -EMSGSIZE; + + head_idx = vq->vq_desc_head_idx; + if (unlikely(head_idx >= vq->vq_nentries)) + return -EFAULT; + + idx = head_idx; + dxp = &vq->vq_descx[idx]; + dxp->cookie = (void *)cookie; + dxp->ndescs = needed; + + start_dp = vq->vq_ring.desc; + start_dp[idx].addr = + (uint64_t)(cookie->buf_physaddr + RTE_PKTMBUF_HEADROOM + - hw->vtnet_hdr_size); + start_dp[idx].len = + cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; + start_dp[idx].flags = VRING_DESC_F_WRITE; + idx = start_dp[idx].next; + vq->vq_desc_head_idx = idx; + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) + vq->vq_desc_tail_idx = idx; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); + vq_update_avail_ring(vq, head_idx); + + return 0; +} + +static inline void +virtqueue_enqueue_xmit(struct virtqueue *txvq, struct rte_mbuf *cookie, + uint16_t needed, int use_indirect, int can_push) +{ + struct vq_desc_extra *dxp; + struct vring_desc *start_dp; + uint16_t seg_num = cookie->nb_segs; + uint16_t head_idx, idx; + uint16_t head_size = txvq->hw->vtnet_hdr_size; + unsigned long offs; + + head_idx = 
txvq->vq_desc_head_idx; + idx = head_idx; + dxp = &txvq->vq_descx[idx]; + dxp->cookie = (void *)cookie; + dxp->ndescs = needed; + + start_dp = txvq->vq_ring.desc; + + if (can_push) { + /* put on zero'd transmit header (no offloads) */ + void *hdr = rte_pktmbuf_prepend(cookie, head_size); + + memset(hdr, 0, head_size); + } else if (use_indirect) { + /* setup tx ring slot to point to indirect + * descriptor list stored in reserved region. + * + * the first slot in indirect ring is already preset + * to point to the header in reserved region + */ + struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; + + offs = idx * sizeof(struct virtio_tx_region) + + offsetof(struct virtio_tx_region, tx_indir); + + start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc); + start_dp[idx].flags = VRING_DESC_F_INDIRECT; + + /* loop below will fill in rest of the indirect elements */ + start_dp = txr[idx].tx_indir; + idx = 1; + } else { + /* setup first tx ring slot to point to header + * stored in reserved region. + */ + offs = idx * sizeof(struct virtio_tx_region) + + offsetof(struct virtio_tx_region, tx_hdr); + + start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].len = txvq->hw->vtnet_hdr_size; + start_dp[idx].flags = VRING_DESC_F_NEXT; + idx = start_dp[idx].next; + } + + do { + start_dp[idx].addr = rte_mbuf_data_dma_addr(cookie); + start_dp[idx].len = cookie->data_len; + start_dp[idx].flags = cookie->next ? 
VRING_DESC_F_NEXT : 0; + idx = start_dp[idx].next; + } while ((cookie = cookie->next) != NULL); + + start_dp[idx].flags &= ~VRING_DESC_F_NEXT; + + if (use_indirect) + idx = txvq->vq_ring.desc[head_idx].next; + + txvq->vq_desc_head_idx = idx; + if (txvq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) + txvq->vq_desc_tail_idx = idx; + txvq->vq_free_cnt = (uint16_t)(txvq->vq_free_cnt - needed); + vq_update_avail_ring(txvq, head_idx); +} + +static inline struct rte_mbuf * +rte_rxmbuf_alloc(struct rte_mempool *mp) +{ + struct rte_mbuf *m; + + m = __rte_mbuf_raw_alloc(mp); + __rte_mbuf_sanity_check_raw(m, 0); + + return m; +} + +static void +virtio_dev_vring_start(struct virtqueue *vq, int queue_type) +{ + struct rte_mbuf *m; + int i, nbufs, error, size = vq->vq_nentries; + struct vring *vr = &vq->vq_ring; + uint8_t *ring_mem = vq->vq_ring_virt_mem; + + PMD_INIT_FUNC_TRACE(); + + /* + * Reinitialise since virtio port might have been stopped and restarted + */ + memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size); + vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN); + vq->vq_used_cons_idx = 0; + vq->vq_desc_head_idx = 0; + vq->vq_avail_idx = 0; + vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); + vq->vq_free_cnt = vq->vq_nentries; + memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); + + vring_desc_init(vr->desc, size); + + /* + * Disable device(host) interrupting guest + */ + virtqueue_disable_intr(vq); + + /* Only rx virtqueue needs mbufs to be allocated at initialization */ + if (queue_type == VTNET_RQ) { + if (vq->mpool == NULL) + rte_exit(EXIT_FAILURE, + "Cannot allocate initial mbufs for rx virtqueue"); + + /* Allocate blank mbufs for the each rx descriptor */ + nbufs = 0; + error = ENOSPC; + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + if (use_simple_rxtx) + for (i = 0; i < vq->vq_nentries; i++) { + vq->vq_ring.avail->ring[i] = i; + vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; + } +#endif + memset(&vq->fake_mbuf, 0, sizeof(vq->fake_mbuf)); + 
for (i = 0; i < RTE_PMD_VIRTIO_RX_MAX_BURST; i++) + vq->sw_ring[vq->vq_nentries + i] = &vq->fake_mbuf; + + while (!virtqueue_full(vq)) { + m = rte_rxmbuf_alloc(vq->mpool); + if (m == NULL) + break; + + /****************************************** + * Enqueue allocated buffers * + *******************************************/ +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + if (use_simple_rxtx) + error = virtqueue_enqueue_recv_refill_simple(vq, m); + else +#endif + error = virtqueue_enqueue_recv_refill(vq, m); + if (error) { + rte_pktmbuf_free(m); + break; + } + nbufs++; + } + + vq_update_avail_idx(vq); + + PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); + } else if (queue_type == VTNET_TQ) { +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + if (use_simple_rxtx) { + int mid_idx = vq->vq_nentries >> 1; + for (i = 0; i < mid_idx; i++) { + vq->vq_ring.avail->ring[i] = i + mid_idx; + vq->vq_ring.desc[i + mid_idx].next = i; + vq->vq_ring.desc[i + mid_idx].addr = + vq->virtio_net_hdr_mem + + offsetof(struct virtio_tx_region, tx_hdr); + vq->vq_ring.desc[i + mid_idx].len = + vq->hw->vtnet_hdr_size; + vq->vq_ring.desc[i + mid_idx].flags = + VRING_DESC_F_NEXT; + vq->vq_ring.desc[i].flags = 0; + } + for (i = mid_idx; i < vq->vq_nentries; i++) + vq->vq_ring.avail->ring[i] = i; + } +#endif + } +} + +void +virtio_dev_cq_start(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (hw->cvq) { + virtio_dev_vring_start(hw->cvq, VTNET_CQ); + VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq); + } +} + +void +virtio_dev_rxtx_start(struct rte_eth_dev *dev) +{ + /* + * Start receive and transmit vrings + * - Setup vring structure for all queues + * - Initialize descriptor for the rx vring + * - Allocate blank mbufs for the each rx descriptor + * + */ + int i; + + PMD_INIT_FUNC_TRACE(); + + /* Start rx vring. 
*/ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + virtio_dev_vring_start(dev->data->rx_queues[i], VTNET_RQ); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->rx_queues[i]); + } + + /* Start tx vring. */ + for (i = 0; i < dev->data->nb_tx_queues; i++) { + virtio_dev_vring_start(dev->data->tx_queues[i], VTNET_TQ); + VIRTQUEUE_DUMP((struct virtqueue *)dev->data->tx_queues[i]); + } +} + +int +virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + __rte_unused const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; + struct virtqueue *vq; + int ret; + + PMD_INIT_FUNC_TRACE(); + ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx, + nb_desc, socket_id, &vq); + if (ret < 0) { + PMD_INIT_LOG(ERR, "rvq initialization failed"); + return ret; + } + + /* Create mempool for rx mbuf allocation */ + vq->mpool = mp; + + dev->data->rx_queues[queue_idx] = vq; + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + virtio_rxq_vec_setup(vq); +#endif + + return 0; +} + +void +virtio_dev_rx_queue_release(void *rxq) +{ + virtio_dev_queue_release(rxq); +} + +/* + * struct rte_eth_dev *dev: Used to update dev + * uint16_t nb_desc: Defaults to values read from config space + * unsigned int socket_id: Used to allocate memzone + * const struct rte_eth_txconf *tx_conf: Used to setup tx engine + * uint16_t queue_idx: Just used as an index in dev txq list + */ +int +virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + struct virtio_hw *hw = dev->data->dev_private; +#endif + struct virtqueue *vq; + uint16_t tx_free_thresh; + int ret; + + PMD_INIT_FUNC_TRACE(); + + if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) + != ETH_TXQ_FLAGS_NOXSUMS) 
{ + PMD_INIT_LOG(ERR, "TX checksum offload not supported\n"); + return -EINVAL; + } + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + /* Use simple rx/tx func if single segment and no offloads */ + if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS && + !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { + PMD_INIT_LOG(INFO, "Using simple rx/tx path"); + dev->tx_pkt_burst = virtio_xmit_pkts_simple; + dev->rx_pkt_burst = virtio_recv_pkts_vec; + use_simple_rxtx = 1; + } +#endif + + ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx, + nb_desc, socket_id, &vq); + if (ret < 0) { + PMD_INIT_LOG(ERR, "rvq initialization failed"); + return ret; + } + + tx_free_thresh = tx_conf->tx_free_thresh; + if (tx_free_thresh == 0) + tx_free_thresh = + RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH); + + if (tx_free_thresh >= (vq->vq_nentries - 3)) { + RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the " + "number of TX entries minus 3 (%u)." + " (tx_free_thresh=%u port=%u queue=%u)\n", + vq->vq_nentries - 3, + tx_free_thresh, dev->data->port_id, queue_idx); + return -EINVAL; + } + + vq->vq_free_thresh = tx_free_thresh; + + dev->data->tx_queues[queue_idx] = vq; + return 0; +} + +void +virtio_dev_tx_queue_release(void *txq) +{ + virtio_dev_queue_release(txq); +} + +static void +virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m) +{ + int error; + /* + * Requeue the discarded mbuf. This should always be + * successful since it was just dequeued. 
+ */ + error = virtqueue_enqueue_recv_refill(vq, m); + if (unlikely(error)) { + RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf"); + rte_pktmbuf_free(m); + } +} + +static void +virtio_update_packet_stats(struct virtqueue *vq, struct rte_mbuf *mbuf) +{ + uint32_t s = mbuf->pkt_len; + struct ether_addr *ea; + + if (s == 64) { + vq->size_bins[1]++; + } else if (s > 64 && s < 1024) { + uint32_t bin; + + /* count zeros, and offset into correct bin */ + bin = (sizeof(s) * 8) - __builtin_clz(s) - 5; + vq->size_bins[bin]++; + } else { + if (s < 64) + vq->size_bins[0]++; + else if (s < 1519) + vq->size_bins[6]++; + else if (s >= 1519) + vq->size_bins[7]++; + } + + ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *); + if (is_multicast_ether_addr(ea)) { + if (is_broadcast_ether_addr(ea)) + vq->broadcast++; + else + vq->multicast++; + } +} + +#define VIRTIO_MBUF_BURST_SZ 64 +#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) +uint16_t +virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + struct virtqueue *rxvq = rx_queue; + struct virtio_hw *hw; + struct rte_mbuf *rxm, *new_mbuf; + uint16_t nb_used, num, nb_rx; + uint32_t len[VIRTIO_MBUF_BURST_SZ]; + struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; + int error; + uint32_t i, nb_enqueued; + uint32_t hdr_size; + + nb_used = VIRTQUEUE_NUSED(rxvq); + + virtio_rmb(); + + num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); + num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? 
num : VIRTIO_MBUF_BURST_SZ); + if (likely(num > DESC_PER_CACHELINE)) + num = num - ((rxvq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); + + num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, num); + PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num); + + hw = rxvq->hw; + nb_rx = 0; + nb_enqueued = 0; + hdr_size = hw->vtnet_hdr_size; + + for (i = 0; i < num ; i++) { + rxm = rcv_pkts[i]; + + PMD_RX_LOG(DEBUG, "packet len:%d", len[i]); + + if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) { + PMD_RX_LOG(ERR, "Packet drop"); + nb_enqueued++; + virtio_discard_rxbuf(rxvq, rxm); + rxvq->errors++; + continue; + } + + rxm->port = rxvq->port_id; + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rxm->ol_flags = 0; + rxm->vlan_tci = 0; + + rxm->nb_segs = 1; + rxm->next = NULL; + rxm->pkt_len = (uint32_t)(len[i] - hdr_size); + rxm->data_len = (uint16_t)(len[i] - hdr_size); + + if (hw->vlan_strip) + rte_vlan_strip(rxm); + + VIRTIO_DUMP_PACKET(rxm, rxm->data_len); + + rx_pkts[nb_rx++] = rxm; + + rxvq->bytes += rx_pkts[nb_rx - 1]->pkt_len; + virtio_update_packet_stats(rxvq, rxm); + } + + rxvq->packets += nb_rx; + + /* Allocate new mbuf for the used descriptor */ + error = ENOSPC; + while (likely(!virtqueue_full(rxvq))) { + new_mbuf = rte_rxmbuf_alloc(rxvq->mpool); + if (unlikely(new_mbuf == NULL)) { + struct rte_eth_dev *dev + = &rte_eth_devices[rxvq->port_id]; + dev->data->rx_mbuf_alloc_failed++; + break; + } + error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf); + if (unlikely(error)) { + rte_pktmbuf_free(new_mbuf); + break; + } + nb_enqueued++; + } + + if (likely(nb_enqueued)) { + vq_update_avail_idx(rxvq); + + if (unlikely(virtqueue_kick_prepare(rxvq))) { + virtqueue_notify(rxvq); + PMD_RX_LOG(DEBUG, "Notified\n"); + } + } + + return nb_rx; +} + +uint16_t +virtio_recv_mergeable_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct virtqueue *rxvq = rx_queue; + struct virtio_hw *hw; + struct rte_mbuf *rxm, *new_mbuf; + uint16_t nb_used, num, nb_rx; 
+ uint32_t len[VIRTIO_MBUF_BURST_SZ]; + struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; + struct rte_mbuf *prev; + int error; + uint32_t i, nb_enqueued; + uint32_t seg_num; + uint16_t extra_idx; + uint32_t seg_res; + uint32_t hdr_size; + + nb_used = VIRTQUEUE_NUSED(rxvq); + + virtio_rmb(); + + PMD_RX_LOG(DEBUG, "used:%d\n", nb_used); + + hw = rxvq->hw; + nb_rx = 0; + i = 0; + nb_enqueued = 0; + seg_num = 0; + extra_idx = 0; + seg_res = 0; + hdr_size = hw->vtnet_hdr_size; + + while (i < nb_used) { + struct virtio_net_hdr_mrg_rxbuf *header; + + if (nb_rx == nb_pkts) + break; + + num = virtqueue_dequeue_burst_rx(rxvq, rcv_pkts, len, 1); + if (num != 1) + continue; + + i++; + + PMD_RX_LOG(DEBUG, "dequeue:%d\n", num); + PMD_RX_LOG(DEBUG, "packet len:%d\n", len[0]); + + rxm = rcv_pkts[0]; + + if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) { + PMD_RX_LOG(ERR, "Packet drop\n"); + nb_enqueued++; + virtio_discard_rxbuf(rxvq, rxm); + rxvq->errors++; + continue; + } + + header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr + + RTE_PKTMBUF_HEADROOM - hdr_size); + seg_num = header->num_buffers; + + if (seg_num == 0) + seg_num = 1; + + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rxm->nb_segs = seg_num; + rxm->next = NULL; + rxm->ol_flags = 0; + rxm->vlan_tci = 0; + rxm->pkt_len = (uint32_t)(len[0] - hdr_size); + rxm->data_len = (uint16_t)(len[0] - hdr_size); + + rxm->port = rxvq->port_id; + rx_pkts[nb_rx] = rxm; + prev = rxm; + + seg_res = seg_num - 1; + + while (seg_res != 0) { + /* + * Get extra segments for current uncompleted packet. 
+ */ + uint16_t rcv_cnt = + RTE_MIN(seg_res, RTE_DIM(rcv_pkts)); + if (likely(VIRTQUEUE_NUSED(rxvq) >= rcv_cnt)) { + uint32_t rx_num = + virtqueue_dequeue_burst_rx(rxvq, + rcv_pkts, len, rcv_cnt); + i += rx_num; + rcv_cnt = rx_num; + } else { + PMD_RX_LOG(ERR, + "No enough segments for packet.\n"); + nb_enqueued++; + virtio_discard_rxbuf(rxvq, rxm); + rxvq->errors++; + break; + } + + extra_idx = 0; + + while (extra_idx < rcv_cnt) { + rxm = rcv_pkts[extra_idx]; + + rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size; + rxm->next = NULL; + rxm->pkt_len = (uint32_t)(len[extra_idx]); + rxm->data_len = (uint16_t)(len[extra_idx]); + + if (prev) + prev->next = rxm; + + prev = rxm; + rx_pkts[nb_rx]->pkt_len += rxm->pkt_len; + extra_idx++; + }; + seg_res -= rcv_cnt; + } + + if (hw->vlan_strip) + rte_vlan_strip(rx_pkts[nb_rx]); + + VIRTIO_DUMP_PACKET(rx_pkts[nb_rx], + rx_pkts[nb_rx]->data_len); + + rxvq->bytes += rx_pkts[nb_rx]->pkt_len; + virtio_update_packet_stats(rxvq, rx_pkts[nb_rx]); + nb_rx++; + } + + rxvq->packets += nb_rx; + + /* Allocate new mbuf for the used descriptor */ + error = ENOSPC; + while (likely(!virtqueue_full(rxvq))) { + new_mbuf = rte_rxmbuf_alloc(rxvq->mpool); + if (unlikely(new_mbuf == NULL)) { + struct rte_eth_dev *dev + = &rte_eth_devices[rxvq->port_id]; + dev->data->rx_mbuf_alloc_failed++; + break; + } + error = virtqueue_enqueue_recv_refill(rxvq, new_mbuf); + if (unlikely(error)) { + rte_pktmbuf_free(new_mbuf); + break; + } + nb_enqueued++; + } + + if (likely(nb_enqueued)) { + vq_update_avail_idx(rxvq); + + if (unlikely(virtqueue_kick_prepare(rxvq))) { + virtqueue_notify(rxvq); + PMD_RX_LOG(DEBUG, "Notified"); + } + } + + return nb_rx; +} + +uint16_t +virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct virtqueue *txvq = tx_queue; + struct virtio_hw *hw = txvq->hw; + uint16_t hdr_size = hw->vtnet_hdr_size; + uint16_t nb_used, nb_tx; + int error; + + if (unlikely(nb_pkts < 1)) + return nb_pkts; + + 
PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); + nb_used = VIRTQUEUE_NUSED(txvq); + + virtio_rmb(); + if (likely(nb_used > txvq->vq_nentries - txvq->vq_free_thresh)) + virtio_xmit_cleanup(txvq, nb_used); + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + struct rte_mbuf *txm = tx_pkts[nb_tx]; + int can_push = 0, use_indirect = 0, slots, need; + + /* Do VLAN tag insertion */ + if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) { + error = rte_vlan_insert(&txm); + if (unlikely(error)) { + rte_pktmbuf_free(txm); + continue; + } + } + + /* optimize ring usage */ + if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) && + rte_mbuf_refcnt_read(txm) == 1 && + txm->nb_segs == 1 && + rte_pktmbuf_headroom(txm) >= hdr_size && + rte_is_aligned(rte_pktmbuf_mtod(txm, char *), + __alignof__(struct virtio_net_hdr_mrg_rxbuf))) + can_push = 1; + else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) && + txm->nb_segs < VIRTIO_MAX_TX_INDIRECT) + use_indirect = 1; + + /* How many main ring entries are needed to this Tx? + * any_layout => number of segments + * indirect => 1 + * default => number of segments + 1 + */ + slots = use_indirect ? 
1 : (txm->nb_segs + !can_push); + need = slots - txvq->vq_free_cnt; + + /* Positive value indicates it need free vring descriptors */ + if (unlikely(need > 0)) { + nb_used = VIRTQUEUE_NUSED(txvq); + virtio_rmb(); + need = RTE_MIN(need, (int)nb_used); + + virtio_xmit_cleanup(txvq, need); + need = slots - txvq->vq_free_cnt; + if (unlikely(need > 0)) { + PMD_TX_LOG(ERR, + "No free tx descriptors to transmit"); + break; + } + } + + /* Enqueue Packet buffers */ + virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push); + + txvq->bytes += txm->pkt_len; + virtio_update_packet_stats(txvq, txm); + } + + txvq->packets += nb_tx; + + if (likely(nb_tx)) { + vq_update_avail_idx(txvq); + + if (unlikely(virtqueue_kick_prepare(txvq))) { + virtqueue_notify(txvq); + PMD_TX_LOG(DEBUG, "Notified backend after xmit"); + } + } + + return nb_tx; +} diff --git a/drivers/net/virtio/virtio_rxtx.h b/drivers/net/virtio/virtio_rxtx.h new file mode 100644 index 00000000..a76c3e52 --- /dev/null +++ b/drivers/net/virtio/virtio_rxtx.h @@ -0,0 +1,41 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define RTE_PMD_VIRTIO_RX_MAX_BURST 64 + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 +int virtio_rxq_vec_setup(struct virtqueue *rxq); + +int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, + struct rte_mbuf *m); +#endif diff --git a/drivers/net/virtio/virtio_rxtx_simple.c b/drivers/net/virtio/virtio_rxtx_simple.c new file mode 100644 index 00000000..8f5293dd --- /dev/null +++ b/drivers/net/virtio/virtio_rxtx_simple.c @@ -0,0 +1,418 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. 
+ * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <tmmintrin.h> + +#include <rte_cycles.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_branch_prediction.h> +#include <rte_mempool.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_prefetch.h> +#include <rte_string_fns.h> +#include <rte_errno.h> +#include <rte_byteorder.h> + +#include "virtio_logs.h" +#include "virtio_ethdev.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" + +#define RTE_VIRTIO_VPMD_RX_BURST 32 +#define RTE_VIRTIO_DESC_PER_LOOP 8 +#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif + +int __attribute__((cold)) +virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, + struct rte_mbuf *cookie) +{ + struct vq_desc_extra *dxp; + struct vring_desc *start_dp; + uint16_t desc_idx; + + desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1); + dxp = &vq->vq_descx[desc_idx]; + dxp->cookie = (void *)cookie; + vq->sw_ring[desc_idx] = cookie; + + start_dp = vq->vq_ring.desc; + start_dp[desc_idx].addr = (uint64_t)((uintptr_t)cookie->buf_physaddr + + RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size); + start_dp[desc_idx].len = cookie->buf_len - + RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size; + + vq->vq_free_cnt--; + vq->vq_avail_idx++; + + return 0; +} + +static inline void +virtio_rxq_rearm_vec(struct virtqueue *rxvq) +{ + int i; + uint16_t desc_idx; + struct rte_mbuf **sw_ring; + struct vring_desc *start_dp; + int ret; + + desc_idx = rxvq->vq_avail_idx & (rxvq->vq_nentries - 1); + sw_ring = &rxvq->sw_ring[desc_idx]; + start_dp = &rxvq->vq_ring.desc[desc_idx]; + + ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring, + RTE_VIRTIO_VPMD_RX_REARM_THRESH); + if (unlikely(ret)) { + rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed += + 
RTE_VIRTIO_VPMD_RX_REARM_THRESH; + return; + } + + for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) { + uintptr_t p; + + p = (uintptr_t)&sw_ring[i]->rearm_data; + *(uint64_t *)p = rxvq->mbuf_initializer; + + start_dp[i].addr = + (uint64_t)((uintptr_t)sw_ring[i]->buf_physaddr + + RTE_PKTMBUF_HEADROOM - rxvq->hw->vtnet_hdr_size); + start_dp[i].len = sw_ring[i]->buf_len - + RTE_PKTMBUF_HEADROOM + rxvq->hw->vtnet_hdr_size; + } + + rxvq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH; + rxvq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH; + vq_update_avail_idx(rxvq); +} + +/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) + * + * This routine is for non-mergeable RX, one desc for each guest buffer. + * This routine is based on the RX ring layout optimization. Each entry in the + * avail ring points to the desc with the same index in the desc ring and this + * will never be changed in the driver. + * + * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet + */ +uint16_t +virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct virtqueue *rxvq = rx_queue; + uint16_t nb_used; + uint16_t desc_idx; + struct vring_used_elem *rused; + struct rte_mbuf **sw_ring; + struct rte_mbuf **sw_ring_end; + uint16_t nb_pkts_received; + __m128i shuf_msk1, shuf_msk2, len_adjust; + + shuf_msk1 = _mm_set_epi8( + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, /* vlan tci */ + 5, 4, /* dat len */ + 0xFF, 0xFF, 5, 4, /* pkt len */ + 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ + + ); + + shuf_msk2 = _mm_set_epi8( + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, /* vlan tci */ + 13, 12, /* dat len */ + 0xFF, 0xFF, 13, 12, /* pkt len */ + 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ + ); + + /* Subtract the header length. + * In which case do we need the header length in used->len ? 
+ */ + len_adjust = _mm_set_epi16( + 0, 0, + 0, + (uint16_t)-rxvq->hw->vtnet_hdr_size, + 0, (uint16_t)-rxvq->hw->vtnet_hdr_size, + 0, 0); + + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) + return 0; + + nb_used = *(volatile uint16_t *)&rxvq->vq_ring.used->idx - + rxvq->vq_used_cons_idx; + + rte_compiler_barrier(); + + if (unlikely(nb_used == 0)) + return 0; + + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); + nb_used = RTE_MIN(nb_used, nb_pkts); + + desc_idx = (uint16_t)(rxvq->vq_used_cons_idx & (rxvq->vq_nentries - 1)); + rused = &rxvq->vq_ring.used->ring[desc_idx]; + sw_ring = &rxvq->sw_ring[desc_idx]; + sw_ring_end = &rxvq->sw_ring[rxvq->vq_nentries]; + + _mm_prefetch((const void *)rused, _MM_HINT_T0); + + if (rxvq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { + virtio_rxq_rearm_vec(rxvq); + if (unlikely(virtqueue_kick_prepare(rxvq))) + virtqueue_notify(rxvq); + } + + for (nb_pkts_received = 0; + nb_pkts_received < nb_used;) { + __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2]; + __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2]; + __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP]; + + mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0)); + desc[0] = _mm_loadu_si128((__m128i *)(rused + 0)); + _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]); + + mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2)); + desc[1] = _mm_loadu_si128((__m128i *)(rused + 2)); + _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]); + + mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4)); + desc[2] = _mm_loadu_si128((__m128i *)(rused + 4)); + _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]); + + mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6)); + desc[3] = _mm_loadu_si128((__m128i *)(rused + 6)); + _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]); + + pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2); + pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1); + pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust); + pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust); + _mm_storeu_si128((void 
*)&rx_pkts[1]->rx_descriptor_fields1, + pkt_mb[1]); + _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1, + pkt_mb[0]); + + pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2); + pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1); + pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust); + pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1, + pkt_mb[3]); + _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1, + pkt_mb[2]); + + pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2); + pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1); + pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust); + pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1, + pkt_mb[5]); + _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1, + pkt_mb[4]); + + pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2); + pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1); + pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust); + pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1, + pkt_mb[7]); + _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1, + pkt_mb[6]); + + if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) { + if (sw_ring + nb_used <= sw_ring_end) + nb_pkts_received += nb_used; + else + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >= + sw_ring_end)) { + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP; + + rx_pkts += RTE_VIRTIO_DESC_PER_LOOP; + sw_ring += RTE_VIRTIO_DESC_PER_LOOP; + rused += RTE_VIRTIO_DESC_PER_LOOP; + nb_used -= RTE_VIRTIO_DESC_PER_LOOP; + } + } + } + + rxvq->vq_used_cons_idx += nb_pkts_received; + rxvq->vq_free_cnt += nb_pkts_received; + rxvq->packets += nb_pkts_received; + return nb_pkts_received; +} + +#define VIRTIO_TX_FREE_THRESH 32 +#define VIRTIO_TX_MAX_FREE_BUF_SZ 
32 +#define VIRTIO_TX_FREE_NR 32 +/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */ +static inline void +virtio_xmit_cleanup(struct virtqueue *vq) +{ + uint16_t i, desc_idx; + int nb_free = 0; + struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ]; + + desc_idx = (uint16_t)(vq->vq_used_cons_idx & + ((vq->vq_nentries >> 1) - 1)); + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (likely(m != NULL)) { + free[0] = m; + nb_free = 1; + for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (likely(m != NULL)) { + if (likely(m->pool == free[0]->pool)) + free[nb_free++] = m; + else { + rte_mempool_put_bulk(free[0]->pool, + (void **)free, nb_free); + free[0] = m; + nb_free = 1; + } + } + } + rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free); + } else { + for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (m != NULL) + rte_mempool_put(m->pool, m); + } + } + + vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR; + vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1); +} + +uint16_t +virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct virtqueue *txvq = tx_queue; + uint16_t nb_used; + uint16_t desc_idx; + struct vring_desc *start_dp; + uint16_t nb_tail, nb_commit; + int i; + uint16_t desc_idx_max = (txvq->vq_nentries >> 1) - 1; + + nb_used = VIRTQUEUE_NUSED(txvq); + rte_compiler_barrier(); + + if (nb_used >= VIRTIO_TX_FREE_THRESH) + virtio_xmit_cleanup(tx_queue); + + nb_commit = nb_pkts = RTE_MIN((txvq->vq_free_cnt >> 1), nb_pkts); + desc_idx = (uint16_t) (txvq->vq_avail_idx & desc_idx_max); + start_dp = txvq->vq_ring.desc; + nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx); + + if (nb_commit >= nb_tail) { + for (i = 0; i < nb_tail; i++) + txvq->vq_descx[desc_idx + i].cookie = 
tx_pkts[i]; + for (i = 0; i < nb_tail; i++) { + start_dp[desc_idx].addr = + rte_mbuf_data_dma_addr(*tx_pkts); + start_dp[desc_idx].len = (*tx_pkts)->pkt_len; + tx_pkts++; + desc_idx++; + } + nb_commit -= nb_tail; + desc_idx = 0; + } + for (i = 0; i < nb_commit; i++) + txvq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; + for (i = 0; i < nb_commit; i++) { + start_dp[desc_idx].addr = rte_mbuf_data_dma_addr(*tx_pkts); + start_dp[desc_idx].len = (*tx_pkts)->pkt_len; + tx_pkts++; + desc_idx++; + } + + rte_compiler_barrier(); + + txvq->vq_free_cnt -= (uint16_t)(nb_pkts << 1); + txvq->vq_avail_idx += nb_pkts; + txvq->vq_ring.avail->idx = txvq->vq_avail_idx; + txvq->packets += nb_pkts; + + if (likely(nb_pkts)) { + if (unlikely(virtqueue_kick_prepare(txvq))) + virtqueue_notify(txvq); + } + + return nb_pkts; +} + +int __attribute__((cold)) +virtio_rxq_vec_setup(struct virtqueue *rxq) +{ + uintptr_t p; + struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ + + mb_def.nb_segs = 1; + mb_def.data_off = RTE_PKTMBUF_HEADROOM; + mb_def.port = rxq->port_id; + rte_mbuf_refcnt_set(&mb_def, 1); + + /* prevent compiler reordering: rearm_data covers previous fields */ + rte_compiler_barrier(); + p = (uintptr_t)&mb_def.rearm_data; + rxq->mbuf_initializer = *(uint64_t *)p; + + return 0; +} diff --git a/drivers/net/virtio/virtqueue.c b/drivers/net/virtio/virtqueue.c new file mode 100644 index 00000000..7f60e3ef --- /dev/null +++ b/drivers/net/virtio/virtqueue.c @@ -0,0 +1,72 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdint.h> + +#include <rte_mbuf.h> + +#include "virtqueue.h" +#include "virtio_logs.h" +#include "virtio_pci.h" + +void +virtqueue_disable_intr(struct virtqueue *vq) +{ + /* + * Set VRING_AVAIL_F_NO_INTERRUPT to hint host + * not to interrupt when it consumes packets + * Note: this is only considered a hint to the host + */ + vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; +} + +/* + * Two types of mbuf to be cleaned: + * 1) mbuf that has been consumed by backend but not used by virtio. + * 2) mbuf that hasn't been consumed by backend. 
+ */ +struct rte_mbuf * +virtqueue_detatch_unused(struct virtqueue *vq) +{ + struct rte_mbuf *cookie; + int idx; + + if (vq != NULL) + for (idx = 0; idx < vq->vq_nentries; idx++) { + cookie = vq->vq_descx[idx].cookie; + if (cookie != NULL) { + vq->vq_descx[idx].cookie = NULL; + return cookie; + } + } + return NULL; +} diff --git a/drivers/net/virtio/virtqueue.h b/drivers/net/virtio/virtqueue.h new file mode 100644 index 00000000..4e9239e0 --- /dev/null +++ b/drivers/net/virtio/virtqueue.h @@ -0,0 +1,344 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTQUEUE_H_ +#define _VIRTQUEUE_H_ + +#include <stdint.h> + +#include <rte_atomic.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_mempool.h> + +#include "virtio_pci.h" +#include "virtio_ring.h" +#include "virtio_logs.h" + +struct rte_mbuf; + +/* + * Per virtio_config.h in Linux. + * For virtio_pci on SMP, we don't need to order with respect to MMIO + * accesses through relaxed memory I/O windows, so smp_mb() et al are + * sufficient. + * + */ +#define virtio_mb() rte_smp_mb() +#define virtio_rmb() rte_smp_rmb() +#define virtio_wmb() rte_smp_wmb() + +#ifdef RTE_PMD_PACKET_PREFETCH +#define rte_packet_prefetch(p) rte_prefetch1(p) +#else +#define rte_packet_prefetch(p) do {} while(0) +#endif + +#define VIRTQUEUE_MAX_NAME_SZ 32 + +#define VTNET_SQ_RQ_QUEUE_IDX 0 +#define VTNET_SQ_TQ_QUEUE_IDX 1 +#define VTNET_SQ_CQ_QUEUE_IDX 2 + +enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 }; +/** + * The maximum virtqueue size is 2^15. Use that value as the end of + * descriptor chain terminator since it will never be a valid index + * in the descriptor table. This is used to verify we are correctly + * handling vq_free_cnt. + */ +#define VQ_RING_DESC_CHAIN_END 32768 + +/** + * Control the RX mode, ie. promiscuous, allmulti, etc... + * All commands require an "out" sg entry containing a 1 byte + * state value, zero = disable, non-zero = enable. 
Commands
+ * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature.
+ * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA.
+ */
+#define VIRTIO_NET_CTRL_RX              0
+#define VIRTIO_NET_CTRL_RX_PROMISC      0
+#define VIRTIO_NET_CTRL_RX_ALLMULTI     1
+#define VIRTIO_NET_CTRL_RX_ALLUNI       2
+#define VIRTIO_NET_CTRL_RX_NOMULTI      3
+#define VIRTIO_NET_CTRL_RX_NOUNI        4
+#define VIRTIO_NET_CTRL_RX_NOBCAST      5
+
+/**
+ * Control the MAC
+ *
+ * The MAC filter table is managed by the hypervisor, the guest should
+ * assume the size is infinite. Filtering should be considered
+ * non-perfect, ie. based on hypervisor resources, the guest may
+ * receive packets from sources not specified in the filter list.
+ *
+ * In addition to the class/cmd header, the TABLE_SET command requires
+ * two out scatterlists. Each contains a 4 byte count of entries followed
+ * by a concatenated byte stream of the ETH_ALEN MAC addresses. The
+ * first sg list contains unicast addresses, the second is for multicast.
+ * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature
+ * is available.
+ *
+ * The ADDR_SET command requires one out scatterlist, which contains a
+ * 6 byte MAC address. This functionality is present if the
+ * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available.
+ */
+struct virtio_net_ctrl_mac {
+	uint32_t entries; /**< 4 byte count of the MAC addresses that follow */
+	uint8_t macs[][ETHER_ADDR_LEN]; /**< concatenated MAC addresses */
+} __attribute__((__packed__));
+
+/* MAC control class and its two commands (TABLE_SET, ADDR_SET) described above. */
+#define VIRTIO_NET_CTRL_MAC    1
+ #define VIRTIO_NET_CTRL_MAC_TABLE_SET        0
+ #define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
+
+/**
+ * Control VLAN filtering
+ *
+ * The VLAN filter table is controlled via a simple ADD/DEL interface.
+ * VLAN IDs not added may be filtered by the hypervisor. Del is the
+ * opposite of add. Both commands expect an out entry containing a 2
+ * byte VLAN ID. VLAN filtering is available with the
+ * VIRTIO_NET_F_CTRL_VLAN feature bit.
+ */
+#define VIRTIO_NET_CTRL_VLAN     2
+#define VIRTIO_NET_CTRL_VLAN_ADD 0
+#define VIRTIO_NET_CTRL_VLAN_DEL 1
+
+/* Header that starts every control command: command class followed by the command. */
+struct virtio_net_ctrl_hdr {
+	uint8_t class;
+	uint8_t cmd;
+} __attribute__((packed));
+
+/* One-byte command status; takes the VIRTIO_NET_OK / VIRTIO_NET_ERR values below. */
+typedef uint8_t virtio_net_ctrl_ack;
+
+#define VIRTIO_NET_OK     0
+#define VIRTIO_NET_ERR    1
+
+/* Size in bytes of the data[] payload area of struct virtio_pmd_ctrl. */
+#define VIRTIO_MAX_CTRL_DATA 2048
+
+/* A complete control message: header, status byte and command payload. */
+struct virtio_pmd_ctrl {
+	struct virtio_net_ctrl_hdr hdr;
+	virtio_net_ctrl_ack status;
+	uint8_t data[VIRTIO_MAX_CTRL_DATA];
+};
+
+/*
+ * Per-queue state. The structure ends in a zero-length vq_descx[] array,
+ * so its per-descriptor entries live in storage past the end of the struct.
+ */
+struct virtqueue {
+	struct virtio_hw *hw; /**< virtio_hw structure pointer. */
+	const struct rte_memzone *mz; /**< mem zone to populate RX ring. */
+	const struct rte_memzone *virtio_net_hdr_mz; /**< memzone to populate hdr. */
+	struct rte_mempool *mpool; /**< mempool for mbuf allocation */
+	uint16_t queue_id; /**< DPDK queue index. */
+	uint8_t port_id; /**< Device port identifier. */
+	uint16_t vq_queue_index; /**< PCI queue index */
+
+	void *vq_ring_virt_mem; /**< linear address of vring */
+	unsigned int vq_ring_size; /* NOTE(review): presumably the vring size in bytes - confirm at the allocation site */
+	phys_addr_t vq_ring_mem; /**< physical address of vring */
+
+	struct vring vq_ring; /**< vring keeping desc, used and avail */
+	uint16_t vq_free_cnt; /**< num of desc available */
+	uint16_t vq_nentries; /**< vring desc numbers */
+	uint16_t vq_free_thresh; /**< free threshold */
+	/**
+	 * Head of the free chain in the descriptor table. If
+	 * there are no free descriptors, this will be set to
+	 * VQ_RING_DESC_CHAIN_END.
+	 */
+	uint16_t vq_desc_head_idx;
+	uint16_t vq_desc_tail_idx; /* NOTE(review): presumably the tail of the same free chain - confirm */
+	/**
+	 * Last consumed descriptor in the used table,
+	 * trails vq_ring.used->idx.
+	 */
+	uint16_t vq_used_cons_idx;
+	/*
+	 * Driver-side avail index: vq_update_avail_ring() increments it and
+	 * vq_update_avail_idx() copies it to vq_ring.avail->idx.
+	 */
+	uint16_t vq_avail_idx;
+	uint64_t mbuf_initializer; /**< value to init mbufs. */
+	phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */
+
+	struct rte_mbuf **sw_ring; /**< RX software ring. */
+	/* dummy mbuf, for wraparound when processing RX ring. */
+	struct rte_mbuf fake_mbuf;
+
+	/* Statistics */
+	uint64_t packets;
+	uint64_t bytes;
+	uint64_t errors;
+	uint64_t multicast;
+	uint64_t broadcast;
+	/* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */
+	uint64_t size_bins[8];
+
+	uint16_t *notify_addr; /* NOTE(review): presumably the queue notification address - confirm in virtio_pci.c */
+
+	/*
+	 * Per-descriptor bookkeeping; virtqueue_detatch_unused() walks
+	 * vq_nentries of these entries.
+	 */
+	struct vq_desc_extra {
+		void *cookie; /* opaque pointer; virtqueue_detatch_unused() returns it as an mbuf */
+		uint16_t ndescs; /* NOTE(review): presumably the descriptor count of the chain - confirm */
+	} vq_descx[0];
+};
+
+/* If multiqueue is provided by host, then we support it. */
+#define VIRTIO_NET_CTRL_MQ   4
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET        0
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN        1
+#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX        0x8000
+
+/*
+ * NOTE(review): repeats, with the same value, the VIRTIO_NET_CTRL_MAC_ADDR_SET
+ * definition that follows VIRTIO_NET_CTRL_MAC earlier in this header.
+ */
+#define VIRTIO_NET_CTRL_MAC_ADDR_SET         1
+
+/**
+ * This is the first element of the scatter-gather list. If you don't
+ * specify GSO or CSUM features, you can simply ignore the header.
+ */
+struct virtio_net_hdr {
+#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1    /**< Use csum_start,csum_offset*/
+	uint8_t flags;
+#define VIRTIO_NET_HDR_GSO_NONE     0    /**< Not a GSO frame */
+#define VIRTIO_NET_HDR_GSO_TCPV4    1    /**< GSO frame, IPv4 TCP (TSO) */
+#define VIRTIO_NET_HDR_GSO_UDP      3    /**< GSO frame, IPv4 UDP (UFO) */
+#define VIRTIO_NET_HDR_GSO_TCPV6    4    /**< GSO frame, IPv6 TCP */
+#define VIRTIO_NET_HDR_GSO_ECN      0x80 /**< TCP has ECN set */
+	uint8_t gso_type;
+	uint16_t hdr_len;     /**< Ethernet + IP + tcp/udp hdrs */
+	uint16_t gso_size;    /**< Bytes to append to hdr_len per frame */
+	uint16_t csum_start;  /**< Position to start checksumming from */
+	uint16_t csum_offset; /**< Offset after that to place checksum */
+};
+
+/**
+ * This is the version of the header to use when the MRG_RXBUF
+ * feature has been negotiated.
+ */ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + uint16_t num_buffers; /**< Number of merged rx buffers */ +}; + +/* Region reserved to allow for transmit header and indirect ring */ +#define VIRTIO_MAX_TX_INDIRECT 8 +struct virtio_tx_region { + struct virtio_net_hdr_mrg_rxbuf tx_hdr; + struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT] + __attribute__((__aligned__(16))); +}; + +/* Chain all the descriptors in the ring with an END */ +static inline void +vring_desc_init(struct vring_desc *dp, uint16_t n) +{ + uint16_t i; + + for (i = 0; i < n - 1; i++) + dp[i].next = (uint16_t)(i + 1); + dp[i].next = VQ_RING_DESC_CHAIN_END; +} + +/** + * Tell the backend not to interrupt us. + */ +void virtqueue_disable_intr(struct virtqueue *vq); +/** + * Dump virtqueue internal structures, for debug purpose only. + */ +void virtqueue_dump(struct virtqueue *vq); +/** + * Get all mbufs to be freed. + */ +struct rte_mbuf *virtqueue_detatch_unused(struct virtqueue *vq); + +static inline int +virtqueue_full(const struct virtqueue *vq) +{ + return vq->vq_free_cnt == 0; +} + +#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx)) + +static inline void +vq_update_avail_idx(struct virtqueue *vq) +{ + virtio_wmb(); + vq->vq_ring.avail->idx = vq->vq_avail_idx; +} + +static inline void +vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx) +{ + uint16_t avail_idx; + /* + * Place the head of the descriptor chain into the next slot and make + * it usable to the host. The chain is made available now rather than + * deferring to virtqueue_notify() in the hopes that if the host is + * currently running on another CPU, we can keep it processing the new + * descriptor. 
+ */ + avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1)); + vq->vq_ring.avail->ring[avail_idx] = desc_idx; + vq->vq_avail_idx++; +} + +static inline int +virtqueue_kick_prepare(struct virtqueue *vq) +{ + return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY); +} + +static inline void +virtqueue_notify(struct virtqueue *vq) +{ + /* + * Ensure updated avail->idx is visible to host. + * For virtio on IA, the notificaiton is through io port operation + * which is a serialization instruction itself. + */ + vq->hw->vtpci_ops->notify_queue(vq->hw, vq); +} + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP +#define VIRTQUEUE_DUMP(vq) do { \ + uint16_t used_idx, nused; \ + used_idx = (vq)->vq_ring.used->idx; \ + nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \ + PMD_INIT_LOG(DEBUG, \ + "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \ + " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \ + " avail.flags=0x%x; used.flags=0x%x", \ + (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \ + (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \ + (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \ + (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \ +} while (0) +#else +#define VIRTQUEUE_DUMP(vq) do { } while (0) +#endif + +#endif /* _VIRTQUEUE_H_ */ |