diff options
Diffstat (limited to 'src/dpdk/drivers/net/virtio')
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_ethdev.c | 1572 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_ethdev.h | 127 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_logs.h | 70 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_pci.c | 762 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_pci.h | 317 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_ring.h | 163 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_rxtx.c | 982 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_rxtx.h | 95 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c | 425 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtio_user_ethdev.c | 476 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtqueue.c | 72 | ||||
-rw-r--r-- | src/dpdk/drivers/net/virtio/virtqueue.h | 347 |
12 files changed, 5408 insertions, 0 deletions
diff --git a/src/dpdk/drivers/net/virtio/virtio_ethdev.c b/src/dpdk/drivers/net/virtio/virtio_ethdev.c new file mode 100644 index 00000000..07d64497 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_ethdev.c @@ -0,0 +1,1572 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <string.h> +#include <stdio.h> +#include <errno.h> +#include <unistd.h> + +#include <rte_ethdev.h> +#include <rte_memcpy.h> +#include <rte_string_fns.h> +#include <rte_memzone.h> +#include <rte_malloc.h> +#include <rte_atomic.h> +#include <rte_branch_prediction.h> +#include <rte_pci.h> +#include <rte_ether.h> +#include <rte_common.h> +#include <rte_errno.h> + +#include <rte_memory.h> +#include <rte_eal.h> +#include <rte_dev.h> + +#include "virtio_ethdev.h" +#include "virtio_pci.h" +#include "virtio_logs.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" + +static int eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev); +static int virtio_dev_configure(struct rte_eth_dev *dev); +static int virtio_dev_start(struct rte_eth_dev *dev); +static void virtio_dev_stop(struct rte_eth_dev *dev); +static void virtio_dev_promiscuous_enable(struct rte_eth_dev *dev); +static void virtio_dev_promiscuous_disable(struct rte_eth_dev *dev); +static void virtio_dev_allmulticast_enable(struct rte_eth_dev *dev); +static void virtio_dev_allmulticast_disable(struct rte_eth_dev *dev); +static void virtio_dev_info_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +static int virtio_dev_link_update(struct rte_eth_dev *dev, + __rte_unused int wait_to_complete); + +static void virtio_set_hwaddr(struct virtio_hw *hw); +static void virtio_get_hwaddr(struct virtio_hw *hw); + +static void virtio_dev_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats); +static int virtio_dev_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, unsigned n); +static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + unsigned limit); +static void virtio_dev_stats_reset(struct rte_eth_dev *dev); +static void virtio_dev_free_mbufs(struct rte_eth_dev *dev); +static int virtio_vlan_filter_set(struct rte_eth_dev *dev, + uint16_t vlan_id, int on); +static void virtio_mac_addr_add(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint32_t index, uint32_t vmdq __rte_unused); +static void virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index); +static void virtio_mac_addr_set(struct rte_eth_dev *dev, + struct ether_addr *mac_addr); + +static int virtio_dev_queue_stats_mapping_set( + __rte_unused struct rte_eth_dev *eth_dev, + __rte_unused uint16_t queue_id, + __rte_unused uint8_t stat_idx, + __rte_unused uint8_t is_rx); + +/* + * The set of PCI devices this driver supports + */ +static const struct rte_pci_id pci_id_virtio_map[] = { + { RTE_PCI_DEVICE(VIRTIO_PCI_VENDORID, VIRTIO_PCI_DEVICEID_MIN) }, + { .vendor_id = 0, /* sentinel */ }, +}; + +struct rte_virtio_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned offset; +}; + +/* [rt]x_qX_ is prepended to the name string here */ +static const struct rte_virtio_xstats_name_off rte_virtio_rxq_stat_strings[] = { + {"good_packets", offsetof(struct virtnet_rx, stats.packets)}, + {"good_bytes", offsetof(struct virtnet_rx, stats.bytes)}, + {"errors", offsetof(struct virtnet_rx, stats.errors)}, + {"multicast_packets", offsetof(struct virtnet_rx, stats.multicast)}, + {"broadcast_packets", offsetof(struct virtnet_rx, stats.broadcast)}, + {"undersize_packets", offsetof(struct virtnet_rx, stats.size_bins[0])}, + {"size_64_packets", offsetof(struct virtnet_rx, stats.size_bins[1])}, + {"size_65_127_packets", offsetof(struct virtnet_rx, stats.size_bins[2])}, + {"size_128_255_packets", offsetof(struct virtnet_rx, stats.size_bins[3])}, + {"size_256_511_packets", offsetof(struct virtnet_rx, stats.size_bins[4])}, + {"size_512_1023_packets", offsetof(struct virtnet_rx, stats.size_bins[5])}, + {"size_1024_1517_packets", offsetof(struct virtnet_rx, stats.size_bins[6])}, + {"size_1518_max_packets", offsetof(struct virtnet_rx, stats.size_bins[7])}, +}; + +/* [rt]x_qX_ is prepended to the name string here */ +static const struct rte_virtio_xstats_name_off rte_virtio_txq_stat_strings[] = { + {"good_packets", offsetof(struct virtnet_tx, stats.packets)}, + {"good_bytes", offsetof(struct virtnet_tx, stats.bytes)}, + {"errors", offsetof(struct virtnet_tx, stats.errors)}, + {"multicast_packets", offsetof(struct virtnet_tx, stats.multicast)}, + {"broadcast_packets", offsetof(struct virtnet_tx, stats.broadcast)}, + {"undersize_packets", offsetof(struct virtnet_tx, stats.size_bins[0])}, + {"size_64_packets", offsetof(struct virtnet_tx, stats.size_bins[1])}, + {"size_65_127_packets", offsetof(struct virtnet_tx, stats.size_bins[2])}, + {"size_128_255_packets", offsetof(struct virtnet_tx, stats.size_bins[3])}, + {"size_256_511_packets", offsetof(struct virtnet_tx, stats.size_bins[4])}, + {"size_512_1023_packets", offsetof(struct virtnet_tx, stats.size_bins[5])}, + {"size_1024_1517_packets", offsetof(struct virtnet_tx, stats.size_bins[6])}, + {"size_1518_max_packets", offsetof(struct virtnet_tx, stats.size_bins[7])}, +}; + +#define VIRTIO_NB_RXQ_XSTATS (sizeof(rte_virtio_rxq_stat_strings) / \ + sizeof(rte_virtio_rxq_stat_strings[0])) +#define VIRTIO_NB_TXQ_XSTATS (sizeof(rte_virtio_txq_stat_strings) / \ + sizeof(rte_virtio_txq_stat_strings[0])) + +static int +virtio_send_command(struct virtnet_ctl *cvq, struct virtio_pmd_ctrl *ctrl, + int *dlen, int pkt_num) +{ + uint32_t head, i; + int k, sum = 0; + virtio_net_ctrl_ack status = ~0; + struct virtio_pmd_ctrl result; + struct virtqueue *vq; + + ctrl->status = status; + + if (!cvq || !cvq->vq) { + PMD_INIT_LOG(ERR, "Control queue is not supported."); + return -1; + } + vq = cvq->vq; + head = vq->vq_desc_head_idx; + + PMD_INIT_LOG(DEBUG, "vq->vq_desc_head_idx = %d, status = %d, " + "vq->hw->cvq = %p vq = %p", + vq->vq_desc_head_idx, status, vq->hw->cvq, vq); + + if ((vq->vq_free_cnt < ((uint32_t)pkt_num + 2)) || (pkt_num < 1)) + return -1; + + memcpy(cvq->virtio_net_hdr_mz->addr, ctrl, + sizeof(struct virtio_pmd_ctrl)); + + /* + * Format is enforced in qemu code: + * One TX packet for header; + * At least one TX packet per argument; + * One RX packet for ACK. + */ + vq->vq_ring.desc[head].flags = VRING_DESC_F_NEXT; + vq->vq_ring.desc[head].addr = cvq->virtio_net_hdr_mem; + vq->vq_ring.desc[head].len = sizeof(struct virtio_net_ctrl_hdr); + vq->vq_free_cnt--; + i = vq->vq_ring.desc[head].next; + + for (k = 0; k < pkt_num; k++) { + vq->vq_ring.desc[i].flags = VRING_DESC_F_NEXT; + vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem + + sizeof(struct virtio_net_ctrl_hdr) + + sizeof(ctrl->status) + sizeof(uint8_t)*sum; + vq->vq_ring.desc[i].len = dlen[k]; + sum += dlen[k]; + vq->vq_free_cnt--; + i = vq->vq_ring.desc[i].next; + } + + vq->vq_ring.desc[i].flags = VRING_DESC_F_WRITE; + vq->vq_ring.desc[i].addr = cvq->virtio_net_hdr_mem + + sizeof(struct virtio_net_ctrl_hdr); + vq->vq_ring.desc[i].len = sizeof(ctrl->status); + vq->vq_free_cnt--; + + vq->vq_desc_head_idx = vq->vq_ring.desc[i].next; + + vq_update_avail_ring(vq, head); + vq_update_avail_idx(vq); + + PMD_INIT_LOG(DEBUG, "vq->vq_queue_index = %d", vq->vq_queue_index); + + virtqueue_notify(vq); + + rte_rmb(); + while (VIRTQUEUE_NUSED(vq) == 0) { + rte_rmb(); + usleep(100); + } + + while (VIRTQUEUE_NUSED(vq)) { + uint32_t idx, desc_idx, used_idx; + struct vring_used_elem *uep; + + used_idx = (uint32_t)(vq->vq_used_cons_idx + & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + idx = (uint32_t) uep->id; + desc_idx = idx; + + while (vq->vq_ring.desc[desc_idx].flags & VRING_DESC_F_NEXT) { + desc_idx = vq->vq_ring.desc[desc_idx].next; + vq->vq_free_cnt++; + } + + vq->vq_ring.desc[desc_idx].next = vq->vq_desc_head_idx; + vq->vq_desc_head_idx = idx; + + vq->vq_used_cons_idx++; + vq->vq_free_cnt++; + } + + PMD_INIT_LOG(DEBUG, "vq->vq_free_cnt=%d\nvq->vq_desc_head_idx=%d", + vq->vq_free_cnt, vq->vq_desc_head_idx); + + memcpy(&result, cvq->virtio_net_hdr_mz->addr, + sizeof(struct virtio_pmd_ctrl)); + + return result.status; +} + +static int +virtio_set_multiple_queues(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + ctrl.hdr.class = VIRTIO_NET_CTRL_MQ; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; + memcpy(ctrl.data, &nb_queues, sizeof(uint16_t)); + + dlen[0] = sizeof(uint16_t); + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) { + PMD_INIT_LOG(ERR, "Multiqueue configured but send command " + "failed, this is too late now..."); + return -EINVAL; + } + + return 0; +} + +void +virtio_dev_queue_release(struct virtqueue *vq) +{ + struct virtio_hw *hw; + + if (vq) { + hw = vq->hw; + if (vq->configured) + hw->vtpci_ops->del_queue(hw, vq); + + rte_free(vq->sw_ring); + rte_free(vq); + } +} + +int virtio_dev_queue_setup(struct rte_eth_dev *dev, + int queue_type, + uint16_t queue_idx, + uint16_t vtpci_queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + void **pvq) +{ + char vq_name[VIRTQUEUE_MAX_NAME_SZ]; + char vq_hdr_name[VIRTQUEUE_MAX_NAME_SZ]; + const struct rte_memzone *mz = NULL, *hdr_mz = NULL; + unsigned int vq_size, size; + struct virtio_hw *hw = dev->data->dev_private; + struct virtnet_rx *rxvq = NULL; + struct virtnet_tx *txvq = NULL; + struct virtnet_ctl *cvq = NULL; + struct virtqueue *vq; + const char *queue_names[] = {"rvq", "txq", "cvq"}; + size_t sz_vq, sz_q = 0, sz_hdr_mz = 0; + void *sw_ring = NULL; + int ret; + + PMD_INIT_LOG(DEBUG, "setting up queue: %u", vtpci_queue_idx); + + /* + * Read the virtqueue size from the Queue Size field + * Always power of 2 and if 0 virtqueue does not exist + */ + vq_size = hw->vtpci_ops->get_queue_num(hw, vtpci_queue_idx); + PMD_INIT_LOG(DEBUG, "vq_size: %u nb_desc:%u", vq_size, nb_desc); + if (vq_size == 0) { + PMD_INIT_LOG(ERR, "virtqueue does not exist"); + return -EINVAL; + } + + if (!rte_is_power_of_2(vq_size)) { + PMD_INIT_LOG(ERR, "virtqueue size is not powerof 2"); + return -EINVAL; + } + + snprintf(vq_name, sizeof(vq_name), "port%d_%s%d", + dev->data->port_id, queue_names[queue_type], queue_idx); + + sz_vq = RTE_ALIGN_CEIL(sizeof(*vq) + + vq_size * sizeof(struct vq_desc_extra), + RTE_CACHE_LINE_SIZE); + if (queue_type == VTNET_RQ) { + sz_q = sz_vq + sizeof(*rxvq); + } else if (queue_type == VTNET_TQ) { + sz_q = sz_vq + sizeof(*txvq); + /* + * For each xmit packet, allocate a virtio_net_hdr + * and indirect ring elements + */ + sz_hdr_mz = vq_size * sizeof(struct virtio_tx_region); + } else if (queue_type == VTNET_CQ) { + sz_q = sz_vq + sizeof(*cvq); + /* Allocate a page for control vq command, data and status */ + sz_hdr_mz = PAGE_SIZE; + } + + vq = rte_zmalloc_socket(vq_name, sz_q, RTE_CACHE_LINE_SIZE, socket_id); + if (vq == NULL) { + PMD_INIT_LOG(ERR, "can not allocate vq"); + return -ENOMEM; + } + vq->hw = hw; + vq->vq_queue_index = vtpci_queue_idx; + vq->vq_nentries = vq_size; + + if (nb_desc == 0 || nb_desc > vq_size) + nb_desc = vq_size; + vq->vq_free_cnt = nb_desc; + + /* + * Reserve a memzone for vring elements + */ + size = vring_size(vq_size, VIRTIO_PCI_VRING_ALIGN); + vq->vq_ring_size = RTE_ALIGN_CEIL(size, VIRTIO_PCI_VRING_ALIGN); + PMD_INIT_LOG(DEBUG, "vring_size: %d, rounded_vring_size: %d", + size, vq->vq_ring_size); + + mz = rte_memzone_reserve_aligned(vq_name, vq->vq_ring_size, socket_id, + 0, VIRTIO_PCI_VRING_ALIGN); + if (mz == NULL) { + if (rte_errno == EEXIST) + mz = rte_memzone_lookup(vq_name); + if (mz == NULL) { + ret = -ENOMEM; + goto fail_q_alloc; + } + } + + memset(mz->addr, 0, sizeof(mz->len)); + + vq->vq_ring_mem = mz->phys_addr; + vq->vq_ring_virt_mem = mz->addr; + PMD_INIT_LOG(DEBUG, "vq->vq_ring_mem: 0x%" PRIx64, + (uint64_t)mz->phys_addr); + PMD_INIT_LOG(DEBUG, "vq->vq_ring_virt_mem: 0x%" PRIx64, + (uint64_t)(uintptr_t)mz->addr); + + if (sz_hdr_mz) { + snprintf(vq_hdr_name, sizeof(vq_hdr_name), "port%d_%s%d_hdr", + dev->data->port_id, queue_names[queue_type], + queue_idx); + hdr_mz = rte_memzone_reserve_aligned(vq_hdr_name, sz_hdr_mz, + socket_id, 0, + RTE_CACHE_LINE_SIZE); + if (hdr_mz == NULL) { + if (rte_errno == EEXIST) + hdr_mz = rte_memzone_lookup(vq_hdr_name); + if (hdr_mz == NULL) { + ret = -ENOMEM; + goto fail_q_alloc; + } + } + } + + if (queue_type == VTNET_RQ) { + size_t sz_sw = (RTE_PMD_VIRTIO_RX_MAX_BURST + vq_size) * + sizeof(vq->sw_ring[0]); + + sw_ring = rte_zmalloc_socket("sw_ring", sz_sw, + RTE_CACHE_LINE_SIZE, socket_id); + if (!sw_ring) { + PMD_INIT_LOG(ERR, "can not allocate RX soft ring"); + ret = -ENOMEM; + goto fail_q_alloc; + } + + vq->sw_ring = sw_ring; + rxvq = (struct virtnet_rx *)RTE_PTR_ADD(vq, sz_vq); + rxvq->vq = vq; + rxvq->port_id = dev->data->port_id; + rxvq->queue_id = queue_idx; + rxvq->mz = mz; + *pvq = rxvq; + } else if (queue_type == VTNET_TQ) { + txvq = (struct virtnet_tx *)RTE_PTR_ADD(vq, sz_vq); + txvq->vq = vq; + txvq->port_id = dev->data->port_id; + txvq->queue_id = queue_idx; + txvq->mz = mz; + txvq->virtio_net_hdr_mz = hdr_mz; + txvq->virtio_net_hdr_mem = hdr_mz->phys_addr; + + *pvq = txvq; + } else if (queue_type == VTNET_CQ) { + cvq = (struct virtnet_ctl *)RTE_PTR_ADD(vq, sz_vq); + cvq->vq = vq; + cvq->mz = mz; + cvq->virtio_net_hdr_mz = hdr_mz; + cvq->virtio_net_hdr_mem = hdr_mz->phys_addr; + memset(cvq->virtio_net_hdr_mz->addr, 0, PAGE_SIZE); + *pvq = cvq; + } + + /* For virtio_user case (that is when dev->pci_dev is NULL), we use + * virtual address. And we need properly set _offset_, please see + * VIRTIO_MBUF_DATA_DMA_ADDR in virtqueue.h for more information. + */ + if (dev->pci_dev) + vq->offset = offsetof(struct rte_mbuf, buf_physaddr); + else { + vq->vq_ring_mem = (uintptr_t)mz->addr; + vq->offset = offsetof(struct rte_mbuf, buf_addr); + if (queue_type == VTNET_TQ) + txvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; + else if (queue_type == VTNET_CQ) + cvq->virtio_net_hdr_mem = (uintptr_t)hdr_mz->addr; + } + + if (queue_type == VTNET_TQ) { + struct virtio_tx_region *txr; + unsigned int i; + + txr = hdr_mz->addr; + memset(txr, 0, vq_size * sizeof(*txr)); + for (i = 0; i < vq_size; i++) { + struct vring_desc *start_dp = txr[i].tx_indir; + + vring_desc_init(start_dp, RTE_DIM(txr[i].tx_indir)); + + /* first indirect descriptor is always the tx header */ + start_dp->addr = txvq->virtio_net_hdr_mem + + i * sizeof(*txr) + + offsetof(struct virtio_tx_region, tx_hdr); + + start_dp->len = hw->vtnet_hdr_size; + start_dp->flags = VRING_DESC_F_NEXT; + } + } + + if (hw->vtpci_ops->setup_queue(hw, vq) < 0) { + PMD_INIT_LOG(ERR, "setup_queue failed"); + virtio_dev_queue_release(vq); + return -EINVAL; + } + + vq->configured = 1; + return 0; + +fail_q_alloc: + rte_free(sw_ring); + rte_memzone_free(hdr_mz); + rte_memzone_free(mz); + rte_free(vq); + + return ret; +} + +static int +virtio_dev_cq_queue_setup(struct rte_eth_dev *dev, uint16_t vtpci_queue_idx, + uint32_t socket_id) +{ + struct virtnet_ctl *cvq; + int ret; + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + ret = virtio_dev_queue_setup(dev, VTNET_CQ, VTNET_SQ_CQ_QUEUE_IDX, + vtpci_queue_idx, 0, socket_id, (void **)&cvq); + if (ret < 0) { + PMD_INIT_LOG(ERR, "control vq initialization failed"); + return ret; + } + + hw->cvq = cvq; + return 0; +} + +static void +virtio_free_queues(struct rte_eth_dev *dev) +{ + unsigned int i; + + for (i = 0; i < dev->data->nb_rx_queues; i++) + virtio_dev_rx_queue_release(dev->data->rx_queues[i]); + + dev->data->nb_rx_queues = 0; + + for (i = 0; i < dev->data->nb_tx_queues; i++) + virtio_dev_tx_queue_release(dev->data->tx_queues[i]); + + dev->data->nb_tx_queues = 0; +} + +static void +virtio_dev_close(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_LOG(DEBUG, "virtio_dev_close"); + + if (hw->started == 1) + virtio_dev_stop(dev); + + /* reset the NIC */ + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) + vtpci_irq_config(hw, VIRTIO_MSI_NO_VECTOR); + vtpci_reset(hw); + virtio_dev_free_mbufs(dev); + virtio_free_queues(dev); +} + +static void +virtio_dev_promiscuous_enable(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { + PMD_INIT_LOG(INFO, "host does not support rx control\n"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_RX; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC; + ctrl.data[0] = 1; + dlen[0] = 1; + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) + PMD_INIT_LOG(ERR, "Failed to enable promisc"); +} + +static void +virtio_dev_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { + PMD_INIT_LOG(INFO, "host does not support rx control\n"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_RX; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_PROMISC; + ctrl.data[0] = 0; + dlen[0] = 1; + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) + PMD_INIT_LOG(ERR, "Failed to disable promisc"); +} + +static void +virtio_dev_allmulticast_enable(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { + PMD_INIT_LOG(INFO, "host does not support rx control\n"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_RX; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI; + ctrl.data[0] = 1; + dlen[0] = 1; + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) + PMD_INIT_LOG(ERR, "Failed to enable allmulticast"); +} + +static void +virtio_dev_allmulticast_disable(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int dlen[1]; + int ret; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_RX)) { + PMD_INIT_LOG(INFO, "host does not support rx control\n"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_RX; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_RX_ALLMULTI; + ctrl.data[0] = 0; + dlen[0] = 1; + + ret = virtio_send_command(hw->cvq, &ctrl, dlen, 1); + if (ret) + PMD_INIT_LOG(ERR, "Failed to disable allmulticast"); +} + +/* + * dev_ops for virtio, bare necessities for basic operation + */ +static const struct eth_dev_ops virtio_eth_dev_ops = { + .dev_configure = virtio_dev_configure, + .dev_start = virtio_dev_start, + .dev_stop = virtio_dev_stop, + .dev_close = virtio_dev_close, + .promiscuous_enable = virtio_dev_promiscuous_enable, + .promiscuous_disable = virtio_dev_promiscuous_disable, + .allmulticast_enable = virtio_dev_allmulticast_enable, + .allmulticast_disable = virtio_dev_allmulticast_disable, + + .dev_infos_get = virtio_dev_info_get, + .stats_get = virtio_dev_stats_get, + .xstats_get = virtio_dev_xstats_get, + .xstats_get_names = virtio_dev_xstats_get_names, + .stats_reset = virtio_dev_stats_reset, + .xstats_reset = virtio_dev_stats_reset, + .link_update = virtio_dev_link_update, + .rx_queue_setup = virtio_dev_rx_queue_setup, + .rx_queue_release = virtio_dev_rx_queue_release, + .tx_queue_setup = virtio_dev_tx_queue_setup, + .tx_queue_release = virtio_dev_tx_queue_release, + /* collect stats per queue */ + .queue_stats_mapping_set = virtio_dev_queue_stats_mapping_set, + .vlan_filter_set = virtio_vlan_filter_set, + .mac_addr_add = virtio_mac_addr_add, + .mac_addr_remove = virtio_mac_addr_remove, + .mac_addr_set = virtio_mac_addr_set, +}; + +static inline int +virtio_dev_atomic_read_link_status(struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + struct rte_eth_link *dst = link; + struct rte_eth_link *src = &(dev->data->dev_link); + + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, + *(uint64_t *)src) == 0) + return -1; + + return 0; +} + +/** + * Atomically writes the link status information into global + * structure rte_eth_dev. + * + * @param dev + * - Pointer to the structure rte_eth_dev to read from. + * - Pointer to the buffer to be saved with the link status. + * + * @return + * - On success, zero. + * - On failure, negative value. + */ +static inline int +virtio_dev_atomic_write_link_status(struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + struct rte_eth_link *dst = &(dev->data->dev_link); + struct rte_eth_link *src = link; + + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, + *(uint64_t *)src) == 0) + return -1; + + return 0; +} + +static void +virtio_update_stats(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + unsigned i; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + const struct virtnet_tx *txvq = dev->data->tx_queues[i]; + if (txvq == NULL) + continue; + + stats->opackets += txvq->stats.packets; + stats->obytes += txvq->stats.bytes; + stats->oerrors += txvq->stats.errors; + + if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + stats->q_opackets[i] = txvq->stats.packets; + stats->q_obytes[i] = txvq->stats.bytes; + } + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + const struct virtnet_rx *rxvq = dev->data->rx_queues[i]; + if (rxvq == NULL) + continue; + + stats->ipackets += rxvq->stats.packets; + stats->ibytes += rxvq->stats.bytes; + stats->ierrors += rxvq->stats.errors; + + if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + stats->q_ipackets[i] = rxvq->stats.packets; + stats->q_ibytes[i] = rxvq->stats.bytes; + } + } + + stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; +} + +static int virtio_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + __rte_unused unsigned limit) +{ + unsigned i; + unsigned count = 0; + unsigned t; + + unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + + dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; + + if (xstats_names != NULL) { + /* Note: limit checked in rte_eth_xstats_names() */ + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtqueue *rxvq = dev->data->rx_queues[i]; + if (rxvq == NULL) + continue; + for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "rx_q%u_%s", i, + rte_virtio_rxq_stat_strings[t].name); + count++; + } + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtqueue *txvq = dev->data->tx_queues[i]; + if (txvq == NULL) + continue; + for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { + snprintf(xstats_names[count].name, + sizeof(xstats_names[count].name), + "tx_q%u_%s", i, + rte_virtio_txq_stat_strings[t].name); + count++; + } + } + return count; + } + return nstats; +} + +static int +virtio_dev_xstats_get(struct rte_eth_dev *dev, struct rte_eth_xstat *xstats, + unsigned n) +{ + unsigned i; + unsigned count = 0; + + unsigned nstats = dev->data->nb_tx_queues * VIRTIO_NB_TXQ_XSTATS + + dev->data->nb_rx_queues * VIRTIO_NB_RXQ_XSTATS; + + if (n < nstats) + return nstats; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; + + if (rxvq == NULL) + continue; + + unsigned t; + + for (t = 0; t < VIRTIO_NB_RXQ_XSTATS; t++) { + xstats[count].value = *(uint64_t *)(((char *)rxvq) + + rte_virtio_rxq_stat_strings[t].offset); + count++; + } + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtnet_tx *txvq = dev->data->tx_queues[i]; + + if (txvq == NULL) + continue; + + unsigned t; + + for (t = 0; t < VIRTIO_NB_TXQ_XSTATS; t++) { + xstats[count].value = *(uint64_t *)(((char *)txvq) + + rte_virtio_txq_stat_strings[t].offset); + count++; + } + } + + return count; +} + +static void +virtio_dev_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats) +{ + virtio_update_stats(dev, stats); +} + +static void +virtio_dev_stats_reset(struct rte_eth_dev *dev) +{ + unsigned int i; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtnet_tx *txvq = dev->data->tx_queues[i]; + if (txvq == NULL) + continue; + + txvq->stats.packets = 0; + txvq->stats.bytes = 0; + txvq->stats.errors = 0; + txvq->stats.multicast = 0; + txvq->stats.broadcast = 0; + memset(txvq->stats.size_bins, 0, + sizeof(txvq->stats.size_bins[0]) * 8); + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; + if (rxvq == NULL) + continue; + + rxvq->stats.packets = 0; + rxvq->stats.bytes = 0; + rxvq->stats.errors = 0; + rxvq->stats.multicast = 0; + rxvq->stats.broadcast = 0; + memset(rxvq->stats.size_bins, 0, + sizeof(rxvq->stats.size_bins[0]) * 8); + } +} + +static void +virtio_set_hwaddr(struct virtio_hw *hw) +{ + vtpci_write_dev_config(hw, + offsetof(struct virtio_net_config, mac), + &hw->mac_addr, ETHER_ADDR_LEN); +} + +static void +virtio_get_hwaddr(struct virtio_hw *hw) +{ + if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) { + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, mac), + &hw->mac_addr, ETHER_ADDR_LEN); + } else { + eth_random_addr(&hw->mac_addr[0]); + virtio_set_hwaddr(hw); + } +} + +static void +virtio_mac_table_set(struct virtio_hw *hw, + const struct virtio_net_ctrl_mac *uc, + const struct virtio_net_ctrl_mac *mc) +{ + struct virtio_pmd_ctrl ctrl; + int err, len[2]; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { + PMD_DRV_LOG(INFO, "host does not support mac table"); + return; + } + + ctrl.hdr.class = VIRTIO_NET_CTRL_MAC; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; + + len[0] = uc->entries * ETHER_ADDR_LEN + sizeof(uc->entries); + memcpy(ctrl.data, uc, len[0]); + + len[1] = mc->entries * ETHER_ADDR_LEN + sizeof(mc->entries); + memcpy(ctrl.data + len[0], mc, len[1]); + + err = virtio_send_command(hw->cvq, &ctrl, len, 2); + if (err != 0) + PMD_DRV_LOG(NOTICE, "mac table set failed: %d", err); +} + +static void +virtio_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr, + uint32_t index, uint32_t vmdq __rte_unused) +{ + struct virtio_hw *hw = dev->data->dev_private; + const struct ether_addr *addrs = dev->data->mac_addrs; + unsigned int i; + struct virtio_net_ctrl_mac *uc, *mc; + + if (index >= VIRTIO_MAX_MAC_ADDRS) { + PMD_DRV_LOG(ERR, "mac address index %u out of range", index); + return; + } + + uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries)); + uc->entries = 0; + mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries)); + mc->entries = 0; + + for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) { + const struct ether_addr *addr + = (i == index) ? mac_addr : addrs + i; + struct virtio_net_ctrl_mac *tbl + = is_multicast_ether_addr(addr) ? mc : uc; + + memcpy(&tbl->macs[tbl->entries++], addr, ETHER_ADDR_LEN); + } + + virtio_mac_table_set(hw, uc, mc); +} + +static void +virtio_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct ether_addr *addrs = dev->data->mac_addrs; + struct virtio_net_ctrl_mac *uc, *mc; + unsigned int i; + + if (index >= VIRTIO_MAX_MAC_ADDRS) { + PMD_DRV_LOG(ERR, "mac address index %u out of range", index); + return; + } + + uc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(uc->entries)); + uc->entries = 0; + mc = alloca(VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN + sizeof(mc->entries)); + mc->entries = 0; + + for (i = 0; i < VIRTIO_MAX_MAC_ADDRS; i++) { + struct virtio_net_ctrl_mac *tbl; + + if (i == index || is_zero_ether_addr(addrs + i)) + continue; + + tbl = is_multicast_ether_addr(addrs + i) ? mc : uc; + memcpy(&tbl->macs[tbl->entries++], addrs + i, ETHER_ADDR_LEN); + } + + virtio_mac_table_set(hw, uc, mc); +} + +static void +virtio_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) +{ + struct virtio_hw *hw = dev->data->dev_private; + + memcpy(hw->mac_addr, mac_addr, ETHER_ADDR_LEN); + + /* Use atomic update if available */ + if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_MAC_ADDR)) { + struct virtio_pmd_ctrl ctrl; + int len = ETHER_ADDR_LEN; + + ctrl.hdr.class = VIRTIO_NET_CTRL_MAC; + ctrl.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; + + memcpy(ctrl.data, mac_addr, ETHER_ADDR_LEN); + virtio_send_command(hw->cvq, &ctrl, &len, 1); + } else if (vtpci_with_feature(hw, VIRTIO_NET_F_MAC)) + virtio_set_hwaddr(hw); +} + +static int +virtio_vlan_filter_set(struct rte_eth_dev *dev, uint16_t vlan_id, int on) +{ + struct virtio_hw *hw = dev->data->dev_private; + struct virtio_pmd_ctrl ctrl; + int len; + + if (!vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) + return -ENOTSUP; + + ctrl.hdr.class = VIRTIO_NET_CTRL_VLAN; + ctrl.hdr.cmd = on ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; + memcpy(ctrl.data, &vlan_id, sizeof(vlan_id)); + len = sizeof(vlan_id); + + return virtio_send_command(hw->cvq, &ctrl, &len, 1); +} + +static int +virtio_negotiate_features(struct virtio_hw *hw) +{ + uint64_t host_features; + + /* Prepare guest_features: feature that driver wants to support */ + hw->guest_features = VIRTIO_PMD_GUEST_FEATURES; + PMD_INIT_LOG(DEBUG, "guest_features before negotiate = %" PRIx64, + hw->guest_features); + + /* Read device(host) feature bits */ + host_features = hw->vtpci_ops->get_features(hw); + PMD_INIT_LOG(DEBUG, "host_features before negotiate = %" PRIx64, + host_features); + + /* + * Negotiate features: Subset of device feature bits are written back + * guest feature bits. + */ + hw->guest_features = vtpci_negotiate_features(hw, host_features); + PMD_INIT_LOG(DEBUG, "features after negotiate = %" PRIx64, + hw->guest_features); + + if (hw->modern) { + if (!vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) { + PMD_INIT_LOG(ERR, + "VIRTIO_F_VERSION_1 features is not enabled."); + return -1; + } + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_FEATURES_OK); + if (!(vtpci_get_status(hw) & VIRTIO_CONFIG_STATUS_FEATURES_OK)) { + PMD_INIT_LOG(ERR, + "failed to set FEATURES_OK status!"); + return -1; + } + } + + return 0; +} + +/* + * Process Virtio Config changed interrupt and call the callback + * if link state changed. + */ +static void +virtio_interrupt_handler(__rte_unused struct rte_intr_handle *handle, + void *param) +{ + struct rte_eth_dev *dev = param; + struct virtio_hw *hw = dev->data->dev_private; + uint8_t isr; + + /* Read interrupt status which clears interrupt */ + isr = vtpci_isr(hw); + PMD_DRV_LOG(INFO, "interrupt status = %#x", isr); + + if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) + PMD_DRV_LOG(ERR, "interrupt enable failed"); + + if (isr & VIRTIO_PCI_ISR_CONFIG) { + if (virtio_dev_link_update(dev, 0) == 0) + _rte_eth_dev_callback_process(dev, + RTE_ETH_EVENT_INTR_LSC); + } + +} + +static void +rx_func_get(struct rte_eth_dev *eth_dev) +{ + struct virtio_hw *hw = eth_dev->data->dev_private; + if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) + eth_dev->rx_pkt_burst = &virtio_recv_mergeable_pkts; + else + eth_dev->rx_pkt_burst = &virtio_recv_pkts; +} + +/* + * This function is based on probe() function in virtio_pci.c + * It returns 0 on success. + */ +int +eth_virtio_dev_init(struct rte_eth_dev *eth_dev) +{ + struct virtio_hw *hw = eth_dev->data->dev_private; + struct virtio_net_config *config; + struct virtio_net_config local_config; + struct rte_pci_device *pci_dev; + uint32_t dev_flags = RTE_ETH_DEV_DETACHABLE; + int ret; + + RTE_BUILD_BUG_ON(RTE_PKTMBUF_HEADROOM < sizeof(struct virtio_net_hdr_mrg_rxbuf)); + + eth_dev->dev_ops = &virtio_eth_dev_ops; + eth_dev->tx_pkt_burst = &virtio_xmit_pkts; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + rx_func_get(eth_dev); + return 0; + } + + /* Allocate memory for storing MAC addresses */ + eth_dev->data->mac_addrs = rte_zmalloc("virtio", VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN, 0); + if (eth_dev->data->mac_addrs == NULL) { + PMD_INIT_LOG(ERR, + "Failed to allocate %d bytes needed to store MAC addresses", + VIRTIO_MAX_MAC_ADDRS * ETHER_ADDR_LEN); + return -ENOMEM; + } + + pci_dev = eth_dev->pci_dev; + + if (pci_dev) { + ret = vtpci_init(pci_dev, hw, &dev_flags); + if (ret) + return ret; + } + + /* Reset the device although not necessary at startup */ + vtpci_reset(hw); + + /* Tell the host we've noticed this device. */ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_ACK); + + /* Tell the host we've known how to drive the device. */ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER); + if (virtio_negotiate_features(hw) < 0) + return -1; + + /* If host does not support status then disable LSC */ + if (!vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) + dev_flags &= ~RTE_ETH_DEV_INTR_LSC; + + rte_eth_copy_pci_info(eth_dev, pci_dev); + eth_dev->data->dev_flags = dev_flags; + + rx_func_get(eth_dev); + + /* Setting up rx_header size for the device */ + if (vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF) || + vtpci_with_feature(hw, VIRTIO_F_VERSION_1)) + hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); + else + hw->vtnet_hdr_size = sizeof(struct virtio_net_hdr); + + /* Copy the permanent MAC address to: virtio_hw */ + virtio_get_hwaddr(hw); + ether_addr_copy((struct ether_addr *) hw->mac_addr, + ð_dev->data->mac_addrs[0]); + PMD_INIT_LOG(DEBUG, + "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X", + hw->mac_addr[0], hw->mac_addr[1], hw->mac_addr[2], + hw->mac_addr[3], hw->mac_addr[4], hw->mac_addr[5]); + + if (vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VQ)) { + config = &local_config; + + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, mac), + &config->mac, sizeof(config->mac)); + + if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, status), + &config->status, sizeof(config->status)); + } else { + PMD_INIT_LOG(DEBUG, + "VIRTIO_NET_F_STATUS is not supported"); + config->status = 0; + } + + if (vtpci_with_feature(hw, VIRTIO_NET_F_MQ)) { + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, max_virtqueue_pairs), + &config->max_virtqueue_pairs, + sizeof(config->max_virtqueue_pairs)); + } else { + PMD_INIT_LOG(DEBUG, + "VIRTIO_NET_F_MQ is not supported"); + config->max_virtqueue_pairs = 1; + } + + hw->max_rx_queues = + (VIRTIO_MAX_RX_QUEUES < config->max_virtqueue_pairs) ? + VIRTIO_MAX_RX_QUEUES : config->max_virtqueue_pairs; + hw->max_tx_queues = + (VIRTIO_MAX_TX_QUEUES < config->max_virtqueue_pairs) ? + VIRTIO_MAX_TX_QUEUES : config->max_virtqueue_pairs; + + virtio_dev_cq_queue_setup(eth_dev, + config->max_virtqueue_pairs * 2, + SOCKET_ID_ANY); + + PMD_INIT_LOG(DEBUG, "config->max_virtqueue_pairs=%d", + config->max_virtqueue_pairs); + PMD_INIT_LOG(DEBUG, "config->status=%d", config->status); + PMD_INIT_LOG(DEBUG, + "PORT MAC: %02X:%02X:%02X:%02X:%02X:%02X", + config->mac[0], config->mac[1], + config->mac[2], config->mac[3], + config->mac[4], config->mac[5]); + } else { + hw->max_rx_queues = 1; + hw->max_tx_queues = 1; + } + + PMD_INIT_LOG(DEBUG, "hw->max_rx_queues=%d hw->max_tx_queues=%d", + hw->max_rx_queues, hw->max_tx_queues); + if (pci_dev) + PMD_INIT_LOG(DEBUG, "port %d vendorID=0x%x deviceID=0x%x", + eth_dev->data->port_id, pci_dev->id.vendor_id, + pci_dev->id.device_id); + + /* Setup interrupt callback */ + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) + rte_intr_callback_register(&pci_dev->intr_handle, + virtio_interrupt_handler, eth_dev); + + virtio_dev_cq_start(eth_dev); + + return 0; +} + +static int +eth_virtio_dev_uninit(struct rte_eth_dev *eth_dev) +{ + struct rte_pci_device *pci_dev; + struct virtio_hw *hw = eth_dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return -EPERM; + + /* Close it anyway since there's no way to know if closed */ + virtio_dev_close(eth_dev); + + pci_dev = eth_dev->pci_dev; + + eth_dev->dev_ops = NULL; + eth_dev->tx_pkt_burst = NULL; + eth_dev->rx_pkt_burst = NULL; + + if (hw->cvq) + virtio_dev_queue_release(hw->cvq->vq); + + rte_free(eth_dev->data->mac_addrs); + eth_dev->data->mac_addrs = NULL; + + /* reset interrupt callback */ + if (eth_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) + rte_intr_callback_unregister(&pci_dev->intr_handle, + virtio_interrupt_handler, + eth_dev); + rte_eal_pci_unmap_device(pci_dev); + + PMD_INIT_LOG(DEBUG, "dev_uninit completed"); + + return 0; +} + +static struct eth_driver rte_virtio_pmd = { + .pci_drv = { + .name = "rte_virtio_pmd", + .id_table = pci_id_virtio_map, + .drv_flags = RTE_PCI_DRV_DETACHABLE, + }, + .eth_dev_init = eth_virtio_dev_init, + .eth_dev_uninit = eth_virtio_dev_uninit, + .dev_private_size = sizeof(struct virtio_hw), +}; + +/* + * Driver initialization routine. + * Invoked once at EAL init time. + * Register itself as the [Poll Mode] Driver of PCI virtio devices. + * Returns 0 on success. + */ +static int +rte_virtio_pmd_init(const char *name __rte_unused, + const char *param __rte_unused) +{ + if (rte_eal_iopl_init() != 0) { + PMD_INIT_LOG(ERR, "IOPL call failed - cannot use virtio PMD"); + return -1; + } + + rte_eth_driver_register(&rte_virtio_pmd); + return 0; +} + +/* + * Configure virtio device + * It returns 0 on success. + */ +static int +virtio_dev_configure(struct rte_eth_dev *dev) +{ + const struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode; + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_LOG(DEBUG, "configure"); + + if (rxmode->hw_ip_checksum) { + PMD_DRV_LOG(ERR, "HW IP checksum not supported"); + return -EINVAL; + } + + hw->vlan_strip = rxmode->hw_vlan_strip; + + if (rxmode->hw_vlan_filter + && !vtpci_with_feature(hw, VIRTIO_NET_F_CTRL_VLAN)) { + PMD_DRV_LOG(NOTICE, + "vlan filtering not available on this host"); + return -ENOTSUP; + } + + if (dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC) + if (vtpci_irq_config(hw, 0) == VIRTIO_MSI_NO_VECTOR) { + PMD_DRV_LOG(ERR, "failed to set config vector"); + return -EBUSY; + } + + return 0; +} + + +static int +virtio_dev_start(struct rte_eth_dev *dev) +{ + uint16_t nb_queues, i; + struct virtio_hw *hw = dev->data->dev_private; + struct virtnet_rx *rxvq; + struct virtnet_tx *txvq __rte_unused; + + /* check if lsc interrupt feature is enabled */ + if (dev->data->dev_conf.intr_conf.lsc) { + if (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) { + PMD_DRV_LOG(ERR, "link status not supported by host"); + return -ENOTSUP; + } + + if (rte_intr_enable(&dev->pci_dev->intr_handle) < 0) { + PMD_DRV_LOG(ERR, "interrupt enable failed"); + return -EIO; + } + } + + /* Initialize Link state */ + virtio_dev_link_update(dev, 0); + + /* On restart after stop do not touch queues */ + if (hw->started) + return 0; + + /* Do final configuration before rx/tx engine starts */ + virtio_dev_rxtx_start(dev); + vtpci_reinit_complete(hw); + + hw->started = 1; + + /*Notify the backend + *Otherwise the tap backend might already stop its queue due to fullness. + *vhost backend will have no chance to be waked up + */ + nb_queues = dev->data->nb_rx_queues; + if (nb_queues > 1) { + if (virtio_set_multiple_queues(dev, nb_queues) != 0) + return -EINVAL; + } + + PMD_INIT_LOG(DEBUG, "nb_queues=%d", nb_queues); + + for (i = 0; i < nb_queues; i++) { + rxvq = dev->data->rx_queues[i]; + virtqueue_notify(rxvq->vq); + } + + PMD_INIT_LOG(DEBUG, "Notified backend at initialization"); + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + rxvq = dev->data->rx_queues[i]; + VIRTQUEUE_DUMP(rxvq->vq); + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + txvq = dev->data->tx_queues[i]; + VIRTQUEUE_DUMP(txvq->vq); + } + + return 0; +} + +static void virtio_dev_free_mbufs(struct rte_eth_dev *dev) +{ + struct rte_mbuf *buf; + int i, mbuf_num = 0; + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; + + PMD_INIT_LOG(DEBUG, + "Before freeing rxq[%d] used and unused buf", i); + VIRTQUEUE_DUMP(rxvq->vq); + + PMD_INIT_LOG(DEBUG, "rx_queues[%d]=%p", i, rxvq); + while ((buf = virtqueue_detatch_unused(rxvq->vq)) != NULL) { + rte_pktmbuf_free(buf); + mbuf_num++; + } + + PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num); + PMD_INIT_LOG(DEBUG, + "After freeing rxq[%d] used and unused buf", i); + VIRTQUEUE_DUMP(rxvq->vq); + } + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtnet_tx *txvq = dev->data->tx_queues[i]; + + PMD_INIT_LOG(DEBUG, + "Before freeing txq[%d] used and unused bufs", + i); + VIRTQUEUE_DUMP(txvq->vq); + + mbuf_num = 0; + while ((buf = virtqueue_detatch_unused(txvq->vq)) != NULL) { + rte_pktmbuf_free(buf); + mbuf_num++; + } + + PMD_INIT_LOG(DEBUG, "free %d mbufs", mbuf_num); + PMD_INIT_LOG(DEBUG, + "After freeing txq[%d] used and unused buf", i); + VIRTQUEUE_DUMP(txvq->vq); + } +} + +/* + * Stop device: disable interrupt and mark link down + */ +static void +virtio_dev_stop(struct rte_eth_dev *dev) +{ + struct rte_eth_link link; + struct virtio_hw *hw = dev->data->dev_private; + + PMD_INIT_LOG(DEBUG, "stop"); + + hw->started = 0; + + if (dev->data->dev_conf.intr_conf.lsc) + rte_intr_disable(&dev->pci_dev->intr_handle); + + memset(&link, 0, sizeof(link)); + virtio_dev_atomic_write_link_status(dev, &link); +} + +static int +virtio_dev_link_update(struct rte_eth_dev *dev, __rte_unused int wait_to_complete) +{ + struct rte_eth_link link, old; + uint16_t status; + struct virtio_hw *hw = dev->data->dev_private; + memset(&link, 0, sizeof(link)); + virtio_dev_atomic_read_link_status(dev, &link); + old = link; + link.link_duplex = ETH_LINK_FULL_DUPLEX; + link.link_speed = SPEED_10G; + + if (vtpci_with_feature(hw, VIRTIO_NET_F_STATUS)) { + PMD_INIT_LOG(DEBUG, "Get link status from hw"); + vtpci_read_dev_config(hw, + offsetof(struct virtio_net_config, status), + &status, sizeof(status)); + if ((status & VIRTIO_NET_S_LINK_UP) == 0) { + link.link_status = ETH_LINK_DOWN; + PMD_INIT_LOG(DEBUG, "Port %d is down", + dev->data->port_id); + } else { + link.link_status = ETH_LINK_UP; + PMD_INIT_LOG(DEBUG, "Port %d is up", + dev->data->port_id); + } + } else { + link.link_status = ETH_LINK_UP; + } + virtio_dev_atomic_write_link_status(dev, &link); + + return (old.link_status == link.link_status) ? -1 : 0; +} + +static void +virtio_dev_info_get(struct rte_eth_dev *dev, struct rte_eth_dev_info *dev_info) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (dev->pci_dev) + dev_info->driver_name = dev->driver->pci_drv.name; + else + dev_info->driver_name = "virtio_user PMD"; + dev_info->max_rx_queues = (uint16_t)hw->max_rx_queues; + dev_info->max_tx_queues = (uint16_t)hw->max_tx_queues; + dev_info->min_rx_bufsize = VIRTIO_MIN_RX_BUFSIZE; + dev_info->max_rx_pktlen = VIRTIO_MAX_RX_PKTLEN; + dev_info->max_mac_addrs = VIRTIO_MAX_MAC_ADDRS; + dev_info->default_txconf = (struct rte_eth_txconf) { + .txq_flags = ETH_TXQ_FLAGS_NOOFFLOADS + }; +} + +/* + * It enables testpmd to collect per queue stats. + */ +static int +virtio_dev_queue_stats_mapping_set(__rte_unused struct rte_eth_dev *eth_dev, +__rte_unused uint16_t queue_id, __rte_unused uint8_t stat_idx, +__rte_unused uint8_t is_rx) +{ + return 0; +} + +static struct rte_driver rte_virtio_driver = { + .type = PMD_PDEV, + .init = rte_virtio_pmd_init, +}; + +PMD_REGISTER_DRIVER(rte_virtio_driver, virtio_net); +DRIVER_REGISTER_PCI_TABLE(virtio_net, pci_id_virtio_map); diff --git a/src/dpdk/drivers/net/virtio/virtio_ethdev.h b/src/dpdk/drivers/net/virtio/virtio_ethdev.h new file mode 100644 index 00000000..2ecec6eb --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_ethdev.h @@ -0,0 +1,127 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_ETHDEV_H_ +#define _VIRTIO_ETHDEV_H_ + +#include <stdint.h> + +#include "virtio_pci.h" + +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_10G 10000 + +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#define VIRTIO_MAX_RX_QUEUES 128 +#define VIRTIO_MAX_TX_QUEUES 128 +#define VIRTIO_MAX_MAC_ADDRS 64 +#define VIRTIO_MIN_RX_BUFSIZE 64 +#define VIRTIO_MAX_RX_PKTLEN 9728 + +/* Features desired/implemented by this driver. */ +#define VIRTIO_PMD_GUEST_FEATURES \ + (1u << VIRTIO_NET_F_MAC | \ + 1u << VIRTIO_NET_F_STATUS | \ + 1u << VIRTIO_NET_F_MQ | \ + 1u << VIRTIO_NET_F_CTRL_MAC_ADDR | \ + 1u << VIRTIO_NET_F_CTRL_VQ | \ + 1u << VIRTIO_NET_F_CTRL_RX | \ + 1u << VIRTIO_NET_F_CTRL_VLAN | \ + 1u << VIRTIO_NET_F_MRG_RXBUF | \ + 1ULL << VIRTIO_F_VERSION_1) + +/* + * CQ function prototype + */ +void virtio_dev_cq_start(struct rte_eth_dev *dev); + +/* + * RX/TX function prototypes + */ +void virtio_dev_rxtx_start(struct rte_eth_dev *dev); + +int virtio_dev_queue_setup(struct rte_eth_dev *dev, + int queue_type, + uint16_t queue_idx, + uint16_t vtpci_queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + void **pvq); + +void virtio_dev_queue_release(struct virtqueue *vq); + +int virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); + +void virtio_dev_rx_queue_release(void *rxq); + +int virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +void virtio_dev_tx_queue_release(void *txq); + +uint16_t virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +uint16_t virtio_recv_mergeable_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +uint16_t virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +uint16_t virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +uint16_t virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); + +int eth_virtio_dev_init(struct rte_eth_dev *eth_dev); + +/* + * The VIRTIO_NET_F_GUEST_TSO[46] features permit the host to send us + * frames larger than 1514 bytes. We do not yet support software LRO + * via tcp_lro_rx(). + */ +#define VTNET_LRO_FEATURES (VIRTIO_NET_F_GUEST_TSO4 | \ + VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN) + + +#endif /* _VIRTIO_ETHDEV_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_logs.h b/src/dpdk/drivers/net/virtio/virtio_logs.h new file mode 100644 index 00000000..90a79eaa --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_logs.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_LOGS_H_ +#define _VIRTIO_LOGS_H_ + +#include <rte_log.h> + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_INIT +#define PMD_INIT_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") +#else +#define PMD_INIT_LOG(level, fmt, args...) do { } while(0) +#define PMD_INIT_FUNC_TRACE() do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() rx: " fmt "\n", __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while(0) +#endif + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s() tx: " fmt "\n", __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while(0) +#endif + + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DRIVER +#define PMD_DRV_LOG(level, fmt, args...) \ + RTE_LOG(level, PMD, "%s(): " fmt "\n", __func__, ## args) +#else +#define PMD_DRV_LOG(level, fmt, args...) do { } while(0) +#endif + +#endif /* _VIRTIO_LOGS_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_pci.c b/src/dpdk/drivers/net/virtio/virtio_pci.c new file mode 100644 index 00000000..f1a7ca7e --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_pci.c @@ -0,0 +1,762 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdint.h> + +#ifdef RTE_EXEC_ENV_LINUXAPP + #include <dirent.h> + #include <fcntl.h> +#endif + +#include "virtio_pci.h" +#include "virtio_logs.h" +#include "virtqueue.h" + +/* + * Following macros are derived from linux/pci_regs.h, however, + * we can't simply include that header here, as there is no such + * file for non-Linux platform. + */ +#define PCI_CAPABILITY_LIST 0x34 +#define PCI_CAP_ID_VNDR 0x09 + +/* + * The remaining space is defined by each driver as the per-driver + * configuration space. + */ +#define VIRTIO_PCI_CONFIG(hw) (((hw)->use_msix) ? 24 : 20) + +static inline int +check_vq_phys_addr_ok(struct virtqueue *vq) +{ + /* Virtio PCI device VIRTIO_PCI_QUEUE_PF register is 32bit, + * and only accepts 32 bit page frame number. + * Check if the allocated physical memory exceeds 16TB. + */ + if ((vq->vq_ring_mem + vq->vq_ring_size - 1) >> + (VIRTIO_PCI_QUEUE_ADDR_SHIFT + 32)) { + PMD_INIT_LOG(ERR, "vring address shouldn't be above 16TB!"); + return 0; + } + + return 1; +} + +/* + * Since we are in legacy mode: + * http://ozlabs.org/~rusty/virtio-spec/virtio-0.9.5.pdf + * + * "Note that this is possible because while the virtio header is PCI (i.e. + * little) endian, the device-specific region is encoded in the native endian of + * the guest (where such distinction is applicable)." + * + * For powerpc which supports both, qemu supposes that cpu is big endian and + * enforces this for the virtio-net stuff. + */ +static void +legacy_read_dev_config(struct virtio_hw *hw, size_t offset, + void *dst, int length) +{ +#ifdef RTE_ARCH_PPC_64 + int size; + + while (length > 0) { + if (length >= 4) { + size = 4; + rte_eal_pci_ioport_read(&hw->io, dst, size, + VIRTIO_PCI_CONFIG(hw) + offset); + *(uint32_t *)dst = rte_be_to_cpu_32(*(uint32_t *)dst); + } else if (length >= 2) { + size = 2; + rte_eal_pci_ioport_read(&hw->io, dst, size, + VIRTIO_PCI_CONFIG(hw) + offset); + *(uint16_t *)dst = rte_be_to_cpu_16(*(uint16_t *)dst); + } else { + size = 1; + rte_eal_pci_ioport_read(&hw->io, dst, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } + + dst = (char *)dst + size; + offset += size; + length -= size; + } +#else + rte_eal_pci_ioport_read(&hw->io, dst, length, + VIRTIO_PCI_CONFIG(hw) + offset); +#endif +} + +static void +legacy_write_dev_config(struct virtio_hw *hw, size_t offset, + const void *src, int length) +{ +#ifdef RTE_ARCH_PPC_64 + union { + uint32_t u32; + uint16_t u16; + } tmp; + int size; + + while (length > 0) { + if (length >= 4) { + size = 4; + tmp.u32 = rte_cpu_to_be_32(*(const uint32_t *)src); + rte_eal_pci_ioport_write(&hw->io, &tmp.u32, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } else if (length >= 2) { + size = 2; + tmp.u16 = rte_cpu_to_be_16(*(const uint16_t *)src); + rte_eal_pci_ioport_write(&hw->io, &tmp.u16, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } else { + size = 1; + rte_eal_pci_ioport_write(&hw->io, src, size, + VIRTIO_PCI_CONFIG(hw) + offset); + } + + src = (const char *)src + size; + offset += size; + length -= size; + } +#else + rte_eal_pci_ioport_write(&hw->io, src, length, + VIRTIO_PCI_CONFIG(hw) + offset); +#endif +} + +static uint64_t +legacy_get_features(struct virtio_hw *hw) +{ + uint32_t dst; + + rte_eal_pci_ioport_read(&hw->io, &dst, 4, VIRTIO_PCI_HOST_FEATURES); + return dst; +} + +static void +legacy_set_features(struct virtio_hw *hw, uint64_t features) +{ + if ((features >> 32) != 0) { + PMD_DRV_LOG(ERR, + "only 32 bit features are allowed for legacy virtio!"); + return; + } + rte_eal_pci_ioport_write(&hw->io, &features, 4, + VIRTIO_PCI_GUEST_FEATURES); +} + +static uint8_t +legacy_get_status(struct virtio_hw *hw) +{ + uint8_t dst; + + rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_STATUS); + return dst; +} + +static void +legacy_set_status(struct virtio_hw *hw, uint8_t status) +{ + rte_eal_pci_ioport_write(&hw->io, &status, 1, VIRTIO_PCI_STATUS); +} + +static void +legacy_reset(struct virtio_hw *hw) +{ + legacy_set_status(hw, VIRTIO_CONFIG_STATUS_RESET); +} + +static uint8_t +legacy_get_isr(struct virtio_hw *hw) +{ + uint8_t dst; + + rte_eal_pci_ioport_read(&hw->io, &dst, 1, VIRTIO_PCI_ISR); + return dst; +} + +/* Enable one vector (0) for Link State Intrerrupt */ +static uint16_t +legacy_set_config_irq(struct virtio_hw *hw, uint16_t vec) +{ + uint16_t dst; + + rte_eal_pci_ioport_write(&hw->io, &vec, 2, VIRTIO_MSI_CONFIG_VECTOR); + rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_MSI_CONFIG_VECTOR); + return dst; +} + +static uint16_t +legacy_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) +{ + uint16_t dst; + + rte_eal_pci_ioport_write(&hw->io, &queue_id, 2, VIRTIO_PCI_QUEUE_SEL); + rte_eal_pci_ioport_read(&hw->io, &dst, 2, VIRTIO_PCI_QUEUE_NUM); + return dst; +} + +static int +legacy_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + uint32_t src; + + if (!check_vq_phys_addr_ok(vq)) + return -1; + + rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_SEL); + src = vq->vq_ring_mem >> VIRTIO_PCI_QUEUE_ADDR_SHIFT; + rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); + + return 0; +} + +static void +legacy_del_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + uint32_t src = 0; + + rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_SEL); + rte_eal_pci_ioport_write(&hw->io, &src, 4, VIRTIO_PCI_QUEUE_PFN); +} + +static void +legacy_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + rte_eal_pci_ioport_write(&hw->io, &vq->vq_queue_index, 2, + VIRTIO_PCI_QUEUE_NOTIFY); +} + +#ifdef RTE_EXEC_ENV_LINUXAPP +static int +legacy_virtio_has_msix(const struct rte_pci_addr *loc) +{ + DIR *d; + char dirname[PATH_MAX]; + + snprintf(dirname, sizeof(dirname), + "%s/" PCI_PRI_FMT "/msi_irqs", pci_get_sysfs_path(), + loc->domain, loc->bus, loc->devid, loc->function); + + d = opendir(dirname); + if (d) + closedir(d); + + return d != NULL; +} +#else +static int +legacy_virtio_has_msix(const struct rte_pci_addr *loc __rte_unused) +{ + /* nic_uio does not enable interrupts, return 0 (false). */ + return 0; +} +#endif + +static int +legacy_virtio_resource_init(struct rte_pci_device *pci_dev, + struct virtio_hw *hw, uint32_t *dev_flags) +{ + if (rte_eal_pci_ioport_map(pci_dev, 0, &hw->io) < 0) + return -1; + + if (pci_dev->intr_handle.type != RTE_INTR_HANDLE_UNKNOWN) + *dev_flags |= RTE_ETH_DEV_INTR_LSC; + else + *dev_flags &= ~RTE_ETH_DEV_INTR_LSC; + + return 0; +} + +static const struct virtio_pci_ops legacy_ops = { + .read_dev_cfg = legacy_read_dev_config, + .write_dev_cfg = legacy_write_dev_config, + .reset = legacy_reset, + .get_status = legacy_get_status, + .set_status = legacy_set_status, + .get_features = legacy_get_features, + .set_features = legacy_set_features, + .get_isr = legacy_get_isr, + .set_config_irq = legacy_set_config_irq, + .get_queue_num = legacy_get_queue_num, + .setup_queue = legacy_setup_queue, + .del_queue = legacy_del_queue, + .notify_queue = legacy_notify_queue, +}; + + +static inline uint8_t +io_read8(uint8_t *addr) +{ + return *(volatile uint8_t *)addr; +} + +static inline void +io_write8(uint8_t val, uint8_t *addr) +{ + *(volatile uint8_t *)addr = val; +} + +static inline uint16_t +io_read16(uint16_t *addr) +{ + return *(volatile uint16_t *)addr; +} + +static inline void +io_write16(uint16_t val, uint16_t *addr) +{ + *(volatile uint16_t *)addr = val; +} + +static inline uint32_t +io_read32(uint32_t *addr) +{ + return *(volatile uint32_t *)addr; +} + +static inline void +io_write32(uint32_t val, uint32_t *addr) +{ + *(volatile uint32_t *)addr = val; +} + +static inline void +io_write64_twopart(uint64_t val, uint32_t *lo, uint32_t *hi) +{ + io_write32(val & ((1ULL << 32) - 1), lo); + io_write32(val >> 32, hi); +} + +static void +modern_read_dev_config(struct virtio_hw *hw, size_t offset, + void *dst, int length) +{ + int i; + uint8_t *p; + uint8_t old_gen, new_gen; + + do { + old_gen = io_read8(&hw->common_cfg->config_generation); + + p = dst; + for (i = 0; i < length; i++) + *p++ = io_read8((uint8_t *)hw->dev_cfg + offset + i); + + new_gen = io_read8(&hw->common_cfg->config_generation); + } while (old_gen != new_gen); +} + +static void +modern_write_dev_config(struct virtio_hw *hw, size_t offset, + const void *src, int length) +{ + int i; + const uint8_t *p = src; + + for (i = 0; i < length; i++) + io_write8(*p++, (uint8_t *)hw->dev_cfg + offset + i); +} + +static uint64_t +modern_get_features(struct virtio_hw *hw) +{ + uint32_t features_lo, features_hi; + + io_write32(0, &hw->common_cfg->device_feature_select); + features_lo = io_read32(&hw->common_cfg->device_feature); + + io_write32(1, &hw->common_cfg->device_feature_select); + features_hi = io_read32(&hw->common_cfg->device_feature); + + return ((uint64_t)features_hi << 32) | features_lo; +} + +static void +modern_set_features(struct virtio_hw *hw, uint64_t features) +{ + io_write32(0, &hw->common_cfg->guest_feature_select); + io_write32(features & ((1ULL << 32) - 1), + &hw->common_cfg->guest_feature); + + io_write32(1, &hw->common_cfg->guest_feature_select); + io_write32(features >> 32, + &hw->common_cfg->guest_feature); +} + +static uint8_t +modern_get_status(struct virtio_hw *hw) +{ + return io_read8(&hw->common_cfg->device_status); +} + +static void +modern_set_status(struct virtio_hw *hw, uint8_t status) +{ + io_write8(status, &hw->common_cfg->device_status); +} + +static void +modern_reset(struct virtio_hw *hw) +{ + modern_set_status(hw, VIRTIO_CONFIG_STATUS_RESET); + modern_get_status(hw); +} + +static uint8_t +modern_get_isr(struct virtio_hw *hw) +{ + return io_read8(hw->isr); +} + +static uint16_t +modern_set_config_irq(struct virtio_hw *hw, uint16_t vec) +{ + io_write16(vec, &hw->common_cfg->msix_config); + return io_read16(&hw->common_cfg->msix_config); +} + +static uint16_t +modern_get_queue_num(struct virtio_hw *hw, uint16_t queue_id) +{ + io_write16(queue_id, &hw->common_cfg->queue_select); + return io_read16(&hw->common_cfg->queue_size); +} + +static int +modern_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + uint64_t desc_addr, avail_addr, used_addr; + uint16_t notify_off; + + if (!check_vq_phys_addr_ok(vq)) + return -1; + + desc_addr = vq->vq_ring_mem; + avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); + used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, + ring[vq->vq_nentries]), + VIRTIO_PCI_VRING_ALIGN); + + io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + + io_write64_twopart(desc_addr, &hw->common_cfg->queue_desc_lo, + &hw->common_cfg->queue_desc_hi); + io_write64_twopart(avail_addr, &hw->common_cfg->queue_avail_lo, + &hw->common_cfg->queue_avail_hi); + io_write64_twopart(used_addr, &hw->common_cfg->queue_used_lo, + &hw->common_cfg->queue_used_hi); + + notify_off = io_read16(&hw->common_cfg->queue_notify_off); + vq->notify_addr = (void *)((uint8_t *)hw->notify_base + + notify_off * hw->notify_off_multiplier); + + io_write16(1, &hw->common_cfg->queue_enable); + + PMD_INIT_LOG(DEBUG, "queue %u addresses:", vq->vq_queue_index); + PMD_INIT_LOG(DEBUG, "\t desc_addr: %" PRIx64, desc_addr); + PMD_INIT_LOG(DEBUG, "\t aval_addr: %" PRIx64, avail_addr); + PMD_INIT_LOG(DEBUG, "\t used_addr: %" PRIx64, used_addr); + PMD_INIT_LOG(DEBUG, "\t notify addr: %p (notify offset: %u)", + vq->notify_addr, notify_off); + + return 0; +} + +static void +modern_del_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + io_write16(vq->vq_queue_index, &hw->common_cfg->queue_select); + + io_write64_twopart(0, &hw->common_cfg->queue_desc_lo, + &hw->common_cfg->queue_desc_hi); + io_write64_twopart(0, &hw->common_cfg->queue_avail_lo, + &hw->common_cfg->queue_avail_hi); + io_write64_twopart(0, &hw->common_cfg->queue_used_lo, + &hw->common_cfg->queue_used_hi); + + io_write16(0, &hw->common_cfg->queue_enable); +} + +static void +modern_notify_queue(struct virtio_hw *hw __rte_unused, struct virtqueue *vq) +{ + io_write16(1, vq->notify_addr); +} + +static const struct virtio_pci_ops modern_ops = { + .read_dev_cfg = modern_read_dev_config, + .write_dev_cfg = modern_write_dev_config, + .reset = modern_reset, + .get_status = modern_get_status, + .set_status = modern_set_status, + .get_features = modern_get_features, + .set_features = modern_set_features, + .get_isr = modern_get_isr, + .set_config_irq = modern_set_config_irq, + .get_queue_num = modern_get_queue_num, + .setup_queue = modern_setup_queue, + .del_queue = modern_del_queue, + .notify_queue = modern_notify_queue, +}; + + +void +vtpci_read_dev_config(struct virtio_hw *hw, size_t offset, + void *dst, int length) +{ + hw->vtpci_ops->read_dev_cfg(hw, offset, dst, length); +} + +void +vtpci_write_dev_config(struct virtio_hw *hw, size_t offset, + const void *src, int length) +{ + hw->vtpci_ops->write_dev_cfg(hw, offset, src, length); +} + +uint64_t +vtpci_negotiate_features(struct virtio_hw *hw, uint64_t host_features) +{ + uint64_t features; + + /* + * Limit negotiated features to what the driver, virtqueue, and + * host all support. + */ + features = host_features & hw->guest_features; + hw->vtpci_ops->set_features(hw, features); + + return features; +} + +void +vtpci_reset(struct virtio_hw *hw) +{ + hw->vtpci_ops->set_status(hw, VIRTIO_CONFIG_STATUS_RESET); + /* flush status write */ + hw->vtpci_ops->get_status(hw); +} + +void +vtpci_reinit_complete(struct virtio_hw *hw) +{ + vtpci_set_status(hw, VIRTIO_CONFIG_STATUS_DRIVER_OK); +} + +void +vtpci_set_status(struct virtio_hw *hw, uint8_t status) +{ + if (status != VIRTIO_CONFIG_STATUS_RESET) + status |= hw->vtpci_ops->get_status(hw); + + hw->vtpci_ops->set_status(hw, status); +} + +uint8_t +vtpci_get_status(struct virtio_hw *hw) +{ + return hw->vtpci_ops->get_status(hw); +} + +uint8_t +vtpci_isr(struct virtio_hw *hw) +{ + return hw->vtpci_ops->get_isr(hw); +} + + +/* Enable one vector (0) for Link State Intrerrupt */ +uint16_t +vtpci_irq_config(struct virtio_hw *hw, uint16_t vec) +{ + return hw->vtpci_ops->set_config_irq(hw, vec); +} + +static void * +get_cfg_addr(struct rte_pci_device *dev, struct virtio_pci_cap *cap) +{ + uint8_t bar = cap->bar; + uint32_t length = cap->length; + uint32_t offset = cap->offset; + uint8_t *base; + + if (bar > 5) { + PMD_INIT_LOG(ERR, "invalid bar: %u", bar); + return NULL; + } + + if (offset + length < offset) { + PMD_INIT_LOG(ERR, "offset(%u) + length(%u) overflows", + offset, length); + return NULL; + } + + if (offset + length > dev->mem_resource[bar].len) { + PMD_INIT_LOG(ERR, + "invalid cap: overflows bar space: %u > %" PRIu64, + offset + length, dev->mem_resource[bar].len); + return NULL; + } + + base = dev->mem_resource[bar].addr; + if (base == NULL) { + PMD_INIT_LOG(ERR, "bar %u base addr is NULL", bar); + return NULL; + } + + return base + offset; +} + +static int +virtio_read_caps(struct rte_pci_device *dev, struct virtio_hw *hw) +{ + uint8_t pos; + struct virtio_pci_cap cap; + int ret; + + if (rte_eal_pci_map_device(dev)) { + PMD_INIT_LOG(DEBUG, "failed to map pci device!"); + return -1; + } + + ret = rte_eal_pci_read_config(dev, &pos, 1, PCI_CAPABILITY_LIST); + if (ret < 0) { + PMD_INIT_LOG(DEBUG, "failed to read pci capability list"); + return -1; + } + + while (pos) { + ret = rte_eal_pci_read_config(dev, &cap, sizeof(cap), pos); + if (ret < 0) { + PMD_INIT_LOG(ERR, + "failed to read pci cap at pos: %x", pos); + break; + } + + if (cap.cap_vndr != PCI_CAP_ID_VNDR) { + PMD_INIT_LOG(DEBUG, + "[%2x] skipping non VNDR cap id: %02x", + pos, cap.cap_vndr); + goto next; + } + + PMD_INIT_LOG(DEBUG, + "[%2x] cfg type: %u, bar: %u, offset: %04x, len: %u", + pos, cap.cfg_type, cap.bar, cap.offset, cap.length); + + switch (cap.cfg_type) { + case VIRTIO_PCI_CAP_COMMON_CFG: + hw->common_cfg = get_cfg_addr(dev, &cap); + break; + case VIRTIO_PCI_CAP_NOTIFY_CFG: + rte_eal_pci_read_config(dev, &hw->notify_off_multiplier, + 4, pos + sizeof(cap)); + hw->notify_base = get_cfg_addr(dev, &cap); + break; + case VIRTIO_PCI_CAP_DEVICE_CFG: + hw->dev_cfg = get_cfg_addr(dev, &cap); + break; + case VIRTIO_PCI_CAP_ISR_CFG: + hw->isr = get_cfg_addr(dev, &cap); + break; + } + +next: + pos = cap.cap_next; + } + + if (hw->common_cfg == NULL || hw->notify_base == NULL || + hw->dev_cfg == NULL || hw->isr == NULL) { + PMD_INIT_LOG(INFO, "no modern virtio pci device found."); + return -1; + } + + PMD_INIT_LOG(INFO, "found modern virtio pci device."); + + PMD_INIT_LOG(DEBUG, "common cfg mapped at: %p", hw->common_cfg); + PMD_INIT_LOG(DEBUG, "device cfg mapped at: %p", hw->dev_cfg); + PMD_INIT_LOG(DEBUG, "isr cfg mapped at: %p", hw->isr); + PMD_INIT_LOG(DEBUG, "notify base: %p, notify off multiplier: %u", + hw->notify_base, hw->notify_off_multiplier); + + return 0; +} + +/* + * Return -1: + * if there is error mapping with VFIO/UIO. + * if port map error when driver type is KDRV_NONE. + * if whitelisted but driver type is KDRV_UNKNOWN. + * Return 1 if kernel driver is managing the device. + * Return 0 on success. + */ +int +vtpci_init(struct rte_pci_device *dev, struct virtio_hw *hw, + uint32_t *dev_flags) +{ + hw->dev = dev; + + /* + * Try if we can succeed reading virtio pci caps, which exists + * only on modern pci device. If failed, we fallback to legacy + * virtio handling. + */ + if (virtio_read_caps(dev, hw) == 0) { + PMD_INIT_LOG(INFO, "modern virtio pci detected."); + hw->vtpci_ops = &modern_ops; + hw->modern = 1; + *dev_flags |= RTE_ETH_DEV_INTR_LSC; + return 0; + } + + PMD_INIT_LOG(INFO, "trying with legacy virtio pci."); + if (legacy_virtio_resource_init(dev, hw, dev_flags) < 0) { + if (dev->kdrv == RTE_KDRV_UNKNOWN && + (!dev->devargs || + dev->devargs->type != RTE_DEVTYPE_WHITELISTED_PCI)) { + PMD_INIT_LOG(INFO, + "skip kernel managed virtio device."); + return 1; + } + return -1; + } + + hw->vtpci_ops = &legacy_ops; + hw->use_msix = legacy_virtio_has_msix(&dev->addr); + hw->modern = 0; + + return 0; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_pci.h b/src/dpdk/drivers/net/virtio/virtio_pci.h new file mode 100644 index 00000000..dd7693fe --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_pci.h @@ -0,0 +1,317 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_PCI_H_ +#define _VIRTIO_PCI_H_ + +#include <stdint.h> + +#include <rte_pci.h> +#include <rte_ethdev.h> + +struct virtqueue; +struct virtnet_ctl; + +/* VirtIO PCI vendor/device ID. */ +#define VIRTIO_PCI_VENDORID 0x1AF4 +#define VIRTIO_PCI_DEVICEID_MIN 0x1000 +#define VIRTIO_PCI_DEVICEID_MAX 0x103F + +/* VirtIO ABI version, this must match exactly. */ +#define VIRTIO_PCI_ABI_VERSION 0 + +/* + * VirtIO Header, located in BAR 0. + */ +#define VIRTIO_PCI_HOST_FEATURES 0 /* host's supported features (32bit, RO)*/ +#define VIRTIO_PCI_GUEST_FEATURES 4 /* guest's supported features (32, RW) */ +#define VIRTIO_PCI_QUEUE_PFN 8 /* physical address of VQ (32, RW) */ +#define VIRTIO_PCI_QUEUE_NUM 12 /* number of ring entries (16, RO) */ +#define VIRTIO_PCI_QUEUE_SEL 14 /* current VQ selection (16, RW) */ +#define VIRTIO_PCI_QUEUE_NOTIFY 16 /* notify host regarding VQ (16, RW) */ +#define VIRTIO_PCI_STATUS 18 /* device status register (8, RW) */ +#define VIRTIO_PCI_ISR 19 /* interrupt status register, reading + * also clears the register (8, RO) */ +/* Only if MSIX is enabled: */ +#define VIRTIO_MSI_CONFIG_VECTOR 20 /* configuration change vector (16, RW) */ +#define VIRTIO_MSI_QUEUE_VECTOR 22 /* vector for selected VQ notifications + (16, RW) */ + +/* The bit of the ISR which indicates a device has an interrupt. */ +#define VIRTIO_PCI_ISR_INTR 0x1 +/* The bit of the ISR which indicates a device configuration change. */ +#define VIRTIO_PCI_ISR_CONFIG 0x2 +/* Vector value used to disable MSI for queue. */ +#define VIRTIO_MSI_NO_VECTOR 0xFFFF + +/* VirtIO device IDs. */ +#define VIRTIO_ID_NETWORK 0x01 +#define VIRTIO_ID_BLOCK 0x02 +#define VIRTIO_ID_CONSOLE 0x03 +#define VIRTIO_ID_ENTROPY 0x04 +#define VIRTIO_ID_BALLOON 0x05 +#define VIRTIO_ID_IOMEMORY 0x06 +#define VIRTIO_ID_9P 0x09 + +/* Status byte for guest to report progress. */ +#define VIRTIO_CONFIG_STATUS_RESET 0x00 +#define VIRTIO_CONFIG_STATUS_ACK 0x01 +#define VIRTIO_CONFIG_STATUS_DRIVER 0x02 +#define VIRTIO_CONFIG_STATUS_DRIVER_OK 0x04 +#define VIRTIO_CONFIG_STATUS_FEATURES_OK 0x08 +#define VIRTIO_CONFIG_STATUS_FAILED 0x80 + +/* + * Each virtqueue indirect descriptor list must be physically contiguous. + * To allow us to malloc(9) each list individually, limit the number + * supported to what will fit in one page. With 4KB pages, this is a limit + * of 256 descriptors. If there is ever a need for more, we can switch to + * contigmalloc(9) for the larger allocations, similar to what + * bus_dmamem_alloc(9) does. + * + * Note the sizeof(struct vring_desc) is 16 bytes. + */ +#define VIRTIO_MAX_INDIRECT ((int) (PAGE_SIZE / 16)) + +/* The feature bitmap for virtio net */ +#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */ +#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */ +#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */ +#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */ +#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */ +#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */ +#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */ +#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */ +#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */ +#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */ +#define VIRTIO_NET_F_MRG_RXBUF 15 /* Host can merge receive buffers. */ +#define VIRTIO_NET_F_STATUS 16 /* virtio_net_config.status available */ +#define VIRTIO_NET_F_CTRL_VQ 17 /* Control channel available */ +#define VIRTIO_NET_F_CTRL_RX 18 /* Control channel RX mode support */ +#define VIRTIO_NET_F_CTRL_VLAN 19 /* Control channel VLAN filtering */ +#define VIRTIO_NET_F_CTRL_RX_EXTRA 20 /* Extra RX mode control support */ +#define VIRTIO_NET_F_GUEST_ANNOUNCE 21 /* Guest can announce device on the + * network */ +#define VIRTIO_NET_F_MQ 22 /* Device supports Receive Flow + * Steering */ +#define VIRTIO_NET_F_CTRL_MAC_ADDR 23 /* Set MAC address */ + +/* Do we get callbacks when the ring is completely used, even if we've + * suppressed them? */ +#define VIRTIO_F_NOTIFY_ON_EMPTY 24 + +/* Can the device handle any descriptor layout? */ +#define VIRTIO_F_ANY_LAYOUT 27 + +/* We support indirect buffer descriptors */ +#define VIRTIO_RING_F_INDIRECT_DESC 28 + +#define VIRTIO_F_VERSION_1 32 + +/* + * Some VirtIO feature bits (currently bits 28 through 31) are + * reserved for the transport being used (eg. virtio_ring), the + * rest are per-device feature bits. + */ +#define VIRTIO_TRANSPORT_F_START 28 +#define VIRTIO_TRANSPORT_F_END 32 + +/* The Guest publishes the used index for which it expects an interrupt + * at the end of the avail ring. Host should ignore the avail->flags field. */ +/* The Host publishes the avail index for which it expects a kick + * at the end of the used ring. Guest should ignore the used->flags field. */ +#define VIRTIO_RING_F_EVENT_IDX 29 + +#define VIRTIO_NET_S_LINK_UP 1 /* Link is up */ +#define VIRTIO_NET_S_ANNOUNCE 2 /* Announcement is needed */ + +/* + * Maximum number of virtqueues per device. + */ +#define VIRTIO_MAX_VIRTQUEUES 8 + +/* Common configuration */ +#define VIRTIO_PCI_CAP_COMMON_CFG 1 +/* Notifications */ +#define VIRTIO_PCI_CAP_NOTIFY_CFG 2 +/* ISR Status */ +#define VIRTIO_PCI_CAP_ISR_CFG 3 +/* Device specific configuration */ +#define VIRTIO_PCI_CAP_DEVICE_CFG 4 +/* PCI configuration access */ +#define VIRTIO_PCI_CAP_PCI_CFG 5 + +/* This is the PCI capability header: */ +struct virtio_pci_cap { + uint8_t cap_vndr; /* Generic PCI field: PCI_CAP_ID_VNDR */ + uint8_t cap_next; /* Generic PCI field: next ptr. */ + uint8_t cap_len; /* Generic PCI field: capability length */ + uint8_t cfg_type; /* Identifies the structure. */ + uint8_t bar; /* Where to find it. */ + uint8_t padding[3]; /* Pad to full dword. */ + uint32_t offset; /* Offset within bar. */ + uint32_t length; /* Length of the structure, in bytes. */ +}; + +struct virtio_pci_notify_cap { + struct virtio_pci_cap cap; + uint32_t notify_off_multiplier; /* Multiplier for queue_notify_off. */ +}; + +/* Fields in VIRTIO_PCI_CAP_COMMON_CFG: */ +struct virtio_pci_common_cfg { + /* About the whole device. */ + uint32_t device_feature_select; /* read-write */ + uint32_t device_feature; /* read-only */ + uint32_t guest_feature_select; /* read-write */ + uint32_t guest_feature; /* read-write */ + uint16_t msix_config; /* read-write */ + uint16_t num_queues; /* read-only */ + uint8_t device_status; /* read-write */ + uint8_t config_generation; /* read-only */ + + /* About a specific virtqueue. */ + uint16_t queue_select; /* read-write */ + uint16_t queue_size; /* read-write, power of 2. */ + uint16_t queue_msix_vector; /* read-write */ + uint16_t queue_enable; /* read-write */ + uint16_t queue_notify_off; /* read-only */ + uint32_t queue_desc_lo; /* read-write */ + uint32_t queue_desc_hi; /* read-write */ + uint32_t queue_avail_lo; /* read-write */ + uint32_t queue_avail_hi; /* read-write */ + uint32_t queue_used_lo; /* read-write */ + uint32_t queue_used_hi; /* read-write */ +}; + +struct virtio_hw; + +struct virtio_pci_ops { + void (*read_dev_cfg)(struct virtio_hw *hw, size_t offset, + void *dst, int len); + void (*write_dev_cfg)(struct virtio_hw *hw, size_t offset, + const void *src, int len); + void (*reset)(struct virtio_hw *hw); + + uint8_t (*get_status)(struct virtio_hw *hw); + void (*set_status)(struct virtio_hw *hw, uint8_t status); + + uint64_t (*get_features)(struct virtio_hw *hw); + void (*set_features)(struct virtio_hw *hw, uint64_t features); + + uint8_t (*get_isr)(struct virtio_hw *hw); + + uint16_t (*set_config_irq)(struct virtio_hw *hw, uint16_t vec); + + uint16_t (*get_queue_num)(struct virtio_hw *hw, uint16_t queue_id); + int (*setup_queue)(struct virtio_hw *hw, struct virtqueue *vq); + void (*del_queue)(struct virtio_hw *hw, struct virtqueue *vq); + void (*notify_queue)(struct virtio_hw *hw, struct virtqueue *vq); +}; + +struct virtio_net_config; + +struct virtio_hw { + struct virtnet_ctl *cvq; + struct rte_pci_ioport io; + uint64_t guest_features; + uint32_t max_tx_queues; + uint32_t max_rx_queues; + uint16_t vtnet_hdr_size; + uint8_t vlan_strip; + uint8_t use_msix; + uint8_t started; + uint8_t modern; + uint8_t mac_addr[ETHER_ADDR_LEN]; + uint32_t notify_off_multiplier; + uint8_t *isr; + uint16_t *notify_base; + struct rte_pci_device *dev; + struct virtio_pci_common_cfg *common_cfg; + struct virtio_net_config *dev_cfg; + const struct virtio_pci_ops *vtpci_ops; + void *virtio_user_dev; +}; + +/* + * This structure is just a reference to read + * net device specific config space; it just a chodu structure + * + */ +struct virtio_net_config { + /* The config defining mac address (if VIRTIO_NET_F_MAC) */ + uint8_t mac[ETHER_ADDR_LEN]; + /* See VIRTIO_NET_F_STATUS and VIRTIO_NET_S_* above */ + uint16_t status; + uint16_t max_virtqueue_pairs; +} __attribute__((packed)); + +/* + * How many bits to shift physical queue address written to QUEUE_PFN. + * 12 is historical, and due to x86 page size. + */ +#define VIRTIO_PCI_QUEUE_ADDR_SHIFT 12 + +/* The alignment to use between consumer and producer parts of vring. */ +#define VIRTIO_PCI_VRING_ALIGN 4096 + +static inline int +vtpci_with_feature(struct virtio_hw *hw, uint64_t bit) +{ + return (hw->guest_features & (1ULL << bit)) != 0; +} + +/* + * Function declaration from virtio_pci.c + */ +int vtpci_init(struct rte_pci_device *, struct virtio_hw *, + uint32_t *dev_flags); +void vtpci_reset(struct virtio_hw *); + +void vtpci_reinit_complete(struct virtio_hw *); + +uint8_t vtpci_get_status(struct virtio_hw *); +void vtpci_set_status(struct virtio_hw *, uint8_t); + +uint64_t vtpci_negotiate_features(struct virtio_hw *, uint64_t); + +void vtpci_write_dev_config(struct virtio_hw *, size_t, const void *, int); + +void vtpci_read_dev_config(struct virtio_hw *, size_t, void *, int); + +uint8_t vtpci_isr(struct virtio_hw *); + +uint16_t vtpci_irq_config(struct virtio_hw *, uint16_t); + +#endif /* _VIRTIO_PCI_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_ring.h b/src/dpdk/drivers/net/virtio/virtio_ring.h new file mode 100644 index 00000000..fcecc161 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_ring.h @@ -0,0 +1,163 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_RING_H_ +#define _VIRTIO_RING_H_ + +#include <stdint.h> + +#include <rte_common.h> + +/* This marks a buffer as continuing via the next field. */ +#define VRING_DESC_F_NEXT 1 +/* This marks a buffer as write-only (otherwise read-only). */ +#define VRING_DESC_F_WRITE 2 +/* This means the buffer contains a list of buffer descriptors. */ +#define VRING_DESC_F_INDIRECT 4 + +/* The Host uses this in used->flags to advise the Guest: don't kick me + * when you add a buffer. It's unreliable, so it's simply an + * optimization. Guest will still kick if it's out of buffers. */ +#define VRING_USED_F_NO_NOTIFY 1 +/* The Guest uses this in avail->flags to advise the Host: don't + * interrupt me when you consume a buffer. It's unreliable, so it's + * simply an optimization. */ +#define VRING_AVAIL_F_NO_INTERRUPT 1 + +/* VirtIO ring descriptors: 16 bytes. + * These can chain together via "next". */ +struct vring_desc { + uint64_t addr; /* Address (guest-physical). */ + uint32_t len; /* Length. */ + uint16_t flags; /* The flags as indicated above. */ + uint16_t next; /* We chain unused descriptors via this. */ +}; + +struct vring_avail { + uint16_t flags; + uint16_t idx; + uint16_t ring[0]; +}; + +/* id is a 16bit index. uint32_t is used here for ids for padding reasons. */ +struct vring_used_elem { + /* Index of start of used descriptor chain. */ + uint32_t id; + /* Total length of the descriptor chain which was written to. */ + uint32_t len; +}; + +struct vring_used { + uint16_t flags; + volatile uint16_t idx; + struct vring_used_elem ring[0]; +}; + +struct vring { + unsigned int num; + struct vring_desc *desc; + struct vring_avail *avail; + struct vring_used *used; +}; + +/* The standard layout for the ring is a continuous chunk of memory which + * looks like this. We assume num is a power of 2. + * + * struct vring { + * // The actual descriptors (16 bytes each) + * struct vring_desc desc[num]; + * + * // A ring of available descriptor heads with free-running index. + * __u16 avail_flags; + * __u16 avail_idx; + * __u16 available[num]; + * __u16 used_event_idx; + * + * // Padding to the next align boundary. + * char pad[]; + * + * // A ring of used descriptor heads with free-running index. + * __u16 used_flags; + * __u16 used_idx; + * struct vring_used_elem used[num]; + * __u16 avail_event_idx; + * }; + * + * NOTE: for VirtIO PCI, align is 4096. + */ + +/* + * We publish the used event index at the end of the available ring, and vice + * versa. They are at the end for backwards compatibility. + */ +#define vring_used_event(vr) ((vr)->avail->ring[(vr)->num]) +#define vring_avail_event(vr) (*(uint16_t *)&(vr)->used->ring[(vr)->num]) + +static inline size_t +vring_size(unsigned int num, unsigned long align) +{ + size_t size; + + size = num * sizeof(struct vring_desc); + size += sizeof(struct vring_avail) + (num * sizeof(uint16_t)); + size = RTE_ALIGN_CEIL(size, align); + size += sizeof(struct vring_used) + + (num * sizeof(struct vring_used_elem)); + return size; +} + +static inline void +vring_init(struct vring *vr, unsigned int num, uint8_t *p, + unsigned long align) +{ + vr->num = num; + vr->desc = (struct vring_desc *) p; + vr->avail = (struct vring_avail *) (p + + num * sizeof(struct vring_desc)); + vr->used = (void *) + RTE_ALIGN_CEIL((uintptr_t)(&vr->avail->ring[num]), align); +} + +/* + * The following is used with VIRTIO_RING_F_EVENT_IDX. + * Assuming a given event_idx value from the other size, if we have + * just incremented index from old to new_idx, should we trigger an + * event? + */ +static inline int +vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) +{ + return (uint16_t)(new_idx - event_idx - 1) < (uint16_t)(new_idx - old); +} + +#endif /* _VIRTIO_RING_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx.c b/src/dpdk/drivers/net/virtio/virtio_rxtx.c new file mode 100644 index 00000000..724517e2 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx.c @@ -0,0 +1,982 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <rte_cycles.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_branch_prediction.h> +#include <rte_mempool.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_prefetch.h> +#include <rte_string_fns.h> +#include <rte_errno.h> +#include <rte_byteorder.h> + +#include "virtio_logs.h" +#include "virtio_ethdev.h" +#include "virtio_pci.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP +#define VIRTIO_DUMP_PACKET(m, len) rte_pktmbuf_dump(stdout, m, len) +#else +#define VIRTIO_DUMP_PACKET(m, len) do { } while (0) +#endif + + +#define VIRTIO_SIMPLE_FLAGS ((uint32_t)ETH_TXQ_FLAGS_NOMULTSEGS | \ + ETH_TXQ_FLAGS_NOOFFLOADS) + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 +static int use_simple_rxtx; +#endif + +static void +vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx) +{ + struct vring_desc *dp, *dp_tail; + struct vq_desc_extra *dxp; + uint16_t desc_idx_last = desc_idx; + + dp = &vq->vq_ring.desc[desc_idx]; + dxp = &vq->vq_descx[desc_idx]; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt + dxp->ndescs); + if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) { + while (dp->flags & VRING_DESC_F_NEXT) { + desc_idx_last = dp->next; + dp = &vq->vq_ring.desc[dp->next]; + } + } + dxp->ndescs = 0; + + /* + * We must append the existing free chain, if any, to the end of + * newly freed chain. If the virtqueue was completely used, then + * head would be VQ_RING_DESC_CHAIN_END (ASSERTed above). + */ + if (vq->vq_desc_tail_idx == VQ_RING_DESC_CHAIN_END) { + vq->vq_desc_head_idx = desc_idx; + } else { + dp_tail = &vq->vq_ring.desc[vq->vq_desc_tail_idx]; + dp_tail->next = desc_idx; + } + + vq->vq_desc_tail_idx = desc_idx_last; + dp->next = VQ_RING_DESC_CHAIN_END; +} + +static uint16_t +virtqueue_dequeue_burst_rx(struct virtqueue *vq, struct rte_mbuf **rx_pkts, + uint32_t *len, uint16_t num) +{ + struct vring_used_elem *uep; + struct rte_mbuf *cookie; + uint16_t used_idx, desc_idx; + uint16_t i; + + /* Caller does the check */ + for (i = 0; i < num ; i++) { + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + desc_idx = (uint16_t) uep->id; + len[i] = uep->len; + cookie = (struct rte_mbuf *)vq->vq_descx[desc_idx].cookie; + + if (unlikely(cookie == NULL)) { + PMD_DRV_LOG(ERR, "vring descriptor with no mbuf cookie at %u\n", + vq->vq_used_cons_idx); + break; + } + + rte_prefetch0(cookie); + rte_packet_prefetch(rte_pktmbuf_mtod(cookie, void *)); + rx_pkts[i] = cookie; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); + vq->vq_descx[desc_idx].cookie = NULL; + } + + return i; +} + +#ifndef DEFAULT_TX_FREE_THRESH +#define DEFAULT_TX_FREE_THRESH 32 +#endif + +/* Cleanup from completed transmits. */ +static void +virtio_xmit_cleanup(struct virtqueue *vq, uint16_t num) +{ + uint16_t i, used_idx, desc_idx; + for (i = 0; i < num; i++) { + struct vring_used_elem *uep; + struct vq_desc_extra *dxp; + + used_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + uep = &vq->vq_ring.used->ring[used_idx]; + + desc_idx = (uint16_t) uep->id; + dxp = &vq->vq_descx[desc_idx]; + vq->vq_used_cons_idx++; + vq_ring_free_chain(vq, desc_idx); + + if (dxp->cookie != NULL) { + rte_pktmbuf_free(dxp->cookie); + dxp->cookie = NULL; + } + } +} + + +static inline int +virtqueue_enqueue_recv_refill(struct virtqueue *vq, struct rte_mbuf *cookie) +{ + struct vq_desc_extra *dxp; + struct virtio_hw *hw = vq->hw; + struct vring_desc *start_dp; + uint16_t needed = 1; + uint16_t head_idx, idx; + + if (unlikely(vq->vq_free_cnt == 0)) + return -ENOSPC; + if (unlikely(vq->vq_free_cnt < needed)) + return -EMSGSIZE; + + head_idx = vq->vq_desc_head_idx; + if (unlikely(head_idx >= vq->vq_nentries)) + return -EFAULT; + + idx = head_idx; + dxp = &vq->vq_descx[idx]; + dxp->cookie = (void *)cookie; + dxp->ndescs = needed; + + start_dp = vq->vq_ring.desc; + start_dp[idx].addr = + VIRTIO_MBUF_ADDR(cookie, vq) + + RTE_PKTMBUF_HEADROOM - hw->vtnet_hdr_size; + start_dp[idx].len = + cookie->buf_len - RTE_PKTMBUF_HEADROOM + hw->vtnet_hdr_size; + start_dp[idx].flags = VRING_DESC_F_WRITE; + idx = start_dp[idx].next; + vq->vq_desc_head_idx = idx; + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) + vq->vq_desc_tail_idx = idx; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); + vq_update_avail_ring(vq, head_idx); + + return 0; +} + +static inline void +virtqueue_enqueue_xmit(struct virtnet_tx *txvq, struct rte_mbuf *cookie, + uint16_t needed, int use_indirect, int can_push) +{ + struct vq_desc_extra *dxp; + struct virtqueue *vq = txvq->vq; + struct vring_desc *start_dp; + uint16_t seg_num = cookie->nb_segs; + uint16_t head_idx, idx; + uint16_t head_size = vq->hw->vtnet_hdr_size; + unsigned long offs; + + head_idx = vq->vq_desc_head_idx; + idx = head_idx; + dxp = &vq->vq_descx[idx]; + dxp->cookie = (void *)cookie; + dxp->ndescs = needed; + + start_dp = vq->vq_ring.desc; + + if (can_push) { + /* put on zero'd transmit header (no offloads) */ + void *hdr = rte_pktmbuf_prepend(cookie, head_size); + + memset(hdr, 0, head_size); + } else if (use_indirect) { + /* setup tx ring slot to point to indirect + * descriptor list stored in reserved region. + * + * the first slot in indirect ring is already preset + * to point to the header in reserved region + */ + struct virtio_tx_region *txr = txvq->virtio_net_hdr_mz->addr; + + offs = idx * sizeof(struct virtio_tx_region) + + offsetof(struct virtio_tx_region, tx_indir); + + start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].len = (seg_num + 1) * sizeof(struct vring_desc); + start_dp[idx].flags = VRING_DESC_F_INDIRECT; + + /* loop below will fill in rest of the indirect elements */ + start_dp = txr[idx].tx_indir; + idx = 1; + } else { + /* setup first tx ring slot to point to header + * stored in reserved region. + */ + offs = idx * sizeof(struct virtio_tx_region) + + offsetof(struct virtio_tx_region, tx_hdr); + + start_dp[idx].addr = txvq->virtio_net_hdr_mem + offs; + start_dp[idx].len = vq->hw->vtnet_hdr_size; + start_dp[idx].flags = VRING_DESC_F_NEXT; + idx = start_dp[idx].next; + } + + do { + start_dp[idx].addr = VIRTIO_MBUF_DATA_DMA_ADDR(cookie, vq); + start_dp[idx].len = cookie->data_len; + start_dp[idx].flags = cookie->next ? VRING_DESC_F_NEXT : 0; + idx = start_dp[idx].next; + } while ((cookie = cookie->next) != NULL); + + if (use_indirect) + idx = vq->vq_ring.desc[head_idx].next; + + vq->vq_desc_head_idx = idx; + if (vq->vq_desc_head_idx == VQ_RING_DESC_CHAIN_END) + vq->vq_desc_tail_idx = idx; + vq->vq_free_cnt = (uint16_t)(vq->vq_free_cnt - needed); + vq_update_avail_ring(vq, head_idx); +} + +static void +virtio_dev_vring_start(struct virtqueue *vq) +{ + int size = vq->vq_nentries; + struct vring *vr = &vq->vq_ring; + uint8_t *ring_mem = vq->vq_ring_virt_mem; + + PMD_INIT_FUNC_TRACE(); + + /* + * Reinitialise since virtio port might have been stopped and restarted + */ + memset(vq->vq_ring_virt_mem, 0, vq->vq_ring_size); + vring_init(vr, size, ring_mem, VIRTIO_PCI_VRING_ALIGN); + vq->vq_used_cons_idx = 0; + vq->vq_desc_head_idx = 0; + vq->vq_avail_idx = 0; + vq->vq_desc_tail_idx = (uint16_t)(vq->vq_nentries - 1); + vq->vq_free_cnt = vq->vq_nentries; + memset(vq->vq_descx, 0, sizeof(struct vq_desc_extra) * vq->vq_nentries); + + vring_desc_init(vr->desc, size); + + /* + * Disable device(host) interrupting guest + */ + virtqueue_disable_intr(vq); +} + +void +virtio_dev_cq_start(struct rte_eth_dev *dev) +{ + struct virtio_hw *hw = dev->data->dev_private; + + if (hw->cvq && hw->cvq->vq) { + virtio_dev_vring_start(hw->cvq->vq); + VIRTQUEUE_DUMP((struct virtqueue *)hw->cvq->vq); + } +} + +void +virtio_dev_rxtx_start(struct rte_eth_dev *dev) +{ + /* + * Start receive and transmit vrings + * - Setup vring structure for all queues + * - Initialize descriptor for the rx vring + * - Allocate blank mbufs for the each rx descriptor + * + */ + uint16_t i; + uint16_t desc_idx; + + PMD_INIT_FUNC_TRACE(); + + /* Start rx vring. */ + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct virtnet_rx *rxvq = dev->data->rx_queues[i]; + struct virtqueue *vq = rxvq->vq; + int error, nbufs; + struct rte_mbuf *m; + + virtio_dev_vring_start(vq); + if (rxvq->mpool == NULL) { + rte_exit(EXIT_FAILURE, + "Cannot allocate mbufs for rx virtqueue"); + } + + /* Allocate blank mbufs for the each rx descriptor */ + nbufs = 0; + error = ENOSPC; + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + if (use_simple_rxtx) { + for (desc_idx = 0; desc_idx < vq->vq_nentries; + desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = desc_idx; + vq->vq_ring.desc[desc_idx].flags = + VRING_DESC_F_WRITE; + } + } +#endif + memset(&rxvq->fake_mbuf, 0, sizeof(rxvq->fake_mbuf)); + for (desc_idx = 0; desc_idx < RTE_PMD_VIRTIO_RX_MAX_BURST; + desc_idx++) { + vq->sw_ring[vq->vq_nentries + desc_idx] = + &rxvq->fake_mbuf; + } + + while (!virtqueue_full(vq)) { + m = rte_mbuf_raw_alloc(rxvq->mpool); + if (m == NULL) + break; + + /****************************************** + * Enqueue allocated buffers * + *******************************************/ +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + if (use_simple_rxtx) + error = virtqueue_enqueue_recv_refill_simple(vq, m); + else +#endif + error = virtqueue_enqueue_recv_refill(vq, m); + if (error) { + rte_pktmbuf_free(m); + break; + } + nbufs++; + } + + vq_update_avail_idx(vq); + + PMD_INIT_LOG(DEBUG, "Allocated %d bufs", nbufs); + + VIRTQUEUE_DUMP(vq); + } + + /* Start tx vring. */ + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct virtnet_tx *txvq = dev->data->tx_queues[i]; + struct virtqueue *vq = txvq->vq; + + virtio_dev_vring_start(vq); +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + if (use_simple_rxtx) { + uint16_t mid_idx = vq->vq_nentries >> 1; + + for (desc_idx = 0; desc_idx < mid_idx; desc_idx++) { + vq->vq_ring.avail->ring[desc_idx] = + desc_idx + mid_idx; + vq->vq_ring.desc[desc_idx + mid_idx].next = + desc_idx; + vq->vq_ring.desc[desc_idx + mid_idx].addr = + txvq->virtio_net_hdr_mem + + offsetof(struct virtio_tx_region, tx_hdr); + vq->vq_ring.desc[desc_idx + mid_idx].len = + vq->hw->vtnet_hdr_size; + vq->vq_ring.desc[desc_idx + mid_idx].flags = + VRING_DESC_F_NEXT; + vq->vq_ring.desc[desc_idx].flags = 0; + } + for (desc_idx = mid_idx; desc_idx < vq->vq_nentries; + desc_idx++) + vq->vq_ring.avail->ring[desc_idx] = desc_idx; + } +#endif + VIRTQUEUE_DUMP(vq); + } +} + +int +virtio_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + __rte_unused const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + uint16_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_RQ_QUEUE_IDX; + struct virtnet_rx *rxvq; + int ret; + + PMD_INIT_FUNC_TRACE(); + ret = virtio_dev_queue_setup(dev, VTNET_RQ, queue_idx, vtpci_queue_idx, + nb_desc, socket_id, (void **)&rxvq); + if (ret < 0) { + PMD_INIT_LOG(ERR, "rvq initialization failed"); + return ret; + } + + /* Create mempool for rx mbuf allocation */ + rxvq->mpool = mp; + + dev->data->rx_queues[queue_idx] = rxvq; + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + virtio_rxq_vec_setup(rxvq); +#endif + + return 0; +} + +void +virtio_dev_rx_queue_release(void *rxq) +{ + struct virtnet_rx *rxvq = rxq; + struct virtqueue *vq; + const struct rte_memzone *mz; + + if (rxvq == NULL) + return; + + /* + * rxvq is freed when vq is freed, and as mz should be freed after the + * del_queue, so we reserve the mz pointer first. + */ + vq = rxvq->vq; + mz = rxvq->mz; + + virtio_dev_queue_release(vq); + rte_memzone_free(mz); +} + +/* + * struct rte_eth_dev *dev: Used to update dev + * uint16_t nb_desc: Defaults to values read from config space + * unsigned int socket_id: Used to allocate memzone + * const struct rte_eth_txconf *tx_conf: Used to setup tx engine + * uint16_t queue_idx: Just used as an index in dev txq list + */ +int +virtio_dev_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + uint8_t vtpci_queue_idx = 2 * queue_idx + VTNET_SQ_TQ_QUEUE_IDX; + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + struct virtio_hw *hw = dev->data->dev_private; +#endif + struct virtnet_tx *txvq; + struct virtqueue *vq; + uint16_t tx_free_thresh; + int ret; + + PMD_INIT_FUNC_TRACE(); + + if ((tx_conf->txq_flags & ETH_TXQ_FLAGS_NOXSUMS) + != ETH_TXQ_FLAGS_NOXSUMS) { + PMD_INIT_LOG(ERR, "TX checksum offload not supported\n"); + return -EINVAL; + } + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 + /* Use simple rx/tx func if single segment and no offloads */ + if ((tx_conf->txq_flags & VIRTIO_SIMPLE_FLAGS) == VIRTIO_SIMPLE_FLAGS && + !vtpci_with_feature(hw, VIRTIO_NET_F_MRG_RXBUF)) { + PMD_INIT_LOG(INFO, "Using simple rx/tx path"); + dev->tx_pkt_burst = virtio_xmit_pkts_simple; + dev->rx_pkt_burst = virtio_recv_pkts_vec; + use_simple_rxtx = 1; + } +#endif + + ret = virtio_dev_queue_setup(dev, VTNET_TQ, queue_idx, vtpci_queue_idx, + nb_desc, socket_id, (void **)&txvq); + if (ret < 0) { + PMD_INIT_LOG(ERR, "tvq initialization failed"); + return ret; + } + vq = txvq->vq; + + tx_free_thresh = tx_conf->tx_free_thresh; + if (tx_free_thresh == 0) + tx_free_thresh = + RTE_MIN(vq->vq_nentries / 4, DEFAULT_TX_FREE_THRESH); + + if (tx_free_thresh >= (vq->vq_nentries - 3)) { + RTE_LOG(ERR, PMD, "tx_free_thresh must be less than the " + "number of TX entries minus 3 (%u)." + " (tx_free_thresh=%u port=%u queue=%u)\n", + vq->vq_nentries - 3, + tx_free_thresh, dev->data->port_id, queue_idx); + return -EINVAL; + } + + vq->vq_free_thresh = tx_free_thresh; + + dev->data->tx_queues[queue_idx] = txvq; + return 0; +} + +void +virtio_dev_tx_queue_release(void *txq) +{ + struct virtnet_tx *txvq = txq; + struct virtqueue *vq; + const struct rte_memzone *mz; + const struct rte_memzone *hdr_mz; + + if (txvq == NULL) + return; + + /* + * txvq is freed when vq is freed, and as mz should be freed after the + * del_queue, so we reserve the mz pointer first. + */ + vq = txvq->vq; + mz = txvq->mz; + hdr_mz = txvq->virtio_net_hdr_mz; + + virtio_dev_queue_release(vq); + rte_memzone_free(mz); + rte_memzone_free(hdr_mz); +} + +static void +virtio_discard_rxbuf(struct virtqueue *vq, struct rte_mbuf *m) +{ + int error; + /* + * Requeue the discarded mbuf. This should always be + * successful since it was just dequeued. + */ + error = virtqueue_enqueue_recv_refill(vq, m); + if (unlikely(error)) { + RTE_LOG(ERR, PMD, "cannot requeue discarded mbuf"); + rte_pktmbuf_free(m); + } +} + +static void +virtio_update_packet_stats(struct virtnet_stats *stats, struct rte_mbuf *mbuf) +{ + uint32_t s = mbuf->pkt_len; + struct ether_addr *ea; + + if (s == 64) { + stats->size_bins[1]++; + } else if (s > 64 && s < 1024) { + uint32_t bin; + + /* count zeros, and offset into correct bin */ + bin = (sizeof(s) * 8) - __builtin_clz(s) - 5; + stats->size_bins[bin]++; + } else { + if (s < 64) + stats->size_bins[0]++; + else if (s < 1519) + stats->size_bins[6]++; + else if (s >= 1519) + stats->size_bins[7]++; + } + + ea = rte_pktmbuf_mtod(mbuf, struct ether_addr *); + if (is_multicast_ether_addr(ea)) { + if (is_broadcast_ether_addr(ea)) + stats->broadcast++; + else + stats->multicast++; + } +} + +#define VIRTIO_MBUF_BURST_SZ 64 +#define DESC_PER_CACHELINE (RTE_CACHE_LINE_SIZE / sizeof(struct vring_desc)) +uint16_t +virtio_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; + struct virtio_hw *hw; + struct rte_mbuf *rxm, *new_mbuf; + uint16_t nb_used, num, nb_rx; + uint32_t len[VIRTIO_MBUF_BURST_SZ]; + struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; + int error; + uint32_t i, nb_enqueued; + uint32_t hdr_size; + + nb_used = VIRTQUEUE_NUSED(vq); + + virtio_rmb(); + + num = (uint16_t)(likely(nb_used <= nb_pkts) ? nb_used : nb_pkts); + num = (uint16_t)(likely(num <= VIRTIO_MBUF_BURST_SZ) ? num : VIRTIO_MBUF_BURST_SZ); + if (likely(num > DESC_PER_CACHELINE)) + num = num - ((vq->vq_used_cons_idx + num) % DESC_PER_CACHELINE); + + num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, num); + PMD_RX_LOG(DEBUG, "used:%d dequeue:%d", nb_used, num); + + hw = vq->hw; + nb_rx = 0; + nb_enqueued = 0; + hdr_size = hw->vtnet_hdr_size; + + for (i = 0; i < num ; i++) { + rxm = rcv_pkts[i]; + + PMD_RX_LOG(DEBUG, "packet len:%d", len[i]); + + if (unlikely(len[i] < hdr_size + ETHER_HDR_LEN)) { + PMD_RX_LOG(ERR, "Packet drop"); + nb_enqueued++; + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; + continue; + } + + rxm->port = rxvq->port_id; + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rxm->ol_flags = 0; + rxm->vlan_tci = 0; + + rxm->nb_segs = 1; + rxm->next = NULL; + rxm->pkt_len = (uint32_t)(len[i] - hdr_size); + rxm->data_len = (uint16_t)(len[i] - hdr_size); + + if (hw->vlan_strip) + rte_vlan_strip(rxm); + + VIRTIO_DUMP_PACKET(rxm, rxm->data_len); + + rx_pkts[nb_rx++] = rxm; + + rxvq->stats.bytes += rx_pkts[nb_rx - 1]->pkt_len; + virtio_update_packet_stats(&rxvq->stats, rxm); + } + + rxvq->stats.packets += nb_rx; + + /* Allocate new mbuf for the used descriptor */ + error = ENOSPC; + while (likely(!virtqueue_full(vq))) { + new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); + if (unlikely(new_mbuf == NULL)) { + struct rte_eth_dev *dev + = &rte_eth_devices[rxvq->port_id]; + dev->data->rx_mbuf_alloc_failed++; + break; + } + error = virtqueue_enqueue_recv_refill(vq, new_mbuf); + if (unlikely(error)) { + rte_pktmbuf_free(new_mbuf); + break; + } + nb_enqueued++; + } + + if (likely(nb_enqueued)) { + vq_update_avail_idx(vq); + + if (unlikely(virtqueue_kick_prepare(vq))) { + virtqueue_notify(vq); + PMD_RX_LOG(DEBUG, "Notified"); + } + } + + return nb_rx; +} + +uint16_t +virtio_recv_mergeable_pkts(void *rx_queue, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; + struct virtio_hw *hw; + struct rte_mbuf *rxm, *new_mbuf; + uint16_t nb_used, num, nb_rx; + uint32_t len[VIRTIO_MBUF_BURST_SZ]; + struct rte_mbuf *rcv_pkts[VIRTIO_MBUF_BURST_SZ]; + struct rte_mbuf *prev; + int error; + uint32_t i, nb_enqueued; + uint32_t seg_num; + uint16_t extra_idx; + uint32_t seg_res; + uint32_t hdr_size; + + nb_used = VIRTQUEUE_NUSED(vq); + + virtio_rmb(); + + PMD_RX_LOG(DEBUG, "used:%d", nb_used); + + hw = vq->hw; + nb_rx = 0; + i = 0; + nb_enqueued = 0; + seg_num = 0; + extra_idx = 0; + seg_res = 0; + hdr_size = hw->vtnet_hdr_size; + + while (i < nb_used) { + struct virtio_net_hdr_mrg_rxbuf *header; + + if (nb_rx == nb_pkts) + break; + + num = virtqueue_dequeue_burst_rx(vq, rcv_pkts, len, 1); + if (num != 1) + continue; + + i++; + + PMD_RX_LOG(DEBUG, "dequeue:%d", num); + PMD_RX_LOG(DEBUG, "packet len:%d", len[0]); + + rxm = rcv_pkts[0]; + + if (unlikely(len[0] < hdr_size + ETHER_HDR_LEN)) { + PMD_RX_LOG(ERR, "Packet drop"); + nb_enqueued++; + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; + continue; + } + + header = (struct virtio_net_hdr_mrg_rxbuf *)((char *)rxm->buf_addr + + RTE_PKTMBUF_HEADROOM - hdr_size); + seg_num = header->num_buffers; + + if (seg_num == 0) + seg_num = 1; + + rxm->data_off = RTE_PKTMBUF_HEADROOM; + rxm->nb_segs = seg_num; + rxm->next = NULL; + rxm->ol_flags = 0; + rxm->vlan_tci = 0; + rxm->pkt_len = (uint32_t)(len[0] - hdr_size); + rxm->data_len = (uint16_t)(len[0] - hdr_size); + + rxm->port = rxvq->port_id; + rx_pkts[nb_rx] = rxm; + prev = rxm; + + seg_res = seg_num - 1; + + while (seg_res != 0) { + /* + * Get extra segments for current uncompleted packet. + */ + uint16_t rcv_cnt = + RTE_MIN(seg_res, RTE_DIM(rcv_pkts)); + if (likely(VIRTQUEUE_NUSED(vq) >= rcv_cnt)) { + uint32_t rx_num = + virtqueue_dequeue_burst_rx(vq, + rcv_pkts, len, rcv_cnt); + i += rx_num; + rcv_cnt = rx_num; + } else { + PMD_RX_LOG(ERR, + "No enough segments for packet."); + nb_enqueued++; + virtio_discard_rxbuf(vq, rxm); + rxvq->stats.errors++; + break; + } + + extra_idx = 0; + + while (extra_idx < rcv_cnt) { + rxm = rcv_pkts[extra_idx]; + + rxm->data_off = RTE_PKTMBUF_HEADROOM - hdr_size; + rxm->next = NULL; + rxm->pkt_len = (uint32_t)(len[extra_idx]); + rxm->data_len = (uint16_t)(len[extra_idx]); + + if (prev) + prev->next = rxm; + + prev = rxm; + rx_pkts[nb_rx]->pkt_len += rxm->pkt_len; + extra_idx++; + }; + seg_res -= rcv_cnt; + } + + if (hw->vlan_strip) + rte_vlan_strip(rx_pkts[nb_rx]); + + VIRTIO_DUMP_PACKET(rx_pkts[nb_rx], + rx_pkts[nb_rx]->data_len); + + rxvq->stats.bytes += rx_pkts[nb_rx]->pkt_len; + virtio_update_packet_stats(&rxvq->stats, rx_pkts[nb_rx]); + nb_rx++; + } + + rxvq->stats.packets += nb_rx; + + /* Allocate new mbuf for the used descriptor */ + error = ENOSPC; + while (likely(!virtqueue_full(vq))) { + new_mbuf = rte_mbuf_raw_alloc(rxvq->mpool); + if (unlikely(new_mbuf == NULL)) { + struct rte_eth_dev *dev + = &rte_eth_devices[rxvq->port_id]; + dev->data->rx_mbuf_alloc_failed++; + break; + } + error = virtqueue_enqueue_recv_refill(vq, new_mbuf); + if (unlikely(error)) { + rte_pktmbuf_free(new_mbuf); + break; + } + nb_enqueued++; + } + + if (likely(nb_enqueued)) { + vq_update_avail_idx(vq); + + if (unlikely(virtqueue_kick_prepare(vq))) { + virtqueue_notify(vq); + PMD_RX_LOG(DEBUG, "Notified"); + } + } + + return nb_rx; +} + +uint16_t +virtio_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct virtnet_tx *txvq = tx_queue; + struct virtqueue *vq = txvq->vq; + struct virtio_hw *hw = vq->hw; + uint16_t hdr_size = hw->vtnet_hdr_size; + uint16_t nb_used, nb_tx; + int error; + + if (unlikely(nb_pkts < 1)) + return nb_pkts; + + PMD_TX_LOG(DEBUG, "%d packets to xmit", nb_pkts); + nb_used = VIRTQUEUE_NUSED(vq); + + virtio_rmb(); + if (likely(nb_used > vq->vq_nentries - vq->vq_free_thresh)) + virtio_xmit_cleanup(vq, nb_used); + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + struct rte_mbuf *txm = tx_pkts[nb_tx]; + int can_push = 0, use_indirect = 0, slots, need; + + /* Do VLAN tag insertion */ + if (unlikely(txm->ol_flags & PKT_TX_VLAN_PKT)) { + error = rte_vlan_insert(&txm); + if (unlikely(error)) { + rte_pktmbuf_free(txm); + continue; + } + } + + /* optimize ring usage */ + if (vtpci_with_feature(hw, VIRTIO_F_ANY_LAYOUT) && + rte_mbuf_refcnt_read(txm) == 1 && + RTE_MBUF_DIRECT(txm) && + txm->nb_segs == 1 && + rte_pktmbuf_headroom(txm) >= hdr_size && + rte_is_aligned(rte_pktmbuf_mtod(txm, char *), + __alignof__(struct virtio_net_hdr_mrg_rxbuf))) + can_push = 1; + else if (vtpci_with_feature(hw, VIRTIO_RING_F_INDIRECT_DESC) && + txm->nb_segs < VIRTIO_MAX_TX_INDIRECT) + use_indirect = 1; + + /* How many main ring entries are needed to this Tx? + * any_layout => number of segments + * indirect => 1 + * default => number of segments + 1 + */ + slots = use_indirect ? 1 : (txm->nb_segs + !can_push); + need = slots - vq->vq_free_cnt; + + /* Positive value indicates it need free vring descriptors */ + if (unlikely(need > 0)) { + nb_used = VIRTQUEUE_NUSED(vq); + virtio_rmb(); + need = RTE_MIN(need, (int)nb_used); + + virtio_xmit_cleanup(vq, need); + need = slots - vq->vq_free_cnt; + if (unlikely(need > 0)) { + PMD_TX_LOG(ERR, + "No free tx descriptors to transmit"); + break; + } + } + + /* Enqueue Packet buffers */ + virtqueue_enqueue_xmit(txvq, txm, slots, use_indirect, can_push); + + txvq->stats.bytes += txm->pkt_len; + virtio_update_packet_stats(&txvq->stats, txm); + } + + txvq->stats.packets += nb_tx; + + if (likely(nb_tx)) { + vq_update_avail_idx(vq); + + if (unlikely(virtqueue_kick_prepare(vq))) { + virtqueue_notify(vq); + PMD_TX_LOG(DEBUG, "Notified backend after xmit"); + } + } + + return nb_tx; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx.h b/src/dpdk/drivers/net/virtio/virtio_rxtx.h new file mode 100644 index 00000000..058b56a1 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx.h @@ -0,0 +1,95 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_RXTX_H_ +#define _VIRTIO_RXTX_H_ + +#define RTE_PMD_VIRTIO_RX_MAX_BURST 64 + +struct virtnet_stats { + uint64_t packets; + uint64_t bytes; + uint64_t errors; + uint64_t multicast; + uint64_t broadcast; + /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */ + uint64_t size_bins[8]; +}; + +struct virtnet_rx { + struct virtqueue *vq; + /* dummy mbuf, for wraparound when processing RX ring. */ + struct rte_mbuf fake_mbuf; + uint64_t mbuf_initializer; /**< value to init mbufs. */ + struct rte_mempool *mpool; /**< mempool for mbuf allocation */ + + uint16_t queue_id; /**< DPDK queue index. */ + uint8_t port_id; /**< Device port identifier. */ + + /* Statistics */ + struct virtnet_stats stats; + + const struct rte_memzone *mz; /**< mem zone to populate RX ring. */ +}; + +struct virtnet_tx { + struct virtqueue *vq; + /**< memzone to populate hdr. */ + const struct rte_memzone *virtio_net_hdr_mz; + phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */ + + uint16_t queue_id; /**< DPDK queue index. */ + uint8_t port_id; /**< Device port identifier. */ + + /* Statistics */ + struct virtnet_stats stats; + + const struct rte_memzone *mz; /**< mem zone to populate TX ring. */ +}; + +struct virtnet_ctl { + struct virtqueue *vq; + /**< memzone to populate hdr. */ + const struct rte_memzone *virtio_net_hdr_mz; + phys_addr_t virtio_net_hdr_mem; /**< hdr for each xmit packet */ + uint8_t port_id; /**< Device port identifier. */ + const struct rte_memzone *mz; /**< mem zone to populate RX ring. */ +}; + +#ifdef RTE_MACHINE_CPUFLAG_SSSE3 +int virtio_rxq_vec_setup(struct virtnet_rx *rxvq); + +int virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, + struct rte_mbuf *m); +#endif +#endif /* _VIRTIO_RXTX_H_ */ diff --git a/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c new file mode 100644 index 00000000..6517aa80 --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_rxtx_simple.c @@ -0,0 +1,425 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> + +#include <tmmintrin.h> + +#include <rte_cycles.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_branch_prediction.h> +#include <rte_mempool.h> +#include <rte_malloc.h> +#include <rte_mbuf.h> +#include <rte_ether.h> +#include <rte_ethdev.h> +#include <rte_prefetch.h> +#include <rte_string_fns.h> +#include <rte_errno.h> +#include <rte_byteorder.h> + +#include "virtio_logs.h" +#include "virtio_ethdev.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" + +#define RTE_VIRTIO_VPMD_RX_BURST 32 +#define RTE_VIRTIO_DESC_PER_LOOP 8 +#define RTE_VIRTIO_VPMD_RX_REARM_THRESH RTE_VIRTIO_VPMD_RX_BURST + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif + +int __attribute__((cold)) +virtqueue_enqueue_recv_refill_simple(struct virtqueue *vq, + struct rte_mbuf *cookie) +{ + struct vq_desc_extra *dxp; + struct vring_desc *start_dp; + uint16_t desc_idx; + + desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1); + dxp = &vq->vq_descx[desc_idx]; + dxp->cookie = (void *)cookie; + vq->sw_ring[desc_idx] = cookie; + + start_dp = vq->vq_ring.desc; + start_dp[desc_idx].addr = + VIRTIO_MBUF_ADDR(cookie, vq) + + RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size; + start_dp[desc_idx].len = cookie->buf_len - + RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size; + + vq->vq_free_cnt--; + vq->vq_avail_idx++; + + return 0; +} + +static inline void +virtio_rxq_rearm_vec(struct virtnet_rx *rxvq) +{ + int i; + uint16_t desc_idx; + struct rte_mbuf **sw_ring; + struct vring_desc *start_dp; + int ret; + struct virtqueue *vq = rxvq->vq; + + desc_idx = vq->vq_avail_idx & (vq->vq_nentries - 1); + sw_ring = &vq->sw_ring[desc_idx]; + start_dp = &vq->vq_ring.desc[desc_idx]; + + ret = rte_mempool_get_bulk(rxvq->mpool, (void **)sw_ring, + RTE_VIRTIO_VPMD_RX_REARM_THRESH); + if (unlikely(ret)) { + rte_eth_devices[rxvq->port_id].data->rx_mbuf_alloc_failed += + RTE_VIRTIO_VPMD_RX_REARM_THRESH; + return; + } + + for (i = 0; i < RTE_VIRTIO_VPMD_RX_REARM_THRESH; i++) { + uintptr_t p; + + p = (uintptr_t)&sw_ring[i]->rearm_data; + *(uint64_t *)p = rxvq->mbuf_initializer; + + start_dp[i].addr = + VIRTIO_MBUF_ADDR(sw_ring[i], vq) + + RTE_PKTMBUF_HEADROOM - vq->hw->vtnet_hdr_size; + start_dp[i].len = sw_ring[i]->buf_len - + RTE_PKTMBUF_HEADROOM + vq->hw->vtnet_hdr_size; + } + + vq->vq_avail_idx += RTE_VIRTIO_VPMD_RX_REARM_THRESH; + vq->vq_free_cnt -= RTE_VIRTIO_VPMD_RX_REARM_THRESH; + vq_update_avail_idx(vq); +} + +/* virtio vPMD receive routine, only accept(nb_pkts >= RTE_VIRTIO_DESC_PER_LOOP) + * + * This routine is for non-mergeable RX, one desc for each guest buffer. + * This routine is based on the RX ring layout optimization. Each entry in the + * avail ring points to the desc with the same index in the desc ring and this + * will never be changed in the driver. + * + * - nb_pkts < RTE_VIRTIO_DESC_PER_LOOP, just return no packet + */ +uint16_t +virtio_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts) +{ + struct virtnet_rx *rxvq = rx_queue; + struct virtqueue *vq = rxvq->vq; + uint16_t nb_used; + uint16_t desc_idx; + struct vring_used_elem *rused; + struct rte_mbuf **sw_ring; + struct rte_mbuf **sw_ring_end; + uint16_t nb_pkts_received; + __m128i shuf_msk1, shuf_msk2, len_adjust; + + shuf_msk1 = _mm_set_epi8( + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, /* vlan tci */ + 5, 4, /* dat len */ + 0xFF, 0xFF, 5, 4, /* pkt len */ + 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ + + ); + + shuf_msk2 = _mm_set_epi8( + 0xFF, 0xFF, 0xFF, 0xFF, + 0xFF, 0xFF, /* vlan tci */ + 13, 12, /* dat len */ + 0xFF, 0xFF, 13, 12, /* pkt len */ + 0xFF, 0xFF, 0xFF, 0xFF /* packet type */ + ); + + /* Subtract the header length. + * In which case do we need the header length in used->len ? + */ + len_adjust = _mm_set_epi16( + 0, 0, + 0, + (uint16_t)-vq->hw->vtnet_hdr_size, + 0, (uint16_t)-vq->hw->vtnet_hdr_size, + 0, 0); + + if (unlikely(nb_pkts < RTE_VIRTIO_DESC_PER_LOOP)) + return 0; + + nb_used = VIRTQUEUE_NUSED(vq); + + rte_compiler_barrier(); + + if (unlikely(nb_used == 0)) + return 0; + + nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, RTE_VIRTIO_DESC_PER_LOOP); + nb_used = RTE_MIN(nb_used, nb_pkts); + + desc_idx = (uint16_t)(vq->vq_used_cons_idx & (vq->vq_nentries - 1)); + rused = &vq->vq_ring.used->ring[desc_idx]; + sw_ring = &vq->sw_ring[desc_idx]; + sw_ring_end = &vq->sw_ring[vq->vq_nentries]; + + _mm_prefetch((const void *)rused, _MM_HINT_T0); + + if (vq->vq_free_cnt >= RTE_VIRTIO_VPMD_RX_REARM_THRESH) { + virtio_rxq_rearm_vec(rxvq); + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); + } + + for (nb_pkts_received = 0; + nb_pkts_received < nb_used;) { + __m128i desc[RTE_VIRTIO_DESC_PER_LOOP / 2]; + __m128i mbp[RTE_VIRTIO_DESC_PER_LOOP / 2]; + __m128i pkt_mb[RTE_VIRTIO_DESC_PER_LOOP]; + + mbp[0] = _mm_loadu_si128((__m128i *)(sw_ring + 0)); + desc[0] = _mm_loadu_si128((__m128i *)(rused + 0)); + _mm_storeu_si128((__m128i *)&rx_pkts[0], mbp[0]); + + mbp[1] = _mm_loadu_si128((__m128i *)(sw_ring + 2)); + desc[1] = _mm_loadu_si128((__m128i *)(rused + 2)); + _mm_storeu_si128((__m128i *)&rx_pkts[2], mbp[1]); + + mbp[2] = _mm_loadu_si128((__m128i *)(sw_ring + 4)); + desc[2] = _mm_loadu_si128((__m128i *)(rused + 4)); + _mm_storeu_si128((__m128i *)&rx_pkts[4], mbp[2]); + + mbp[3] = _mm_loadu_si128((__m128i *)(sw_ring + 6)); + desc[3] = _mm_loadu_si128((__m128i *)(rused + 6)); + _mm_storeu_si128((__m128i *)&rx_pkts[6], mbp[3]); + + pkt_mb[1] = _mm_shuffle_epi8(desc[0], shuf_msk2); + pkt_mb[0] = _mm_shuffle_epi8(desc[0], shuf_msk1); + pkt_mb[1] = _mm_add_epi16(pkt_mb[1], len_adjust); + pkt_mb[0] = _mm_add_epi16(pkt_mb[0], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[1]->rx_descriptor_fields1, + pkt_mb[1]); + _mm_storeu_si128((void *)&rx_pkts[0]->rx_descriptor_fields1, + pkt_mb[0]); + + pkt_mb[3] = _mm_shuffle_epi8(desc[1], shuf_msk2); + pkt_mb[2] = _mm_shuffle_epi8(desc[1], shuf_msk1); + pkt_mb[3] = _mm_add_epi16(pkt_mb[3], len_adjust); + pkt_mb[2] = _mm_add_epi16(pkt_mb[2], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[3]->rx_descriptor_fields1, + pkt_mb[3]); + _mm_storeu_si128((void *)&rx_pkts[2]->rx_descriptor_fields1, + pkt_mb[2]); + + pkt_mb[5] = _mm_shuffle_epi8(desc[2], shuf_msk2); + pkt_mb[4] = _mm_shuffle_epi8(desc[2], shuf_msk1); + pkt_mb[5] = _mm_add_epi16(pkt_mb[5], len_adjust); + pkt_mb[4] = _mm_add_epi16(pkt_mb[4], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[5]->rx_descriptor_fields1, + pkt_mb[5]); + _mm_storeu_si128((void *)&rx_pkts[4]->rx_descriptor_fields1, + pkt_mb[4]); + + pkt_mb[7] = _mm_shuffle_epi8(desc[3], shuf_msk2); + pkt_mb[6] = _mm_shuffle_epi8(desc[3], shuf_msk1); + pkt_mb[7] = _mm_add_epi16(pkt_mb[7], len_adjust); + pkt_mb[6] = _mm_add_epi16(pkt_mb[6], len_adjust); + _mm_storeu_si128((void *)&rx_pkts[7]->rx_descriptor_fields1, + pkt_mb[7]); + _mm_storeu_si128((void *)&rx_pkts[6]->rx_descriptor_fields1, + pkt_mb[6]); + + if (unlikely(nb_used <= RTE_VIRTIO_DESC_PER_LOOP)) { + if (sw_ring + nb_used <= sw_ring_end) + nb_pkts_received += nb_used; + else + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + if (unlikely(sw_ring + RTE_VIRTIO_DESC_PER_LOOP >= + sw_ring_end)) { + nb_pkts_received += sw_ring_end - sw_ring; + break; + } else { + nb_pkts_received += RTE_VIRTIO_DESC_PER_LOOP; + + rx_pkts += RTE_VIRTIO_DESC_PER_LOOP; + sw_ring += RTE_VIRTIO_DESC_PER_LOOP; + rused += RTE_VIRTIO_DESC_PER_LOOP; + nb_used -= RTE_VIRTIO_DESC_PER_LOOP; + } + } + } + + vq->vq_used_cons_idx += nb_pkts_received; + vq->vq_free_cnt += nb_pkts_received; + rxvq->stats.packets += nb_pkts_received; + return nb_pkts_received; +} + +#define VIRTIO_TX_FREE_THRESH 32 +#define VIRTIO_TX_MAX_FREE_BUF_SZ 32 +#define VIRTIO_TX_FREE_NR 32 +/* TODO: vq->tx_free_cnt could mean num of free slots so we could avoid shift */ +static inline void +virtio_xmit_cleanup(struct virtqueue *vq) +{ + uint16_t i, desc_idx; + uint32_t nb_free = 0; + struct rte_mbuf *m, *free[VIRTIO_TX_MAX_FREE_BUF_SZ]; + + desc_idx = (uint16_t)(vq->vq_used_cons_idx & + ((vq->vq_nentries >> 1) - 1)); + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (likely(m != NULL)) { + free[0] = m; + nb_free = 1; + for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (likely(m != NULL)) { + if (likely(m->pool == free[0]->pool)) + free[nb_free++] = m; + else { + rte_mempool_put_bulk(free[0]->pool, + (void **)free, + RTE_MIN(RTE_DIM(free), + nb_free)); + free[0] = m; + nb_free = 1; + } + } + } + rte_mempool_put_bulk(free[0]->pool, (void **)free, + RTE_MIN(RTE_DIM(free), nb_free)); + } else { + for (i = 1; i < VIRTIO_TX_FREE_NR; i++) { + m = (struct rte_mbuf *)vq->vq_descx[desc_idx++].cookie; + m = __rte_pktmbuf_prefree_seg(m); + if (m != NULL) + rte_mempool_put(m->pool, m); + } + } + + vq->vq_used_cons_idx += VIRTIO_TX_FREE_NR; + vq->vq_free_cnt += (VIRTIO_TX_FREE_NR << 1); +} + +uint16_t +virtio_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts) +{ + struct virtnet_tx *txvq = tx_queue; + struct virtqueue *vq = txvq->vq; + uint16_t nb_used; + uint16_t desc_idx; + struct vring_desc *start_dp; + uint16_t nb_tail, nb_commit; + int i; + uint16_t desc_idx_max = (vq->vq_nentries >> 1) - 1; + + nb_used = VIRTQUEUE_NUSED(vq); + rte_compiler_barrier(); + + if (nb_used >= VIRTIO_TX_FREE_THRESH) + virtio_xmit_cleanup(vq); + + nb_commit = nb_pkts = RTE_MIN((vq->vq_free_cnt >> 1), nb_pkts); + desc_idx = (uint16_t)(vq->vq_avail_idx & desc_idx_max); + start_dp = vq->vq_ring.desc; + nb_tail = (uint16_t) (desc_idx_max + 1 - desc_idx); + + if (nb_commit >= nb_tail) { + for (i = 0; i < nb_tail; i++) + vq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; + for (i = 0; i < nb_tail; i++) { + start_dp[desc_idx].addr = + VIRTIO_MBUF_DATA_DMA_ADDR(*tx_pkts, vq); + start_dp[desc_idx].len = (*tx_pkts)->pkt_len; + tx_pkts++; + desc_idx++; + } + nb_commit -= nb_tail; + desc_idx = 0; + } + for (i = 0; i < nb_commit; i++) + vq->vq_descx[desc_idx + i].cookie = tx_pkts[i]; + for (i = 0; i < nb_commit; i++) { + start_dp[desc_idx].addr = + VIRTIO_MBUF_DATA_DMA_ADDR(*tx_pkts, vq); + start_dp[desc_idx].len = (*tx_pkts)->pkt_len; + tx_pkts++; + desc_idx++; + } + + rte_compiler_barrier(); + + vq->vq_free_cnt -= (uint16_t)(nb_pkts << 1); + vq->vq_avail_idx += nb_pkts; + vq->vq_ring.avail->idx = vq->vq_avail_idx; + txvq->stats.packets += nb_pkts; + + if (likely(nb_pkts)) { + if (unlikely(virtqueue_kick_prepare(vq))) + virtqueue_notify(vq); + } + + return nb_pkts; +} + +int __attribute__((cold)) +virtio_rxq_vec_setup(struct virtnet_rx *rxq) +{ + uintptr_t p; + struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */ + + mb_def.nb_segs = 1; + mb_def.data_off = RTE_PKTMBUF_HEADROOM; + mb_def.port = rxq->port_id; + rte_mbuf_refcnt_set(&mb_def, 1); + + /* prevent compiler reordering: rearm_data covers previous fields */ + rte_compiler_barrier(); + p = (uintptr_t)&mb_def.rearm_data; + rxq->mbuf_initializer = *(uint64_t *)p; + + return 0; +} diff --git a/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c b/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c new file mode 100644 index 00000000..daef09bd --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtio_user_ethdev.c @@ -0,0 +1,476 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include <stdint.h> +#include <sys/types.h> +#include <unistd.h> + +#include <rte_malloc.h> +#include <rte_kvargs.h> + +#include "virtio_ethdev.h" +#include "virtio_logs.h" +#include "virtio_pci.h" +#include "virtqueue.h" +#include "virtio_rxtx.h" +#include "virtio_user/virtio_user_dev.h" + +#define virtio_user_get_dev(hw) \ + ((struct virtio_user_dev *)(hw)->virtio_user_dev) + +static void +virtio_user_read_dev_config(struct virtio_hw *hw, size_t offset, + void *dst, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (offset == offsetof(struct virtio_net_config, mac) && + length == ETHER_ADDR_LEN) { + for (i = 0; i < ETHER_ADDR_LEN; ++i) + ((uint8_t *)dst)[i] = dev->mac_addr[i]; + return; + } + + if (offset == offsetof(struct virtio_net_config, status)) + *(uint16_t *)dst = dev->status; + + if (offset == offsetof(struct virtio_net_config, max_virtqueue_pairs)) + *(uint16_t *)dst = dev->max_queue_pairs; +} + +static void +virtio_user_write_dev_config(struct virtio_hw *hw, size_t offset, + const void *src, int length) +{ + int i; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if ((offset == offsetof(struct virtio_net_config, mac)) && + (length == ETHER_ADDR_LEN)) + for (i = 0; i < ETHER_ADDR_LEN; ++i) + dev->mac_addr[i] = ((const uint8_t *)src)[i]; + else + PMD_DRV_LOG(ERR, "not supported offset=%zu, len=%d\n", + offset, length); +} + +static void +virtio_user_set_status(struct virtio_hw *hw, uint8_t status) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (status & VIRTIO_CONFIG_STATUS_DRIVER_OK) + virtio_user_start_device(dev); + dev->status = status; +} + +static void +virtio_user_reset(struct virtio_hw *hw) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + virtio_user_stop_device(dev); +} + +static uint8_t +virtio_user_get_status(struct virtio_hw *hw) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + return dev->status; +} + +static uint64_t +virtio_user_get_features(struct virtio_hw *hw) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + return dev->features; +} + +static void +virtio_user_set_features(struct virtio_hw *hw, uint64_t features) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + dev->features = features; +} + +static uint8_t +virtio_user_get_isr(struct virtio_hw *hw __rte_unused) +{ + /* When config interrupt happens, driver calls this function to query + * what kinds of change happen. Interrupt mode not supported for now. + */ + return 0; +} + +static uint16_t +virtio_user_set_config_irq(struct virtio_hw *hw __rte_unused, + uint16_t vec __rte_unused) +{ + return VIRTIO_MSI_NO_VECTOR; +} + +/* This function is to get the queue size, aka, number of descs, of a specified + * queue. Different with the VHOST_USER_GET_QUEUE_NUM, which is used to get the + * max supported queues. + */ +static uint16_t +virtio_user_get_queue_num(struct virtio_hw *hw, uint16_t queue_id __rte_unused) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + /* Currently, each queue has same queue size */ + return dev->queue_size; +} + +static int +virtio_user_setup_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + uint16_t queue_idx = vq->vq_queue_index; + uint64_t desc_addr, avail_addr, used_addr; + + desc_addr = (uintptr_t)vq->vq_ring_virt_mem; + avail_addr = desc_addr + vq->vq_nentries * sizeof(struct vring_desc); + used_addr = RTE_ALIGN_CEIL(avail_addr + offsetof(struct vring_avail, + ring[vq->vq_nentries]), + VIRTIO_PCI_VRING_ALIGN); + + dev->vrings[queue_idx].num = vq->vq_nentries; + dev->vrings[queue_idx].desc = (void *)(uintptr_t)desc_addr; + dev->vrings[queue_idx].avail = (void *)(uintptr_t)avail_addr; + dev->vrings[queue_idx].used = (void *)(uintptr_t)used_addr; + + return 0; +} + +static void +virtio_user_del_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + /* For legacy devices, write 0 to VIRTIO_PCI_QUEUE_PFN port, QEMU + * correspondingly stops the ioeventfds, and reset the status of + * the device. + * For modern devices, set queue desc, avail, used in PCI bar to 0, + * not see any more behavior in QEMU. + * + * Here we just care about what information to deliver to vhost-user + * or vhost-kernel. So we just close ioeventfd for now. + */ + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + close(dev->callfds[vq->vq_queue_index]); + close(dev->kickfds[vq->vq_queue_index]); +} + +static void +virtio_user_notify_queue(struct virtio_hw *hw, struct virtqueue *vq) +{ + uint64_t buf = 1; + struct virtio_user_dev *dev = virtio_user_get_dev(hw); + + if (hw->cvq && (hw->cvq->vq == vq)) { + virtio_user_handle_cq(dev, vq->vq_queue_index); + return; + } + + if (write(dev->kickfds[vq->vq_queue_index], &buf, sizeof(buf)) < 0) + PMD_DRV_LOG(ERR, "failed to kick backend: %s\n", + strerror(errno)); +} + +static const struct virtio_pci_ops virtio_user_ops = { + .read_dev_cfg = virtio_user_read_dev_config, + .write_dev_cfg = virtio_user_write_dev_config, + .reset = virtio_user_reset, + .get_status = virtio_user_get_status, + .set_status = virtio_user_set_status, + .get_features = virtio_user_get_features, + .set_features = virtio_user_set_features, + .get_isr = virtio_user_get_isr, + .set_config_irq = virtio_user_set_config_irq, + .get_queue_num = virtio_user_get_queue_num, + .setup_queue = virtio_user_setup_queue, + .del_queue = virtio_user_del_queue, + .notify_queue = virtio_user_notify_queue, +}; + +static const char *valid_args[] = { +#define VIRTIO_USER_ARG_QUEUES_NUM "queues" + VIRTIO_USER_ARG_QUEUES_NUM, +#define VIRTIO_USER_ARG_CQ_NUM "cq" + VIRTIO_USER_ARG_CQ_NUM, +#define VIRTIO_USER_ARG_MAC "mac" + VIRTIO_USER_ARG_MAC, +#define VIRTIO_USER_ARG_PATH "path" + VIRTIO_USER_ARG_PATH, +#define VIRTIO_USER_ARG_QUEUE_SIZE "queue_size" + VIRTIO_USER_ARG_QUEUE_SIZE, + NULL +}; + +#define VIRTIO_USER_DEF_CQ_EN 0 +#define VIRTIO_USER_DEF_Q_NUM 1 +#define VIRTIO_USER_DEF_Q_SZ 256 + +static int +get_string_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + if (!value || !extra_args) + return -EINVAL; + + *(char **)extra_args = strdup(value); + + return 0; +} + +static int +get_integer_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + if (!value || !extra_args) + return -EINVAL; + + *(uint64_t *)extra_args = strtoull(value, NULL, 0); + + return 0; +} + +static struct rte_eth_dev * +virtio_user_eth_dev_alloc(const char *name) +{ + struct rte_eth_dev *eth_dev; + struct rte_eth_dev_data *data; + struct virtio_hw *hw; + struct virtio_user_dev *dev; + + eth_dev = rte_eth_dev_allocate(name, RTE_ETH_DEV_VIRTUAL); + if (!eth_dev) { + PMD_INIT_LOG(ERR, "cannot alloc rte_eth_dev"); + return NULL; + } + + data = eth_dev->data; + + hw = rte_zmalloc(NULL, sizeof(*hw), 0); + if (!hw) { + PMD_INIT_LOG(ERR, "malloc virtio_hw failed"); + rte_eth_dev_release_port(eth_dev); + return NULL; + } + + dev = rte_zmalloc(NULL, sizeof(*dev), 0); + if (!dev) { + PMD_INIT_LOG(ERR, "malloc virtio_user_dev failed"); + rte_eth_dev_release_port(eth_dev); + rte_free(hw); + return NULL; + } + + hw->vtpci_ops = &virtio_user_ops; + hw->use_msix = 0; + hw->modern = 0; + hw->virtio_user_dev = dev; + data->dev_private = hw; + data->numa_node = SOCKET_ID_ANY; + data->kdrv = RTE_KDRV_NONE; + data->dev_flags = RTE_ETH_DEV_DETACHABLE; + eth_dev->pci_dev = NULL; + eth_dev->driver = NULL; + return eth_dev; +} + +/* Dev initialization routine. Invoked once for each virtio vdev at + * EAL init time, see rte_eal_dev_init(). + * Returns 0 on success. + */ +static int +virtio_user_pmd_devinit(const char *name, const char *params) +{ + struct rte_kvargs *kvlist = NULL; + struct rte_eth_dev *eth_dev; + struct virtio_hw *hw; + uint64_t queues = VIRTIO_USER_DEF_Q_NUM; + uint64_t cq = VIRTIO_USER_DEF_CQ_EN; + uint64_t queue_size = VIRTIO_USER_DEF_Q_SZ; + char *path = NULL; + char *mac_addr = NULL; + int ret = -1; + + if (!params || params[0] == '\0') { + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user", + VIRTIO_USER_ARG_QUEUE_SIZE); + goto end; + } + + kvlist = rte_kvargs_parse(params, valid_args); + if (!kvlist) { + PMD_INIT_LOG(ERR, "error when parsing param"); + goto end; + } + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_PATH) == 1) { + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_PATH, + &get_string_arg, &path); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", + VIRTIO_USER_ARG_PATH); + goto end; + } + } else { + PMD_INIT_LOG(ERR, "arg %s is mandatory for virtio_user\n", + VIRTIO_USER_ARG_QUEUE_SIZE); + goto end; + } + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_MAC) == 1) { + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_MAC, + &get_string_arg, &mac_addr); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", + VIRTIO_USER_ARG_MAC); + goto end; + } + } + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE) == 1) { + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUE_SIZE, + &get_integer_arg, &queue_size); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", + VIRTIO_USER_ARG_QUEUE_SIZE); + goto end; + } + } + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_QUEUES_NUM) == 1) { + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_QUEUES_NUM, + &get_integer_arg, &queues); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", + VIRTIO_USER_ARG_QUEUES_NUM); + goto end; + } + } + + if (rte_kvargs_count(kvlist, VIRTIO_USER_ARG_CQ_NUM) == 1) { + ret = rte_kvargs_process(kvlist, VIRTIO_USER_ARG_CQ_NUM, + &get_integer_arg, &cq); + if (ret < 0) { + PMD_INIT_LOG(ERR, "error to parse %s", + VIRTIO_USER_ARG_CQ_NUM); + goto end; + } + } else if (queues > 1) { + cq = 1; + } + + if (queues > 1 && cq == 0) { + PMD_INIT_LOG(ERR, "multi-q requires ctrl-q"); + goto end; + } + + eth_dev = virtio_user_eth_dev_alloc(name); + if (!eth_dev) { + PMD_INIT_LOG(ERR, "virtio_user fails to alloc device"); + goto end; + } + + hw = eth_dev->data->dev_private; + if (virtio_user_dev_init(hw->virtio_user_dev, path, queues, cq, + queue_size, mac_addr) < 0) + goto end; + + /* previously called by rte_eal_pci_probe() for physical dev */ + if (eth_virtio_dev_init(eth_dev) < 0) { + PMD_INIT_LOG(ERR, "eth_virtio_dev_init fails"); + goto end; + } + ret = 0; + +end: + if (kvlist) + rte_kvargs_free(kvlist); + if (path) + free(path); + if (mac_addr) + free(mac_addr); + return ret; +} + +/** Called by rte_eth_dev_detach() */ +static int +virtio_user_pmd_devuninit(const char *name) +{ + struct rte_eth_dev *eth_dev; + struct virtio_hw *hw; + struct virtio_user_dev *dev; + + if (!name) + return -EINVAL; + + PMD_DRV_LOG(INFO, "Un-Initializing %s\n", name); + eth_dev = rte_eth_dev_allocated(name); + if (!eth_dev) + return -ENODEV; + + /* make sure the device is stopped, queues freed */ + rte_eth_dev_close(eth_dev->data->port_id); + + hw = eth_dev->data->dev_private; + dev = hw->virtio_user_dev; + virtio_user_dev_uninit(dev); + + rte_free(eth_dev->data->dev_private); + rte_free(eth_dev->data); + rte_eth_dev_release_port(eth_dev); + + return 0; +} + +static struct rte_driver virtio_user_driver = { + .type = PMD_VDEV, + .init = virtio_user_pmd_devinit, + .uninit = virtio_user_pmd_devuninit, +}; + +PMD_REGISTER_DRIVER(virtio_user_driver, virtio_user); +DRIVER_REGISTER_PARAM_STRING(virtio_user, + "path=<path> " + "mac=<mac addr> " + "cq=<int> " + "queue_size=<int> " + "queues=<int>"); diff --git a/src/dpdk/drivers/net/virtio/virtqueue.c b/src/dpdk/drivers/net/virtio/virtqueue.c new file mode 100644 index 00000000..7f60e3ef --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtqueue.c @@ -0,0 +1,72 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include <stdint.h> + +#include <rte_mbuf.h> + +#include "virtqueue.h" +#include "virtio_logs.h" +#include "virtio_pci.h" + +void +virtqueue_disable_intr(struct virtqueue *vq) +{ + /* + * Set VRING_AVAIL_F_NO_INTERRUPT to hint host + * not to interrupt when it consumes packets + * Note: this is only considered a hint to the host + */ + vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT; +} + +/* + * Two types of mbuf to be cleaned: + * 1) mbuf that has been consumed by backend but not used by virtio. + * 2) mbuf that hasn't been consued by backend. + */ +struct rte_mbuf * +virtqueue_detatch_unused(struct virtqueue *vq) +{ + struct rte_mbuf *cookie; + int idx; + + if (vq != NULL) + for (idx = 0; idx < vq->vq_nentries; idx++) { + cookie = vq->vq_descx[idx].cookie; + if (cookie != NULL) { + vq->vq_descx[idx].cookie = NULL; + return cookie; + } + } + return NULL; +} diff --git a/src/dpdk/drivers/net/virtio/virtqueue.h b/src/dpdk/drivers/net/virtio/virtqueue.h new file mode 100644 index 00000000..6737b81d --- /dev/null +++ b/src/dpdk/drivers/net/virtio/virtqueue.h @@ -0,0 +1,347 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTQUEUE_H_ +#define _VIRTQUEUE_H_ + +#include <stdint.h> + +#include <rte_atomic.h> +#include <rte_memory.h> +#include <rte_memzone.h> +#include <rte_mempool.h> + +#include "virtio_pci.h" +#include "virtio_ring.h" +#include "virtio_logs.h" + +struct rte_mbuf; + +/* + * Per virtio_config.h in Linux. + * For virtio_pci on SMP, we don't need to order with respect to MMIO + * accesses through relaxed memory I/O windows, so smp_mb() et al are + * sufficient. + * + */ +#define virtio_mb() rte_smp_mb() +#define virtio_rmb() rte_smp_rmb() +#define virtio_wmb() rte_smp_wmb() + +#ifdef RTE_PMD_PACKET_PREFETCH +#define rte_packet_prefetch(p) rte_prefetch1(p) +#else +#define rte_packet_prefetch(p) do {} while(0) +#endif + +#define VIRTQUEUE_MAX_NAME_SZ 32 + +#ifdef RTE_VIRTIO_USER +/** + * Return the physical address (or virtual address in case of + * virtio-user) of mbuf data buffer. + */ +#define VIRTIO_MBUF_ADDR(mb, vq) (*(uint64_t *)((uintptr_t)(mb) + (vq)->offset)) +#else +#define VIRTIO_MBUF_ADDR(mb, vq) ((mb)->buf_physaddr) +#endif + +/** + * Return the physical address (or virtual address in case of + * virtio-user) of mbuf data buffer, taking care of mbuf data offset + */ +#define VIRTIO_MBUF_DATA_DMA_ADDR(mb, vq) \ + (VIRTIO_MBUF_ADDR(mb, vq) + (mb)->data_off) + +#define VTNET_SQ_RQ_QUEUE_IDX 0 +#define VTNET_SQ_TQ_QUEUE_IDX 1 +#define VTNET_SQ_CQ_QUEUE_IDX 2 + +enum { VTNET_RQ = 0, VTNET_TQ = 1, VTNET_CQ = 2 }; +/** + * The maximum virtqueue size is 2^15. Use that value as the end of + * descriptor chain terminator since it will never be a valid index + * in the descriptor table. This is used to verify we are correctly + * handling vq_free_cnt. + */ +#define VQ_RING_DESC_CHAIN_END 32768 + +/** + * Control the RX mode, ie. promiscuous, allmulti, etc... + * All commands require an "out" sg entry containing a 1 byte + * state value, zero = disable, non-zero = enable. Commands + * 0 and 1 are supported with the VIRTIO_NET_F_CTRL_RX feature. + * Commands 2-5 are added with VIRTIO_NET_F_CTRL_RX_EXTRA. + */ +#define VIRTIO_NET_CTRL_RX 0 +#define VIRTIO_NET_CTRL_RX_PROMISC 0 +#define VIRTIO_NET_CTRL_RX_ALLMULTI 1 +#define VIRTIO_NET_CTRL_RX_ALLUNI 2 +#define VIRTIO_NET_CTRL_RX_NOMULTI 3 +#define VIRTIO_NET_CTRL_RX_NOUNI 4 +#define VIRTIO_NET_CTRL_RX_NOBCAST 5 + +/** + * Control the MAC + * + * The MAC filter table is managed by the hypervisor, the guest should + * assume the size is infinite. Filtering should be considered + * non-perfect, ie. based on hypervisor resources, the guest may + * received packets from sources not specified in the filter list. + * + * In addition to the class/cmd header, the TABLE_SET command requires + * two out scatterlists. Each contains a 4 byte count of entries followed + * by a concatenated byte stream of the ETH_ALEN MAC addresses. The + * first sg list contains unicast addresses, the second is for multicast. + * This functionality is present if the VIRTIO_NET_F_CTRL_RX feature + * is available. + * + * The ADDR_SET command requests one out scatterlist, it contains a + * 6 bytes MAC address. This functionality is present if the + * VIRTIO_NET_F_CTRL_MAC_ADDR feature is available. + */ +struct virtio_net_ctrl_mac { + uint32_t entries; + uint8_t macs[][ETHER_ADDR_LEN]; +} __attribute__((__packed__)); + +#define VIRTIO_NET_CTRL_MAC 1 + #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0 + #define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 + +/** + * Control VLAN filtering + * + * The VLAN filter table is controlled via a simple ADD/DEL interface. + * VLAN IDs not added may be filtered by the hypervisor. Del is the + * opposite of add. Both commands expect an out entry containing a 2 + * byte VLAN ID. VLAN filtering is available with the + * VIRTIO_NET_F_CTRL_VLAN feature bit. + */ +#define VIRTIO_NET_CTRL_VLAN 2 +#define VIRTIO_NET_CTRL_VLAN_ADD 0 +#define VIRTIO_NET_CTRL_VLAN_DEL 1 + +struct virtio_net_ctrl_hdr { + uint8_t class; + uint8_t cmd; +} __attribute__((packed)); + +typedef uint8_t virtio_net_ctrl_ack; + +#define VIRTIO_NET_OK 0 +#define VIRTIO_NET_ERR 1 + +#define VIRTIO_MAX_CTRL_DATA 2048 + +struct virtio_pmd_ctrl { + struct virtio_net_ctrl_hdr hdr; + virtio_net_ctrl_ack status; + uint8_t data[VIRTIO_MAX_CTRL_DATA]; +}; + +struct vq_desc_extra { + void *cookie; + uint16_t ndescs; +}; + +struct virtqueue { + struct virtio_hw *hw; /**< virtio_hw structure pointer. */ + struct vring vq_ring; /**< vring keeping desc, used and avail */ + /** + * Last consumed descriptor in the used table, + * trails vq_ring.used->idx. + */ + uint16_t vq_used_cons_idx; + uint16_t vq_nentries; /**< vring desc numbers */ + uint16_t vq_free_cnt; /**< num of desc available */ + uint16_t vq_avail_idx; /**< sync until needed */ + uint16_t vq_free_thresh; /**< free threshold */ + + void *vq_ring_virt_mem; /**< linear address of vring*/ + unsigned int vq_ring_size; + + phys_addr_t vq_ring_mem; /**< physical address of vring, + * or virtual address for virtio_user. */ + + /** + * Head of the free chain in the descriptor table. If + * there are no free descriptors, this will be set to + * VQ_RING_DESC_CHAIN_END. + */ + uint16_t vq_desc_head_idx; + uint16_t vq_desc_tail_idx; + uint16_t vq_queue_index; /**< PCI queue index */ + uint16_t offset; /**< relative offset to obtain addr in mbuf */ + uint16_t *notify_addr; + int configured; + struct rte_mbuf **sw_ring; /**< RX software ring. */ + struct vq_desc_extra vq_descx[0]; +}; + +/* If multiqueue is provided by host, then we suppport it. */ +#define VIRTIO_NET_CTRL_MQ 4 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET 0 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN 1 +#define VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX 0x8000 + +#define VIRTIO_NET_CTRL_MAC_ADDR_SET 1 + +/** + * This is the first element of the scatter-gather list. If you don't + * specify GSO or CSUM features, you can simply ignore the header. + */ +struct virtio_net_hdr { +#define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /**< Use csum_start,csum_offset*/ + uint8_t flags; +#define VIRTIO_NET_HDR_GSO_NONE 0 /**< Not a GSO frame */ +#define VIRTIO_NET_HDR_GSO_TCPV4 1 /**< GSO frame, IPv4 TCP (TSO) */ +#define VIRTIO_NET_HDR_GSO_UDP 3 /**< GSO frame, IPv4 UDP (UFO) */ +#define VIRTIO_NET_HDR_GSO_TCPV6 4 /**< GSO frame, IPv6 TCP */ +#define VIRTIO_NET_HDR_GSO_ECN 0x80 /**< TCP has ECN set */ + uint8_t gso_type; + uint16_t hdr_len; /**< Ethernet + IP + tcp/udp hdrs */ + uint16_t gso_size; /**< Bytes to append to hdr_len per frame */ + uint16_t csum_start; /**< Position to start checksumming from */ + uint16_t csum_offset; /**< Offset after that to place checksum */ +}; + +/** + * This is the version of the header to use when the MRG_RXBUF + * feature has been negotiated. + */ +struct virtio_net_hdr_mrg_rxbuf { + struct virtio_net_hdr hdr; + uint16_t num_buffers; /**< Number of merged rx buffers */ +}; + +/* Region reserved to allow for transmit header and indirect ring */ +#define VIRTIO_MAX_TX_INDIRECT 8 +struct virtio_tx_region { + struct virtio_net_hdr_mrg_rxbuf tx_hdr; + struct vring_desc tx_indir[VIRTIO_MAX_TX_INDIRECT] + __attribute__((__aligned__(16))); +}; + +/* Chain all the descriptors in the ring with an END */ +static inline void +vring_desc_init(struct vring_desc *dp, uint16_t n) +{ + uint16_t i; + + for (i = 0; i < n - 1; i++) + dp[i].next = (uint16_t)(i + 1); + dp[i].next = VQ_RING_DESC_CHAIN_END; +} + +/** + * Tell the backend not to interrupt us. + */ +void virtqueue_disable_intr(struct virtqueue *vq); +/** + * Dump virtqueue internal structures, for debug purpose only. + */ +void virtqueue_dump(struct virtqueue *vq); +/** + * Get all mbufs to be freed. + */ +struct rte_mbuf *virtqueue_detatch_unused(struct virtqueue *vq); + +static inline int +virtqueue_full(const struct virtqueue *vq) +{ + return vq->vq_free_cnt == 0; +} + +#define VIRTQUEUE_NUSED(vq) ((uint16_t)((vq)->vq_ring.used->idx - (vq)->vq_used_cons_idx)) + +static inline void +vq_update_avail_idx(struct virtqueue *vq) +{ + virtio_wmb(); + vq->vq_ring.avail->idx = vq->vq_avail_idx; +} + +static inline void +vq_update_avail_ring(struct virtqueue *vq, uint16_t desc_idx) +{ + uint16_t avail_idx; + /* + * Place the head of the descriptor chain into the next slot and make + * it usable to the host. The chain is made available now rather than + * deferring to virtqueue_notify() in the hopes that if the host is + * currently running on another CPU, we can keep it processing the new + * descriptor. + */ + avail_idx = (uint16_t)(vq->vq_avail_idx & (vq->vq_nentries - 1)); + if (unlikely(vq->vq_ring.avail->ring[avail_idx] != desc_idx)) + vq->vq_ring.avail->ring[avail_idx] = desc_idx; + vq->vq_avail_idx++; +} + +static inline int +virtqueue_kick_prepare(struct virtqueue *vq) +{ + return !(vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY); +} + +static inline void +virtqueue_notify(struct virtqueue *vq) +{ + /* + * Ensure updated avail->idx is visible to host. + * For virtio on IA, the notificaiton is through io port operation + * which is a serialization instruction itself. + */ + vq->hw->vtpci_ops->notify_queue(vq->hw, vq); +} + +#ifdef RTE_LIBRTE_VIRTIO_DEBUG_DUMP +#define VIRTQUEUE_DUMP(vq) do { \ + uint16_t used_idx, nused; \ + used_idx = (vq)->vq_ring.used->idx; \ + nused = (uint16_t)(used_idx - (vq)->vq_used_cons_idx); \ + PMD_INIT_LOG(DEBUG, \ + "VQ: - size=%d; free=%d; used=%d; desc_head_idx=%d;" \ + " avail.idx=%d; used_cons_idx=%d; used.idx=%d;" \ + " avail.flags=0x%x; used.flags=0x%x", \ + (vq)->vq_nentries, (vq)->vq_free_cnt, nused, \ + (vq)->vq_desc_head_idx, (vq)->vq_ring.avail->idx, \ + (vq)->vq_used_cons_idx, (vq)->vq_ring.used->idx, \ + (vq)->vq_ring.avail->flags, (vq)->vq_ring.used->flags); \ +} while (0) +#else +#define VIRTQUEUE_DUMP(vq) do { } while (0) +#endif + +#endif /* _VIRTQUEUE_H_ */ |